| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1 | /* String (str/bytes) object implementation */ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 2 |  | 
 | 3 | #define PY_SSIZE_T_CLEAN | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 5 | #include "Python.h" | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 6 | #include <ctype.h> | 
| Mark Dickinson | 826f3fe | 2008-12-05 21:55:28 +0000 | [diff] [blame] | 7 | #include <stddef.h> | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 8 |  | 
 | 9 | #ifdef COUNT_ALLOCS | 
| Martin v. Löwis | b90304a | 2009-01-07 18:40:40 +0000 | [diff] [blame] | 10 | Py_ssize_t null_strings, one_strings; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 11 | #endif | 
 | 12 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 13 | static PyStringObject *characters[UCHAR_MAX + 1]; | 
 | 14 | static PyStringObject *nullstring; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 15 |  | 
 | 16 | /* This dictionary holds all interned strings.  Note that references to | 
 | 17 |    strings in this dictionary are *not* counted in the string's ob_refcnt. | 
 | 18 |    When the interned string reaches a refcnt of 0 the string deallocation | 
 | 19 |    function will delete the reference from this dictionary. | 
 | 20 |  | 
 | 21 |    Another way to look at this is to say that the actual reference | 
 | 22 |    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0) | 
 | 23 | */ | 
 | 24 | static PyObject *interned; | 
 | 25 |  | 
| Mark Dickinson | 826f3fe | 2008-12-05 21:55:28 +0000 | [diff] [blame] | 26 | /* PyStringObject_SIZE gives the basic size of a string; any memory allocation | 
 | 27 |    for a string of length n should request PyStringObject_SIZE + n bytes. | 
 | 28 |  | 
 | 29 |    Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves | 
 | 30 |    3 bytes per string allocation on a typical system. | 
 | 31 | */ | 
 | 32 | #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1) | 
 | 33 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 34 | /* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 35 |    For PyString_FromString(), the parameter `str' points to a null-terminated | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 36 |    string containing exactly `size' bytes. | 
 | 37 |  | 
| Martin Panter | 200a615 | 2016-05-30 04:04:50 +0000 | [diff] [blame] | 38 |    For PyString_FromStringAndSize(), the parameter `str' is | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 39 |    either NULL or else points to a string containing at least `size' bytes. | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 40 |    For PyString_FromStringAndSize(), the string in the `str' parameter does | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 41 |    not have to be null-terminated.  (Therefore it is safe to construct a | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 42 |    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.) | 
 | 43 |    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1' | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 44 |    bytes (setting the last byte to the null terminating character) and you can | 
 | 45 |    fill in the data yourself.  If `str' is non-NULL then the resulting | 
 | 46 |    PyString object must be treated as immutable and you must not fill in nor | 
 | 47 |    alter the data yourself, since the strings may be shared. | 
 | 48 |  | 
 | 49 |    The PyObject member `op->ob_size', which denotes the number of "extra | 
 | 50 |    items" in a variable-size object, will contain the number of bytes | 
| Eli Bendersky | 72de205 | 2011-03-24 22:38:25 +0200 | [diff] [blame] | 51 |    allocated for string data, not counting the null terminating character. | 
 | 52 |    It is therefore equal to the `size' parameter (for | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 53 |    PyString_FromStringAndSize()) or the length of the string in the `str' | 
 | 54 |    parameter (for PyString_FromString()). | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 55 | */ | 
 | 56 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 57 | PyString_FromStringAndSize(const char *str, Py_ssize_t size) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 58 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 59 |     register PyStringObject *op; | 
 | 60 |     if (size < 0) { | 
 | 61 |         PyErr_SetString(PyExc_SystemError, | 
 | 62 |             "Negative size passed to PyString_FromStringAndSize"); | 
 | 63 |         return NULL; | 
 | 64 |     } | 
 | 65 |     if (size == 0 && (op = nullstring) != NULL) { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 66 | #ifdef COUNT_ALLOCS | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 67 |         null_strings++; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 68 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 69 |         Py_INCREF(op); | 
 | 70 |         return (PyObject *)op; | 
 | 71 |     } | 
 | 72 |     if (size == 1 && str != NULL && | 
 | 73 |         (op = characters[*str & UCHAR_MAX]) != NULL) | 
 | 74 |     { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 75 | #ifdef COUNT_ALLOCS | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 76 |         one_strings++; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 77 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 78 |         Py_INCREF(op); | 
 | 79 |         return (PyObject *)op; | 
 | 80 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 81 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 82 |     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { | 
 | 83 |         PyErr_SetString(PyExc_OverflowError, "string is too large"); | 
 | 84 |         return NULL; | 
 | 85 |     } | 
| Neal Norwitz | e7d8be8 | 2008-07-31 17:17:14 +0000 | [diff] [blame] | 86 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 87 |     /* Inline PyObject_NewVar */ | 
 | 88 |     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); | 
 | 89 |     if (op == NULL) | 
 | 90 |         return PyErr_NoMemory(); | 
| Martin Panter | 646b528 | 2016-06-21 23:58:05 +0000 | [diff] [blame] | 91 |     (void)PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 92 |     op->ob_shash = -1; | 
 | 93 |     op->ob_sstate = SSTATE_NOT_INTERNED; | 
 | 94 |     if (str != NULL) | 
 | 95 |         Py_MEMCPY(op->ob_sval, str, size); | 
 | 96 |     op->ob_sval[size] = '\0'; | 
 | 97 |     /* share short strings */ | 
 | 98 |     if (size == 0) { | 
 | 99 |         PyObject *t = (PyObject *)op; | 
 | 100 |         PyString_InternInPlace(&t); | 
 | 101 |         op = (PyStringObject *)t; | 
 | 102 |         nullstring = op; | 
 | 103 |         Py_INCREF(op); | 
 | 104 |     } else if (size == 1 && str != NULL) { | 
 | 105 |         PyObject *t = (PyObject *)op; | 
 | 106 |         PyString_InternInPlace(&t); | 
 | 107 |         op = (PyStringObject *)t; | 
 | 108 |         characters[*str & UCHAR_MAX] = op; | 
 | 109 |         Py_INCREF(op); | 
 | 110 |     } | 
 | 111 |     return (PyObject *) op; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 112 | } | 
 | 113 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 114 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 115 | PyString_FromString(const char *str) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 116 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 117 |     register size_t size; | 
 | 118 |     register PyStringObject *op; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 119 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 120 |     assert(str != NULL); | 
 | 121 |     size = strlen(str); | 
 | 122 |     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { | 
 | 123 |         PyErr_SetString(PyExc_OverflowError, | 
 | 124 |             "string is too long for a Python string"); | 
 | 125 |         return NULL; | 
 | 126 |     } | 
 | 127 |     if (size == 0 && (op = nullstring) != NULL) { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 128 | #ifdef COUNT_ALLOCS | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 129 |         null_strings++; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 130 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 131 |         Py_INCREF(op); | 
 | 132 |         return (PyObject *)op; | 
 | 133 |     } | 
 | 134 |     if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 135 | #ifdef COUNT_ALLOCS | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 136 |         one_strings++; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 137 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 138 |         Py_INCREF(op); | 
 | 139 |         return (PyObject *)op; | 
 | 140 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 141 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 142 |     /* Inline PyObject_NewVar */ | 
 | 143 |     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); | 
 | 144 |     if (op == NULL) | 
 | 145 |         return PyErr_NoMemory(); | 
| Martin Panter | 646b528 | 2016-06-21 23:58:05 +0000 | [diff] [blame] | 146 |     (void)PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 147 |     op->ob_shash = -1; | 
 | 148 |     op->ob_sstate = SSTATE_NOT_INTERNED; | 
 | 149 |     Py_MEMCPY(op->ob_sval, str, size+1); | 
 | 150 |     /* share short strings */ | 
 | 151 |     if (size == 0) { | 
 | 152 |         PyObject *t = (PyObject *)op; | 
 | 153 |         PyString_InternInPlace(&t); | 
 | 154 |         op = (PyStringObject *)t; | 
 | 155 |         nullstring = op; | 
 | 156 |         Py_INCREF(op); | 
 | 157 |     } else if (size == 1) { | 
 | 158 |         PyObject *t = (PyObject *)op; | 
 | 159 |         PyString_InternInPlace(&t); | 
 | 160 |         op = (PyStringObject *)t; | 
 | 161 |         characters[*str & UCHAR_MAX] = op; | 
 | 162 |         Py_INCREF(op); | 
 | 163 |     } | 
 | 164 |     return (PyObject *) op; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 165 | } | 
 | 166 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 167 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 168 | PyString_FromFormatV(const char *format, va_list vargs) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 169 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 170 |     va_list count; | 
 | 171 |     Py_ssize_t n = 0; | 
 | 172 |     const char* f; | 
 | 173 |     char *s; | 
 | 174 |     PyObject* string; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 175 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 176 | #ifdef VA_LIST_IS_ARRAY | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 177 |     Py_MEMCPY(count, vargs, sizeof(va_list)); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 178 | #else | 
 | 179 | #ifdef  __va_copy | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 180 |     __va_copy(count, vargs); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 181 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 182 |     count = vargs; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 183 | #endif | 
 | 184 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 185 |     /* step 1: figure out how large a buffer we need */ | 
 | 186 |     for (f = format; *f; f++) { | 
 | 187 |         if (*f == '%') { | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 188 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 189 |             int longlongflag = 0; | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 190 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 191 |             const char* p = f; | 
 | 192 |             while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) | 
 | 193 |                 ; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 194 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 195 |             /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since | 
 | 196 |              * they don't affect the amount of space we reserve. | 
 | 197 |              */ | 
 | 198 |             if (*f == 'l') { | 
 | 199 |                 if (f[1] == 'd' || f[1] == 'u') { | 
 | 200 |                     ++f; | 
 | 201 |                 } | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 202 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 203 |                 else if (f[1] == 'l' && | 
 | 204 |                          (f[2] == 'd' || f[2] == 'u')) { | 
 | 205 |                     longlongflag = 1; | 
 | 206 |                     f += 2; | 
 | 207 |                 } | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 208 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 209 |             } | 
 | 210 |             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { | 
 | 211 |                 ++f; | 
 | 212 |             } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 213 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 214 |             switch (*f) { | 
 | 215 |             case 'c': | 
 | 216 |                 (void)va_arg(count, int); | 
 | 217 |                 /* fall through... */ | 
 | 218 |             case '%': | 
 | 219 |                 n++; | 
 | 220 |                 break; | 
 | 221 |             case 'd': case 'u': case 'i': case 'x': | 
 | 222 |                 (void) va_arg(count, int); | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 223 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 224 |                 /* Need at most | 
 | 225 |                    ceil(log10(256)*SIZEOF_LONG_LONG) digits, | 
 | 226 |                    plus 1 for the sign.  53/22 is an upper | 
 | 227 |                    bound for log10(256). */ | 
 | 228 |                 if (longlongflag) | 
 | 229 |                     n += 2 + (SIZEOF_LONG_LONG*53-1) / 22; | 
 | 230 |                 else | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 231 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 232 |                     /* 20 bytes is enough to hold a 64-bit | 
 | 233 |                        integer.  Decimal takes the most | 
 | 234 |                        space.  This isn't enough for | 
 | 235 |                        octal. */ | 
 | 236 |                     n += 20; | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 237 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 238 |                 break; | 
 | 239 |             case 's': | 
 | 240 |                 s = va_arg(count, char*); | 
 | 241 |                 n += strlen(s); | 
 | 242 |                 break; | 
 | 243 |             case 'p': | 
 | 244 |                 (void) va_arg(count, int); | 
 | 245 |                 /* maximum 64-bit pointer representation: | 
 | 246 |                  * 0xffffffffffffffff | 
 | 247 |                  * so 19 characters is enough. | 
 | 248 |                  * XXX I count 18 -- what's the extra for? | 
 | 249 |                  */ | 
 | 250 |                 n += 19; | 
 | 251 |                 break; | 
 | 252 |             default: | 
 | 253 |                 /* if we stumble upon an unknown | 
 | 254 |                    formatting code, copy the rest of | 
 | 255 |                    the format string to the output | 
 | 256 |                    string. (we cannot just skip the | 
 | 257 |                    code, since there's no way to know | 
 | 258 |                    what's in the argument list) */ | 
 | 259 |                 n += strlen(p); | 
 | 260 |                 goto expand; | 
 | 261 |             } | 
 | 262 |         } else | 
 | 263 |             n++; | 
 | 264 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 265 |  expand: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 266 |     /* step 2: fill the buffer */ | 
 | 267 |     /* Since we've analyzed how much space we need for the worst case, | 
 | 268 |        use sprintf directly instead of the slower PyOS_snprintf. */ | 
 | 269 |     string = PyString_FromStringAndSize(NULL, n); | 
 | 270 |     if (!string) | 
 | 271 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 272 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 273 |     s = PyString_AsString(string); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 274 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 275 |     for (f = format; *f; f++) { | 
 | 276 |         if (*f == '%') { | 
 | 277 |             const char* p = f++; | 
 | 278 |             Py_ssize_t i; | 
 | 279 |             int longflag = 0; | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 280 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 281 |             int longlongflag = 0; | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 282 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 283 |             int size_tflag = 0; | 
 | 284 |             /* parse the width.precision part (we're only | 
 | 285 |                interested in the precision value, if any) */ | 
 | 286 |             n = 0; | 
 | 287 |             while (isdigit(Py_CHARMASK(*f))) | 
 | 288 |                 n = (n*10) + *f++ - '0'; | 
 | 289 |             if (*f == '.') { | 
 | 290 |                 f++; | 
 | 291 |                 n = 0; | 
 | 292 |                 while (isdigit(Py_CHARMASK(*f))) | 
 | 293 |                     n = (n*10) + *f++ - '0'; | 
 | 294 |             } | 
 | 295 |             while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) | 
 | 296 |                 f++; | 
 | 297 |             /* Handle %ld, %lu, %lld and %llu. */ | 
 | 298 |             if (*f == 'l') { | 
 | 299 |                 if (f[1] == 'd' || f[1] == 'u') { | 
 | 300 |                     longflag = 1; | 
 | 301 |                     ++f; | 
 | 302 |                 } | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 303 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 304 |                 else if (f[1] == 'l' && | 
 | 305 |                          (f[2] == 'd' || f[2] == 'u')) { | 
 | 306 |                     longlongflag = 1; | 
 | 307 |                     f += 2; | 
 | 308 |                 } | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 309 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 310 |             } | 
 | 311 |             /* handle the size_t flag. */ | 
 | 312 |             else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { | 
 | 313 |                 size_tflag = 1; | 
 | 314 |                 ++f; | 
 | 315 |             } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 316 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 317 |             switch (*f) { | 
 | 318 |             case 'c': | 
 | 319 |                 *s++ = va_arg(vargs, int); | 
 | 320 |                 break; | 
 | 321 |             case 'd': | 
 | 322 |                 if (longflag) | 
 | 323 |                     sprintf(s, "%ld", va_arg(vargs, long)); | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 324 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 325 |                 else if (longlongflag) | 
 | 326 |                     sprintf(s, "%" PY_FORMAT_LONG_LONG "d", | 
 | 327 |                         va_arg(vargs, PY_LONG_LONG)); | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 328 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 329 |                 else if (size_tflag) | 
 | 330 |                     sprintf(s, "%" PY_FORMAT_SIZE_T "d", | 
 | 331 |                         va_arg(vargs, Py_ssize_t)); | 
 | 332 |                 else | 
 | 333 |                     sprintf(s, "%d", va_arg(vargs, int)); | 
 | 334 |                 s += strlen(s); | 
 | 335 |                 break; | 
 | 336 |             case 'u': | 
 | 337 |                 if (longflag) | 
 | 338 |                     sprintf(s, "%lu", | 
 | 339 |                         va_arg(vargs, unsigned long)); | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 340 | #ifdef HAVE_LONG_LONG | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 341 |                 else if (longlongflag) | 
 | 342 |                     sprintf(s, "%" PY_FORMAT_LONG_LONG "u", | 
 | 343 |                         va_arg(vargs, PY_LONG_LONG)); | 
| Mark Dickinson | 82864d1 | 2009-11-15 16:18:58 +0000 | [diff] [blame] | 344 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 345 |                 else if (size_tflag) | 
 | 346 |                     sprintf(s, "%" PY_FORMAT_SIZE_T "u", | 
 | 347 |                         va_arg(vargs, size_t)); | 
 | 348 |                 else | 
 | 349 |                     sprintf(s, "%u", | 
 | 350 |                         va_arg(vargs, unsigned int)); | 
 | 351 |                 s += strlen(s); | 
 | 352 |                 break; | 
 | 353 |             case 'i': | 
 | 354 |                 sprintf(s, "%i", va_arg(vargs, int)); | 
 | 355 |                 s += strlen(s); | 
 | 356 |                 break; | 
 | 357 |             case 'x': | 
 | 358 |                 sprintf(s, "%x", va_arg(vargs, int)); | 
 | 359 |                 s += strlen(s); | 
 | 360 |                 break; | 
 | 361 |             case 's': | 
 | 362 |                 p = va_arg(vargs, char*); | 
 | 363 |                 i = strlen(p); | 
 | 364 |                 if (n > 0 && i > n) | 
 | 365 |                     i = n; | 
 | 366 |                 Py_MEMCPY(s, p, i); | 
 | 367 |                 s += i; | 
 | 368 |                 break; | 
 | 369 |             case 'p': | 
 | 370 |                 sprintf(s, "%p", va_arg(vargs, void*)); | 
 | 371 |                 /* %p is ill-defined:  ensure leading 0x. */ | 
 | 372 |                 if (s[1] == 'X') | 
 | 373 |                     s[1] = 'x'; | 
 | 374 |                 else if (s[1] != 'x') { | 
 | 375 |                     memmove(s+2, s, strlen(s)+1); | 
 | 376 |                     s[0] = '0'; | 
 | 377 |                     s[1] = 'x'; | 
 | 378 |                 } | 
 | 379 |                 s += strlen(s); | 
 | 380 |                 break; | 
 | 381 |             case '%': | 
 | 382 |                 *s++ = '%'; | 
 | 383 |                 break; | 
 | 384 |             default: | 
 | 385 |                 strcpy(s, p); | 
 | 386 |                 s += strlen(s); | 
 | 387 |                 goto end; | 
 | 388 |             } | 
 | 389 |         } else | 
 | 390 |             *s++ = *f; | 
 | 391 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 392 |  | 
 | 393 |  end: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 394 |     if (_PyString_Resize(&string, s - PyString_AS_STRING(string))) | 
 | 395 |         return NULL; | 
 | 396 |     return string; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 397 | } | 
 | 398 |  | 
 | 399 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 400 | PyString_FromFormat(const char *format, ...) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 401 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 402 |     PyObject* ret; | 
 | 403 |     va_list vargs; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 404 |  | 
 | 405 | #ifdef HAVE_STDARG_PROTOTYPES | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 406 |     va_start(vargs, format); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 407 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 408 |     va_start(vargs); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 409 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 410 |     ret = PyString_FromFormatV(format, vargs); | 
 | 411 |     va_end(vargs); | 
 | 412 |     return ret; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 413 | } | 
 | 414 |  | 
 | 415 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 416 | PyObject *PyString_Decode(const char *s, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 417 |                           Py_ssize_t size, | 
 | 418 |                           const char *encoding, | 
 | 419 |                           const char *errors) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 420 | { | 
 | 421 |     PyObject *v, *str; | 
 | 422 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 423 |     str = PyString_FromStringAndSize(s, size); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 424 |     if (str == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 425 |         return NULL; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 426 |     v = PyString_AsDecodedString(str, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 427 |     Py_DECREF(str); | 
 | 428 |     return v; | 
 | 429 | } | 
 | 430 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 431 | PyObject *PyString_AsDecodedObject(PyObject *str, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 432 |                                    const char *encoding, | 
 | 433 |                                    const char *errors) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 434 | { | 
 | 435 |     PyObject *v; | 
 | 436 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 437 |     if (!PyString_Check(str)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 438 |         PyErr_BadArgument(); | 
 | 439 |         goto onError; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 440 |     } | 
 | 441 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 442 |     if (encoding == NULL) { | 
 | 443 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 444 |         encoding = PyUnicode_GetDefaultEncoding(); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 445 | #else | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 446 |         PyErr_SetString(PyExc_ValueError, "no encoding specified"); | 
 | 447 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 448 | #endif | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 449 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 450 |  | 
 | 451 |     /* Decode via the codec registry */ | 
| Serhiy Storchaka | c7797dc | 2015-05-31 20:21:00 +0300 | [diff] [blame] | 452 |     v = _PyCodec_DecodeText(str, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 453 |     if (v == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 454 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 455 |  | 
 | 456 |     return v; | 
 | 457 |  | 
 | 458 |  onError: | 
 | 459 |     return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 460 | } | 
 | 461 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 462 | PyObject *PyString_AsDecodedString(PyObject *str, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 463 |                                    const char *encoding, | 
 | 464 |                                    const char *errors) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 465 | { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 466 |     PyObject *v; | 
 | 467 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 468 |     v = PyString_AsDecodedObject(str, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 469 |     if (v == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 470 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 471 |  | 
 | 472 | #ifdef Py_USING_UNICODE | 
 | 473 |     /* Convert Unicode to a string using the default encoding */ | 
 | 474 |     if (PyUnicode_Check(v)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 475 |         PyObject *temp = v; | 
 | 476 |         v = PyUnicode_AsEncodedString(v, NULL, NULL); | 
 | 477 |         Py_DECREF(temp); | 
 | 478 |         if (v == NULL) | 
 | 479 |             goto onError; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 480 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 481 | #endif | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 482 |     if (!PyString_Check(v)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 483 |         PyErr_Format(PyExc_TypeError, | 
 | 484 |                      "decoder did not return a string object (type=%.400s)", | 
 | 485 |                      Py_TYPE(v)->tp_name); | 
 | 486 |         Py_DECREF(v); | 
 | 487 |         goto onError; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 488 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 489 |  | 
 | 490 |     return v; | 
 | 491 |  | 
 | 492 |  onError: | 
 | 493 |     return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 494 | } | 
 | 495 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 496 | PyObject *PyString_Encode(const char *s, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 497 |                           Py_ssize_t size, | 
 | 498 |                           const char *encoding, | 
 | 499 |                           const char *errors) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 500 | { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 501 |     PyObject *v, *str; | 
 | 502 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 503 |     str = PyString_FromStringAndSize(s, size); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 504 |     if (str == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 505 |         return NULL; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 506 |     v = PyString_AsEncodedString(str, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 507 |     Py_DECREF(str); | 
 | 508 |     return v; | 
 | 509 | } | 
 | 510 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 511 | PyObject *PyString_AsEncodedObject(PyObject *str, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 512 |                                    const char *encoding, | 
 | 513 |                                    const char *errors) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 514 | { | 
 | 515 |     PyObject *v; | 
 | 516 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 517 |     if (!PyString_Check(str)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 518 |         PyErr_BadArgument(); | 
 | 519 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 520 |     } | 
 | 521 |  | 
 | 522 |     if (encoding == NULL) { | 
 | 523 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 524 |         encoding = PyUnicode_GetDefaultEncoding(); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 525 | #else | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 526 |         PyErr_SetString(PyExc_ValueError, "no encoding specified"); | 
 | 527 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 528 | #endif | 
 | 529 |     } | 
 | 530 |  | 
 | 531 |     /* Encode via the codec registry */ | 
| Serhiy Storchaka | c7797dc | 2015-05-31 20:21:00 +0300 | [diff] [blame] | 532 |     v = _PyCodec_EncodeText(str, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 533 |     if (v == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 534 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 535 |  | 
 | 536 |     return v; | 
 | 537 |  | 
 | 538 |  onError: | 
 | 539 |     return NULL; | 
 | 540 | } | 
 | 541 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 542 | PyObject *PyString_AsEncodedString(PyObject *str, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 543 |                                    const char *encoding, | 
 | 544 |                                    const char *errors) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 545 | { | 
 | 546 |     PyObject *v; | 
 | 547 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 548 |     v = PyString_AsEncodedObject(str, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 549 |     if (v == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 550 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 551 |  | 
 | 552 | #ifdef Py_USING_UNICODE | 
 | 553 |     /* Convert Unicode to a string using the default encoding */ | 
 | 554 |     if (PyUnicode_Check(v)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 555 |         PyObject *temp = v; | 
 | 556 |         v = PyUnicode_AsEncodedString(v, NULL, NULL); | 
 | 557 |         Py_DECREF(temp); | 
 | 558 |         if (v == NULL) | 
 | 559 |             goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 560 |     } | 
 | 561 | #endif | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 562 |     if (!PyString_Check(v)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 563 |         PyErr_Format(PyExc_TypeError, | 
 | 564 |                      "encoder did not return a string object (type=%.400s)", | 
 | 565 |                      Py_TYPE(v)->tp_name); | 
 | 566 |         Py_DECREF(v); | 
 | 567 |         goto onError; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 568 |     } | 
 | 569 |  | 
 | 570 |     return v; | 
 | 571 |  | 
 | 572 |  onError: | 
 | 573 |     return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 574 | } | 
 | 575 |  | 
 | 576 | static void | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 577 | string_dealloc(PyObject *op) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 578 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 579 |     switch (PyString_CHECK_INTERNED(op)) { | 
 | 580 |         case SSTATE_NOT_INTERNED: | 
 | 581 |             break; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 582 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 583 |         case SSTATE_INTERNED_MORTAL: | 
 | 584 |             /* revive dead object temporarily for DelItem */ | 
 | 585 |             Py_REFCNT(op) = 3; | 
 | 586 |             if (PyDict_DelItem(interned, op) != 0) | 
 | 587 |                 Py_FatalError( | 
 | 588 |                     "deletion of interned string failed"); | 
 | 589 |             break; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 590 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 591 |         case SSTATE_INTERNED_IMMORTAL: | 
 | 592 |             Py_FatalError("Immortal interned string died."); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 593 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 594 |         default: | 
 | 595 |             Py_FatalError("Inconsistent interned string state."); | 
 | 596 |     } | 
 | 597 |     Py_TYPE(op)->tp_free(op); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 598 | } | 
 | 599 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 600 | /* Unescape a backslash-escaped string. If unicode is non-zero, | 
 | 601 |    the string is a u-literal. If recode_encoding is non-zero, | 
 | 602 |    the string is UTF-8 encoded and should be re-encoded in the | 
 | 603 |    specified encoding.  */ | 
 | 604 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 605 | PyObject *PyString_DecodeEscape(const char *s, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 606 |                                 Py_ssize_t len, | 
 | 607 |                                 const char *errors, | 
 | 608 |                                 Py_ssize_t unicode, | 
 | 609 |                                 const char *recode_encoding) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 610 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 611 |     int c; | 
 | 612 |     char *p, *buf; | 
 | 613 |     const char *end; | 
 | 614 |     PyObject *v; | 
| Jay Bosamiya | c3c9db8 | 2017-06-18 22:11:03 +0530 | [diff] [blame] | 615 |     Py_ssize_t newlen; | 
 | 616 |     /* Check for integer overflow */ | 
 | 617 |     if (recode_encoding && (len > PY_SSIZE_T_MAX / 4)) { | 
 | 618 |         PyErr_SetString(PyExc_OverflowError, "string is too large"); | 
 | 619 |         return NULL; | 
 | 620 |     } | 
 | 621 |     newlen = recode_encoding ? 4*len:len; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 622 |     v = PyString_FromStringAndSize((char *)NULL, newlen); | 
 | 623 |     if (v == NULL) | 
 | 624 |         return NULL; | 
 | 625 |     p = buf = PyString_AsString(v); | 
 | 626 |     end = s + len; | 
 | 627 |     while (s < end) { | 
 | 628 |         if (*s != '\\') { | 
 | 629 |           non_esc: | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 630 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 631 |             if (recode_encoding && (*s & 0x80)) { | 
 | 632 |                 PyObject *u, *w; | 
 | 633 |                 char *r; | 
 | 634 |                 const char* t; | 
 | 635 |                 Py_ssize_t rn; | 
 | 636 |                 t = s; | 
 | 637 |                 /* Decode non-ASCII bytes as UTF-8. */ | 
 | 638 |                 while (t < end && (*t & 0x80)) t++; | 
 | 639 |                 u = PyUnicode_DecodeUTF8(s, t - s, errors); | 
 | 640 |                 if(!u) goto failed; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 641 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 642 |                 /* Recode them in target encoding. */ | 
 | 643 |                 w = PyUnicode_AsEncodedString( | 
 | 644 |                     u, recode_encoding, errors); | 
 | 645 |                 Py_DECREF(u); | 
 | 646 |                 if (!w)                 goto failed; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 647 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 648 |                 /* Append bytes to output buffer. */ | 
 | 649 |                 assert(PyString_Check(w)); | 
 | 650 |                 r = PyString_AS_STRING(w); | 
 | 651 |                 rn = PyString_GET_SIZE(w); | 
 | 652 |                 Py_MEMCPY(p, r, rn); | 
 | 653 |                 p += rn; | 
 | 654 |                 Py_DECREF(w); | 
 | 655 |                 s = t; | 
 | 656 |             } else { | 
 | 657 |                 *p++ = *s++; | 
 | 658 |             } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 659 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 660 |             *p++ = *s++; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 661 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 662 |             continue; | 
 | 663 |         } | 
 | 664 |         s++; | 
 | 665 |         if (s==end) { | 
 | 666 |             PyErr_SetString(PyExc_ValueError, | 
 | 667 |                             "Trailing \\ in string"); | 
 | 668 |             goto failed; | 
 | 669 |         } | 
 | 670 |         switch (*s++) { | 
 | 671 |         /* XXX This assumes ASCII! */ | 
 | 672 |         case '\n': break; | 
 | 673 |         case '\\': *p++ = '\\'; break; | 
 | 674 |         case '\'': *p++ = '\''; break; | 
 | 675 |         case '\"': *p++ = '\"'; break; | 
 | 676 |         case 'b': *p++ = '\b'; break; | 
 | 677 |         case 'f': *p++ = '\014'; break; /* FF */ | 
 | 678 |         case 't': *p++ = '\t'; break; | 
 | 679 |         case 'n': *p++ = '\n'; break; | 
 | 680 |         case 'r': *p++ = '\r'; break; | 
 | 681 |         case 'v': *p++ = '\013'; break; /* VT */ | 
 | 682 |         case 'a': *p++ = '\007'; break; /* BEL, not classic C */ | 
 | 683 |         case '0': case '1': case '2': case '3': | 
 | 684 |         case '4': case '5': case '6': case '7': | 
 | 685 |             c = s[-1] - '0'; | 
 | 686 |             if (s < end && '0' <= *s && *s <= '7') { | 
 | 687 |                 c = (c<<3) + *s++ - '0'; | 
 | 688 |                 if (s < end && '0' <= *s && *s <= '7') | 
 | 689 |                     c = (c<<3) + *s++ - '0'; | 
 | 690 |             } | 
 | 691 |             *p++ = c; | 
 | 692 |             break; | 
 | 693 |         case 'x': | 
 | 694 |             if (s+1 < end && | 
 | 695 |                 isxdigit(Py_CHARMASK(s[0])) && | 
 | 696 |                 isxdigit(Py_CHARMASK(s[1]))) | 
 | 697 |             { | 
 | 698 |                 unsigned int x = 0; | 
 | 699 |                 c = Py_CHARMASK(*s); | 
 | 700 |                 s++; | 
 | 701 |                 if (isdigit(c)) | 
 | 702 |                     x = c - '0'; | 
 | 703 |                 else if (islower(c)) | 
 | 704 |                     x = 10 + c - 'a'; | 
 | 705 |                 else | 
 | 706 |                     x = 10 + c - 'A'; | 
 | 707 |                 x = x << 4; | 
 | 708 |                 c = Py_CHARMASK(*s); | 
 | 709 |                 s++; | 
 | 710 |                 if (isdigit(c)) | 
 | 711 |                     x += c - '0'; | 
 | 712 |                 else if (islower(c)) | 
 | 713 |                     x += 10 + c - 'a'; | 
 | 714 |                 else | 
 | 715 |                     x += 10 + c - 'A'; | 
 | 716 |                 *p++ = x; | 
 | 717 |                 break; | 
 | 718 |             } | 
 | 719 |             if (!errors || strcmp(errors, "strict") == 0) { | 
 | 720 |                 PyErr_SetString(PyExc_ValueError, | 
 | 721 |                                 "invalid \\x escape"); | 
 | 722 |                 goto failed; | 
 | 723 |             } | 
 | 724 |             if (strcmp(errors, "replace") == 0) { | 
 | 725 |                 *p++ = '?'; | 
 | 726 |             } else if (strcmp(errors, "ignore") == 0) | 
 | 727 |                 /* do nothing */; | 
 | 728 |             else { | 
 | 729 |                 PyErr_Format(PyExc_ValueError, | 
 | 730 |                              "decoding error; " | 
 | 731 |                              "unknown error handling code: %.400s", | 
 | 732 |                              errors); | 
 | 733 |                 goto failed; | 
 | 734 |             } | 
| Serhiy Storchaka | 01b3a08 | 2013-01-25 23:30:50 +0200 | [diff] [blame] | 735 |             /* skip \x */ | 
 | 736 |             if (s < end && isxdigit(Py_CHARMASK(s[0]))) | 
 | 737 |                 s++; /* and a hexdigit */ | 
 | 738 |             break; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 739 | #ifndef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 740 |         case 'u': | 
 | 741 |         case 'U': | 
 | 742 |         case 'N': | 
 | 743 |             if (unicode) { | 
 | 744 |                 PyErr_SetString(PyExc_ValueError, | 
 | 745 |                           "Unicode escapes not legal " | 
 | 746 |                           "when Unicode disabled"); | 
 | 747 |                 goto failed; | 
 | 748 |             } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 749 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 750 |         default: | 
 | 751 |             *p++ = '\\'; | 
 | 752 |             s--; | 
| Ezio Melotti | 24b07bc | 2011-03-15 18:55:01 +0200 | [diff] [blame] | 753 |             goto non_esc; /* an arbitrary number of unescaped | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 754 |                              UTF-8 bytes may follow. */ | 
 | 755 |         } | 
 | 756 |     } | 
| Kristján Valur Jónsson | be580f2 | 2014-04-25 09:51:21 +0000 | [diff] [blame] | 757 |     if (p-buf < newlen) | 
 | 758 |         _PyString_Resize(&v, p - buf); /* v is cleared on error */ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 759 |     return v; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 760 |   failed: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 761 |     Py_DECREF(v); | 
 | 762 |     return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 763 | } | 
 | 764 |  | 
 | 765 | /* -------------------------------------------------------------------- */ | 
 | 766 | /* object api */ | 
 | 767 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 768 | static Py_ssize_t | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 769 | string_getsize(register PyObject *op) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 770 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 771 |     char *s; | 
 | 772 |     Py_ssize_t len; | 
 | 773 |     if (PyString_AsStringAndSize(op, &s, &len)) | 
 | 774 |         return -1; | 
 | 775 |     return len; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 776 | } | 
 | 777 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 778 | static /*const*/ char * | 
 | 779 | string_getbuffer(register PyObject *op) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 780 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 781 |     char *s; | 
 | 782 |     Py_ssize_t len; | 
 | 783 |     if (PyString_AsStringAndSize(op, &s, &len)) | 
 | 784 |         return NULL; | 
 | 785 |     return s; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 786 | } | 
 | 787 |  | 
 | 788 | Py_ssize_t | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 789 | PyString_Size(register PyObject *op) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 790 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 791 |     if (!PyString_Check(op)) | 
 | 792 |         return string_getsize(op); | 
 | 793 |     return Py_SIZE(op); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 794 | } | 
 | 795 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 796 | /*const*/ char * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 797 | PyString_AsString(register PyObject *op) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 798 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 799 |     if (!PyString_Check(op)) | 
 | 800 |         return string_getbuffer(op); | 
 | 801 |     return ((PyStringObject *)op) -> ob_sval; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 802 | } | 
 | 803 |  | 
 | 804 | int | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 805 | PyString_AsStringAndSize(register PyObject *obj, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 806 |                          register char **s, | 
 | 807 |                          register Py_ssize_t *len) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 808 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 809 |     if (s == NULL) { | 
 | 810 |         PyErr_BadInternalCall(); | 
 | 811 |         return -1; | 
 | 812 |     } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 813 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 814 |     if (!PyString_Check(obj)) { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 815 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 816 |         if (PyUnicode_Check(obj)) { | 
 | 817 |             obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); | 
 | 818 |             if (obj == NULL) | 
 | 819 |                 return -1; | 
 | 820 |         } | 
 | 821 |         else | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 822 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 823 |         { | 
 | 824 |             PyErr_Format(PyExc_TypeError, | 
 | 825 |                          "expected string or Unicode object, " | 
 | 826 |                          "%.200s found", Py_TYPE(obj)->tp_name); | 
 | 827 |             return -1; | 
 | 828 |         } | 
 | 829 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 830 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 831 |     *s = PyString_AS_STRING(obj); | 
 | 832 |     if (len != NULL) | 
 | 833 |         *len = PyString_GET_SIZE(obj); | 
 | 834 |     else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) { | 
 | 835 |         PyErr_SetString(PyExc_TypeError, | 
 | 836 |                         "expected string without null bytes"); | 
 | 837 |         return -1; | 
 | 838 |     } | 
 | 839 |     return 0; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 840 | } | 
 | 841 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 842 | /* -------------------------------------------------------------------- */ | 
 | 843 | /* Methods */ | 
 | 844 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 845 | #include "stringlib/stringdefs.h" | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 846 | #include "stringlib/fastsearch.h" | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 847 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 848 | #include "stringlib/count.h" | 
 | 849 | #include "stringlib/find.h" | 
 | 850 | #include "stringlib/partition.h" | 
| Antoine Pitrou | 6467213 | 2010-01-13 07:55:48 +0000 | [diff] [blame] | 851 | #include "stringlib/split.h" | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 852 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 853 | #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 854 | #include "stringlib/localeutil.h" | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 855 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 856 |  | 
 | 857 |  | 
 | 858 | static int | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 859 | string_print(PyStringObject *op, FILE *fp, int flags) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 860 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 861 |     Py_ssize_t i, str_len; | 
 | 862 |     char c; | 
 | 863 |     int quote; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 864 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 865 |     /* XXX Ought to check for interrupts when writing long strings */ | 
 | 866 |     if (! PyString_CheckExact(op)) { | 
 | 867 |         int ret; | 
 | 868 |         /* A str subclass may have its own __str__ method. */ | 
 | 869 |         op = (PyStringObject *) PyObject_Str((PyObject *)op); | 
 | 870 |         if (op == NULL) | 
 | 871 |             return -1; | 
 | 872 |         ret = string_print(op, fp, flags); | 
 | 873 |         Py_DECREF(op); | 
 | 874 |         return ret; | 
 | 875 |     } | 
 | 876 |     if (flags & Py_PRINT_RAW) { | 
 | 877 |         char *data = op->ob_sval; | 
 | 878 |         Py_ssize_t size = Py_SIZE(op); | 
 | 879 |         Py_BEGIN_ALLOW_THREADS | 
 | 880 |         while (size > INT_MAX) { | 
 | 881 |             /* Very long strings cannot be written atomically. | 
 | 882 |              * But don't write exactly INT_MAX bytes at a time | 
 | 883 |              * to avoid memory aligment issues. | 
 | 884 |              */ | 
 | 885 |             const int chunk_size = INT_MAX & ~0x3FFF; | 
 | 886 |             fwrite(data, 1, chunk_size, fp); | 
 | 887 |             data += chunk_size; | 
 | 888 |             size -= chunk_size; | 
 | 889 |         } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 890 | #ifdef __VMS | 
| Ronald Oussoren | 3687e80 | 2013-07-11 13:33:55 +0200 | [diff] [blame] | 891 |         if (size) fwrite(data, (size_t)size, 1, fp); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 892 | #else | 
| Ronald Oussoren | 3687e80 | 2013-07-11 13:33:55 +0200 | [diff] [blame] | 893 |         fwrite(data, 1, (size_t)size, fp); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 894 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 895 |         Py_END_ALLOW_THREADS | 
 | 896 |         return 0; | 
 | 897 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 898 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 899 |     /* figure out which quote to use; single is preferred */ | 
 | 900 |     quote = '\''; | 
 | 901 |     if (memchr(op->ob_sval, '\'', Py_SIZE(op)) && | 
 | 902 |         !memchr(op->ob_sval, '"', Py_SIZE(op))) | 
 | 903 |         quote = '"'; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 904 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 905 |     str_len = Py_SIZE(op); | 
 | 906 |     Py_BEGIN_ALLOW_THREADS | 
 | 907 |     fputc(quote, fp); | 
 | 908 |     for (i = 0; i < str_len; i++) { | 
 | 909 |         /* Since strings are immutable and the caller should have a | 
| Martin Panter | 3d36f0f | 2016-07-28 02:37:04 +0000 | [diff] [blame] | 910 |         reference, accessing the internal buffer should not be an issue | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 911 |         with the GIL released. */ | 
 | 912 |         c = op->ob_sval[i]; | 
 | 913 |         if (c == quote || c == '\\') | 
 | 914 |             fprintf(fp, "\\%c", c); | 
 | 915 |         else if (c == '\t') | 
 | 916 |             fprintf(fp, "\\t"); | 
 | 917 |         else if (c == '\n') | 
 | 918 |             fprintf(fp, "\\n"); | 
 | 919 |         else if (c == '\r') | 
 | 920 |             fprintf(fp, "\\r"); | 
 | 921 |         else if (c < ' ' || c >= 0x7f) | 
 | 922 |             fprintf(fp, "\\x%02x", c & 0xff); | 
 | 923 |         else | 
 | 924 |             fputc(c, fp); | 
 | 925 |     } | 
 | 926 |     fputc(quote, fp); | 
 | 927 |     Py_END_ALLOW_THREADS | 
 | 928 |     return 0; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 929 | } | 
 | 930 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 931 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 932 | PyString_Repr(PyObject *obj, int smartquotes) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 933 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 934 |     register PyStringObject* op = (PyStringObject*) obj; | 
| Benjamin Peterson | f8c4b3a | 2014-09-29 19:01:18 -0400 | [diff] [blame] | 935 |     size_t newsize; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 936 |     PyObject *v; | 
| Benjamin Peterson | f8c4b3a | 2014-09-29 19:01:18 -0400 | [diff] [blame] | 937 |     if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 938 |         PyErr_SetString(PyExc_OverflowError, | 
 | 939 |             "string is too large to make repr"); | 
 | 940 |         return NULL; | 
 | 941 |     } | 
| Benjamin Peterson | f8c4b3a | 2014-09-29 19:01:18 -0400 | [diff] [blame] | 942 |     newsize = 2 + 4*Py_SIZE(op); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 943 |     v = PyString_FromStringAndSize((char *)NULL, newsize); | 
 | 944 |     if (v == NULL) { | 
 | 945 |         return NULL; | 
 | 946 |     } | 
 | 947 |     else { | 
 | 948 |         register Py_ssize_t i; | 
 | 949 |         register char c; | 
 | 950 |         register char *p; | 
 | 951 |         int quote; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 952 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 953 |         /* figure out which quote to use; single is preferred */ | 
 | 954 |         quote = '\''; | 
 | 955 |         if (smartquotes && | 
 | 956 |             memchr(op->ob_sval, '\'', Py_SIZE(op)) && | 
 | 957 |             !memchr(op->ob_sval, '"', Py_SIZE(op))) | 
 | 958 |             quote = '"'; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 959 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 960 |         p = PyString_AS_STRING(v); | 
 | 961 |         *p++ = quote; | 
 | 962 |         for (i = 0; i < Py_SIZE(op); i++) { | 
 | 963 |             /* There's at least enough room for a hex escape | 
 | 964 |                and a closing quote. */ | 
 | 965 |             assert(newsize - (p - PyString_AS_STRING(v)) >= 5); | 
 | 966 |             c = op->ob_sval[i]; | 
 | 967 |             if (c == quote || c == '\\') | 
 | 968 |                 *p++ = '\\', *p++ = c; | 
 | 969 |             else if (c == '\t') | 
 | 970 |                 *p++ = '\\', *p++ = 't'; | 
 | 971 |             else if (c == '\n') | 
 | 972 |                 *p++ = '\\', *p++ = 'n'; | 
 | 973 |             else if (c == '\r') | 
 | 974 |                 *p++ = '\\', *p++ = 'r'; | 
 | 975 |             else if (c < ' ' || c >= 0x7f) { | 
 | 976 |                 /* For performance, we don't want to call | 
 | 977 |                    PyOS_snprintf here (extra layers of | 
 | 978 |                    function call). */ | 
 | 979 |                 sprintf(p, "\\x%02x", c & 0xff); | 
 | 980 |                 p += 4; | 
 | 981 |             } | 
 | 982 |             else | 
 | 983 |                 *p++ = c; | 
 | 984 |         } | 
 | 985 |         assert(newsize - (p - PyString_AS_STRING(v)) >= 1); | 
 | 986 |         *p++ = quote; | 
 | 987 |         *p = '\0'; | 
 | 988 |         if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) | 
 | 989 |             return NULL; | 
 | 990 |         return v; | 
 | 991 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 992 | } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 993 |  | 
 | 994 | static PyObject * | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 995 | string_repr(PyObject *op) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 996 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 997 |     return PyString_Repr(op, 1); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 998 | } | 
 | 999 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1000 | static PyObject * | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1001 | string_str(PyObject *s) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1002 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1003 |     assert(PyString_Check(s)); | 
 | 1004 |     if (PyString_CheckExact(s)) { | 
 | 1005 |         Py_INCREF(s); | 
 | 1006 |         return s; | 
 | 1007 |     } | 
 | 1008 |     else { | 
 | 1009 |         /* Subtype -- return genuine string with the same value. */ | 
 | 1010 |         PyStringObject *t = (PyStringObject *) s; | 
 | 1011 |         return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t)); | 
 | 1012 |     } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1013 | } | 
 | 1014 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1015 | static Py_ssize_t | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1016 | string_length(PyStringObject *a) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1017 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1018 |     return Py_SIZE(a); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1019 | } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1020 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1021 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1022 | string_concat(register PyStringObject *a, register PyObject *bb) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1023 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1024 |     register Py_ssize_t size; | 
 | 1025 |     register PyStringObject *op; | 
 | 1026 |     if (!PyString_Check(bb)) { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1027 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1028 |         if (PyUnicode_Check(bb)) | 
 | 1029 |             return PyUnicode_Concat((PyObject *)a, bb); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1030 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1031 |         if (PyByteArray_Check(bb)) | 
 | 1032 |             return PyByteArray_Concat((PyObject *)a, bb); | 
 | 1033 |         PyErr_Format(PyExc_TypeError, | 
 | 1034 |                      "cannot concatenate 'str' and '%.200s' objects", | 
 | 1035 |                      Py_TYPE(bb)->tp_name); | 
 | 1036 |         return NULL; | 
 | 1037 |     } | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1038 | #define b ((PyStringObject *)bb) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1039 |     /* Optimize cases with empty left or right operand */ | 
 | 1040 |     if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) && | 
 | 1041 |         PyString_CheckExact(a) && PyString_CheckExact(b)) { | 
 | 1042 |         if (Py_SIZE(a) == 0) { | 
 | 1043 |             Py_INCREF(bb); | 
 | 1044 |             return bb; | 
 | 1045 |         } | 
 | 1046 |         Py_INCREF(a); | 
 | 1047 |         return (PyObject *)a; | 
 | 1048 |     } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1049 |     /* Check that string sizes are not negative, to prevent an | 
 | 1050 |        overflow in cases where we are passed incorrectly-created | 
 | 1051 |        strings with negative lengths (due to a bug in other code). | 
 | 1052 |     */ | 
 | 1053 |     if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 || | 
 | 1054 |         Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { | 
 | 1055 |         PyErr_SetString(PyExc_OverflowError, | 
 | 1056 |                         "strings are too large to concat"); | 
 | 1057 |         return NULL; | 
 | 1058 |     } | 
| Serhiy Storchaka | 373773d | 2016-07-12 15:46:57 +0300 | [diff] [blame] | 1059 |     size = Py_SIZE(a) + Py_SIZE(b); | 
| Mark Dickinson | 826f3fe | 2008-12-05 21:55:28 +0000 | [diff] [blame] | 1060 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1061 |     /* Inline PyObject_NewVar */ | 
 | 1062 |     if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { | 
 | 1063 |         PyErr_SetString(PyExc_OverflowError, | 
 | 1064 |                         "strings are too large to concat"); | 
 | 1065 |         return NULL; | 
 | 1066 |     } | 
 | 1067 |     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); | 
 | 1068 |     if (op == NULL) | 
 | 1069 |         return PyErr_NoMemory(); | 
| Martin Panter | 646b528 | 2016-06-21 23:58:05 +0000 | [diff] [blame] | 1070 |     (void)PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1071 |     op->ob_shash = -1; | 
 | 1072 |     op->ob_sstate = SSTATE_NOT_INTERNED; | 
 | 1073 |     Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); | 
 | 1074 |     Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b)); | 
 | 1075 |     op->ob_sval[size] = '\0'; | 
 | 1076 |     return (PyObject *) op; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1077 | #undef b | 
 | 1078 | } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1079 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1080 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1081 | string_repeat(register PyStringObject *a, register Py_ssize_t n) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1082 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1083 |     register Py_ssize_t i; | 
 | 1084 |     register Py_ssize_t j; | 
 | 1085 |     register Py_ssize_t size; | 
 | 1086 |     register PyStringObject *op; | 
 | 1087 |     size_t nbytes; | 
 | 1088 |     if (n < 0) | 
 | 1089 |         n = 0; | 
| Serhiy Storchaka | 373773d | 2016-07-12 15:46:57 +0300 | [diff] [blame] | 1090 |     /* watch out for overflows:  the size can overflow Py_ssize_t, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1091 |      * and the # of bytes needed can overflow size_t | 
 | 1092 |      */ | 
| Serhiy Storchaka | 373773d | 2016-07-12 15:46:57 +0300 | [diff] [blame] | 1093 |     if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1094 |         PyErr_SetString(PyExc_OverflowError, | 
 | 1095 |             "repeated string is too long"); | 
 | 1096 |         return NULL; | 
 | 1097 |     } | 
| Serhiy Storchaka | 373773d | 2016-07-12 15:46:57 +0300 | [diff] [blame] | 1098 |     size = Py_SIZE(a) * n; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1099 |     if (size == Py_SIZE(a) && PyString_CheckExact(a)) { | 
 | 1100 |         Py_INCREF(a); | 
 | 1101 |         return (PyObject *)a; | 
 | 1102 |     } | 
 | 1103 |     nbytes = (size_t)size; | 
 | 1104 |     if (nbytes + PyStringObject_SIZE <= nbytes) { | 
 | 1105 |         PyErr_SetString(PyExc_OverflowError, | 
 | 1106 |             "repeated string is too long"); | 
 | 1107 |         return NULL; | 
 | 1108 |     } | 
 | 1109 |     op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes); | 
 | 1110 |     if (op == NULL) | 
 | 1111 |         return PyErr_NoMemory(); | 
| Martin Panter | 646b528 | 2016-06-21 23:58:05 +0000 | [diff] [blame] | 1112 |     (void)PyObject_INIT_VAR(op, &PyString_Type, size); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1113 |     op->ob_shash = -1; | 
 | 1114 |     op->ob_sstate = SSTATE_NOT_INTERNED; | 
 | 1115 |     op->ob_sval[size] = '\0'; | 
 | 1116 |     if (Py_SIZE(a) == 1 && n > 0) { | 
 | 1117 |         memset(op->ob_sval, a->ob_sval[0] , n); | 
 | 1118 |         return (PyObject *) op; | 
 | 1119 |     } | 
 | 1120 |     i = 0; | 
 | 1121 |     if (i < size) { | 
 | 1122 |         Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); | 
 | 1123 |         i = Py_SIZE(a); | 
 | 1124 |     } | 
 | 1125 |     while (i < size) { | 
 | 1126 |         j = (i <= size-i)  ?  i  :  size-i; | 
 | 1127 |         Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); | 
 | 1128 |         i += j; | 
 | 1129 |     } | 
 | 1130 |     return (PyObject *) op; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1131 | } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1132 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1133 | /* String slice a[i:j] consists of characters a[i] ... a[j-1] */ | 
 | 1134 |  | 
 | 1135 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1136 | string_slice(register PyStringObject *a, register Py_ssize_t i, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1137 |              register Py_ssize_t j) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1138 |      /* j -- may be negative! */ | 
 | 1139 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1140 |     if (i < 0) | 
 | 1141 |         i = 0; | 
 | 1142 |     if (j < 0) | 
 | 1143 |         j = 0; /* Avoid signed/unsigned bug in next line */ | 
 | 1144 |     if (j > Py_SIZE(a)) | 
 | 1145 |         j = Py_SIZE(a); | 
 | 1146 |     if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) { | 
 | 1147 |         /* It's the same as a */ | 
 | 1148 |         Py_INCREF(a); | 
 | 1149 |         return (PyObject *)a; | 
 | 1150 |     } | 
 | 1151 |     if (j < i) | 
 | 1152 |         j = i; | 
 | 1153 |     return PyString_FromStringAndSize(a->ob_sval + i, j-i); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1154 | } | 
 | 1155 |  | 
 | 1156 | static int | 
 | 1157 | string_contains(PyObject *str_obj, PyObject *sub_obj) | 
 | 1158 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1159 |     if (!PyString_CheckExact(sub_obj)) { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1160 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1161 |         if (PyUnicode_Check(sub_obj)) | 
 | 1162 |             return PyUnicode_Contains(str_obj, sub_obj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1163 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1164 |         if (!PyString_Check(sub_obj)) { | 
 | 1165 |             PyErr_Format(PyExc_TypeError, | 
 | 1166 |                 "'in <string>' requires string as left operand, " | 
 | 1167 |                 "not %.200s", Py_TYPE(sub_obj)->tp_name); | 
 | 1168 |             return -1; | 
 | 1169 |         } | 
 | 1170 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1171 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1172 |     return stringlib_contains_obj(str_obj, sub_obj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1173 | } | 
 | 1174 |  | 
 | 1175 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1176 | string_item(PyStringObject *a, register Py_ssize_t i) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1177 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1178 |     char pchar; | 
 | 1179 |     PyObject *v; | 
 | 1180 |     if (i < 0 || i >= Py_SIZE(a)) { | 
 | 1181 |         PyErr_SetString(PyExc_IndexError, "string index out of range"); | 
 | 1182 |         return NULL; | 
 | 1183 |     } | 
 | 1184 |     pchar = a->ob_sval[i]; | 
 | 1185 |     v = (PyObject *)characters[pchar & UCHAR_MAX]; | 
 | 1186 |     if (v == NULL) | 
 | 1187 |         v = PyString_FromStringAndSize(&pchar, 1); | 
 | 1188 |     else { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1189 | #ifdef COUNT_ALLOCS | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1190 |         one_strings++; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1191 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1192 |         Py_INCREF(v); | 
 | 1193 |     } | 
 | 1194 |     return v; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1195 | } | 
 | 1196 |  | 
 | 1197 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1198 | string_richcompare(PyStringObject *a, PyStringObject *b, int op) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1199 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1200 |     int c; | 
 | 1201 |     Py_ssize_t len_a, len_b; | 
 | 1202 |     Py_ssize_t min_len; | 
 | 1203 |     PyObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1204 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1205 |     /* Make sure both arguments are strings. */ | 
 | 1206 |     if (!(PyString_Check(a) && PyString_Check(b))) { | 
 | 1207 |         result = Py_NotImplemented; | 
 | 1208 |         goto out; | 
 | 1209 |     } | 
 | 1210 |     if (a == b) { | 
 | 1211 |         switch (op) { | 
 | 1212 |         case Py_EQ:case Py_LE:case Py_GE: | 
 | 1213 |             result = Py_True; | 
 | 1214 |             goto out; | 
 | 1215 |         case Py_NE:case Py_LT:case Py_GT: | 
 | 1216 |             result = Py_False; | 
 | 1217 |             goto out; | 
 | 1218 |         } | 
 | 1219 |     } | 
 | 1220 |     if (op == Py_EQ) { | 
 | 1221 |         /* Supporting Py_NE here as well does not save | 
 | 1222 |            much time, since Py_NE is rarely used.  */ | 
 | 1223 |         if (Py_SIZE(a) == Py_SIZE(b) | 
 | 1224 |             && (a->ob_sval[0] == b->ob_sval[0] | 
 | 1225 |             && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) { | 
 | 1226 |             result = Py_True; | 
 | 1227 |         } else { | 
 | 1228 |             result = Py_False; | 
 | 1229 |         } | 
 | 1230 |         goto out; | 
 | 1231 |     } | 
 | 1232 |     len_a = Py_SIZE(a); len_b = Py_SIZE(b); | 
 | 1233 |     min_len = (len_a < len_b) ? len_a : len_b; | 
 | 1234 |     if (min_len > 0) { | 
 | 1235 |         c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); | 
 | 1236 |         if (c==0) | 
 | 1237 |             c = memcmp(a->ob_sval, b->ob_sval, min_len); | 
 | 1238 |     } else | 
 | 1239 |         c = 0; | 
 | 1240 |     if (c == 0) | 
 | 1241 |         c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; | 
 | 1242 |     switch (op) { | 
 | 1243 |     case Py_LT: c = c <  0; break; | 
 | 1244 |     case Py_LE: c = c <= 0; break; | 
 | 1245 |     case Py_EQ: assert(0);  break; /* unreachable */ | 
 | 1246 |     case Py_NE: c = c != 0; break; | 
 | 1247 |     case Py_GT: c = c >  0; break; | 
 | 1248 |     case Py_GE: c = c >= 0; break; | 
 | 1249 |     default: | 
 | 1250 |         result = Py_NotImplemented; | 
 | 1251 |         goto out; | 
 | 1252 |     } | 
 | 1253 |     result = c ? Py_True : Py_False; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1254 |   out: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1255 |     Py_INCREF(result); | 
 | 1256 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1257 | } | 
 | 1258 |  | 
 | 1259 | int | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1260 | _PyString_Eq(PyObject *o1, PyObject *o2) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1261 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1262 |     PyStringObject *a = (PyStringObject*) o1; | 
 | 1263 |     PyStringObject *b = (PyStringObject*) o2; | 
 | 1264 |     return Py_SIZE(a) == Py_SIZE(b) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1265 |       && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1266 | } | 
 | 1267 |  | 
 | 1268 | static long | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1269 | string_hash(PyStringObject *a) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1270 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1271 |     register Py_ssize_t len; | 
 | 1272 |     register unsigned char *p; | 
 | 1273 |     register long x; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1274 |  | 
| Benjamin Peterson | f51c384 | 2012-04-09 14:53:07 -0400 | [diff] [blame] | 1275 | #ifdef Py_DEBUG | 
| Benjamin Peterson | 26da920 | 2012-02-21 11:08:50 -0500 | [diff] [blame] | 1276 |     assert(_Py_HashSecret_Initialized); | 
| Benjamin Peterson | f51c384 | 2012-04-09 14:53:07 -0400 | [diff] [blame] | 1277 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1278 |     if (a->ob_shash != -1) | 
 | 1279 |         return a->ob_shash; | 
 | 1280 |     len = Py_SIZE(a); | 
| Barry Warsaw | 1e13eb0 | 2012-02-20 20:42:21 -0500 | [diff] [blame] | 1281 |     /* | 
 | 1282 |       We make the hash of the empty string be 0, rather than using | 
 | 1283 |       (prefix ^ suffix), since this slightly obfuscates the hash secret | 
 | 1284 |     */ | 
 | 1285 |     if (len == 0) { | 
 | 1286 |         a->ob_shash = 0; | 
 | 1287 |         return 0; | 
 | 1288 |     } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1289 |     p = (unsigned char *) a->ob_sval; | 
| Barry Warsaw | 1e13eb0 | 2012-02-20 20:42:21 -0500 | [diff] [blame] | 1290 |     x = _Py_HashSecret.prefix; | 
 | 1291 |     x ^= *p << 7; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1292 |     while (--len >= 0) | 
 | 1293 |         x = (1000003*x) ^ *p++; | 
 | 1294 |     x ^= Py_SIZE(a); | 
| Barry Warsaw | 1e13eb0 | 2012-02-20 20:42:21 -0500 | [diff] [blame] | 1295 |     x ^= _Py_HashSecret.suffix; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1296 |     if (x == -1) | 
 | 1297 |         x = -2; | 
 | 1298 |     a->ob_shash = x; | 
 | 1299 |     return x; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1300 | } | 
 | 1301 |  | 
 | 1302 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1303 | string_subscript(PyStringObject* self, PyObject* item) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1304 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1305 |     if (PyIndex_Check(item)) { | 
 | 1306 |         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); | 
 | 1307 |         if (i == -1 && PyErr_Occurred()) | 
 | 1308 |             return NULL; | 
 | 1309 |         if (i < 0) | 
 | 1310 |             i += PyString_GET_SIZE(self); | 
 | 1311 |         return string_item(self, i); | 
 | 1312 |     } | 
 | 1313 |     else if (PySlice_Check(item)) { | 
 | 1314 |         Py_ssize_t start, stop, step, slicelength, cur, i; | 
 | 1315 |         char* source_buf; | 
 | 1316 |         char* result_buf; | 
 | 1317 |         PyObject* result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1318 |  | 
| Serhiy Storchaka | 5e79321 | 2017-04-15 20:11:12 +0300 | [diff] [blame] | 1319 |         if (_PySlice_Unpack(item, &start, &stop, &step) < 0) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1320 |             return NULL; | 
 | 1321 |         } | 
| Serhiy Storchaka | e41390a | 2017-04-08 11:48:57 +0300 | [diff] [blame] | 1322 |         slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start, | 
 | 1323 |                                             &stop, step); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1324 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1325 |         if (slicelength <= 0) { | 
 | 1326 |             return PyString_FromStringAndSize("", 0); | 
 | 1327 |         } | 
 | 1328 |         else if (start == 0 && step == 1 && | 
 | 1329 |                  slicelength == PyString_GET_SIZE(self) && | 
 | 1330 |                  PyString_CheckExact(self)) { | 
 | 1331 |             Py_INCREF(self); | 
 | 1332 |             return (PyObject *)self; | 
 | 1333 |         } | 
 | 1334 |         else if (step == 1) { | 
 | 1335 |             return PyString_FromStringAndSize( | 
 | 1336 |                 PyString_AS_STRING(self) + start, | 
 | 1337 |                 slicelength); | 
 | 1338 |         } | 
 | 1339 |         else { | 
 | 1340 |             source_buf = PyString_AsString((PyObject*)self); | 
 | 1341 |             result_buf = (char *)PyMem_Malloc(slicelength); | 
 | 1342 |             if (result_buf == NULL) | 
 | 1343 |                 return PyErr_NoMemory(); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1344 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1345 |             for (cur = start, i = 0; i < slicelength; | 
 | 1346 |                  cur += step, i++) { | 
 | 1347 |                 result_buf[i] = source_buf[cur]; | 
 | 1348 |             } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1349 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1350 |             result = PyString_FromStringAndSize(result_buf, | 
 | 1351 |                                                 slicelength); | 
 | 1352 |             PyMem_Free(result_buf); | 
 | 1353 |             return result; | 
 | 1354 |         } | 
 | 1355 |     } | 
 | 1356 |     else { | 
 | 1357 |         PyErr_Format(PyExc_TypeError, | 
 | 1358 |                      "string indices must be integers, not %.200s", | 
 | 1359 |                      Py_TYPE(item)->tp_name); | 
 | 1360 |         return NULL; | 
 | 1361 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1362 | } | 
 | 1363 |  | 
 | 1364 | static Py_ssize_t | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1365 | string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1366 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1367 |     if ( index != 0 ) { | 
 | 1368 |         PyErr_SetString(PyExc_SystemError, | 
 | 1369 |                         "accessing non-existent string segment"); | 
 | 1370 |         return -1; | 
 | 1371 |     } | 
 | 1372 |     *ptr = (void *)self->ob_sval; | 
 | 1373 |     return Py_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1374 | } | 
 | 1375 |  | 
 | 1376 | static Py_ssize_t | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1377 | string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1378 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1379 |     PyErr_SetString(PyExc_TypeError, | 
 | 1380 |                     "Cannot use string as modifiable buffer"); | 
 | 1381 |     return -1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1382 | } | 
 | 1383 |  | 
 | 1384 | static Py_ssize_t | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1385 | string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1386 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1387 |     if ( lenp ) | 
 | 1388 |         *lenp = Py_SIZE(self); | 
 | 1389 |     return 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1390 | } | 
 | 1391 |  | 
 | 1392 | static Py_ssize_t | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1393 | string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1394 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1395 |     if ( index != 0 ) { | 
 | 1396 |         PyErr_SetString(PyExc_SystemError, | 
 | 1397 |                         "accessing non-existent string segment"); | 
 | 1398 |         return -1; | 
 | 1399 |     } | 
 | 1400 |     *ptr = self->ob_sval; | 
 | 1401 |     return Py_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1402 | } | 
 | 1403 |  | 
 | 1404 | static int | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1405 | string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1406 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1407 |     return PyBuffer_FillInfo(view, (PyObject*)self, | 
 | 1408 |                              (void *)self->ob_sval, Py_SIZE(self), | 
 | 1409 |                              1, flags); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1410 | } | 
 | 1411 |  | 
/* Sequence protocol table for strings.  Each entry wires one sequence
   slot to the corresponding static function defined above; the two
   assignment slots are 0, i.e. no item or slice assignment is provided. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
 | 1422 |  | 
/* Mapping protocol table for strings: length and subscript lookup are
   provided; the third (subscript assignment) slot is left empty. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length, /* mp_length */
    (binaryfunc)string_subscript, /* mp_subscript */
    0, /* mp_ass_subscript: not provided */
};
 | 1428 |  | 
/* Buffer protocol table for strings: the four old-style buffer callbacks
   followed by the new-style getbuffer callback.  The final slot is 0
   (NOTE(review): presumably bf_releasebuffer -- confirm against the
   PyBufferProcs declaration). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */
};
 | 1437 |  | 
 | 1438 |  | 
| Martin v. Löwis | f91d46a | 2008-08-12 14:49:50 +0000 | [diff] [blame] | 1439 |  | 
/* Strip modes; the values double as indexes into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is an argument format string of the form "|O:<method name>". */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: skips the 3-character "|O:" prefix of the
   corresponding format string. */
#define STRIPNAME(i) (stripformat[i]+3)
 | 1448 |  | 
/* Docstring for the split method implemented by string_split(). */
PyDoc_STRVAR(split__doc__,
"S.split([sep [,maxsplit]]) -> list of strings\n\
\n\
Return a list of the words in the string S, using sep as the\n\
delimiter string.  If maxsplit is given, at most maxsplit\n\
splits are done. If sep is not specified or is None, any\n\
whitespace string is a separator and empty strings are removed\n\
from the result.");
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1457 |  | 
 | 1458 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1459 | string_split(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1460 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1461 |     Py_ssize_t len = PyString_GET_SIZE(self), n; | 
 | 1462 |     Py_ssize_t maxsplit = -1; | 
 | 1463 |     const char *s = PyString_AS_STRING(self), *sub; | 
 | 1464 |     PyObject *subobj = Py_None; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1465 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1466 |     if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) | 
 | 1467 |         return NULL; | 
 | 1468 |     if (maxsplit < 0) | 
 | 1469 |         maxsplit = PY_SSIZE_T_MAX; | 
 | 1470 |     if (subobj == Py_None) | 
 | 1471 |         return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); | 
 | 1472 |     if (PyString_Check(subobj)) { | 
 | 1473 |         sub = PyString_AS_STRING(subobj); | 
 | 1474 |         n = PyString_GET_SIZE(subobj); | 
 | 1475 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1476 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1477 |     else if (PyUnicode_Check(subobj)) | 
 | 1478 |         return PyUnicode_Split((PyObject *)self, subobj, maxsplit); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1479 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1480 |     else if (PyObject_AsCharBuffer(subobj, &sub, &n)) | 
 | 1481 |         return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1482 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1483 |     return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1484 | } | 
 | 1485 |  | 
/* Docstring for the partition method implemented by string_partition(). */
PyDoc_STRVAR(partition__doc__,
"S.partition(sep) -> (head, sep, tail)\n\
\n\
Search for the separator sep in S, and return the part before it,\n\
the separator itself, and the part after it.  If the separator is not\n\
found, return S and two empty strings.");
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1492 |  | 
 | 1493 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1494 | string_partition(PyStringObject *self, PyObject *sep_obj) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1495 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1496 |     const char *sep; | 
 | 1497 |     Py_ssize_t sep_len; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1498 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1499 |     if (PyString_Check(sep_obj)) { | 
 | 1500 |         sep = PyString_AS_STRING(sep_obj); | 
 | 1501 |         sep_len = PyString_GET_SIZE(sep_obj); | 
 | 1502 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1503 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1504 |     else if (PyUnicode_Check(sep_obj)) | 
 | 1505 |         return PyUnicode_Partition((PyObject *) self, sep_obj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1506 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1507 |     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) | 
 | 1508 |         return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1509 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1510 |     return stringlib_partition( | 
 | 1511 |         (PyObject*) self, | 
 | 1512 |         PyString_AS_STRING(self), PyString_GET_SIZE(self), | 
 | 1513 |         sep_obj, sep, sep_len | 
 | 1514 |         ); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1515 | } | 
 | 1516 |  | 
 | 1517 | PyDoc_STRVAR(rpartition__doc__, | 
| Ezio Melotti | 1fafaab | 2010-01-25 11:24:37 +0000 | [diff] [blame] | 1518 | "S.rpartition(sep) -> (head, sep, tail)\n\ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1519 | \n\ | 
| Andrew M. Kuchling | efeb43e | 2008-10-04 01:05:56 +0000 | [diff] [blame] | 1520 | Search for the separator sep in S, starting at the end of S, and return\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1521 | the part before it, the separator itself, and the part after it.  If the\n\ | 
| Andrew M. Kuchling | efeb43e | 2008-10-04 01:05:56 +0000 | [diff] [blame] | 1522 | separator is not found, return two empty strings and S."); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1523 |  | 
 | 1524 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1525 | string_rpartition(PyStringObject *self, PyObject *sep_obj) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1526 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1527 |     const char *sep; | 
 | 1528 |     Py_ssize_t sep_len; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1529 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1530 |     if (PyString_Check(sep_obj)) { | 
 | 1531 |         sep = PyString_AS_STRING(sep_obj); | 
 | 1532 |         sep_len = PyString_GET_SIZE(sep_obj); | 
 | 1533 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1534 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1535 |     else if (PyUnicode_Check(sep_obj)) | 
 | 1536 |         return PyUnicode_RPartition((PyObject *) self, sep_obj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1537 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1538 |     else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) | 
 | 1539 |         return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1540 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1541 |     return stringlib_rpartition( | 
 | 1542 |         (PyObject*) self, | 
 | 1543 |         PyString_AS_STRING(self), PyString_GET_SIZE(self), | 
 | 1544 |         sep_obj, sep, sep_len | 
 | 1545 |         ); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1546 | } | 
 | 1547 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1548 | PyDoc_STRVAR(rsplit__doc__, | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1549 | "S.rsplit([sep [,maxsplit]]) -> list of strings\n\ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1550 | \n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1551 | Return a list of the words in the string S, using sep as the\n\ | 
 | 1552 | delimiter string, starting at the end of the string and working\n\ | 
 | 1553 | to the front.  If maxsplit is given, at most maxsplit splits are\n\ | 
 | 1554 | done. If sep is not specified or is None, any whitespace string\n\ | 
 | 1555 | is a separator."); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1556 |  | 
 | 1557 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1558 | string_rsplit(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1559 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1560 |     Py_ssize_t len = PyString_GET_SIZE(self), n; | 
 | 1561 |     Py_ssize_t maxsplit = -1; | 
 | 1562 |     const char *s = PyString_AS_STRING(self), *sub; | 
 | 1563 |     PyObject *subobj = Py_None; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1564 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1565 |     if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) | 
 | 1566 |         return NULL; | 
 | 1567 |     if (maxsplit < 0) | 
 | 1568 |         maxsplit = PY_SSIZE_T_MAX; | 
 | 1569 |     if (subobj == Py_None) | 
 | 1570 |         return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); | 
 | 1571 |     if (PyString_Check(subobj)) { | 
 | 1572 |         sub = PyString_AS_STRING(subobj); | 
 | 1573 |         n = PyString_GET_SIZE(subobj); | 
 | 1574 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1575 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1576 |     else if (PyUnicode_Check(subobj)) | 
 | 1577 |         return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1578 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1579 |     else if (PyObject_AsCharBuffer(subobj, &sub, &n)) | 
 | 1580 |         return NULL; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 1581 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1582 |     return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1583 | } | 
 | 1584 |  | 
 | 1585 |  | 
 | 1586 | PyDoc_STRVAR(join__doc__, | 
| Georg Brandl | 9b4e582 | 2009-10-14 18:48:32 +0000 | [diff] [blame] | 1587 | "S.join(iterable) -> string\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1588 | \n\ | 
 | 1589 | Return a string which is the concatenation of the strings in the\n\ | 
| Georg Brandl | 9b4e582 | 2009-10-14 18:48:32 +0000 | [diff] [blame] | 1590 | iterable.  The separator between elements is S."); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1591 |  | 
 | 1592 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1593 | string_join(PyStringObject *self, PyObject *orig) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1594 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1595 |     char *sep = PyString_AS_STRING(self); | 
 | 1596 |     const Py_ssize_t seplen = PyString_GET_SIZE(self); | 
 | 1597 |     PyObject *res = NULL; | 
 | 1598 |     char *p; | 
 | 1599 |     Py_ssize_t seqlen = 0; | 
 | 1600 |     size_t sz = 0; | 
 | 1601 |     Py_ssize_t i; | 
 | 1602 |     PyObject *seq, *item; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1603 |  | 
| Benjamin Peterson | 1643d5c | 2014-09-28 12:48:46 -0400 | [diff] [blame] | 1604 |     seq = PySequence_Fast(orig, "can only join an iterable"); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1605 |     if (seq == NULL) { | 
 | 1606 |         return NULL; | 
 | 1607 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1608 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1609 |     seqlen = PySequence_Size(seq); | 
 | 1610 |     if (seqlen == 0) { | 
 | 1611 |         Py_DECREF(seq); | 
 | 1612 |         return PyString_FromString(""); | 
 | 1613 |     } | 
 | 1614 |     if (seqlen == 1) { | 
 | 1615 |         item = PySequence_Fast_GET_ITEM(seq, 0); | 
 | 1616 |         if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) { | 
 | 1617 |             Py_INCREF(item); | 
 | 1618 |             Py_DECREF(seq); | 
 | 1619 |             return item; | 
 | 1620 |         } | 
 | 1621 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1622 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1623 |     /* There are at least two things to join, or else we have a subclass | 
 | 1624 |      * of the builtin types in the sequence. | 
 | 1625 |      * Do a pre-pass to figure out the total amount of space we'll | 
 | 1626 |      * need (sz), see whether any argument is absurd, and defer to | 
 | 1627 |      * the Unicode join if appropriate. | 
 | 1628 |      */ | 
 | 1629 |     for (i = 0; i < seqlen; i++) { | 
 | 1630 |         const size_t old_sz = sz; | 
 | 1631 |         item = PySequence_Fast_GET_ITEM(seq, i); | 
 | 1632 |         if (!PyString_Check(item)){ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1633 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1634 |             if (PyUnicode_Check(item)) { | 
 | 1635 |                 /* Defer to Unicode join. | 
| Martin Panter | b1d867f | 2016-05-26 05:28:50 +0000 | [diff] [blame] | 1636 |                  * CAUTION:  There's no guarantee that the | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1637 |                  * original sequence can be iterated over | 
 | 1638 |                  * again, so we must pass seq here. | 
 | 1639 |                  */ | 
 | 1640 |                 PyObject *result; | 
 | 1641 |                 result = PyUnicode_Join((PyObject *)self, seq); | 
 | 1642 |                 Py_DECREF(seq); | 
 | 1643 |                 return result; | 
 | 1644 |             } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1645 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1646 |             PyErr_Format(PyExc_TypeError, | 
 | 1647 |                          "sequence item %zd: expected string," | 
 | 1648 |                          " %.80s found", | 
 | 1649 |                          i, Py_TYPE(item)->tp_name); | 
 | 1650 |             Py_DECREF(seq); | 
 | 1651 |             return NULL; | 
 | 1652 |         } | 
 | 1653 |         sz += PyString_GET_SIZE(item); | 
 | 1654 |         if (i != 0) | 
 | 1655 |             sz += seplen; | 
 | 1656 |         if (sz < old_sz || sz > PY_SSIZE_T_MAX) { | 
 | 1657 |             PyErr_SetString(PyExc_OverflowError, | 
 | 1658 |                 "join() result is too long for a Python string"); | 
 | 1659 |             Py_DECREF(seq); | 
 | 1660 |             return NULL; | 
 | 1661 |         } | 
 | 1662 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1663 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1664 |     /* Allocate result space. */ | 
 | 1665 |     res = PyString_FromStringAndSize((char*)NULL, sz); | 
 | 1666 |     if (res == NULL) { | 
 | 1667 |         Py_DECREF(seq); | 
 | 1668 |         return NULL; | 
 | 1669 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1670 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1671 |     /* Catenate everything. */ | 
 | 1672 |     p = PyString_AS_STRING(res); | 
 | 1673 |     for (i = 0; i < seqlen; ++i) { | 
 | 1674 |         size_t n; | 
 | 1675 |         item = PySequence_Fast_GET_ITEM(seq, i); | 
 | 1676 |         n = PyString_GET_SIZE(item); | 
 | 1677 |         Py_MEMCPY(p, PyString_AS_STRING(item), n); | 
 | 1678 |         p += n; | 
 | 1679 |         if (i < seqlen - 1) { | 
 | 1680 |             Py_MEMCPY(p, sep, seplen); | 
 | 1681 |             p += seplen; | 
 | 1682 |         } | 
 | 1683 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1684 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1685 |     Py_DECREF(seq); | 
 | 1686 |     return res; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1687 | } | 
 | 1688 |  | 
 | 1689 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1690 | _PyString_Join(PyObject *sep, PyObject *x) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1691 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1692 |     assert(sep != NULL && PyString_Check(sep)); | 
 | 1693 |     assert(x != NULL); | 
 | 1694 |     return string_join((PyStringObject *)sep, x); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1695 | } | 
 | 1696 |  | 
/* Helper macro to fix up start/end slice values in place.
 *
 *   - end greater than len is clamped to len;
 *   - a negative end has len added to it and is then clamped to 0;
 *   - a negative start has len added to it and is then clamped to 0.
 *
 * start and end must be modifiable lvalues.  All three arguments are
 * evaluated more than once, so they must be free of side effects.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands
 * to a single statement and is safe in an unbraced if/else; invoke it
 * with a trailing semicolon.  Arguments are parenthesized so that
 * expressions such as a conditional can be passed as len.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1711 |  | 
 | 1712 | Py_LOCAL_INLINE(Py_ssize_t) | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1713 | string_find_internal(PyStringObject *self, PyObject *args, int dir) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1714 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1715 |     PyObject *subobj; | 
 | 1716 |     const char *sub; | 
 | 1717 |     Py_ssize_t sub_len; | 
 | 1718 |     Py_ssize_t start=0, end=PY_SSIZE_T_MAX; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1719 |  | 
| Jesus Cea | 44e8168 | 2011-04-20 16:39:15 +0200 | [diff] [blame] | 1720 |     if (!stringlib_parse_args_finds("find/rfind/index/rindex", | 
 | 1721 |                                     args, &subobj, &start, &end)) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1722 |         return -2; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1723 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1724 |     if (PyString_Check(subobj)) { | 
 | 1725 |         sub = PyString_AS_STRING(subobj); | 
 | 1726 |         sub_len = PyString_GET_SIZE(subobj); | 
 | 1727 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1728 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1729 |     else if (PyUnicode_Check(subobj)) | 
 | 1730 |         return PyUnicode_Find( | 
 | 1731 |             (PyObject *)self, subobj, start, end, dir); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1732 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1733 |     else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) | 
 | 1734 |         /* XXX - the "expected a character buffer object" is pretty | 
 | 1735 |            confusing for a non-expert.  remap to something else ? */ | 
 | 1736 |         return -2; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1737 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1738 |     if (dir > 0) | 
 | 1739 |         return stringlib_find_slice( | 
 | 1740 |             PyString_AS_STRING(self), PyString_GET_SIZE(self), | 
 | 1741 |             sub, sub_len, start, end); | 
 | 1742 |     else | 
 | 1743 |         return stringlib_rfind_slice( | 
 | 1744 |             PyString_AS_STRING(self), PyString_GET_SIZE(self), | 
 | 1745 |             sub, sub_len, start, end); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1746 | } | 
 | 1747 |  | 
 | 1748 |  | 
 | 1749 | PyDoc_STRVAR(find__doc__, | 
 | 1750 | "S.find(sub [,start [,end]]) -> int\n\ | 
 | 1751 | \n\ | 
 | 1752 | Return the lowest index in S where substring sub is found,\n\ | 
| Senthil Kumaran | 5e3a19d | 2011-07-27 23:36:51 +0800 | [diff] [blame] | 1753 | such that sub is contained within S[start:end].  Optional\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1754 | arguments start and end are interpreted as in slice notation.\n\ | 
 | 1755 | \n\ | 
 | 1756 | Return -1 on failure."); | 
 | 1757 |  | 
 | 1758 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1759 | string_find(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1760 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1761 |     Py_ssize_t result = string_find_internal(self, args, +1); | 
 | 1762 |     if (result == -2) | 
 | 1763 |         return NULL; | 
 | 1764 |     return PyInt_FromSsize_t(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1765 | } | 
 | 1766 |  | 
 | 1767 |  | 
 | 1768 | PyDoc_STRVAR(index__doc__, | 
 | 1769 | "S.index(sub [,start [,end]]) -> int\n\ | 
 | 1770 | \n\ | 
 | 1771 | Like S.find() but raise ValueError when the substring is not found."); | 
 | 1772 |  | 
 | 1773 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1774 | string_index(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1775 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1776 |     Py_ssize_t result = string_find_internal(self, args, +1); | 
 | 1777 |     if (result == -2) | 
 | 1778 |         return NULL; | 
 | 1779 |     if (result == -1) { | 
 | 1780 |         PyErr_SetString(PyExc_ValueError, | 
 | 1781 |                         "substring not found"); | 
 | 1782 |         return NULL; | 
 | 1783 |     } | 
 | 1784 |     return PyInt_FromSsize_t(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1785 | } | 
 | 1786 |  | 
 | 1787 |  | 
 | 1788 | PyDoc_STRVAR(rfind__doc__, | 
 | 1789 | "S.rfind(sub [,start [,end]]) -> int\n\ | 
 | 1790 | \n\ | 
 | 1791 | Return the highest index in S where substring sub is found,\n\ | 
| Senthil Kumaran | 5e3a19d | 2011-07-27 23:36:51 +0800 | [diff] [blame] | 1792 | such that sub is contained within S[start:end].  Optional\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1793 | arguments start and end are interpreted as in slice notation.\n\ | 
 | 1794 | \n\ | 
 | 1795 | Return -1 on failure."); | 
 | 1796 |  | 
 | 1797 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1798 | string_rfind(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1799 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1800 |     Py_ssize_t result = string_find_internal(self, args, -1); | 
 | 1801 |     if (result == -2) | 
 | 1802 |         return NULL; | 
 | 1803 |     return PyInt_FromSsize_t(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1804 | } | 
 | 1805 |  | 
 | 1806 |  | 
 | 1807 | PyDoc_STRVAR(rindex__doc__, | 
 | 1808 | "S.rindex(sub [,start [,end]]) -> int\n\ | 
 | 1809 | \n\ | 
 | 1810 | Like S.rfind() but raise ValueError when the substring is not found."); | 
 | 1811 |  | 
 | 1812 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1813 | string_rindex(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1814 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1815 |     Py_ssize_t result = string_find_internal(self, args, -1); | 
 | 1816 |     if (result == -2) | 
 | 1817 |         return NULL; | 
 | 1818 |     if (result == -1) { | 
 | 1819 |         PyErr_SetString(PyExc_ValueError, | 
 | 1820 |                         "substring not found"); | 
 | 1821 |         return NULL; | 
 | 1822 |     } | 
 | 1823 |     return PyInt_FromSsize_t(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1824 | } | 
 | 1825 |  | 
 | 1826 |  | 
 | 1827 | Py_LOCAL_INLINE(PyObject *) | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1828 | do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1829 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1830 |     char *s = PyString_AS_STRING(self); | 
 | 1831 |     Py_ssize_t len = PyString_GET_SIZE(self); | 
 | 1832 |     char *sep = PyString_AS_STRING(sepobj); | 
 | 1833 |     Py_ssize_t seplen = PyString_GET_SIZE(sepobj); | 
 | 1834 |     Py_ssize_t i, j; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1835 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1836 |     i = 0; | 
 | 1837 |     if (striptype != RIGHTSTRIP) { | 
 | 1838 |         while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { | 
 | 1839 |             i++; | 
 | 1840 |         } | 
 | 1841 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1842 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1843 |     j = len; | 
 | 1844 |     if (striptype != LEFTSTRIP) { | 
 | 1845 |         do { | 
 | 1846 |             j--; | 
 | 1847 |         } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); | 
 | 1848 |         j++; | 
 | 1849 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1850 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1851 |     if (i == 0 && j == len && PyString_CheckExact(self)) { | 
 | 1852 |         Py_INCREF(self); | 
 | 1853 |         return (PyObject*)self; | 
 | 1854 |     } | 
 | 1855 |     else | 
 | 1856 |         return PyString_FromStringAndSize(s+i, j-i); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1857 | } | 
 | 1858 |  | 
 | 1859 |  | 
 | 1860 | Py_LOCAL_INLINE(PyObject *) | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1861 | do_strip(PyStringObject *self, int striptype) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1862 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1863 |     char *s = PyString_AS_STRING(self); | 
 | 1864 |     Py_ssize_t len = PyString_GET_SIZE(self), i, j; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1865 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1866 |     i = 0; | 
 | 1867 |     if (striptype != RIGHTSTRIP) { | 
 | 1868 |         while (i < len && isspace(Py_CHARMASK(s[i]))) { | 
 | 1869 |             i++; | 
 | 1870 |         } | 
 | 1871 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1872 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1873 |     j = len; | 
 | 1874 |     if (striptype != LEFTSTRIP) { | 
 | 1875 |         do { | 
 | 1876 |             j--; | 
 | 1877 |         } while (j >= i && isspace(Py_CHARMASK(s[j]))); | 
 | 1878 |         j++; | 
 | 1879 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1880 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1881 |     if (i == 0 && j == len && PyString_CheckExact(self)) { | 
 | 1882 |         Py_INCREF(self); | 
 | 1883 |         return (PyObject*)self; | 
 | 1884 |     } | 
 | 1885 |     else | 
 | 1886 |         return PyString_FromStringAndSize(s+i, j-i); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1887 | } | 
 | 1888 |  | 
 | 1889 |  | 
 | 1890 | Py_LOCAL_INLINE(PyObject *) | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1891 | do_argstrip(PyStringObject *self, int striptype, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1892 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1893 |     PyObject *sep = NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1894 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1895 |     if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) | 
 | 1896 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1897 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1898 |     if (sep != NULL && sep != Py_None) { | 
 | 1899 |         if (PyString_Check(sep)) | 
 | 1900 |             return do_xstrip(self, striptype, sep); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1901 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1902 |         else if (PyUnicode_Check(sep)) { | 
 | 1903 |             PyObject *uniself = PyUnicode_FromObject((PyObject *)self); | 
 | 1904 |             PyObject *res; | 
 | 1905 |             if (uniself==NULL) | 
 | 1906 |                 return NULL; | 
 | 1907 |             res = _PyUnicode_XStrip((PyUnicodeObject *)uniself, | 
 | 1908 |                 striptype, sep); | 
 | 1909 |             Py_DECREF(uniself); | 
 | 1910 |             return res; | 
 | 1911 |         } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1912 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1913 |         PyErr_Format(PyExc_TypeError, | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1914 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1915 |                      "%s arg must be None, str or unicode", | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1916 | #else | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1917 |                      "%s arg must be None or str", | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1918 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1919 |                      STRIPNAME(striptype)); | 
 | 1920 |         return NULL; | 
 | 1921 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1922 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1923 |     return do_strip(self, striptype); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1924 | } | 
 | 1925 |  | 
 | 1926 |  | 
 | 1927 | PyDoc_STRVAR(strip__doc__, | 
 | 1928 | "S.strip([chars]) -> string or unicode\n\ | 
 | 1929 | \n\ | 
 | 1930 | Return a copy of the string S with leading and trailing\n\ | 
 | 1931 | whitespace removed.\n\ | 
 | 1932 | If chars is given and not None, remove characters in chars instead.\n\ | 
 | 1933 | If chars is unicode, S will be converted to unicode before stripping"); | 
 | 1934 |  | 
 | 1935 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1936 | string_strip(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1937 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1938 |     if (PyTuple_GET_SIZE(args) == 0) | 
 | 1939 |         return do_strip(self, BOTHSTRIP); /* Common case */ | 
 | 1940 |     else | 
 | 1941 |         return do_argstrip(self, BOTHSTRIP, args); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1942 | } | 
 | 1943 |  | 
 | 1944 |  | 
 | 1945 | PyDoc_STRVAR(lstrip__doc__, | 
 | 1946 | "S.lstrip([chars]) -> string or unicode\n\ | 
 | 1947 | \n\ | 
 | 1948 | Return a copy of the string S with leading whitespace removed.\n\ | 
 | 1949 | If chars is given and not None, remove characters in chars instead.\n\ | 
 | 1950 | If chars is unicode, S will be converted to unicode before stripping"); | 
 | 1951 |  | 
 | 1952 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1953 | string_lstrip(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1954 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1955 |     if (PyTuple_GET_SIZE(args) == 0) | 
 | 1956 |         return do_strip(self, LEFTSTRIP); /* Common case */ | 
 | 1957 |     else | 
 | 1958 |         return do_argstrip(self, LEFTSTRIP, args); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1959 | } | 
 | 1960 |  | 
 | 1961 |  | 
 | 1962 | PyDoc_STRVAR(rstrip__doc__, | 
 | 1963 | "S.rstrip([chars]) -> string or unicode\n\ | 
 | 1964 | \n\ | 
 | 1965 | Return a copy of the string S with trailing whitespace removed.\n\ | 
 | 1966 | If chars is given and not None, remove characters in chars instead.\n\ | 
 | 1967 | If chars is unicode, S will be converted to unicode before stripping"); | 
 | 1968 |  | 
 | 1969 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1970 | string_rstrip(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1971 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1972 |     if (PyTuple_GET_SIZE(args) == 0) | 
 | 1973 |         return do_strip(self, RIGHTSTRIP); /* Common case */ | 
 | 1974 |     else | 
 | 1975 |         return do_argstrip(self, RIGHTSTRIP, args); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1976 | } | 
 | 1977 |  | 
 | 1978 |  | 
 | 1979 | PyDoc_STRVAR(lower__doc__, | 
 | 1980 | "S.lower() -> string\n\ | 
 | 1981 | \n\ | 
 | 1982 | Return a copy of the string S converted to lowercase."); | 
 | 1983 |  | 
 | 1984 | /* _tolower and _toupper are defined by SUSv2, but they're not ISO C */ | 
 | 1985 | #ifndef _tolower | 
 | 1986 | #define _tolower tolower | 
 | 1987 | #endif | 
 | 1988 |  | 
 | 1989 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 1990 | string_lower(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1991 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1992 |     char *s; | 
 | 1993 |     Py_ssize_t i, n = PyString_GET_SIZE(self); | 
 | 1994 |     PyObject *newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1995 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1996 |     newobj = PyString_FromStringAndSize(NULL, n); | 
 | 1997 |     if (!newobj) | 
 | 1998 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 1999 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2000 |     s = PyString_AS_STRING(newobj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2001 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2002 |     Py_MEMCPY(s, PyString_AS_STRING(self), n); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2003 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2004 |     for (i = 0; i < n; i++) { | 
 | 2005 |         int c = Py_CHARMASK(s[i]); | 
 | 2006 |         if (isupper(c)) | 
 | 2007 |             s[i] = _tolower(c); | 
 | 2008 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2009 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2010 |     return newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2011 | } | 
 | 2012 |  | 
 | 2013 | PyDoc_STRVAR(upper__doc__, | 
 | 2014 | "S.upper() -> string\n\ | 
 | 2015 | \n\ | 
 | 2016 | Return a copy of the string S converted to uppercase."); | 
 | 2017 |  | 
 | 2018 | #ifndef _toupper | 
 | 2019 | #define _toupper toupper | 
 | 2020 | #endif | 
 | 2021 |  | 
 | 2022 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2023 | string_upper(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2024 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2025 |     char *s; | 
 | 2026 |     Py_ssize_t i, n = PyString_GET_SIZE(self); | 
 | 2027 |     PyObject *newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2028 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2029 |     newobj = PyString_FromStringAndSize(NULL, n); | 
 | 2030 |     if (!newobj) | 
 | 2031 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2032 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2033 |     s = PyString_AS_STRING(newobj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2034 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2035 |     Py_MEMCPY(s, PyString_AS_STRING(self), n); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2036 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2037 |     for (i = 0; i < n; i++) { | 
 | 2038 |         int c = Py_CHARMASK(s[i]); | 
 | 2039 |         if (islower(c)) | 
 | 2040 |             s[i] = _toupper(c); | 
 | 2041 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2042 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2043 |     return newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2044 | } | 
 | 2045 |  | 
 | 2046 | PyDoc_STRVAR(title__doc__, | 
 | 2047 | "S.title() -> string\n\ | 
 | 2048 | \n\ | 
 | 2049 | Return a titlecased version of S, i.e. words start with uppercase\n\ | 
 | 2050 | characters, all remaining cased characters have lowercase."); | 
 | 2051 |  | 
 | 2052 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2053 | string_title(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2054 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2055 |     char *s = PyString_AS_STRING(self), *s_new; | 
 | 2056 |     Py_ssize_t i, n = PyString_GET_SIZE(self); | 
 | 2057 |     int previous_is_cased = 0; | 
 | 2058 |     PyObject *newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2059 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2060 |     newobj = PyString_FromStringAndSize(NULL, n); | 
 | 2061 |     if (newobj == NULL) | 
 | 2062 |         return NULL; | 
 | 2063 |     s_new = PyString_AsString(newobj); | 
 | 2064 |     for (i = 0; i < n; i++) { | 
 | 2065 |         int c = Py_CHARMASK(*s++); | 
 | 2066 |         if (islower(c)) { | 
 | 2067 |             if (!previous_is_cased) | 
 | 2068 |                 c = toupper(c); | 
 | 2069 |             previous_is_cased = 1; | 
 | 2070 |         } else if (isupper(c)) { | 
 | 2071 |             if (previous_is_cased) | 
 | 2072 |                 c = tolower(c); | 
 | 2073 |             previous_is_cased = 1; | 
 | 2074 |         } else | 
 | 2075 |             previous_is_cased = 0; | 
 | 2076 |         *s_new++ = c; | 
 | 2077 |     } | 
 | 2078 |     return newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2079 | } | 
 | 2080 |  | 
 | 2081 | PyDoc_STRVAR(capitalize__doc__, | 
 | 2082 | "S.capitalize() -> string\n\ | 
 | 2083 | \n\ | 
 | 2084 | Return a copy of the string S with only its first character\n\ | 
 | 2085 | capitalized."); | 
 | 2086 |  | 
 | 2087 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2088 | string_capitalize(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2089 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2090 |     char *s = PyString_AS_STRING(self), *s_new; | 
 | 2091 |     Py_ssize_t i, n = PyString_GET_SIZE(self); | 
 | 2092 |     PyObject *newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2093 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2094 |     newobj = PyString_FromStringAndSize(NULL, n); | 
 | 2095 |     if (newobj == NULL) | 
 | 2096 |         return NULL; | 
 | 2097 |     s_new = PyString_AsString(newobj); | 
 | 2098 |     if (0 < n) { | 
 | 2099 |         int c = Py_CHARMASK(*s++); | 
 | 2100 |         if (islower(c)) | 
 | 2101 |             *s_new = toupper(c); | 
 | 2102 |         else | 
 | 2103 |             *s_new = c; | 
 | 2104 |         s_new++; | 
 | 2105 |     } | 
 | 2106 |     for (i = 1; i < n; i++) { | 
 | 2107 |         int c = Py_CHARMASK(*s++); | 
 | 2108 |         if (isupper(c)) | 
 | 2109 |             *s_new = tolower(c); | 
 | 2110 |         else | 
 | 2111 |             *s_new = c; | 
 | 2112 |         s_new++; | 
 | 2113 |     } | 
 | 2114 |     return newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2115 | } | 
 | 2116 |  | 
 | 2117 |  | 
 | 2118 | PyDoc_STRVAR(count__doc__, | 
 | 2119 | "S.count(sub[, start[, end]]) -> int\n\ | 
 | 2120 | \n\ | 
 | 2121 | Return the number of non-overlapping occurrences of substring sub in\n\ | 
 | 2122 | string S[start:end].  Optional arguments start and end are interpreted\n\ | 
 | 2123 | as in slice notation."); | 
 | 2124 |  | 
 | 2125 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2126 | string_count(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2127 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2128 |     PyObject *sub_obj; | 
 | 2129 |     const char *str = PyString_AS_STRING(self), *sub; | 
 | 2130 |     Py_ssize_t sub_len; | 
 | 2131 |     Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2132 |  | 
| Jesus Cea | 44e8168 | 2011-04-20 16:39:15 +0200 | [diff] [blame] | 2133 |     if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end)) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2134 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2135 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2136 |     if (PyString_Check(sub_obj)) { | 
 | 2137 |         sub = PyString_AS_STRING(sub_obj); | 
 | 2138 |         sub_len = PyString_GET_SIZE(sub_obj); | 
 | 2139 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2140 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2141 |     else if (PyUnicode_Check(sub_obj)) { | 
 | 2142 |         Py_ssize_t count; | 
 | 2143 |         count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); | 
 | 2144 |         if (count == -1) | 
 | 2145 |             return NULL; | 
 | 2146 |         else | 
 | 2147 |             return PyInt_FromSsize_t(count); | 
 | 2148 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2149 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2150 |     else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) | 
 | 2151 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2152 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2153 |     ADJUST_INDICES(start, end, PyString_GET_SIZE(self)); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2154 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2155 |     return PyInt_FromSsize_t( | 
 | 2156 |         stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) | 
 | 2157 |         ); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2158 | } | 
 | 2159 |  | 
 | 2160 | PyDoc_STRVAR(swapcase__doc__, | 
 | 2161 | "S.swapcase() -> string\n\ | 
 | 2162 | \n\ | 
 | 2163 | Return a copy of the string S with uppercase characters\n\ | 
 | 2164 | converted to lowercase and vice versa."); | 
 | 2165 |  | 
 | 2166 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2167 | string_swapcase(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2168 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2169 |     char *s = PyString_AS_STRING(self), *s_new; | 
 | 2170 |     Py_ssize_t i, n = PyString_GET_SIZE(self); | 
 | 2171 |     PyObject *newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2172 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2173 |     newobj = PyString_FromStringAndSize(NULL, n); | 
 | 2174 |     if (newobj == NULL) | 
 | 2175 |         return NULL; | 
 | 2176 |     s_new = PyString_AsString(newobj); | 
 | 2177 |     for (i = 0; i < n; i++) { | 
 | 2178 |         int c = Py_CHARMASK(*s++); | 
 | 2179 |         if (islower(c)) { | 
 | 2180 |             *s_new = toupper(c); | 
 | 2181 |         } | 
 | 2182 |         else if (isupper(c)) { | 
 | 2183 |             *s_new = tolower(c); | 
 | 2184 |         } | 
 | 2185 |         else | 
 | 2186 |             *s_new = c; | 
 | 2187 |         s_new++; | 
 | 2188 |     } | 
 | 2189 |     return newobj; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2190 | } | 
 | 2191 |  | 
 | 2192 |  | 
 | 2193 | PyDoc_STRVAR(translate__doc__, | 
 | 2194 | "S.translate(table [,deletechars]) -> string\n\ | 
 | 2195 | \n\ | 
 | 2196 | Return a copy of the string S, where all characters occurring\n\ | 
 | 2197 | in the optional argument deletechars are removed, and the\n\ | 
 | 2198 | remaining characters have been mapped through the given\n\ | 
| Mark Dickinson | cb9bf1a | 2011-06-25 11:00:12 +0200 | [diff] [blame] | 2199 | translation table, which must be a string of length 256 or None.\n\ | 
 | 2200 | If the table argument is None, no translation is applied and\n\ | 
 | 2201 | the operation simply removes the characters in deletechars."); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2202 |  | 
 | 2203 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2204 | string_translate(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2205 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2206 |     register char *input, *output; | 
 | 2207 |     const char *table; | 
 | 2208 |     register Py_ssize_t i, c, changed = 0; | 
 | 2209 |     PyObject *input_obj = (PyObject*)self; | 
 | 2210 |     const char *output_start, *del_table=NULL; | 
 | 2211 |     Py_ssize_t inlen, tablen, dellen = 0; | 
 | 2212 |     PyObject *result; | 
 | 2213 |     int trans_table[256]; | 
 | 2214 |     PyObject *tableobj, *delobj = NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2215 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2216 |     if (!PyArg_UnpackTuple(args, "translate", 1, 2, | 
 | 2217 |                           &tableobj, &delobj)) | 
 | 2218 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2219 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2220 |     if (PyString_Check(tableobj)) { | 
 | 2221 |         table = PyString_AS_STRING(tableobj); | 
 | 2222 |         tablen = PyString_GET_SIZE(tableobj); | 
 | 2223 |     } | 
 | 2224 |     else if (tableobj == Py_None) { | 
 | 2225 |         table = NULL; | 
 | 2226 |         tablen = 256; | 
 | 2227 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2228 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2229 |     else if (PyUnicode_Check(tableobj)) { | 
 | 2230 |         /* Unicode .translate() does not support the deletechars | 
 | 2231 |            parameter; instead a mapping to None will cause characters | 
 | 2232 |            to be deleted. */ | 
 | 2233 |         if (delobj != NULL) { | 
 | 2234 |             PyErr_SetString(PyExc_TypeError, | 
 | 2235 |             "deletions are implemented differently for unicode"); | 
 | 2236 |             return NULL; | 
 | 2237 |         } | 
 | 2238 |         return PyUnicode_Translate((PyObject *)self, tableobj, NULL); | 
 | 2239 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2240 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2241 |     else if (PyObject_AsCharBuffer(tableobj, &table, &tablen)) | 
 | 2242 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2243 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2244 |     if (tablen != 256) { | 
 | 2245 |         PyErr_SetString(PyExc_ValueError, | 
 | 2246 |           "translation table must be 256 characters long"); | 
 | 2247 |         return NULL; | 
 | 2248 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2249 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2250 |     if (delobj != NULL) { | 
 | 2251 |         if (PyString_Check(delobj)) { | 
 | 2252 |             del_table = PyString_AS_STRING(delobj); | 
 | 2253 |             dellen = PyString_GET_SIZE(delobj); | 
 | 2254 |         } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2255 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2256 |         else if (PyUnicode_Check(delobj)) { | 
 | 2257 |             PyErr_SetString(PyExc_TypeError, | 
 | 2258 |             "deletions are implemented differently for unicode"); | 
 | 2259 |             return NULL; | 
 | 2260 |         } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2261 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2262 |         else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) | 
 | 2263 |             return NULL; | 
 | 2264 |     } | 
 | 2265 |     else { | 
 | 2266 |         del_table = NULL; | 
 | 2267 |         dellen = 0; | 
 | 2268 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2269 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2270 |     inlen = PyString_GET_SIZE(input_obj); | 
 | 2271 |     result = PyString_FromStringAndSize((char *)NULL, inlen); | 
 | 2272 |     if (result == NULL) | 
 | 2273 |         return NULL; | 
 | 2274 |     output_start = output = PyString_AsString(result); | 
 | 2275 |     input = PyString_AS_STRING(input_obj); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2276 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2277 |     if (dellen == 0 && table != NULL) { | 
 | 2278 |         /* If no deletions are required, use faster code */ | 
 | 2279 |         for (i = inlen; --i >= 0; ) { | 
 | 2280 |             c = Py_CHARMASK(*input++); | 
 | 2281 |             if (Py_CHARMASK((*output++ = table[c])) != c) | 
 | 2282 |                 changed = 1; | 
 | 2283 |         } | 
 | 2284 |         if (changed || !PyString_CheckExact(input_obj)) | 
 | 2285 |             return result; | 
 | 2286 |         Py_DECREF(result); | 
 | 2287 |         Py_INCREF(input_obj); | 
 | 2288 |         return input_obj; | 
 | 2289 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2290 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2291 |     if (table == NULL) { | 
 | 2292 |         for (i = 0; i < 256; i++) | 
 | 2293 |             trans_table[i] = Py_CHARMASK(i); | 
 | 2294 |     } else { | 
 | 2295 |         for (i = 0; i < 256; i++) | 
 | 2296 |             trans_table[i] = Py_CHARMASK(table[i]); | 
 | 2297 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2298 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2299 |     for (i = 0; i < dellen; i++) | 
 | 2300 |         trans_table[(int) Py_CHARMASK(del_table[i])] = -1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2301 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2302 |     for (i = inlen; --i >= 0; ) { | 
 | 2303 |         c = Py_CHARMASK(*input++); | 
 | 2304 |         if (trans_table[c] != -1) | 
 | 2305 |             if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) | 
 | 2306 |                 continue; | 
 | 2307 |         changed = 1; | 
 | 2308 |     } | 
 | 2309 |     if (!changed && PyString_CheckExact(input_obj)) { | 
 | 2310 |         Py_DECREF(result); | 
 | 2311 |         Py_INCREF(input_obj); | 
 | 2312 |         return input_obj; | 
 | 2313 |     } | 
 | 2314 |     /* Fix the size of the resulting string */ | 
 | 2315 |     if (inlen > 0 && _PyString_Resize(&result, output - output_start)) | 
 | 2316 |         return NULL; | 
 | 2317 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2318 | } | 
 | 2319 |  | 
 | 2320 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2321 | /* find and count characters and substrings */ | 
 | 2322 |  | 
/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at the match, or NULL when
   there is none (thin wrapper around memchr()). */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
 | 2325 |  | 
 | 2326 | /* String ops must return a string.  */ | 
 | 2327 | /* If the object is subclass of string, create a copy */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2328 | Py_LOCAL(PyStringObject *) | 
 | 2329 | return_self(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2330 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2331 |     if (PyString_CheckExact(self)) { | 
 | 2332 |         Py_INCREF(self); | 
 | 2333 |         return self; | 
 | 2334 |     } | 
 | 2335 |     return (PyStringObject *)PyString_FromStringAndSize( | 
 | 2336 |         PyString_AS_STRING(self), | 
 | 2337 |         PyString_GET_SIZE(self)); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2338 | } | 
 | 2339 |  | 
 | 2340 | Py_LOCAL_INLINE(Py_ssize_t) | 
| Ronald Oussoren | 3687e80 | 2013-07-11 13:33:55 +0200 | [diff] [blame] | 2341 | countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2342 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2343 |     Py_ssize_t count=0; | 
 | 2344 |     const char *start=target; | 
 | 2345 |     const char *end=target+target_len; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2346 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2347 |     while ( (start=findchar(start, end-start, c)) != NULL ) { | 
 | 2348 |         count++; | 
 | 2349 |         if (count >= maxcount) | 
 | 2350 |             break; | 
 | 2351 |         start += 1; | 
 | 2352 |     } | 
 | 2353 |     return count; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2354 | } | 
 | 2355 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2356 |  | 
 | 2357 | /* Algorithms for different cases of string replacement */ | 
 | 2358 |  | 
 | 2359 | /* len(self)>=1, from="", len(to)>=1, maxcount>=1 */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2360 | Py_LOCAL(PyStringObject *) | 
 | 2361 | replace_interleave(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2362 |                    const char *to_s, Py_ssize_t to_len, | 
 | 2363 |                    Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2364 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2365 |     char *self_s, *result_s; | 
 | 2366 |     Py_ssize_t self_len, result_len; | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2367 |     Py_ssize_t count, i; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2368 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2369 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2370 |     self_len = PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2371 |  | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2372 |     /* 1 at the end plus 1 after every character; | 
 | 2373 |        count = min(maxcount, self_len + 1) */ | 
 | 2374 |     if (maxcount <= self_len) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2375 |         count = maxcount; | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2376 |     } | 
 | 2377 |     else { | 
 | 2378 |         /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */ | 
 | 2379 |         count = self_len + 1; | 
 | 2380 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2381 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2382 |     /* Check for overflow */ | 
 | 2383 |     /*   result_len = count * to_len + self_len; */ | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2384 |     assert(count > 0); | 
 | 2385 |     if (to_len > (PY_SSIZE_T_MAX - self_len) / count) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2386 |         PyErr_SetString(PyExc_OverflowError, | 
 | 2387 |                         "replace string is too long"); | 
 | 2388 |         return NULL; | 
 | 2389 |     } | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2390 |     result_len = count * to_len + self_len; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2391 |     if (! (result = (PyStringObject *) | 
 | 2392 |                      PyString_FromStringAndSize(NULL, result_len)) ) | 
 | 2393 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2394 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2395 |     self_s = PyString_AS_STRING(self); | 
 | 2396 |     result_s = PyString_AS_STRING(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2397 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2398 |     /* TODO: special case single character, which doesn't need memcpy */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2399 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2400 |     /* Lay the first one down (guaranteed this will occur) */ | 
 | 2401 |     Py_MEMCPY(result_s, to_s, to_len); | 
 | 2402 |     result_s += to_len; | 
 | 2403 |     count -= 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2404 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2405 |     for (i=0; i<count; i++) { | 
 | 2406 |         *result_s++ = *self_s++; | 
 | 2407 |         Py_MEMCPY(result_s, to_s, to_len); | 
 | 2408 |         result_s += to_len; | 
 | 2409 |     } | 
 | 2410 |  | 
 | 2411 |     /* Copy the rest of the original string */ | 
 | 2412 |     Py_MEMCPY(result_s, self_s, self_len-i); | 
 | 2413 |  | 
 | 2414 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2415 | } | 
 | 2416 |  | 
 | 2417 | /* Special case for deleting a single character */ | 
 | 2418 | /* len(self)>=1, len(from)==1, to="", maxcount>=1 */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2419 | Py_LOCAL(PyStringObject *) | 
 | 2420 | replace_delete_single_character(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2421 |                                 char from_c, Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2422 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2423 |     char *self_s, *result_s; | 
 | 2424 |     char *start, *next, *end; | 
 | 2425 |     Py_ssize_t self_len, result_len; | 
 | 2426 |     Py_ssize_t count; | 
 | 2427 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2428 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2429 |     self_len = PyString_GET_SIZE(self); | 
 | 2430 |     self_s = PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2431 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2432 |     count = countchar(self_s, self_len, from_c, maxcount); | 
 | 2433 |     if (count == 0) { | 
 | 2434 |         return return_self(self); | 
 | 2435 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2436 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2437 |     result_len = self_len - count;  /* from_len == 1 */ | 
 | 2438 |     assert(result_len>=0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2439 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2440 |     if ( (result = (PyStringObject *) | 
 | 2441 |                     PyString_FromStringAndSize(NULL, result_len)) == NULL) | 
 | 2442 |         return NULL; | 
 | 2443 |     result_s = PyString_AS_STRING(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2444 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2445 |     start = self_s; | 
 | 2446 |     end = self_s + self_len; | 
 | 2447 |     while (count-- > 0) { | 
 | 2448 |         next = findchar(start, end-start, from_c); | 
 | 2449 |         if (next == NULL) | 
 | 2450 |             break; | 
 | 2451 |         Py_MEMCPY(result_s, start, next-start); | 
 | 2452 |         result_s += (next-start); | 
 | 2453 |         start = next+1; | 
 | 2454 |     } | 
 | 2455 |     Py_MEMCPY(result_s, start, end-start); | 
 | 2456 |  | 
 | 2457 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2458 | } | 
 | 2459 |  | 
 | 2460 | /* len(self)>=1, len(from)>=2, to="", maxcount>=1 */ | 
 | 2461 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2462 | Py_LOCAL(PyStringObject *) | 
 | 2463 | replace_delete_substring(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2464 |                          const char *from_s, Py_ssize_t from_len, | 
 | 2465 |                          Py_ssize_t maxcount) { | 
 | 2466 |     char *self_s, *result_s; | 
 | 2467 |     char *start, *next, *end; | 
 | 2468 |     Py_ssize_t self_len, result_len; | 
 | 2469 |     Py_ssize_t count, offset; | 
 | 2470 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2471 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2472 |     self_len = PyString_GET_SIZE(self); | 
 | 2473 |     self_s = PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2474 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2475 |     count = stringlib_count(self_s, self_len, | 
 | 2476 |                             from_s, from_len, | 
 | 2477 |                             maxcount); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2478 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2479 |     if (count == 0) { | 
 | 2480 |         /* no matches */ | 
 | 2481 |         return return_self(self); | 
 | 2482 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2483 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2484 |     result_len = self_len - (count * from_len); | 
 | 2485 |     assert (result_len>=0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2486 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2487 |     if ( (result = (PyStringObject *) | 
 | 2488 |           PyString_FromStringAndSize(NULL, result_len)) == NULL ) | 
 | 2489 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2490 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2491 |     result_s = PyString_AS_STRING(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2492 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2493 |     start = self_s; | 
 | 2494 |     end = self_s + self_len; | 
 | 2495 |     while (count-- > 0) { | 
 | 2496 |         offset = stringlib_find(start, end-start, | 
 | 2497 |                                 from_s, from_len, | 
 | 2498 |                                 0); | 
 | 2499 |         if (offset == -1) | 
 | 2500 |             break; | 
 | 2501 |         next = start + offset; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2502 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2503 |         Py_MEMCPY(result_s, start, next-start); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2504 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2505 |         result_s += (next-start); | 
 | 2506 |         start = next+from_len; | 
 | 2507 |     } | 
 | 2508 |     Py_MEMCPY(result_s, start, end-start); | 
 | 2509 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2510 | } | 
 | 2511 |  | 
 | 2512 | /* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2513 | Py_LOCAL(PyStringObject *) | 
 | 2514 | replace_single_character_in_place(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2515 |                                   char from_c, char to_c, | 
 | 2516 |                                   Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2517 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2518 |     char *self_s, *result_s, *start, *end, *next; | 
 | 2519 |     Py_ssize_t self_len; | 
 | 2520 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2521 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2522 |     /* The result string will be the same size */ | 
 | 2523 |     self_s = PyString_AS_STRING(self); | 
 | 2524 |     self_len = PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2525 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2526 |     next = findchar(self_s, self_len, from_c); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2527 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2528 |     if (next == NULL) { | 
 | 2529 |         /* No matches; return the original string */ | 
 | 2530 |         return return_self(self); | 
 | 2531 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2532 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2533 |     /* Need to make a new string */ | 
 | 2534 |     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); | 
 | 2535 |     if (result == NULL) | 
 | 2536 |         return NULL; | 
 | 2537 |     result_s = PyString_AS_STRING(result); | 
 | 2538 |     Py_MEMCPY(result_s, self_s, self_len); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2539 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2540 |     /* change everything in-place, starting with this one */ | 
 | 2541 |     start =  result_s + (next-self_s); | 
 | 2542 |     *start = to_c; | 
 | 2543 |     start++; | 
 | 2544 |     end = result_s + self_len; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2545 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2546 |     while (--maxcount > 0) { | 
 | 2547 |         next = findchar(start, end-start, from_c); | 
 | 2548 |         if (next == NULL) | 
 | 2549 |             break; | 
 | 2550 |         *next = to_c; | 
 | 2551 |         start = next+1; | 
 | 2552 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2553 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2554 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2555 | } | 
 | 2556 |  | 
 | 2557 | /* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2558 | Py_LOCAL(PyStringObject *) | 
 | 2559 | replace_substring_in_place(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2560 |                            const char *from_s, Py_ssize_t from_len, | 
 | 2561 |                            const char *to_s, Py_ssize_t to_len, | 
 | 2562 |                            Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2563 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2564 |     char *result_s, *start, *end; | 
 | 2565 |     char *self_s; | 
 | 2566 |     Py_ssize_t self_len, offset; | 
 | 2567 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2568 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2569 |     /* The result string will be the same size */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2570 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2571 |     self_s = PyString_AS_STRING(self); | 
 | 2572 |     self_len = PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2573 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2574 |     offset = stringlib_find(self_s, self_len, | 
 | 2575 |                             from_s, from_len, | 
 | 2576 |                             0); | 
 | 2577 |     if (offset == -1) { | 
 | 2578 |         /* No matches; return the original string */ | 
 | 2579 |         return return_self(self); | 
 | 2580 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2581 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2582 |     /* Need to make a new string */ | 
 | 2583 |     result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); | 
 | 2584 |     if (result == NULL) | 
 | 2585 |         return NULL; | 
 | 2586 |     result_s = PyString_AS_STRING(result); | 
 | 2587 |     Py_MEMCPY(result_s, self_s, self_len); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2588 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2589 |     /* change everything in-place, starting with this one */ | 
 | 2590 |     start =  result_s + offset; | 
 | 2591 |     Py_MEMCPY(start, to_s, from_len); | 
 | 2592 |     start += from_len; | 
 | 2593 |     end = result_s + self_len; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2594 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2595 |     while ( --maxcount > 0) { | 
 | 2596 |         offset = stringlib_find(start, end-start, | 
 | 2597 |                                 from_s, from_len, | 
 | 2598 |                                 0); | 
 | 2599 |         if (offset==-1) | 
 | 2600 |             break; | 
 | 2601 |         Py_MEMCPY(start+offset, to_s, from_len); | 
 | 2602 |         start += offset+from_len; | 
 | 2603 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2604 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2605 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2606 | } | 
 | 2607 |  | 
 | 2608 | /* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2609 | Py_LOCAL(PyStringObject *) | 
 | 2610 | replace_single_character(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2611 |                          char from_c, | 
 | 2612 |                          const char *to_s, Py_ssize_t to_len, | 
 | 2613 |                          Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2614 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2615 |     char *self_s, *result_s; | 
 | 2616 |     char *start, *next, *end; | 
 | 2617 |     Py_ssize_t self_len, result_len; | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2618 |     Py_ssize_t count; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2619 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2620 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2621 |     self_s = PyString_AS_STRING(self); | 
 | 2622 |     self_len = PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2623 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2624 |     count = countchar(self_s, self_len, from_c, maxcount); | 
 | 2625 |     if (count == 0) { | 
 | 2626 |         /* no matches, return unchanged */ | 
 | 2627 |         return return_self(self); | 
 | 2628 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2629 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2630 |     /* use the difference between current and new, hence the "-1" */ | 
 | 2631 |     /*   result_len = self_len + count * (to_len-1)  */ | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2632 |     assert(count > 0); | 
 | 2633 |     if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2634 |         PyErr_SetString(PyExc_OverflowError, "replace string is too long"); | 
 | 2635 |         return NULL; | 
 | 2636 |     } | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2637 |     result_len = self_len + count * (to_len - 1); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2638 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2639 |     if ( (result = (PyStringObject *) | 
 | 2640 |           PyString_FromStringAndSize(NULL, result_len)) == NULL) | 
 | 2641 |         return NULL; | 
 | 2642 |     result_s = PyString_AS_STRING(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2643 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2644 |     start = self_s; | 
 | 2645 |     end = self_s + self_len; | 
 | 2646 |     while (count-- > 0) { | 
 | 2647 |         next = findchar(start, end-start, from_c); | 
 | 2648 |         if (next == NULL) | 
 | 2649 |             break; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2650 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2651 |         if (next == start) { | 
 | 2652 |             /* replace with the 'to' */ | 
 | 2653 |             Py_MEMCPY(result_s, to_s, to_len); | 
 | 2654 |             result_s += to_len; | 
 | 2655 |             start += 1; | 
 | 2656 |         } else { | 
 | 2657 |             /* copy the unchanged old then the 'to' */ | 
 | 2658 |             Py_MEMCPY(result_s, start, next-start); | 
 | 2659 |             result_s += (next-start); | 
 | 2660 |             Py_MEMCPY(result_s, to_s, to_len); | 
 | 2661 |             result_s += to_len; | 
 | 2662 |             start = next+1; | 
 | 2663 |         } | 
 | 2664 |     } | 
 | 2665 |     /* Copy the remainder of the remaining string */ | 
 | 2666 |     Py_MEMCPY(result_s, start, end-start); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2667 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2668 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2669 | } | 
 | 2670 |  | 
 | 2671 | /* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2672 | Py_LOCAL(PyStringObject *) | 
 | 2673 | replace_substring(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2674 |                   const char *from_s, Py_ssize_t from_len, | 
 | 2675 |                   const char *to_s, Py_ssize_t to_len, | 
 | 2676 |                   Py_ssize_t maxcount) { | 
 | 2677 |     char *self_s, *result_s; | 
 | 2678 |     char *start, *next, *end; | 
 | 2679 |     Py_ssize_t self_len, result_len; | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2680 |     Py_ssize_t count, offset; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2681 |     PyStringObject *result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2682 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2683 |     self_s = PyString_AS_STRING(self); | 
 | 2684 |     self_len = PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2685 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2686 |     count = stringlib_count(self_s, self_len, | 
 | 2687 |                             from_s, from_len, | 
 | 2688 |                             maxcount); | 
| Antoine Pitrou | 6467213 | 2010-01-13 07:55:48 +0000 | [diff] [blame] | 2689 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2690 |     if (count == 0) { | 
 | 2691 |         /* no matches, return unchanged */ | 
 | 2692 |         return return_self(self); | 
 | 2693 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2694 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2695 |     /* Check for overflow */ | 
 | 2696 |     /*    result_len = self_len + count * (to_len-from_len) */ | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2697 |     assert(count > 0); | 
 | 2698 |     if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2699 |         PyErr_SetString(PyExc_OverflowError, "replace string is too long"); | 
 | 2700 |         return NULL; | 
 | 2701 |     } | 
| Xiang Zhang | 7bdb516 | 2017-01-09 11:13:20 +0800 | [diff] [blame] | 2702 |     result_len = self_len + count * (to_len - from_len); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2703 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2704 |     if ( (result = (PyStringObject *) | 
 | 2705 |           PyString_FromStringAndSize(NULL, result_len)) == NULL) | 
 | 2706 |         return NULL; | 
 | 2707 |     result_s = PyString_AS_STRING(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2708 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2709 |     start = self_s; | 
 | 2710 |     end = self_s + self_len; | 
 | 2711 |     while (count-- > 0) { | 
 | 2712 |         offset = stringlib_find(start, end-start, | 
 | 2713 |                                 from_s, from_len, | 
 | 2714 |                                 0); | 
 | 2715 |         if (offset == -1) | 
 | 2716 |             break; | 
 | 2717 |         next = start+offset; | 
 | 2718 |         if (next == start) { | 
 | 2719 |             /* replace with the 'to' */ | 
 | 2720 |             Py_MEMCPY(result_s, to_s, to_len); | 
 | 2721 |             result_s += to_len; | 
 | 2722 |             start += from_len; | 
 | 2723 |         } else { | 
 | 2724 |             /* copy the unchanged old then the 'to' */ | 
 | 2725 |             Py_MEMCPY(result_s, start, next-start); | 
 | 2726 |             result_s += (next-start); | 
 | 2727 |             Py_MEMCPY(result_s, to_s, to_len); | 
 | 2728 |             result_s += to_len; | 
 | 2729 |             start = next+from_len; | 
 | 2730 |         } | 
 | 2731 |     } | 
 | 2732 |     /* Copy the remainder of the remaining string */ | 
 | 2733 |     Py_MEMCPY(result_s, start, end-start); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2734 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2735 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2736 | } | 
 | 2737 |  | 
 | 2738 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2739 | Py_LOCAL(PyStringObject *) | 
 | 2740 | replace(PyStringObject *self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2741 |     const char *from_s, Py_ssize_t from_len, | 
 | 2742 |     const char *to_s, Py_ssize_t to_len, | 
 | 2743 |     Py_ssize_t maxcount) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2744 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2745 |     if (maxcount < 0) { | 
 | 2746 |         maxcount = PY_SSIZE_T_MAX; | 
 | 2747 |     } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { | 
 | 2748 |         /* nothing to do; return the original string */ | 
 | 2749 |         return return_self(self); | 
 | 2750 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2751 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2752 |     if (maxcount == 0 || | 
 | 2753 |         (from_len == 0 && to_len == 0)) { | 
 | 2754 |         /* nothing to do; return the original string */ | 
 | 2755 |         return return_self(self); | 
 | 2756 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2757 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2758 |     /* Handle zero-length special cases */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2759 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2760 |     if (from_len == 0) { | 
 | 2761 |         /* insert the 'to' string everywhere.   */ | 
 | 2762 |         /*    >>> "Python".replace("", ".")     */ | 
 | 2763 |         /*    '.P.y.t.h.o.n.'                   */ | 
 | 2764 |         return replace_interleave(self, to_s, to_len, maxcount); | 
 | 2765 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2766 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2767 |     /* Except for "".replace("", "A") == "A" there is no way beyond this */ | 
 | 2768 |     /* point for an empty self string to generate a non-empty string */ | 
 | 2769 |     /* Special case so the remaining code always gets a non-empty string */ | 
 | 2770 |     if (PyString_GET_SIZE(self) == 0) { | 
 | 2771 |         return return_self(self); | 
 | 2772 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2773 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2774 |     if (to_len == 0) { | 
| Martin Panter | 440bbd0 | 2016-09-08 05:22:16 +0000 | [diff] [blame] | 2775 |         /* delete all occurrences of 'from' string */ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2776 |         if (from_len == 1) { | 
 | 2777 |             return replace_delete_single_character( | 
 | 2778 |                 self, from_s[0], maxcount); | 
 | 2779 |         } else { | 
 | 2780 |             return replace_delete_substring(self, from_s, from_len, maxcount); | 
 | 2781 |         } | 
 | 2782 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2783 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2784 |     /* Handle special case where both strings have the same length */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2785 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2786 |     if (from_len == to_len) { | 
 | 2787 |         if (from_len == 1) { | 
 | 2788 |             return replace_single_character_in_place( | 
 | 2789 |                 self, | 
 | 2790 |                 from_s[0], | 
 | 2791 |                 to_s[0], | 
 | 2792 |                 maxcount); | 
 | 2793 |         } else { | 
 | 2794 |             return replace_substring_in_place( | 
 | 2795 |                 self, from_s, from_len, to_s, to_len, maxcount); | 
 | 2796 |         } | 
 | 2797 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2798 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2799 |     /* Otherwise use the more generic algorithms */ | 
 | 2800 |     if (from_len == 1) { | 
 | 2801 |         return replace_single_character(self, from_s[0], | 
 | 2802 |                                         to_s, to_len, maxcount); | 
 | 2803 |     } else { | 
 | 2804 |         /* len('from')>=2, len('to')>=1 */ | 
 | 2805 |         return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); | 
 | 2806 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2807 | } | 
 | 2808 |  | 
 | 2809 | PyDoc_STRVAR(replace__doc__, | 
| Ezio Melotti | 2f06b78 | 2010-06-26 18:44:42 +0000 | [diff] [blame] | 2810 | "S.replace(old, new[, count]) -> string\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2811 | \n\ | 
 | 2812 | Return a copy of string S with all occurrences of substring\n\ | 
 | 2813 | old replaced by new.  If the optional argument count is\n\ | 
 | 2814 | given, only the first count occurrences are replaced."); | 
 | 2815 |  | 
 | 2816 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2817 | string_replace(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2818 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2819 |     Py_ssize_t count = -1; | 
 | 2820 |     PyObject *from, *to; | 
 | 2821 |     const char *from_s, *to_s; | 
 | 2822 |     Py_ssize_t from_len, to_len; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2823 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2824 |     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) | 
 | 2825 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2826 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2827 |     if (PyString_Check(from)) { | 
 | 2828 |         from_s = PyString_AS_STRING(from); | 
 | 2829 |         from_len = PyString_GET_SIZE(from); | 
 | 2830 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2831 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2832 |     if (PyUnicode_Check(from)) | 
 | 2833 |         return PyUnicode_Replace((PyObject *)self, | 
 | 2834 |                                  from, to, count); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2835 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2836 |     else if (PyObject_AsCharBuffer(from, &from_s, &from_len)) | 
 | 2837 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2838 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2839 |     if (PyString_Check(to)) { | 
 | 2840 |         to_s = PyString_AS_STRING(to); | 
 | 2841 |         to_len = PyString_GET_SIZE(to); | 
 | 2842 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2843 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2844 |     else if (PyUnicode_Check(to)) | 
 | 2845 |         return PyUnicode_Replace((PyObject *)self, | 
 | 2846 |                                  from, to, count); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2847 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2848 |     else if (PyObject_AsCharBuffer(to, &to_s, &to_len)) | 
 | 2849 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2850 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2851 |     return (PyObject *)replace((PyStringObject *) self, | 
 | 2852 |                                from_s, from_len, | 
 | 2853 |                                to_s, to_len, count); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2854 | } | 
 | 2855 |  | 
 | 2856 | /** End DALKE **/ | 
 | 2857 |  | 
 | 2858 | /* Matches the end (direction >= 0) or start (direction < 0) of self | 
 | 2859 |  * against substr, using the start and end arguments. Returns | 
 | 2860 |  * -1 on error, 0 if not found and 1 if found. | 
 | 2861 |  */ | 
 | 2862 | Py_LOCAL(int) | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2863 | _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2864 |                   Py_ssize_t end, int direction) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2865 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2866 |     Py_ssize_t len = PyString_GET_SIZE(self); | 
 | 2867 |     Py_ssize_t slen; | 
 | 2868 |     const char* sub; | 
 | 2869 |     const char* str; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2870 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2871 |     if (PyString_Check(substr)) { | 
 | 2872 |         sub = PyString_AS_STRING(substr); | 
 | 2873 |         slen = PyString_GET_SIZE(substr); | 
 | 2874 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2875 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2876 |     else if (PyUnicode_Check(substr)) | 
 | 2877 |         return PyUnicode_Tailmatch((PyObject *)self, | 
 | 2878 |                                    substr, start, end, direction); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2879 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2880 |     else if (PyObject_AsCharBuffer(substr, &sub, &slen)) | 
 | 2881 |         return -1; | 
 | 2882 |     str = PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2883 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2884 |     ADJUST_INDICES(start, end, len); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2885 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2886 |     if (direction < 0) { | 
 | 2887 |         /* startswith */ | 
 | 2888 |         if (start+slen > len) | 
 | 2889 |             return 0; | 
 | 2890 |     } else { | 
 | 2891 |         /* endswith */ | 
 | 2892 |         if (end-start < slen || start > len) | 
 | 2893 |             return 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2894 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2895 |         if (end-slen > start) | 
 | 2896 |             start = end - slen; | 
 | 2897 |     } | 
 | 2898 |     if (end-start >= slen) | 
 | 2899 |         return ! memcmp(str+start, sub, slen); | 
 | 2900 |     return 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2901 | } | 
 | 2902 |  | 
 | 2903 |  | 
 | 2904 | PyDoc_STRVAR(startswith__doc__, | 
 | 2905 | "S.startswith(prefix[, start[, end]]) -> bool\n\ | 
 | 2906 | \n\ | 
 | 2907 | Return True if S starts with the specified prefix, False otherwise.\n\ | 
 | 2908 | With optional start, test S beginning at that position.\n\ | 
 | 2909 | With optional end, stop comparing S at that position.\n\ | 
 | 2910 | prefix can also be a tuple of strings to try."); | 
 | 2911 |  | 
 | 2912 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2913 | string_startswith(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2914 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2915 |     Py_ssize_t start = 0; | 
 | 2916 |     Py_ssize_t end = PY_SSIZE_T_MAX; | 
 | 2917 |     PyObject *subobj; | 
 | 2918 |     int result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2919 |  | 
| Jesus Cea | 44e8168 | 2011-04-20 16:39:15 +0200 | [diff] [blame] | 2920 |     if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2921 |         return NULL; | 
 | 2922 |     if (PyTuple_Check(subobj)) { | 
 | 2923 |         Py_ssize_t i; | 
 | 2924 |         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { | 
 | 2925 |             result = _string_tailmatch(self, | 
 | 2926 |                             PyTuple_GET_ITEM(subobj, i), | 
 | 2927 |                             start, end, -1); | 
 | 2928 |             if (result == -1) | 
 | 2929 |                 return NULL; | 
 | 2930 |             else if (result) { | 
 | 2931 |                 Py_RETURN_TRUE; | 
 | 2932 |             } | 
 | 2933 |         } | 
 | 2934 |         Py_RETURN_FALSE; | 
 | 2935 |     } | 
 | 2936 |     result = _string_tailmatch(self, subobj, start, end, -1); | 
| Ezio Melotti | e3685f6 | 2011-04-26 05:12:51 +0300 | [diff] [blame] | 2937 |     if (result == -1) { | 
 | 2938 |         if (PyErr_ExceptionMatches(PyExc_TypeError)) | 
 | 2939 |             PyErr_Format(PyExc_TypeError, "startswith first arg must be str, " | 
 | 2940 |                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2941 |         return NULL; | 
| Ezio Melotti | e3685f6 | 2011-04-26 05:12:51 +0300 | [diff] [blame] | 2942 |     } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2943 |     else | 
 | 2944 |         return PyBool_FromLong(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2945 | } | 
 | 2946 |  | 
 | 2947 |  | 
 | 2948 | PyDoc_STRVAR(endswith__doc__, | 
 | 2949 | "S.endswith(suffix[, start[, end]]) -> bool\n\ | 
 | 2950 | \n\ | 
 | 2951 | Return True if S ends with the specified suffix, False otherwise.\n\ | 
 | 2952 | With optional start, test S beginning at that position.\n\ | 
 | 2953 | With optional end, stop comparing S at that position.\n\ | 
 | 2954 | suffix can also be a tuple of strings to try."); | 
 | 2955 |  | 
 | 2956 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 2957 | string_endswith(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2958 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2959 |     Py_ssize_t start = 0; | 
 | 2960 |     Py_ssize_t end = PY_SSIZE_T_MAX; | 
 | 2961 |     PyObject *subobj; | 
 | 2962 |     int result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2963 |  | 
| Jesus Cea | 44e8168 | 2011-04-20 16:39:15 +0200 | [diff] [blame] | 2964 |     if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2965 |         return NULL; | 
 | 2966 |     if (PyTuple_Check(subobj)) { | 
 | 2967 |         Py_ssize_t i; | 
 | 2968 |         for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { | 
 | 2969 |             result = _string_tailmatch(self, | 
 | 2970 |                             PyTuple_GET_ITEM(subobj, i), | 
 | 2971 |                             start, end, +1); | 
 | 2972 |             if (result == -1) | 
 | 2973 |                 return NULL; | 
 | 2974 |             else if (result) { | 
 | 2975 |                 Py_RETURN_TRUE; | 
 | 2976 |             } | 
 | 2977 |         } | 
 | 2978 |         Py_RETURN_FALSE; | 
 | 2979 |     } | 
 | 2980 |     result = _string_tailmatch(self, subobj, start, end, +1); | 
| Ezio Melotti | e3685f6 | 2011-04-26 05:12:51 +0300 | [diff] [blame] | 2981 |     if (result == -1) { | 
 | 2982 |         if (PyErr_ExceptionMatches(PyExc_TypeError)) | 
 | 2983 |             PyErr_Format(PyExc_TypeError, "endswith first arg must be str, " | 
 | 2984 |                          "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2985 |         return NULL; | 
| Ezio Melotti | e3685f6 | 2011-04-26 05:12:51 +0300 | [diff] [blame] | 2986 |     } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 2987 |     else | 
 | 2988 |         return PyBool_FromLong(result); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 2989 | } | 
 | 2990 |  | 
 | 2991 |  | 
 | 2992 | PyDoc_STRVAR(encode__doc__, | 
 | 2993 | "S.encode([encoding[,errors]]) -> object\n\ | 
 | 2994 | \n\ | 
 | 2995 | Encodes S using the codec registered for encoding. encoding defaults\n\ | 
 | 2996 | to the default encoding. errors may be given to set a different error\n\ | 
 | 2997 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\ | 
 | 2998 | a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\ | 
 | 2999 | 'xmlcharrefreplace' as well as any other name registered with\n\ | 
 | 3000 | codecs.register_error that is able to handle UnicodeEncodeErrors."); | 
 | 3001 |  | 
 | 3002 | static PyObject * | 
| Benjamin Peterson | 332d721 | 2009-09-18 21:14:55 +0000 | [diff] [blame] | 3003 | string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3004 | { | 
| Benjamin Peterson | 332d721 | 2009-09-18 21:14:55 +0000 | [diff] [blame] | 3005 |     static char *kwlist[] = {"encoding", "errors", 0}; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3006 |     char *encoding = NULL; | 
 | 3007 |     char *errors = NULL; | 
 | 3008 |     PyObject *v; | 
 | 3009 |  | 
| Benjamin Peterson | 332d721 | 2009-09-18 21:14:55 +0000 | [diff] [blame] | 3010 |     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3011 |                                      kwlist, &encoding, &errors)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3012 |         return NULL; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3013 |     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3014 |     if (v == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3015 |         goto onError; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3016 |     if (!PyString_Check(v) && !PyUnicode_Check(v)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3017 |         PyErr_Format(PyExc_TypeError, | 
 | 3018 |                      "encoder did not return a string/unicode object " | 
 | 3019 |                      "(type=%.400s)", | 
 | 3020 |                      Py_TYPE(v)->tp_name); | 
 | 3021 |         Py_DECREF(v); | 
 | 3022 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3023 |     } | 
 | 3024 |     return v; | 
 | 3025 |  | 
 | 3026 |  onError: | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3027 |     return NULL; | 
 | 3028 | } | 
 | 3029 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3030 |  | 
 | 3031 | PyDoc_STRVAR(decode__doc__, | 
 | 3032 | "S.decode([encoding[,errors]]) -> object\n\ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3033 | \n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3034 | Decodes S using the codec registered for encoding. encoding defaults\n\ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3035 | to the default encoding. errors may be given to set a different error\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3036 | handling scheme. Default is 'strict' meaning that encoding errors raise\n\ | 
 | 3037 | a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\ | 
| Andrew M. Kuchling | efeb43e | 2008-10-04 01:05:56 +0000 | [diff] [blame] | 3038 | as well as any other name registered with codecs.register_error that is\n\ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3039 | able to handle UnicodeDecodeErrors."); | 
 | 3040 |  | 
 | 3041 | static PyObject * | 
| Benjamin Peterson | 332d721 | 2009-09-18 21:14:55 +0000 | [diff] [blame] | 3042 | string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3043 | { | 
| Benjamin Peterson | 332d721 | 2009-09-18 21:14:55 +0000 | [diff] [blame] | 3044 |     static char *kwlist[] = {"encoding", "errors", 0}; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3045 |     char *encoding = NULL; | 
 | 3046 |     char *errors = NULL; | 
 | 3047 |     PyObject *v; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3048 |  | 
| Benjamin Peterson | 332d721 | 2009-09-18 21:14:55 +0000 | [diff] [blame] | 3049 |     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3050 |                                      kwlist, &encoding, &errors)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3051 |         return NULL; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3052 |     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3053 |     if (v == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3054 |         goto onError; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3055 |     if (!PyString_Check(v) && !PyUnicode_Check(v)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3056 |         PyErr_Format(PyExc_TypeError, | 
 | 3057 |                      "decoder did not return a string/unicode object " | 
 | 3058 |                      "(type=%.400s)", | 
 | 3059 |                      Py_TYPE(v)->tp_name); | 
 | 3060 |         Py_DECREF(v); | 
 | 3061 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3062 |     } | 
 | 3063 |     return v; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3064 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3065 |  onError: | 
 | 3066 |     return NULL; | 
 | 3067 | } | 
 | 3068 |  | 
 | 3069 |  | 
 | 3070 | PyDoc_STRVAR(expandtabs__doc__, | 
 | 3071 | "S.expandtabs([tabsize]) -> string\n\ | 
 | 3072 | \n\ | 
 | 3073 | Return a copy of S where all tab characters are expanded using spaces.\n\ | 
 | 3074 | If tabsize is not given, a tab size of 8 characters is assumed."); | 
 | 3075 |  | 
 | 3076 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3077 | string_expandtabs(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3078 | { | 
 | 3079 |     const char *e, *p, *qe; | 
 | 3080 |     char *q; | 
 | 3081 |     Py_ssize_t i, j, incr; | 
 | 3082 |     PyObject *u; | 
 | 3083 |     int tabsize = 8; | 
 | 3084 |  | 
 | 3085 |     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3086 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3087 |  | 
 | 3088 |     /* First pass: determine size of output string */ | 
 | 3089 |     i = 0; /* chars up to and including most recent \n or \r */ | 
 | 3090 |     j = 0; /* chars since most recent \n or \r (use in tab calculations) */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3091 |     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ | 
| Benjamin Peterson | 0e431b9 | 2014-03-30 19:16:44 -0400 | [diff] [blame] | 3092 |     for (p = PyString_AS_STRING(self); p < e; p++) { | 
 | 3093 |         if (*p == '\t') { | 
 | 3094 |             if (tabsize > 0) { | 
 | 3095 |                 incr = tabsize - (j % tabsize); | 
 | 3096 |                 if (j > PY_SSIZE_T_MAX - incr) | 
 | 3097 |                     goto overflow1; | 
 | 3098 |                 j += incr; | 
 | 3099 |             } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3100 |         } | 
| Benjamin Peterson | 0e431b9 | 2014-03-30 19:16:44 -0400 | [diff] [blame] | 3101 |         else { | 
 | 3102 |             if (j > PY_SSIZE_T_MAX - 1) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3103 |                 goto overflow1; | 
| Benjamin Peterson | 0e431b9 | 2014-03-30 19:16:44 -0400 | [diff] [blame] | 3104 |             j++; | 
 | 3105 |             if (*p == '\n' || *p == '\r') { | 
 | 3106 |                 if (i > PY_SSIZE_T_MAX - j) | 
 | 3107 |                     goto overflow1; | 
 | 3108 |                 i += j; | 
 | 3109 |                 j = 0; | 
 | 3110 |             } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3111 |         } | 
 | 3112 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3113 |  | 
 | 3114 |     if (i > PY_SSIZE_T_MAX - j) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3115 |         goto overflow1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3116 |  | 
 | 3117 |     /* Second pass: create output string and fill it */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3118 |     u = PyString_FromStringAndSize(NULL, i + j); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3119 |     if (!u) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3120 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3121 |  | 
 | 3122 |     j = 0; /* same as in first pass */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3123 |     q = PyString_AS_STRING(u); /* next output char */ | 
 | 3124 |     qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3125 |  | 
| Benjamin Peterson | 8312ecc | 2014-03-30 19:23:24 -0400 | [diff] [blame] | 3126 |     for (p = PyString_AS_STRING(self); p < e; p++) { | 
 | 3127 |         if (*p == '\t') { | 
 | 3128 |             if (tabsize > 0) { | 
 | 3129 |                 i = tabsize - (j % tabsize); | 
 | 3130 |                 j += i; | 
 | 3131 |                 while (i--) { | 
 | 3132 |                     if (q >= qe) | 
 | 3133 |                         goto overflow2; | 
 | 3134 |                     *q++ = ' '; | 
 | 3135 |                 } | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3136 |             } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3137 |         } | 
| Benjamin Peterson | 8312ecc | 2014-03-30 19:23:24 -0400 | [diff] [blame] | 3138 |         else { | 
 | 3139 |             if (q >= qe) | 
 | 3140 |                 goto overflow2; | 
 | 3141 |             *q++ = *p; | 
 | 3142 |             j++; | 
 | 3143 |             if (*p == '\n' || *p == '\r') | 
 | 3144 |                 j = 0; | 
 | 3145 |         } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3146 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3147 |  | 
 | 3148 |     return u; | 
 | 3149 |  | 
 | 3150 |   overflow2: | 
 | 3151 |     Py_DECREF(u); | 
 | 3152 |   overflow1: | 
 | 3153 |     PyErr_SetString(PyExc_OverflowError, "new string is too long"); | 
 | 3154 |     return NULL; | 
 | 3155 | } | 
 | 3156 |  | 
 | 3157 | Py_LOCAL_INLINE(PyObject *) | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3158 | pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3159 | { | 
 | 3160 |     PyObject *u; | 
 | 3161 |  | 
 | 3162 |     if (left < 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3163 |         left = 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3164 |     if (right < 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3165 |         right = 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3166 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3167 |     if (left == 0 && right == 0 && PyString_CheckExact(self)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3168 |         Py_INCREF(self); | 
 | 3169 |         return (PyObject *)self; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3170 |     } | 
 | 3171 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3172 |     u = PyString_FromStringAndSize(NULL, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3173 |                                    left + PyString_GET_SIZE(self) + right); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3174 |     if (u) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3175 |         if (left) | 
 | 3176 |             memset(PyString_AS_STRING(u), fill, left); | 
 | 3177 |         Py_MEMCPY(PyString_AS_STRING(u) + left, | 
 | 3178 |                PyString_AS_STRING(self), | 
 | 3179 |                PyString_GET_SIZE(self)); | 
 | 3180 |         if (right) | 
 | 3181 |             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), | 
 | 3182 |                fill, right); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3183 |     } | 
 | 3184 |  | 
 | 3185 |     return u; | 
 | 3186 | } | 
 | 3187 |  | 
 | 3188 | PyDoc_STRVAR(ljust__doc__, | 
 | 3189 | "S.ljust(width[, fillchar]) -> string\n" | 
 | 3190 | "\n" | 
| Andrew M. Kuchling | efeb43e | 2008-10-04 01:05:56 +0000 | [diff] [blame] | 3191 | "Return S left-justified in a string of length width. Padding is\n" | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3192 | "done using the specified fill character (default is a space)."); | 
 | 3193 |  | 
 | 3194 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3195 | string_ljust(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3196 | { | 
 | 3197 |     Py_ssize_t width; | 
 | 3198 |     char fillchar = ' '; | 
 | 3199 |  | 
 | 3200 |     if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3201 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3202 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3203 |     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3204 |         Py_INCREF(self); | 
 | 3205 |         return (PyObject*) self; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3206 |     } | 
 | 3207 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3208 |     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3209 | } | 
 | 3210 |  | 
 | 3211 |  | 
 | 3212 | PyDoc_STRVAR(rjust__doc__, | 
 | 3213 | "S.rjust(width[, fillchar]) -> string\n" | 
 | 3214 | "\n" | 
| Andrew M. Kuchling | efeb43e | 2008-10-04 01:05:56 +0000 | [diff] [blame] | 3215 | "Return S right-justified in a string of length width. Padding is\n" | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3216 | "done using the specified fill character (default is a space)"); | 
 | 3217 |  | 
 | 3218 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3219 | string_rjust(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3220 | { | 
 | 3221 |     Py_ssize_t width; | 
 | 3222 |     char fillchar = ' '; | 
 | 3223 |  | 
 | 3224 |     if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3225 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3226 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3227 |     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3228 |         Py_INCREF(self); | 
 | 3229 |         return (PyObject*) self; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3230 |     } | 
 | 3231 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3232 |     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3233 | } | 
 | 3234 |  | 
 | 3235 |  | 
 | 3236 | PyDoc_STRVAR(center__doc__, | 
 | 3237 | "S.center(width[, fillchar]) -> string\n" | 
 | 3238 | "\n" | 
 | 3239 | "Return S centered in a string of length width. Padding is\n" | 
 | 3240 | "done using the specified fill character (default is a space)"); | 
 | 3241 |  | 
 | 3242 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3243 | string_center(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3244 | { | 
 | 3245 |     Py_ssize_t marg, left; | 
 | 3246 |     Py_ssize_t width; | 
 | 3247 |     char fillchar = ' '; | 
 | 3248 |  | 
 | 3249 |     if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3250 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3251 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3252 |     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3253 |         Py_INCREF(self); | 
 | 3254 |         return (PyObject*) self; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3255 |     } | 
 | 3256 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3257 |     marg = width - PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3258 |     left = marg / 2 + (marg & width & 1); | 
 | 3259 |  | 
 | 3260 |     return pad(self, left, marg - left, fillchar); | 
 | 3261 | } | 
 | 3262 |  | 
 | 3263 | PyDoc_STRVAR(zfill__doc__, | 
 | 3264 | "S.zfill(width) -> string\n" | 
 | 3265 | "\n" | 
 | 3266 | "Pad a numeric string S with zeros on the left, to fill a field\n" | 
 | 3267 | "of the specified width.  The string S is never truncated."); | 
 | 3268 |  | 
 | 3269 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3270 | string_zfill(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3271 | { | 
 | 3272 |     Py_ssize_t fill; | 
 | 3273 |     PyObject *s; | 
 | 3274 |     char *p; | 
 | 3275 |     Py_ssize_t width; | 
 | 3276 |  | 
 | 3277 |     if (!PyArg_ParseTuple(args, "n:zfill", &width)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3278 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3279 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3280 |     if (PyString_GET_SIZE(self) >= width) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3281 |         if (PyString_CheckExact(self)) { | 
 | 3282 |             Py_INCREF(self); | 
 | 3283 |             return (PyObject*) self; | 
 | 3284 |         } | 
 | 3285 |         else | 
 | 3286 |             return PyString_FromStringAndSize( | 
| Martin Panter | ca56dd4 | 2016-09-17 07:54:55 +0000 | [diff] [blame] | 3287 |                 PyString_AS_STRING(self), | 
 | 3288 |                 PyString_GET_SIZE(self) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3289 |             ); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3290 |     } | 
 | 3291 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3292 |     fill = width - PyString_GET_SIZE(self); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3293 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3294 |     s = pad(self, fill, 0, '0'); | 
 | 3295 |  | 
 | 3296 |     if (s == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3297 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3298 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3299 |     p = PyString_AS_STRING(s); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3300 |     if (p[fill] == '+' || p[fill] == '-') { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3301 |         /* move sign to beginning of string */ | 
 | 3302 |         p[0] = p[fill]; | 
 | 3303 |         p[fill] = '0'; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3304 |     } | 
 | 3305 |  | 
 | 3306 |     return (PyObject*) s; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3307 | } | 
 | 3308 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3309 | PyDoc_STRVAR(isspace__doc__, | 
 | 3310 | "S.isspace() -> bool\n\ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3311 | \n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3312 | Return True if all characters in S are whitespace\n\ | 
 | 3313 | and there is at least one character in S, False otherwise."); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3314 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3315 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3316 | string_isspace(PyStringObject *self) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3317 | { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3318 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3319 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3320 |     register const unsigned char *e; | 
 | 3321 |  | 
 | 3322 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3323 |     if (PyString_GET_SIZE(self) == 1 && | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3324 |         isspace(*p)) | 
 | 3325 |         return PyBool_FromLong(1); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3326 |  | 
 | 3327 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3328 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3329 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3330 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3331 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3332 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3333 |         if (!isspace(*p)) | 
 | 3334 |             return PyBool_FromLong(0); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3335 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3336 |     return PyBool_FromLong(1); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3337 | } | 
 | 3338 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3339 |  | 
 | 3340 | PyDoc_STRVAR(isalpha__doc__, | 
 | 3341 | "S.isalpha() -> bool\n\ | 
 | 3342 | \n\ | 
 | 3343 | Return True if all characters in S are alphabetic\n\ | 
 | 3344 | and there is at least one character in S, False otherwise."); | 
 | 3345 |  | 
 | 3346 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3347 | string_isalpha(PyStringObject *self) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3348 | { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3349 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3350 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3351 |     register const unsigned char *e; | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3352 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3353 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3354 |     if (PyString_GET_SIZE(self) == 1 && | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3355 |         isalpha(*p)) | 
 | 3356 |         return PyBool_FromLong(1); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3357 |  | 
 | 3358 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3359 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3360 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3361 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3362 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3363 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3364 |         if (!isalpha(*p)) | 
 | 3365 |             return PyBool_FromLong(0); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3366 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3367 |     return PyBool_FromLong(1); | 
 | 3368 | } | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3369 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3370 |  | 
 | 3371 | PyDoc_STRVAR(isalnum__doc__, | 
 | 3372 | "S.isalnum() -> bool\n\ | 
 | 3373 | \n\ | 
 | 3374 | Return True if all characters in S are alphanumeric\n\ | 
 | 3375 | and there is at least one character in S, False otherwise."); | 
 | 3376 |  | 
 | 3377 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3378 | string_isalnum(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3379 | { | 
 | 3380 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3381 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3382 |     register const unsigned char *e; | 
 | 3383 |  | 
 | 3384 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3385 |     if (PyString_GET_SIZE(self) == 1 && | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3386 |         isalnum(*p)) | 
 | 3387 |         return PyBool_FromLong(1); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3388 |  | 
 | 3389 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3390 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3391 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3392 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3393 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3394 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3395 |         if (!isalnum(*p)) | 
 | 3396 |             return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3397 |     } | 
 | 3398 |     return PyBool_FromLong(1); | 
 | 3399 | } | 
 | 3400 |  | 
 | 3401 |  | 
 | 3402 | PyDoc_STRVAR(isdigit__doc__, | 
 | 3403 | "S.isdigit() -> bool\n\ | 
 | 3404 | \n\ | 
 | 3405 | Return True if all characters in S are digits\n\ | 
 | 3406 | and there is at least one character in S, False otherwise."); | 
 | 3407 |  | 
 | 3408 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3409 | string_isdigit(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3410 | { | 
 | 3411 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3412 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3413 |     register const unsigned char *e; | 
 | 3414 |  | 
 | 3415 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3416 |     if (PyString_GET_SIZE(self) == 1 && | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3417 |         isdigit(*p)) | 
 | 3418 |         return PyBool_FromLong(1); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3419 |  | 
 | 3420 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3421 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3422 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3423 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3424 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3425 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3426 |         if (!isdigit(*p)) | 
 | 3427 |             return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3428 |     } | 
 | 3429 |     return PyBool_FromLong(1); | 
 | 3430 | } | 
 | 3431 |  | 
 | 3432 |  | 
 | 3433 | PyDoc_STRVAR(islower__doc__, | 
 | 3434 | "S.islower() -> bool\n\ | 
 | 3435 | \n\ | 
 | 3436 | Return True if all cased characters in S are lowercase and there is\n\ | 
 | 3437 | at least one cased character in S, False otherwise."); | 
 | 3438 |  | 
 | 3439 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3440 | string_islower(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3441 | { | 
 | 3442 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3443 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3444 |     register const unsigned char *e; | 
 | 3445 |     int cased; | 
 | 3446 |  | 
 | 3447 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3448 |     if (PyString_GET_SIZE(self) == 1) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3449 |         return PyBool_FromLong(islower(*p) != 0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3450 |  | 
 | 3451 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3452 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3453 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3454 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3455 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3456 |     cased = 0; | 
 | 3457 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3458 |         if (isupper(*p)) | 
 | 3459 |             return PyBool_FromLong(0); | 
 | 3460 |         else if (!cased && islower(*p)) | 
 | 3461 |             cased = 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3462 |     } | 
 | 3463 |     return PyBool_FromLong(cased); | 
 | 3464 | } | 
 | 3465 |  | 
 | 3466 |  | 
 | 3467 | PyDoc_STRVAR(isupper__doc__, | 
 | 3468 | "S.isupper() -> bool\n\ | 
 | 3469 | \n\ | 
 | 3470 | Return True if all cased characters in S are uppercase and there is\n\ | 
 | 3471 | at least one cased character in S, False otherwise."); | 
 | 3472 |  | 
 | 3473 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3474 | string_isupper(PyStringObject *self) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3475 | { | 
 | 3476 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3477 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3478 |     register const unsigned char *e; | 
 | 3479 |     int cased; | 
 | 3480 |  | 
 | 3481 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3482 |     if (PyString_GET_SIZE(self) == 1) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3483 |         return PyBool_FromLong(isupper(*p) != 0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3484 |  | 
 | 3485 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3486 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3487 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3488 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3489 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3490 |     cased = 0; | 
 | 3491 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3492 |         if (islower(*p)) | 
 | 3493 |             return PyBool_FromLong(0); | 
 | 3494 |         else if (!cased && isupper(*p)) | 
 | 3495 |             cased = 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3496 |     } | 
 | 3497 |     return PyBool_FromLong(cased); | 
 | 3498 | } | 
 | 3499 |  | 
 | 3500 |  | 
 | 3501 | PyDoc_STRVAR(istitle__doc__, | 
 | 3502 | "S.istitle() -> bool\n\ | 
 | 3503 | \n\ | 
 | 3504 | Return True if S is a titlecased string and there is at least one\n\ | 
 | 3505 | character in S, i.e. uppercase characters may only follow uncased\n\ | 
 | 3506 | characters and lowercase characters only cased ones. Return False\n\ | 
 | 3507 | otherwise."); | 
 | 3508 |  | 
 | 3509 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3510 | string_istitle(PyStringObject *self, PyObject *uncased) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3511 | { | 
 | 3512 |     register const unsigned char *p | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3513 |         = (unsigned char *) PyString_AS_STRING(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3514 |     register const unsigned char *e; | 
 | 3515 |     int cased, previous_is_cased; | 
 | 3516 |  | 
 | 3517 |     /* Shortcut for single character strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3518 |     if (PyString_GET_SIZE(self) == 1) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3519 |         return PyBool_FromLong(isupper(*p) != 0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3520 |  | 
 | 3521 |     /* Special case for empty strings */ | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3522 |     if (PyString_GET_SIZE(self) == 0) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3523 |         return PyBool_FromLong(0); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3524 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3525 |     e = p + PyString_GET_SIZE(self); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3526 |     cased = 0; | 
 | 3527 |     previous_is_cased = 0; | 
 | 3528 |     for (; p < e; p++) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3529 |         register const unsigned char ch = *p; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3530 |  | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3531 |         if (isupper(ch)) { | 
 | 3532 |             if (previous_is_cased) | 
 | 3533 |                 return PyBool_FromLong(0); | 
 | 3534 |             previous_is_cased = 1; | 
 | 3535 |             cased = 1; | 
 | 3536 |         } | 
 | 3537 |         else if (islower(ch)) { | 
 | 3538 |             if (!previous_is_cased) | 
 | 3539 |                 return PyBool_FromLong(0); | 
 | 3540 |             previous_is_cased = 1; | 
 | 3541 |             cased = 1; | 
 | 3542 |         } | 
 | 3543 |         else | 
 | 3544 |             previous_is_cased = 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3545 |     } | 
 | 3546 |     return PyBool_FromLong(cased); | 
 | 3547 | } | 
 | 3548 |  | 
 | 3549 |  | 
 | 3550 | PyDoc_STRVAR(splitlines__doc__, | 
| Raymond Hettinger | aad5b02 | 2012-06-02 01:42:58 -0400 | [diff] [blame] | 3551 | "S.splitlines(keepends=False) -> list of strings\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3552 | \n\ | 
 | 3553 | Return a list of the lines in S, breaking at line boundaries.\n\ | 
 | 3554 | Line breaks are not included in the resulting list unless keepends\n\ | 
 | 3555 | is given and true."); | 
 | 3556 |  | 
 | 3557 | static PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3558 | string_splitlines(PyStringObject *self, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3559 | { | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3560 |     int keepends = 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3561 |  | 
 | 3562 |     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3563 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3564 |  | 
| Antoine Pitrou | 6467213 | 2010-01-13 07:55:48 +0000 | [diff] [blame] | 3565 |     return stringlib_splitlines( | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3566 |         (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self), | 
 | 3567 |         keepends | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3568 |     ); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3569 | } | 
 | 3570 |  | 
| Robert Schuppenies | 51df064 | 2008-06-01 16:16:17 +0000 | [diff] [blame] | 3571 | PyDoc_STRVAR(sizeof__doc__, | 
| Georg Brandl | 7a6de8b | 2008-06-01 16:42:16 +0000 | [diff] [blame] | 3572 | "S.__sizeof__() -> size of S in memory, in bytes"); | 
| Robert Schuppenies | 51df064 | 2008-06-01 16:16:17 +0000 | [diff] [blame] | 3573 |  | 
 | 3574 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3575 | string_sizeof(PyStringObject *v) | 
| Robert Schuppenies | 51df064 | 2008-06-01 16:16:17 +0000 | [diff] [blame] | 3576 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3577 |     Py_ssize_t res; | 
 | 3578 |     res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize; | 
 | 3579 |     return PyInt_FromSsize_t(res); | 
| Robert Schuppenies | 51df064 | 2008-06-01 16:16:17 +0000 | [diff] [blame] | 3580 | } | 
 | 3581 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3582 | static PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3583 | string_getnewargs(PyStringObject *v) | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3584 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3585 |     return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v)); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3586 | } | 
 | 3587 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3588 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3589 | #include "stringlib/string_format.h" | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3590 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3591 | PyDoc_STRVAR(format__doc__, | 
| Georg Brandl | 05f819b | 2010-07-31 19:07:37 +0000 | [diff] [blame] | 3592 | "S.format(*args, **kwargs) -> string\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3593 | \n\ | 
| Eric Smith | 6c84085 | 2010-11-06 19:43:44 +0000 | [diff] [blame] | 3594 | Return a formatted version of S, using substitutions from args and kwargs.\n\ | 
 | 3595 | The substitutions are identified by braces ('{' and '}')."); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3596 |  | 
| Eric Smith | dc13b79 | 2008-05-30 18:10:04 +0000 | [diff] [blame] | 3597 | static PyObject * | 
 | 3598 | string__format__(PyObject* self, PyObject* args) | 
 | 3599 | { | 
 | 3600 |     PyObject *format_spec; | 
 | 3601 |     PyObject *result = NULL; | 
 | 3602 |     PyObject *tmp = NULL; | 
 | 3603 |  | 
 | 3604 |     /* If 2.x, convert format_spec to the same type as value */ | 
 | 3605 |     /* This is to allow things like u''.format('') */ | 
 | 3606 |     if (!PyArg_ParseTuple(args, "O:__format__", &format_spec)) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3607 |         goto done; | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3608 |     if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) { | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3609 |         PyErr_Format(PyExc_TypeError, "__format__ arg must be str " | 
 | 3610 |                      "or unicode, not %s", Py_TYPE(format_spec)->tp_name); | 
 | 3611 |         goto done; | 
| Eric Smith | dc13b79 | 2008-05-30 18:10:04 +0000 | [diff] [blame] | 3612 |     } | 
 | 3613 |     tmp = PyObject_Str(format_spec); | 
 | 3614 |     if (tmp == NULL) | 
| Antoine Pitrou | 619f16e | 2010-06-09 16:24:00 +0000 | [diff] [blame] | 3615 |         goto done; | 
| Eric Smith | dc13b79 | 2008-05-30 18:10:04 +0000 | [diff] [blame] | 3616 |     format_spec = tmp; | 
 | 3617 |  | 
 | 3618 |     result = _PyBytes_FormatAdvanced(self, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3619 |                                      PyString_AS_STRING(format_spec), | 
 | 3620 |                                      PyString_GET_SIZE(format_spec)); | 
| Eric Smith | dc13b79 | 2008-05-30 18:10:04 +0000 | [diff] [blame] | 3621 | done: | 
 | 3622 |     Py_XDECREF(tmp); | 
 | 3623 |     return result; | 
 | 3624 | } | 
 | 3625 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3626 | PyDoc_STRVAR(p_format__doc__, | 
| Georg Brandl | 05f819b | 2010-07-31 19:07:37 +0000 | [diff] [blame] | 3627 | "S.__format__(format_spec) -> string\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3628 | \n\ | 
| Eric Smith | 6c84085 | 2010-11-06 19:43:44 +0000 | [diff] [blame] | 3629 | Return a formatted version of S as described by format_spec."); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3630 |  | 
| Martin v. Löwis | f91d46a | 2008-08-12 14:49:50 +0000 | [diff] [blame] | 3631 |  | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3632 | static PyMethodDef | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3633 | string_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3634 |     /* Counterparts of the obsolete stropmodule functions; except | 
 | 3635 |        string.maketrans(). */ | 
 | 3636 |     {"join", (PyCFunction)string_join, METH_O, join__doc__}, | 
 | 3637 |     {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, | 
 | 3638 |     {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, | 
 | 3639 |     {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, | 
 | 3640 |     {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, | 
 | 3641 |     {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, | 
 | 3642 |     {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__}, | 
 | 3643 |     {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__}, | 
 | 3644 |     {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__}, | 
 | 3645 |     {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__}, | 
 | 3646 |     {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__}, | 
 | 3647 |     {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__}, | 
 | 3648 |     {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, | 
 | 3649 |      capitalize__doc__}, | 
 | 3650 |     {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, | 
 | 3651 |     {"endswith", (PyCFunction)string_endswith, METH_VARARGS, | 
 | 3652 |      endswith__doc__}, | 
 | 3653 |     {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, | 
 | 3654 |     {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, | 
 | 3655 |     {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, | 
 | 3656 |     {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, | 
 | 3657 |     {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__}, | 
 | 3658 |     {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, | 
 | 3659 |     {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__}, | 
 | 3660 |     {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__}, | 
 | 3661 |     {"rpartition", (PyCFunction)string_rpartition, METH_O, | 
 | 3662 |      rpartition__doc__}, | 
 | 3663 |     {"startswith", (PyCFunction)string_startswith, METH_VARARGS, | 
 | 3664 |      startswith__doc__}, | 
 | 3665 |     {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, | 
 | 3666 |     {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, | 
 | 3667 |      swapcase__doc__}, | 
 | 3668 |     {"translate", (PyCFunction)string_translate, METH_VARARGS, | 
 | 3669 |      translate__doc__}, | 
 | 3670 |     {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__}, | 
 | 3671 |     {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__}, | 
 | 3672 |     {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, | 
 | 3673 |     {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, | 
 | 3674 |     {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, | 
 | 3675 |     {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, | 
 | 3676 |     {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, | 
 | 3677 |     {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, | 
 | 3678 |     {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, | 
 | 3679 |     {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, | 
 | 3680 |     {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, | 
 | 3681 |     {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, | 
 | 3682 |      expandtabs__doc__}, | 
 | 3683 |     {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, | 
 | 3684 |      splitlines__doc__}, | 
 | 3685 |     {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS, | 
 | 3686 |      sizeof__doc__}, | 
 | 3687 |     {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS}, | 
 | 3688 |     {NULL,     NULL}                         /* sentinel */ | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3689 | }; | 
 | 3690 |  | 
 | 3691 | static PyObject * | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3692 | str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 3693 |  | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3694 | static PyObject * | 
 | 3695 | string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
 | 3696 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3697 |     PyObject *x = NULL; | 
 | 3698 |     static char *kwlist[] = {"object", 0}; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3699 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3700 |     if (type != &PyString_Type) | 
 | 3701 |         return str_subtype_new(type, args, kwds); | 
 | 3702 |     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x)) | 
 | 3703 |         return NULL; | 
 | 3704 |     if (x == NULL) | 
 | 3705 |         return PyString_FromString(""); | 
 | 3706 |     return PyObject_Str(x); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3707 | } | 
 | 3708 |  | 
 | 3709 | static PyObject * | 
 | 3710 | str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
 | 3711 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3712 |     PyObject *tmp, *pnew; | 
 | 3713 |     Py_ssize_t n; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3714 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3715 |     assert(PyType_IsSubtype(type, &PyString_Type)); | 
 | 3716 |     tmp = string_new(&PyString_Type, args, kwds); | 
 | 3717 |     if (tmp == NULL) | 
 | 3718 |         return NULL; | 
| Serhiy Storchaka | 8d30ad7 | 2015-11-25 15:55:54 +0200 | [diff] [blame] | 3719 |     assert(PyString_Check(tmp)); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3720 |     n = PyString_GET_SIZE(tmp); | 
 | 3721 |     pnew = type->tp_alloc(type, n); | 
 | 3722 |     if (pnew != NULL) { | 
 | 3723 |         Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); | 
 | 3724 |         ((PyStringObject *)pnew)->ob_shash = | 
 | 3725 |             ((PyStringObject *)tmp)->ob_shash; | 
 | 3726 |         ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; | 
 | 3727 |     } | 
 | 3728 |     Py_DECREF(tmp); | 
 | 3729 |     return pnew; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3730 | } | 
 | 3731 |  | 
 | 3732 | static PyObject * | 
 | 3733 | basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds) | 
 | 3734 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3735 |     PyErr_SetString(PyExc_TypeError, | 
 | 3736 |                     "The basestring type cannot be instantiated"); | 
 | 3737 |     return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3738 | } | 
 | 3739 |  | 
 | 3740 | static PyObject * | 
 | 3741 | string_mod(PyObject *v, PyObject *w) | 
 | 3742 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3743 |     if (!PyString_Check(v)) { | 
 | 3744 |         Py_INCREF(Py_NotImplemented); | 
 | 3745 |         return Py_NotImplemented; | 
 | 3746 |     } | 
 | 3747 |     return PyString_Format(v, w); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3748 | } | 
 | 3749 |  | 
 | 3750 | PyDoc_STRVAR(basestring_doc, | 
 | 3751 | "Type basestring cannot be instantiated; it is the base for str and unicode."); | 
 | 3752 |  | 
 | 3753 | static PyNumberMethods string_as_number = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3754 |     0,                          /*nb_add*/ | 
 | 3755 |     0,                          /*nb_subtract*/ | 
 | 3756 |     0,                          /*nb_multiply*/ | 
 | 3757 |     0,                          /*nb_divide*/ | 
 | 3758 |     string_mod,                 /*nb_remainder*/ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3759 | }; | 
 | 3760 |  | 
 | 3761 |  | 
 | 3762 | PyTypeObject PyBaseString_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3763 |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
 | 3764 |     "basestring", | 
 | 3765 |     0, | 
 | 3766 |     0, | 
 | 3767 |     0,                                          /* tp_dealloc */ | 
 | 3768 |     0,                                          /* tp_print */ | 
 | 3769 |     0,                                          /* tp_getattr */ | 
 | 3770 |     0,                                          /* tp_setattr */ | 
 | 3771 |     0,                                          /* tp_compare */ | 
 | 3772 |     0,                                          /* tp_repr */ | 
 | 3773 |     0,                                          /* tp_as_number */ | 
 | 3774 |     0,                                          /* tp_as_sequence */ | 
 | 3775 |     0,                                          /* tp_as_mapping */ | 
 | 3776 |     0,                                          /* tp_hash */ | 
 | 3777 |     0,                                          /* tp_call */ | 
 | 3778 |     0,                                          /* tp_str */ | 
 | 3779 |     0,                                          /* tp_getattro */ | 
 | 3780 |     0,                                          /* tp_setattro */ | 
 | 3781 |     0,                                          /* tp_as_buffer */ | 
 | 3782 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | 
 | 3783 |     basestring_doc,                             /* tp_doc */ | 
 | 3784 |     0,                                          /* tp_traverse */ | 
 | 3785 |     0,                                          /* tp_clear */ | 
 | 3786 |     0,                                          /* tp_richcompare */ | 
 | 3787 |     0,                                          /* tp_weaklistoffset */ | 
 | 3788 |     0,                                          /* tp_iter */ | 
 | 3789 |     0,                                          /* tp_iternext */ | 
 | 3790 |     0,                                          /* tp_methods */ | 
 | 3791 |     0,                                          /* tp_members */ | 
 | 3792 |     0,                                          /* tp_getset */ | 
 | 3793 |     &PyBaseObject_Type,                         /* tp_base */ | 
 | 3794 |     0,                                          /* tp_dict */ | 
 | 3795 |     0,                                          /* tp_descr_get */ | 
 | 3796 |     0,                                          /* tp_descr_set */ | 
 | 3797 |     0,                                          /* tp_dictoffset */ | 
 | 3798 |     0,                                          /* tp_init */ | 
 | 3799 |     0,                                          /* tp_alloc */ | 
 | 3800 |     basestring_new,                             /* tp_new */ | 
 | 3801 |     0,                                          /* tp_free */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3802 | }; | 
 | 3803 |  | 
 | 3804 | PyDoc_STRVAR(string_doc, | 
| Chris Jerdonek | ad4b000 | 2012-10-07 20:37:54 -0700 | [diff] [blame] | 3805 | "str(object='') -> string\n\ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3806 | \n\ | 
 | 3807 | Return a nice string representation of the object.\n\ | 
 | 3808 | If the argument is a string, the return value is the same object."); | 
 | 3809 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3810 | PyTypeObject PyString_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3811 |     PyVarObject_HEAD_INIT(&PyType_Type, 0) | 
 | 3812 |     "str", | 
 | 3813 |     PyStringObject_SIZE, | 
 | 3814 |     sizeof(char), | 
 | 3815 |     string_dealloc,                             /* tp_dealloc */ | 
 | 3816 |     (printfunc)string_print,                    /* tp_print */ | 
 | 3817 |     0,                                          /* tp_getattr */ | 
 | 3818 |     0,                                          /* tp_setattr */ | 
 | 3819 |     0,                                          /* tp_compare */ | 
 | 3820 |     string_repr,                                /* tp_repr */ | 
 | 3821 |     &string_as_number,                          /* tp_as_number */ | 
 | 3822 |     &string_as_sequence,                        /* tp_as_sequence */ | 
 | 3823 |     &string_as_mapping,                         /* tp_as_mapping */ | 
 | 3824 |     (hashfunc)string_hash,                      /* tp_hash */ | 
 | 3825 |     0,                                          /* tp_call */ | 
 | 3826 |     string_str,                                 /* tp_str */ | 
 | 3827 |     PyObject_GenericGetAttr,                    /* tp_getattro */ | 
 | 3828 |     0,                                          /* tp_setattro */ | 
 | 3829 |     &string_as_buffer,                          /* tp_as_buffer */ | 
 | 3830 |     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | | 
 | 3831 |         Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS | | 
 | 3832 |         Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */ | 
 | 3833 |     string_doc,                                 /* tp_doc */ | 
 | 3834 |     0,                                          /* tp_traverse */ | 
 | 3835 |     0,                                          /* tp_clear */ | 
 | 3836 |     (richcmpfunc)string_richcompare,            /* tp_richcompare */ | 
 | 3837 |     0,                                          /* tp_weaklistoffset */ | 
 | 3838 |     0,                                          /* tp_iter */ | 
 | 3839 |     0,                                          /* tp_iternext */ | 
 | 3840 |     string_methods,                             /* tp_methods */ | 
 | 3841 |     0,                                          /* tp_members */ | 
 | 3842 |     0,                                          /* tp_getset */ | 
 | 3843 |     &PyBaseString_Type,                         /* tp_base */ | 
 | 3844 |     0,                                          /* tp_dict */ | 
 | 3845 |     0,                                          /* tp_descr_get */ | 
 | 3846 |     0,                                          /* tp_descr_set */ | 
 | 3847 |     0,                                          /* tp_dictoffset */ | 
 | 3848 |     0,                                          /* tp_init */ | 
 | 3849 |     0,                                          /* tp_alloc */ | 
 | 3850 |     string_new,                                 /* tp_new */ | 
 | 3851 |     PyObject_Del,                               /* tp_free */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3852 | }; | 
 | 3853 |  | 
 | 3854 | void | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3855 | PyString_Concat(register PyObject **pv, register PyObject *w) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3856 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3857 |     register PyObject *v; | 
 | 3858 |     if (*pv == NULL) | 
 | 3859 |         return; | 
 | 3860 |     if (w == NULL || !PyString_Check(*pv)) { | 
| Serhiy Storchaka | a8d64ae | 2013-02-02 18:43:58 +0200 | [diff] [blame] | 3861 |         Py_CLEAR(*pv); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3862 |         return; | 
 | 3863 |     } | 
 | 3864 |     v = string_concat((PyStringObject *) *pv, w); | 
| Serhiy Storchaka | 763a61c | 2016-04-10 18:05:12 +0300 | [diff] [blame] | 3865 |     Py_SETREF(*pv, v); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3866 | } | 
 | 3867 |  | 
 | 3868 | void | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3869 | PyString_ConcatAndDel(register PyObject **pv, register PyObject *w) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3870 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3871 |     PyString_Concat(pv, w); | 
 | 3872 |     Py_XDECREF(w); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3873 | } | 
 | 3874 |  | 
 | 3875 |  | 
 | 3876 | /* The following function breaks the notion that strings are immutable: | 
 | 3877 |    it changes the size of a string.  We get away with this only if there | 
 | 3878 |    is only one module referencing the object.  You can also think of it | 
 | 3879 |    as creating a new string object and destroying the old one, only | 
 | 3880 |    more efficiently.  In any case, don't use this if the string may | 
 | 3881 |    already be known to some other part of the code... | 
 | 3882 |    Note that if there's not enough memory to resize the string, the original | 
 | 3883 |    string object at *pv is deallocated, *pv is set to NULL, an "out of | 
 | 3884 |    memory" exception is set, and -1 is returned.  Else (on success) 0 is | 
 | 3885 |    returned, and the value in *pv may or may not be the same as on input. | 
 | 3886 |    As always, an extra byte is allocated for a trailing \0 byte (newsize | 
 | 3887 |    does *not* include that), and a trailing \0 byte is stored. | 
 | 3888 | */ | 
 | 3889 |  | 
 | 3890 | int | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3891 | _PyString_Resize(PyObject **pv, Py_ssize_t newsize) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3892 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3893 |     register PyObject *v; | 
 | 3894 |     register PyStringObject *sv; | 
 | 3895 |     v = *pv; | 
 | 3896 |     if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 || | 
 | 3897 |         PyString_CHECK_INTERNED(v)) { | 
 | 3898 |         *pv = 0; | 
 | 3899 |         Py_DECREF(v); | 
 | 3900 |         PyErr_BadInternalCall(); | 
 | 3901 |         return -1; | 
 | 3902 |     } | 
 | 3903 |     /* XXX UNREF/NEWREF interface should be more symmetrical */ | 
 | 3904 |     _Py_DEC_REFTOTAL; | 
 | 3905 |     _Py_ForgetReference(v); | 
 | 3906 |     *pv = (PyObject *) | 
 | 3907 |         PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize); | 
 | 3908 |     if (*pv == NULL) { | 
 | 3909 |         PyObject_Del(v); | 
 | 3910 |         PyErr_NoMemory(); | 
 | 3911 |         return -1; | 
 | 3912 |     } | 
 | 3913 |     _Py_NewReference(*pv); | 
 | 3914 |     sv = (PyStringObject *) *pv; | 
 | 3915 |     Py_SIZE(sv) = newsize; | 
 | 3916 |     sv->ob_sval[newsize] = '\0'; | 
 | 3917 |     sv->ob_shash = -1;          /* invalidate cached hash value */ | 
 | 3918 |     return 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3919 | } | 
 | 3920 |  | 
 | 3921 | /* Helpers for formatstring */ | 
 | 3922 |  | 
 | 3923 | Py_LOCAL_INLINE(PyObject *) | 
 | 3924 | getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) | 
 | 3925 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3926 |     Py_ssize_t argidx = *p_argidx; | 
 | 3927 |     if (argidx < arglen) { | 
 | 3928 |         (*p_argidx)++; | 
 | 3929 |         if (arglen < 0) | 
 | 3930 |             return args; | 
 | 3931 |         else | 
 | 3932 |             return PyTuple_GetItem(args, argidx); | 
 | 3933 |     } | 
 | 3934 |     PyErr_SetString(PyExc_TypeError, | 
 | 3935 |                     "not enough arguments for format string"); | 
 | 3936 |     return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3937 | } | 
 | 3938 |  | 
 | 3939 | /* Format codes | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3940 |  * F_LJUST      '-' | 
 | 3941 |  * F_SIGN       '+' | 
 | 3942 |  * F_BLANK      ' ' | 
 | 3943 |  * F_ALT        '#' | 
 | 3944 |  * F_ZERO       '0' | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3945 |  */ | 
 | 3946 | #define F_LJUST (1<<0) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3947 | #define F_SIGN  (1<<1) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3948 | #define F_BLANK (1<<2) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3949 | #define F_ALT   (1<<3) | 
 | 3950 | #define F_ZERO  (1<<4) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3951 |  | 
| Mark Dickinson | 18cfada | 2009-11-23 18:46:41 +0000 | [diff] [blame] | 3952 | /* Returns a new reference to a PyString object, or NULL on failure. */ | 
 | 3953 |  | 
 | 3954 | static PyObject * | 
 | 3955 | formatfloat(PyObject *v, int flags, int prec, int type) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3956 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3957 |     char *p; | 
 | 3958 |     PyObject *result; | 
 | 3959 |     double x; | 
| Eric Smith | c1bdf89 | 2009-10-26 17:46:17 +0000 | [diff] [blame] | 3960 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3961 |     x = PyFloat_AsDouble(v); | 
 | 3962 |     if (x == -1.0 && PyErr_Occurred()) { | 
 | 3963 |         PyErr_Format(PyExc_TypeError, "float argument required, " | 
 | 3964 |                      "not %.200s", Py_TYPE(v)->tp_name); | 
 | 3965 |         return NULL; | 
 | 3966 |     } | 
| Mark Dickinson | 18cfada | 2009-11-23 18:46:41 +0000 | [diff] [blame] | 3967 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3968 |     if (prec < 0) | 
 | 3969 |         prec = 6; | 
| Mark Dickinson | 174e909 | 2009-03-29 16:17:16 +0000 | [diff] [blame] | 3970 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3971 |     p = PyOS_double_to_string(x, type, prec, | 
 | 3972 |                               (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3973 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3974 |     if (p == NULL) | 
 | 3975 |         return NULL; | 
 | 3976 |     result = PyString_FromStringAndSize(p, strlen(p)); | 
 | 3977 |     PyMem_Free(p); | 
 | 3978 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3979 | } | 
 | 3980 |  | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 3981 | /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3982 |  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for | 
 | 3983 |  * Python's regular ints. | 
 | 3984 |  * Return value:  a new PyString*, or NULL if error. | 
 | 3985 |  *  .  *pbuf is set to point into it, | 
 | 3986 |  *     *plen set to the # of chars following that. | 
 | 3987 |  *     Caller must decref it when done using pbuf. | 
 | 3988 |  *     The string starting at *pbuf is of the form | 
 | 3989 |  *         "-"? ("0x" | "0X")? digit+ | 
 | 3990 |  *     "0x"/"0X" are present only for x and X conversions, with F_ALT | 
 | 3991 |  *         set in flags.  The case of hex digits will be correct, | 
 | 3992 |  *     There will be at least prec digits, zero-filled on the left if | 
 | 3993 |  *         necessary to get that many. | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 3994 |  * val          object to be converted | 
 | 3995 |  * flags        bitmask of format flags; only F_ALT is looked at | 
 | 3996 |  * prec         minimum number of digits; 0-fill on left if needed | 
 | 3997 |  * type         a character in [duoxX]; u acts the same as d | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 3998 |  * | 
 | 3999 |  * CAUTION:  o, x and X conversions on regular ints can never | 
 | 4000 |  * produce a '-' sign, but can for Python's unbounded ints. | 
 | 4001 |  */ | 
 | 4002 | PyObject* | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 4003 | _PyString_FormatLong(PyObject *val, int flags, int prec, int type, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4004 |                      char **pbuf, int *plen) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4005 | { | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4006 |     PyObject *result = NULL, *r1; | 
 | 4007 |     const char *s; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4008 |     char *buf; | 
 | 4009 |     Py_ssize_t i; | 
 | 4010 |     int sign;           /* 1 if '-', else 0 */ | 
 | 4011 |     int len;            /* number of characters */ | 
 | 4012 |     Py_ssize_t llen; | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4013 |     int numdigits;      /* len == numnondigits + skipped + numdigits */ | 
 | 4014 |     int numnondigits, skipped, filled; | 
 | 4015 |     const char *method; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4016 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4017 |     switch (type) { | 
 | 4018 |     case 'd': | 
 | 4019 |     case 'u': | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4020 |         method = "str"; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4021 |         result = Py_TYPE(val)->tp_str(val); | 
 | 4022 |         break; | 
 | 4023 |     case 'o': | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4024 |         method = "oct"; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4025 |         result = Py_TYPE(val)->tp_as_number->nb_oct(val); | 
 | 4026 |         break; | 
 | 4027 |     case 'x': | 
 | 4028 |     case 'X': | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4029 |         method = "hex"; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4030 |         result = Py_TYPE(val)->tp_as_number->nb_hex(val); | 
 | 4031 |         break; | 
 | 4032 |     default: | 
 | 4033 |         assert(!"'type' not in [duoxX]"); | 
 | 4034 |     } | 
 | 4035 |     if (!result) | 
 | 4036 |         return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4037 |  | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4038 |     if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4039 |         Py_DECREF(result); | 
 | 4040 |         return NULL; | 
 | 4041 |     } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4042 |     if (llen > INT_MAX) { | 
 | 4043 |         PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4044 |         Py_DECREF(result); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4045 |         return NULL; | 
 | 4046 |     } | 
 | 4047 |     len = (int)llen; | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4048 |     if (len > 0 && s[len-1] == 'L') { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4049 |         --len; | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4050 |         if (len == 0) | 
 | 4051 |             goto error; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4052 |     } | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4053 |     sign = s[0] == '-'; | 
 | 4054 |     numnondigits = sign; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4055 |  | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4056 |     /* Need to skip 0x, 0X or 0. */ | 
 | 4057 |     skipped = 0; | 
 | 4058 |     switch (type) { | 
 | 4059 |     case 'o': | 
 | 4060 |         if (s[sign] != '0') | 
 | 4061 |             goto error; | 
 | 4062 |         /* If 0 is only digit, leave it alone. */ | 
 | 4063 |         if ((flags & F_ALT) == 0 && len - sign > 1) | 
 | 4064 |             skipped = 1; | 
 | 4065 |         break; | 
 | 4066 |     case 'x': | 
 | 4067 |     case 'X': | 
 | 4068 |         if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X')) | 
 | 4069 |             goto error; | 
 | 4070 |         if ((flags & F_ALT) == 0) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4071 |             skipped = 2; | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4072 |         else | 
 | 4073 |             numnondigits += 2; | 
 | 4074 |         break; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4075 |     } | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4076 |     numdigits = len - numnondigits - skipped; | 
 | 4077 |     if (numdigits <= 0) | 
 | 4078 |         goto error; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4079 |  | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4080 |     filled = prec - numdigits; | 
 | 4081 |     if (filled < 0) | 
 | 4082 |         filled = 0; | 
 | 4083 |     len = numnondigits + filled + numdigits; | 
 | 4084 |  | 
 | 4085 |     /* To modify the string in-place, there can only be one reference. */ | 
 | 4086 |     if (skipped >= filled && | 
 | 4087 |         PyString_CheckExact(result) && | 
 | 4088 |         Py_REFCNT(result) == 1 && | 
 | 4089 |         !PyString_CHECK_INTERNED(result)) | 
 | 4090 |     { | 
 | 4091 |         r1 = NULL; | 
 | 4092 |         buf = (char *)s + skipped - filled; | 
 | 4093 |     } | 
 | 4094 |     else { | 
 | 4095 |         r1 = result; | 
 | 4096 |         result = PyString_FromStringAndSize(NULL, len); | 
 | 4097 |         if (!result) { | 
 | 4098 |             Py_DECREF(r1); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4099 |             return NULL; | 
 | 4100 |         } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4101 |         buf = PyString_AS_STRING(result); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4102 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4103 |  | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4104 |     for (i = numnondigits; --i >= 0;) | 
 | 4105 |         buf[i] = s[i]; | 
 | 4106 |     buf += numnondigits; | 
 | 4107 |     s += numnondigits + skipped; | 
 | 4108 |     for (i = 0; i < filled; i++) | 
 | 4109 |         *buf++ = '0'; | 
 | 4110 |     if (r1 == NULL) { | 
 | 4111 |         assert(buf == s); | 
 | 4112 |         buf += numdigits; | 
 | 4113 |     } | 
 | 4114 |     else { | 
 | 4115 |         for (i = 0; i < numdigits; i++) | 
 | 4116 |             *buf++ = *s++; | 
 | 4117 |     } | 
 | 4118 |     *buf = '\0'; | 
 | 4119 |     buf -= len; | 
 | 4120 |     Py_XDECREF(r1); | 
 | 4121 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4122 |     /* Fix up case for hex conversions. */ | 
 | 4123 |     if (type == 'X') { | 
 | 4124 |         /* Need to convert all lower case letters to upper case. | 
 | 4125 |            and need to convert 0x to 0X (and -0x to -0X). */ | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4126 |         for (i = 0; i < len; i++) { | 
 | 4127 |             if (buf[i] >= 'a' && buf[i] <= 'z') | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4128 |                 buf[i] -= 'a'-'A'; | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4129 |         } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4130 |     } | 
 | 4131 |     *pbuf = buf; | 
 | 4132 |     *plen = len; | 
 | 4133 |     return result; | 
| Serhiy Storchaka | c30f27d | 2016-12-01 10:27:11 +0200 | [diff] [blame] | 4134 |  | 
 | 4135 | error: | 
 | 4136 |     PyErr_Format(PyExc_ValueError, | 
 | 4137 |                  "%%%c format: invalid result of __%s__ (type=%.200s)", | 
 | 4138 |                  type, method, Py_TYPE(val)->tp_name); | 
 | 4139 |     Py_DECREF(result); | 
 | 4140 |     return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4141 | } | 
 | 4142 |  | 
 | 4143 | Py_LOCAL_INLINE(int) | 
 | 4144 | formatint(char *buf, size_t buflen, int flags, | 
 | 4145 |           int prec, int type, PyObject *v) | 
 | 4146 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4147 |     /* fmt = '%#.' + `prec` + 'l' + `type` | 
 | 4148 |        worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) | 
 | 4149 |        + 1 + 1 = 24 */ | 
 | 4150 |     char fmt[64];       /* plenty big enough! */ | 
 | 4151 |     char *sign; | 
 | 4152 |     long x; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4153 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4154 |     x = PyInt_AsLong(v); | 
 | 4155 |     if (x == -1 && PyErr_Occurred()) { | 
 | 4156 |         PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", | 
 | 4157 |                      Py_TYPE(v)->tp_name); | 
 | 4158 |         return -1; | 
 | 4159 |     } | 
 | 4160 |     if (x < 0 && type == 'u') { | 
 | 4161 |         type = 'd'; | 
 | 4162 |     } | 
 | 4163 |     if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) | 
 | 4164 |         sign = "-"; | 
 | 4165 |     else | 
 | 4166 |         sign = ""; | 
 | 4167 |     if (prec < 0) | 
 | 4168 |         prec = 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4169 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4170 |     if ((flags & F_ALT) && | 
 | 4171 |         (type == 'x' || type == 'X')) { | 
 | 4172 |         /* When converting under %#x or %#X, there are a number | 
 | 4173 |          * of issues that cause pain: | 
 | 4174 |          * - when 0 is being converted, the C standard leaves off | 
 | 4175 |          *   the '0x' or '0X', which is inconsistent with other | 
 | 4176 |          *   %#x/%#X conversions and inconsistent with Python's | 
 | 4177 |          *   hex() function | 
 | 4178 |          * - there are platforms that violate the standard and | 
 | 4179 |          *   convert 0 with the '0x' or '0X' | 
 | 4180 |          *   (Metrowerks, Compaq Tru64) | 
 | 4181 |          * - there are platforms that give '0x' when converting | 
 | 4182 |          *   under %#X, but convert 0 in accordance with the | 
 | 4183 |          *   standard (OS/2 EMX) | 
 | 4184 |          * | 
 | 4185 |          * We can achieve the desired consistency by inserting our | 
 | 4186 |          * own '0x' or '0X' prefix, and substituting %x/%X in place | 
 | 4187 |          * of %#x/%#X. | 
 | 4188 |          * | 
 | 4189 |          * Note that this is the same approach as used in | 
 | 4190 |          * formatint() in unicodeobject.c | 
 | 4191 |          */ | 
 | 4192 |         PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", | 
 | 4193 |                       sign, type, prec, type); | 
 | 4194 |     } | 
 | 4195 |     else { | 
 | 4196 |         PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", | 
 | 4197 |                       sign, (flags&F_ALT) ? "#" : "", | 
 | 4198 |                       prec, type); | 
 | 4199 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4200 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4201 |     /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) | 
 | 4202 |      * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 | 
 | 4203 |      */ | 
 | 4204 |     if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { | 
 | 4205 |         PyErr_SetString(PyExc_OverflowError, | 
 | 4206 |             "formatted integer is too long (precision too large?)"); | 
 | 4207 |         return -1; | 
 | 4208 |     } | 
 | 4209 |     if (sign[0]) | 
 | 4210 |         PyOS_snprintf(buf, buflen, fmt, -x); | 
 | 4211 |     else | 
 | 4212 |         PyOS_snprintf(buf, buflen, fmt, x); | 
 | 4213 |     return (int)strlen(buf); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4214 | } | 
 | 4215 |  | 
 | 4216 | Py_LOCAL_INLINE(int) | 
 | 4217 | formatchar(char *buf, size_t buflen, PyObject *v) | 
 | 4218 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4219 |     /* presume that the buffer is at least 2 characters long */ | 
 | 4220 |     if (PyString_Check(v)) { | 
 | 4221 |         if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) | 
 | 4222 |             return -1; | 
 | 4223 |     } | 
 | 4224 |     else { | 
 | 4225 |         if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) | 
 | 4226 |             return -1; | 
 | 4227 |     } | 
 | 4228 |     buf[1] = '\0'; | 
 | 4229 |     return 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4230 | } | 
 | 4231 |  | 
 | 4232 | /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) | 
 | 4233 |  | 
| Mark Dickinson | 18cfada | 2009-11-23 18:46:41 +0000 | [diff] [blame] | 4234 |    FORMATBUFLEN is the length of the buffer in which the ints & | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4235 |    chars are formatted. XXX This is a magic number. Each formatting | 
 | 4236 |    routine does bounds checking to ensure no overflow, but a better | 
 | 4237 |    solution may be to malloc a buffer of appropriate size for each | 
 | 4238 |    format. For now, the current solution is sufficient. | 
 | 4239 | */ | 
 | 4240 | #define FORMATBUFLEN (size_t)120 | 
 | 4241 |  | 
 | 4242 | PyObject * | 
| Gregory P. Smith | 99a3dce | 2008-06-10 17:42:36 +0000 | [diff] [blame] | 4243 | PyString_Format(PyObject *format, PyObject *args) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4244 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4245 |     char *fmt, *res; | 
 | 4246 |     Py_ssize_t arglen, argidx; | 
 | 4247 |     Py_ssize_t reslen, rescnt, fmtcnt; | 
 | 4248 |     int args_owned = 0; | 
 | 4249 |     PyObject *result, *orig_args; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4250 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4251 |     PyObject *v, *w; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4252 | #endif | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4253 |     PyObject *dict = NULL; | 
 | 4254 |     if (format == NULL || !PyString_Check(format) || args == NULL) { | 
 | 4255 |         PyErr_BadInternalCall(); | 
 | 4256 |         return NULL; | 
 | 4257 |     } | 
 | 4258 |     orig_args = args; | 
 | 4259 |     fmt = PyString_AS_STRING(format); | 
 | 4260 |     fmtcnt = PyString_GET_SIZE(format); | 
 | 4261 |     reslen = rescnt = fmtcnt + 100; | 
 | 4262 |     result = PyString_FromStringAndSize((char *)NULL, reslen); | 
 | 4263 |     if (result == NULL) | 
 | 4264 |         return NULL; | 
 | 4265 |     res = PyString_AsString(result); | 
 | 4266 |     if (PyTuple_Check(args)) { | 
 | 4267 |         arglen = PyTuple_GET_SIZE(args); | 
 | 4268 |         argidx = 0; | 
 | 4269 |     } | 
 | 4270 |     else { | 
 | 4271 |         arglen = -1; | 
 | 4272 |         argidx = -2; | 
 | 4273 |     } | 
| Benjamin Peterson | da2c7eb | 2013-03-23 22:32:00 -0500 | [diff] [blame] | 4274 |     if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && | 
 | 4275 |         !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type)) | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4276 |         dict = args; | 
 | 4277 |     while (--fmtcnt >= 0) { | 
 | 4278 |         if (*fmt != '%') { | 
 | 4279 |             if (--rescnt < 0) { | 
 | 4280 |                 rescnt = fmtcnt + 100; | 
 | 4281 |                 reslen += rescnt; | 
 | 4282 |                 if (_PyString_Resize(&result, reslen)) | 
 | 4283 |                     return NULL; | 
 | 4284 |                 res = PyString_AS_STRING(result) | 
 | 4285 |                     + reslen - rescnt; | 
 | 4286 |                 --rescnt; | 
 | 4287 |             } | 
 | 4288 |             *res++ = *fmt++; | 
 | 4289 |         } | 
 | 4290 |         else { | 
 | 4291 |             /* Got a format specifier */ | 
 | 4292 |             int flags = 0; | 
 | 4293 |             Py_ssize_t width = -1; | 
 | 4294 |             int prec = -1; | 
 | 4295 |             int c = '\0'; | 
 | 4296 |             int fill; | 
 | 4297 |             int isnumok; | 
 | 4298 |             PyObject *v = NULL; | 
 | 4299 |             PyObject *temp = NULL; | 
 | 4300 |             char *pbuf; | 
 | 4301 |             int sign; | 
 | 4302 |             Py_ssize_t len; | 
 | 4303 |             char formatbuf[FORMATBUFLEN]; | 
 | 4304 |                  /* For format{int,char}() */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4305 | #ifdef Py_USING_UNICODE | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4306 |             char *fmt_start = fmt; | 
 | 4307 |             Py_ssize_t argidx_start = argidx; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4308 | #endif | 
 | 4309 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4310 |             fmt++; | 
 | 4311 |             if (*fmt == '(') { | 
 | 4312 |                 char *keystart; | 
 | 4313 |                 Py_ssize_t keylen; | 
 | 4314 |                 PyObject *key; | 
 | 4315 |                 int pcount = 1; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4316 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4317 |                 if (dict == NULL) { | 
 | 4318 |                     PyErr_SetString(PyExc_TypeError, | 
 | 4319 |                              "format requires a mapping"); | 
 | 4320 |                     goto error; | 
 | 4321 |                 } | 
 | 4322 |                 ++fmt; | 
 | 4323 |                 --fmtcnt; | 
 | 4324 |                 keystart = fmt; | 
 | 4325 |                 /* Skip over balanced parentheses */ | 
 | 4326 |                 while (pcount > 0 && --fmtcnt >= 0) { | 
 | 4327 |                     if (*fmt == ')') | 
 | 4328 |                         --pcount; | 
 | 4329 |                     else if (*fmt == '(') | 
 | 4330 |                         ++pcount; | 
 | 4331 |                     fmt++; | 
 | 4332 |                 } | 
 | 4333 |                 keylen = fmt - keystart - 1; | 
 | 4334 |                 if (fmtcnt < 0 || pcount > 0) { | 
 | 4335 |                     PyErr_SetString(PyExc_ValueError, | 
 | 4336 |                                "incomplete format key"); | 
 | 4337 |                     goto error; | 
 | 4338 |                 } | 
 | 4339 |                 key = PyString_FromStringAndSize(keystart, | 
 | 4340 |                                                  keylen); | 
 | 4341 |                 if (key == NULL) | 
 | 4342 |                     goto error; | 
 | 4343 |                 if (args_owned) { | 
 | 4344 |                     Py_DECREF(args); | 
 | 4345 |                     args_owned = 0; | 
 | 4346 |                 } | 
 | 4347 |                 args = PyObject_GetItem(dict, key); | 
 | 4348 |                 Py_DECREF(key); | 
 | 4349 |                 if (args == NULL) { | 
 | 4350 |                     goto error; | 
 | 4351 |                 } | 
 | 4352 |                 args_owned = 1; | 
 | 4353 |                 arglen = -1; | 
 | 4354 |                 argidx = -2; | 
 | 4355 |             } | 
 | 4356 |             while (--fmtcnt >= 0) { | 
 | 4357 |                 switch (c = *fmt++) { | 
 | 4358 |                 case '-': flags |= F_LJUST; continue; | 
 | 4359 |                 case '+': flags |= F_SIGN; continue; | 
 | 4360 |                 case ' ': flags |= F_BLANK; continue; | 
 | 4361 |                 case '#': flags |= F_ALT; continue; | 
 | 4362 |                 case '0': flags |= F_ZERO; continue; | 
 | 4363 |                 } | 
 | 4364 |                 break; | 
 | 4365 |             } | 
 | 4366 |             if (c == '*') { | 
 | 4367 |                 v = getnextarg(args, arglen, &argidx); | 
 | 4368 |                 if (v == NULL) | 
 | 4369 |                     goto error; | 
 | 4370 |                 if (!PyInt_Check(v)) { | 
 | 4371 |                     PyErr_SetString(PyExc_TypeError, | 
 | 4372 |                                     "* wants int"); | 
 | 4373 |                     goto error; | 
 | 4374 |                 } | 
| Serhiy Storchaka | 926f3a3 | 2013-01-19 23:35:46 +0200 | [diff] [blame] | 4375 |                 width = PyInt_AsSsize_t(v); | 
 | 4376 |                 if (width == -1 && PyErr_Occurred()) | 
 | 4377 |                     goto error; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4378 |                 if (width < 0) { | 
 | 4379 |                     flags |= F_LJUST; | 
 | 4380 |                     width = -width; | 
 | 4381 |                 } | 
 | 4382 |                 if (--fmtcnt >= 0) | 
 | 4383 |                     c = *fmt++; | 
 | 4384 |             } | 
 | 4385 |             else if (c >= 0 && isdigit(c)) { | 
 | 4386 |                 width = c - '0'; | 
 | 4387 |                 while (--fmtcnt >= 0) { | 
 | 4388 |                     c = Py_CHARMASK(*fmt++); | 
 | 4389 |                     if (!isdigit(c)) | 
 | 4390 |                         break; | 
| Mark Dickinson | 75d3600 | 2012-10-28 10:00:46 +0000 | [diff] [blame] | 4391 |                     if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4392 |                         PyErr_SetString( | 
 | 4393 |                             PyExc_ValueError, | 
 | 4394 |                             "width too big"); | 
 | 4395 |                         goto error; | 
 | 4396 |                     } | 
 | 4397 |                     width = width*10 + (c - '0'); | 
 | 4398 |                 } | 
 | 4399 |             } | 
 | 4400 |             if (c == '.') { | 
 | 4401 |                 prec = 0; | 
 | 4402 |                 if (--fmtcnt >= 0) | 
 | 4403 |                     c = *fmt++; | 
 | 4404 |                 if (c == '*') { | 
 | 4405 |                     v = getnextarg(args, arglen, &argidx); | 
 | 4406 |                     if (v == NULL) | 
 | 4407 |                         goto error; | 
 | 4408 |                     if (!PyInt_Check(v)) { | 
 | 4409 |                         PyErr_SetString( | 
 | 4410 |                             PyExc_TypeError, | 
 | 4411 |                             "* wants int"); | 
 | 4412 |                         goto error; | 
 | 4413 |                     } | 
| Serhiy Storchaka | 926f3a3 | 2013-01-19 23:35:46 +0200 | [diff] [blame] | 4414 |                     prec = _PyInt_AsInt(v); | 
 | 4415 |                     if (prec == -1 && PyErr_Occurred()) | 
 | 4416 |                         goto error; | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4417 |                     if (prec < 0) | 
 | 4418 |                         prec = 0; | 
 | 4419 |                     if (--fmtcnt >= 0) | 
 | 4420 |                         c = *fmt++; | 
 | 4421 |                 } | 
 | 4422 |                 else if (c >= 0 && isdigit(c)) { | 
 | 4423 |                     prec = c - '0'; | 
 | 4424 |                     while (--fmtcnt >= 0) { | 
 | 4425 |                         c = Py_CHARMASK(*fmt++); | 
 | 4426 |                         if (!isdigit(c)) | 
 | 4427 |                             break; | 
| Mark Dickinson | 75d3600 | 2012-10-28 10:00:46 +0000 | [diff] [blame] | 4428 |                         if (prec > (INT_MAX - ((int)c - '0')) / 10) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4429 |                             PyErr_SetString( | 
 | 4430 |                                 PyExc_ValueError, | 
 | 4431 |                                 "prec too big"); | 
 | 4432 |                             goto error; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4433 |                         } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4434 |                         prec = prec*10 + (c - '0'); | 
 | 4435 |                     } | 
 | 4436 |                 } | 
 | 4437 |             } /* prec */ | 
 | 4438 |             if (fmtcnt >= 0) { | 
 | 4439 |                 if (c == 'h' || c == 'l' || c == 'L') { | 
 | 4440 |                     if (--fmtcnt >= 0) | 
 | 4441 |                         c = *fmt++; | 
 | 4442 |                 } | 
 | 4443 |             } | 
 | 4444 |             if (fmtcnt < 0) { | 
 | 4445 |                 PyErr_SetString(PyExc_ValueError, | 
 | 4446 |                                 "incomplete format"); | 
 | 4447 |                 goto error; | 
 | 4448 |             } | 
 | 4449 |             if (c != '%') { | 
 | 4450 |                 v = getnextarg(args, arglen, &argidx); | 
 | 4451 |                 if (v == NULL) | 
 | 4452 |                     goto error; | 
 | 4453 |             } | 
 | 4454 |             sign = 0; | 
 | 4455 |             fill = ' '; | 
 | 4456 |             switch (c) { | 
 | 4457 |             case '%': | 
 | 4458 |                 pbuf = "%"; | 
 | 4459 |                 len = 1; | 
 | 4460 |                 break; | 
 | 4461 |             case 's': | 
 | 4462 | #ifdef Py_USING_UNICODE | 
 | 4463 |                 if (PyUnicode_Check(v)) { | 
 | 4464 |                     fmt = fmt_start; | 
 | 4465 |                     argidx = argidx_start; | 
 | 4466 |                     goto unicode; | 
 | 4467 |                 } | 
 | 4468 | #endif | 
 | 4469 |                 temp = _PyObject_Str(v); | 
 | 4470 | #ifdef Py_USING_UNICODE | 
 | 4471 |                 if (temp != NULL && PyUnicode_Check(temp)) { | 
 | 4472 |                     Py_DECREF(temp); | 
 | 4473 |                     fmt = fmt_start; | 
 | 4474 |                     argidx = argidx_start; | 
 | 4475 |                     goto unicode; | 
 | 4476 |                 } | 
 | 4477 | #endif | 
 | 4478 |                 /* Fall through */ | 
 | 4479 |             case 'r': | 
 | 4480 |                 if (c == 'r') | 
 | 4481 |                     temp = PyObject_Repr(v); | 
 | 4482 |                 if (temp == NULL) | 
 | 4483 |                     goto error; | 
 | 4484 |                 if (!PyString_Check(temp)) { | 
 | 4485 |                     PyErr_SetString(PyExc_TypeError, | 
 | 4486 |                       "%s argument has non-string str()"); | 
 | 4487 |                     Py_DECREF(temp); | 
 | 4488 |                     goto error; | 
 | 4489 |                 } | 
 | 4490 |                 pbuf = PyString_AS_STRING(temp); | 
 | 4491 |                 len = PyString_GET_SIZE(temp); | 
 | 4492 |                 if (prec >= 0 && len > prec) | 
 | 4493 |                     len = prec; | 
 | 4494 |                 break; | 
 | 4495 |             case 'i': | 
 | 4496 |             case 'd': | 
 | 4497 |             case 'u': | 
 | 4498 |             case 'o': | 
 | 4499 |             case 'x': | 
 | 4500 |             case 'X': | 
 | 4501 |                 if (c == 'i') | 
 | 4502 |                     c = 'd'; | 
 | 4503 |                 isnumok = 0; | 
 | 4504 |                 if (PyNumber_Check(v)) { | 
 | 4505 |                     PyObject *iobj=NULL; | 
 | 4506 |  | 
| Serhiy Storchaka | 48c8bf2 | 2018-07-31 09:09:36 +0300 | [diff] [blame] | 4507 |                     if (_PyAnyInt_Check(v)) { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4508 |                         iobj = v; | 
 | 4509 |                         Py_INCREF(iobj); | 
 | 4510 |                     } | 
 | 4511 |                     else { | 
 | 4512 |                         iobj = PyNumber_Int(v); | 
| Benjamin Peterson | a708adf | 2013-01-02 12:21:32 -0600 | [diff] [blame] | 4513 |                         if (iobj==NULL) { | 
| Benjamin Peterson | 8f53ded | 2013-01-02 12:25:15 -0600 | [diff] [blame] | 4514 |                             PyErr_Clear(); | 
 | 4515 |                             iobj = PyNumber_Long(v); | 
 | 4516 |                         } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4517 |                     } | 
 | 4518 |                     if (iobj!=NULL) { | 
 | 4519 |                         if (PyInt_Check(iobj)) { | 
 | 4520 |                             isnumok = 1; | 
 | 4521 |                             pbuf = formatbuf; | 
 | 4522 |                             len = formatint(pbuf, | 
 | 4523 |                                             sizeof(formatbuf), | 
 | 4524 |                                             flags, prec, c, iobj); | 
 | 4525 |                             Py_DECREF(iobj); | 
 | 4526 |                             if (len < 0) | 
 | 4527 |                                 goto error; | 
 | 4528 |                             sign = 1; | 
 | 4529 |                         } | 
 | 4530 |                         else if (PyLong_Check(iobj)) { | 
 | 4531 |                             int ilen; | 
 | 4532 |  | 
 | 4533 |                             isnumok = 1; | 
 | 4534 |                             temp = _PyString_FormatLong(iobj, flags, | 
 | 4535 |                                 prec, c, &pbuf, &ilen); | 
 | 4536 |                             Py_DECREF(iobj); | 
 | 4537 |                             len = ilen; | 
 | 4538 |                             if (!temp) | 
 | 4539 |                                 goto error; | 
 | 4540 |                             sign = 1; | 
 | 4541 |                         } | 
 | 4542 |                         else { | 
 | 4543 |                             Py_DECREF(iobj); | 
 | 4544 |                         } | 
 | 4545 |                     } | 
 | 4546 |                 } | 
 | 4547 |                 if (!isnumok) { | 
 | 4548 |                     PyErr_Format(PyExc_TypeError, | 
 | 4549 |                         "%%%c format: a number is required, " | 
 | 4550 |                         "not %.200s", c, Py_TYPE(v)->tp_name); | 
 | 4551 |                     goto error; | 
 | 4552 |                 } | 
 | 4553 |                 if (flags & F_ZERO) | 
 | 4554 |                     fill = '0'; | 
 | 4555 |                 break; | 
 | 4556 |             case 'e': | 
 | 4557 |             case 'E': | 
 | 4558 |             case 'f': | 
 | 4559 |             case 'F': | 
 | 4560 |             case 'g': | 
 | 4561 |             case 'G': | 
 | 4562 |                 temp = formatfloat(v, flags, prec, c); | 
 | 4563 |                 if (temp == NULL) | 
 | 4564 |                     goto error; | 
 | 4565 |                 pbuf = PyString_AS_STRING(temp); | 
 | 4566 |                 len = PyString_GET_SIZE(temp); | 
 | 4567 |                 sign = 1; | 
 | 4568 |                 if (flags & F_ZERO) | 
 | 4569 |                     fill = '0'; | 
 | 4570 |                 break; | 
 | 4571 |             case 'c': | 
 | 4572 | #ifdef Py_USING_UNICODE | 
 | 4573 |                 if (PyUnicode_Check(v)) { | 
 | 4574 |                     fmt = fmt_start; | 
 | 4575 |                     argidx = argidx_start; | 
 | 4576 |                     goto unicode; | 
 | 4577 |                 } | 
 | 4578 | #endif | 
 | 4579 |                 pbuf = formatbuf; | 
 | 4580 |                 len = formatchar(pbuf, sizeof(formatbuf), v); | 
 | 4581 |                 if (len < 0) | 
 | 4582 |                     goto error; | 
 | 4583 |                 break; | 
 | 4584 |             default: | 
 | 4585 |                 PyErr_Format(PyExc_ValueError, | 
 | 4586 |                   "unsupported format character '%c' (0x%x) " | 
 | 4587 |                   "at index %zd", | 
 | 4588 |                   c, c, | 
 | 4589 |                   (Py_ssize_t)(fmt - 1 - | 
 | 4590 |                                PyString_AsString(format))); | 
 | 4591 |                 goto error; | 
 | 4592 |             } | 
 | 4593 |             if (sign) { | 
 | 4594 |                 if (*pbuf == '-' || *pbuf == '+') { | 
 | 4595 |                     sign = *pbuf++; | 
 | 4596 |                     len--; | 
 | 4597 |                 } | 
 | 4598 |                 else if (flags & F_SIGN) | 
 | 4599 |                     sign = '+'; | 
 | 4600 |                 else if (flags & F_BLANK) | 
 | 4601 |                     sign = ' '; | 
 | 4602 |                 else | 
 | 4603 |                     sign = 0; | 
 | 4604 |             } | 
 | 4605 |             if (width < len) | 
 | 4606 |                 width = len; | 
 | 4607 |             if (rescnt - (sign != 0) < width) { | 
 | 4608 |                 reslen -= rescnt; | 
 | 4609 |                 rescnt = width + fmtcnt + 100; | 
 | 4610 |                 reslen += rescnt; | 
 | 4611 |                 if (reslen < 0) { | 
 | 4612 |                     Py_DECREF(result); | 
 | 4613 |                     Py_XDECREF(temp); | 
 | 4614 |                     return PyErr_NoMemory(); | 
 | 4615 |                 } | 
 | 4616 |                 if (_PyString_Resize(&result, reslen)) { | 
 | 4617 |                     Py_XDECREF(temp); | 
 | 4618 |                     return NULL; | 
 | 4619 |                 } | 
 | 4620 |                 res = PyString_AS_STRING(result) | 
 | 4621 |                     + reslen - rescnt; | 
 | 4622 |             } | 
 | 4623 |             if (sign) { | 
 | 4624 |                 if (fill != ' ') | 
 | 4625 |                     *res++ = sign; | 
 | 4626 |                 rescnt--; | 
 | 4627 |                 if (width > len) | 
 | 4628 |                     width--; | 
 | 4629 |             } | 
 | 4630 |             if ((flags & F_ALT) && (c == 'x' || c == 'X')) { | 
 | 4631 |                 assert(pbuf[0] == '0'); | 
 | 4632 |                 assert(pbuf[1] == c); | 
 | 4633 |                 if (fill != ' ') { | 
 | 4634 |                     *res++ = *pbuf++; | 
 | 4635 |                     *res++ = *pbuf++; | 
 | 4636 |                 } | 
 | 4637 |                 rescnt -= 2; | 
 | 4638 |                 width -= 2; | 
 | 4639 |                 if (width < 0) | 
 | 4640 |                     width = 0; | 
 | 4641 |                 len -= 2; | 
 | 4642 |             } | 
 | 4643 |             if (width > len && !(flags & F_LJUST)) { | 
 | 4644 |                 do { | 
 | 4645 |                     --rescnt; | 
 | 4646 |                     *res++ = fill; | 
 | 4647 |                 } while (--width > len); | 
 | 4648 |             } | 
 | 4649 |             if (fill == ' ') { | 
 | 4650 |                 if (sign) | 
 | 4651 |                     *res++ = sign; | 
 | 4652 |                 if ((flags & F_ALT) && | 
 | 4653 |                     (c == 'x' || c == 'X')) { | 
 | 4654 |                     assert(pbuf[0] == '0'); | 
 | 4655 |                     assert(pbuf[1] == c); | 
 | 4656 |                     *res++ = *pbuf++; | 
 | 4657 |                     *res++ = *pbuf++; | 
 | 4658 |                 } | 
 | 4659 |             } | 
 | 4660 |             Py_MEMCPY(res, pbuf, len); | 
 | 4661 |             res += len; | 
 | 4662 |             rescnt -= len; | 
 | 4663 |             while (--width >= len) { | 
 | 4664 |                 --rescnt; | 
 | 4665 |                 *res++ = ' '; | 
 | 4666 |             } | 
 | 4667 |             if (dict && (argidx < arglen) && c != '%') { | 
 | 4668 |                 PyErr_SetString(PyExc_TypeError, | 
 | 4669 |                            "not all arguments converted during string formatting"); | 
 | 4670 |                 Py_XDECREF(temp); | 
 | 4671 |                 goto error; | 
 | 4672 |             } | 
 | 4673 |             Py_XDECREF(temp); | 
 | 4674 |         } /* '%' */ | 
 | 4675 |     } /* until end */ | 
 | 4676 |     if (argidx < arglen && !dict) { | 
 | 4677 |         PyErr_SetString(PyExc_TypeError, | 
 | 4678 |                         "not all arguments converted during string formatting"); | 
 | 4679 |         goto error; | 
 | 4680 |     } | 
 | 4681 |     if (args_owned) { | 
 | 4682 |         Py_DECREF(args); | 
 | 4683 |     } | 
 | 4684 |     if (_PyString_Resize(&result, reslen - rescnt)) | 
 | 4685 |         return NULL; | 
 | 4686 |     return result; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4687 |  | 
 | 4688 | #ifdef Py_USING_UNICODE | 
 | 4689 |  unicode: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4690 |     if (args_owned) { | 
 | 4691 |         Py_DECREF(args); | 
 | 4692 |         args_owned = 0; | 
 | 4693 |     } | 
 | 4694 |     /* Fiddle args right (remove the first argidx arguments) */ | 
 | 4695 |     if (PyTuple_Check(orig_args) && argidx > 0) { | 
 | 4696 |         PyObject *v; | 
 | 4697 |         Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx; | 
 | 4698 |         v = PyTuple_New(n); | 
 | 4699 |         if (v == NULL) | 
 | 4700 |             goto error; | 
 | 4701 |         while (--n >= 0) { | 
 | 4702 |             PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); | 
 | 4703 |             Py_INCREF(w); | 
 | 4704 |             PyTuple_SET_ITEM(v, n, w); | 
 | 4705 |         } | 
 | 4706 |         args = v; | 
 | 4707 |     } else { | 
 | 4708 |         Py_INCREF(orig_args); | 
 | 4709 |         args = orig_args; | 
 | 4710 |     } | 
 | 4711 |     args_owned = 1; | 
 | 4712 |     /* Take what we have of the result and let the Unicode formatting | 
 | 4713 |        function format the rest of the input. */ | 
 | 4714 |     rescnt = res - PyString_AS_STRING(result); | 
 | 4715 |     if (_PyString_Resize(&result, rescnt)) | 
 | 4716 |         goto error; | 
 | 4717 |     fmtcnt = PyString_GET_SIZE(format) - \ | 
 | 4718 |              (fmt - PyString_AS_STRING(format)); | 
 | 4719 |     format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); | 
 | 4720 |     if (format == NULL) | 
 | 4721 |         goto error; | 
 | 4722 |     v = PyUnicode_Format(format, args); | 
 | 4723 |     Py_DECREF(format); | 
 | 4724 |     if (v == NULL) | 
 | 4725 |         goto error; | 
 | 4726 |     /* Paste what we have (result) to what the Unicode formatting | 
 | 4727 |        function returned (v) and return the result (or error) */ | 
 | 4728 |     w = PyUnicode_Concat(result, v); | 
 | 4729 |     Py_DECREF(result); | 
 | 4730 |     Py_DECREF(v); | 
 | 4731 |     Py_DECREF(args); | 
 | 4732 |     return w; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4733 | #endif /* Py_USING_UNICODE */ | 
 | 4734 |  | 
 | 4735 |  error: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4736 |     Py_DECREF(result); | 
 | 4737 |     if (args_owned) { | 
 | 4738 |         Py_DECREF(args); | 
 | 4739 |     } | 
 | 4740 |     return NULL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4741 | } | 
 | 4742 |  | 
 | 4743 | void | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 4744 | PyString_InternInPlace(PyObject **p) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4745 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4746 |     register PyStringObject *s = (PyStringObject *)(*p); | 
 | 4747 |     PyObject *t; | 
 | 4748 |     if (s == NULL || !PyString_Check(s)) | 
 | 4749 |         Py_FatalError("PyString_InternInPlace: strings only please!"); | 
 | 4750 |     /* If it's a string subclass, we don't really know what putting | 
 | 4751 |        it in the interned dict might do. */ | 
 | 4752 |     if (!PyString_CheckExact(s)) | 
 | 4753 |         return; | 
 | 4754 |     if (PyString_CHECK_INTERNED(s)) | 
 | 4755 |         return; | 
 | 4756 |     if (interned == NULL) { | 
 | 4757 |         interned = PyDict_New(); | 
 | 4758 |         if (interned == NULL) { | 
 | 4759 |             PyErr_Clear(); /* Don't leave an exception */ | 
 | 4760 |             return; | 
 | 4761 |         } | 
 | 4762 |     } | 
 | 4763 |     t = PyDict_GetItem(interned, (PyObject *)s); | 
 | 4764 |     if (t) { | 
 | 4765 |         Py_INCREF(t); | 
| Serhiy Storchaka | 763a61c | 2016-04-10 18:05:12 +0300 | [diff] [blame] | 4766 |         Py_SETREF(*p, t); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4767 |         return; | 
 | 4768 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4769 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4770 |     if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { | 
 | 4771 |         PyErr_Clear(); | 
 | 4772 |         return; | 
 | 4773 |     } | 
 | 4774 |     /* The two references in interned are not counted by refcnt. | 
 | 4775 |        The string deallocator will take care of this */ | 
 | 4776 |     Py_REFCNT(s) -= 2; | 
 | 4777 |     PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4778 | } | 
 | 4779 |  | 
 | 4780 | void | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 4781 | PyString_InternImmortal(PyObject **p) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4782 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4783 |     PyString_InternInPlace(p); | 
 | 4784 |     if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { | 
 | 4785 |         PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; | 
 | 4786 |         Py_INCREF(*p); | 
 | 4787 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4788 | } | 
 | 4789 |  | 
 | 4790 |  | 
 | 4791 | PyObject * | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 4792 | PyString_InternFromString(const char *cp) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4793 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4794 |     PyObject *s = PyString_FromString(cp); | 
 | 4795 |     if (s == NULL) | 
 | 4796 |         return NULL; | 
 | 4797 |     PyString_InternInPlace(&s); | 
 | 4798 |     return s; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4799 | } | 
 | 4800 |  | 
 | 4801 | void | 
| Gregory P. Smith | dd96db6 | 2008-06-09 04:58:54 +0000 | [diff] [blame] | 4802 | PyString_Fini(void) | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4803 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4804 |     int i; | 
| Serhiy Storchaka | a8d64ae | 2013-02-02 18:43:58 +0200 | [diff] [blame] | 4805 |     for (i = 0; i < UCHAR_MAX + 1; i++) | 
 | 4806 |         Py_CLEAR(characters[i]); | 
 | 4807 |     Py_CLEAR(nullstring); | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4808 | } | 
 | 4809 |  | 
 | 4810 | void _Py_ReleaseInternedStrings(void) | 
 | 4811 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4812 |     PyObject *keys; | 
 | 4813 |     PyStringObject *s; | 
 | 4814 |     Py_ssize_t i, n; | 
 | 4815 |     Py_ssize_t immortal_size = 0, mortal_size = 0; | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4816 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4817 |     if (interned == NULL || !PyDict_Check(interned)) | 
 | 4818 |         return; | 
 | 4819 |     keys = PyDict_Keys(interned); | 
 | 4820 |     if (keys == NULL || !PyList_Check(keys)) { | 
 | 4821 |         PyErr_Clear(); | 
 | 4822 |         return; | 
 | 4823 |     } | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4824 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4825 |     /* Since _Py_ReleaseInternedStrings() is intended to help a leak | 
 | 4826 |        detector, interned strings are not forcibly deallocated; rather, we | 
 | 4827 |        give them their stolen references back, and then clear and DECREF | 
 | 4828 |        the interned dict. */ | 
| Christian Heimes | 4472083 | 2008-05-26 13:01:01 +0000 | [diff] [blame] | 4829 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 4830 |     n = PyList_GET_SIZE(keys); | 
 | 4831 |     fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", | 
 | 4832 |         n); | 
 | 4833 |     for (i = 0; i < n; i++) { | 
 | 4834 |         s = (PyStringObject *) PyList_GET_ITEM(keys, i); | 
 | 4835 |         switch (s->ob_sstate) { | 
 | 4836 |         case SSTATE_NOT_INTERNED: | 
 | 4837 |             /* XXX Shouldn't happen */ | 
 | 4838 |             break; | 
 | 4839 |         case SSTATE_INTERNED_IMMORTAL: | 
 | 4840 |             Py_REFCNT(s) += 1; | 
 | 4841 |             immortal_size += Py_SIZE(s); | 
 | 4842 |             break; | 
 | 4843 |         case SSTATE_INTERNED_MORTAL: | 
 | 4844 |             Py_REFCNT(s) += 2; | 
 | 4845 |             mortal_size += Py_SIZE(s); | 
 | 4846 |             break; | 
 | 4847 |         default: | 
 | 4848 |             Py_FatalError("Inconsistent interned string state."); | 
 | 4849 |         } | 
 | 4850 |         s->ob_sstate = SSTATE_NOT_INTERNED; | 
 | 4851 |     } | 
 | 4852 |     fprintf(stderr, "total size of all interned strings: " | 
 | 4853 |                     "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " | 
 | 4854 |                     "mortal/immortal\n", mortal_size, immortal_size); | 
 | 4855 |     Py_DECREF(keys); | 
 | 4856 |     PyDict_Clear(interned); | 
| Serhiy Storchaka | a8d64ae | 2013-02-02 18:43:58 +0200 | [diff] [blame] | 4857 |     Py_CLEAR(interned); | 
| Christian Heimes | 1a6387e | 2008-03-26 12:49:49 +0000 | [diff] [blame] | 4858 | } |