| Skip Montanaro | 0b87444 | 2003-10-03 14:05:26 +0000 | [diff] [blame] | 1 | """A collection of string operations (most are no longer used). | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 2 |  | 
| Skip Montanaro | 0b87444 | 2003-10-03 14:05:26 +0000 | [diff] [blame] | 3 | Warning: most of the code you see here isn't normally used nowadays. | 
|  | 4 | Beginning with Python 1.6, many of these functions are implemented as | 
|  | 5 | methods on the standard string object. They used to be implemented by | 
|  | 6 | a built-in module called strop, but strop is now obsolete itself. | 
| Guido van Rossum | 2003204 | 1997-12-29 19:26:28 +0000 | [diff] [blame] | 7 |  | 
|  | 8 | Public module variables: | 
|  | 9 |  | 
|  | 10 | whitespace -- a string containing all characters considered whitespace | 
|  | 11 | lowercase -- a string containing all characters considered lowercase letters | 
|  | 12 | uppercase -- a string containing all characters considered uppercase letters | 
|  | 13 | letters -- a string containing all characters considered letters | 
|  | 14 | digits -- a string containing all characters considered decimal digits | 
|  | 15 | hexdigits -- a string containing all characters considered hexadecimal digits | 
|  | 16 | octdigits -- a string containing all characters considered octal digits | 
| Fred Drake | fd64c59 | 2000-09-18 19:38:11 +0000 | [diff] [blame] | 17 | punctuation -- a string containing all characters considered punctuation | 
|  | 18 | printable -- a string containing all characters considered printable | 
| Guido van Rossum | 2003204 | 1997-12-29 19:26:28 +0000 | [diff] [blame] | 19 |  | 
|  | 20 | """ | 
|  | 21 |  | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 22 | # Some strings for ctype-style character classification | 
# Whitespace characters.
whitespace = ' \t\n\r\v\f'

# Letter classes, plus ASCII-named aliases bound to the same values.
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
ascii_lowercase = lowercase
ascii_uppercase = uppercase
ascii_letters = ascii_lowercase + ascii_uppercase

# Digit classes.
digits = '0123456789'
octdigits = '01234567'
hexdigits = digits + 'abcdef' + 'ABCDEF'

# Raw literal: the backslash between the brackets is an ordinary character.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""

# Everything above combined.
printable = digits + letters + punctuation + whitespace
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 35 |  | 
|  | 36 | # Case conversion helpers | 
| Martin v. Löwis | 5357c65 | 2002-10-14 20:03:40 +0000 | [diff] [blame] | 37 | # Use str to convert Unicode literal in case of -U | 
# Identity table: the character with code i sits at index i (256 entries).
# Built in one expression so that no temporary name has to be deleted.
_idmap = str('').join(map(chr, range(256)))
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 41 |  | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 42 | # Functions which aren't available as string methods. | 
|  | 43 |  | 
|  | 44 | # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def". | 
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Capitalize every word of s.  The string is split with split(sep),
    each piece is passed through capitalize(), and the pieces are
    joined again.  When sep is absent or None the pieces are joined
    with a single space, so runs of whitespace collapse and leading
    and trailing whitespace disappear; otherwise sep is used both to
    split and to join.

    """
    # Bind the join method first, exactly as the one-expression form does.
    glue = (sep or ' ').join
    return glue(word.capitalize() for word in s.split(sep))
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 57 |  | 
|  | 58 |  | 
|  | 59 | # Construct a translation string | 
_idmapL = None  # list form of _idmap, created on the first maketrans call
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Build a translation table for use with string.translate: a copy
    of the identity table in which each character of frm is replaced
    by the character at the same position in to.  frm and to must
    have the same length, otherwise ValueError is raised.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity list is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return ''.join(table)
|  | 79 |  | 
|  | 80 |  | 
| Raymond Hettinger | 57aef9c | 2004-12-07 07:55:07 +0000 | [diff] [blame] | 81 |  | 
| Raymond Hettinger | 0d58e2b | 2004-08-26 00:21:13 +0000 | [diff] [blame] | 82 | #################################################################### | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 83 | import re as _re | 
|  | 84 |  | 
class _multimap:
    """Read-only view over two mappings, consulted in order.

    Lets .{safe_,}substitute() look names up in the keyword arguments
    first and fall back to the positional mapping.
    """
    def __init__(self, primary, secondary):
        self._primary = primary
        self._secondary = secondary

    def __getitem__(self, key):
        # Only a KeyError from the primary mapping triggers the fallback.
        try:
            value = self._primary[key]
        except KeyError:
            value = self._secondary[key]
        return value
|  | 100 |  | 
|  | 101 |  | 
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of each new class."""
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' not in dct:
            # No pattern in the class body: fill the default skeleton
            # with the class's (escaped) delimiter and identifier regex.
            source = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        else:
            source = cls.pattern
        flags = _re.IGNORECASE | _re.VERBOSE
        cls.pattern = _re.compile(source, flags)
|  | 122 |  | 
|  | 123 |  | 
class Template:
    """A string class for supporting $-substitutions.

    Placeholders are written $identifier or ${identifier}; a doubled
    delimiter stands for one literal delimiter.
    """
    __metaclass__ = _TemplateMetaclass

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError naming the line and column of a bad placeholder."""
        offset = mo.start('invalid')
        # Line endings are kept so that the lengths of the earlier lines
        # add up to the offset at which the current line starts.
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            colno = offset - sum(len(line) for line in preceding[:-1])
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(*args, **kws):
        """Return the template with every placeholder replaced.

        Values are looked up in an optional mapping (the only allowed
        positional argument) and in the keyword arguments; when both
        are given, keywords are consulted first.  Lookup errors from
        the mapping propagate, and an ill-formed placeholder raises
        ValueError.
        """
        if not args:
            raise TypeError("descriptor 'substitute' of 'Template' object "
                            "needs an argument")
        # self is taken from *args so that "self" may be used as a keyword.
        self = args[0]
        extra = args[1:]
        if len(extra) > 1:
            raise TypeError('Too many positional arguments')
        if extra and kws:
            mapping = _multimap(kws, extra[0])
        elif extra:
            mapping = extra[0]
        else:
            mapping = kws

        # Replacement callback for pattern.sub()
        def convert(found):
            # Named and braced placeholders are the common case.
            name = found.group('named') or found.group('braced')
            if name is not None:
                # '%s' formatting rather than str(): str() fails on
                # Unicode values containing non-ASCII characters.
                return '%s' % (mapping[name],)
            if found.group('escaped') is not None:
                return self.delimiter
            if found.group('invalid') is not None:
                self._invalid(found)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        A placeholder whose lookup raises KeyError, and any ill-formed
        placeholder, is copied to the result unchanged.
        """
        if not args:
            raise TypeError("descriptor 'safe_substitute' of 'Template' object "
                            "needs an argument")
        # self is taken from *args so that "self" may be used as a keyword.
        self = args[0]
        extra = args[1:]
        if len(extra) > 1:
            raise TypeError('Too many positional arguments')
        if extra and kws:
            mapping = _multimap(kws, extra[0])
        elif extra:
            mapping = extra[0]
        else:
            mapping = kws

        # Replacement callback for pattern.sub()
        def convert(found):
            name = found.group('named') or found.group('braced')
            if name is not None:
                try:
                    # '%s' formatting rather than str(): str() fails on
                    # Unicode values containing non-ASCII characters.
                    return '%s' % (mapping[name],)
                except KeyError:
                    return found.group()
            if found.group('escaped') is not None:
                return self.delimiter
            if found.group('invalid') is not None:
                return found.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 208 |  | 
|  | 209 |  | 
| Raymond Hettinger | 57aef9c | 2004-12-07 07:55:07 +0000 | [diff] [blame] | 210 |  | 
| Raymond Hettinger | 0d58e2b | 2004-08-26 00:21:13 +0000 | [diff] [blame] | 211 | #################################################################### | 
| Barry Warsaw | 8bee761 | 2004-08-25 02:22:30 +0000 | [diff] [blame] | 212 | # NOTE: Everything below here is deprecated.  Use string methods instead. | 
|  | 213 | # This stuff will go away in Python 3.0. | 
|  | 214 |  | 
| Guido van Rossum | 710c352 | 1994-08-17 13:16:11 +0000 | [diff] [blame] | 215 | # Backward compatible names for exceptions | 
# All four legacy exception names refer to the built-in ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
|  | 220 |  | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 221 | # convert UPPER CASE letters to lower case | 
def lower(s):
    """lower(s) -> string

    Return the result of s.lower(): a lowercase copy of s.

    """
    lowered = s.lower()
    return lowered
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 229 |  | 
|  | 230 | # Convert lower case letters to UPPER CASE | 
def upper(s):
    """upper(s) -> string

    Return the result of s.upper(): an uppercase copy of s.

    """
    raised = s.upper()
    return raised
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 238 |  | 
|  | 239 | # Swap lower case letters and UPPER CASE | 
def swapcase(s):
    """swapcase(s) -> string

    Return the result of s.swapcase(): a copy of s in which upper
    case characters become lowercase and lowercase characters become
    upper case.

    """
    swapped = s.swapcase()
    return swapped
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 248 |  | 
|  | 249 | # Strip leading and trailing tabs and spaces | 
def strip(s, chars=None):
    """strip(s [,chars]) -> string

    Return a copy of s with leading and trailing whitespace removed.
    When chars is given and is not None, the characters in chars are
    removed instead.
    If chars is unicode, s will be converted to unicode before stripping.

    """
    stripped = s.strip(chars)
    return stripped
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 260 |  | 
| Guido van Rossum | 306a8a6 | 1996-08-08 18:40:59 +0000 | [diff] [blame] | 261 | # Strip leading tabs and spaces | 
def lstrip(s, chars=None):
    """lstrip(s [,chars]) -> string

    Return a copy of s with leading whitespace removed.  When chars
    is given and is not None, leading characters found in chars are
    removed instead.

    """
    stripped = s.lstrip(chars)
    return stripped
| Guido van Rossum | 306a8a6 | 1996-08-08 18:40:59 +0000 | [diff] [blame] | 270 |  | 
|  | 271 | # Strip trailing tabs and spaces | 
def rstrip(s, chars=None):
    """rstrip(s [,chars]) -> string

    Return a copy of s with trailing whitespace removed.  When chars
    is given and is not None, trailing characters found in chars are
    removed instead.

    """
    stripped = s.rstrip(chars)
    return stripped
| Guido van Rossum | 306a8a6 | 1996-08-08 18:40:59 +0000 | [diff] [blame] | 280 |  | 
|  | 281 |  | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 282 | # Split a string into a list of space/tab-separated words | 
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Break s into a list of words, treating sep as the delimiter
    string.  With maxsplit given, at most maxsplit splits are made,
    so the list has at most maxsplit+1 words.  When sep is omitted
    or None, any whitespace string acts as a separator.

    (split and splitfields are synonymous)

    """
    pieces = s.split(sep, maxsplit)
    return pieces
splitfields = split
| Guido van Rossum | fac38b7 | 1991-04-07 13:42:19 +0000 | [diff] [blame] | 296 |  | 
| Hye-Shik Chang | 3ae811b | 2003-12-15 18:49:53 +0000 | [diff] [blame] | 297 | # Split a string into a list of space/tab-separated words | 
def rsplit(s, sep=None, maxsplit=-1):
    """rsplit(s [,sep [,maxsplit]]) -> list of strings

    Break s into a list of words, treating sep as the delimiter
    string and working from the end of the string towards the front.
    With maxsplit given, at most maxsplit splits are made.  When sep
    is omitted or None, any whitespace string acts as a separator.
    """
    pieces = s.rsplit(sep, maxsplit)
    return pieces
|  | 308 |  | 
| Guido van Rossum | 2ab1992 | 1995-06-22 18:58:00 +0000 | [diff] [blame] | 309 | # Join fields with optional separator | 
def join(words, sep=' '):
    """join(list [,sep]) -> string

    Concatenate the words in list into one string, placing sep
    between neighbouring words.  sep defaults to a single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
|  | 322 |  | 
| Guido van Rossum | d316607 | 1993-05-24 14:16:22 +0000 | [diff] [blame] | 323 | # Find substring, raise exception if not found | 
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Same as find, except that ValueError is raised when the
    substring is not found.

    """
    position = s.index(*args)
    return position
| Guido van Rossum | d316607 | 1993-05-24 14:16:22 +0000 | [diff] [blame] | 331 |  | 
| Guido van Rossum | e65cce5 | 1993-11-08 15:05:21 +0000 | [diff] [blame] | 332 | # Find last substring, raise exception if not found | 
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Same as rfind, except that ValueError is raised when the
    substring is not found.

    """
    position = s.rindex(*args)
    return position
| Guido van Rossum | b6775db | 1994-08-01 11:34:53 +0000 | [diff] [blame] | 340 |  | 
|  | 341 | # Count non-overlapping occurrences of substring | 
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Count the occurrences of substring sub in s[start:end].  The
    optional start and end arguments are interpreted as in slice
    notation.

    """
    occurrences = s.count(*args)
    return occurrences
| Guido van Rossum | e65cce5 | 1993-11-08 15:05:21 +0000 | [diff] [blame] | 351 |  | 
| Guido van Rossum | d316607 | 1993-05-24 14:16:22 +0000 | [diff] [blame] | 352 | # Find substring, return -1 if not found | 
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s at which substring sub occurs with
    sub lying entirely inside s[start:end].  The optional start and
    end arguments are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.find(*args)
    return position
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 364 |  | 
| Guido van Rossum | e65cce5 | 1993-11-08 15:05:21 +0000 | [diff] [blame] | 365 | # Find last substring, return -1 if not found | 
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s at which substring sub occurs with
    sub lying entirely inside s[start:end].  The optional start and
    end arguments are interpreted as in slice notation.

    Return -1 on failure.

    """
    position = s.rfind(*args)
    return position
| Guido van Rossum | e65cce5 | 1993-11-08 15:05:21 +0000 | [diff] [blame] | 377 |  | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 378 | # for a bit of speed | 
|  | 379 | _float = float | 
|  | 380 | _int = int | 
|  | 381 | _long = long | 
| Guido van Rossum | d0753e2 | 1997-12-10 22:59:55 +0000 | [diff] [blame] | 382 |  | 
| Guido van Rossum | e61fa0a | 1993-10-22 13:56:35 +0000 | [diff] [blame] | 383 | # Convert string to float | 
def atof(s):
    """atof(s) -> float

    Convert the string s to the floating point number it represents.

    """
    number = _float(s)
    return number
|  | 391 |  | 
| Guido van Rossum | e61fa0a | 1993-10-22 13:56:35 +0000 | [diff] [blame] | 392 |  | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 393 | # Convert string to integer | 
def atoi(s, base=10):
    """atoi(s [,base]) -> int

    Convert the string s to the integer it represents in the given
    base (default 10).  s must consist of one or more digits,
    optionally preceded by a sign.  With base 0 the base is chosen
    from the leading characters of s: 0 for octal, 0x or 0X for
    hexadecimal.  With base 16 a preceding 0x or 0X is accepted.

    """
    number = _int(s, base)
    return number
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 406 |  | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 407 |  | 
| Guido van Rossum | e61fa0a | 1993-10-22 13:56:35 +0000 | [diff] [blame] | 408 | # Convert string to long integer | 
def atol(s, base=10):
    """atol(s [,base]) -> long

    Convert the string s to the long integer it represents in the
    given base (default 10).  s must consist of one or more digits,
    optionally preceded by a sign.  With base 0 the base is chosen
    from the leading characters of s: 0 for octal, 0x or 0X for
    hexadecimal.  With base 16 a preceding 0x or 0X is accepted.
    A trailing L or l is not accepted, unless base is 0.

    """
    number = _long(s, base)
    return number
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 422 |  | 
| Guido van Rossum | e61fa0a | 1993-10-22 13:56:35 +0000 | [diff] [blame] | 423 |  | 
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 424 | # Left-justify a string | 
def ljust(s, width, *args):
    """ljust(s, width[, fillchar]) -> string

    Left-justify s inside a field that is width characters wide.
    Padding goes on the right and uses spaces, or fillchar when one
    is supplied.  s is returned in full even if it is longer than
    width; nothing is ever cut off.

    """
    # Any extra positional argument (the fill character) is forwarded
    # unchanged to the string method.
    padded = s.ljust(width, *args)
    return padded
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 434 |  | 
|  | 435 | # Right-justify a string | 
def rjust(s, width, *args):
    """rjust(s, width[, fillchar]) -> string

    Right-justify s inside a field that is width characters wide.
    Padding goes on the left and uses spaces, or fillchar when one
    is supplied.  s is returned in full even if it is longer than
    width; nothing is ever cut off.

    """
    # Any extra positional argument (the fill character) is forwarded
    # unchanged to the string method.
    padded = s.rjust(width, *args)
    return padded
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 445 |  | 
|  | 446 | # Center a string | 
def center(s, width, *args):
    """center(s, width[, fillchar]) -> string

    Center s inside a field that is width characters wide.  Padding
    is added on both sides and uses spaces, or fillchar when one is
    supplied.  s is returned in full even if it is longer than width;
    nothing is ever cut off.

    """
    # Any extra positional argument (the fill character) is forwarded
    # unchanged to the string method.
    centered = s.center(width, *args)
    return centered
| Guido van Rossum | c636014 | 1990-10-13 19:23:40 +0000 | [diff] [blame] | 456 |  | 
|  | 457 | # Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' | 
|  | 458 | # Decadent feature: the argument may be a string or a number | 
|  | 459 | # (Use of this is deprecated; it should be a string as with ljust c.s.) | 
def zfill(x, width):
    """zfill(x, width) -> string

    Left-pad the numeric string x with zeros until it is width
    characters wide.  x is returned in full even if it is already
    longer than width.  Passing something that is not a string is
    deprecated; such a value is turned into text with repr() before
    padding.

    """
    # Strings are padded as they are; anything else goes through repr().
    text = x if isinstance(x, basestring) else repr(x)
    return text.zfill(width)
| Guido van Rossum | 6ff2e90 | 1992-03-27 15:13:31 +0000 | [diff] [blame] | 470 |  | 
|  | 471 | # Expand tabs in a string. | 
|  | 472 | # Doesn't take non-printing chars into account, but does understand \n. | 
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Produce a copy of s in which every tab character has been turned
    into spaces.  How many spaces each tab becomes depends on the
    column it appears in and on tabsize, which is 8 unless given.

    """
    expanded = s.expandtabs(tabsize)
    return expanded
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 482 |  | 
| Guido van Rossum | 2539528 | 1996-05-28 23:08:45 +0000 | [diff] [blame] | 483 | # Character translation through look-up table. | 
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Produce a copy of s from which every character listed in the
    optional deletions argument has been dropped, with the characters
    that remain mapped through table.  table must be a string of
    length 256.  Unicode strings do not accept the deletions argument.

    """
    if not deletions and table is not None:
        # Appending the empty slice s[:0] coerces an 8-bit table to
        # Unicode whenever s is Unicode.  A side effect is that table
        # *cannot* be a dictionary here -- call u.translate() directly
        # when a dictionary mapping is wanted.
        return s.translate(table + s[:0])
    # There is something to delete, or no table at all: hand both
    # arguments to the string method exactly as received.
    return s.translate(table, deletions)
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 501 |  | 
| Guido van Rossum | 8775d8b | 1996-06-11 18:43:00 +0000 | [diff] [blame] | 502 | # Capitalize a string, e.g. "aBc  dEf" -> "Abc  def". | 
def capitalize(s):
    """capitalize(s) -> string

    Produce a copy of s in which the first character is capitalized
    and no other character is.

    """
    capitalized = s.capitalize()
    return capitalized
| Guido van Rossum | 8775d8b | 1996-06-11 18:43:00 +0000 | [diff] [blame] | 511 |  | 
| Guido van Rossum | 1eb9a81 | 1997-03-25 16:50:31 +0000 | [diff] [blame] | 512 | # Substring replacement (global) | 
def replace(s, old, new, maxreplace=-1):
    """replace(s, old, new[, maxreplace]) -> string

    Produce a copy of s in which each occurrence of the substring old
    has been swapped for new.  When the optional maxreplace argument
    is supplied, only the first maxreplace occurrences are swapped.

    """
    # maxreplace is always passed on; its default of -1 is what the
    # string method receives when the caller gives no limit.
    replaced = s.replace(old, new, maxreplace)
    return replaced
| Guido van Rossum | 1eb9a81 | 1997-03-25 16:50:31 +0000 | [diff] [blame] | 522 |  | 
|  | 523 |  | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 524 | # Try importing optional built-in module "strop" -- if it exists, | 
|  | 525 | # it redefines some string operations that are 100-1000 times faster. | 
| Guido van Rossum | 8e2ec56 | 1993-07-29 09:37:38 +0000 | [diff] [blame] | 526 | # It also defines values for whitespace, lowercase and uppercase | 
|  | 527 | # that match <ctype.h>'s definitions. | 
| Guido van Rossum | 2db9135 | 1992-10-18 17:09:59 +0000 | [diff] [blame] | 528 |  | 
|  | 529 | try: | 
| Barry Warsaw | 226ae6c | 1999-10-12 19:54:53 +0000 | [diff] [blame] | 530 | from strop import maketrans, lowercase, uppercase, whitespace | 
|  | 531 | letters = lowercase + uppercase | 
| Guido van Rossum | b6775db | 1994-08-01 11:34:53 +0000 | [diff] [blame] | 532 | except ImportError: | 
| Fred Drake | 857c4c3 | 2000-02-10 16:21:11 +0000 | [diff] [blame] | 533 | pass                                          # Use the original versions | 
| Eric Smith | a9f7d62 | 2008-02-17 19:46:49 +0000 | [diff] [blame] | 534 |  | 
|  | 535 | ######################################################################## | 
|  | 536 | # the Formatter class | 
|  | 537 | # see PEP 3101 for details and purpose of this class | 
|  | 538 |  | 
| Benjamin Peterson | b7c95ce | 2008-11-09 01:52:32 +0000 | [diff] [blame] | 539 | # The hard parts are reused from the C implementation.  They're exposed as "_" | 
|  | 540 | # prefixed methods of str and unicode. | 
| Eric Smith | a9f7d62 | 2008-02-17 19:46:49 +0000 | [diff] [blame] | 541 |  | 
|  | 542 | # The overall parser is implemented in str._formatter_parser. | 
|  | 543 | # The field name parser is implemented in str._formatter_field_name_split | 
|  | 544 |  | 
class Formatter(object):
    """Format-string engine built from overridable steps (see PEP 3101).

    format() and vformat() drive the work; parse(), get_field(),
    get_value(), convert_field(), format_field() and check_unused_args()
    are the individual steps, each invoked through self so that a
    subclass can replace any of them.
    """

    def format(*args, **kwargs):
        """format(format_string, *args, **kwargs) -> string

        Format format_string with the remaining positional and keyword
        arguments.  The instance and the format string are taken from
        *args by hand so that "self" and "format_string" stay available
        as keyword names for fields.  When no format string is given
        positionally, it is popped from kwargs['format_string'].

        Raises TypeError when the instance or the format string is
        missing.
        """
        if not args:
            raise TypeError("descriptor 'format' of 'Formatter' object "
                            "needs an argument")
        # First positional argument is the instance itself.
        self = args[0]
        args = args[1:]
        if args:
            # Next positional argument is the format string.
            format_string = args[0]
            args = args[1:]
        elif 'format_string' in kwargs:
            format_string = kwargs.pop('format_string')
        else:
            raise TypeError("format() missing 1 required positional "
                            "argument: 'format_string'")
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string using an args sequence and a kwargs mapping.

        The arguments referenced while formatting are collected in a set
        and handed to check_unused_args() before the text is returned.
        Nested fields inside format specs are allowed up to a recursion
        budget of 2.
        """
        seen = set()
        formatted = self._vformat(format_string, args, kwargs, seen, 2)
        self.check_unused_args(seen, args, kwargs)
        return formatted

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Do the real formatting work for vformat().

        used_args is a set that receives, for every field processed, the
        second item returned by get_field().  recursion_depth is the
        remaining budget for expanding format specs; ValueError is raised
        once it drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')

        pieces = []
        parsed = self.parse(format_string)
        for literal_text, field_name, format_spec, conversion in parsed:
            # Literal text preceding the field (may be empty).
            if literal_text:
                pieces.append(literal_text)

            # No field in this chunk: nothing more to emit.
            if field_name is None:
                continue

            # Resolve the field name to an object, remembering which
            # argument supplied it.
            value, arg_used = self.get_field(field_name, args, kwargs)
            used_args.add(arg_used)

            # Apply the conversion, if any.
            value = self.convert_field(value, conversion)

            # The format spec is itself run through the formatter, one
            # level deeper.
            spec = self._vformat(format_spec, args, kwargs,
                                 used_args, recursion_depth - 1)

            # Render the object and keep the text.
            pieces.append(self.format_field(value, spec))

        return ''.join(pieces)

    def get_value(self, key, args, kwargs):
        """Look key up in args when it is an integer, otherwise in kwargs."""
        source = args if isinstance(key, (int, long)) else kwargs
        return source[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Render value according to format_spec via the built-in format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion to value.

        None leaves value untouched, 's' applies str() and 'r' applies
        repr().  Anything else raises ValueError.
        """
        if conversion is None:
            return value
        if conversion == 's':
            return str(value)
        if conversion == 'r':
            return repr(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    def parse(self, format_string):
        """Split format_string into an iterable of 4-tuples.

        Each tuple is (literal_text, field_name, format_spec, conversion).
        literal_text may be empty.  A field_name of None means the chunk
        carries no field to format; otherwise the field is looked up,
        converted with conversion and rendered with format_spec.
        """
        chunks = format_string._formatter_parser()
        return chunks

    def get_field(self, field_name, args, kwargs):
        """Resolve field_name (e.g. "0.name" or "lookup[3]") to an object.

        args and kwargs are the ones given to vformat().  Returns a pair
        (obj, first) where first is the leading part of field_name that
        was passed to get_value().
        """
        first, rest = field_name._formatter_field_name_split()

        target = self.get_value(first, args, kwargs)

        # Walk the remainder of the name, doing attribute access or
        # item access as each step requires.
        for is_attr, step in rest:
            target = getattr(target, step) if is_attr else target[step]

        return target, first