Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(904)

Side by Side Diff: Lib/json/decoder.py

Issue 7311: [issue4136] merge json library with simplejson 2.0.3 Base URL: http://svn.python.org/view/*checkout*/python/trunk/
Patch Set: Created 5 years, 6 months ago, Downloaded from: http://bugs.python.org/file11822/json_issue4136_r66961.diff
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 """Implementation of JSONDecoder 1 """Implementation of JSONDecoder
2 """ 2 """
3 3
4 import re 4 import re
5 import sys 5 import sys
6 import struct
6 7
7 from json.scanner import Scanner, pattern 8 from json.scanner import make_scanner
8 try: 9 try:
9 from _json import scanstring as c_scanstring 10 from _json import scanstring as c_scanstring
10 except ImportError: 11 except ImportError:
11 c_scanstring = None 12 c_scanstring = None
12 13
13 __all__ = ['JSONDecoder'] 14 __all__ = ['JSONDecoder']
14 15
15 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL 16 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
16 17
17 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf') 18 NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
(...skipping 15 matching lines...) Expand all
33 return fmt.format(msg, lineno, colno, pos) 34 return fmt.format(msg, lineno, colno, pos)
34 endlineno, endcolno = linecol(doc, end) 35 endlineno, endcolno = linecol(doc, end)
35 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' 36 fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
36 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) 37 return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
37 38
38 39
39 _CONSTANTS = { 40 _CONSTANTS = {
40 '-Infinity': NegInf, 41 '-Infinity': NegInf,
41 'Infinity': PosInf, 42 'Infinity': PosInf,
42 'NaN': NaN, 43 'NaN': NaN,
43 'true': True,
44 'false': False,
45 'null': None,
46 } 44 }
47 45
48
49 def JSONConstant(match, context, c=_CONSTANTS):
50 s = match.group(0)
51 fn = getattr(context, 'parse_constant', None)
52 if fn is None:
53 rval = c[s]
54 else:
55 rval = fn(s)
56 return rval, None
57 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
58
59
60 def JSONNumber(match, context):
61 match = JSONNumber.regex.match(match.string, *match.span())
62 integer, frac, exp = match.groups()
63 if frac or exp:
64 fn = getattr(context, 'parse_float', None) or float
65 res = fn(integer + (frac or '') + (exp or ''))
66 else:
67 fn = getattr(context, 'parse_int', None) or int
68 res = fn(integer)
69 return res, None
70 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
71
72
73 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) 46 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
74 BACKSLASH = { 47 BACKSLASH = {
75 '"': u'"', '\\': u'\\', '/': u'/', 48 '"': u'"', '\\': u'\\', '/': u'/',
76 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t', 49 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
77 } 50 }
78 51
79 DEFAULT_ENCODING = "utf-8" 52 DEFAULT_ENCODING = "utf-8"
80 53
81 54
82 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): 55 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
Martin v. Löwis 2009/01/04 13:22:29 This function should get some comments what all th
bob.ippolito 2009/01/05 01:28:19 Commented in the next patch.
83 if encoding is None: 56 if encoding is None:
84 encoding = DEFAULT_ENCODING 57 encoding = DEFAULT_ENCODING
85 chunks = [] 58 chunks = []
86 _append = chunks.append 59 _append = chunks.append
87 begin = end - 1 60 begin = end - 1
88 while 1: 61 while 1:
89 chunk = _m(s, end) 62 chunk = _m(s, end)
90 if chunk is None: 63 if chunk is None:
91 raise ValueError( 64 raise ValueError(
92 errmsg("Unterminated string starting at", s, begin)) 65 errmsg("Unterminated string starting at", s, begin))
93 end = chunk.end() 66 end = chunk.end()
94 content, terminator = chunk.groups() 67 content, terminator = chunk.groups()
95 if content: 68 if content:
96 if not isinstance(content, unicode): 69 if not isinstance(content, unicode):
97 content = unicode(content, encoding) 70 content = unicode(content, encoding)
98 _append(content) 71 _append(content)
Martin v. Löwis 2009/01/04 13:22:29 # 3 cases: end of string, control character, escap
bob.ippolito 2009/01/05 01:28:19 Commented in the next patch
99 if terminator == '"': 72 if terminator == '"':
100 break 73 break
101 elif terminator != '\\': 74 elif terminator != '\\':
102 if strict: 75 if strict:
103 msg = "Invalid control character {0!r} at".format(terminator) 76 msg = "Invalid control character {0!r} at".format(esc)
Martin v. Löwis 2009/01/04 13:22:29 esc isn't assigned until a few lines later. Is thi
bob.ippolito 2009/01/05 01:28:19 This is a bug in the exception handling code, fixe
104 raise ValueError(errmsg(msg, s, end)) 77 raise ValueError(errmsg(msg, s, end))
105 else: 78 else:
106 _append(terminator) 79 _append(terminator)
107 continue 80 continue
108 try: 81 try:
109 esc = s[end] 82 esc = s[end]
110 except IndexError: 83 except IndexError:
111 raise ValueError( 84 raise ValueError(
112 errmsg("Unterminated string starting at", s, begin)) 85 errmsg("Unterminated string starting at", s, begin))
113 if esc != 'u': 86 if esc != 'u':
114 try: 87 try:
115 m = _b[esc] 88 m = _b[esc]
116 except KeyError: 89 except KeyError:
117 msg = "Invalid \\escape: {0!r}".format(esc) 90 raise ValueError(
118 raise ValueError(errmsg(msg, s, end)) 91 errmsg("Invalid \\escape: {0!r}".format(esc), s, end))
119 end += 1 92 end += 1
120 else: 93 else:
121 esc = s[end + 1:end + 5] 94 esc = s[end + 1:end + 5]
122 next_end = end + 5 95 next_end = end + 5
123 msg = "Invalid \\uXXXX escape" 96 msg = "Invalid \\uXXXX escape"
124 try: 97 try:
125 if len(esc) != 4: 98 if len(esc) != 4:
126 raise ValueError 99 raise ValueError
127 uni = int(esc, 16) 100 uni = int(esc, 16)
128 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: 101 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
129 msg = "Invalid \\uXXXX\\uXXXX surrogate pair" 102 msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
130 if not s[end + 5:end + 7] == '\\u': 103 if not s[end + 5:end + 7] == '\\u':
131 raise ValueError 104 raise ValueError
Martin v. Löwis 2009/01/04 13:22:29 No message?
bob.ippolito 2009/01/05 01:28:19 Exception is caught at the except block and re-rai
132 esc2 = s[end + 7:end + 11] 105 esc2 = s[end + 7:end + 11]
133 if len(esc2) != 4: 106 if len(esc2) != 4:
134 raise ValueError 107 raise ValueError
Martin v. Löwis 2009/01/04 13:22:29 No message?
bob.ippolito 2009/01/05 01:28:19 Exception is caught at the except block and re-rai
135 uni2 = int(esc2, 16) 108 uni2 = int(esc2, 16)
136 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 109 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
137 next_end += 6 110 next_end += 6
138 m = unichr(uni) 111 m = unichr(uni)
Martin v. Löwis 2009/01/04 13:22:29 What's the purpose of m?
bob.ippolito 2009/01/05 01:28:19 Renamed m to char in the next patch.
139 except ValueError: 112 except ValueError:
140 raise ValueError(errmsg(msg, s, end)) 113 raise ValueError(errmsg(msg, s, end))
141 end = next_end 114 end = next_end
142 _append(m) 115 _append(m)
143 return u''.join(chunks), end 116 return u''.join(chunks), end
144 117
145 118
146 # Use speedup 119 # Use speedup if available
147 if c_scanstring is not None: 120 scanstring = c_scanstring or py_scanstring
148 scanstring = c_scanstring
149 else:
150 scanstring = py_scanstring
151 121
152 def JSONString(match, context): 122 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
153 encoding = getattr(context, 'encoding', None) 123 WHITESPACE_STR = ' \t\n\r'
154 strict = getattr(context, 'strict', True)
155 return scanstring(match.string, match.end(), encoding, strict)
156 pattern(r'"')(JSONString)
157 124
158 125 def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
159 WHITESPACE = re.compile(r'\s*', FLAGS)
160
161
162 def JSONObject(match, context, _w=WHITESPACE.match):
163 pairs = {} 126 pairs = {}
164 s = match.string
165 end = _w(s, match.end()).end()
166 nextchar = s[end:end + 1] 127 nextchar = s[end:end + 1]
Martin v. Löwis 2009/01/04 13:22:29 Why not s[end]? Add comment if this is necessary.
bob.ippolito 2009/01/05 01:28:19 commented in next patch (only once). s[end] can ra
167 # Trivial empty object 128 # Normally we expect nextchar == '"'
168 if nextchar == '}':
169 return pairs, end + 1
170 if nextchar != '"': 129 if nextchar != '"':
171 raise ValueError(errmsg("Expecting property name", s, end)) 130 if nextchar in _ws:
131 end = _w(s, end).end()
132 nextchar = s[end:end + 1]
Martin v. Löwis 2009/01/04 13:22:29 Likewise. There are more places where it does slic
bob.ippolito 2009/01/05 01:28:19 commented in next patch (only once). s[end] can ra
133 # Trivial empty object
134 if nextchar == '}':
135 return pairs, end + 1
136 elif nextchar != '"':
137 raise ValueError(errmsg("Expecting property name", s, end))
172 end += 1 138 end += 1
173 encoding = getattr(context, 'encoding', None)
174 strict = getattr(context, 'strict', True)
175 iterscan = JSONScanner.iterscan
176 while True: 139 while True:
177 key, end = scanstring(s, end, encoding, strict) 140 key, end = scanstring(s, end, encoding, strict)
178 end = _w(s, end).end() 141
142 # To skip some function call overhead we optimize the fast paths where
143 # the JSON key separator is ": " or just ":".
179 if s[end:end + 1] != ':': 144 if s[end:end + 1] != ':':
180 raise ValueError(errmsg("Expecting : delimiter", s, end)) 145 end = _w(s, end).end()
181 end = _w(s, end + 1).end() 146 if s[end:end + 1] != ':':
147 raise ValueError(errmsg("Expecting : delimiter", s, end))
148
149 end += 1
150
182 try: 151 try:
183 value, end = iterscan(s, idx=end, context=context).next() 152 if s[end] in _ws:
153 end += 1
154 if s[end] in _ws:
155 end = _w(s, end + 1).end()
156 except IndexError:
157 pass
158
159 try:
160 value, end = scan_once(s, end)
184 except StopIteration: 161 except StopIteration:
185 raise ValueError(errmsg("Expecting object", s, end)) 162 raise ValueError(errmsg("Expecting object", s, end))
186 pairs[key] = value 163 pairs[key] = value
187 end = _w(s, end).end() 164
188 nextchar = s[end:end + 1] 165 try:
166 nextchar = s[end]
167 if nextchar in _ws:
168 end = _w(s, end + 1).end()
169 nextchar = s[end]
170 except IndexError:
171 nextchar = ''
189 end += 1 172 end += 1
173
190 if nextchar == '}': 174 if nextchar == '}':
191 break 175 break
192 if nextchar != ',': 176 elif nextchar != ',':
193 raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) 177 raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
194 end = _w(s, end).end() 178
195 nextchar = s[end:end + 1] 179 try:
180 nextchar = s[end]
181 if nextchar in _ws:
182 end += 1
183 nextchar = s[end]
184 if nextchar in _ws:
185 end = _w(s, end + 1).end()
186 nextchar = s[end]
187 except IndexError:
188 nextchar = ''
189
196 end += 1 190 end += 1
197 if nextchar != '"': 191 if nextchar != '"':
198 raise ValueError(errmsg("Expecting property name", s, end - 1)) 192 raise ValueError(errmsg("Expecting property name", s, end - 1))
199 object_hook = getattr(context, 'object_hook', None) 193
200 if object_hook is not None: 194 if object_hook is not None:
201 pairs = object_hook(pairs) 195 pairs = object_hook(pairs)
202 return pairs, end 196 return pairs, end
203 pattern(r'{')(JSONObject)
204 197
205 198 def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
206 def JSONArray(match, context, _w=WHITESPACE.match):
207 values = [] 199 values = []
208 s = match.string 200 nextchar = s[end:end + 1]
209 end = _w(s, match.end()).end() 201 if nextchar in _ws:
202 end = _w(s, end + 1).end()
203 nextchar = s[end:end + 1]
210 # Look-ahead for trivial empty array 204 # Look-ahead for trivial empty array
211 nextchar = s[end:end + 1]
212 if nextchar == ']': 205 if nextchar == ']':
213 return values, end + 1 206 return values, end + 1
214 iterscan = JSONScanner.iterscan 207 _append = values.append
215 while True: 208 while True:
216 try: 209 try:
217 value, end = iterscan(s, idx=end, context=context).next() 210 value, end = scan_once(s, end)
218 except StopIteration: 211 except StopIteration:
219 raise ValueError(errmsg("Expecting object", s, end)) 212 raise ValueError(errmsg("Expecting object", s, end))
220 values.append(value) 213 _append(value)
221 end = _w(s, end).end()
222 nextchar = s[end:end + 1] 214 nextchar = s[end:end + 1]
215 if nextchar in _ws:
216 end = _w(s, end + 1).end()
217 nextchar = s[end:end + 1]
223 end += 1 218 end += 1
224 if nextchar == ']': 219 if nextchar == ']':
225 break 220 break
226 if nextchar != ',': 221 elif nextchar != ',':
227 raise ValueError(errmsg("Expecting , delimiter", s, end)) 222 raise ValueError(errmsg("Expecting , delimiter", s, end))
228 end = _w(s, end).end() 223
224 try:
225 if s[end] in _ws:
226 end += 1
227 if s[end] in _ws:
228 end = _w(s, end + 1).end()
229 except IndexError:
230 pass
231
229 return values, end 232 return values, end
230 pattern(r'\[')(JSONArray)
231
232
233 ANYTHING = [
234 JSONObject,
235 JSONArray,
236 JSONString,
237 JSONConstant,
238 JSONNumber,
239 ]
240
241 JSONScanner = Scanner(ANYTHING)
242
243 233
244 class JSONDecoder(object): 234 class JSONDecoder(object):
245 """Simple JSON <http://json.org> decoder 235 """Simple JSON <http://json.org> decoder
246 236
247 Performs the following translations in decoding by default: 237 Performs the following translations in decoding by default:
248 238
249 +---------------+-------------------+ 239 +---------------+-------------------+
250 | JSON | Python | 240 | JSON | Python |
251 +===============+===================+ 241 +===============+===================+
252 | object | dict | 242 | object | dict |
(...skipping 10 matching lines...) Expand all
263 +---------------+-------------------+ 253 +---------------+-------------------+
264 | false | False | 254 | false | False |
265 +---------------+-------------------+ 255 +---------------+-------------------+
266 | null | None | 256 | null | None |
267 +---------------+-------------------+ 257 +---------------+-------------------+
268 258
269 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as 259 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
270 their corresponding ``float`` values, which is outside the JSON spec. 260 their corresponding ``float`` values, which is outside the JSON spec.
271 """ 261 """
272 262
273 _scanner = Scanner(ANYTHING)
274 __all__ = ['__init__', 'decode', 'raw_decode'] 263 __all__ = ['__init__', 'decode', 'raw_decode']
275 264
276 def __init__(self, encoding=None, object_hook=None, parse_float=None, 265 def __init__(self, encoding=None, object_hook=None, parse_float=None,
277 parse_int=None, parse_constant=None, strict=True): 266 parse_int=None, parse_constant=None, strict=True):
278 """``encoding`` determines the encoding used to interpret any ``str`` 267 """``encoding`` determines the encoding used to interpret any ``str``
279 objects decoded by this instance (utf-8 by default). It has no 268 objects decoded by this instance (utf-8 by default). It has no
280 effect when decoding ``unicode`` objects. 269 effect when decoding ``unicode`` objects.
281 270
282 Note that currently only encodings that are a superset of ASCII work, 271 Note that currently only encodings that are a superset of ASCII work,
283 strings of other encodings should be passed in as ``unicode``. 272 strings of other encodings should be passed in as ``unicode``.
284 273
285 ``object_hook``, if specified, will be called with the result of 274 ``object_hook``, if specified, will be called with the result
286 every JSON object decoded and its return value will be used in 275 of every JSON object decoded and its return value will be used in
287 place of the given ``dict``. This can be used to provide custom 276 place of the given ``dict``. This can be used to provide custom
288 deserializations (e.g. to support JSON-RPC class hinting). 277 deserializations (e.g. to support JSON-RPC class hinting).
289 278
290 ``parse_float``, if specified, will be called with the string 279 ``parse_float``, if specified, will be called with the string
291 of every JSON float to be decoded. By default this is equivalent to 280 of every JSON float to be decoded. By default this is equivalent to
292 float(num_str). This can be used to use another datatype or parser 281 float(num_str). This can be used to use another datatype or parser
293 for JSON floats (e.g. decimal.Decimal). 282 for JSON floats (e.g. decimal.Decimal).
294 283
295 ``parse_int``, if specified, will be called with the string 284 ``parse_int``, if specified, will be called with the string
296 of every JSON int to be decoded. By default this is equivalent to 285 of every JSON int to be decoded. By default this is equivalent to
297 int(num_str). This can be used to use another datatype or parser 286 int(num_str). This can be used to use another datatype or parser
298 for JSON integers (e.g. float). 287 for JSON integers (e.g. float).
299 288
300 ``parse_constant``, if specified, will be called with one of the 289 ``parse_constant``, if specified, will be called with one of the
301 following strings: -Infinity, Infinity, NaN, null, true, false. 290 following strings: -Infinity, Infinity, NaN.
Martin v. Löwis 2009/01/04 13:22:29 This sounds like an incompatible change.
bob.ippolito 2009/01/05 01:28:19 Not practically speaking. The documented purpose o
302 This can be used to raise an exception if invalid JSON numbers 291 This can be used to raise an exception if invalid JSON numbers
303 are encountered. 292 are encountered.
304 293
305 """ 294 """
306 self.encoding = encoding 295 self.encoding = encoding
307 self.object_hook = object_hook 296 self.object_hook = object_hook
308 self.parse_float = parse_float 297 self.parse_float = parse_float or float
309 self.parse_int = parse_int 298 self.parse_int = parse_int or int
310 self.parse_constant = parse_constant 299 self.parse_constant = parse_constant or _CONSTANTS.__getitem__
311 self.strict = strict 300 self.strict = strict
301 self.parse_object = JSONObject
302 self.parse_array = JSONArray
303 self.parse_string = scanstring
304 self.scan_once = make_scanner(self)
312 305
313 def decode(self, s, _w=WHITESPACE.match): 306 def decode(self, s, _w=WHITESPACE.match):
314 """ 307 """Return the Python representation of ``s`` (a ``str`` or ``unicode``
315 Return the Python representation of ``s`` (a ``str`` or ``unicode``
316 instance containing a JSON document) 308 instance containing a JSON document)
317 309
318 """ 310 """
319 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 311 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
320 end = _w(s, end).end() 312 end = _w(s, end).end()
321 if end != len(s): 313 if end != len(s):
322 raise ValueError(errmsg("Extra data", s, end, len(s))) 314 raise ValueError(errmsg("Extra data", s, end, len(s)))
323 return obj 315 return obj
324 316
325 def raw_decode(self, s, **kw): 317 def raw_decode(self, s, idx=0):
Martin v. Löwis 2009/01/04 13:22:29 That looks like an incompatible change
bob.ippolito 2009/01/05 01:28:19 It is a compatible change.
326 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning 318 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
327 with a JSON document) and return a 2-tuple of the Python 319 with a JSON document) and return a 2-tuple of the Python
328 representation and the index in ``s`` where the document ended. 320 representation and the index in ``s`` where the document ended.
329 321
330 This can be used to decode a JSON document from a string that may 322 This can be used to decode a JSON document from a string that may
331 have extraneous data at the end. 323 have extraneous data at the end.
332 324
333 """ 325 """
334 kw.setdefault('context', self)
335 try: 326 try:
336 obj, end = self._scanner.iterscan(s, **kw).next() 327 obj, end = self.scan_once(s, idx)
337 except StopIteration: 328 except StopIteration:
338 raise ValueError("No JSON object could be decoded") 329 raise ValueError("No JSON object could be decoded")
339 return obj, end 330 return obj, end
OLDNEW
« no previous file with comments | « Lib/json/__init__.py ('k') | Lib/json/encoder.py » ('j') | Modules/_json.c » ('J')

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld 1278:e6ce13d99bf5