Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(3786)

Side by Side Diff: Lib/json/encoder.py

Issue 7311: [issue4136] merge json library with simplejson 2.0.3 Base URL: http://svn.python.org/view/*checkout*/python/trunk/
Patch Set: Created 5 years, 6 months ago, Downloaded from: http://bugs.python.org/file11822/json_issue4136_r66961.diff
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLDNEW
1 """Implementation of JSONEncoder 1 """Implementation of JSONEncoder
2 """ 2 """
3 3
4 import re 4 import re
5 import math 5 import math
6 6
7 try: 7 try:
8 from _json import encode_basestring_ascii as c_encode_basestring_ascii 8 from _json import encode_basestring_ascii as c_encode_basestring_ascii
9 except ImportError: 9 except ImportError:
10 c_encode_basestring_ascii = None 10 c_encode_basestring_ascii = None
11 try:
12 from _json import make_encoder as c_make_encoder
13 except ImportError:
14 c_make_encoder = None
11 15
12 __all__ = ['JSONEncoder'] 16 __all__ = ['JSONEncoder']
13 17
14 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') 18 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
15 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') 19 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
16 HAS_UTF8 = re.compile(r'[\x80-\xff]') 20 HAS_UTF8 = re.compile(r'[\x80-\xff]')
17 ESCAPE_DCT = { 21 ESCAPE_DCT = {
18 '\\': '\\\\', 22 '\\': '\\\\',
19 '"': '\\"', 23 '"': '\\"',
20 '\b': '\\b', 24 '\b': '\\b',
21 '\f': '\\f', 25 '\f': '\\f',
22 '\n': '\\n', 26 '\n': '\\n',
23 '\r': '\\r', 27 '\r': '\\r',
24 '\t': '\\t', 28 '\t': '\\t',
25 } 29 }
26 for i in range(0x20): 30 for i in range(0x20):
27 ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) 31 ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
28 32
29 FLOAT_REPR = repr 33 FLOAT_REPR = repr
30 34
31 def floatstr(o, allow_nan=True):
32 # Check for specials. Note that this type of test is processor- and/or
33 # platform-specific, so do tests which don't depend on the internals.
34
35 if math.isnan(o):
36 text = 'NaN'
37 elif math.isinf(o):
38 if math.copysign(1., o) == 1.:
39 text = 'Infinity'
40 else:
41 text = '-Infinity'
42 else:
43 return FLOAT_REPR(o)
44
45 if not allow_nan:
46 msg = "Out of range float values are not JSON compliant: " + repr(o)
47 raise ValueError(msg)
48
49 return text
50
51
52 def encode_basestring(s): 35 def encode_basestring(s):
53 """Return a JSON representation of a Python string 36 """Return a JSON representation of a Python string
54 37
55 """ 38 """
56 def replace(match): 39 def replace(match):
57 return ESCAPE_DCT[match.group(0)] 40 return ESCAPE_DCT[match.group(0)]
58 return '"' + ESCAPE.sub(replace, s) + '"' 41 return '"' + ESCAPE.sub(replace, s) + '"'
59 42
60 43
61 def py_encode_basestring_ascii(s): 44 def py_encode_basestring_ascii(s):
62 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 45 if isinstance(s, str) and HAS_UTF8.search(s) is not None:
63 s = s.decode('utf-8') 46 s = s.decode('utf-8')
64 def replace(match): 47 def replace(match):
65 s = match.group(0) 48 s = match.group(0)
66 try: 49 try:
67 return ESCAPE_DCT[s] 50 return ESCAPE_DCT[s]
68 except KeyError: 51 except KeyError:
69 n = ord(s) 52 n = ord(s)
70 if n < 0x10000: 53 if n < 0x10000:
71 return '\\u{0:04x}'.format(n) 54 return '\\u{0:04x}'.format(n)
72 else: 55 else:
73 # surrogate pair 56 # surrogate pair
74 n -= 0x10000 57 n -= 0x10000
75 s1 = 0xd800 | ((n >> 10) & 0x3ff) 58 s1 = 0xd800 | ((n >> 10) & 0x3ff)
76 s2 = 0xdc00 | (n & 0x3ff) 59 s2 = 0xdc00 | (n & 0x3ff)
77 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) 60 return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
78 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 61 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
79 62
80 63
81 if c_encode_basestring_ascii is not None: 64 encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
82 encode_basestring_ascii = c_encode_basestring_ascii
83 else:
84 encode_basestring_ascii = py_encode_basestring_ascii
85
86 65
87 class JSONEncoder(object): 66 class JSONEncoder(object):
88 """Extensible JSON <http://json.org> encoder for Python data structures. 67 """Extensible JSON <http://json.org> encoder for Python data structures.
89 68
90 Supports the following objects and types by default: 69 Supports the following objects and types by default:
91 70
92 +-------------------+---------------+ 71 +-------------------+---------------+
93 | Python | JSON | 72 | Python | JSON |
94 +===================+===============+ 73 +===================+===============+
95 | dict | object | 74 | dict | object |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
160 transformed into unicode using that encoding prior to JSON-encoding. 139 transformed into unicode using that encoding prior to JSON-encoding.
161 The default is UTF-8. 140 The default is UTF-8.
162 141
163 """ 142 """
164 self.skipkeys = skipkeys 143 self.skipkeys = skipkeys
165 self.ensure_ascii = ensure_ascii 144 self.ensure_ascii = ensure_ascii
166 self.check_circular = check_circular 145 self.check_circular = check_circular
167 self.allow_nan = allow_nan 146 self.allow_nan = allow_nan
168 self.sort_keys = sort_keys 147 self.sort_keys = sort_keys
169 self.indent = indent 148 self.indent = indent
170 self.current_indent_level = 0
171 if separators is not None: 149 if separators is not None:
172 self.item_separator, self.key_separator = separators 150 self.item_separator, self.key_separator = separators
173 if default is not None: 151 if default is not None:
174 self.default = default 152 self.default = default
175 self.encoding = encoding 153 self.encoding = encoding
176 154
177 def _newline_indent(self): 155 def default(self, o):
178 return '\n' + (' ' * (self.indent * self.current_indent_level)) 156 """Implement this method in a subclass such that it returns
157 a serializable object for ``o``, or calls the base implementation
158 (to raise a ``TypeError``).
179 159
180 def _iterencode_list(self, lst, markers=None): 160 For example, to support arbitrary iterators, you could
161 implement default like this::
162
163 def default(self, o):
164 try:
165 iterable = iter(o)
166 except TypeError:
167 pass
168 else:
169 return list(iterable)
170 return JSONEncoder.default(self, o)
171
172 """
173 raise TypeError("{0!r} is not JSON serializable".format(o))
174
175 def encode(self, o):
176 """Return a JSON string representation of a Python data structure.
177
178 >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
179 '{"foo": ["bar", "baz"]}'
180
181 """
182 # This is for extremely simple cases and benchmarks.
183 if isinstance(o, basestring):
184 if isinstance(o, str):
185 _encoding = self.encoding
186 if (_encoding is not None
187 and not (_encoding == 'utf-8')):
188 o = o.decode(_encoding)
189 if self.ensure_ascii:
190 return encode_basestring_ascii(o)
191 else:
192 return encode_basestring(o)
193 # This doesn't pass the iterator directly to ''.join() because the
194 # exceptions aren't as detailed. The list call should be roughly
195 # equivalent to the PySequence_Fast that ''.join() would do.
196 chunks = self.iterencode(o, _one_shot=True)
197 if not isinstance(chunks, (list, tuple)):
198 chunks = list(chunks)
199 return ''.join(chunks)
200
201 def iterencode(self, o, _one_shot=False):
202 """Encode the given object and yield each string
203 representation as available.
204
205 For example::
206
207 for chunk in JSONEncoder().iterencode(bigobject):
208 mysocket.write(chunk)
209
210 """
211 if self.check_circular:
212 markers = {}
213 else:
214 markers = None
215 if self.ensure_ascii:
216 _encoder = encode_basestring_ascii
217 else:
218 _encoder = encode_basestring
219 if self.encoding != 'utf-8':
220 def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
221 if isinstance(o, str):
222 o = o.decode(_encoding)
223 return _orig_encoder(o)
224
225 def floatstr(o, allow_nan=True):
226 # Check for specials. Note that this type of test is processor-
227 # and/or platform-specific, so do tests which don't depend on the
228 # internals.
229
230 if math.isnan(o):
231 text = 'NaN'
232 elif math.isinf(o):
233 if math.copysign(1., o) == 1.:
234 text = 'Infinity'
235 else:
236 text = '-Infinity'
237 else:
238 return FLOAT_REPR(o)
239
240 if not allow_nan:
241 msg = "Out of range float values are not JSON compliant: " + repr(o)
242 raise ValueError(msg)
243
244 return text
245
246 if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
247 _iterencode = c_make_encoder(
248 markers, self.default, _encoder, self.indent,
249 self.key_separator, self.item_separator, self.sort_keys,
250 self.skipkeys, self.allow_nan)
251 else:
252 _iterencode = _make_iterencode(
253 markers, self.default, _encoder, self.indent, floatstr,
254 self.key_separator, self.item_separator, self.sort_keys,
255 self.skipkeys, _one_shot)
256 return _iterencode(o, 0)
257
258 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
259 ## HACK: hand-optimized bytecode; turn globals into locals
260 False=False,
261 True=True,
262 ValueError=ValueError,
263 basestring=basestring,
264 dict=dict,
265 float=float,
266 id=id,
267 int=int,
268 isinstance=isinstance,
269 list=list,
270 long=long,
271 str=str,
272 tuple=tuple,
273 ):
274
275 def _iterencode_list(lst, _current_indent_level):
181 if not lst: 276 if not lst:
182 yield '[]' 277 yield '[]'
183 return 278 return
184 if markers is not None: 279 if markers is not None:
185 markerid = id(lst) 280 markerid = id(lst)
186 if markerid in markers: 281 if markerid in markers:
187 raise ValueError("Circular reference detected") 282 raise ValueError("Circular reference detected")
188 markers[markerid] = lst 283 markers[markerid] = lst
189 yield '[' 284 buf = '['
190 if self.indent is not None: 285 if _indent is not None:
191 self.current_indent_level += 1 286 _current_indent_level += 1
192 newline_indent = self._newline_indent() 287 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
193 separator = self.item_separator + newline_indent 288 separator = _item_separator + newline_indent
194 yield newline_indent 289 buf += newline_indent
195 else: 290 else:
196 newline_indent = None 291 newline_indent = None
197 separator = self.item_separator 292 separator = _item_separator
198 first = True 293 first = True
199 for value in lst: 294 for value in lst:
200 if first: 295 if first:
201 first = False 296 first = False
202 else: 297 else:
203 yield separator 298 buf = separator
204 for chunk in self._iterencode(value, markers): 299 if isinstance(value, basestring):
205 yield chunk 300 yield buf + _encoder(value)
301 elif value is None:
302 yield buf + 'null'
303 elif value is True:
304 yield buf + 'true'
305 elif value is False:
306 yield buf + 'false'
307 elif isinstance(value, (int, long)):
308 yield buf + str(value)
309 elif isinstance(value, float):
310 yield buf + _floatstr(value)
311 else:
312 yield buf
313 if isinstance(value, (list, tuple)):
314 chunks = _iterencode_list(value, _current_indent_level)
315 elif isinstance(value, dict):
316 chunks = _iterencode_dict(value, _current_indent_level)
317 else:
318 chunks = _iterencode(value, _current_indent_level)
319 for chunk in chunks:
320 yield chunk
206 if newline_indent is not None: 321 if newline_indent is not None:
207 self.current_indent_level -= 1 322 _current_indent_level -= 1
208 yield self._newline_indent() 323 yield '\n' + (' ' * (_indent * _current_indent_level))
209 yield ']' 324 yield ']'
210 if markers is not None: 325 if markers is not None:
211 del markers[markerid] 326 del markers[markerid]
212 327
213 def _iterencode_dict(self, dct, markers=None): 328 def _iterencode_dict(dct, _current_indent_level):
214 if not dct: 329 if not dct:
215 yield '{}' 330 yield '{}'
216 return 331 return
217 if markers is not None: 332 if markers is not None:
218 markerid = id(dct) 333 markerid = id(dct)
219 if markerid in markers: 334 if markerid in markers:
220 raise ValueError("Circular reference detected") 335 raise ValueError("Circular reference detected")
221 markers[markerid] = dct 336 markers[markerid] = dct
222 yield '{' 337 yield '{'
223 key_separator = self.key_separator 338 if _indent is not None:
224 if self.indent is not None: 339 _current_indent_level += 1
225 self.current_indent_level += 1 340 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
226 newline_indent = self._newline_indent() 341 item_separator = _item_separator + newline_indent
227 item_separator = self.item_separator + newline_indent
228 yield newline_indent 342 yield newline_indent
229 else: 343 else:
230 newline_indent = None 344 newline_indent = None
231 item_separator = self.item_separator 345 item_separator = _item_separator
232 first = True 346 first = True
233 if self.ensure_ascii: 347 if _sort_keys:
234 encoder = encode_basestring_ascii 348 items = dct.items()
235 else: 349 items.sort(key=lambda kv: kv[0])
236 encoder = encode_basestring
237 allow_nan = self.allow_nan
238 if self.sort_keys:
239 keys = dct.keys()
240 keys.sort()
241 items = [(k, dct[k]) for k in keys]
242 else: 350 else:
243 items = dct.iteritems() 351 items = dct.iteritems()
244 _encoding = self.encoding
245 _do_decode = (_encoding is not None
246 and not (_encoding == 'utf-8'))
247 for key, value in items: 352 for key, value in items:
248 if isinstance(key, str): 353 if isinstance(key, basestring):
249 if _do_decode:
250 key = key.decode(_encoding)
251 elif isinstance(key, basestring):
252 pass 354 pass
253 # JavaScript is weakly typed for these, so it makes sense to 355 # JavaScript is weakly typed for these, so it makes sense to
254 # also allow them. Many encoders seem to do something like this. 356 # also allow them. Many encoders seem to do something like this.
255 elif isinstance(key, float): 357 elif isinstance(key, float):
256 key = floatstr(key, allow_nan) 358 key = _floatstr(key)
257 elif isinstance(key, (int, long)): 359 elif isinstance(key, (int, long)):
258 key = str(key) 360 key = str(key)
259 elif key is True: 361 elif key is True:
260 key = 'true' 362 key = 'true'
261 elif key is False: 363 elif key is False:
262 key = 'false' 364 key = 'false'
263 elif key is None: 365 elif key is None:
264 key = 'null' 366 key = 'null'
265 elif self.skipkeys: 367 elif _skipkeys:
266 continue 368 continue
267 else: 369 else:
268 raise TypeError("key {0!r} is not a string".format(key)) 370 raise TypeError("key {0!r} is not a string".format(key))
269 if first: 371 if first:
270 first = False 372 first = False
271 else: 373 else:
272 yield item_separator 374 yield item_separator
273 yield encoder(key) 375 yield _encoder(key)
274 yield key_separator 376 yield _key_separator
275 for chunk in self._iterencode(value, markers): 377 if isinstance(value, basestring):
276 yield chunk 378 yield _encoder(value)
379 elif value is None:
380 yield 'null'
381 elif value is True:
382 yield 'true'
383 elif value is False:
384 yield 'false'
385 elif isinstance(value, (int, long)):
386 yield str(value)
387 elif isinstance(value, float):
388 yield _floatstr(value)
389 else:
390 if isinstance(value, (list, tuple)):
391 chunks = _iterencode_list(value, _current_indent_level)
392 elif isinstance(value, dict):
393 chunks = _iterencode_dict(value, _current_indent_level)
394 else:
395 chunks = _iterencode(value, _current_indent_level)
396 for chunk in chunks:
397 yield chunk
277 if newline_indent is not None: 398 if newline_indent is not None:
278 self.current_indent_level -= 1 399 _current_indent_level -= 1
279 yield self._newline_indent() 400 yield '\n' + (' ' * (_indent * _current_indent_level))
280 yield '}' 401 yield '}'
281 if markers is not None: 402 if markers is not None:
282 del markers[markerid] 403 del markers[markerid]
283 404
284 def _iterencode(self, o, markers=None): 405 def _iterencode(o, _current_indent_level):
285 if isinstance(o, basestring): 406 if isinstance(o, basestring):
286 if self.ensure_ascii: 407 yield _encoder(o)
287 encoder = encode_basestring_ascii
288 else:
289 encoder = encode_basestring
290 _encoding = self.encoding
291 if (_encoding is not None and isinstance(o, str)
292 and not (_encoding == 'utf-8')):
293 o = o.decode(_encoding)
294 yield encoder(o)
295 elif o is None: 408 elif o is None:
296 yield 'null' 409 yield 'null'
297 elif o is True: 410 elif o is True:
298 yield 'true' 411 yield 'true'
299 elif o is False: 412 elif o is False:
300 yield 'false' 413 yield 'false'
301 elif isinstance(o, (int, long)): 414 elif isinstance(o, (int, long)):
302 yield str(o) 415 yield str(o)
303 elif isinstance(o, float): 416 elif isinstance(o, float):
304 yield floatstr(o, self.allow_nan) 417 yield _floatstr(o)
305 elif isinstance(o, (list, tuple)): 418 elif isinstance(o, (list, tuple)):
306 for chunk in self._iterencode_list(o, markers): 419 for chunk in _iterencode_list(o, _current_indent_level):
307 yield chunk 420 yield chunk
308 elif isinstance(o, dict): 421 elif isinstance(o, dict):
309 for chunk in self._iterencode_dict(o, markers): 422 for chunk in _iterencode_dict(o, _current_indent_level):
310 yield chunk 423 yield chunk
311 else: 424 else:
312 if markers is not None: 425 if markers is not None:
313 markerid = id(o) 426 markerid = id(o)
314 if markerid in markers: 427 if markerid in markers:
315 raise ValueError("Circular reference detected") 428 raise ValueError("Circular reference detected")
316 markers[markerid] = o 429 markers[markerid] = o
317 for chunk in self._iterencode_default(o, markers): 430 o = _default(o)
431 for chunk in _iterencode(o, _current_indent_level):
318 yield chunk 432 yield chunk
319 if markers is not None: 433 if markers is not None:
320 del markers[markerid] 434 del markers[markerid]
321 435
322 def _iterencode_default(self, o, markers=None): 436 return _iterencode
323 newobj = self.default(o)
324 return self._iterencode(newobj, markers)
325
326 def default(self, o):
327 """Implement this method in a subclass such that it returns a serializable
328 object for ``o``, or calls the base implementation (to raise a
329 ``TypeError``).
330
331 For example, to support arbitrary iterators, you could implement
332 default like this::
333
334 def default(self, o):
335 try:
336 iterable = iter(o)
337 except TypeError:
338 pass
339 else:
340 return list(iterable)
341 return JSONEncoder.default(self, o)
342
343 """
344 raise TypeError(repr(o) + " is not JSON serializable")
345
346 def encode(self, o):
347 """Return a JSON string representation of a Python data structure.
348
349 >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
350 '{"foo": ["bar", "baz"]}'
351
352 """
353 # This is for extremely simple cases and benchmarks.
354 if isinstance(o, basestring):
355 if isinstance(o, str):
356 _encoding = self.encoding
357 if (_encoding is not None
358 and not (_encoding == 'utf-8')):
359 o = o.decode(_encoding)
360 if self.ensure_ascii:
361 return encode_basestring_ascii(o)
362 else:
363 return encode_basestring(o)
364 # This doesn't pass the iterator directly to ''.join() because the
365 # exceptions aren't as detailed. The list call should be roughly
366 # equivalent to the PySequence_Fast that ''.join() would do.
367 chunks = list(self.iterencode(o))
368 return ''.join(chunks)
369
370 def iterencode(self, o):
371 """Encode the given object and yield each string representation as
372 available.
373
374 For example::
375
376 for chunk in JSONEncoder().iterencode(bigobject):
377 mysocket.write(chunk)
378
379 """
380 if self.check_circular:
381 markers = {}
382 else:
383 markers = None
384 return self._iterencode(o, markers)
OLDNEW

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld 1278:e6ce13d99bf5