Index: Lib/json/encoder.py |
=================================================================== |
--- Lib/json/encoder.py (revision 66961) |
+++ Lib/json/encoder.py (working copy) |
@@ -8,6 +8,10 @@ |
from _json import encode_basestring_ascii as c_encode_basestring_ascii |
except ImportError: |
c_encode_basestring_ascii = None |
+try: |
+ from _json import make_encoder as c_make_encoder |
+except ImportError: |
+ c_make_encoder = None |
__all__ = ['JSONEncoder'] |
@@ -28,27 +32,6 @@ |
FLOAT_REPR = repr |
-def floatstr(o, allow_nan=True): |
- # Check for specials. Note that this type of test is processor- and/or |
- # platform-specific, so do tests which don't depend on the internals. |
- |
- if math.isnan(o): |
- text = 'NaN' |
- elif math.isinf(o): |
- if math.copysign(1., o) == 1.: |
- text = 'Infinity' |
- else: |
- text = '-Infinity' |
- else: |
- return FLOAT_REPR(o) |
- |
- if not allow_nan: |
- msg = "Out of range float values are not JSON compliant: " + repr(o) |
- raise ValueError(msg) |
- |
- return text |
- |
- |
def encode_basestring(s): |
"""Return a JSON representation of a Python string |
@@ -78,12 +61,8 @@ |
return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' |
-if c_encode_basestring_ascii is not None: |
- encode_basestring_ascii = c_encode_basestring_ascii |
-else: |
- encode_basestring_ascii = py_encode_basestring_ascii |
+# c_encode_basestring_ascii is None when the _json import failed, so fall back to pure Python. |
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii |
- |
class JSONEncoder(object): |
"""Extensible JSON <http://json.org> encoder for Python data structures. |
@@ -167,17 +146,133 @@ |
self.allow_nan = allow_nan |
self.sort_keys = sort_keys |
self.indent = indent |
- self.current_indent_level = 0 |
if separators is not None: |
self.item_separator, self.key_separator = separators |
if default is not None: |
self.default = default |
self.encoding = encoding |
- def _newline_indent(self): |
- return '\n' + (' ' * (self.indent * self.current_indent_level)) |
+ def default(self, o): |
+ """Implement this method in a subclass such that it returns |
+ a serializable object for ``o``, or calls the base implementation |
+ (to raise a ``TypeError``). |
- def _iterencode_list(self, lst, markers=None): |
+ For example, to support arbitrary iterators, you could |
+ implement default like this:: |
+ |
+ def default(self, o): |
+ try: |
+ iterable = iter(o) |
+ except TypeError: |
+ pass |
+ else: |
+ return list(iterable) |
+ return JSONEncoder.default(self, o) |
+ |
+ """ |
+ # The base implementation converts nothing: it unconditionally raises. |
+ raise TypeError("{0!r} is not JSON serializable".format(o)) |
+ |
+ def encode(self, o): |
+ """Return a JSON string representation of a Python data structure. |
+ |
+ >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) |
+ '{"foo": ["bar", "baz"]}' |
+ |
+ """ |
+ # This is for extremely simple cases and benchmarks. |
+ if isinstance(o, basestring): |
+ if isinstance(o, str): |
+ _encoding = self.encoding |
+ # Decode byte strings only when an encoding is set and it is not UTF-8. |
+ if (_encoding is not None |
+ and not (_encoding == 'utf-8')): |
+ o = o.decode(_encoding) |
+ if self.ensure_ascii: |
+ return encode_basestring_ascii(o) |
+ else: |
+ return encode_basestring(o) |
+ # This doesn't pass the iterator directly to ''.join() because the |
+ # exceptions aren't as detailed. The list call should be roughly |
+ # equivalent to the PySequence_Fast that ''.join() would do. |
+ chunks = self.iterencode(o, _one_shot=True) |
+ # Skip the copy when iterencode already handed back a list or tuple. |
+ if not isinstance(chunks, (list, tuple)): |
+ chunks = list(chunks) |
+ return ''.join(chunks) |
+ |
+ def iterencode(self, o, _one_shot=False): |
+ """Encode the given object and yield each string |
+ representation as available. |
+ |
+ For example:: |
+ |
+ for chunk in JSONEncoder().iterencode(bigobject): |
+ mysocket.write(chunk) |
+ |
+ ``_one_shot`` is a private flag passed by ``encode``; when true it |
+ permits the C encoder to be used if one is available. |
+ |
+ """ |
+ if self.check_circular: |
+ markers = {} |
+ else: |
+ markers = None |
+ if self.ensure_ascii: |
+ _encoder = encode_basestring_ascii |
+ else: |
+ _encoder = encode_basestring |
+ # Mirror encode(): an encoding of None means "do not decode", so only |
+ # wrap the string encoder for a real, non-UTF-8 byte encoding. |
+ if self.encoding is not None and self.encoding != 'utf-8': |
+ def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): |
+ if isinstance(o, str): |
+ o = o.decode(_encoding) |
+ return _orig_encoder(o) |
+ |
+ # Bind allow_nan from the instance: _make_iterencode calls this helper |
+ # with a single argument, so the default is the only way it is honoured. |
+ def floatstr(o, allow_nan=self.allow_nan): |
+ # Check for specials. Note that this type of test is processor- |
+ # and/or platform-specific, so do tests which don't depend on the |
+ # internals. |
+ |
+ if math.isnan(o): |
+ text = 'NaN' |
+ elif math.isinf(o): |
+ if math.copysign(1., o) == 1.: |
+ text = 'Infinity' |
+ else: |
+ text = '-Infinity' |
+ else: |
+ return FLOAT_REPR(o) |
+ |
+ if not allow_nan: |
+ msg = "Out of range float values are not JSON compliant: " + repr(o) |
+ raise ValueError(msg) |
+ |
+ return text |
+ |
+ # Take the C fast path only when no indentation was requested. Test |
+ # against None, as _make_iterencode does, so indent=0 still gets newlines. |
+ if _one_shot and c_make_encoder is not None and self.indent is None and not self.sort_keys: |
+ _iterencode = c_make_encoder( |
+ markers, self.default, _encoder, self.indent, |
+ self.key_separator, self.item_separator, self.sort_keys, |
+ self.skipkeys, self.allow_nan) |
+ else: |
+ _iterencode = _make_iterencode( |
+ markers, self.default, _encoder, self.indent, floatstr, |
+ self.key_separator, self.item_separator, self.sort_keys, |
+ self.skipkeys, _one_shot) |
+ return _iterencode(o, 0) |
+ |
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, |
+ ## HACK: hand-optimized bytecode; turn globals into locals |
+ False=False, |
+ True=True, |
+ ValueError=ValueError, |
+ basestring=basestring, |
+ dict=dict, |
+ float=float, |
+ id=id, |
+ int=int, |
+ isinstance=isinstance, |
+ list=list, |
+ long=long, |
+ str=str, |
+ tuple=tuple, |
+ ): |
+ |
+ def _iterencode_list(lst, _current_indent_level): |
if not lst: |
yield '[]' |
return |
@@ -186,31 +281,51 @@ |
if markerid in markers: |
raise ValueError("Circular reference detected") |
markers[markerid] = lst |
- yield '[' |
- if self.indent is not None: |
- self.current_indent_level += 1 |
- newline_indent = self._newline_indent() |
- separator = self.item_separator + newline_indent |
- yield newline_indent |
+ buf = '[' |
+ if _indent is not None: |
+ _current_indent_level += 1 |
+ newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) |
+ separator = _item_separator + newline_indent |
+ buf += newline_indent |
else: |
newline_indent = None |
- separator = self.item_separator |
+ separator = _item_separator |
first = True |
for value in lst: |
if first: |
first = False |
else: |
- yield separator |
- for chunk in self._iterencode(value, markers): |
- yield chunk |
+ buf = separator |
+ if isinstance(value, basestring): |
+ yield buf + _encoder(value) |
+ elif value is None: |
+ yield buf + 'null' |
+ elif value is True: |
+ yield buf + 'true' |
+ elif value is False: |
+ yield buf + 'false' |
+ elif isinstance(value, (int, long)): |
+ yield buf + str(value) |
+ elif isinstance(value, float): |
+ yield buf + _floatstr(value) |
+ else: |
+ yield buf |
+ if isinstance(value, (list, tuple)): |
+ chunks = _iterencode_list(value, _current_indent_level) |
+ elif isinstance(value, dict): |
+ chunks = _iterencode_dict(value, _current_indent_level) |
+ else: |
+ chunks = _iterencode(value, _current_indent_level) |
+ for chunk in chunks: |
+ yield chunk |
if newline_indent is not None: |
- self.current_indent_level -= 1 |
- yield self._newline_indent() |
+ _current_indent_level -= 1 |
+ yield '\n' + (' ' * (_indent * _current_indent_level)) |
yield ']' |
if markers is not None: |
del markers[markerid] |
- def _iterencode_dict(self, dct, markers=None): |
+ def _iterencode_dict(dct, _current_indent_level): |
if not dct: |
yield '{}' |
return |
@@ -220,40 +335,27 @@ |
raise ValueError("Circular reference detected") |
markers[markerid] = dct |
yield '{' |
- key_separator = self.key_separator |
- if self.indent is not None: |
- self.current_indent_level += 1 |
- newline_indent = self._newline_indent() |
- item_separator = self.item_separator + newline_indent |
+ if _indent is not None: |
+ _current_indent_level += 1 |
+ newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) |
+ item_separator = _item_separator + newline_indent |
yield newline_indent |
else: |
newline_indent = None |
- item_separator = self.item_separator |
+ item_separator = _item_separator |
first = True |
- if self.ensure_ascii: |
- encoder = encode_basestring_ascii |
+ if _sort_keys: |
+ items = dct.items() |
+ items.sort(key=lambda kv: kv[0]) |
else: |
- encoder = encode_basestring |
- allow_nan = self.allow_nan |
- if self.sort_keys: |
- keys = dct.keys() |
- keys.sort() |
- items = [(k, dct[k]) for k in keys] |
- else: |
items = dct.iteritems() |
- _encoding = self.encoding |
- _do_decode = (_encoding is not None |
- and not (_encoding == 'utf-8')) |
for key, value in items: |
- if isinstance(key, str): |
- if _do_decode: |
- key = key.decode(_encoding) |
- elif isinstance(key, basestring): |
+ if isinstance(key, basestring): |
pass |
# JavaScript is weakly typed for these, so it makes sense to |
# also allow them. Many encoders seem to do something like this. |
elif isinstance(key, float): |
- key = floatstr(key, allow_nan) |
+ key = _floatstr(key) |
elif isinstance(key, (int, long)): |
key = str(key) |
elif key is True: |
@@ -262,7 +364,7 @@ |
key = 'false' |
elif key is None: |
key = 'null' |
- elif self.skipkeys: |
+ elif _skipkeys: |
continue |
else: |
raise TypeError("key {0!r} is not a string".format(key)) |
@@ -270,28 +372,39 @@ |
first = False |
else: |
yield item_separator |
- yield encoder(key) |
- yield key_separator |
- for chunk in self._iterencode(value, markers): |
- yield chunk |
+ yield _encoder(key) |
+ yield _key_separator |
+ if isinstance(value, basestring): |
+ yield _encoder(value) |
+ elif value is None: |
+ yield 'null' |
+ elif value is True: |
+ yield 'true' |
+ elif value is False: |
+ yield 'false' |
+ elif isinstance(value, (int, long)): |
+ yield str(value) |
+ elif isinstance(value, float): |
+ yield _floatstr(value) |
+ else: |
+ if isinstance(value, (list, tuple)): |
+ chunks = _iterencode_list(value, _current_indent_level) |
+ elif isinstance(value, dict): |
+ chunks = _iterencode_dict(value, _current_indent_level) |
+ else: |
+ chunks = _iterencode(value, _current_indent_level) |
+ for chunk in chunks: |
+ yield chunk |
if newline_indent is not None: |
- self.current_indent_level -= 1 |
- yield self._newline_indent() |
+ _current_indent_level -= 1 |
+ yield '\n' + (' ' * (_indent * _current_indent_level)) |
yield '}' |
if markers is not None: |
del markers[markerid] |
- def _iterencode(self, o, markers=None): |
+ def _iterencode(o, _current_indent_level): |
if isinstance(o, basestring): |
- if self.ensure_ascii: |
- encoder = encode_basestring_ascii |
- else: |
- encoder = encode_basestring |
- _encoding = self.encoding |
- if (_encoding is not None and isinstance(o, str) |
- and not (_encoding == 'utf-8')): |
- o = o.decode(_encoding) |
- yield encoder(o) |
+ yield _encoder(o) |
elif o is None: |
yield 'null' |
elif o is True: |
@@ -301,12 +414,12 @@ |
elif isinstance(o, (int, long)): |
yield str(o) |
elif isinstance(o, float): |
- yield floatstr(o, self.allow_nan) |
+ yield _floatstr(o) |
elif isinstance(o, (list, tuple)): |
- for chunk in self._iterencode_list(o, markers): |
+ for chunk in _iterencode_list(o, _current_indent_level): |
yield chunk |
elif isinstance(o, dict): |
- for chunk in self._iterencode_dict(o, markers): |
+ for chunk in _iterencode_dict(o, _current_indent_level): |
yield chunk |
else: |
if markers is not None: |
@@ -314,71 +427,10 @@ |
if markerid in markers: |
raise ValueError("Circular reference detected") |
markers[markerid] = o |
- for chunk in self._iterencode_default(o, markers): |
+ o = _default(o) |
+ for chunk in _iterencode(o, _current_indent_level): |
yield chunk |
if markers is not None: |
del markers[markerid] |
- def _iterencode_default(self, o, markers=None): |
- newobj = self.default(o) |
- return self._iterencode(newobj, markers) |
- |
- def default(self, o): |
- """Implement this method in a subclass such that it returns a serializable |
- object for ``o``, or calls the base implementation (to raise a |
- ``TypeError``). |
- |
- For example, to support arbitrary iterators, you could implement |
- default like this:: |
- |
- def default(self, o): |
- try: |
- iterable = iter(o) |
- except TypeError: |
- pass |
- else: |
- return list(iterable) |
- return JSONEncoder.default(self, o) |
- |
- """ |
- raise TypeError(repr(o) + " is not JSON serializable") |
- |
- def encode(self, o): |
- """Return a JSON string representation of a Python data structure. |
- |
- >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) |
- '{"foo": ["bar", "baz"]}' |
- |
- """ |
- # This is for extremely simple cases and benchmarks. |
- if isinstance(o, basestring): |
- if isinstance(o, str): |
- _encoding = self.encoding |
- if (_encoding is not None |
- and not (_encoding == 'utf-8')): |
- o = o.decode(_encoding) |
- if self.ensure_ascii: |
- return encode_basestring_ascii(o) |
- else: |
- return encode_basestring(o) |
- # This doesn't pass the iterator directly to ''.join() because the |
- # exceptions aren't as detailed. The list call should be roughly |
- # equivalent to the PySequence_Fast that ''.join() would do. |
- chunks = list(self.iterencode(o)) |
- return ''.join(chunks) |
- |
- def iterencode(self, o): |
- """Encode the given object and yield each string representation as |
- available. |
- |
- For example:: |
- |
- for chunk in JSONEncoder().iterencode(bigobject): |
- mysocket.write(chunk) |
- |
- """ |
- if self.check_circular: |
- markers = {} |
- else: |
- markers = None |
- return self._iterencode(o, markers) |
+ return _iterencode |