Index: Lib/json/decoder.py |
=================================================================== |
--- Lib/json/decoder.py (revision 69885) |
+++ Lib/json/decoder.py (working copy) |
@@ -1,10 +1,10 @@ |
"""Implementation of JSONDecoder |
""" |
- |
import re |
import sys |
+import struct |
-from json.scanner import Scanner, pattern |
+from json.scanner import make_scanner |
try: |
from _json import scanstring as c_scanstring |
except ImportError: |
@@ -14,9 +14,16 @@ |
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL |
-NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf') |
+def _floatconstants(): |
+ _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') |
+ if sys.byteorder != 'big': |
+ _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] |
+ nan, inf = struct.unpack('dd', _BYTES) |
Martin v. Löwis
2009/02/28 09:28:11
I think this can be simplified as
nan, inf = st
bob.ippolito
2009/02/28 10:02:57
Not while maintaining Python 2.4 compatibility, yo
|
+ return nan, inf, -inf |
+NaN, PosInf, NegInf = _floatconstants() |
+ |
def linecol(doc, pos): |
lineno = doc.count('\n', 0, pos) + 1 |
if lineno == 1: |
@@ -27,49 +34,26 @@ |
def errmsg(msg, doc, pos, end=None): |
+ # Note that this function is called from _json |
lineno, colno = linecol(doc, pos) |
if end is None: |
fmt = '{0}: line {1} column {2} (char {3})' |
return fmt.format(msg, lineno, colno, pos) |
+ #fmt = '%s: line %d column %d (char %d)' |
+ #return fmt % (msg, lineno, colno, pos) |
endlineno, endcolno = linecol(doc, end) |
fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' |
return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) |
+ #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' |
+ #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) |
_CONSTANTS = { |
'-Infinity': NegInf, |
'Infinity': PosInf, |
'NaN': NaN, |
- 'true': True, |
- 'false': False, |
- 'null': None, |
} |
- |
-def JSONConstant(match, context, c=_CONSTANTS): |
- s = match.group(0) |
- fn = getattr(context, 'parse_constant', None) |
- if fn is None: |
- rval = c[s] |
- else: |
- rval = fn(s) |
- return rval, None |
-pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) |
- |
- |
-def JSONNumber(match, context): |
- match = JSONNumber.regex.match(match.string, *match.span()) |
- integer, frac, exp = match.groups() |
- if frac or exp: |
- fn = getattr(context, 'parse_float', None) or float |
- res = fn(integer + (frac or '') + (exp or '')) |
- else: |
- fn = getattr(context, 'parse_int', None) or int |
- res = fn(integer) |
- return res, None |
-pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) |
- |
- |
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) |
BACKSLASH = { |
'"': u'"', '\\': u'\\', '/': u'/', |
@@ -78,8 +62,16 @@ |
DEFAULT_ENCODING = "utf-8" |
- |
-def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match): |
+def py_scanstring(s, end, encoding=None, strict=True, |
+ _b=BACKSLASH, _m=STRINGCHUNK.match): |
+ """Scan the string s for a JSON string. End is the index of the |
+ character in s after the quote that started the JSON string. |
+ Unescapes all valid JSON string escape sequences and raises ValueError |
+ on attempt to decode an invalid string. If strict is False then literal |
+ control characters are allowed in the string. |
+ |
+ Returns a tuple of the decoded string and the index of the character in s |
+ after the end quote.""" |
if encoding is None: |
encoding = DEFAULT_ENCODING |
chunks = [] |
@@ -92,14 +84,18 @@ |
errmsg("Unterminated string starting at", s, begin)) |
end = chunk.end() |
content, terminator = chunk.groups() |
+ # Content contains zero or more unescaped string characters |
if content: |
if not isinstance(content, unicode): |
content = unicode(content, encoding) |
_append(content) |
+ # Terminator is the end of string, a literal control character, |
+ # or a backslash denoting that an escape sequence follows |
if terminator == '"': |
break |
elif terminator != '\\': |
if strict: |
+ #msg = "Invalid control character %r at" % (terminator,) |
msg = "Invalid control character {0!r} at".format(terminator) |
raise ValueError(errmsg(msg, s, end)) |
else: |
@@ -110,137 +106,158 @@ |
except IndexError: |
raise ValueError( |
errmsg("Unterminated string starting at", s, begin)) |
+ # If not a unicode escape sequence, must be in the lookup table |
if esc != 'u': |
try: |
- m = _b[esc] |
+ char = _b[esc] |
except KeyError: |
- msg = "Invalid \\escape: {0!r}".format(esc) |
+ msg = "Invalid \\escape: " + repr(esc) |
raise ValueError(errmsg(msg, s, end)) |
end += 1 |
else: |
+ # Unicode escape sequence |
esc = s[end + 1:end + 5] |
next_end = end + 5 |
- msg = "Invalid \\uXXXX escape" |
- try: |
- if len(esc) != 4: |
- raise ValueError |
- uni = int(esc, 16) |
- if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: |
- msg = "Invalid \\uXXXX\\uXXXX surrogate pair" |
- if not s[end + 5:end + 7] == '\\u': |
- raise ValueError |
- esc2 = s[end + 7:end + 11] |
- if len(esc2) != 4: |
- raise ValueError |
- uni2 = int(esc2, 16) |
- uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) |
- next_end += 6 |
- m = unichr(uni) |
- except ValueError: |
+ if len(esc) != 4: |
+ msg = "Invalid \\uXXXX escape" |
raise ValueError(errmsg(msg, s, end)) |
+ uni = int(esc, 16) |
+ # Check for surrogate pair on UCS-4 systems |
+ if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: |
+ msg = "Invalid \\uXXXX\\uXXXX surrogate pair" |
+ if not s[end + 5:end + 7] == '\\u': |
+ raise ValueError(errmsg(msg, s, end)) |
+ esc2 = s[end + 7:end + 11] |
+ if len(esc2) != 4: |
+ raise ValueError(errmsg(msg, s, end)) |
+ uni2 = int(esc2, 16) |
+ uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) |
+ next_end += 6 |
+ char = unichr(uni) |
end = next_end |
- _append(m) |
+ # Append the unescaped character |
+ _append(char) |
return u''.join(chunks), end |
-# Use speedup |
-if c_scanstring is not None: |
- scanstring = c_scanstring |
-else: |
- scanstring = py_scanstring |
+# Use speedup if available |
+scanstring = c_scanstring or py_scanstring |
-def JSONString(match, context): |
- encoding = getattr(context, 'encoding', None) |
- strict = getattr(context, 'strict', True) |
- return scanstring(match.string, match.end(), encoding, strict) |
-pattern(r'"')(JSONString) |
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) |
+WHITESPACE_STR = ' \t\n\r' |
- |
-WHITESPACE = re.compile(r'\s*', FLAGS) |
- |
- |
-def JSONObject(match, context, _w=WHITESPACE.match): |
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, |
+ _w=WHITESPACE.match, _ws=WHITESPACE_STR): |
pairs = {} |
- s = match.string |
- end = _w(s, match.end()).end() |
+ # Use a slice to prevent IndexError from being raised; the following |
+ # check will raise a more specific ValueError if the string is empty. |
nextchar = s[end:end + 1] |
- # Trivial empty object |
- if nextchar == '}': |
- return pairs, end + 1 |
+ # Normally we expect nextchar == '"' |
if nextchar != '"': |
- raise ValueError(errmsg("Expecting property name", s, end)) |
+ if nextchar in _ws: |
+ end = _w(s, end).end() |
+ nextchar = s[end:end + 1] |
+ # Trivial empty object |
+ if nextchar == '}': |
+ return pairs, end + 1 |
+ elif nextchar != '"': |
+ raise ValueError(errmsg("Expecting property name", s, end)) |
end += 1 |
- encoding = getattr(context, 'encoding', None) |
- strict = getattr(context, 'strict', True) |
- iterscan = JSONScanner.iterscan |
while True: |
key, end = scanstring(s, end, encoding, strict) |
- end = _w(s, end).end() |
+ |
+ # To skip some function call overhead we optimize the fast paths where |
+ # the JSON key separator is ": " or just ":". |
if s[end:end + 1] != ':': |
- raise ValueError(errmsg("Expecting : delimiter", s, end)) |
- end = _w(s, end + 1).end() |
+ end = _w(s, end).end() |
+ if s[end:end + 1] != ':': |
+ raise ValueError(errmsg("Expecting : delimiter", s, end)) |
+ |
+ end += 1 |
+ |
try: |
- value, end = iterscan(s, idx=end, context=context).next() |
+ if s[end] in _ws: |
+ end += 1 |
+ if s[end] in _ws: |
+ end = _w(s, end + 1).end() |
+ except IndexError: |
+ pass |
+ |
+ try: |
+ value, end = scan_once(s, end) |
except StopIteration: |
raise ValueError(errmsg("Expecting object", s, end)) |
pairs[key] = value |
- end = _w(s, end).end() |
- nextchar = s[end:end + 1] |
+ |
+ try: |
+ nextchar = s[end] |
+ if nextchar in _ws: |
+ end = _w(s, end + 1).end() |
+ nextchar = s[end] |
+ except IndexError: |
+ nextchar = '' |
end += 1 |
+ |
if nextchar == '}': |
break |
- if nextchar != ',': |
+ elif nextchar != ',': |
raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) |
- end = _w(s, end).end() |
- nextchar = s[end:end + 1] |
+ |
+ try: |
+ nextchar = s[end] |
+ if nextchar in _ws: |
+ end += 1 |
+ nextchar = s[end] |
+ if nextchar in _ws: |
+ end = _w(s, end + 1).end() |
+ nextchar = s[end] |
+ except IndexError: |
+ nextchar = '' |
+ |
end += 1 |
if nextchar != '"': |
raise ValueError(errmsg("Expecting property name", s, end - 1)) |
- object_hook = getattr(context, 'object_hook', None) |
+ |
if object_hook is not None: |
pairs = object_hook(pairs) |
return pairs, end |
-pattern(r'{')(JSONObject) |
- |
-def JSONArray(match, context, _w=WHITESPACE.match): |
+def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): |
values = [] |
- s = match.string |
- end = _w(s, match.end()).end() |
+ nextchar = s[end:end + 1] |
+ if nextchar in _ws: |
+ end = _w(s, end + 1).end() |
+ nextchar = s[end:end + 1] |
# Look-ahead for trivial empty array |
- nextchar = s[end:end + 1] |
if nextchar == ']': |
return values, end + 1 |
- iterscan = JSONScanner.iterscan |
+ _append = values.append |
while True: |
try: |
- value, end = iterscan(s, idx=end, context=context).next() |
+ value, end = scan_once(s, end) |
except StopIteration: |
raise ValueError(errmsg("Expecting object", s, end)) |
- values.append(value) |
- end = _w(s, end).end() |
+ _append(value) |
nextchar = s[end:end + 1] |
+ if nextchar in _ws: |
+ end = _w(s, end + 1).end() |
+ nextchar = s[end:end + 1] |
end += 1 |
if nextchar == ']': |
break |
- if nextchar != ',': |
+ elif nextchar != ',': |
raise ValueError(errmsg("Expecting , delimiter", s, end)) |
- end = _w(s, end).end() |
- return values, end |
-pattern(r'\[')(JSONArray) |
+ try: |
+ if s[end] in _ws: |
+ end += 1 |
+ if s[end] in _ws: |
+ end = _w(s, end + 1).end() |
+ except IndexError: |
+ pass |
-ANYTHING = [ |
- JSONObject, |
- JSONArray, |
- JSONString, |
- JSONConstant, |
- JSONNumber, |
-] |
+ return values, end |
-JSONScanner = Scanner(ANYTHING) |
- |
- |
class JSONDecoder(object): |
"""Simple JSON <http://json.org> decoder |
@@ -268,11 +285,9 @@ |
It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as |
their corresponding ``float`` values, which is outside the JSON spec. |
+ |
""" |
- _scanner = Scanner(ANYTHING) |
- __all__ = ['__init__', 'decode', 'raw_decode'] |
- |
def __init__(self, encoding=None, object_hook=None, parse_float=None, |
parse_int=None, parse_constant=None, strict=True): |
"""``encoding`` determines the encoding used to interpret any ``str`` |
@@ -282,8 +297,8 @@ |
Note that currently only encodings that are a superset of ASCII work, |
strings of other encodings should be passed in as ``unicode``. |
- ``object_hook``, if specified, will be called with the result of |
- every JSON object decoded and its return value will be used in |
+ ``object_hook``, if specified, will be called with the result |
+ of every JSON object decoded and its return value will be used in |
place of the given ``dict``. This can be used to provide custom |
deserializations (e.g. to support JSON-RPC class hinting). |
@@ -298,21 +313,24 @@ |
for JSON integers (e.g. float). |
``parse_constant``, if specified, will be called with one of the |
- following strings: -Infinity, Infinity, NaN, null, true, false. |
+ following strings: -Infinity, Infinity, NaN. |
This can be used to raise an exception if invalid JSON numbers |
are encountered. |
""" |
self.encoding = encoding |
self.object_hook = object_hook |
- self.parse_float = parse_float |
- self.parse_int = parse_int |
- self.parse_constant = parse_constant |
+ self.parse_float = parse_float or float |
+ self.parse_int = parse_int or int |
+ self.parse_constant = parse_constant or _CONSTANTS.__getitem__ |
self.strict = strict |
+ self.parse_object = JSONObject |
+ self.parse_array = JSONArray |
+ self.parse_string = scanstring |
+ self.scan_once = make_scanner(self) |
def decode(self, s, _w=WHITESPACE.match): |
- """ |
- Return the Python representation of ``s`` (a ``str`` or ``unicode`` |
+ """Return the Python representation of ``s`` (a ``str`` or ``unicode`` |
instance containing a JSON document) |
""" |
@@ -322,18 +340,17 @@ |
raise ValueError(errmsg("Extra data", s, end, len(s))) |
return obj |
- def raw_decode(self, s, **kw): |
- """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning |
- with a JSON document) and return a 2-tuple of the Python |
+ def raw_decode(self, s, idx=0): |
+ """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` |
+ beginning with a JSON document) and return a 2-tuple of the Python |
representation and the index in ``s`` where the document ended. |
This can be used to decode a JSON document from a string that may |
have extraneous data at the end. |
""" |
- kw.setdefault('context', self) |
try: |
- obj, end = self._scanner.iterscan(s, **kw).next() |
+ obj, end = self.scan_once(s, idx) |
except StopIteration: |
raise ValueError("No JSON object could be decoded") |
return obj, end |