OLD | NEW |
1 """Iterator based sre token scanner | 1 """JSON token scanner |
2 | |
3 """ | 2 """ |
4 | 3 |
5 import re | 4 import re |
6 import sre_parse | 5 try: |
7 import sre_compile | 6 from _json import make_scanner as c_make_scanner |
8 import sre_constants | 7 except ImportError: |
| 8 c_make_scanner = None |
9 | 9 |
10 from re import VERBOSE, MULTILINE, DOTALL | 10 __all__ = ['make_scanner'] |
11 from sre_constants import BRANCH, SUBPATTERN | |
12 | 11 |
13 __all__ = ['Scanner', 'pattern'] | 12 NUMBER_RE = re.compile( |
| 13 r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', |
| 14 (re.VERBOSE | re.MULTILINE | re.DOTALL)) |
14 | 15 |
15 FLAGS = (VERBOSE | MULTILINE | DOTALL) | 16 def py_make_scanner(context): |
| 17 parse_object = context.parse_object |
| 18 parse_array = context.parse_array |
| 19 parse_string = context.parse_string |
| 20 match_number = NUMBER_RE.match |
| 21 encoding = context.encoding |
| 22 strict = context.strict |
| 23 parse_float = context.parse_float |
| 24 parse_int = context.parse_int |
| 25 parse_constant = context.parse_constant |
| 26 object_hook = context.object_hook |
16 | 27 |
17 class Scanner(object): | 28 def _scan_once(string, idx): |
18 def __init__(self, lexicon, flags=FLAGS): | 29 try: |
19 self.actions = [None] | 30 nextchar = string[idx] |
20 # Combine phrases into a compound pattern | 31 except IndexError: |
21 s = sre_parse.Pattern() | 32 raise StopIteration |
22 s.flags = flags | |
23 p = [] | |
24 for idx, token in enumerate(lexicon): | |
25 phrase = token.pattern | |
26 try: | |
27 subpattern = sre_parse.SubPattern(s, | |
28 [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) | |
29 except sre_constants.error: | |
30 raise | |
31 p.append(subpattern) | |
32 self.actions.append(token) | |
33 | 33 |
34 s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work | 34 if nextchar == '"': |
35 p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) | 35 return parse_string(string, idx + 1, encoding, strict) |
36 self.scanner = sre_compile.compile(p) | 36 elif nextchar == '{': |
| 37 return parse_object((string, idx + 1), encoding, strict, _scan_once,
object_hook) |
| 38 elif nextchar == '[': |
| 39 return parse_array((string, idx + 1), _scan_once) |
| 40 elif nextchar == 'n' and string[idx:idx + 4] == 'null': |
| 41 return None, idx + 4 |
| 42 elif nextchar == 't' and string[idx:idx + 4] == 'true': |
| 43 return True, idx + 4 |
| 44 elif nextchar == 'f' and string[idx:idx + 5] == 'false': |
| 45 return False, idx + 5 |
37 | 46 |
38 def iterscan(self, string, idx=0, context=None): | 47 m = match_number(string, idx) |
39 """Yield match, end_idx for each match | 48 if m is not None: |
| 49 integer, frac, exp = m.groups() |
| 50 if frac or exp: |
| 51 res = parse_float(integer + (frac or '') + (exp or '')) |
| 52 else: |
| 53 res = parse_int(integer) |
| 54 return res, m.end() |
| 55 elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': |
| 56 return parse_constant('NaN'), idx + 3 |
| 57 elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': |
| 58 return parse_constant('Infinity'), idx + 8 |
| 59 elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': |
| 60 return parse_constant('-Infinity'), idx + 9 |
| 61 else: |
| 62 raise StopIteration |
40 | 63 |
41 """ | 64 return _scan_once |
42 match = self.scanner.scanner(string, idx).match | |
43 actions = self.actions | |
44 lastend = idx | |
45 end = len(string) | |
46 while True: | |
47 m = match() | |
48 if m is None: | |
49 break | |
50 matchbegin, matchend = m.span() | |
51 if lastend == matchend: | |
52 break | |
53 action = actions[m.lastindex] | |
54 if action is not None: | |
55 rval, next_pos = action(m, context) | |
56 if next_pos is not None and next_pos != matchend: | |
57 # "fast forward" the scanner | |
58 matchend = next_pos | |
59 match = self.scanner.scanner(string, matchend).match | |
60 yield rval, matchend | |
61 lastend = matchend | |
62 | 65 |
63 | 66 make_scanner = c_make_scanner or py_make_scanner |
64 def pattern(pattern, flags=FLAGS): | |
65 def decorator(fn): | |
66 fn.pattern = pattern | |
67 fn.regex = re.compile(pattern, flags) | |
68 return fn | |
69 return decorator | |
OLD | NEW |