OLD | NEW |
(Empty) | |
| 1 """ |
| 2 Functions for reversing a regular expression (used in reverse URL resolving). |
| 3 Used internally by Django and not intended for external use. |
| 4 |
| 5 This is not, and is not intended to be, a complete reg-exp decompiler. It |
| 6 should be good enough for a large class of URLS, however. |
| 7 """ |
| 8 from __future__ import unicode_literals |
| 9 |
| 10 import warnings |
| 11 |
| 12 from django.utils import six |
| 13 from django.utils.deprecation import RemovedInDjango21Warning |
| 14 from django.utils.six.moves import zip |
| 15 |
| 16 # Mapping of an escape character to a representative of that class. So, e.g., |
| 17 # "\w" is replaced by "x" in a reverse URL. A value of None means to ignore |
| 18 # this sequence. Any missing key is mapped to itself. |
| 19 ESCAPE_MAPPINGS = { |
| 20 "A": None, |
| 21 "b": None, |
| 22 "B": None, |
| 23 "d": "0", |
| 24 "D": "x", |
| 25 "s": " ", |
| 26 "S": "x", |
| 27 "w": "x", |
| 28 "W": "!", |
| 29 "Z": None, |
| 30 } |
| 31 |
| 32 |
| 33 class Choice(list): |
| 34 """ |
| 35 Used to represent multiple possibilities at this point in a pattern string. |
| 36 We use a distinguished type, rather than a list, so that the usage in the |
| 37 code is clear. |
| 38 """ |
| 39 |
| 40 |
| 41 class Group(list): |
| 42 """ |
| 43 Used to represent a capturing group in the pattern string. |
| 44 """ |
| 45 |
| 46 |
| 47 class NonCapture(list): |
| 48 """ |
| 49 Used to represent a non-capturing group in the pattern string. |
| 50 """ |
| 51 |
| 52 |
| 53 def normalize(pattern): |
| 54 r""" |
| 55 Given a reg-exp pattern, normalizes it to an iterable of forms that |
| 56 suffice for reverse matching. This does the following: |
| 57 |
| 58 (1) For any repeating sections, keeps the minimum number of occurrences |
| 59 permitted (this means zero for optional groups). |
| 60 (2) If an optional group includes parameters, include one occurrence of |
| 61 that group (along with the zero occurrence case from step (1)). |
| 62 (3) Select the first (essentially an arbitrary) element from any character |
| 63 class. Select an arbitrary character for any unordered class (e.g. '.' |
| 64 or '\w') in the pattern. |
| 65 (4) Ignore look-ahead and look-behind assertions. |
| 66 (5) Raise an error on any disjunctive ('|') constructs. |
| 67 |
| 68 Django's URLs for forward resolving are either all positional arguments or |
| 69 all keyword arguments. That is assumed here, as well. Although reverse |
| 70 resolving can be done using positional args when keyword args are |
| 71 specified, the two cannot be mixed in the same reverse() call. |
| 72 """ |
| 73 # Do a linear scan to work out the special features of this pattern. The |
| 74 # idea is that we scan once here and collect all the information we need to |
| 75 # make future decisions. |
| 76 result = [] |
| 77 non_capturing_groups = [] |
| 78 consume_next = True |
| 79 pattern_iter = next_char(iter(pattern)) |
| 80 num_args = 0 |
| 81 |
| 82 # A "while" loop is used here because later on we need to be able to peek |
| 83 # at the next character and possibly go around without consuming another |
| 84 # one at the top of the loop. |
| 85 try: |
| 86 ch, escaped = next(pattern_iter) |
| 87 except StopIteration: |
| 88 return [('', [])] |
| 89 |
| 90 try: |
| 91 while True: |
| 92 if escaped: |
| 93 result.append(ch) |
| 94 elif ch == '.': |
| 95 # Replace "any character" with an arbitrary representative. |
| 96 result.append(".") |
| 97 elif ch == '|': |
| 98 # FIXME: One day we'll should do this, but not in 1.0. |
| 99 raise NotImplementedError('Awaiting Implementation') |
| 100 elif ch == "^": |
| 101 pass |
| 102 elif ch == '$': |
| 103 break |
| 104 elif ch == ')': |
| 105 # This can only be the end of a non-capturing group, since all |
| 106 # other unescaped parentheses are handled by the grouping |
| 107 # section later (and the full group is handled there). |
| 108 # |
| 109 # We regroup everything inside the capturing group so that it |
| 110 # can be quantified, if necessary. |
| 111 start = non_capturing_groups.pop() |
| 112 inner = NonCapture(result[start:]) |
| 113 result = result[:start] + [inner] |
| 114 elif ch == '[': |
| 115 # Replace ranges with the first character in the range. |
| 116 ch, escaped = next(pattern_iter) |
| 117 result.append(ch) |
| 118 ch, escaped = next(pattern_iter) |
| 119 while escaped or ch != ']': |
| 120 ch, escaped = next(pattern_iter) |
| 121 elif ch == '(': |
| 122 # Some kind of group. |
| 123 ch, escaped = next(pattern_iter) |
| 124 if ch != '?' or escaped: |
| 125 # A positional group |
| 126 name = "_%d" % num_args |
| 127 num_args += 1 |
| 128 result.append(Group((("%%(%s)s" % name), name))) |
| 129 walk_to_end(ch, pattern_iter) |
| 130 else: |
| 131 ch, escaped = next(pattern_iter) |
| 132 if ch in '!=<': |
| 133 # All of these are ignorable. Walk to the end of the |
| 134 # group. |
| 135 walk_to_end(ch, pattern_iter) |
| 136 elif ch in 'iLmsu#': |
| 137 warnings.warn( |
| 138 'Using (?%s) in url() patterns is deprecated.' % ch, |
| 139 RemovedInDjango21Warning |
| 140 ) |
| 141 walk_to_end(ch, pattern_iter) |
| 142 elif ch == ':': |
| 143 # Non-capturing group |
| 144 non_capturing_groups.append(len(result)) |
| 145 elif ch != 'P': |
| 146 # Anything else, other than a named group, is something |
| 147 # we cannot reverse. |
| 148 raise ValueError("Non-reversible reg-exp portion: '(?%s'
" % ch) |
| 149 else: |
| 150 ch, escaped = next(pattern_iter) |
| 151 if ch not in ('<', '='): |
| 152 raise ValueError("Non-reversible reg-exp portion: '(
?P%s'" % ch) |
| 153 # We are in a named capturing group. Extra the name and |
| 154 # then skip to the end. |
| 155 if ch == '<': |
| 156 terminal_char = '>' |
| 157 # We are in a named backreference. |
| 158 else: |
| 159 terminal_char = ')' |
| 160 name = [] |
| 161 ch, escaped = next(pattern_iter) |
| 162 while ch != terminal_char: |
| 163 name.append(ch) |
| 164 ch, escaped = next(pattern_iter) |
| 165 param = ''.join(name) |
| 166 # Named backreferences have already consumed the |
| 167 # parenthesis. |
| 168 if terminal_char != ')': |
| 169 result.append(Group((("%%(%s)s" % param), param))) |
| 170 walk_to_end(ch, pattern_iter) |
| 171 else: |
| 172 result.append(Group((("%%(%s)s" % param), None))) |
| 173 elif ch in "*?+{": |
| 174 # Quantifiers affect the previous item in the result list. |
| 175 count, ch = get_quantifier(ch, pattern_iter) |
| 176 if ch: |
| 177 # We had to look ahead, but it wasn't need to compute the |
| 178 # quantifier, so use this character next time around the |
| 179 # main loop. |
| 180 consume_next = False |
| 181 |
| 182 if count == 0: |
| 183 if contains(result[-1], Group): |
| 184 # If we are quantifying a capturing group (or |
| 185 # something containing such a group) and the minimum is |
| 186 # zero, we must also handle the case of one occurrence |
| 187 # being present. All the quantifiers (except {0,0}, |
| 188 # which we conveniently ignore) that have a 0 minimum |
| 189 # also allow a single occurrence. |
| 190 result[-1] = Choice([None, result[-1]]) |
| 191 else: |
| 192 result.pop() |
| 193 elif count > 1: |
| 194 result.extend([result[-1]] * (count - 1)) |
| 195 else: |
| 196 # Anything else is a literal. |
| 197 result.append(ch) |
| 198 |
| 199 if consume_next: |
| 200 ch, escaped = next(pattern_iter) |
| 201 else: |
| 202 consume_next = True |
| 203 except StopIteration: |
| 204 pass |
| 205 except NotImplementedError: |
| 206 # A case of using the disjunctive form. No results for you! |
| 207 return [('', [])] |
| 208 |
| 209 return list(zip(*flatten_result(result))) |
| 210 |
| 211 |
| 212 def next_char(input_iter): |
| 213 r""" |
| 214 An iterator that yields the next character from "pattern_iter", respecting |
| 215 escape sequences. An escaped character is replaced by a representative of |
| 216 its class (e.g. \w -> "x"). If the escaped character is one that is |
| 217 skipped, it is not returned (the next character is returned instead). |
| 218 |
| 219 Yields the next character, along with a boolean indicating whether it is a |
| 220 raw (unescaped) character or not. |
| 221 """ |
| 222 for ch in input_iter: |
| 223 if ch != '\\': |
| 224 yield ch, False |
| 225 continue |
| 226 ch = next(input_iter) |
| 227 representative = ESCAPE_MAPPINGS.get(ch, ch) |
| 228 if representative is None: |
| 229 continue |
| 230 yield representative, True |
| 231 |
| 232 |
| 233 def walk_to_end(ch, input_iter): |
| 234 """ |
| 235 The iterator is currently inside a capturing group. We want to walk to the |
| 236 close of this group, skipping over any nested groups and handling escaped |
| 237 parentheses correctly. |
| 238 """ |
| 239 if ch == '(': |
| 240 nesting = 1 |
| 241 else: |
| 242 nesting = 0 |
| 243 for ch, escaped in input_iter: |
| 244 if escaped: |
| 245 continue |
| 246 elif ch == '(': |
| 247 nesting += 1 |
| 248 elif ch == ')': |
| 249 if not nesting: |
| 250 return |
| 251 nesting -= 1 |
| 252 |
| 253 |
| 254 def get_quantifier(ch, input_iter): |
| 255 """ |
| 256 Parse a quantifier from the input, where "ch" is the first character in the |
| 257 quantifier. |
| 258 |
| 259 Returns the minimum number of occurrences permitted by the quantifier and |
| 260 either None or the next character from the input_iter if the next character |
| 261 is not part of the quantifier. |
| 262 """ |
| 263 if ch in '*?+': |
| 264 try: |
| 265 ch2, escaped = next(input_iter) |
| 266 except StopIteration: |
| 267 ch2 = None |
| 268 if ch2 == '?': |
| 269 ch2 = None |
| 270 if ch == '+': |
| 271 return 1, ch2 |
| 272 return 0, ch2 |
| 273 |
| 274 quant = [] |
| 275 while ch != '}': |
| 276 ch, escaped = next(input_iter) |
| 277 quant.append(ch) |
| 278 quant = quant[:-1] |
| 279 values = ''.join(quant).split(',') |
| 280 |
| 281 # Consume the trailing '?', if necessary. |
| 282 try: |
| 283 ch, escaped = next(input_iter) |
| 284 except StopIteration: |
| 285 ch = None |
| 286 if ch == '?': |
| 287 ch = None |
| 288 return int(values[0]), ch |
| 289 |
| 290 |
| 291 def contains(source, inst): |
| 292 """ |
| 293 Returns True if the "source" contains an instance of "inst". False, |
| 294 otherwise. |
| 295 """ |
| 296 if isinstance(source, inst): |
| 297 return True |
| 298 if isinstance(source, NonCapture): |
| 299 for elt in source: |
| 300 if contains(elt, inst): |
| 301 return True |
| 302 return False |
| 303 |
| 304 |
| 305 def flatten_result(source): |
| 306 """ |
| 307 Turns the given source sequence into a list of reg-exp possibilities and |
| 308 their arguments. Returns a list of strings and a list of argument lists. |
| 309 Each of the two lists will be of the same length. |
| 310 """ |
| 311 if source is None: |
| 312 return [''], [[]] |
| 313 if isinstance(source, Group): |
| 314 if source[1] is None: |
| 315 params = [] |
| 316 else: |
| 317 params = [source[1]] |
| 318 return [source[0]], [params] |
| 319 result = [''] |
| 320 result_args = [[]] |
| 321 pos = last = 0 |
| 322 for pos, elt in enumerate(source): |
| 323 if isinstance(elt, six.string_types): |
| 324 continue |
| 325 piece = ''.join(source[last:pos]) |
| 326 if isinstance(elt, Group): |
| 327 piece += elt[0] |
| 328 param = elt[1] |
| 329 else: |
| 330 param = None |
| 331 last = pos + 1 |
| 332 for i in range(len(result)): |
| 333 result[i] += piece |
| 334 if param: |
| 335 result_args[i].append(param) |
| 336 if isinstance(elt, (Choice, NonCapture)): |
| 337 if isinstance(elt, NonCapture): |
| 338 elt = [elt] |
| 339 inner_result, inner_args = [], [] |
| 340 for item in elt: |
| 341 res, args = flatten_result(item) |
| 342 inner_result.extend(res) |
| 343 inner_args.extend(args) |
| 344 new_result = [] |
| 345 new_args = [] |
| 346 for item, args in zip(result, result_args): |
| 347 for i_item, i_args in zip(inner_result, inner_args): |
| 348 new_result.append(item + i_item) |
| 349 new_args.append(args[:] + i_args) |
| 350 result = new_result |
| 351 result_args = new_args |
| 352 if pos >= last: |
| 353 piece = ''.join(source[last:]) |
| 354 for i in range(len(result)): |
| 355 result[i] += piece |
| 356 return result, result_args |
OLD | NEW |