LEFT | RIGHT |
1 # | 1 # |
2 # ElementTree | 2 # ElementTree |
3 # $Id: ElementPath.py 3276 2007-09-12 06:52:30Z fredrik $ | 3 # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ |
4 # | 4 # |
5 # limited xpath support for element trees | 5 # limited xpath support for element trees |
6 # | 6 # |
7 # history: | 7 # history: |
8 # 2003-05-23 fl created | 8 # 2003-05-23 fl created |
9 # 2003-05-28 fl added support for // etc | 9 # 2003-05-28 fl added support for // etc |
10 # 2003-08-27 fl fixed parsing of periods in element names | 10 # 2003-08-27 fl fixed parsing of periods in element names |
11 # 2007-09-10 fl new selection engine | 11 # 2007-09-10 fl new selection engine |
12 # | 12 # 2007-09-12 fl fixed parent selector |
13 # Copyright (c) 2003-2007 by Fredrik Lundh. All rights reserved. | 13 # 2007-09-13 fl added iterfind; changed findall to return a list |
| 14 # 2007-11-30 fl added namespaces support |
| 15 # 2009-10-30 fl added child element value filter |
| 16 # |
| 17 # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. |
14 # | 18 # |
15 # fredrik@pythonware.com | 19 # fredrik@pythonware.com |
16 # http://www.pythonware.com | 20 # http://www.pythonware.com |
17 # | 21 # |
18 # -------------------------------------------------------------------- | 22 # -------------------------------------------------------------------- |
19 # The ElementTree toolkit is | 23 # The ElementTree toolkit is |
20 # | 24 # |
21 # Copyright (c) 1999-2007 by Fredrik Lundh | 25 # Copyright (c) 1999-2009 by Fredrik Lundh |
22 # | 26 # |
23 # By obtaining, using, and/or copying this software and/or its | 27 # By obtaining, using, and/or copying this software and/or its |
24 # associated documentation, you agree that you have read, understood, | 28 # associated documentation, you agree that you have read, understood, |
25 # and will comply with the following terms and conditions: | 29 # and will comply with the following terms and conditions: |
26 # | 30 # |
27 # Permission to use, copy, modify, and distribute this software and | 31 # Permission to use, copy, modify, and distribute this software and |
28 # its associated documentation for any purpose and without fee is | 32 # its associated documentation for any purpose and without fee is |
29 # hereby granted, provided that the above copyright notice appears in | 33 # hereby granted, provided that the above copyright notice appears in |
30 # all copies, and that both that copyright notice and this permission | 34 # all copies, and that both that copyright notice and this permission |
31 # notice appear in supporting documentation, and that the name of | 35 # notice appear in supporting documentation, and that the name of |
(...skipping 15 matching lines...) Expand all Loading... |
47 # See http://www.python.org/psf/license for licensing details. | 51 # See http://www.python.org/psf/license for licensing details. |
48 | 52 |
49 ## | 53 ## |
50 # Implementation module for XPath support. There's usually no reason | 54 # Implementation module for XPath support. There's usually no reason |
51 # to import this module directly; the <b>ElementTree</b> does this for | 55 # to import this module directly; the <b>ElementTree</b> does this for |
52 # you, if needed. | 56 # you, if needed. |
53 ## | 57 ## |
54 | 58 |
55 import re | 59 import re |
56 | 60 |
57 xpath_tokenizer = re.compile( | 61 xpath_tokenizer_re = re.compile( |
58 "(" | 62 "(" |
59 "'[^']*'|\"[^\"]*\"|" | 63 "'[^']*'|\"[^\"]*\"|" |
60 "::|" | 64 "::|" |
61 "//?|" | 65 "//?|" |
62 "\.\.|" | 66 "\.\.|" |
63 "\(\)|" | 67 "\(\)|" |
64 "[/.*:\[\]\(\)@=])|" | 68 "[/.*:\[\]\(\)@=])|" |
65 "((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|" | 69 "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" |
66 "\s+" | 70 "\s+" |
67 ).findall | 71 ) |
68 | 72 |
69 def prepare_tag(next, token): | 73 def xpath_tokenizer(pattern, namespaces=None): |
| 74 for token in xpath_tokenizer_re.findall(pattern): |
| 75 tag = token[1] |
| 76 if tag and tag[0] != "{" and ":" in tag: |
| 77 try: |
| 78 prefix, uri = tag.split(":", 1) |
| 79 if not namespaces: |
| 80 raise KeyError |
| 81 yield token[0], "{%s}%s" % (namespaces[prefix], uri) |
| 82 except KeyError: |
| 83 raise SyntaxError("prefix %r not found in prefix map" % prefix) |
| 84 else: |
| 85 yield token |
| 86 |
| 87 def get_parent_map(context): |
| 88 parent_map = context.parent_map |
| 89 if parent_map is None: |
| 90 context.parent_map = parent_map = {} |
| 91 for p in context.root.iter(): |
| 92 for e in p: |
| 93 parent_map[e] = p |
| 94 return parent_map |
| 95 |
| 96 def prepare_child(next, token): |
70 tag = token[1] | 97 tag = token[1] |
71 def select(context, result): | 98 def select(context, result): |
72 for elem in result: | 99 for elem in result: |
73 for e in elem: | 100 for e in elem: |
74 if e.tag == tag: | 101 if e.tag == tag: |
75 yield e | 102 yield e |
76 return select | 103 return select |
77 | 104 |
78 def prepare_star(next, token): | 105 def prepare_star(next, token): |
79 def select(context, result): | 106 def select(context, result): |
80 for elem in result: | 107 for elem in result: |
81 for e in elem: | 108 for e in elem: |
82 yield e | 109 yield e |
83 return select | 110 return select |
84 | 111 |
85 def prepare_dot(next, token): | 112 def prepare_self(next, token): |
86 def select(context, result): | 113 def select(context, result): |
87 for elem in result: | 114 for elem in result: |
88 yield elem | 115 yield elem |
89 return select | 116 return select |
90 | 117 |
91 def prepare_iter(next, token): | 118 def prepare_descendant(next, token): |
92 token = next() | 119 token = next() |
93 if token[0] == "*": | 120 if token[0] == "*": |
94 tag = "*" | 121 tag = "*" |
95 elif not token[0]: | 122 elif not token[0]: |
96 tag = token[1] | 123 tag = token[1] |
97 else: | 124 else: |
98 raise SyntaxError | 125 raise SyntaxError("invalid descendant") |
99 def select(context, result): | 126 def select(context, result): |
100 for elem in result: | 127 for elem in result: |
101 for e in elem.iter(tag): | 128 for e in elem.iter(tag): |
102 if e is not elem: | 129 if e is not elem: |
103 yield e | 130 yield e |
104 return select | 131 return select |
105 | 132 |
106 def prepare_dot_dot(next, token): | 133 def prepare_parent(next, token): |
107 def select(context, result): | 134 def select(context, result): |
108 parent_map = context.parent_map | 135 # FIXME: raise error if .. is applied at toplevel? |
109 if parent_map is None: | 136 parent_map = get_parent_map(context) |
110 context.parent_map = parent_map = {} | 137 result_map = {} |
111 for p in context.root.iter(): | |
112 for e in p: | |
113 parent_map[e] = p | |
114 for elem in result: | 138 for elem in result: |
115 if elem in parent_map: | 139 if elem in parent_map: |
116 yield parent_map[elem] | 140 parent = parent_map[elem] |
| 141 if parent not in result_map: |
| 142 result_map[parent] = None |
| 143 yield parent |
117 return select | 144 return select |
118 | 145 |
119 def prepare_predicate(next, token): | 146 def prepare_predicate(next, token): |
120 # this one should probably be refactored... | 147 # FIXME: replace with real parser!!! refs: |
121 token = next() | 148 # http://effbot.org/zone/simple-iterator-parser.htm |
122 if token[0] == "@": | 149 # http://javascript.crockford.com/tdop/tdop.html |
123 # attribute | 150 signature = [] |
124 token = next() | 151 predicate = [] |
125 if token[0]: | 152 while 1: |
126 raise SyntaxError("invalid attribute predicate") | |
127 key = token[1] | |
128 token = next() | 153 token = next() |
129 if token[0] == "]": | 154 if token[0] == "]": |
130 def select(context, result): | 155 break |
131 for elem in result: | 156 if token[0] and token[0][:1] in "'\"": |
132 if elem.get(key) is not None: | 157 token = "'", token[0][1:-1] |
133 yield elem | 158 signature.append(token[0] or "-") |
134 elif token[0] == "=": | 159 predicate.append(token[1]) |
135 value = next()[0] | 160 signature = "".join(signature) |
136 if value[:1] == "'" or value[:1] == '"': | 161 # use signature to determine predicate type |
137 value = value[1:-1] | 162 if signature == "@-": |
138 else: | 163 # [@attribute] predicate |
139 raise SyntaxError("invalid comparision target") | 164 key = predicate[1] |
140 token = next() | 165 def select(context, result): |
141 def select(context, result): | 166 for elem in result: |
142 for elem in result: | 167 if elem.get(key) is not None: |
143 if elem.get(key) == value: | 168 yield elem |
144 yield elem | 169 return select |
145 if token[0] != "]": | 170 if signature == "@-='": |
146 raise SyntaxError("invalid attribute predicate") | 171 # [@attribute='value'] |
147 elif not token[0]: | 172 key = predicate[1] |
148 tag = token[1] | 173 value = predicate[-1] |
149 token = next() | 174 def select(context, result): |
150 if token[0] != "]": | 175 for elem in result: |
151 raise SyntaxError("invalid node predicate") | 176 if elem.get(key) == value: |
| 177 yield elem |
| 178 return select |
| 179 if signature == "-" and not re.match("\d+$", predicate[0]): |
| 180 # [tag] |
| 181 tag = predicate[0] |
152 def select(context, result): | 182 def select(context, result): |
153 for elem in result: | 183 for elem in result: |
154 if elem.find(tag) is not None: | 184 if elem.find(tag) is not None: |
155 yield elem | 185 yield elem |
156 else: | 186 return select |
157 raise SyntaxError("invalid predicate") | 187 if signature == "-='" and not re.match("\d+$", predicate[0]): |
158 return select | 188 # [tag='value'] |
| 189 tag = predicate[0] |
| 190 value = predicate[-1] |
| 191 def select(context, result): |
| 192 for elem in result: |
| 193 for e in elem.findall(tag): |
| 194 if "".join(e.itertext()) == value: |
| 195 yield elem |
| 196 break |
| 197 return select |
| 198 if signature == "-" or signature == "-()" or signature == "-()-": |
| 199 # [index] or [last()] or [last()-index] |
| 200 if signature == "-": |
| 201 index = int(predicate[0]) - 1 |
| 202 else: |
| 203 if predicate[0] != "last": |
| 204 raise SyntaxError("unsupported function") |
| 205 if signature == "-()-": |
| 206 try: |
| 207 index = int(predicate[2]) - 1 |
| 208 except ValueError: |
| 209 raise SyntaxError("unsupported expression") |
| 210 else: |
| 211 index = -1 |
| 212 def select(context, result): |
| 213 parent_map = get_parent_map(context) |
| 214 for elem in result: |
| 215 try: |
| 216 parent = parent_map[elem] |
| 217 # FIXME: what if the selector is "*" ? |
| 218 elems = list(parent.findall(elem.tag)) |
| 219 if elems[index] is elem: |
| 220 yield elem |
| 221 except (IndexError, KeyError): |
| 222 pass |
| 223 return select |
| 224 raise SyntaxError("invalid predicate") |
159 | 225 |
160 ops = { | 226 ops = { |
161 "": prepare_tag, | 227 "": prepare_child, |
162 "*": prepare_star, | 228 "*": prepare_star, |
163 ".": prepare_dot, | 229 ".": prepare_self, |
164 "..": prepare_dot_dot, | 230 "..": prepare_parent, |
165 "//": prepare_iter, | 231 "//": prepare_descendant, |
166 "[": prepare_predicate, | 232 "[": prepare_predicate, |
167 } | 233 } |
168 | 234 |
169 _cache = {} | 235 _cache = {} |
170 | 236 |
171 class _SelectorContext: | 237 class _SelectorContext: |
172 parent_map = None | 238 parent_map = None |
173 def __init__(self, root): | 239 def __init__(self, root): |
174 self.root = root | 240 self.root = root |
175 | 241 |
176 # -------------------------------------------------------------------- | 242 # -------------------------------------------------------------------- |
177 | 243 |
178 ## | 244 ## |
179 # Find first matching object. | 245 # Generate all matching objects. |
180 | 246 |
181 def find(elem, path): | 247 def iterfind(elem, path, namespaces=None): |
182 return next(findall(elem, path), None) | |
183 | |
184 ## | |
185 # Find all matching objects. | |
186 | |
187 def findall(elem, path): | |
188 # compile selector pattern | 248 # compile selector pattern |
| 249 if path[-1:] == "/": |
| 250 path = path + "*" # implicit all (FIXME: keep this?) |
189 try: | 251 try: |
190 selector = _cache[path] | 252 selector = _cache[path] |
191 except KeyError: | 253 except KeyError: |
192 if len(_cache) > 100: | 254 if len(_cache) > 100: |
193 _cache.clear() | 255 _cache.clear() |
194 if path[:1] == "/": | 256 if path[:1] == "/": |
195 raise SyntaxError("cannot use absolute path on element") | 257 raise SyntaxError("cannot use absolute path on element") |
196 stream = iter(xpath_tokenizer(path)) | 258 next = iter(xpath_tokenizer(path, namespaces)).next |
197 next = stream.next; token = next() | 259 token = next() |
198 selector = [] | 260 selector = [] |
199 while 1: | 261 while 1: |
200 try: | 262 try: |
201 selector.append(ops[token[0]](next, token)) | 263 selector.append(ops[token[0]](next, token)) |
202 except StopIteration: | 264 except StopIteration: |
203 raise SyntaxError("invalid path") | 265 raise SyntaxError("invalid path") |
204 try: | 266 try: |
205 token = next() | 267 token = next() |
206 if token[0] == "/": | 268 if token[0] == "/": |
207 token = next() | 269 token = next() |
208 except StopIteration: | 270 except StopIteration: |
209 break | 271 break |
210 _cache[path] = selector | 272 _cache[path] = selector |
211 # execute selector pattern | 273 # execute selector pattern |
212 result = [elem] | 274 result = [elem] |
213 context = _SelectorContext(elem) | 275 context = _SelectorContext(elem) |
214 for select in selector: | 276 for select in selector: |
215 result = select(context, result) | 277 result = select(context, result) |
216 return result | 278 return result |
217 | 279 |
218 ## | 280 ## |
| 281 # Find first matching object. |
| 282 |
| 283 def find(elem, path, namespaces=None): |
| 284 try: |
| 285 return iterfind(elem, path, namespaces).next() |
| 286 except StopIteration: |
| 287 return None |
| 288 |
| 289 ## |
| 290 # Find all matching objects. |
| 291 |
| 292 def findall(elem, path, namespaces=None): |
| 293 return list(iterfind(elem, path, namespaces)) |
| 294 |
| 295 ## |
219 # Find text for first matching object. | 296 # Find text for first matching object. |
220 | 297 |
221 def findtext(elem, path, default=None): | 298 def findtext(elem, path, default=None, namespaces=None): |
222 elem = next(findall(elem, path), None) | 299 try: |
223 if elem is None: | 300 elem = iterfind(elem, path, namespaces).next() |
| 301 return elem.text or "" |
| 302 except StopIteration: |
224 return default | 303 return default |
225 return elem.text or "" | |
LEFT | RIGHT |