LEFT | RIGHT |
1 # | 1 # |
2 # ElementTree | 2 # ElementTree |
3 # $Id: ElementTree.py 3276 2007-09-12 06:52:30Z fredrik $ | 3 # $Id: ElementTree.py 3440 2008-07-18 14:45:01Z fredrik $ |
4 # | 4 # |
5 # light-weight XML support for Python 2.2 and later. | 5 # light-weight XML support for Python 2.3 and later. |
6 # | 6 # |
7 # history: | 7 # history (since 1.2.6): |
8 # 2001-10-20 fl created (from various sources) | |
9 # 2001-11-01 fl return root from parse method | |
10 # 2002-02-16 fl sort attributes in lexical order | |
11 # 2002-04-06 fl TreeBuilder refactoring, added PythonDoc markup | |
12 # 2002-05-01 fl finished TreeBuilder refactoring | |
13 # 2002-07-14 fl added basic namespace support to ElementTree.write | |
14 # 2002-07-25 fl added QName attribute support | |
15 # 2002-10-20 fl fixed encoding in write | |
16 # 2002-11-24 fl changed default encoding to ascii; fixed attribute encoding | |
17 # 2002-11-27 fl accept file objects or file names for parse/write | |
18 # 2002-12-04 fl moved XMLTreeBuilder back to this module | |
19 # 2003-01-11 fl fixed entity encoding glitch for us-ascii | |
20 # 2003-02-13 fl added XML literal factory | |
21 # 2003-02-21 fl added ProcessingInstruction/PI factory | |
22 # 2003-05-11 fl added tostring/fromstring helpers | |
23 # 2003-05-26 fl added ElementPath support | |
24 # 2003-07-05 fl added makeelement factory method | |
25 # 2003-07-28 fl added more well-known namespace prefixes | |
26 # 2003-08-15 fl fixed typo in ElementTree.findtext (Thomas Dartsch) | |
27 # 2003-09-04 fl fall back on emulator if ElementPath is not installed | |
28 # 2003-10-31 fl markup updates | |
29 # 2003-11-15 fl fixed nested namespace bug | |
30 # 2004-03-28 fl added XMLID helper | |
31 # 2004-06-02 fl added default support to findtext | |
32 # 2004-06-08 fl fixed encoding of non-ascii element/attribute names | |
33 # 2004-08-23 fl take advantage of post-2.1 expat features | |
34 # 2004-09-03 fl made Element class visible; removed factory | |
35 # 2005-02-01 fl added iterparse implementation | |
36 # 2005-03-02 fl fixed iterparse support for pre-2.2 versions | |
37 # 2005-11-12 fl added tostringlist/fromstringlist helpers | 8 # 2005-11-12 fl added tostringlist/fromstringlist helpers |
38 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox | 9 # 2006-07-05 fl merged in selected changes from the 1.3 sandbox |
39 # 2006-07-05 fl removed support for 2.1 and earlier | 10 # 2006-07-05 fl removed support for 2.1 and earlier |
40 # 2007-06-21 fl added deprecation/future warnings | 11 # 2007-06-21 fl added deprecation/future warnings |
41 # 2007-08-25 fl added doctype hook, added parser version attribute etc | 12 # 2007-08-25 fl added doctype hook, added parser version attribute etc |
42 # 2007-08-26 fl added new serializer code (better namespace handling, etc) | 13 # 2007-08-26 fl added new serializer code (better namespace handling, etc) |
43 # 2007-08-27 fl warn for broken /tag searches on tree level | 14 # 2007-08-27 fl warn for broken /tag searches on tree level |
44 # 2007-09-02 fl added html/text methods to serializer (experimental) | 15 # 2007-09-02 fl added html/text methods to serializer (experimental) |
45 # 2007-09-05 fl added method argument to tostring/tostringlist | 16 # 2007-09-05 fl added method argument to tostring/tostringlist |
46 # 2007-09-06 fl improved error handling | 17 # 2007-09-06 fl improved error handling |
47 # | 18 # 2007-09-13 fl added itertext, iterfind; assorted cleanups |
48 # Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved. | 19 # 2007-12-15 fl added C14N hooks, copy method (experimental) |
| 20 # |
| 21 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. |
49 # | 22 # |
50 # fredrik@pythonware.com | 23 # fredrik@pythonware.com |
51 # http://www.pythonware.com | 24 # http://www.pythonware.com |
52 # | 25 # |
53 # -------------------------------------------------------------------- | 26 # -------------------------------------------------------------------- |
54 # The ElementTree toolkit is | 27 # The ElementTree toolkit is |
55 # | 28 # |
56 # Copyright (c) 1999-2007 by Fredrik Lundh | 29 # Copyright (c) 1999-2008 by Fredrik Lundh |
57 # | 30 # |
58 # By obtaining, using, and/or copying this software and/or its | 31 # By obtaining, using, and/or copying this software and/or its |
59 # associated documentation, you agree that you have read, understood, | 32 # associated documentation, you agree that you have read, understood, |
60 # and will comply with the following terms and conditions: | 33 # and will comply with the following terms and conditions: |
61 # | 34 # |
62 # Permission to use, copy, modify, and distribute this software and | 35 # Permission to use, copy, modify, and distribute this software and |
63 # its associated documentation for any purpose and without fee is | 36 # its associated documentation for any purpose and without fee is |
64 # hereby granted, provided that the above copyright notice appears in | 37 # hereby granted, provided that the above copyright notice appears in |
65 # all copies, and that both that copyright notice and this permission | 38 # all copies, and that both that copyright notice and this permission |
66 # notice appear in supporting documentation, and that the name of | 39 # notice appear in supporting documentation, and that the name of |
(...skipping 25 matching lines...) |
92 "PI", "ProcessingInstruction", | 65 "PI", "ProcessingInstruction", |
93 "QName", | 66 "QName", |
94 "SubElement", | 67 "SubElement", |
95 "tostring", "tostringlist", | 68 "tostring", "tostringlist", |
96 "TreeBuilder", | 69 "TreeBuilder", |
97 "VERSION", | 70 "VERSION", |
98 "XML", | 71 "XML", |
99 "XMLParser", "XMLTreeBuilder", | 72 "XMLParser", "XMLTreeBuilder", |
100 ] | 73 ] |
101 | 74 |
| 75 VERSION = "1.3.0" |
| 76 |
102 ## | 77 ## |
103 # The <b>Element</b> type is a flexible container object, designed to | 78 # The <b>Element</b> type is a flexible container object, designed to |
104 # store hierarchical data structures in memory. The type can be | 79 # store hierarchical data structures in memory. The type can be |
105 # described as a cross between a list and a dictionary. | 80 # described as a cross between a list and a dictionary. |
106 # <p> | 81 # <p> |
107 # Each element has a number of properties associated with it: | 82 # Each element has a number of properties associated with it: |
108 # <ul> | 83 # <ul> |
109 # <li>a <i>tag</i>. This is a string identifying what kind of data | 84 # <li>a <i>tag</i>. This is a string identifying what kind of data |
110 # this element represents (the element type, in other words).</li> | 85 # this element represents (the element type, in other words).</li> |
111 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> | 86 # <li>a number of <i>attributes</i>, stored in a Python dictionary.</li> |
112 # <li>a <i>text</i> string.</li> | 87 # <li>a <i>text</i> string.</li> |
113 # <li>an optional <i>tail</i> string.</li> | 88 # <li>an optional <i>tail</i> string.</li> |
114 # <li>a number of <i>child elements</i>, stored in a Python sequence</li> | 89 # <li>a number of <i>child elements</i>, stored in a Python sequence</li> |
115 # </ul> | 90 # </ul> |
116 # | 91 # |
117 # To create an element instance, use the {@link #Element} constructor | 92 # To create an element instance, use the {@link #Element} constructor |
118 # or the {@link #SubElement} factory function. | 93 # or the {@link #SubElement} factory function. |
119 # <p> | 94 # <p> |
120 # The {@link #ElementTree} class can be used to wrap an element | 95 # The {@link #ElementTree} class can be used to wrap an element |
121 # structure, and convert it from and to XML. | 96 # structure, and convert it from and to XML. |
122 ## | 97 ## |
123 | 98 |
124 import sys, re | 99 import sys |
| 100 import re |
125 import warnings | 101 import warnings |
| 102 |
126 | 103 |
127 class _SimpleElementPath(object): | 104 class _SimpleElementPath(object): |
128 # emulate pre-1.2 find/findtext/findall behaviour | 105 # emulate pre-1.2 find/findtext/findall behaviour |
129 def find(self, element, tag): | 106 def find(self, element, tag, namespaces=None): |
130 return next(self.findall(element, tag), None) | 107 for elem in element: |
131 | 108 if elem.tag == tag: |
132 def findtext(self, element, tag, default=None): | 109 return elem |
| 110 return None |
| 111 def findtext(self, element, tag, default=None, namespaces=None): |
133 elem = self.find(element, tag) | 112 elem = self.find(element, tag) |
134 if elem is None: | 113 if elem is None: |
135 return default | 114 return default |
136 return elem.text or "" | 115 return elem.text or "" |
137 | 116 def iterfind(self, element, tag, namespaces=None): |
138 def findall(self, element, tag): | |
139 if tag[:3] == ".//": | 117 if tag[:3] == ".//": |
140 return element.iter(tag[3:]) | 118 for elem in element.iter(tag[3:]): |
141 return (elem for elem in element if elem.tag == tag) | 119 yield elem |
| 120 for elem in element: |
| 121 if elem.tag == tag: |
| 122 yield elem |
| 123 def findall(self, element, tag, namespaces=None): |
| 124 return list(self.iterfind(element, tag, namespaces)) |
142 | 125 |
143 try: | 126 try: |
144 from . import ElementPath | 127 from . import ElementPath |
145 except ImportError: | 128 except ImportError: |
146 # FIXME: issue warning in this case? | |
147 ElementPath = _SimpleElementPath() | 129 ElementPath = _SimpleElementPath() |
148 | 130 |
149 VERSION = "1.3a2" | 131 ## |
| 132 # Parser error. This is a subclass of <b>SyntaxError</b>. |
| 133 # <p> |
| 134 # In addition to the exception value, an exception instance contains a |
| 135 # specific exception code in the <b>code</b> attribute, and the line and |
| 136 # column of the error in the <b>position</b> attribute. |
150 | 137 |
151 class ParseError(SyntaxError): | 138 class ParseError(SyntaxError): |
152 pass | 139 pass |
153 | 140 |
154 # -------------------------------------------------------------------- | 141 # -------------------------------------------------------------------- |
155 | 142 |
156 ## | 143 ## |
157 # Checks if an object appears to be a valid element object. | 144 # Checks if an object appears to be a valid element object. |
158 # | 145 # |
159 # @param element An element instance. | 146 # @param element An element instance. |
160 # @return A true value if this is an element object. | 147 # @return A true value if this is an element object. |
161 # @defreturn flag | 148 # @defreturn flag |
162 | 149 |
163 def iselement(element): | 150 def iselement(element): |
164 # FIXME: not sure about this; might be a better idea to look | 151 # FIXME: not sure about this; might be a better idea to look |
165 # for tag/attrib/text attributes | 152 # for tag/attrib/text attributes |
166 return isinstance(element, Element) or hasattr(element, "tag") | 153 return isinstance(element, Element) or hasattr(element, "tag") |
167 | 154 |
168 ## | 155 ## |
169 # Element class. This class defines the Element interface, and | 156 # Element class. This class defines the Element interface, and |
170 # provides a reference implementation of this interface. | 157 # provides a reference implementation of this interface. |
171 # <p> | 158 # <p> |
172 # The element name, attribute names, and attribute values can be | 159 # The element name, attribute names, and attribute values can be |
173 # either 8-bit ASCII strings or Unicode strings. | 160 # either ASCII strings (ordinary Python strings containing only 7-bit |
| 161 # ASCII characters) or Unicode strings. |
174 # | 162 # |
175 # @param tag The element name. | 163 # @param tag The element name. |
176 # @param attrib An optional dictionary, containing element attributes. | 164 # @param attrib An optional dictionary, containing element attributes. |
177 # @param **extra Additional attributes, given as keyword arguments. | 165 # @param **extra Additional attributes, given as keyword arguments. |
178 # @see Element | 166 # @see Element |
179 # @see SubElement | 167 # @see SubElement |
180 # @see Comment | 168 # @see Comment |
181 # @see ProcessingInstruction | 169 # @see ProcessingInstruction |
182 | 170 |
183 class Element(object): | 171 class Element(object): |
184 # <tag attrib>text<child/>...</tag>tail | 172 # <tag attrib>text<child/>...</tag>tail |
185 | 173 |
186 ## | 174 ## |
187 # (Attribute) Element tag. | 175 # (Attribute) Element tag. |
188 | 176 |
189 tag = None | 177 tag = None |
190 | 178 |
191 ## | 179 ## |
192 # (Attribute) Element attribute dictionary. Where possible, use | 180 # (Attribute) Element attribute dictionary. Where possible, use |
193 # {@link #Element.get}, | 181 # {@link #Element.get}, |
194 # {@link #Element.set}, | 182 # {@link #Element.set}, |
195 # {@link #Element.keys}, and | 183 # {@link #Element.keys}, and |
196 # {@link #Element.items} to access | 184 # {@link #Element.items} to access |
197 # element attributes. | 185 # element attributes. |
198 | 186 |
199 attrib = None | 187 attrib = None |
200 | 188 |
201 ## | 189 ## |
202 # (Attribute) Text before first subelement. This is either a | 190 # (Attribute) Text before first subelement. This is either a |
203 # string or the value None, if there was no text. | 191 # string or the value None. Note that if there was no text, this |
| 192 # attribute may be either None or an empty string, depending on |
| 193 # the parser. |
204 | 194 |
205 text = None | 195 text = None |
206 | 196 |
207 ## | 197 ## |
208 # (Attribute) Text after this element's end tag, but before the | 198 # (Attribute) Text after this element's end tag, but before the |
209 # next sibling element's start tag. This is either a string or | 199 # next sibling element's start tag. This is either a string or |
210 # the value None, if there was no text. | 200 # the value None. Note that if there was no text, this attribute |
| 201 # may be either None or an empty string, depending on the parser. |
211 | 202 |
212 tail = None # text after end tag, if any | 203 tail = None # text after end tag, if any |
| 204 |
| 205 # constructor |
213 | 206 |
214 def __init__(self, tag, attrib={}, **extra): | 207 def __init__(self, tag, attrib={}, **extra): |
215 attrib = attrib.copy() | 208 attrib = attrib.copy() |
216 attrib.update(extra) | 209 attrib.update(extra) |
217 self.tag = tag | 210 self.tag = tag |
218 self.attrib = attrib | 211 self.attrib = attrib |
219 self._children = [] | 212 self._children = [] |
220 | 213 |
221 def __repr__(self): | 214 def __repr__(self): |
222 return "<Element %s at 0x%x>" % (repr(self.tag), id(self)) | 215 return "<Element %s at 0x%x>" % (repr(self.tag), id(self)) |
223 | 216 |
224 ## | 217 ## |
225 # Creates a new element object of the same type as this element. | 218 # Creates a new element object of the same type as this element. |
226 # | 219 # |
227 # @param tag Element tag. | 220 # @param tag Element tag. |
228 # @param attrib Element attributes, given as a dictionary. | 221 # @param attrib Element attributes, given as a dictionary. |
229 # @return A new element instance. | 222 # @return A new element instance. |
230 | 223 |
231 def makeelement(self, tag, attrib): | 224 def makeelement(self, tag, attrib): |
232 return Element(tag, attrib) | 225 return self.__class__(tag, attrib) |
233 | 226 |
234 ## | 227 ## |
235 # Returns the number of subelements. | 228 # (Experimental) Copies the current element. This creates a |
| 229 # shallow copy; subelements will be shared with the original tree. |
| 230 # |
| 231 # @return A new element instance. |
| 232 |
| 233 def copy(self): |
| 234 elem = self.makeelement(self.tag, self.attrib) |
| 235 elem.text = self.text |
| 236 elem.tail = self.tail |
| 237 elem[:] = self |
| 238 return elem |
| 239 |
| 240 ## |
| 241 # Returns the number of subelements. Note that this only counts |
| 242 # full elements; to check if there's any content in an element, you |
| 243 # have to check both the length and the <b>text</b> attribute. |
236 # | 244 # |
237 # @return The number of subelements. | 245 # @return The number of subelements. |
238 | 246 |
239 def __len__(self): | 247 def __len__(self): |
240 return len(self._children) | 248 return len(self._children) |
241 | 249 |
242 def __nonzero__(self): | 250 def __nonzero__(self): |
243 warnings.warn( | 251 warnings.warn( |
244 "The behavior of this method will change in future versions. " | 252 "The behavior of this method will change in future versions. " |
245 "Use specific 'len(elem)' or 'elem is not None' test instead.", | 253 "Use specific 'len(elem)' or 'elem is not None' test instead.", |
246 FutureWarning, stacklevel=2 | 254 FutureWarning, stacklevel=2 |
247 ) | 255 ) |
248 return len(self._children) != 0 # emulate old behaviour | 256 return len(self._children) != 0 # emulate old behaviour, for now |
249 | 257 |
250 ## | 258 ## |
251 # Returns the given subelement. | 259 # Returns the given subelement, by index. |
252 # | 260 # |
253 # @param index What subelement to return. | 261 # @param index What subelement to return. |
254 # @return The given subelement. | 262 # @return The given subelement. |
255 # @exception IndexError If the given element does not exist. | 263 # @exception IndexError If the given element does not exist. |
256 | 264 |
257 def __getitem__(self, index): | 265 def __getitem__(self, index): |
258 return self._children[index] | 266 return self._children[index] |
259 | 267 |
260 ## | 268 ## |
261 # Replaces the given subelement. | 269 # Replaces the given subelement, by index. |
262 # | 270 # |
263 # @param index What subelement to replace. | 271 # @param index What subelement to replace. |
264 # @param element The new element value. | 272 # @param element The new element value. |
265 # @exception IndexError If the given element does not exist. | 273 # @exception IndexError If the given element does not exist. |
266 # @exception AssertionError If element is not a valid object. | |
267 | 274 |
268 def __setitem__(self, index, element): | 275 def __setitem__(self, index, element): |
269 if isinstance(index, slice): | 276 # if isinstance(index, slice): |
270 for elt in element: | 277 # for elt in element: |
271 assert iselement(elt) | 278 # assert iselement(elt) |
272 else: | 279 # else: |
273 assert iselement(element) | 280 # assert iselement(element) |
274 self._children[index] = element | 281 self._children[index] = element |
275 | 282 |
276 ## | 283 ## |
277 # Deletes the given subelement. | 284 # Deletes the given subelement, by index. |
278 # | 285 # |
279 # @param index What subelement to delete. | 286 # @param index What subelement to delete. |
280 # @exception IndexError If the given element does not exist. | 287 # @exception IndexError If the given element does not exist. |
281 | 288 |
282 def __delitem__(self, index): | 289 def __delitem__(self, index): |
283 del self._children[index] | 290 del self._children[index] |
284 | 291 |
285 ## | 292 ## |
286 # Adds a subelement to the end of this element. | 293 # Adds a subelement to the end of this element. In document order, |
| 294 # the new element will appear after the last existing subelement (or |
| 295 # directly after the text, if it's the first subelement), but before |
| 296 # the end tag for this element. |
287 # | 297 # |
288 # @param element The element to add. | 298 # @param element The element to add. |
289 # @exception AssertionError If a sequence member is not a valid object. | |
290 | 299 |
291 def append(self, element): | 300 def append(self, element): |
292 assert iselement(element) | 301 # assert iselement(element) |
293 self._children.append(element) | 302 self._children.append(element) |
294 | 303 |
295 ## | 304 ## |
296 # Appends subelements from a sequence. | 305 # Appends subelements from a sequence. |
297 # | 306 # |
298 # @param elements A sequence object with zero or more elements. | 307 # @param elements A sequence object with zero or more elements. |
299 # @exception AssertionError If a subelement is not a valid object. | |
300 # @since 1.3 | 308 # @since 1.3 |
301 | 309 |
302 def extend(self, elements): | 310 def extend(self, elements): |
303 for element in elements: | 311 # for element in elements: |
304 assert iselement(element) | 312 # assert iselement(element) |
305 self._children.extend(elements) | 313 self._children.extend(elements) |
306 | 314 |
307 ## | 315 ## |
308 # Inserts a subelement at the given position in this element. | 316 # Inserts a subelement at the given position in this element. |
309 # | 317 # |
310 # @param index Where to insert the new subelement. | 318 # @param index Where to insert the new subelement. |
311 # @exception AssertionError If the element is not a valid object. | |
312 | 319 |
313 def insert(self, index, element): | 320 def insert(self, index, element): |
314 assert iselement(element) | 321 # assert iselement(element) |
315 self._children.insert(index, element) | 322 self._children.insert(index, element) |
316 | 323 |
317 ## | 324 ## |
318 # Removes a matching subelement. Unlike the <b>find</b> methods, | 325 # Removes a matching subelement. Unlike the <b>find</b> methods, |
319 # this method compares elements based on identity, not on tag | 326 # this method compares elements based on identity, not on tag |
320 # value or contents. | 327 # value or contents. To remove subelements by other means, the |
| 328 # easiest way is often to use a list comprehension to select what |
| 329 # elements to keep, and use slice assignment to update the parent |
| 330 # element. |
321 # | 331 # |
322 # @param element What element to remove. | 332 # @param element What element to remove. |
323 # @exception ValueError If a matching element could not be found. | 333 # @exception ValueError If a matching element could not be found. |
324 # @exception AssertionError If the element is not a valid object. | |
325 | 334 |
326 def remove(self, element): | 335 def remove(self, element): |
327 assert iselement(element) | 336 # assert iselement(element) |
328 self._children.remove(element) | 337 self._children.remove(element) |
329 | 338 |
330 ## | 339 ## |
331 # (Deprecated) Returns all subelements. The elements are returned | 340 # (Deprecated) Returns all subelements. The elements are returned |
332 # in document order. | 341 # in document order. |
333 # | 342 # |
334 # @return A list of subelements. | 343 # @return A list of subelements. |
335 # @defreturn list of Element instances | 344 # @defreturn list of Element instances |
336 | 345 |
337 def getchildren(self): | 346 def getchildren(self): |
338 warnings.warn( | 347 warnings.warn( |
339 "This method will be removed in future versions. " | 348 "This method will be removed in future versions. " |
340 "Use 'list(elem)' or iteration over elem instead.", | 349 "Use 'list(elem)' or iteration over elem instead.", |
341 DeprecationWarning, stacklevel=2 | 350 DeprecationWarning, stacklevel=2 |
342 ) | 351 ) |
343 return self._children | 352 return self._children |
344 | 353 |
345 ## | 354 ## |
346 # Finds the first matching subelement, by tag name or path. | 355 # Finds the first matching subelement, by tag name or path. |
347 # | 356 # |
348 # @param path What element to look for. | 357 # @param path What element to look for. |
| 358 # @keyparam namespaces Optional namespace prefix map. |
349 # @return The first matching element, or None if no element was found. | 359 # @return The first matching element, or None if no element was found. |
350 # @defreturn Element or None | 360 # @defreturn Element or None |
351 | 361 |
352 def find(self, path): | 362 def find(self, path, namespaces=None): |
353 return ElementPath.find(self, path) | 363 return ElementPath.find(self, path, namespaces) |
354 | 364 |
355 ## | 365 ## |
356 # Finds text for the first matching subelement, by tag name or path. | 366 # Finds text for the first matching subelement, by tag name or path. |
357 # | 367 # |
358 # @param path What element to look for. | 368 # @param path What element to look for. |
359 # @param default What to return if the element was not found. | 369 # @param default What to return if the element was not found. |
| 370 # @keyparam namespaces Optional namespace prefix map. |
360 # @return The text content of the first matching element, or the | 371 # @return The text content of the first matching element, or the |
361 # default value if no element was found. Note that if the element | 372 # default value if no element was found. Note that if the element |
362 # has is found, but has no text content, this method returns an | 373 # is found, but has no text content, this method returns an |
363 # empty string. | 374 # empty string. |
364 # @defreturn string | 375 # @defreturn string |
365 | 376 |
366 def findtext(self, path, default=None): | 377 def findtext(self, path, default=None, namespaces=None): |
367 return ElementPath.findtext(self, path, default) | 378 return ElementPath.findtext(self, path, default, namespaces) |
368 | 379 |
369 ## | 380 ## |
370 # Finds all matching subelements, by tag name or path. | 381 # Finds all matching subelements, by tag name or path. |
371 # | 382 # |
372 # @param path What element to look for. | 383 # @param path What element to look for. |
373 # @return A list or iterator containing all matching elements, | 384 # @keyparam namespaces Optional namespace prefix map. |
| 385 # @return A list or other sequence containing all matching elements, |
374 # in document order. | 386 # in document order. |
375 # @defreturn list of Element instances | 387 # @defreturn list of Element instances |
376 | 388 |
377 def findall(self, path): | 389 def findall(self, path, namespaces=None): |
378 return ElementPath.findall(self, path) | 390 return ElementPath.findall(self, path, namespaces) |
| 391 |
| 392 ## |
| 393 # Finds all matching subelements, by tag name or path. |
| 394 # |
| 395 # @param path What element to look for. |
| 396 # @keyparam namespaces Optional namespace prefix map. |
| 397 # @return An iterator or sequence containing all matching elements, |
| 398 # in document order. |
| 399 # @defreturn a generated sequence of Element instances |
| 400 |
| 401 def iterfind(self, path, namespaces=None): |
| 402 return ElementPath.iterfind(self, path, namespaces) |
379 | 403 |
380 ## | 404 ## |
381 # Resets an element. This function removes all subelements, clears | 405 # Resets an element. This function removes all subelements, clears |
382 # all attributes, and sets the text and tail attributes to None. | 406 # all attributes, and sets the <b>text</b> and <b>tail</b> attributes |
| 407 # to None. |
383 | 408 |
384 def clear(self): | 409 def clear(self): |
385 self.attrib.clear() | 410 self.attrib.clear() |
386 self._children = [] | 411 self._children = [] |
387 self.text = self.tail = None | 412 self.text = self.tail = None |
388 | 413 |
389 ## | 414 ## |
390 # Gets an element attribute. | 415 # Gets an element attribute. Equivalent to <b>attrib.get</b>, but |
| 416 # some implementations may handle this a bit more efficiently. |
391 # | 417 # |
392 # @param key What attribute to look for. | 418 # @param key What attribute to look for. |
393 # @param default What to return if the attribute was not found. | 419 # @param default What to return if the attribute was not found. |
394 # @return The attribute value, or the default value, if the | 420 # @return The attribute value, or the default value, if the |
395 # attribute was not found. | 421 # attribute was not found. |
396 # @defreturn string or None | 422 # @defreturn string or None |
397 | 423 |
398 def get(self, key, default=None): | 424 def get(self, key, default=None): |
399 return self.attrib.get(key, default) | 425 return self.attrib.get(key, default) |
400 | 426 |
401 ## | 427 ## |
402 # Sets an element attribute. | 428 # Sets an element attribute. Equivalent to <b>attrib[key] = value</b>, |
| 429 # but some implementations may handle this a bit more efficiently. |
403 # | 430 # |
404 # @param key What attribute to set. | 431 # @param key What attribute to set. |
405 # @param value The attribute value. | 432 # @param value The attribute value. |
406 | 433 |
407 def set(self, key, value): | 434 def set(self, key, value): |
408 self.attrib[key] = value | 435 self.attrib[key] = value |
409 | 436 |
410 ## | 437 ## |
411 # Gets a list of attribute names. The names are returned in an | 438 # Gets a list of attribute names. The names are returned in an |
412 # arbitrary order (just like for an ordinary Python dictionary). | 439 # arbitrary order (just like for an ordinary Python dictionary). |
| 440 # Equivalent to <b>attrib.keys()</b>. |
413 # | 441 # |
414 # @return A list of element attribute names. | 442 # @return A list of element attribute names. |
415 # @defreturn list of strings | 443 # @defreturn list of strings |
416 | 444 |
417 def keys(self): | 445 def keys(self): |
418 return self.attrib.keys() | 446 return self.attrib.keys() |
419 | 447 |
420 ## | 448 ## |
421 # Gets element attributes, as a sequence. The attributes are | 449 # Gets element attributes, as a sequence. The attributes are |
422 # returned in an arbitrary order. | 450 # returned in an arbitrary order. Equivalent to <b>attrib.items()</b>. |
423 # | 451 # |
424 # @return A list of (name, value) tuples for all attributes. | 452 # @return A list of (name, value) tuples for all attributes. |
425 # @defreturn list of (string, string) tuples | 453 # @defreturn list of (string, string) tuples |
426 | 454 |
427 def items(self): | 455 def items(self): |
428 return self.attrib.items() | 456 return self.attrib.items() |
429 | 457 |
430 ## | 458 ## |
431 # Creates a tree iterator. The iterator loops over this element | 459 # Creates a tree iterator. The iterator loops over this element |
432 # and all subelements, in document order, and returns all elements | 460 # and all subelements, in document order, and returns all elements |
433 # with a matching tag. | 461 # with a matching tag. |
434 # <p> | 462 # <p> |
435 # If the tree structure is modified during iteration, new or removed | 463 # If the tree structure is modified during iteration, new or removed |
436 # elements may or may not be included. To get a stable set, use the | 464 # elements may or may not be included. To get a stable set, use the |
437 # list() function on the iterator, and loop over the resulting list. | 465 # list() function on the iterator, and loop over the resulting list. |
438 # | 466 # |
439 # @param tag What tags to look for (default is to return all elements). | 467 # @param tag What tags to look for (default is to return all elements). |
440 # @return An iterator containing all the matching elements. | 468 # @return An iterator containing all the matching elements. |
441 # @defreturn iterator | 469 # @defreturn iterator |
442 | 470 |
443 def iter(self, tag=None): | 471 def iter(self, tag=None): |
444 if tag == "*": | 472 if tag == "*": |
445 tag = None | 473 tag = None |
446 if tag is None or self.tag == tag: | 474 if tag is None or self.tag == tag: |
447 yield self | 475 yield self |
448 for e in self._children: | 476 for e in self._children: |
449 for e in e.iter(tag): | 477 for e in e.iter(tag): |
450 yield e | 478 yield e |
451 | 479 |
452 # compatibility (FIXME: preserve list behaviour too? see below) | 480 # compatibility |
453 getiterator = iter | 481 def getiterator(self, tag=None): |
454 | 482 # Change for a DeprecationWarning in 1.4 |
455 # def getiterator(self, tag=None): | 483 warnings.warn( |
456 # return list(tag) | 484 "This method will be removed in future versions. " |
| 485 "Use 'elem.iter()' or 'list(elem.iter())' instead.", |
| 486 PendingDeprecationWarning, stacklevel=2 |
| 487 ) |
| 488 return list(self.iter(tag)) |
457 | 489 |
458 ## | 490 ## |
459 # Creates a text iterator. The iterator loops over this element | 491 # Creates a text iterator. The iterator loops over this element |
460 # and all subelements, in document order, and returns all inner | 492 # and all subelements, in document order, and returns all inner |
461 # text. | 493 # text. |
462 # | 494 # |
463 # @return An iterator containing all inner text. | 495 # @return An iterator containing all inner text. |
464 # @defreturn iterator | 496 # @defreturn iterator |
465 | 497 |
def itertext(self):
    # Yields the inner text of this element and its subelements in
    # document order.  A node whose tag is neither a string nor None
    # contributes no inner text of its own.
    tag = self.tag
    if tag is not None and not isinstance(tag, basestring):
        return
    if self.text:
        yield self.text
    for child in self:
        for piece in child.itertext():
            yield piece
        # the tail belongs to the parent's text flow, so it is emitted
        # even when the child itself produced nothing
        if child.tail:
            yield child.tail
474 | 509 |
475 # compatibility | 510 # compatibility |
# Older names for the Element class, kept as aliases of the same object.
_Element = Element
_ElementInterface = Element
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
563 # hierarchy, and adds some extra support for serialization to and from | 598 # hierarchy, and adds some extra support for serialization to and from |
564 # standard XML. | 599 # standard XML. |
565 # | 600 # |
566 # @param element Optional root element. | 601 # @param element Optional root element. |
567 # @keyparam file Optional file handle or file name. If given, the | 602 # @keyparam file Optional file handle or file name. If given, the |
568 # tree is initialized with the contents of this XML file. | 603 # tree is initialized with the contents of this XML file. |
569 | 604 |
class ElementTree(object):

    def __init__(self, element=None, file=None):
        # assert element is None or iselement(element)
        self._root = element # first node
        if file:
            self.parse(file)

    ##
    # Gets the root element for this tree.
    #
    # @return An element instance.
    # @defreturn Element

    def getroot(self):
        return self._root

    ##
    # Replaces the root element for this tree.  This discards the
    # current contents of the tree, and replaces it with the given
    # element.  Use with care.
    #
    # @param element An element instance.

    def _setroot(self, element):
        # assert iselement(element)
        self._root = element

    ##
    # Loads an external XML document into this element tree.
    #
    # @param source A file name or file object.  If a file object is
    #     given, it only has to implement a <b>read(n)</b> method.  A
    #     file opened here from a file name is closed again before this
    #     method returns or raises; a file object passed in by the
    #     caller is left open.
    # @keyparam parser An optional parser instance.  If not given, the
    #     standard {@link XMLParser} parser is used.
    # @return The document root element.
    # @defreturn Element
    # @exception ParseError If the parser fails to parse the document.

    def parse(self, source, parser=None):
        opened_here = not hasattr(source, "read")
        if opened_here:
            source = open(source, "rb")
        try:
            if not parser:
                parser = XMLParser(target=TreeBuilder())
            while 1:
                data = source.read(65536)
                if not data:
                    break
                parser.feed(data)
            self._root = parser.close()
            return self._root
        finally:
            # only release what this method acquired
            if opened_here:
                source.close()

    ##
    # Creates a tree iterator for the root element.  The iterator loops
    # over all elements in this tree, in document order.
    #
    # @param tag What tags to look for (default is to return all elements)
    # @return An iterator.
    # @defreturn iterator

    def iter(self, tag=None):
        # assert self._root is not None
        return self._root.iter(tag)

    # compatibility
    def getiterator(self, tag=None):
        # Change for a DeprecationWarning in 1.4
        warnings.warn(
            "This method will be removed in future versions. "
            "Use 'tree.iter()' or 'list(tree.iter())' instead.",
            PendingDeprecationWarning, stacklevel=2
            )
        return list(self.iter(tag))

    ##
    # Finds the first toplevel element with given tag.
    # Same as getroot().find(path).
    #
    # @param path What element to look for.  A path starting with "/"
    #     is searched as "." + path, and a FutureWarning is issued.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The first matching element, or None if no element was found.
    # @defreturn Element or None

    def find(self, path, namespaces=None):
        # assert self._root is not None
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=2
                )
        return self._root.find(path, namespaces)

    ##
    # Finds the element text for the first toplevel element with given
    # tag.  Same as getroot().findtext(path).
    #
    # @param path What toplevel element to look for.  A path starting
    #     with "/" is searched as "." + path, and a FutureWarning is
    #     issued.
    # @param default What to return if the element was not found.
    # @keyparam namespaces Optional namespace prefix map.
    # @return The text content of the first matching element, or the
    #     default value if no element was found.  Note that if the
    #     element is found, but has no text content, this method returns
    #     an empty string.
    # @defreturn string

    def findtext(self, path, default=None, namespaces=None):
        # assert self._root is not None
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=2
                )
        return self._root.findtext(path, default, namespaces)

    ##
    # Finds all toplevel elements with the given tag.
    # Same as getroot().findall(path).
    #
    # @param path What element to look for.  A path starting with "/"
    #     is searched as "." + path, and a FutureWarning is issued.
    # @keyparam namespaces Optional namespace prefix map.
    # @return A list or iterator containing all matching elements,
    #    in document order.
    # @defreturn list of Element instances

    def findall(self, path, namespaces=None):
        # assert self._root is not None
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=2
                )
        return self._root.findall(path, namespaces)

    ##
    # Finds all matching subelements, by tag name or path.
    # Same as getroot().iterfind(path).
    #
    # @param path What element to look for.  A path starting with "/"
    #     is searched as "." + path, and a FutureWarning is issued.
    # @keyparam namespaces Optional namespace prefix map.
    # @return An iterator or sequence containing all matching elements,
    #    in document order.
    # @defreturn a generated sequence of Element instances

    def iterfind(self, path, namespaces=None):
        # assert self._root is not None
        if path[:1] == "/":
            path = "." + path
            warnings.warn(
                "This search is broken in 1.3 and earlier, and will be "
                "fixed in a future version. If you rely on the current "
                "behaviour, change it to %r" % path,
                FutureWarning, stacklevel=2
                )
        return self._root.iterfind(path, namespaces)

    ##
    # Writes the element tree to a file, as XML.
    #
    # @def write(file, **options)
    # @param file A file name, or a file object opened for writing.  A
    #     file opened here from a file name is always closed again, even
    #     if serialization fails; a file object passed in by the caller
    #     is left open.
    # @param **options Options, given as keyword arguments.
    # @keyparam encoding Optional output encoding (default is US-ASCII,
    #     or UTF-8 for the "c14n" method).
    # @keyparam method Optional output method ("xml", "html", "text" or
    #     "c14n"; default is "xml").
    # @keyparam xml_declaration Controls if an XML declaration should
    #     be added to the file.  Use False for never, True for always,
    #     None for only if not US-ASCII or UTF-8.  None is default.
    #     The declaration is only ever written for the "xml" method.
    # @keyparam default_namespace Optional default namespace, passed on
    #     unchanged to the internal namespace scan.
    # @exception ValueError If the output method is not known.

    def write(self, file_or_filename,
              # keyword arguments
              encoding=None,
              xml_declaration=None,
              default_namespace=None,
              method=None):
        # assert self._root is not None
        if not method:
            method = "xml"
        elif method not in _serialize:
            # FIXME: raise an ImportError for c14n if ElementC14N is missing?
            raise ValueError("unknown method %r" % method)
        if hasattr(file_or_filename, "write"):
            file = file_or_filename
        else:
            file = open(file_or_filename, "wb")
        try:
            write = file.write
            if not encoding:
                if method == "c14n":
                    encoding = "utf-8"
                else:
                    encoding = "us-ascii"
            # Decide on the declaration only after the encoding default
            # is resolved, so that xml_declaration=True is honoured when
            # no encoding was given.
            if xml_declaration or (xml_declaration is None and
                                   encoding not in ("utf-8", "us-ascii")):
                if method == "xml":
                    write("<?xml version='1.0' encoding='%s'?>\n" % encoding)
            if method == "text":
                _serialize_text(write, self._root, encoding)
            else:
                qnames, namespaces = _namespaces(
                    self._root, encoding, default_namespace
                    )
                serialize = _serialize[method]
                serialize(write, self._root, encoding, qnames, namespaces)
        finally:
            # only close a file that was opened by this method
            if file_or_filename is not file:
                file.close()

    def write_c14n(self, file):
        # lxml.etree compatibility.  use output method instead
        return self.write(file, method="c14n")
740 | 822 |
741 # -------------------------------------------------------------------- | 823 # -------------------------------------------------------------------- |
742 # serialization support | 824 # serialization support |
743 | 825 |
744 def _namespaces(elem, encoding, default_namespace=None): | 826 def _namespaces(elem, encoding, default_namespace=None): |
745 # identify namespaces used in this tree | 827 # identify namespaces used in this tree |
746 | 828 |
747 # maps qnames to *encoded* prefix:local names | 829 # maps qnames to *encoded* prefix:local names |
748 qnames = {None: None} | 830 qnames = {None: None} |
749 | 831 |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
819 tag = qnames[tag] | 901 tag = qnames[tag] |
820 if tag is None: | 902 if tag is None: |
821 if text: | 903 if text: |
822 write(_escape_cdata(text, encoding)) | 904 write(_escape_cdata(text, encoding)) |
823 for e in elem: | 905 for e in elem: |
824 _serialize_xml(write, e, encoding, qnames, None) | 906 _serialize_xml(write, e, encoding, qnames, None) |
825 else: | 907 else: |
826 write("<" + tag) | 908 write("<" + tag) |
827 items = elem.items() | 909 items = elem.items() |
828 if items or namespaces: | 910 if items or namespaces: |
829 items.sort() # lexical order | 911 if namespaces: |
830 for k, v in items: | 912 for v, k in sorted(namespaces.items(), |
| 913 key=lambda x: x[1]): # sort on prefix |
| 914 if k: |
| 915 k = ":" + k |
| 916 write(" xmlns%s=\"%s\"" % ( |
| 917 k.encode(encoding), |
| 918 _escape_attrib(v, encoding) |
| 919 )) |
| 920 for k, v in sorted(items): # lexical order |
831 if isinstance(k, QName): | 921 if isinstance(k, QName): |
832 k = k.text | 922 k = k.text |
833 if isinstance(v, QName): | 923 if isinstance(v, QName): |
834 v = qnames[v.text] | 924 v = qnames[v.text] |
835 else: | 925 else: |
836 v = _escape_attrib(v, encoding) | 926 v = _escape_attrib(v, encoding) |
837 write(" %s=\"%s\"" % (qnames[k], v)) | 927 write(" %s=\"%s\"" % (qnames[k], v)) |
838 if namespaces: | |
839 items = namespaces.items() | |
840 items.sort(key=lambda x: x[1]) # sort on prefix | |
841 for v, k in items: | |
842 if k: | |
843 k = ":" + k | |
844 write(" xmlns%s=\"%s\"" % ( | |
845 k.encode(encoding), | |
846 _escape_attrib(v, encoding) | |
847 )) | |
848 if text or len(elem): | 928 if text or len(elem): |
849 write(">") | 929 write(">") |
850 if text: | 930 if text: |
851 write(_escape_cdata(text, encoding)) | 931 write(_escape_cdata(text, encoding)) |
852 for e in elem: | 932 for e in elem: |
853 _serialize_xml(write, e, encoding, qnames, None) | 933 _serialize_xml(write, e, encoding, qnames, None) |
854 write("</" + tag + ">") | 934 write("</" + tag + ">") |
855 else: | 935 else: |
856 write(" />") | 936 write(" />") |
857 if elem.tail: | 937 if elem.tail: |
858 write(_escape_cdata(elem.tail, encoding)) | 938 write(_escape_cdata(elem.tail, encoding)) |
859 | 939 |
# Lower-case names of the HTML elements that are written without a closing
# tag by the HTML serializer.  Every name needs its own comma: adjacent
# string literals are silently concatenated into one entry.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# Use a set for the membership test where the builtin exists; otherwise
# keep the tuple.
try:
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    pass
867 | 947 |
def _serialize_html(write, elem, encoding, qnames, namespaces):
    # Writes elem and its subtree as HTML through the write callable.
    # namespaces is only passed for the outermost element; every
    # recursive call passes None, so xmlns declarations appear once.
    raw_tag = elem.tag
    text = elem.text
    if raw_tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif raw_tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        name = qnames[raw_tag]
        if name is None:
            # no serialized name: emit the content without any markup
            if text:
                write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
        else:
            write("<" + name)
            attributes = elem.items()
            if attributes or namespaces:
                if namespaces:
                    declarations = sorted(
                        namespaces.items(), key=lambda x: x[1]
                        ) # sort on prefix
                    for uri, prefix in declarations:
                        if prefix:
                            prefix = ":" + prefix
                        write(" xmlns%s=\"%s\"" % (
                            prefix.encode(encoding),
                            _escape_attrib(uri, encoding)
                            ))
                for key, value in sorted(attributes): # lexical order
                    if isinstance(key, QName):
                        key = key.text
                    if isinstance(value, QName):
                        value = qnames[value.text]
                    else:
                        value = _escape_attrib_html(value, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[key], value))
            write(">")
            # from here on the tag is compared and closed in lower case
            lowered = name.lower()
            if text:
                if lowered == "script" or lowered == "style":
                    # script/style content is encoded but not escaped
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for child in elem:
                _serialize_html(write, child, encoding, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + lowered + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
919 | 997 |
920 def _serialize_text(write, elem, encoding): | 998 def _serialize_text(write, elem, encoding): |
921 for part in elem.itertext(): | 999 for part in elem.itertext(): |
922 write(part.encode(encoding)) | 1000 write(part.encode(encoding)) |
923 if elem.tail: | 1001 if elem.tail: |
924 write(elem.tail.encode(encoding)) | 1002 write(elem.tail.encode(encoding)) |
925 | 1003 |
# Output methods known to ElementTree.write, keyed by method name.
_serialize = {}
_serialize["xml"] = _serialize_xml
_serialize["html"] = _serialize_html
_serialize["text"] = _serialize_text
# this optional method is imported at the end of the module
# _serialize["c14n"] = _serialize_c14n
| 1011 |
926 ## | 1012 ## |
927 # Registers a namespace prefix. The registry is global, and any | 1013 # Registers a namespace prefix. The registry is global, and any |
928 # existing mapping for either the given prefix or the namespace URI | 1014 # existing mapping for either the given prefix or the namespace URI |
929 # will be removed. | 1015 # will be removed. |
930 # | 1016 # |
931 # @param prefix Namespace prefix. | 1017 # @param prefix Namespace prefix. |
932 # @param uri Namespace uri. Tags and attributes in this namespace | 1018 # @param uri Namespace uri. Tags and attributes in this namespace |
933 # will be serialized with the given prefix, if at all possible. | 1019 # will be serialized with the given prefix, if at all possible. |
934 # @raise ValueError If the prefix is reserved, or is otherwise | 1020 # @exception ValueError If the prefix is reserved, or is otherwise |
935 # invalid. | 1021 # invalid. |
936 | 1022 |
def register_namespace(prefix, uri):
    # Registers prefix for uri in the global namespace map, first
    # dropping any existing entry that uses either the same URI or the
    # same prefix.  Raises ValueError for prefixes of the form "ns<digits>".
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Loop over a snapshot: entries are deleted inside the loop, and
    # deleting from a dict while iterating a live view of it is an error.
    for known_uri, known_prefix in list(_namespace_map.items()):
        if known_uri == uri or known_prefix == prefix:
            del _namespace_map[known_uri]
    _namespace_map[uri] = prefix
944 | 1030 |
945 _namespace_map = { | 1031 _namespace_map = { |
946 # "well-known" namespace prefixes | 1032 # "well-known" namespace prefixes |
947 "http://www.w3.org/XML/1998/namespace": "xml", | 1033 "http://www.w3.org/XML/1998/namespace": "xml", |
948 "http://www.w3.org/1999/xhtml": "html", | 1034 "http://www.w3.org/1999/xhtml": "html", |
949 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", | 1035 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", |
950 "http://schemas.xmlsoap.org/wsdl/": "wsdl", | 1036 "http://schemas.xmlsoap.org/wsdl/": "wsdl", |
951 # xml schema | 1037 # xml schema |
952 "http://www.w3.org/2001/XMLSchema": "xs", | 1038 "http://www.w3.org/2001/XMLSchema": "xs", |
953 "http://www.w3.org/2001/XMLSchema-instance": "xsi", | 1039 "http://www.w3.org/2001/XMLSchema-instance": "xsi", |
954 # dublin core | 1040 # dublin core |
955 "http://purl.org/dc/elements/1.1/": "dc", | 1041 "http://purl.org/dc/elements/1.1/": "dc", |
956 # Do we need others? (MathML, xlink, svg) | |
957 # http://www.w3.org/TR/html5/syntax.html#namespaces | |
958 } | 1042 } |
959 | 1043 |
960 def _raise_serialization_error(text): | 1044 def _raise_serialization_error(text): |
961 raise TypeError( | 1045 raise TypeError( |
962 "cannot serialize %r (type %s)" % (text, type(text).__name__) | 1046 "cannot serialize %r (type %s)" % (text, type(text).__name__) |
963 ) | 1047 ) |
964 | 1048 |
965 def _encode(text, encoding): | 1049 def _encode(text, encoding): |
966 try: | 1050 try: |
967 return text.encode(encoding, "xmlcharrefreplace") | 1051 return text.encode(encoding, "xmlcharrefreplace") |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1014 except (TypeError, AttributeError): | 1098 except (TypeError, AttributeError): |
1015 _raise_serialization_error(text) | 1099 _raise_serialization_error(text) |
1016 | 1100 |
1017 # -------------------------------------------------------------------- | 1101 # -------------------------------------------------------------------- |
1018 | 1102 |
1019 ## | 1103 ## |
1020 # Generates a string representation of an XML element, including all | 1104 # Generates a string representation of an XML element, including all |
1021 # subelements. | 1105 # subelements. |
1022 # | 1106 # |
1023 # @param element An Element instance. | 1107 # @param element An Element instance. |
| 1108 # @keyparam encoding Optional output encoding (default is US-ASCII). |
| 1109 # @keyparam method Optional output method ("xml", "html", "text" or |
| 1110 # "c14n"; default is "xml"). |
1024 # @return An encoded string containing the XML data. | 1111 # @return An encoded string containing the XML data. |
1025 # @defreturn string | 1112 # @defreturn string |
1026 | 1113 |
def tostring(element, encoding=None, method=None):
    # Serializes into an in-memory list of fragments and joins them once
    # at the end.  The sink is a bare object whose only attribute is
    # write, which is all ElementTree.write looks for.
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    return "".join(fragments)
1035 | 1122 |
1036 ## | 1123 ## |
1037 # Generates a string representation of an XML element, including all | 1124 # Generates a string representation of an XML element, including all |
1038 # subelements. The string is returned as a sequence of string fragments. | 1125 # subelements. The string is returned as a sequence of string fragments. |
1039 # | 1126 # |
1040 # @param element An Element instance. | 1127 # @param element An Element instance. |
| 1128 # @keyparam encoding Optional output encoding (default is US-ASCII). |
| 1129 # @keyparam method Optional output method ("xml", "html", "text" or |
| 1130 # "c14n"; default is "xml"). |
1041 # @return A sequence object containing the XML data. | 1131 # @return A sequence object containing the XML data. |
1042 # @defreturn sequence | 1132 # @defreturn sequence |
1043 # @since 1.3 | 1133 # @since 1.3 |
1044 | 1134 |
def tostringlist(element, encoding=None, method=None):
    # Same capture technique as a string dump, but the fragments are
    # returned as written, without joining them.
    class _Sink:
        pass
    fragments = []
    sink = _Sink()
    sink.write = fragments.append
    ElementTree(element).write(sink, encoding, method=method)
    # FIXME: merge small fragments into larger parts
    return fragments
1054 | 1144 |
1055 ## | 1145 ## |
1056 # Writes an element tree or element structure to sys.stdout. This | 1146 # Writes an element tree or element structure to sys.stdout. This |
1057 # function should be used for debugging only. | 1147 # function should be used for debugging only. |
1058 # <p> | 1148 # <p> |
1059 # The exact output format is implementation dependent. In this | 1149 # The exact output format is implementation dependent. In this |
1060 # version, it's written as an ordinary XML file. | 1150 # version, it's written as an ordinary XML file. |
1061 # | 1151 # |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1132 append((event, start(tag, attrib_in))) | 1222 append((event, start(tag, attrib_in))) |
1133 parser.StartElementHandler = handler | 1223 parser.StartElementHandler = handler |
1134 elif event == "end": | 1224 elif event == "end": |
1135 def handler(tag, event=event, append=append, | 1225 def handler(tag, event=event, append=append, |
1136 end=self._parser._end): | 1226 end=self._parser._end): |
1137 append((event, end(tag))) | 1227 append((event, end(tag))) |
1138 parser.EndElementHandler = handler | 1228 parser.EndElementHandler = handler |
1139 elif event == "start-ns": | 1229 elif event == "start-ns": |
1140 def handler(prefix, uri, event=event, append=append): | 1230 def handler(prefix, uri, event=event, append=append): |
1141 try: | 1231 try: |
1142 uri = uri.encode("ascii") | 1232 uri = (uri or "").encode("ascii") |
1143 except UnicodeError: | 1233 except UnicodeError: |
1144 pass | 1234 pass |
1145 append((event, (prefix or "", uri))) | 1235 append((event, (prefix or "", uri or ""))) |
1146 parser.StartNamespaceDeclHandler = handler | 1236 parser.StartNamespaceDeclHandler = handler |
1147 elif event == "end-ns": | 1237 elif event == "end-ns": |
1148 def handler(prefix, event=event, append=append): | 1238 def handler(prefix, event=event, append=append): |
1149 append((event, None)) | 1239 append((event, None)) |
1150 parser.EndNamespaceDeclHandler = handler | 1240 parser.EndNamespaceDeclHandler = handler |
1151 else: | 1241 else: |
1152 raise ValueError("unknown event %r" % event) | 1242 raise ValueError("unknown event %r" % event) |
1153 | 1243 |
1154 def next(self): | 1244 def next(self): |
1155 while 1: | 1245 while 1: |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1200 # standard {@link XMLParser} parser is used. | 1290 # standard {@link XMLParser} parser is used. |
1201 # @return A tuple containing an Element instance and a dictionary. | 1291 # @return A tuple containing an Element instance and a dictionary. |
1202 # @defreturn (Element, dictionary) | 1292 # @defreturn (Element, dictionary) |
1203 | 1293 |
def XMLID(text, parser=None):
    # Parses text and returns (root, ids), where ids maps each non-empty
    # "id" attribute value to the element carrying it.  When two elements
    # share an id, the one later in document order wins.
    if not parser:
        parser = XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    id_map = {}
    for node in root.iter():
        ident = node.get("id")
        if ident:
            id_map[ident] = node
    return root, id_map
1215 | 1305 |
1216 ## | 1306 ## |
1217 # Parses an XML document from a string constant. Same as {@link #XML}. | 1307 # Parses an XML document from a string constant. Same as {@link #XML}. |
1218 # | 1308 # |
1219 # @def fromstring(text) | 1309 # @def fromstring(text) |
1220 # @param source A string containing XML data. | 1310 # @param source A string containing XML data. |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1323 | 1413 |
def end(self, tag):
    """Close the current element and return it.

    Flushes pending data via self._flush(), pops the innermost open
    element off self._elem and records it as self._last.

    Raises AssertionError if tag differs from the tag of the element
    being closed.
    """
    self._flush()
    self._last = self._elem.pop()
    # Raise explicitly instead of using an assert statement, so the
    # mismatch check is not stripped when Python runs with -O.
    if self._last.tag != tag:
        raise AssertionError(
            "end tag mismatch (expected %s, got %s)" % (
                self._last.tag, tag))
    # NOTE(review): presumably tells _flush that following character
    # data belongs to the tail of the closed element -- confirm there.
    self._tail = 1
    return self._last
1332 | 1422 |
1333 | |
1334 ## | 1423 ## |
1335 # Element structure builder for XML source data, based on the | 1424 # Element structure builder for XML source data, based on the |
1336 # <b>expat</b> parser. | 1425 # <b>expat</b> parser. |
1337 # | 1426 # |
1338 # @keyparam target Target object. If omitted, the builder uses an | 1427 # @keyparam target Target object. If omitted, the builder uses an |
1339 # instance of the standard {@link #TreeBuilder} class. | 1428 # instance of the standard {@link #TreeBuilder} class. |
1340 # @keyparam html Predefine HTML entities. This flag is not supported | 1429 # @keyparam html Predefine HTML entities. This flag is not supported |
1341 # by the current implementation. | 1430 # by the current implementation. |
1342 # @keyparam encoding Optional encoding. If given, the value overrides | 1431 # @keyparam encoding Optional encoding. If given, the value overrides |
1343 # the encoding specified in the XML file. | 1432 # the encoding specified in the XML file. |
1344 # @see #ElementTree | 1433 # @see #ElementTree |
1345 # @see #TreeBuilder | 1434 # @see #TreeBuilder |
1346 | 1435 |
1347 class XMLParser(object): | 1436 class XMLParser(object): |
1348 | 1437 |
1349 def __init__(self, html=0, target=None, encoding=None): | 1438 def __init__(self, html=0, target=None, encoding=None): |
1350 try: | 1439 try: |
1351 from xml.parsers import expat | 1440 from xml.parsers import expat |
1352 except ImportError: | 1441 except ImportError: |
1353 try: | 1442 try: |
1354 import pyexpat; expat = pyexpat | 1443 import pyexpat as expat |
1355 except ImportError: | 1444 except ImportError: |
1356 raise ImportError( | 1445 raise ImportError( |
1357 "No module named expat; use SimpleXMLTreeBuilder instead" | 1446 "No module named expat; use SimpleXMLTreeBuilder instead" |
1358 ) | 1447 ) |
1359 parser = expat.ParserCreate(encoding, "}") | 1448 parser = expat.ParserCreate(encoding, "}") |
1360 if target is None: | 1449 if target is None: |
1361 target = TreeBuilder() | 1450 target = TreeBuilder() |
1362 # underscored names are provided for compatibility only | 1451 # underscored names are provided for compatibility only |
1363 self.parser = self._parser = parser | 1452 self.parser = self._parser = parser |
1364 self.target = self._target = target | 1453 self.target = self._target = target |
1365 self._error = expat.error | 1454 self._error = expat.error |
1366 self._names = {} # name memo cache | 1455 self._names = {} # name memo cache |
1367 # callbacks | 1456 # callbacks |
1368 parser.DefaultHandlerExpand = self._default | 1457 parser.DefaultHandlerExpand = self._default |
1369 parser.StartElementHandler = self._start | 1458 parser.StartElementHandler = self._start |
1370 parser.EndElementHandler = self._end | 1459 parser.EndElementHandler = self._end |
1371 parser.CharacterDataHandler = self._data | 1460 parser.CharacterDataHandler = self._data |
| 1461 # optional callbacks |
| 1462 parser.CommentHandler = self._comment |
| 1463 parser.ProcessingInstructionHandler = self._pi |
1372 # let expat do the buffering, if supported | 1464 # let expat do the buffering, if supported |
1373 try: | 1465 try: |
1374 self._parser.buffer_text = 1 | 1466 self._parser.buffer_text = 1 |
1375 except AttributeError: | 1467 except AttributeError: |
1376 pass | 1468 pass |
1377 # use new-style attribute handling, if supported | 1469 # use new-style attribute handling, if supported |
1378 try: | 1470 try: |
1379 self._parser.ordered_attributes = 1 | 1471 self._parser.ordered_attributes = 1 |
1380 self._parser.specified_attributes = 1 | 1472 self._parser.specified_attributes = 1 |
1381 parser.StartElementHandler = self._start_list | 1473 parser.StartElementHandler = self._start_list |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1429 if attrib_in: | 1521 if attrib_in: |
1430 for i in range(0, len(attrib_in), 2): | 1522 for i in range(0, len(attrib_in), 2): |
1431 attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) | 1523 attrib[fixname(attrib_in[i])] = fixtext(attrib_in[i+1]) |
1432 return self.target.start(tag, attrib) | 1524 return self.target.start(tag, attrib) |
1433 | 1525 |
def _data(self, text):
    """Pass character data, run through self._fixtext, to the target."""
    # look up the target callback first, then convert the text
    deliver = self.target.data
    return deliver(self._fixtext(text))
1436 | 1528 |
def _end(self, tag):
    """Pass an end tag, run through self._fixname, to the target."""
    # look up the target callback first, then convert the name
    finish = self.target.end
    return finish(self._fixname(tag))
| 1531 |
def _comment(self, data):
    """Forward a comment to the target's comment() method, if any.

    Returns whatever target.comment returns, or None when the target
    has no such attribute.
    """
    try:
        handler = self.target.comment
    except AttributeError:
        # target does not handle comments; ignore the event
        return None
    return handler(self._fixtext(data))
| 1539 |
def _pi(self, target, data):
    """Forward a processing instruction to the target's pi() method.

    Both the PI target name and its data go through self._fixtext.
    Returns whatever self.target.pi returns, or None when the target
    object has no such attribute.
    """
    try:
        handler = self.target.pi
    except AttributeError:
        # target does not handle processing instructions; ignore
        return None
    return handler(self._fixtext(target), self._fixtext(data))
1439 | 1547 |
1440 def _default(self, text): | 1548 def _default(self, text): |
1441 prefix = text[:1] | 1549 prefix = text[:1] |
1442 if prefix == "&": | 1550 if prefix == "&": |
1443 # deal with undefined entities | 1551 # deal with undefined entities |
1444 try: | 1552 try: |
1445 self.target.data(self.entity[text[1:-1]]) | 1553 self.target.data(self.entity[text[1:-1]]) |
1446 except KeyError: | 1554 except KeyError: |
1447 from xml.parsers import expat | 1555 from xml.parsers import expat |
1448 err = expat.error( | 1556 err = expat.error( |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1524 try: | 1632 try: |
1525 self._parser.Parse("", 1) # end of data | 1633 self._parser.Parse("", 1) # end of data |
1526 except self._error, v: | 1634 except self._error, v: |
1527 self._raiseerror(v) | 1635 self._raiseerror(v) |
1528 tree = self.target.close() | 1636 tree = self.target.close() |
1529 del self.target, self._parser # get rid of circular references | 1637 del self.target, self._parser # get rid of circular references |
1530 return tree | 1638 return tree |
1531 | 1639 |
# backwards-compatible alias for the parser class
XMLTreeBuilder = XMLParser

# The c14n serializer is imported at the bottom of the module to work
# around a circular import; it is optional, so a missing ElementC14N
# module is silently ignored.
try:
    from ElementC14N import _serialize_c14n
except ImportError:
    pass
else:
    _serialize["c14n"] = _serialize_c14n
LEFT | RIGHT |