Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(603)

Side by Side Diff: src/pkg/encoding/xml/xml.go

Issue 12556043: code review 12556043: encoding/xml: add, support Unmarshaler interface (Closed)
Patch Set: diff -r b20db4bc66df https://code.google.com/p/go/ Created 10 years, 7 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2009 The Go Authors. All rights reserved. 1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Package xml implements a simple XML 1.0 parser that 5 // Package xml implements a simple XML 1.0 parser that
6 // understands XML name spaces. 6 // understands XML name spaces.
7 package xml 7 package xml
8 8
9 // References: 9 // References:
10 // Annotated XML spec: http://www.xml.com/axml/testaxml.htm 10 // Annotated XML spec: http://www.xml.com/axml/testaxml.htm
11 // XML name spaces: http://www.w3.org/TR/REC-xml-names/ 11 // XML name spaces: http://www.w3.org/TR/REC-xml-names/
12 12
13 // TODO(rsc): 13 // TODO(rsc):
14 // Test error handling. 14 // Test error handling.
15 15
16 import ( 16 import (
17 "bufio" 17 "bufio"
18 "bytes" 18 "bytes"
19 "errors"
19 "fmt" 20 "fmt"
20 "io" 21 "io"
21 "strconv" 22 "strconv"
22 "strings" 23 "strings"
23 "unicode" 24 "unicode"
24 "unicode/utf8" 25 "unicode/utf8"
25 ) 26 )
26 27
27 // A SyntaxError represents a syntax error in the XML input stream. 28 // A SyntaxError represents a syntax error in the XML input stream.
28 type SyntaxError struct { 29 type SyntaxError struct {
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after
167 // non-UTF-8 charset into UTF-8. If CharsetReader is nil or 168 // non-UTF-8 charset into UTF-8. If CharsetReader is nil or
168 // returns an error, parsing stops with an error. One of the 169 // returns an error, parsing stops with an error. One of the
169 // the CharsetReader's result values must be non-nil. 170 // the CharsetReader's result values must be non-nil.
170 CharsetReader func(charset string, input io.Reader) (io.Reader, error) 171 CharsetReader func(charset string, input io.Reader) (io.Reader, error)
171 172
172 // DefaultSpace sets the default name space used for unadorned tags, 173 // DefaultSpace sets the default name space used for unadorned tags,
173 // as if the entire XML stream were wrapped in an element containing 174 // as if the entire XML stream were wrapped in an element containing
174 // the attribute xmlns="DefaultSpace". 175 // the attribute xmlns="DefaultSpace".
175 DefaultSpace string 176 DefaultSpace string
176 177
177 » r io.ByteReader 178 » r io.ByteReader
178 » buf bytes.Buffer 179 » buf bytes.Buffer
179 » saved *bytes.Buffer 180 » saved *bytes.Buffer
180 » stk *stack 181 » stk *stack
181 » free *stack 182 » free *stack
182 » needClose bool 183 » needClose bool
183 » toClose Name 184 » toClose Name
184 » nextToken Token 185 » nextToken Token
185 » nextByte int 186 » nextByte int
186 » ns map[string]string 187 » ns map[string]string
187 » err error 188 » err error
188 » line int 189 » line int
190 » unmarshalDepth int
189 } 191 }
190 192
191 // NewDecoder creates a new XML parser reading from r. 193 // NewDecoder creates a new XML parser reading from r.
192 func NewDecoder(r io.Reader) *Decoder { 194 func NewDecoder(r io.Reader) *Decoder {
193 d := &Decoder{ 195 d := &Decoder{
194 ns: make(map[string]string), 196 ns: make(map[string]string),
195 nextByte: -1, 197 nextByte: -1,
196 line: 1, 198 line: 1,
197 Strict: true, 199 Strict: true,
198 } 200 }
(...skipping 17 matching lines...) Expand all
216 // if Token encounters an unexpected end element, 218 // if Token encounters an unexpected end element,
217 // it will return an error. 219 // it will return an error.
218 // 220 //
219 // Token implements XML name spaces as described by 221 // Token implements XML name spaces as described by
220 // http://www.w3.org/TR/REC-xml-names/. Each of the 222 // http://www.w3.org/TR/REC-xml-names/. Each of the
221 // Name structures contained in the Token has the Space 223 // Name structures contained in the Token has the Space
222 // set to the URL identifying its name space when known. 224 // set to the URL identifying its name space when known.
223 // If Token encounters an unrecognized name space prefix, 225 // If Token encounters an unrecognized name space prefix,
224 // it uses the prefix as the Space rather than report an error. 226 // it uses the prefix as the Space rather than report an error.
225 func (d *Decoder) Token() (t Token, err error) { 227 func (d *Decoder) Token() (t Token, err error) {
228 if d.stk != nil && d.stk.kind == stkEOF {
229 err = io.EOF
230 return
231 }
226 if d.nextToken != nil { 232 if d.nextToken != nil {
227 t = d.nextToken 233 t = d.nextToken
228 d.nextToken = nil 234 d.nextToken = nil
229 » } else if t, err = d.RawToken(); err != nil { 235 » } else if t, err = d.rawToken(); err != nil {
230 return 236 return
231 } 237 }
232 238
233 if !d.Strict { 239 if !d.Strict {
234 if t1, ok := d.autoClose(t); ok { 240 if t1, ok := d.autoClose(t); ok {
235 d.nextToken = t 241 d.nextToken = t
236 t = t1 242 t = t1
237 } 243 }
238 } 244 }
239 switch t1 := t.(type) { 245 switch t1 := t.(type) {
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
315 type stack struct { 321 type stack struct {
316 next *stack 322 next *stack
317 kind int 323 kind int
318 name Name 324 name Name
319 ok bool 325 ok bool
320 } 326 }
321 327
322 const ( 328 const (
323 stkStart = iota 329 stkStart = iota
324 stkNs 330 stkNs
331 stkEOF
325 ) 332 )
326 333
327 func (d *Decoder) push(kind int) *stack { 334 func (d *Decoder) push(kind int) *stack {
328 s := d.free 335 s := d.free
329 if s != nil { 336 if s != nil {
330 d.free = s.next 337 d.free = s.next
331 } else { 338 } else {
332 s = new(stack) 339 s = new(stack)
333 } 340 }
334 s.next = d.stk 341 s.next = d.stk
335 s.kind = kind 342 s.kind = kind
336 d.stk = s 343 d.stk = s
337 return s 344 return s
338 } 345 }
339 346
340 func (d *Decoder) pop() *stack { 347 func (d *Decoder) pop() *stack {
341 s := d.stk 348 s := d.stk
342 if s != nil { 349 if s != nil {
343 d.stk = s.next 350 d.stk = s.next
344 s.next = d.free 351 s.next = d.free
345 d.free = s 352 d.free = s
346 } 353 }
347 return s 354 return s
348 } 355 }
349 356
357 // Record that after the current element is finished
358 // (that element is already pushed on the stack)
359 // Token should return EOF until popEOF is called.
360 func (d *Decoder) pushEOF() {
361 // Walk down stack to find Start.
362 // It might not be the top, because there might be stkNs
363 // entries above it.
364 start := d.stk
365 for start.kind != stkStart {
366 start = start.next
367 }
368 // The stkNs entries below a start are associated with that
369 // element too; skip over them.
370 for start.next != nil && start.next.kind == stkNs {
371 start = start.next
372 }
373 s := d.free
374 if s != nil {
375 d.free = s.next
376 } else {
377 s = new(stack)
378 }
379 s.kind = stkEOF
380 s.next = start.next
381 start.next = s
382 }
383
384 // Undo a pushEOF.
385 // The element must have been finished, so the EOF should be at the top of the stack.
386 func (d *Decoder) popEOF() bool {
387 if d.stk == nil || d.stk.kind != stkEOF {
388 return false
389 }
390 d.pop()
391 return true
392 }
393
350 // Record that we are starting an element with the given name. 394 // Record that we are starting an element with the given name.
351 func (d *Decoder) pushElement(name Name) { 395 func (d *Decoder) pushElement(name Name) {
352 s := d.push(stkStart) 396 s := d.push(stkStart)
353 s.name = name 397 s.name = name
354 } 398 }
355 399
356 // Record that we are changing the value of ns[local]. 400 // Record that we are changing the value of ns[local].
357 // The old value is url, ok. 401 // The old value is url, ok.
358 func (d *Decoder) pushNs(local string, url string, ok bool) { 402 func (d *Decoder) pushNs(local string, url string, ok bool) {
359 s := d.push(stkNs) 403 s := d.push(stkNs)
(...skipping 28 matching lines...) Expand all
388 return true 432 return true
389 } 433 }
390 d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">") 434 d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
391 return false 435 return false
392 case s.name.Space != name.Space: 436 case s.name.Space != name.Space:
393 d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space + 437 d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
394 "closed by </" + name.Local + "> in space " + name.Space) 438 "closed by </" + name.Local + "> in space " + name.Space)
395 return false 439 return false
396 } 440 }
397 441
398 » // Pop stack until a Start is on the top, undoing the 442 » // Pop stack until a Start or EOF is on the top, undoing the
399 // translations that were associated with the element we just closed. 443 // translations that were associated with the element we just closed.
400 » for d.stk != nil && d.stk.kind != stkStart { 444 » for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF {
401 s := d.pop() 445 s := d.pop()
402 if s.ok { 446 if s.ok {
403 d.ns[s.name.Local] = s.name.Space 447 d.ns[s.name.Local] = s.name.Space
404 } else { 448 } else {
405 delete(d.ns, s.name.Local) 449 delete(d.ns, s.name.Local)
406 } 450 }
407 } 451 }
408 452
409 return true 453 return true
410 } 454 }
(...skipping 11 matching lines...) Expand all
422 et, ok := t.(EndElement) 466 et, ok := t.(EndElement)
423 if !ok || et.Name.Local != name { 467 if !ok || et.Name.Local != name {
424 return EndElement{d.stk.name}, true 468 return EndElement{d.stk.name}, true
425 } 469 }
426 break 470 break
427 } 471 }
428 } 472 }
429 return nil, false 473 return nil, false
430 } 474 }
431 475
476 var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method")
477
432 // RawToken is like Token but does not verify that 478 // RawToken is like Token but does not verify that
433 // start and end elements match and does not translate 479 // start and end elements match and does not translate
434 // name space prefixes to their corresponding URLs. 480 // name space prefixes to their corresponding URLs.
435 func (d *Decoder) RawToken() (Token, error) { 481 func (d *Decoder) RawToken() (Token, error) {
482 if d.unmarshalDepth > 0 {
483 return nil, errRawToken
Dominik Honnef 2013/08/09 16:21:45 Should this error condition be documented?
rsc 2013/08/13 16:24:34 Done.
484 }
485 return d.rawToken()
486 }
487
488 func (d *Decoder) rawToken() (Token, error) {
436 if d.err != nil { 489 if d.err != nil {
437 return nil, d.err 490 return nil, d.err
438 } 491 }
439 if d.needClose { 492 if d.needClose {
440 // The last element we read was self-closing and 493 // The last element we read was self-closing and
441 // we returned just the StartElement half. 494 // we returned just the StartElement half.
442 // Return the EndElement half now. 495 // Return the EndElement half now.
443 d.needClose = false 496 d.needClose = false
444 return EndElement{d.toClose}, nil 497 return EndElement{d.toClose}, nil
445 } 498 }
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
477 return nil, d.err 530 return nil, d.err
478 } 531 }
479 if b != '>' { 532 if b != '>' {
480 d.err = d.syntaxError("invalid characters between </" + name.Local + " and >") 533 d.err = d.syntaxError("invalid characters between </" + name.Local + " and >")
481 return nil, d.err 534 return nil, d.err
482 } 535 }
483 return EndElement{name}, nil 536 return EndElement{name}, nil
484 537
485 case '?': 538 case '?':
486 // <?: Processing instruction. 539 // <?: Processing instruction.
487 » » // TODO(rsc): Should parse the <?xml declaration to make sure 540 » » // TODO(rsc): Should parse the <?xml declaration to make sure the version is 1.0.
488 » » // the version is 1.0 and the encoding is UTF-8.
489 var target string 541 var target string
490 if target, ok = d.name(); !ok { 542 if target, ok = d.name(); !ok {
491 if d.err == nil { 543 if d.err == nil {
492 d.err = d.syntaxError("expected target name after <?") 544 d.err = d.syntaxError("expected target name after <?")
493 } 545 }
494 return nil, d.err 546 return nil, d.err
495 } 547 }
496 d.space() 548 d.space()
497 d.buf.Reset() 549 d.buf.Reset()
498 var b0 byte 550 var b0 byte
(...skipping 606 matching lines...) Expand 10 before | Expand all | Expand 10 after
1105 if c == utf8.RuneError && n == 1 { 1157 if c == utf8.RuneError && n == 1 {
1106 return false 1158 return false
1107 } 1159 }
1108 if !unicode.Is(first, c) && !unicode.Is(second, c) { 1160 if !unicode.Is(first, c) && !unicode.Is(second, c) {
1109 return false 1161 return false
1110 } 1162 }
1111 } 1163 }
1112 return true 1164 return true
1113 } 1165 }
1114 1166
1167 func isNameString(s string) bool {
1168 if len(s) == 0 {
1169 return false
1170 }
1171 c, n := utf8.DecodeRuneInString(s)
1172 if c == utf8.RuneError && n == 1 {
1173 return false
1174 }
1175 if !unicode.Is(first, c) {
1176 return false
1177 }
1178 for n < len(s) {
1179 s = s[n:]
1180 c, n = utf8.DecodeRuneInString(s)
1181 if c == utf8.RuneError && n == 1 {
1182 return false
1183 }
1184 if !unicode.Is(first, c) && !unicode.Is(second, c) {
1185 return false
1186 }
1187 }
1188 return true
1189 }
1190
1115 // These tables were generated by cut and paste from Appendix B of 1191 // These tables were generated by cut and paste from Appendix B of
1116 // the XML spec at http://www.xml.com/axml/testaxml.htm 1192 // the XML spec at http://www.xml.com/axml/testaxml.htm
1117 // and then reformatting. First corresponds to (Letter | '_' | ':') 1193 // and then reformatting. First corresponds to (Letter | '_' | ':')
1118 // and second corresponds to NameChar. 1194 // and second corresponds to NameChar.
1119 1195
1120 var first = &unicode.RangeTable{ 1196 var first = &unicode.RangeTable{
1121 R16: []unicode.Range16{ 1197 R16: []unicode.Range16{
1122 {0x003A, 0x003A, 1}, 1198 {0x003A, 0x003A, 1},
1123 {0x0041, 0x005A, 1}, 1199 {0x0041, 0x005A, 1},
1124 {0x005F, 0x005F, 1}, 1200 {0x005F, 0x005F, 1},
(...skipping 646 matching lines...) Expand 10 before | Expand all | Expand 10 after
1771 return err 1847 return err
1772 } 1848 }
1773 last = i 1849 last = i
1774 } 1850 }
1775 if _, err := w.Write(s[last:]); err != nil { 1851 if _, err := w.Write(s[last:]); err != nil {
1776 return err 1852 return err
1777 } 1853 }
1778 return nil 1854 return nil
1779 } 1855 }
1780 1856
1857 // EscapeString writes to p the properly escaped XML equivalent
1858 // of the plain text data s.
1859 func (p *printer) EscapeString(s string) {
1860 var esc []byte
1861 last := 0
1862 for i := 0; i < len(s); {
1863 r, width := utf8.DecodeRuneInString(s[i:])
1864 i += width
1865 switch r {
1866 case '"':
1867 esc = esc_quot
1868 case '\'':
1869 esc = esc_apos
1870 case '&':
1871 esc = esc_amp
1872 case '<':
1873 esc = esc_lt
1874 case '>':
1875 esc = esc_gt
1876 case '\t':
1877 esc = esc_tab
1878 case '\n':
1879 esc = esc_nl
1880 case '\r':
1881 esc = esc_cr
1882 default:
1883 if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
1884 esc = esc_fffd
1885 break
1886 }
1887 continue
1888 }
1889 p.WriteString(s[last : i-width])
1890 p.Write(esc)
1891 last = i
1892 }
1893 p.WriteString(s[last:])
1894 }
1895
1781 // Escape is like EscapeText but omits the error return value. 1896 // Escape is like EscapeText but omits the error return value.
1782 // It is provided for backwards compatibility with Go 1.0. 1897 // It is provided for backwards compatibility with Go 1.0.
1783 // Code targeting Go 1.1 or later should use EscapeText. 1898 // Code targeting Go 1.1 or later should use EscapeText.
1784 func Escape(w io.Writer, s []byte) { 1899 func Escape(w io.Writer, s []byte) {
1785 EscapeText(w, s) 1900 EscapeText(w, s)
1786 } 1901 }
1787 1902
1788 // procInstEncoding parses the `encoding="..."` or `encoding='...'` 1903 // procInstEncoding parses the `encoding="..."` or `encoding='...'`
1789 // value out of the provided string, returning "" if not found. 1904 // value out of the provided string, returning "" if not found.
1790 func procInstEncoding(s string) string { 1905 func procInstEncoding(s string) string {
1791 // TODO: this parsing is somewhat lame and not exact. 1906 // TODO: this parsing is somewhat lame and not exact.
1792 // It works for all actual cases, though. 1907 // It works for all actual cases, though.
1793 idx := strings.Index(s, "encoding=") 1908 idx := strings.Index(s, "encoding=")
1794 if idx == -1 { 1909 if idx == -1 {
1795 return "" 1910 return ""
1796 } 1911 }
1797 v := s[idx+len("encoding="):] 1912 v := s[idx+len("encoding="):]
1798 if v == "" { 1913 if v == "" {
1799 return "" 1914 return ""
1800 } 1915 }
1801 if v[0] != '\'' && v[0] != '"' { 1916 if v[0] != '\'' && v[0] != '"' {
1802 return "" 1917 return ""
1803 } 1918 }
1804 idx = strings.IndexRune(v[1:], rune(v[0])) 1919 idx = strings.IndexRune(v[1:], rune(v[0]))
1805 if idx == -1 { 1920 if idx == -1 {
1806 return "" 1921 return ""
1807 } 1922 }
1808 return v[1 : idx+1] 1923 return v[1 : idx+1]
1809 } 1924 }
OLD | NEW
« src/pkg/encoding/xml/read.go ('K') | « src/pkg/encoding/xml/read_test.go ('k') | no next file » | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b