LEFT | RIGHT |
1 // Copyright 2010 The Go Authors. All rights reserved. | 1 // Copyright 2010 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 package html | 5 package html |
6 | 6 |
7 import ( | 7 import ( |
8 "bytes" | 8 "bytes" |
9 "os" | 9 "os" |
| 10 "strings" |
10 "testing" | 11 "testing" |
11 ) | 12 ) |
12 | 13 |
13 type tokenTest struct { | 14 type tokenTest struct { |
14 // A short description of the test case. | 15 // A short description of the test case. |
15 desc string | 16 desc string |
16 // The HTML to parse. | 17 // The HTML to parse. |
17 html string | 18 html string |
18 » // The string representations of the expected tokens. | 19 » // The string representations of the expected tokens, joined by '$'. |
19 » tokens []string | 20 » golden string |
20 } | 21 } |
21 | 22 |
22 var tokenTests = []tokenTest{ | 23 var tokenTests = []tokenTest{ |
23 // A single text node. The tokenizer should not break text nodes on whit
espace, | 24 // A single text node. The tokenizer should not break text nodes on whit
espace, |
24 // nor should it normalize whitespace within a text node. | 25 // nor should it normalize whitespace within a text node. |
25 { | 26 { |
26 "text", | 27 "text", |
27 "foo bar", | 28 "foo bar", |
28 » » []string{ | 29 » » "foo bar", |
29 » » » "foo bar", | |
30 » » }, | |
31 }, | 30 }, |
32 // An entity. | 31 // An entity. |
33 { | 32 { |
34 "entity", | 33 "entity", |
35 "one < two", | 34 "one < two", |
36 » » []string{ | 35 » » "one < two", |
37 » » » "one < two", | |
38 » » }, | |
39 }, | 36 }, |
40 // A start, self-closing and end tag. The tokenizer does not care if the
start | 37 // A start, self-closing and end tag. The tokenizer does not care if the
start |
41 // and end tokens don't match; that is the job of the parser. | 38 // and end tokens don't match; that is the job of the parser. |
42 { | 39 { |
43 "tags", | 40 "tags", |
44 "<a>b<c/>d</e>", | 41 "<a>b<c/>d</e>", |
45 » » []string{ | 42 » » "<a>$b$<c/>$d$</e>", |
46 » » » "<a>", | 43 » }, |
47 » » » "b", | 44 » // Comments. |
48 » » » "<c/>", | 45 » { |
49 » » » "d", | 46 » » "comment0", |
50 » » » "</e>", | |
51 » » }, | |
52 » }, | |
53 » // A comment. | |
54 » { | |
55 » » "comment", | |
56 "abc<b><!-- skipme --></b>def", | 47 "abc<b><!-- skipme --></b>def", |
57 » » []string{ | 48 » » "abc$<b>$</b>$def", |
58 » » » "abc", | 49 » }, |
59 » » » "<b>", | 50 » { |
60 » » » "</b>", | 51 » » "comment1", |
61 » » » "def", | 52 » » "a<!-->z", |
62 » » }, | 53 » » "a$z", |
| 54 » }, |
| 55 » { |
| 56 » » "comment2", |
| 57 » » "a<!--->z", |
| 58 » » "a$z", |
| 59 » }, |
| 60 » { |
| 61 » » "comment3", |
| 62 » » "a<!--x>-->z", |
| 63 » » "a$z", |
| 64 » }, |
| 65 » { |
| 66 » » "comment4", |
| 67 » » "a<!--x->-->z", |
| 68 » » "a$z", |
| 69 » }, |
| 70 » { |
| 71 » » "comment5", |
| 72 » » "a<!>z", |
| 73 » » "a$<!>z", |
| 74 » }, |
| 75 » { |
| 76 » » "comment6", |
| 77 » » "a<!->z", |
| 78 » » "a$<!->z", |
| 79 » }, |
| 80 » { |
| 81 » » "comment7", |
| 82 » » "a<!---<>z", |
| 83 » » "a$<!---<>z", |
| 84 » }, |
| 85 » { |
| 86 » » "comment8", |
| 87 » » "a<!--z", |
| 88 » » "a$<!--z", |
63 }, | 89 }, |
64 // An attribute with a backslash. | 90 // An attribute with a backslash. |
65 { | 91 { |
66 "backslash", | 92 "backslash", |
67 `<p id="a\"b">`, | 93 `<p id="a\"b">`, |
68 » » []string{ | 94 » » `<p id="a"b">`, |
69 » » » `<p id="a"b">`, | |
70 » » }, | |
71 }, | 95 }, |
72 // Entities, tag name and attribute key lower-casing, and whitespace | 96 // Entities, tag name and attribute key lower-casing, and whitespace |
73 // normalization within a tag. | 97 // normalization within a tag. |
74 { | 98 { |
75 "tricky", | 99 "tricky", |
76 "<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p
>", | 100 "<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p
>", |
77 » » []string{ | 101 » » `<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`, |
78 » » » `<p id="a"B" foo="bar">`, | |
79 » » » "<em>", | |
80 » » » "te<&;xt", | |
81 » » » "</em>", | |
82 » » » "</p>", | |
83 » » }, | |
84 }, | 102 }, |
85 // A non-existent entity. Tokenizing and converting back to a string sho
uld | 103 // A non-existent entity. Tokenizing and converting back to a string sho
uld |
86 // escape the "&" to become "&amp;". | 104 // escape the "&" to become "&amp;". |
87 { | 105 { |
88 "noSuchEntity", | 106 "noSuchEntity", |
89 `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`, | 107 `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`, |
90 » » []string{ | 108 » » `<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`, |
91 » » » `<a b="c&noSuchEntity;d">`, | |
92 » » » "<&alsoDoesntExist;&", | |
93 » » }, | |
94 }, | 109 }, |
95 } | 110 } |
96 | 111 |
97 func TestTokenizer(t *testing.T) { | 112 func TestTokenizer(t *testing.T) { |
98 loop: | 113 loop: |
99 for _, tt := range tokenTests { | 114 for _, tt := range tokenTests { |
100 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) | 115 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) |
101 » » for i, s := range tt.tokens { | 116 » » for i, s := range strings.Split(tt.golden, "$", -1) { |
102 if z.Next() == ErrorToken { | 117 if z.Next() == ErrorToken { |
103 t.Errorf("%s token %d: want %q got error %v", tt
.desc, i, s, z.Error()) | 118 t.Errorf("%s token %d: want %q got error %v", tt
.desc, i, s, z.Error()) |
104 continue loop | 119 continue loop |
105 } | 120 } |
106 actual := z.Token().String() | 121 actual := z.Token().String() |
107 if s != actual { | 122 if s != actual { |
108 t.Errorf("%s token %d: want %q got %q", tt.desc,
i, s, actual) | 123 t.Errorf("%s token %d: want %q got %q", tt.desc,
i, s, actual) |
109 continue loop | 124 continue loop |
110 } | 125 } |
111 } | 126 } |
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
233 } | 248 } |
234 } | 249 } |
235 } | 250 } |
236 } | 251 } |
237 u := "14567" | 252 u := "14567" |
238 v := string(result.Bytes()) | 253 v := string(result.Bytes()) |
239 if u != v { | 254 if u != v { |
240 t.Errorf("TestBufAPI: want %q got %q", u, v) | 255 t.Errorf("TestBufAPI: want %q got %q", u, v) |
241 } | 256 } |
242 } | 257 } |
LEFT | RIGHT |