Rietveld Code Review Tool

Delta Between Two Patch Sets: src/pkg/html/token_test.go

Issue 4186055: code review 4186055: html: tokenize HTML comments. (Closed)
Left Patch Set: diff -r 1d32c7df56c8 https://go.googlecode.com/hg/ Created 14 years, 1 month ago
Right Patch Set: diff -r c12d974b0f6c https://go.googlecode.com/hg/ Created 14 years, 1 month ago
LEFT | RIGHT
1 // Copyright 2010 The Go Authors. All rights reserved. 1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 package html 5 package html
6 6
7 import ( 7 import (
8 "bytes" 8 "bytes"
9 "os" 9 "os"
10 "strings"
10 "testing" 11 "testing"
11 ) 12 )
12 13
13 type tokenTest struct { 14 type tokenTest struct {
14 // A short description of the test case. 15 // A short description of the test case.
15 desc string 16 desc string
16 // The HTML to parse. 17 // The HTML to parse.
17 html string 18 html string
18 » // The string representations of the expected tokens. 19 » // The string representations of the expected tokens, joined by '$'.
19 » tokens []string 20 » golden string
20 } 21 }
21 22
22 var tokenTests = []tokenTest{ 23 var tokenTests = []tokenTest{
23 	// A single text node. The tokenizer should not break text nodes on whitespace, 24 	// A single text node. The tokenizer should not break text nodes on whitespace,
24 // nor should it normalize whitespace within a text node. 25 // nor should it normalize whitespace within a text node.
25 { 26 {
26 "text", 27 "text",
27 "foo bar", 28 "foo bar",
28 » » []string{ 29 » » "foo bar",
29 » » » "foo bar",
30 » » },
31 }, 30 },
32 // An entity. 31 // An entity.
33 { 32 {
34 "entity", 33 "entity",
35 "one < two", 34 "one < two",
36 » » []string{ 35 » » "one < two",
37 » » » "one < two",
38 » » },
39 }, 36 },
40 // A start, self-closing and end tag. The tokenizer does not care if the start 37 // A start, self-closing and end tag. The tokenizer does not care if the start
41 // and end tokens don't match; that is the job of the parser. 38 // and end tokens don't match; that is the job of the parser.
42 { 39 {
43 "tags", 40 "tags",
44 "<a>b<c/>d</e>", 41 "<a>b<c/>d</e>",
45 » » []string{ 42 » » "<a>$b$<c/>$d$</e>",
46 » » » "<a>", 43 » },
47 » » » "b", 44 » // Comments.
48 » » » "<c/>", 45 » {
49 » » » "d", 46 » » "comment0",
50 » » » "</e>",
51 » » },
52 » },
53 » // A comment.
54 » {
55 » » "comment",
56 "abc<b><!-- skipme --></b>def", 47 "abc<b><!-- skipme --></b>def",
57 » » []string{ 48 » » "abc$<b>$</b>$def",
58 » » » "abc", 49 » },
59 » » » "<b>", 50 » {
60 » » » "</b>", 51 » » "comment1",
61 » » » "def", 52 » » "a<!-->z",
62 » » }, 53 » » "a$z",
54 » },
55 » {
56 » » "comment2",
57 » » "a<!--->z",
58 » » "a$z",
59 » },
60 » {
61 » » "comment3",
62 » » "a<!--x>-->z",
63 » » "a$z",
64 » },
65 » {
66 » » "comment4",
67 » » "a<!--x->-->z",
68 » » "a$z",
69 » },
70 » {
71 » » "comment5",
72 » » "a<!>z",
73 » » "a$&lt;!&gt;z",
74 » },
75 » {
76 » » "comment6",
77 » » "a<!->z",
78 » » "a$&lt;!-&gt;z",
79 » },
80 » {
81 » » "comment7",
82 » » "a<!---<>z",
83 » » "a$&lt;!---&lt;&gt;z",
84 » },
85 » {
86 » » "comment8",
87 » » "a<!--z",
88 » » "a$&lt;!--z",
63 }, 89 },
64 // An attribute with a backslash. 90 // An attribute with a backslash.
65 { 91 {
66 "backslash", 92 "backslash",
67 `<p id="a\"b">`, 93 `<p id="a\"b">`,
68 » » []string{ 94 » » `<p id="a&quot;b">`,
69 » » » `<p id="a&quot;b">`,
70 » » },
71 }, 95 },
72 // Entities, tag name and attribute key lower-casing, and whitespace 96 // Entities, tag name and attribute key lower-casing, and whitespace
73 // normalization within a tag. 97 // normalization within a tag.
74 { 98 {
75 "tricky", 99 "tricky",
76 "<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p >", 100 "<p \t\n iD=\"a&quot;B\" foo=\"bar\"><EM>te&lt;&amp;;xt</em></p >",
77 » » []string{ 101 » » `<p id="a&quot;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
78 » » » `<p id="a&quot;B" foo="bar">`,
79 » » » "<em>",
80 » » » "te&lt;&amp;;xt",
81 » » » "</em>",
82 » » » "</p>",
83 » » },
84 }, 102 },
85 	// A non-existent entity. Tokenizing and converting back to a string should 103 	// A non-existent entity. Tokenizing and converting back to a string should
86 // escape the "&" to become "&amp;". 104 // escape the "&" to become "&amp;".
87 { 105 {
88 "noSuchEntity", 106 "noSuchEntity",
89 `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`, 107 `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
90 » » []string{ 108 » » `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
91 » » » `<a b="c&amp;noSuchEntity;d">`,
92 » » » "&lt;&amp;alsoDoesntExist;&amp;",
93 » » },
94 }, 109 },
95 } 110 }
96 111
97 func TestTokenizer(t *testing.T) { 112 func TestTokenizer(t *testing.T) {
98 loop: 113 loop:
99 for _, tt := range tokenTests { 114 for _, tt := range tokenTests {
100 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html))) 115 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))
101 » » for i, s := range tt.tokens { 116 » » for i, s := range strings.Split(tt.golden, "$", -1) {
102 if z.Next() == ErrorToken { 117 if z.Next() == ErrorToken {
103 				t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error()) 118 				t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())
104 continue loop 119 continue loop
105 } 120 }
106 actual := z.Token().String() 121 actual := z.Token().String()
107 if s != actual { 122 if s != actual {
108 t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual) 123 t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
109 continue loop 124 continue loop
110 } 125 }
111 } 126 }
(...skipping 121 matching lines...)
233 } 248 }
234 } 249 }
235 } 250 }
236 } 251 }
237 u := "14567" 252 u := "14567"
238 v := string(result.Bytes()) 253 v := string(result.Bytes())
239 if u != v { 254 if u != v {
240 t.Errorf("TestBufAPI: want %q got %q", u, v) 255 t.Errorf("TestBufAPI: want %q got %q", u, v)
241 } 256 }
242 } 257 }
LEFT | RIGHT
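Read together, the new comment cases pin down the tokenizer's intended behavior: a terminated comment is dropped from the token stream entirely, even in edge cases such as <!--> and <!--x>-->, while input that only looks like a comment opener (<!>, <!->, or an unterminated <!--z) is re-emitted as escaped text. A minimal in-package sketch of that behavior, assuming the package layout of this patch (the exampleComments helper is hypothetical, not part of the change):

	package html

	import "bytes"

	// exampleComments is a hypothetical helper showing what the new
	// comment test cases assert: well-formed comments vanish from the
	// token stream, so "a<!--x>-->z" yields only the two text tokens.
	func exampleComments() []string {
		z := NewTokenizer(bytes.NewBuffer([]byte("a<!--x>-->z")))
		var got []string
		for z.Next() != ErrorToken {
			got = append(got, z.Token().String())
		}
		return got // ["a", "z"], matching the "comment3" golden string
	}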
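The harness change in this delta is mechanical by comparison: each test's expected tokens move from a []string slice to a single golden string joined by '$', which TestTokenizer splits back apart. A standalone sketch of the pattern in modern Go (the three-argument strings.Split(s, sep, -1) in the patch is the pre-Go 1 API; an unlimited split is now spelled strings.Split(s, sep)):

	package main

	import (
		"fmt"
		"strings"
	)

	// tokenTest mirrors the struct in the patch: the expected tokens
	// are joined into one golden string with '$' as the separator.
	type tokenTest struct {
		desc   string
		html   string
		golden string
	}

	func main() {
		tt := tokenTest{"tags", "<a>b<c/>d</e>", "<a>$b$<c/>$d$</e>"}
		for i, want := range strings.Split(tt.golden, "$") {
			fmt.Printf("%s token %d: want %q\n", tt.desc, i, want)
		}
	}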
