src/pkg/html/token_test.go - Issue 4186055: code review 4186055: html: tokenize HTML comments.

Delta Between Two Patch Sets: src/pkg/html/token_test.go

Issue 4186055: code review 4186055: html: tokenize HTML comments. (Closed)

Left Patch Set: diff -r 1d32c7df56c8 https://go.googlecode.com/hg/ Created 14 years, 1 month ago

Right Patch Set: diff -r c12d974b0f6c https://go.googlecode.com/hg/ Created 14 years, 1 month ago

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 // Copyright 2010 The Go Authors. All rights reserved.	1 // Copyright 2010 The Go Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 package html	5 package html

6	6

7 import (	7 import (

8 "bytes"	8 "bytes"

9 "os"	9 "os"

	10 "strings"

10 "testing"	11 "testing"

11 )	12 )

12	13

13 type tokenTest struct {	14 type tokenTest struct {

14 // A short description of the test case.	15 // A short description of the test case.

15 desc string	16 desc string

16 // The HTML to parse.	17 // The HTML to parse.

17 html string	18 html string

18 » // The string representations of the expected tokens.	19 » // The string representations of the expected tokens, joined by '$'.

19 » tokens []string	20 » golden string

20 }	21 }

21	22

22 var tokenTests = []tokenTest{	23 var tokenTests = []tokenTest{

23 // A single text node. The tokenizer should not break text nodes on whitespace,	24 // A single text node. The tokenizer should not break text nodes on whitespace,

24 // nor should it normalize whitespace within a text node.	25 // nor should it normalize whitespace within a text node.

25 {	26 {

26 "text",	27 "text",

27 "foo bar",	28 "foo bar",

28 » » []string{	29 » » "foo bar",

29 » » » "foo bar",

30 » » },

31 },	30 },

32 // An entity.	31 // An entity.

33 {	32 {

34 "entity",	33 "entity",

35 "one &lt; two",	34 "one &lt; two",

36 » » []string{	35 » » "one &lt; two",

37 » » » "one &lt; two",

38 » » },

39 },	36 },

40 // A start, self-closing and end tag. The tokenizer does not care if the start	37 // A start, self-closing and end tag. The tokenizer does not care if the start

41 // and end tokens don't match; that is the job of the parser.	38 // and end tokens don't match; that is the job of the parser.

42 {	39 {

43 "tags",	40 "tags",

44 "<a>b<c/>d</e>",	41 "<a>b<c/>d</e>",

45 » » []string{	42 » » "<a>$b$<c/>$d$</e>",

46 » » » "<a>",	43 » },

47 » » » "b",	44 » // Comments.

48 » » » "<c/>",	45 » {

49 » » » "d",	46 » » "comment0",

50 » » » "</e>",

51 » » },

52 » },

53 » // A comment.

54 » {

55 » » "comment",

56 "abc<b><!-- skipme --></b>def",	47 "abc<b><!-- skipme --></b>def",

57 » » []string{	48 » » "abc$<b>$</b>$def",

58 » » » "abc",	49 » },

59 » » » "<b>",	50 » {

60 » » » "</b>",	51 » » "comment1",

61 » » » "def",	52 » » "a<!-->z",

62 » » },	53 » » "a$z",

	54 » },

	55 » {

	56 » » "comment2",

	57 » » "a<!--->z",

	58 » » "a$z",

	59 » },

	60 » {

	61 » » "comment3",

	62 » » "a<!--x>-->z",

	63 » » "a$z",

	64 » },

	65 » {

	66 » » "comment4",

	67 » » "a<!--x->-->z",

	68 » » "a$z",

	69 » },

	70 » {

	71 » » "comment5",

	72 » » "a<!>z",

	73 » » "a$<!>z",

	74 » },

	75 » {

	76 » » "comment6",

	77 » » "a<!->z",

	78 » » "a$<!->z",

	79 » },

	80 » {

	81 » » "comment7",

	82 » » "a<!---<>z",

	83 » » "a$<!---<>z",

	84 » },

	85 » {

	86 » » "comment8",

	87 » » "a<!--z",

	88 » » "a$<!--z",

63 },	89 },

64 // An attribute with a backslash.	90 // An attribute with a backslash.

65 {	91 {

66 "backslash",	92 "backslash",

67 `<p id="a\"b">`,	93 `<p id="a\"b">`,

68 » » []string{	94 » » `<p id="a"b">`,

69 » » » `<p id="a"b">`,

70 » » },

71 },	95 },

72 // Entities, tag name and attribute key lower-casing, and whitespace	96 // Entities, tag name and attribute key lower-casing, and whitespace

73 // normalization within a tag.	97 // normalization within a tag.

74 {	98 {

75 "tricky",	99 "tricky",

76 "<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p >",	100 "<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p >",

77 » » []string{	101 » » `<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`,

78 » » » `<p id="a"B" foo="bar">`,

79 » » » "<em>",

80 » » » "te<&;xt",

81 » » » "</em>",

82 » » » "</p>",

83 » » },

84 },	102 },

85 // A non-existent entity. Tokenizing and converting back to a string should	103 // A non-existent entity. Tokenizing and converting back to a string should

86 // escape the "&" to become "&amp;".	104 // escape the "&" to become "&amp;".

87 {	105 {

88 "noSuchEntity",	106 "noSuchEntity",

89 `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,	107 `<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,

90 » » []string{	108 » » `<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,

91 » » » `<a b="c&noSuchEntity;d">`,

92 » » » "<&alsoDoesntExist;&",

93 » » },

94 },	109 },

95 }	110 }

96	111

97 func TestTokenizer(t *testing.T) {	112 func TestTokenizer(t *testing.T) {

98 loop:	113 loop:

99 for _, tt := range tokenTests {	114 for _, tt := range tokenTests {

100 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))	115 z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))

101 » » for i, s := range tt.tokens {	116 » » for i, s := range strings.Split(tt.golden, "$", -1) {

102 if z.Next() == ErrorToken {	117 if z.Next() == ErrorToken {

103 t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())	118 t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())

104 continue loop	119 continue loop

105 }	120 }

106 actual := z.Token().String()	121 actual := z.Token().String()

107 if s != actual {	122 if s != actual {

108 t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)	123 t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)

109 continue loop	124 continue loop

110 }	125 }

111 }	126 }

(...skipping 121 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
233 }	248 }

234 }	249 }

235 }	250 }

236 }	251 }

237 u := "14567"	252 u := "14567"

238 v := string(result.Bytes())	253 v := string(result.Bytes())

239 if u != v {	254 if u != v {

240 t.Errorf("TestBufAPI: want %q got %q", u, v)	255 t.Errorf("TestBufAPI: want %q got %q", u, v)

241 }	256 }

242 }	257 }

LEFT	RIGHT