src/pkg/exp/regexp/syntax/parse_test.go - Issue 4538123: code review 4538123: exp/regexp/syntax: syntax data structures, parser

Delta Between Two Patch Sets: src/pkg/exp/regexp/syntax/parse_test.go

Issue 4538123: code review 4538123: exp/regexp/syntax: syntax data structures, parser (Closed)

Left Patch Set: diff -r 69f12bae0f09 https://go.googlecode.com/hg Created 13 years, 9 months ago

Right Patch Set: diff -r 881a0fc6528d https://go.googlecode.com/hg Created 13 years, 9 months ago

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 // Copyright 2011 The Go Authors. All rights reserved.	1 // Copyright 2011 The Go Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 package syntax	5 package syntax

6	6

7 import (	7 import (

8 "bytes"	8 "bytes"

9 "fmt"	9 "fmt"

10 "testing"	10 "testing"

11 "unicode"	11 "unicode"

12 )	12 )

13	13

14 var parseTests = []struct {	14 var parseTests = []struct {

15 Regexp string	15 Regexp string

16 Dump string	16 Dump string

17 }{	17 }{

18 // Base cases	18 // Base cases

19 {"a", "lit{a}"},	19 {"a", "lit{a}"},

20 {"a.", "cat{lit{a}dot{}}"},	20 {"a.", "cat{lit{a}dot{}}"},

21 {"a.b", "cat{lit{a}dot{}lit{b}}"},	21 {"a.b", "cat{lit{a}dot{}lit{b}}"},

22 // { "ab", "str{ab}" },	22 // { "ab", "str{ab}" },
Sam 2011/06/10 00:14:55: Won't these str{} cases work - as just an extension of lit{} to >1 char?
rsc 2011/06/10 00:27:38: (In reply to Sam, 2011/06/10 00:14:55: "won't these str{} cases work - as just an extension of lit{} to >1 char?") That's the plan, but for now they compile into the form shown on the next line below: cat{lit{a}lit{b}} instead of lit{ab}. The alternate and concat methods will get much more complicated before all this is done, and one of the complications is turning cat{lit{a}lit{b}} into str{ab}.
23 {"ab", "cat{lit{a}lit{b}}"},	23 {"ab", "cat{lit{a}lit{b}}"},

24 {"a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}"},	24 {"a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}"},

25 // { "abc", "str{abc}" },	25 // { "abc", "str{abc}" },

26 {"abc", "cat{lit{a}lit{b}lit{c}}"},	26 {"abc", "cat{lit{a}lit{b}lit{c}}"},

27 {"a\|^", "alt{lit{a}bol{}}"},	27 {"a\|^", "alt{lit{a}bol{}}"},

28 // { "a\|b", "cc{0x61-0x62}" },	28 // { "a\|b", "cc{0x61-0x62}" },

29 {"a\|b", "alt{lit{a}lit{b}}"},	29 {"a\|b", "alt{lit{a}lit{b}}"},

30 {"(a)", "cap{lit{a}}"},	30 {"(a)", "cap{lit{a}}"},

31 {"(a)\|b", "alt{cap{lit{a}}lit{b}}"},	31 {"(a)\|b", "alt{cap{lit{a}}lit{b}}"},

32 {"a*", "star{lit{a}}"},	32 {"a*", "star{lit{a}}"},

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
98 // More interesting regular expressions.	98 // More interesting regular expressions.

99 // { "a{,2}", "str{a{,2}}" },	99 // { "a{,2}", "str{a{,2}}" },

100 // { "\\.\\^\\$\\\\", "str{.^$\\}" },	100 // { "\\.\\^\\$\\\\", "str{.^$\\}" },

101 {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"},	101 {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"},

102 {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"},	102 {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"},

103 {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8	103 {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8

104 // { "a*{", "cat{star{lit{a}}lit{{}}" },	104 // { "a*{", "cat{star{lit{a}}lit{{}}" },

105	105

106 // Test precedences	106 // Test precedences

107 // { "(?:ab)*", "star{str{ab}}" },	107 // { "(?:ab)*", "star{str{ab}}" },

108 // { "(ab)*", "star{cap{str{ab}}}" },	108 // { "(ab)*", "star{cap{str{ab}}}" },
Sam 2011/06/10 00:14:55: I think this one should work now.
rsc 2011/06/10 00:27:38: (In reply to Sam, 2011/06/10 00:14:55: "I think this one should work now") Same thing as above: the uncommented copy a few lines from now is the current parse tree, because nothing generates str yet.
109 // { "ab\|cd", "alt{str{ab}str{cd}}" },	109 // { "ab\|cd", "alt{str{ab}str{cd}}" },

110 // { "a(b\|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },	110 // { "a(b\|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },

111 {"(?:ab)*", "star{cat{lit{a}lit{b}}}"},	111 {"(?:ab)*", "star{cat{lit{a}lit{b}}}"},

112 {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"},	112 {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"},

113 {"ab\|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"},	113 {"ab\|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"},

114 {"a(b\|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"},	114 {"a(b\|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"},

115	115

116 // Test flattening.	116 // Test flattening.

117 // { "(?:a)", "lit{a}" },	117 // { "(?:a)", "lit{a}" },

118 // { "(?:ab)(?:cd)", "str{abcd}" },	118 // { "(?:ab)(?:cd)", "str{abcd}" },

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
187 OpNoWordBoundary: "nwb",	187 OpNoWordBoundary: "nwb",

188 OpCapture: "cap",	188 OpCapture: "cap",

189 OpStar: "star",	189 OpStar: "star",

190 OpPlus: "plus",	190 OpPlus: "plus",

191 OpQuest: "que",	191 OpQuest: "que",

192 OpRepeat: "rep",	192 OpRepeat: "rep",

193 OpConcat: "cat",	193 OpConcat: "cat",

194 OpAlternate: "alt",	194 OpAlternate: "alt",

195 }	195 }

196	196

	197 // dumpRegexp writes an encoding of the syntax tree for the regexp re to b.

	198 // It is used during testing to distinguish between parses that might print

	199 // the same using re's String method.

197 func dumpRegexp(b bytes.Buffer, re Regexp) {	200 func dumpRegexp(b bytes.Buffer, re Regexp) {

198 if int(re.Op) >= len(opNames) \|\| opNames[re.Op] == "" {	201 if int(re.Op) >= len(opNames) \|\| opNames[re.Op] == "" {

199 fmt.Fprintf(b, "op%d", re.Op)	202 fmt.Fprintf(b, "op%d", re.Op)

200 } else {	203 } else {

201 switch re.Op {	204 switch re.Op {

202 default:	205 default:

203 b.WriteString(opNames[re.Op])	206 b.WriteString(opNames[re.Op])

204 case OpStar, OpPlus, OpQuest, OpRepeat:	207 case OpStar, OpPlus, OpQuest, OpRepeat:

205 if re.Flags&NonGreedy != 0 {	208 if re.Flags&NonGreedy != 0 {

206 b.WriteByte('n')	209 b.WriteByte('n')

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
254 lo, hi := re.Rune[i], re.Rune[i+1]	257 lo, hi := re.Rune[i], re.Rune[i+1]

255 if lo == hi {	258 if lo == hi {

256 fmt.Fprintf(b, "%#x", lo)	259 fmt.Fprintf(b, "%#x", lo)

257 } else {	260 } else {

258 fmt.Fprintf(b, "%#x-%#x", lo, hi)	261 fmt.Fprintf(b, "%#x-%#x", lo, hi)

259 }	262 }

260 }	263 }

261 }	264 }

262 b.WriteByte('}')	265 b.WriteByte('}')

263 }	266 }

LEFT	RIGHT