Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(721)

Delta Between Two Patch Sets: src/pkg/exp/regexp/syntax/parse_test.go

Issue 4612041: code review 4612041: exp/regexp/syntax: more escapes, character classes (Closed)
Left Patch Set: Created 13 years, 9 months ago
Right Patch Set: diff -r 8572c1b0cdcc https://go.googlecode.com/hg Created 13 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/exp/regexp/syntax/parse.go ('k') | src/pkg/exp/regexp/syntax/perl_groups.go » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
(no file at all)
1 // Copyright 2011 The Go Authors. All rights reserved. 1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 package syntax 5 package syntax
6 6
7 import ( 7 import (
8 "bytes" 8 "bytes"
9 "fmt" 9 "fmt"
10 "testing" 10 "testing"
(...skipping 14 matching lines...) Expand all
25 // { "abc", "str{abc}" }, 25 // { "abc", "str{abc}" },
26 {"abc", "cat{lit{a}lit{b}lit{c}}"}, 26 {"abc", "cat{lit{a}lit{b}lit{c}}"},
27 {"a|^", "alt{lit{a}bol{}}"}, 27 {"a|^", "alt{lit{a}bol{}}"},
28 // { "a|b", "cc{0x61-0x62}" }, 28 // { "a|b", "cc{0x61-0x62}" },
29 {"a|b", "alt{lit{a}lit{b}}"}, 29 {"a|b", "alt{lit{a}lit{b}}"},
30 {"(a)", "cap{lit{a}}"}, 30 {"(a)", "cap{lit{a}}"},
31 {"(a)|b", "alt{cap{lit{a}}lit{b}}"}, 31 {"(a)|b", "alt{cap{lit{a}}lit{b}}"},
32 {"a*", "star{lit{a}}"}, 32 {"a*", "star{lit{a}}"},
33 {"a+", "plus{lit{a}}"}, 33 {"a+", "plus{lit{a}}"},
34 {"a?", "que{lit{a}}"}, 34 {"a?", "que{lit{a}}"},
35 » //» { "a{2}", "rep{2,2 lit{a}}" }, 35 » {"a{2}", "rep{2,2 lit{a}}"},
36 » //» { "a{2,3}", "rep{2,3 lit{a}}" }, 36 » {"a{2,3}", "rep{2,3 lit{a}}"},
37 » //» { "a{2,}", "rep{2,-1 lit{a}}" }, 37 » {"a{2,}", "rep{2,-1 lit{a}}"},
38 » //» { "a*?", "nstar{lit{a}}" }, 38 » {"a*?", "nstar{lit{a}}"},
39 » //» { "a+?", "nplus{lit{a}}" }, 39 » {"a+?", "nplus{lit{a}}"},
40 » //» { "a??", "nque{lit{a}}" }, 40 » {"a??", "nque{lit{a}}"},
41 » //» { "a{2}?", "nrep{2,2 lit{a}}" }, 41 » {"a{2}?", "nrep{2,2 lit{a}}"},
42 » //» { "a{2,3}?", "nrep{2,3 lit{a}}" }, 42 » {"a{2,3}?", "nrep{2,3 lit{a}}"},
43 » //» { "a{2,}?", "nrep{2,-1 lit{a}}" }, 43 » {"a{2,}?", "nrep{2,-1 lit{a}}"},
44 {"", "emp{}"}, 44 {"", "emp{}"},
45 // { "|", "emp{}" }, // alt{emp{}emp{}} but got factored 45 // { "|", "emp{}" }, // alt{emp{}emp{}} but got factored
46 » //» { "|", "alt{emp{}emp{}}" }, 46 » {"|", "alt{emp{}emp{}}"},
47 {"|x|", "alt{emp{}lit{x}emp{}}"}, 47 {"|x|", "alt{emp{}lit{x}emp{}}"},
48 {".", "dot{}"}, 48 {".", "dot{}"},
49 {"^", "bol{}"}, 49 {"^", "bol{}"},
50 {"$", "eol{}"}, 50 {"$", "eol{}"},
51 » //» { "\\|", "lit{|}" }, 51 » {"\\|", "lit{|}"},
52 » //» { "\\(", "lit{(}" }, 52 » {"\\(", "lit{(}"},
53 » //» { "\\)", "lit{)}" }, 53 » {"\\)", "lit{)}"},
54 » //» { "\\*", "lit{*}" }, 54 » {"\\*", "lit{*}"},
55 » //» { "\\+", "lit{+}" }, 55 » {"\\+", "lit{+}"},
56 » //» { "\\?", "lit{?}" }, 56 » {"\\?", "lit{?}"},
57 » //» { "{", "lit{{}" }, 57 » {"{", "lit{{}"},
58 {"}", "lit{}}"}, 58 {"}", "lit{}}"},
59 » //» { "\\.", "lit{.}" }, 59 » {"\\.", "lit{.}"},
60 » //» { "\\^", "lit{^}" }, 60 » {"\\^", "lit{^}"},
61 » //» { "\\$", "lit{$}" }, 61 » {"\\$", "lit{$}"},
62 » //» { "\\\\", "lit{\\}" }, 62 » {"\\\\", "lit{\\}"},
63 {"[ace]", "cc{0x61 0x63 0x65}"}, 63 {"[ace]", "cc{0x61 0x63 0x65}"},
64 {"[abc]", "cc{0x61-0x63}"}, 64 {"[abc]", "cc{0x61-0x63}"},
65 {"[a-z]", "cc{0x61-0x7a}"}, 65 {"[a-z]", "cc{0x61-0x7a}"},
66 // { "[a]", "lit{a}" }, 66 // { "[a]", "lit{a}" },
67 {"[a]", "cc{0x61}"}, 67 {"[a]", "cc{0x61}"},
68 » //» { "\\-", "lit{-}" }, 68 » {"\\-", "lit{-}"},
69 {"-", "lit{-}"}, 69 {"-", "lit{-}"},
70 » //» { "\\_", "lit{_}" }, 70 » {"\\_", "lit{_}"},
71 71
72 // Posix and Perl extensions 72 // Posix and Perl extensions
73 » //» { "[[:lower:]]", "cc{0x61-0x7a}" }, 73 » {"[[:lower:]]", "cc{0x61-0x7a}"},
74 » //» { "[a-z]", "cc{0x61-0x7a}" }, 74 » {"[a-z]", "cc{0x61-0x7a}"},
75 » //» { "[^[:lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}" }, 75 » {"[^[:lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"},
76 » //» { "[[:^lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}" }, 76 » {"[[:^lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"},
77 // { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, 77 // { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
78 // { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, 78 // { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
79 // { "(?i)[^[:lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, 79 // { "(?i)[^[:lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
80 // { "(?i)[[:^lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, 80 // { "(?i)[[:^lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
81 » //» { "\\d", "cc{0x30-0x39}" }, 81 » {"\\d", "cc{0x30-0x39}"},
82 » //» { "\\D", "cc{0x0-0x2f 0x3a-0x10ffff}" }, 82 » {"\\D", "cc{0x0-0x2f 0x3a-0x10ffff}"},
83 » //» { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" }, 83 » {"\\s", "cc{0x9-0xa 0xc-0xd 0x20}"},
84 » //» { "\\S", "cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" }, 84 » {"\\S", "cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}"},
85 » //» { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" }, 85 » {"\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}"},
86 » //» { "\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" }, 86 » {"\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}"},
87 // { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" }, 87 // { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
88 // { "(?i)\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, 88 // { "(?i)\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
89 » //» { "[^\\\\]", "cc{0x0-0x5b 0x5d-0x10ffff}" }, 89 » {"[^\\\\]", "cc{0x0-0x5b 0x5d-0x10ffff}"},
90 // { "\\C", "byte{}" }, 90 // { "\\C", "byte{}" },
91 91
92 // Unicode, negatives, and a double negative. 92 // Unicode, negatives, and a double negative.
93 » //» { "\\p{Braille}", "cc{0x2800-0x28ff}" }, 93 » {"\\p{Braille}", "cc{0x2800-0x28ff}"},
94 » //» { "\\P{Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}" }, 94 » {"\\P{Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
95 » //» { "\\p{^Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}" }, 95 » {"\\p{^Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
96 » //» { "\\P{^Braille}", "cc{0x2800-0x28ff}" }, 96 » {"\\P{^Braille}", "cc{0x2800-0x28ff}"},
97 » {"\\pZ", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"},
98 » {"[\\p{Braille}]", "cc{0x2800-0x28ff}"},
99 » {"[\\P{Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
100 » {"[\\p{^Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
101 » {"[\\P{^Braille}]", "cc{0x2800-0x28ff}"},
102 » {"[\\pZ]", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"},
97 103
98 // More interesting regular expressions. 104 // More interesting regular expressions.
99 // { "a{,2}", "str{a{,2}}" }, 105 // { "a{,2}", "str{a{,2}}" },
100 // { "\\.\\^\\$\\\\", "str{.^$\\}" }, 106 // { "\\.\\^\\$\\\\", "str{.^$\\}" },
101 {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"}, 107 {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"},
102 {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"}, 108 {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"},
103 {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8 109 {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8
104 » //» { "a*{", "cat{star{lit{a}}lit{{}}" }, 110 » {"a*{", "cat{star{lit{a}}lit{{}}"},
105 111
106 // Test precedences 112 // Test precedences
107 // { "(?:ab)*", "star{str{ab}}" }, 113 // { "(?:ab)*", "star{str{ab}}" },
108 // { "(ab)*", "star{cap{str{ab}}}" }, 114 // { "(ab)*", "star{cap{str{ab}}}" },
109 // { "ab|cd", "alt{str{ab}str{cd}}" }, 115 // { "ab|cd", "alt{str{ab}str{cd}}" },
110 // { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" }, 116 // { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
111 {"(?:ab)*", "star{cat{lit{a}lit{b}}}"}, 117 {"(?:ab)*", "star{cat{lit{a}lit{b}}}"},
112 {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"}, 118 {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"},
113 {"ab|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"}, 119 {"ab|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"},
114 {"a(b|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"}, 120 {"a(b|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"},
115 121
116 // Test flattening. 122 // Test flattening.
117 » //» { "(?:a)", "lit{a}" }, 123 » {"(?:a)", "lit{a}"},
118 // { "(?:ab)(?:cd)", "str{abcd}" }, 124 // { "(?:ab)(?:cd)", "str{abcd}" },
119 // { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" }, 125 // { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
120 // { "a|.", "dot{}" }, 126 // { "a|.", "dot{}" },
121 // { ".|a", "dot{}" }, 127 // { ".|a", "dot{}" },
122 128
123 // Test Perl quoted literals 129 // Test Perl quoted literals
124 » //» { "\\Q+|*?{[\\E", "str{+|*?{[}" }, 130 » {"\\Q+|*?{[\\E", "str{+|*?{[}"},
125 » //» { "\\Q+\\E+", "plus{lit{+}}" }, 131 » {"\\Q+\\E+", "plus{lit{+}}"},
126 » //» { "\\Q\\\\E", "lit{\\}" }, 132 » {"\\Q\\\\E", "lit{\\}"},
127 » //» { "\\Q\\\\\\E", "str{\\\\}" }, 133 » {"\\Q\\\\\\E", "str{\\\\}"},
128 134
129 // Test Perl \A and \z 135 // Test Perl \A and \z
130 » //» { "(?m)^", "bol{}" }, 136 » {"(?m)^", "bol{}"},
131 » //» { "(?m)$", "eol{}" }, 137 » {"(?m)$", "eol{}"},
132 » //» { "(?-m)^", "bot{}" }, 138 » {"(?-m)^", "bot{}"},
133 » //» { "(?-m)$", "eot{}" }, 139 » {"(?-m)$", "eot{}"},
134 » //» { "(?m)\\A", "bot{}" }, 140 » {"(?m)\\A", "bot{}"},
135 » //» { "(?m)\\z", "eot{\\z}" }, 141 » {"(?m)\\z", "eot{\\z}"},
136 » //» { "(?-m)\\A", "bot{}" }, 142 » {"(?-m)\\A", "bot{}"},
137 » //» { "(?-m)\\z", "eot{\\z}" }, 143 » {"(?-m)\\z", "eot{\\z}"},
138 144
139 // Test named captures 145 // Test named captures
140 » //» { "(?P<name>a)", "cap{name:lit{a}}" }, 146 » {"(?P<name>a)", "cap{name:lit{a}}"},
141 147
142 // Case-folded literals 148 // Case-folded literals
143 // { "[Aa]", "litfold{a}" }, 149 // { "[Aa]", "litfold{a}" },
144 150
145 // Strings 151 // Strings
146 // { "abcde", "str{abcde}" }, 152 // { "abcde", "str{abcde}" },
147 // { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, 153 // { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
148 } 154 }
149 155
150 const testFlags = MatchNL | PerlX | UnicodeGroups 156 const testFlags = MatchNL | PerlX | UnicodeGroups
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 lo, hi := re.Rune[i], re.Rune[i+1] 263 lo, hi := re.Rune[i], re.Rune[i+1]
258 if lo == hi { 264 if lo == hi {
259 fmt.Fprintf(b, "%#x", lo) 265 fmt.Fprintf(b, "%#x", lo)
260 } else { 266 } else {
261 fmt.Fprintf(b, "%#x-%#x", lo, hi) 267 fmt.Fprintf(b, "%#x-%#x", lo, hi)
262 } 268 }
263 } 269 }
264 } 270 }
265 b.WriteByte('}') 271 b.WriteByte('}')
266 } 272 }
LEFT RIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b