LEFT | RIGHT |
(no file at all) | |
1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 package syntax | 5 package syntax |
6 | 6 |
7 import ( | 7 import ( |
8 "bytes" | 8 "bytes" |
9 "fmt" | 9 "fmt" |
10 "testing" | 10 "testing" |
(...skipping 14 matching lines...) Expand all Loading... |
25 // { "abc", "str{abc}" }, | 25 // { "abc", "str{abc}" }, |
26 {"abc", "cat{lit{a}lit{b}lit{c}}"}, | 26 {"abc", "cat{lit{a}lit{b}lit{c}}"}, |
27 {"a|^", "alt{lit{a}bol{}}"}, | 27 {"a|^", "alt{lit{a}bol{}}"}, |
28 // { "a|b", "cc{0x61-0x62}" }, | 28 // { "a|b", "cc{0x61-0x62}" }, |
29 {"a|b", "alt{lit{a}lit{b}}"}, | 29 {"a|b", "alt{lit{a}lit{b}}"}, |
30 {"(a)", "cap{lit{a}}"}, | 30 {"(a)", "cap{lit{a}}"}, |
31 {"(a)|b", "alt{cap{lit{a}}lit{b}}"}, | 31 {"(a)|b", "alt{cap{lit{a}}lit{b}}"}, |
32 {"a*", "star{lit{a}}"}, | 32 {"a*", "star{lit{a}}"}, |
33 {"a+", "plus{lit{a}}"}, | 33 {"a+", "plus{lit{a}}"}, |
34 {"a?", "que{lit{a}}"}, | 34 {"a?", "que{lit{a}}"}, |
35 » //» { "a{2}", "rep{2,2 lit{a}}" }, | 35 » {"a{2}", "rep{2,2 lit{a}}"}, |
36 » //» { "a{2,3}", "rep{2,3 lit{a}}" }, | 36 » {"a{2,3}", "rep{2,3 lit{a}}"}, |
37 » //» { "a{2,}", "rep{2,-1 lit{a}}" }, | 37 » {"a{2,}", "rep{2,-1 lit{a}}"}, |
38 » //» { "a*?", "nstar{lit{a}}" }, | 38 » {"a*?", "nstar{lit{a}}"}, |
39 » //» { "a+?", "nplus{lit{a}}" }, | 39 » {"a+?", "nplus{lit{a}}"}, |
40 » //» { "a??", "nque{lit{a}}" }, | 40 » {"a??", "nque{lit{a}}"}, |
41 » //» { "a{2}?", "nrep{2,2 lit{a}}" }, | 41 » {"a{2}?", "nrep{2,2 lit{a}}"}, |
42 » //» { "a{2,3}?", "nrep{2,3 lit{a}}" }, | 42 » {"a{2,3}?", "nrep{2,3 lit{a}}"}, |
43 » //» { "a{2,}?", "nrep{2,-1 lit{a}}" }, | 43 » {"a{2,}?", "nrep{2,-1 lit{a}}"}, |
44 {"", "emp{}"}, | 44 {"", "emp{}"}, |
45 // { "|", "emp{}" }, // alt{emp{}emp{}} but got factored | 45 // { "|", "emp{}" }, // alt{emp{}emp{}} but got factored |
46 » //» { "|", "alt{emp{}emp{}}" }, | 46 » {"|", "alt{emp{}emp{}}"}, |
47 {"|x|", "alt{emp{}lit{x}emp{}}"}, | 47 {"|x|", "alt{emp{}lit{x}emp{}}"}, |
48 {".", "dot{}"}, | 48 {".", "dot{}"}, |
49 {"^", "bol{}"}, | 49 {"^", "bol{}"}, |
50 {"$", "eol{}"}, | 50 {"$", "eol{}"}, |
51 » //» { "\\|", "lit{|}" }, | 51 » {"\\|", "lit{|}"}, |
52 » //» { "\\(", "lit{(}" }, | 52 » {"\\(", "lit{(}"}, |
53 » //» { "\\)", "lit{)}" }, | 53 » {"\\)", "lit{)}"}, |
54 » //» { "\\*", "lit{*}" }, | 54 » {"\\*", "lit{*}"}, |
55 » //» { "\\+", "lit{+}" }, | 55 » {"\\+", "lit{+}"}, |
56 » //» { "\\?", "lit{?}" }, | 56 » {"\\?", "lit{?}"}, |
57 » //» { "{", "lit{{}" }, | 57 » {"{", "lit{{}"}, |
58 {"}", "lit{}}"}, | 58 {"}", "lit{}}"}, |
59 » //» { "\\.", "lit{.}" }, | 59 » {"\\.", "lit{.}"}, |
60 » //» { "\\^", "lit{^}" }, | 60 » {"\\^", "lit{^}"}, |
61 » //» { "\\$", "lit{$}" }, | 61 » {"\\$", "lit{$}"}, |
62 » //» { "\\\\", "lit{\\}" }, | 62 » {"\\\\", "lit{\\}"}, |
63 {"[ace]", "cc{0x61 0x63 0x65}"}, | 63 {"[ace]", "cc{0x61 0x63 0x65}"}, |
64 {"[abc]", "cc{0x61-0x63}"}, | 64 {"[abc]", "cc{0x61-0x63}"}, |
65 {"[a-z]", "cc{0x61-0x7a}"}, | 65 {"[a-z]", "cc{0x61-0x7a}"}, |
66 // { "[a]", "lit{a}" }, | 66 // { "[a]", "lit{a}" }, |
67 {"[a]", "cc{0x61}"}, | 67 {"[a]", "cc{0x61}"}, |
68 » //» { "\\-", "lit{-}" }, | 68 » {"\\-", "lit{-}"}, |
69 {"-", "lit{-}"}, | 69 {"-", "lit{-}"}, |
70 » //» { "\\_", "lit{_}" }, | 70 » {"\\_", "lit{_}"}, |
71 | 71 |
72 // Posix and Perl extensions | 72 // Posix and Perl extensions |
73 » //» { "[[:lower:]]", "cc{0x61-0x7a}" }, | 73 » {"[[:lower:]]", "cc{0x61-0x7a}"}, |
74 » //» { "[a-z]", "cc{0x61-0x7a}" }, | 74 » {"[a-z]", "cc{0x61-0x7a}"}, |
75 » //» { "[^[:lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}" }, | 75 » {"[^[:lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"}, |
76 » //» { "[[:^lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}" }, | 76 » {"[[:^lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"}, |
77 // { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | 77 // { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, |
78 // { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, | 78 // { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, |
79 // { "(?i)[^[:lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, | 79 // { "(?i)[^[:lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, |
80 // { "(?i)[[:^lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, | 80 // { "(?i)[[:^lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, |
81 » //» { "\\d", "cc{0x30-0x39}" }, | 81 » {"\\d", "cc{0x30-0x39}"}, |
82 » //» { "\\D", "cc{0x0-0x2f 0x3a-0x10ffff}" }, | 82 » {"\\D", "cc{0x0-0x2f 0x3a-0x10ffff}"}, |
83 » //» { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" }, | 83 » {"\\s", "cc{0x9-0xa 0xc-0xd 0x20}"}, |
84 » //» { "\\S", "cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" }, | 84 » {"\\S", "cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}"}, |
85 » //» { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" }, | 85 » {"\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}"}, |
86 » //» { "\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" }, | 86 » {"\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}"}, |
87 // { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" }, | 87 // { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" }, |
88 // { "(?i)\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, | 88 // { "(?i)\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, |
89 » //» { "[^\\\\]", "cc{0x0-0x5b 0x5d-0x10ffff}" }, | 89 » {"[^\\\\]", "cc{0x0-0x5b 0x5d-0x10ffff}"}, |
90 // { "\\C", "byte{}" }, | 90 // { "\\C", "byte{}" }, |
91 | 91 |
92 // Unicode, negatives, and a double negative. | 92 // Unicode, negatives, and a double negative. |
93 » //» { "\\p{Braille}", "cc{0x2800-0x28ff}" }, | 93 » {"\\p{Braille}", "cc{0x2800-0x28ff}"}, |
94 » //» { "\\P{Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}" }, | 94 » {"\\P{Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, |
95 » //» { "\\p{^Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}" }, | 95 » {"\\p{^Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, |
96 » //» { "\\P{^Braille}", "cc{0x2800-0x28ff}" }, | 96 » {"\\P{^Braille}", "cc{0x2800-0x28ff}"}, |
| 97 » {"\\pZ", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"}, |
| 98 » {"[\\p{Braille}]", "cc{0x2800-0x28ff}"}, |
| 99 » {"[\\P{Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, |
| 100 » {"[\\p{^Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, |
| 101 » {"[\\P{^Braille}]", "cc{0x2800-0x28ff}"}, |
| 102 » {"[\\pZ]", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"}, |
97 | 103 |
98 // More interesting regular expressions. | 104 // More interesting regular expressions. |
99 // { "a{,2}", "str{a{,2}}" }, | 105 // { "a{,2}", "str{a{,2}}" }, |
100 // { "\\.\\^\\$\\\\", "str{.^$\\}" }, | 106 // { "\\.\\^\\$\\\\", "str{.^$\\}" }, |
101 {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"}, | 107 {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"}, |
102 {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"}, | 108 {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"}, |
103 {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8 | 109 {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8 |
104 » //» { "a*{", "cat{star{lit{a}}lit{{}}" }, | 110 » {"a*{", "cat{star{lit{a}}lit{{}}"}, |
105 | 111 |
106 // Test precedences | 112 // Test precedences |
107 // { "(?:ab)*", "star{str{ab}}" }, | 113 // { "(?:ab)*", "star{str{ab}}" }, |
108 // { "(ab)*", "star{cap{str{ab}}}" }, | 114 // { "(ab)*", "star{cap{str{ab}}}" }, |
109 // { "ab|cd", "alt{str{ab}str{cd}}" }, | 115 // { "ab|cd", "alt{str{ab}str{cd}}" }, |
110 // { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" }, | 116 // { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" }, |
111 {"(?:ab)*", "star{cat{lit{a}lit{b}}}"}, | 117 {"(?:ab)*", "star{cat{lit{a}lit{b}}}"}, |
112 {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"}, | 118 {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"}, |
113 {"ab|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"}, | 119 {"ab|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"}, |
114 {"a(b|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"}, | 120 {"a(b|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"}, |
115 | 121 |
116 // Test flattening. | 122 // Test flattening. |
117 » //» { "(?:a)", "lit{a}" }, | 123 » {"(?:a)", "lit{a}"}, |
118 // { "(?:ab)(?:cd)", "str{abcd}" }, | 124 // { "(?:ab)(?:cd)", "str{abcd}" }, |
119 // { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" }, | 125 // { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" }, |
120 // { "a|.", "dot{}" }, | 126 // { "a|.", "dot{}" }, |
121 // { ".|a", "dot{}" }, | 127 // { ".|a", "dot{}" }, |
122 | 128 |
123 // Test Perl quoted literals | 129 // Test Perl quoted literals |
124 » //» { "\\Q+|*?{[\\E", "str{+|*?{[}" }, | 130 » {"\\Q+|*?{[\\E", "str{+|*?{[}"}, |
125 » //» { "\\Q+\\E+", "plus{lit{+}}" }, | 131 » {"\\Q+\\E+", "plus{lit{+}}"}, |
126 » //» { "\\Q\\\\E", "lit{\\}" }, | 132 » {"\\Q\\\\E", "lit{\\}"}, |
127 » //» { "\\Q\\\\\\E", "str{\\\\}" }, | 133 » {"\\Q\\\\\\E", "str{\\\\}"}, |
128 | 134 |
129 // Test Perl \A and \z | 135 // Test Perl \A and \z |
130 » //» { "(?m)^", "bol{}" }, | 136 » {"(?m)^", "bol{}"}, |
131 » //» { "(?m)$", "eol{}" }, | 137 » {"(?m)$", "eol{}"}, |
132 » //» { "(?-m)^", "bot{}" }, | 138 » {"(?-m)^", "bot{}"}, |
133 » //» { "(?-m)$", "eot{}" }, | 139 » {"(?-m)$", "eot{}"}, |
134 » //» { "(?m)\\A", "bot{}" }, | 140 » {"(?m)\\A", "bot{}"}, |
135 » //» { "(?m)\\z", "eot{\\z}" }, | 141 » {"(?m)\\z", "eot{\\z}"}, |
136 » //» { "(?-m)\\A", "bot{}" }, | 142 » {"(?-m)\\A", "bot{}"}, |
137 » //» { "(?-m)\\z", "eot{\\z}" }, | 143 » {"(?-m)\\z", "eot{\\z}"}, |
138 | 144 |
139 // Test named captures | 145 // Test named captures |
140 » //» { "(?P<name>a)", "cap{name:lit{a}}" }, | 146 » {"(?P<name>a)", "cap{name:lit{a}}"}, |
141 | 147 |
142 // Case-folded literals | 148 // Case-folded literals |
143 // { "[Aa]", "litfold{a}" }, | 149 // { "[Aa]", "litfold{a}" }, |
144 | 150 |
145 // Strings | 151 // Strings |
146 // { "abcde", "str{abcde}" }, | 152 // { "abcde", "str{abcde}" }, |
147 // { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, | 153 // { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, |
148 } | 154 } |
149 | 155 |
150 const testFlags = MatchNL | PerlX | UnicodeGroups | 156 const testFlags = MatchNL | PerlX | UnicodeGroups |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 lo, hi := re.Rune[i], re.Rune[i+1] | 263 lo, hi := re.Rune[i], re.Rune[i+1] |
258 if lo == hi { | 264 if lo == hi { |
259 fmt.Fprintf(b, "%#x", lo) | 265 fmt.Fprintf(b, "%#x", lo) |
260 } else { | 266 } else { |
261 fmt.Fprintf(b, "%#x-%#x", lo, hi) | 267 fmt.Fprintf(b, "%#x-%#x", lo, hi) |
262 } | 268 } |
263 } | 269 } |
264 } | 270 } |
265 b.WriteByte('}') | 271 b.WriteByte('}') |
266 } | 272 } |
LEFT | RIGHT |