Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 package syntax | 5 package syntax |
6 | 6 |
7 import ( | 7 import ( |
8 "os" | 8 "os" |
9 "sort" | 9 "sort" |
10 "unicode" | 10 "unicode" |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
53 | 53 |
54 const ( | 54 const ( |
55 FoldCase Flags = 1 << iota // case-insensitive match | 55 FoldCase Flags = 1 << iota // case-insensitive match |
56 Literal // treat pattern as literal string | 56 Literal // treat pattern as literal string |
57 ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline | 57 ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline |
58 DotNL // allow . to match newline | 58 DotNL // allow . to match newline |
59 OneLine // treat ^ and $ as only matching at beginning and end of text | 59 OneLine // treat ^ and $ as only matching at beginning and end of text |
60 NonGreedy // make repetition operators default to non-greedy | 60 NonGreedy // make repetition operators default to non-greedy |
61 PerlX // allow Perl extensions | 61 PerlX // allow Perl extensions |
62 UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation | 62 UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation |
63 WasDollar // regexp OpEndText was $, not \z | 63 WasDollar // regexp OpEndText was $, not \z |
Sam
2011/06/10 00:14:55
I'm kind of confused as to why you need this in th
rsc
2011/06/10 00:27:38
In Perl, there is \Z (match end of text or right b
| |
64 Simple // regexp contains no counted repetition | 64 Simple // regexp contains no counted repetition |
65 | 65 |
66 MatchNL = ClassNL | DotNL | 66 MatchNL = ClassNL | DotNL |
67 | 67 |
68 Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible | 68 Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible |
69 POSIX Flags = 0 // POSIX syntax | 69 POSIX Flags = 0 // POSIX syntax |
70 | |
71 ) | 70 ) |
72 | 71 |
73 // Pseudo-ops for parsing stack. | 72 // Pseudo-ops for parsing stack. |
74 const ( | 73 const ( |
75 opLeftParen = opPseudo + iota | 74 opLeftParen = opPseudo + iota |
76 opVerticalBar | 75 opVerticalBar |
77 ) | 76 ) |
78 | 77 |
79 type parser struct { | 78 type parser struct { |
80 flags Flags // parse mode flags | 79 flags Flags // parse mode flags |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
157 re = &Regexp{Op: OpConcat} | 156 re = &Regexp{Op: OpConcat} |
158 re.Sub = append(re.Sub0[:0], sub...) | 157 re.Sub = append(re.Sub0[:0], sub...) |
159 } | 158 } |
160 return p.push(re) | 159 return p.push(re) |
161 } | 160 } |
162 | 161 |
163 // alternate replaces the top of the stack (above the topmost '(') with its alternation. | 162 // alternate replaces the top of the stack (above the topmost '(') with its alternation. |
164 func (p *parser) alternate() *Regexp { | 163 func (p *parser) alternate() *Regexp { |
165 // TODO: Flatten alternates. | 164 // TODO: Flatten alternates. |
166 | 165 |
167 » // Scan down to find pseudo-operator | or (. | 166 » // Scan down to find pseudo-operator (. |
167 » // There are no | above (. | |
168 i := len(p.stack) | 168 i := len(p.stack) |
169 for i > 0 && p.stack[i-1].Op < opPseudo { | 169 for i > 0 && p.stack[i-1].Op < opPseudo { |
170 i-- | 170 i-- |
171 } | 171 } |
172 sub := p.stack[i:] | 172 sub := p.stack[i:] |
173 p.stack = p.stack[:i] | 173 p.stack = p.stack[:i] |
174 | 174 |
175 var re *Regexp | 175 var re *Regexp |
176 switch len(sub) { | 176 switch len(sub) { |
177 case 0: | 177 case 0: |
(...skipping 24 matching lines...) Expand all Loading... | |
202 if len(re.Rune) >= cap(re.Rune) { | 202 if len(re.Rune) >= cap(re.Rune) { |
203 // string is too long to fit in Rune0. let Go handle it | 203 // string is too long to fit in Rune0. let Go handle it |
204 re.Rune = []int(s) | 204 re.Rune = []int(s) |
205 break | 205 break |
206 } | 206 } |
207 re.Rune = append(re.Rune, c) | 207 re.Rune = append(re.Rune, c) |
208 } | 208 } |
209 return re, nil | 209 return re, nil |
210 } | 210 } |
211 | 211 |
212 » // Otherwise, have to do real work. | 212 » // Otherwise, must do real work. |
213 var ( | 213 var ( |
214 p parser | 214 p parser |
215 err os.Error | 215 err os.Error |
216 c int | 216 c int |
217 op Op | 217 op Op |
218 ) | 218 ) |
219 p.flags = flags | 219 p.flags = flags |
220 p.wholeRegexp = s | 220 p.wholeRegexp = s |
221 t := s | 221 t := s |
222 for t != "" { | 222 for t != "" { |
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
396 t = t[1:] | 396 t = t[1:] |
397 | 397 |
398 // If character class does not match \n, add it here, | 398 // If character class does not match \n, add it here, |
399 // so that negation later will do the right thing. | 399 // so that negation later will do the right thing. |
400 if p.flags&ClassNL == 0 { | 400 if p.flags&ClassNL == 0 { |
401 re.Rune = append(re.Rune, '\n', '\n') | 401 re.Rune = append(re.Rune, '\n', '\n') |
402 } | 402 } |
403 } | 403 } |
404 | 404 |
405 class := re.Rune | 405 class := re.Rune |
406 » first := true // ] is okay as first char in class | 406 » first := true // ] and - are okay as first char in class |
407 for t == "" || t[0] != ']' || first { | 407 for t == "" || t[0] != ']' || first { |
408 // POSIX: - is only okay unescaped as first or last in class. | 408 // POSIX: - is only okay unescaped as first or last in class. |
409 // Perl: - is okay anywhere. | 409 // Perl: - is okay anywhere. |
410 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') { | 410 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') { |
411 _, size := utf8.DecodeRuneInString(t[1:]) | 411 _, size := utf8.DecodeRuneInString(t[1:]) |
412 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]} | 412 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]} |
413 } | 413 } |
414 first = false | 414 first = false |
415 | 415 |
416 // TODO: Look for [:alnum:] | 416 // TODO: Look for [:alnum:] |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
552 return nil | 552 return nil |
553 } | 553 } |
554 | 554 |
555 func nextRune(s string) (c int, t string, err os.Error) { | 555 func nextRune(s string) (c int, t string, err os.Error) { |
556 c, size := utf8.DecodeRuneInString(s) | 556 c, size := utf8.DecodeRuneInString(s) |
557 if c == utf8.RuneError && size == 1 { | 557 if c == utf8.RuneError && size == 1 { |
558 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} | 558 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} |
559 } | 559 } |
560 return c, s[size:], nil | 560 return c, s[size:], nil |
561 } | 561 } |
LEFT | RIGHT |