Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(7)

Delta Between Two Patch Sets: src/pkg/exp/regexp/syntax/parse.go

Issue 4538123: code review 4538123: exp/regexp/syntax: syntax data structures, parser (Closed)
Left Patch Set: diff -r 69f12bae0f09 https://go.googlecode.com/hg Created 13 years, 10 months ago
Right Patch Set: diff -r 881a0fc6528d https://go.googlecode.com/hg Created 13 years, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/exp/regexp/syntax/Makefile ('k') | src/pkg/exp/regexp/syntax/parse_test.go » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 // Copyright 2011 The Go Authors. All rights reserved. 1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 package syntax 5 package syntax
6 6
7 import ( 7 import (
8 "os" 8 "os"
9 "sort" 9 "sort"
10 "unicode" 10 "unicode"
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 53
54 const ( 54 const (
55 FoldCase Flags = 1 << iota // case-insensitive match 55 FoldCase Flags = 1 << iota // case-insensitive match
56 Literal // treat pattern as literal string 56 Literal // treat pattern as literal string
57 ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline 57 ClassNL // allow character classes like [^a-z] and [[:space:]] to match newline
58 DotNL // allow . to match newline 58 DotNL // allow . to match newline
59 OneLine // treat ^ and $ as only matching at beginning and end of text 59 OneLine // treat ^ and $ as only matching at beginning and end of text
60 NonGreedy // make repetition operators default to non-greedy 60 NonGreedy // make repetition operators default to non-greedy
61 PerlX // allow Perl extensions 61 PerlX // allow Perl extensions
62 UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation 62 UnicodeGroups // allow \p{Han}, \P{Han} for Unicode group and negation
63 WasDollar // regexp OpEndText was $, not \z 63 WasDollar // regexp OpEndText was $, not \z
Sam 2011/06/10 00:14:55 I'm kind of confused as to why you need this in th
rsc 2011/06/10 00:27:38 In Perl, there is \Z (match end of text or right b
64 Simple // regexp contains no counted repetition 64 Simple // regexp contains no counted repetition
65 65
66 MatchNL = ClassNL | DotNL 66 MatchNL = ClassNL | DotNL
67 67
68 Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible 68 Perl = ClassNL | OneLine | PerlX | UnicodeGroups // as close to Perl as possible
69 POSIX Flags = 0 // POSIX syntax 69 POSIX Flags = 0 // POSIX syntax
70
71 ) 70 )
72 71
73 // Pseudo-ops for parsing stack. 72 // Pseudo-ops for parsing stack.
74 const ( 73 const (
75 opLeftParen = opPseudo + iota 74 opLeftParen = opPseudo + iota
76 opVerticalBar 75 opVerticalBar
77 ) 76 )
78 77
79 type parser struct { 78 type parser struct {
80 flags Flags // parse mode flags 79 flags Flags // parse mode flags
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 re = &Regexp{Op: OpConcat} 156 re = &Regexp{Op: OpConcat}
158 re.Sub = append(re.Sub0[:0], sub...) 157 re.Sub = append(re.Sub0[:0], sub...)
159 } 158 }
160 return p.push(re) 159 return p.push(re)
161 } 160 }
162 161
163 // alternate replaces the top of the stack (above the topmost '(') with its alternation. 162 // alternate replaces the top of the stack (above the topmost '(') with its alternation.
164 func (p *parser) alternate() *Regexp { 163 func (p *parser) alternate() *Regexp {
165 // TODO: Flatten alternates. 164 // TODO: Flatten alternates.
166 165
167 » // Scan down to find pseudo-operator | or (. 166 » // Scan down to find pseudo-operator (.
167 » // There are no | above (.
168 i := len(p.stack) 168 i := len(p.stack)
169 for i > 0 && p.stack[i-1].Op < opPseudo { 169 for i > 0 && p.stack[i-1].Op < opPseudo {
170 i-- 170 i--
171 } 171 }
172 sub := p.stack[i:] 172 sub := p.stack[i:]
173 p.stack = p.stack[:i] 173 p.stack = p.stack[:i]
174 174
175 var re *Regexp 175 var re *Regexp
176 switch len(sub) { 176 switch len(sub) {
177 case 0: 177 case 0:
(...skipping 24 matching lines...) Expand all
202 if len(re.Rune) >= cap(re.Rune) { 202 if len(re.Rune) >= cap(re.Rune) {
203 // string is too long to fit in Rune0. let Go handle it 203 // string is too long to fit in Rune0. let Go handle it
204 re.Rune = []int(s) 204 re.Rune = []int(s)
205 break 205 break
206 } 206 }
207 re.Rune = append(re.Rune, c) 207 re.Rune = append(re.Rune, c)
208 } 208 }
209 return re, nil 209 return re, nil
210 } 210 }
211 211
212 » // Otherwise, have to do real work. 212 » // Otherwise, must do real work.
213 var ( 213 var (
214 p parser 214 p parser
215 err os.Error 215 err os.Error
216 c int 216 c int
217 op Op 217 op Op
218 ) 218 )
219 p.flags = flags 219 p.flags = flags
220 p.wholeRegexp = s 220 p.wholeRegexp = s
221 t := s 221 t := s
222 for t != "" { 222 for t != "" {
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
396 t = t[1:] 396 t = t[1:]
397 397
398 // If character class does not match \n, add it here, 398 // If character class does not match \n, add it here,
399 // so that negation later will do the right thing. 399 // so that negation later will do the right thing.
400 if p.flags&ClassNL == 0 { 400 if p.flags&ClassNL == 0 {
401 re.Rune = append(re.Rune, '\n', '\n') 401 re.Rune = append(re.Rune, '\n', '\n')
402 } 402 }
403 } 403 }
404 404
405 class := re.Rune 405 class := re.Rune
406 » first := true // ] is okay as first char in class 406 » first := true // ] and - are okay as first char in class
407 for t == "" || t[0] != ']' || first { 407 for t == "" || t[0] != ']' || first {
408 // POSIX: - is only okay unescaped as first or last in class. 408 // POSIX: - is only okay unescaped as first or last in class.
409 // Perl: - is okay anywhere. 409 // Perl: - is okay anywhere.
410 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') { 410 if t != "" && t[0] == '-' && p.flags&PerlX == 0 && !first && (len(t) == 1 || t[1] != ']') {
411 _, size := utf8.DecodeRuneInString(t[1:]) 411 _, size := utf8.DecodeRuneInString(t[1:])
412 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]} 412 return "", &Error{Code: ErrInvalidCharRange, Expr: t[:1+size]}
413 } 413 }
414 first = false 414 first = false
415 415
416 // TODO: Look for [:alnum:] 416 // TODO: Look for [:alnum:]
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
552 return nil 552 return nil
553 } 553 }
554 554
555 func nextRune(s string) (c int, t string, err os.Error) { 555 func nextRune(s string) (c int, t string, err os.Error) {
556 c, size := utf8.DecodeRuneInString(s) 556 c, size := utf8.DecodeRuneInString(s)
557 if c == utf8.RuneError && size == 1 { 557 if c == utf8.RuneError && size == 1 {
558 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s} 558 return 0, "", &Error{Code: ErrInvalidUTF8, Expr: s}
559 } 559 }
560 return c, s[size:], nil 560 return c, s[size:], nil
561 } 561 }
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b