LEFT | RIGHT |
1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 package parse | 5 package parse |
6 | 6 |
7 import ( | 7 import ( |
8 "fmt" | 8 "fmt" |
9 "strings" | 9 "strings" |
10 "unicode" | 10 "unicode" |
(...skipping 20 matching lines...) Expand all Loading... |
31 } | 31 } |
32 return fmt.Sprintf("%q", i.val) | 32 return fmt.Sprintf("%q", i.val) |
33 } | 33 } |
34 | 34 |
35 // itemType identifies the type of lex items. | 35 // itemType identifies the type of lex items. |
36 type itemType int | 36 type itemType int |
37 | 37 |
38 const ( | 38 const ( |
39 itemError itemType = iota // error occurred; value is text of err
or | 39 itemError itemType = iota // error occurred; value is text of err
or |
40 itemBool // boolean constant | 40 itemBool // boolean constant |
41 » itemChar // printable ASCII character; grab bag
for comma etc. | 41 » itemChar // printable ASCII character; grab bag
for comma etc |
42 itemCharConstant // character constant | 42 itemCharConstant // character constant |
43 itemComplex // complex constant (1+2i); imaginary i
s just a number | 43 itemComplex // complex constant (1+2i); imaginary i
s just a number |
44 itemColonEquals // colon-equals (':=') introducing a de
claration | 44 itemColonEquals // colon-equals (':=') introducing a de
claration |
45 itemEOF | 45 itemEOF |
46 » itemField // alphanumeric identifier, starting with '.', possibly c
hained ('.x.y') | 46 » itemField // alphanumeric identifier starting with '.' |
47 » itemIdentifier // alphanumeric identifier | 47 » itemIdentifier // alphanumeric identifier not starting with '.' |
48 itemLeftDelim // left action delimiter | 48 itemLeftDelim // left action delimiter |
49 itemLeftParen // '(' inside action | 49 itemLeftParen // '(' inside action |
50 itemNumber // simple number, including imaginary | 50 itemNumber // simple number, including imaginary |
51 itemPipe // pipe symbol | 51 itemPipe // pipe symbol |
52 itemRawString // raw quoted string (includes quotes) | 52 itemRawString // raw quoted string (includes quotes) |
53 itemRightDelim // right action delimiter | 53 itemRightDelim // right action delimiter |
54 itemRightParen // ')' inside action | 54 itemRightParen // ')' inside action |
55 itemSpace // run of spaces separating arguments | 55 itemSpace // run of spaces separating arguments |
56 itemString // quoted string (includes quotes) | 56 itemString // quoted string (includes quotes) |
57 itemText // plain text | 57 itemText // plain text |
58 » itemVariable // variable starting with '$', such as '$' or '$1' or '$
hello'. | 58 » itemVariable // variable starting with '$', such as '$' or '$1' or '$
hello' |
59 // Keywords appear after all the rest. | 59 // Keywords appear after all the rest. |
60 itemKeyword // used only to delimit the keywords | 60 itemKeyword // used only to delimit the keywords |
61 » itemDot // the cursor, spelled '.'. | 61 » itemDot // the cursor, spelled '.' |
62 itemDefine // define keyword | 62 itemDefine // define keyword |
63 itemElse // else keyword | 63 itemElse // else keyword |
64 itemEnd // end keyword | 64 itemEnd // end keyword |
65 itemIf // if keyword | 65 itemIf // if keyword |
66 itemNil // the untyped nil constant, easiest to treat as a keyword | 66 itemNil // the untyped nil constant, easiest to treat as a keyword |
67 itemRange // range keyword | 67 itemRange // range keyword |
68 itemTemplate // template keyword | 68 itemTemplate // template keyword |
69 itemWith // with keyword | 69 itemWith // with keyword |
70 ) | 70 ) |
71 | 71 |
72 var key = map[string]itemType{ | 72 var key = map[string]itemType{ |
73 ".": itemDot, | 73 ".": itemDot, |
74 "define": itemDefine, | 74 "define": itemDefine, |
75 "else": itemElse, | 75 "else": itemElse, |
76 "end": itemEnd, | 76 "end": itemEnd, |
77 "if": itemIf, | 77 "if": itemIf, |
78 "range": itemRange, | 78 "range": itemRange, |
79 "nil": itemNil, | 79 "nil": itemNil, |
80 "template": itemTemplate, | 80 "template": itemTemplate, |
81 "with": itemWith, | 81 "with": itemWith, |
82 } | 82 } |
83 | 83 |
84 const eof = -1 | 84 const eof = -1 |
85 | 85 |
86 // stateFn represents the state of the scanner as a function that returns the ne
xt state. | 86 // stateFn represents the state of the scanner as a function that returns the ne
xt state. |
87 type stateFn func(*lexer) stateFn | 87 type stateFn func(*lexer) stateFn |
88 | 88 |
89 // lexer holds the state of the scanner. | 89 // lexer holds the state of the scanner. |
90 type lexer struct { | 90 type lexer struct { |
91 » name string // the name of the input; used only for error repor
ts. | 91 » name string // the name of the input; used only for error repor
ts |
92 » input string // the string being scanned. | 92 » input string // the string being scanned |
93 » leftDelim string // start of action. | 93 » leftDelim string // start of action |
94 » rightDelim string // end of action. | 94 » rightDelim string // end of action |
95 » state stateFn // the next lexing function to enter. | 95 » state stateFn // the next lexing function to enter |
96 » pos int // current position in the input. | 96 » pos int // current position in the input |
97 » start int // start position of this item. | 97 » start int // start position of this item |
98 » width int // width of last rune read from input. | 98 » width int // width of last rune read from input |
99 lastPos int // position of most recent item returned by nextIte
m | 99 lastPos int // position of most recent item returned by nextIte
m |
100 » items chan item // channel of scanned items. | 100 » items chan item // channel of scanned items |
101 parenDepth int // nesting depth of ( ) exprs | 101 parenDepth int // nesting depth of ( ) exprs |
102 } | 102 } |
103 | 103 |
104 // next returns the next rune in the input. | 104 // next returns the next rune in the input. |
105 func (l *lexer) next() (r rune) { | 105 func (l *lexer) next() (r rune) { |
106 if l.pos >= len(l.input) { | 106 if l.pos >= len(l.input) { |
107 l.width = 0 | 107 l.width = 0 |
108 return eof | 108 return eof |
109 } | 109 } |
110 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) | 110 r, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) |
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
279 return l.errorf("expected :=") | 279 return l.errorf("expected :=") |
280 } | 280 } |
281 l.emit(itemColonEquals) | 281 l.emit(itemColonEquals) |
282 case r == '|': | 282 case r == '|': |
283 l.emit(itemPipe) | 283 l.emit(itemPipe) |
284 case r == '"': | 284 case r == '"': |
285 return lexQuote | 285 return lexQuote |
286 case r == '`': | 286 case r == '`': |
287 return lexRawQuote | 287 return lexRawQuote |
288 case r == '$': | 288 case r == '$': |
289 » » return lexIdentifier | 289 » » return lexVariable |
290 case r == '\'': | 290 case r == '\'': |
291 return lexChar | 291 return lexChar |
292 case r == '.': | 292 case r == '.': |
293 // special look-ahead for ".field" so we don't break l.backup(). | 293 // special look-ahead for ".field" so we don't break l.backup(). |
294 if l.pos < len(l.input) { | 294 if l.pos < len(l.input) { |
295 r := l.input[l.pos] | 295 r := l.input[l.pos] |
296 if r < '0' || '9' < r { | 296 if r < '0' || '9' < r { |
297 » » » » return lexIdentifier // itemDot comes from the k
eyword table. | 297 » » » » return lexField |
298 } | 298 } |
299 } | 299 } |
300 fallthrough // '.' can start a number. | 300 fallthrough // '.' can start a number. |
301 case r == '+' || r == '-' || ('0' <= r && r <= '9'): | 301 case r == '+' || r == '-' || ('0' <= r && r <= '9'): |
302 l.backup() | 302 l.backup() |
303 return lexNumber | 303 return lexNumber |
304 case isAlphaNumeric(r): | 304 case isAlphaNumeric(r): |
305 l.backup() | 305 l.backup() |
306 return lexIdentifier | 306 return lexIdentifier |
307 case r == '(': | 307 case r == '(': |
(...skipping 19 matching lines...) Expand all Loading... |
327 // lexSpace scans a run of space characters. | 327 // lexSpace scans a run of space characters. |
328 // One space has already been seen. | 328 // One space has already been seen. |
329 func lexSpace(l *lexer) stateFn { | 329 func lexSpace(l *lexer) stateFn { |
330 for isSpace(l.peek()) { | 330 for isSpace(l.peek()) { |
331 l.next() | 331 l.next() |
332 } | 332 } |
333 l.emit(itemSpace) | 333 l.emit(itemSpace) |
334 return lexInsideAction | 334 return lexInsideAction |
335 } | 335 } |
336 | 336 |
337 // lexIdentifier scans an alphanumeric or field. | 337 // lexIdentifier scans an alphanumeric. |
338 func lexIdentifier(l *lexer) stateFn { | 338 func lexIdentifier(l *lexer) stateFn { |
339 Loop: | 339 Loop: |
340 for { | 340 for { |
341 switch r := l.next(); { | 341 switch r := l.next(); { |
342 case isAlphaNumeric(r): | 342 case isAlphaNumeric(r): |
343 // absorb. | 343 // absorb. |
344 case r == '.' && (l.input[l.start] == '.' || l.input[l.start] ==
'$'): | |
345 // field chaining; absorb into one token. | |
346 default: | 344 default: |
347 l.backup() | 345 l.backup() |
348 word := l.input[l.start:l.pos] | 346 word := l.input[l.start:l.pos] |
349 if !l.atTerminator() { | 347 if !l.atTerminator() { |
350 return l.errorf("bad character %#U", r) | 348 return l.errorf("bad character %#U", r) |
351 } | 349 } |
352 switch { | 350 switch { |
353 case key[word] > itemKeyword: | 351 case key[word] > itemKeyword: |
354 l.emit(key[word]) | 352 l.emit(key[word]) |
355 case word[0] == '.': | 353 case word[0] == '.': |
356 l.emit(itemField) | 354 l.emit(itemField) |
357 case word[0] == '$': | |
358 l.emit(itemVariable) | |
359 case word == "true", word == "false": | 355 case word == "true", word == "false": |
360 l.emit(itemBool) | 356 l.emit(itemBool) |
361 default: | 357 default: |
362 l.emit(itemIdentifier) | 358 l.emit(itemIdentifier) |
363 } | 359 } |
364 break Loop | 360 break Loop |
365 } | 361 } |
366 } | 362 } |
367 return lexInsideAction | 363 return lexInsideAction |
368 } | 364 } |
369 | 365 |
| 366 // lexField scans a field: .Alphanumeric. |
| 367 // The . has been scanned. |
| 368 func lexField(l *lexer) stateFn { |
| 369 return lexFieldOrVariable(l, itemField) |
| 370 } |
| 371 |
| 372 // lexVariable scans a Variable: $Alphanumeric. |
| 373 // The $ has been scanned. |
| 374 func lexVariable(l *lexer) stateFn { |
| 375 if l.atTerminator() { // Nothing interesting follows -> "$". |
| 376 l.emit(itemVariable) |
| 377 return lexInsideAction |
| 378 } |
| 379 return lexFieldOrVariable(l, itemVariable) |
| 380 } |
| 381 |
| 382 // lexFieldOrVariable scans a field or variable: [.$]Alphanumeric. |
| 383 // The . or $ has been scanned. |
| 384 func lexFieldOrVariable(l *lexer, typ itemType) stateFn { |
| 385 if l.atTerminator() { // Nothing interesting follows -> "." or "$". |
| 386 if typ == itemVariable { |
| 387 l.emit(itemVariable) |
| 388 } else { |
| 389 l.emit(itemDot) |
| 390 } |
| 391 return lexInsideAction |
| 392 } |
| 393 var r rune |
| 394 for { |
| 395 r = l.next() |
| 396 if !isAlphaNumeric(r) { |
| 397 l.backup() |
| 398 break |
| 399 } |
| 400 } |
| 401 if !l.atTerminator() { |
| 402 return l.errorf("bad character %#U", r) |
| 403 } |
| 404 l.emit(typ) |
| 405 return lexInsideAction |
| 406 } |
| 407 |
370 // atTerminator reports whether the input is at a valid termination character to | 408 // atTerminator reports whether the input is at a valid termination character to |
371 // appear after an identifier. Mostly to catch cases like "$x+2" not being | 409 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases |
372 // acceptable without a space, in case we decide one day to implement | 410 // like "$x+2" not being acceptable without a space, in case we decide one |
373 // arithmetic. | 411 // day to implement arithmetic. |
374 func (l *lexer) atTerminator() bool { | 412 func (l *lexer) atTerminator() bool { |
375 r := l.peek() | 413 r := l.peek() |
376 if isSpace(r) || isEndOfLine(r) { | 414 if isSpace(r) || isEndOfLine(r) { |
377 return true | 415 return true |
378 } | 416 } |
379 switch r { | 417 switch r { |
380 » case eof, ',', '|', ':', ')', '(': | 418 » case eof, '.', ',', '|', ':', ')', '(': |
381 return true | 419 return true |
382 } | 420 } |
383 // Does r start the delimiter? This can be ambiguous (with delim=="//",
$x/2 will | 421 // Does r start the delimiter? This can be ambiguous (with delim=="//",
$x/2 will |
384 // succeed but should fail) but only in extremely rare cases caused by w
illfully | 422 // succeed but should fail) but only in extremely rare cases caused by w
illfully |
385 // bad choice of delimiter. | 423 // bad choice of delimiter. |
386 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { | 424 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r { |
387 return true | 425 return true |
388 } | 426 } |
389 return false | 427 return false |
390 } | 428 } |
391 | 429 |
392 // lexChar scans a character constant. The initial quote is already | 430 // lexChar scans a character constant. The initial quote is already |
393 // scanned. Syntax checking is done by the parser. | 431 // scanned. Syntax checking is done by the parser. |
394 func lexChar(l *lexer) stateFn { | 432 func lexChar(l *lexer) stateFn { |
395 Loop: | 433 Loop: |
396 for { | 434 for { |
397 switch l.next() { | 435 switch l.next() { |
398 case '\\': | 436 case '\\': |
399 if r := l.next(); r != eof && r != '\n' { | 437 if r := l.next(); r != eof && r != '\n' { |
400 break | 438 break |
401 } | 439 } |
402 fallthrough | 440 fallthrough |
403 case eof, '\n': | 441 case eof, '\n': |
404 return l.errorf("unterminated character constant") | 442 return l.errorf("unterminated character constant") |
405 case '\'': | 443 case '\'': |
406 break Loop | 444 break Loop |
407 } | 445 } |
408 } | 446 } |
409 l.emit(itemCharConstant) | 447 l.emit(itemCharConstant) |
410 return lexInsideAction | 448 return lexInsideAction |
411 } | 449 } |
412 | 450 |
413 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This | 451 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This |
414 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" | 452 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2" |
415 // and "089" - but when it's wrong the input is invalid and the parser (via | 453 // and "089" - but when it's wrong the input is invalid and the parser (via |
416 // strconv) will notice. | 454 // strconv) will notice. |
417 func lexNumber(l *lexer) stateFn { | 455 func lexNumber(l *lexer) stateFn { |
418 if !l.scanNumber() { | 456 if !l.scanNumber() { |
419 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) | 457 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) |
420 } | 458 } |
421 if sign := l.peek(); sign == '+' || sign == '-' { | 459 if sign := l.peek(); sign == '+' || sign == '-' { |
422 » » // Complex: 1+2i. No spaces, must end in 'i'. | 460 » » // Complex: 1+2i. No spaces, must end in 'i'. |
423 if !l.scanNumber() || l.input[l.pos-1] != 'i' { | 461 if !l.scanNumber() || l.input[l.pos-1] != 'i' { |
424 return l.errorf("bad number syntax: %q", l.input[l.start
:l.pos]) | 462 return l.errorf("bad number syntax: %q", l.input[l.start
:l.pos]) |
425 } | 463 } |
426 l.emit(itemComplex) | 464 l.emit(itemComplex) |
427 } else { | 465 } else { |
428 l.emit(itemNumber) | 466 l.emit(itemNumber) |
429 } | 467 } |
430 return lexInsideAction | 468 return lexInsideAction |
431 } | 469 } |
432 | 470 |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
489 } | 527 } |
490 l.emit(itemRawString) | 528 l.emit(itemRawString) |
491 return lexInsideAction | 529 return lexInsideAction |
492 } | 530 } |
493 | 531 |
494 // isSpace reports whether r is a space character. | 532 // isSpace reports whether r is a space character. |
495 func isSpace(r rune) bool { | 533 func isSpace(r rune) bool { |
496 return r == ' ' || r == '\t' | 534 return r == ' ' || r == '\t' |
497 } | 535 } |
498 | 536 |
499 // isEndOfLine reports whether r is an end-of-line character | 537 // isEndOfLine reports whether r is an end-of-line character. |
500 func isEndOfLine(r rune) bool { | 538 func isEndOfLine(r rune) bool { |
501 return r == '\r' || r == '\n' | 539 return r == '\r' || r == '\n' |
502 } | 540 } |
503 | 541 |
504 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. | 542 // isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. |
505 func isAlphaNumeric(r rune) bool { | 543 func isAlphaNumeric(r rune) bool { |
506 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) | 544 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) |
507 } | 545 } |
LEFT | RIGHT |