OLD | NEW |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // Package scanner implements a scanner for Go source text. | 5 // Package scanner implements a scanner for Go source text. |
6 // It takes a []byte as source which can then be tokenized | 6 // It takes a []byte as source which can then be tokenized |
7 // through repeated calls to the Scan method. | 7 // through repeated calls to the Scan method. |
8 // | 8 // |
9 package scanner | 9 package scanner |
10 | 10 |
(...skipping 554 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
565 func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { | 565 func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) { |
566 scanAgain: | 566 scanAgain: |
567 s.skipWhitespace() | 567 s.skipWhitespace() |
568 | 568 |
569 // current token start | 569 // current token start |
570 pos = s.file.Pos(s.offset) | 570 pos = s.file.Pos(s.offset) |
571 | 571 |
572 // determine token value | 572 // determine token value |
573 insertSemi := false | 573 insertSemi := false |
574 switch ch := s.ch; { | 574 switch ch := s.ch; { |
575 » case isLetter(ch): | 575 » case 'a' <= ch && ch <= 'z': |
| 576 » » // keywords start with a lower-case letter |
576 lit = s.scanIdentifier() | 577 lit = s.scanIdentifier() |
577 » » tok = token.Lookup(lit) | 578 » » if len(lit) > 1 { |
578 » » switch tok { | 579 » » » // keywords are longer than one letter - avoid lookup ot
herwise |
579 » » case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH
, token.RETURN: | 580 » » » tok = token.Lookup(lit) |
| 581 » » » switch tok { |
| 582 » » » case token.IDENT, token.BREAK, token.CONTINUE, token.FAL
LTHROUGH, token.RETURN: |
| 583 » » » » insertSemi = true |
| 584 » » » } |
| 585 » » } else { |
580 insertSemi = true | 586 insertSemi = true |
| 587 tok = token.IDENT |
581 } | 588 } |
582 » case digitVal(ch) < 10: | 589 » case 'A' <= ch && ch <= 'Z' || ch == '_': |
| 590 » » insertSemi = true |
| 591 » » tok = token.IDENT |
| 592 » » lit = s.scanIdentifier() |
| 593 » case '0' <= ch && ch <= '9': |
583 insertSemi = true | 594 insertSemi = true |
584 tok, lit = s.scanNumber(false) | 595 tok, lit = s.scanNumber(false) |
585 default: | 596 default: |
586 s.next() // always make progress | 597 s.next() // always make progress |
587 switch ch { | 598 switch ch { |
588 case -1: | 599 case -1: |
589 if s.insertSemi { | 600 if s.insertSemi { |
590 s.insertSemi = false // EOF consumed | 601 s.insertSemi = false // EOF consumed |
591 return pos, token.SEMICOLON, "\n" | 602 return pos, token.SEMICOLON, "\n" |
592 } | 603 } |
(...skipping 12 matching lines...) Expand all Loading... |
605 insertSemi = true | 616 insertSemi = true |
606 tok = token.CHAR | 617 tok = token.CHAR |
607 lit = s.scanChar() | 618 lit = s.scanChar() |
608 case '`': | 619 case '`': |
609 insertSemi = true | 620 insertSemi = true |
610 tok = token.STRING | 621 tok = token.STRING |
611 lit = s.scanRawString() | 622 lit = s.scanRawString() |
612 case ':': | 623 case ':': |
613 tok = s.switch2(token.COLON, token.DEFINE) | 624 tok = s.switch2(token.COLON, token.DEFINE) |
614 case '.': | 625 case '.': |
615 » » » if digitVal(s.ch) < 10 { | 626 » » » if '0' <= s.ch && s.ch <= '9' { |
616 insertSemi = true | 627 insertSemi = true |
617 tok, lit = s.scanNumber(true) | 628 tok, lit = s.scanNumber(true) |
618 } else if s.ch == '.' { | 629 } else if s.ch == '.' { |
619 s.next() | 630 s.next() |
620 if s.ch == '.' { | 631 if s.ch == '.' { |
621 s.next() | 632 s.next() |
622 tok = token.ELLIPSIS | 633 tok = token.ELLIPSIS |
623 } | 634 } |
624 } else { | 635 } else { |
625 tok = token.PERIOD | 636 tok = token.PERIOD |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
697 case '&': | 708 case '&': |
698 if s.ch == '^' { | 709 if s.ch == '^' { |
699 s.next() | 710 s.next() |
700 tok = s.switch2(token.AND_NOT, token.AND_NOT_ASS
IGN) | 711 tok = s.switch2(token.AND_NOT, token.AND_NOT_ASS
IGN) |
701 } else { | 712 } else { |
702 tok = s.switch3(token.AND, token.AND_ASSIGN, '&'
, token.LAND) | 713 tok = s.switch3(token.AND, token.AND_ASSIGN, '&'
, token.LAND) |
703 } | 714 } |
704 case '|': | 715 case '|': |
705 tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LO
R) | 716 tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LO
R) |
706 default: | 717 default: |
707 » » » s.error(s.file.Offset(pos), fmt.Sprintf("illegal charact
er %#U", ch)) | 718 » » » if isLetter(ch) { |
708 » » » insertSemi = s.insertSemi // preserve insertSemi info | 719 » » » » // handle any letters we might have missed |
709 » » » tok = token.ILLEGAL | 720 » » » » insertSemi = true |
710 » » » lit = string(ch) | 721 » » » » tok = token.IDENT |
| 722 » » » » s.scanIdentifier() |
| 723 » » » } else { |
| 724 » » » » s.error(s.file.Offset(pos), fmt.Sprintf("illegal
character %#U", ch)) |
| 725 » » » » insertSemi = s.insertSemi // preserve insertSemi
info |
| 726 » » » » tok = token.ILLEGAL |
| 727 » » » » lit = string(ch) |
| 728 » » » } |
711 } | 729 } |
712 } | 730 } |
713 if s.mode&dontInsertSemis == 0 { | 731 if s.mode&dontInsertSemis == 0 { |
714 s.insertSemi = insertSemi | 732 s.insertSemi = insertSemi |
715 } | 733 } |
716 | 734 |
717 return | 735 return |
718 } | 736 } |
OLD | NEW |