Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 // Copyright 2011 The Go Authors. All rights reserved. | 1 // Copyright 2011 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 package syntax | 5 package syntax |
6 | 6 |
7 import ( | 7 import ( |
8 "os" | 8 "os" |
9 "sort" | 9 "sort" |
10 "strings" | 10 "strings" |
(...skipping 283 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
294 } | 294 } |
295 if cap(re.Rune)-len(re.Rune) > 100 { | 295 if cap(re.Rune)-len(re.Rune) > 100 { |
296 // re.Rune will not grow any more. | 296 // re.Rune will not grow any more. |
297 // Make a copy or inline to reclaim storage. | 297 // Make a copy or inline to reclaim storage. |
298 re.Rune = append(re.Rune0[:0], re.Rune...) | 298 re.Rune = append(re.Rune0[:0], re.Rune...) |
299 } | 299 } |
300 } | 300 } |
301 } | 301 } |
302 | 302 |
303 // collapse returns the result of applying op to sub. | 303 // collapse returns the result of applying op to sub. |
304 // If sub contains op nodes, they all | 304 // If sub contains op nodes, they all get hoisted up |
305 // get flattened into a single node. | 305 // so that there is never a concat of a concat or an |
r
2011/06/30 03:06:02
odd line break
| |
306 // alternate of an alternate. | |
306 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { | 307 func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { |
307 if len(subs) == 1 { | 308 if len(subs) == 1 { |
308 return subs[0] | 309 return subs[0] |
309 } | 310 } |
310 re := p.newRegexp(op) | 311 re := p.newRegexp(op) |
311 re.Sub = re.Sub0[:0] | 312 re.Sub = re.Sub0[:0] |
312 for _, sub := range subs { | 313 for _, sub := range subs { |
313 if sub.Op == op { | 314 if sub.Op == op { |
314 re.Sub = append(re.Sub, sub.Sub...) | 315 re.Sub = append(re.Sub, sub.Sub...) |
315 p.reuse(sub) | 316 p.reuse(sub) |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
373 } | 374 } |
374 } | 375 } |
375 } | 376 } |
376 | 377 |
377 // Found end of a run with common leading literal string: | 378 // Found end of a run with common leading literal string: |
378 // sub[start:i] all begin with str[0:len(str)], but sub[i] | 379 // sub[start:i] all begin with str[0:len(str)], but sub[i] |
379 // does not even begin with str[0]. | 380 // does not even begin with str[0]. |
380 // | 381 // |
381 // Factor out common string and append factored expression to ou t. | 382 // Factor out common string and append factored expression to ou t. |
382 if i == start { | 383 if i == start { |
383 » » » // Nothing to do - first iteration. | 384 » » » // Nothing to do - run of length 0. |
r
2011/06/30 03:06:02
first iteration has i == 0. you mean something els
| |
384 } else if i == start+1 { | 385 } else if i == start+1 { |
385 // Just one: don't bother factoring. | 386 // Just one: don't bother factoring. |
386 out = append(out, sub[start]) | 387 out = append(out, sub[start]) |
387 } else { | 388 } else { |
388 // Construct factored form: prefix(suffix1|suffix2|...) | 389 // Construct factored form: prefix(suffix1|suffix2|...) |
389 prefix := p.newRegexp(OpLiteral) | 390 prefix := p.newRegexp(OpLiteral) |
390 prefix.Flags = strflags | 391 prefix.Flags = strflags |
391 prefix.Rune = append(prefix.Rune[:0], str...) | 392 prefix.Rune = append(prefix.Rune[:0], str...) |
392 | 393 |
393 for j := start; j < i; j++ { | 394 for j := start; j < i; j++ { |
394 sub[j] = p.removeLeadingString(sub[j], len(str)) | 395 sub[j] = p.removeLeadingString(sub[j], len(str)) |
395 } | 396 } |
396 suffix := p.collapse(sub[start:i], OpAlternate) // recur se | 397 suffix := p.collapse(sub[start:i], OpAlternate) // recur se |
397 | 398 |
398 re := p.newRegexp(OpConcat) | 399 re := p.newRegexp(OpConcat) |
399 re.Sub = append(re.Sub[:0], prefix, suffix) | 400 re.Sub = append(re.Sub[:0], prefix, suffix) |
400 out = append(out, re) | 401 out = append(out, re) |
401 } | 402 } |
402 | 403 |
403 » » // Prepare for next iteration (if there is one). | 404 » » // Prepare for next iteration. |
r
2011/06/30 03:06:02
delete parenthetical remark
| |
404 start = i | 405 start = i |
405 str = istr | 406 str = istr |
406 strflags = iflags | 407 strflags = iflags |
407 } | 408 } |
408 sub = out | 409 sub = out |
409 | 410 |
410 // Round 2: Factor out common complex prefixes, | 411 // Round 2: Factor out common complex prefixes, |
411 // just the first piece of each concatenation, | 412 // just the first piece of each concatenation, |
412 // whatever it is. This is good enough a lot of the time. | 413 // whatever it is. This is good enough a lot of the time. |
413 start = 0 | 414 start = 0 |
(...skipping 12 matching lines...) Expand all Loading... | |
426 if first != nil && first.Equal(ifirst) { | 427 if first != nil && first.Equal(ifirst) { |
427 continue | 428 continue |
428 } | 429 } |
429 } | 430 } |
430 | 431 |
431 // Found end of a run with common leading regexp: | 432 // Found end of a run with common leading regexp: |
432 // sub[start:i] all begin with first but sub[i] does not. | 433 // sub[start:i] all begin with first but sub[i] does not. |
433 // | 434 // |
434 // Factor out common regexp and append factored expression to ou t. | 435 // Factor out common regexp and append factored expression to ou t. |
435 if i == start { | 436 if i == start { |
436 » » » // Nothing to do - first iteration. | 437 » » » // Nothing to do - run of length 0. |
r
2011/06/30 03:06:02
first iteration has i == 0. you mean something els
| |
437 } else if i == start+1 { | 438 } else if i == start+1 { |
438 // Just one: don't bother factoring. | 439 // Just one: don't bother factoring. |
439 out = append(out, sub[start]) | 440 out = append(out, sub[start]) |
440 } else { | 441 } else { |
441 // Construct factored form: prefix(suffix1|suffix2|...) | 442 // Construct factored form: prefix(suffix1|suffix2|...) |
442 prefix := first | 443 prefix := first |
443 | 444 |
444 for j := start; j < i; j++ { | 445 for j := start; j < i; j++ { |
445 reuse := j != start // prefix came from sub[star t]· | 446 reuse := j != start // prefix came from sub[star t]· |
446 sub[j] = p.removeLeadingRegexp(sub[j], reuse) | 447 sub[j] = p.removeLeadingRegexp(sub[j], reuse) |
447 } | 448 } |
448 suffix := p.collapse(sub[start:i], OpAlternate) // recur se | 449 suffix := p.collapse(sub[start:i], OpAlternate) // recur se |
449 | 450 |
450 re := p.newRegexp(OpConcat) | 451 re := p.newRegexp(OpConcat) |
451 re.Sub = append(re.Sub[:0], prefix, suffix) | 452 re.Sub = append(re.Sub[:0], prefix, suffix) |
452 out = append(out, re) | 453 out = append(out, re) |
453 } | 454 } |
454 | 455 |
455 » » // Prepare for next iteration (if there is one). | 456 » » // Prepare for next iteration. |
r
2011/06/30 03:06:02
ditto
| |
456 start = i | 457 start = i |
457 first = ifirst | 458 first = ifirst |
458 } | 459 } |
459 sub = out | 460 sub = out |
460 | 461 |
461 // Round 3: Collapse runs of single literals into character classes. | 462 // Round 3: Collapse runs of single literals into character classes. |
462 start = 0 | 463 start = 0 |
463 out = sub[:0] | 464 out = sub[:0] |
464 for i := 0; i <= len(sub); i++ { | 465 for i := 0; i <= len(sub); i++ { |
465 // Invariant: the Regexps that were in sub[0:start] have been | 466 // Invariant: the Regexps that were in sub[0:start] have been |
466 // used or marked for reuse, and the slice space has been reused | 467 // used or marked for reuse, and the slice space has been reused |
467 // for out (len(out) <= start). | 468 // for out (len(out) <= start). |
468 // | 469 // |
469 // Invariant: sub[start:i] consists of regexps that are either | 470 // Invariant: sub[start:i] consists of regexps that are either |
470 // literal runes or character classes. | 471 // literal runes or character classes. |
471 if i < len(sub) && isCharClass(sub[i]) { | 472 if i < len(sub) && isCharClass(sub[i]) { |
472 continue | 473 continue |
473 } | 474 } |
474 | 475 |
475 // sub[i] is not a char or char class; | 476 // sub[i] is not a char or char class; |
476 // emit char class for sub[start:i]... | 477 // emit char class for sub[start:i]... |
477 if i == start { | 478 if i == start { |
478 » » » // Nothing to do | 479 » » » // Nothing to do - run of length 0. |
479 } else if i == start+1 { | 480 } else if i == start+1 { |
480 out = append(out, sub[start]) | 481 out = append(out, sub[start]) |
481 } else { | 482 } else { |
482 // Make new char class. | 483 // Make new char class. |
483 // Start with most complex regexp in sub[start]. | 484 // Start with most complex regexp in sub[start]. |
484 max := start | 485 max := start |
485 for j := start + 1; j < i; j++ { | 486 for j := start + 1; j < i; j++ { |
486 if sub[max].Op < sub[j].Op || sub[max].Op == sub [j].Op && len(sub[max].Rune) < len(sub[j].Rune) { | 487 if sub[max].Op < sub[j].Op || sub[max].Op == sub [j].Op && len(sub[max].Rune) < len(sub[j].Rune) { |
487 max = j | 488 max = j |
488 } | 489 } |
(...skipping 23 matching lines...) Expand all Loading... | |
512 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { | 513 if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { |
513 continue | 514 continue |
514 } | 515 } |
515 out = append(out, sub[i]) | 516 out = append(out, sub[i]) |
516 } | 517 } |
517 sub = out | 518 sub = out |
518 | 519 |
519 return sub | 520 return sub |
520 } | 521 } |
521 | 522 |
522 // leadingString returns the leading literal string that re starts with. | 523 // leadingString returns the leading literal string that re begins with. |
r
2011/06/30 03:06:02
s/re starts with/begins re/
| |
523 // The string refers to storage in re or its children. | 524 // The string refers to storage in re or its children. |
524 func (p *parser) leadingString(re *Regexp) ([]int, Flags) { | 525 func (p *parser) leadingString(re *Regexp) ([]int, Flags) { |
525 if re.Op == OpConcat && len(re.Sub) > 0 { | 526 if re.Op == OpConcat && len(re.Sub) > 0 { |
526 re = re.Sub[0] | 527 re = re.Sub[0] |
527 } | 528 } |
528 if re.Op != OpLiteral { | 529 if re.Op != OpLiteral { |
529 return nil, 0 | 530 return nil, 0 |
530 } | 531 } |
531 return re.Rune, re.Flags & FoldCase | 532 return re.Rune, re.Flags & FoldCase |
532 } | 533 } |
(...skipping 28 matching lines...) Expand all Loading... | |
561 | 562 |
562 if re.Op == OpLiteral { | 563 if re.Op == OpLiteral { |
563 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])] | 564 re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])] |
564 if len(re.Rune) == 0 { | 565 if len(re.Rune) == 0 { |
565 re.Op = OpEmptyMatch | 566 re.Op = OpEmptyMatch |
566 } | 567 } |
567 } | 568 } |
568 return re | 569 return re |
569 } | 570 } |
570 | 571 |
571 // leadingRegexp returns the leading regexp that re starts with. | 572 // leadingRegexp returns the leading regexp that re begins with. |
572 // The regexp refers to storage in re or its children. | 573 // The regexp refers to storage in re or its children. |
573 func (p *parser) leadingRegexp(re *Regexp) *Regexp { | 574 func (p *parser) leadingRegexp(re *Regexp) *Regexp { |
574 if re.Op == OpEmptyMatch { | 575 if re.Op == OpEmptyMatch { |
575 return nil | 576 return nil |
576 } | 577 } |
577 if re.Op == OpConcat && len(re.Sub) > 0 { | 578 if re.Op == OpConcat && len(re.Sub) > 0 { |
578 sub := re.Sub[0] | 579 sub := re.Sub[0] |
579 if sub.Op == OpEmptyMatch { | 580 if sub.Op == OpEmptyMatch { |
580 return nil | 581 return nil |
581 } | 582 } |
(...skipping 448 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1030 // If it sits above an opVerticalBar, swap it below | 1031 // If it sits above an opVerticalBar, swap it below |
1031 // (things below an opVerticalBar become an alternation). | 1032 // (things below an opVerticalBar become an alternation). |
1032 // Otherwise, push a new vertical bar. | 1033 // Otherwise, push a new vertical bar. |
1033 if !p.swapVerticalBar() { | 1034 if !p.swapVerticalBar() { |
1034 p.op(opVerticalBar) | 1035 p.op(opVerticalBar) |
1035 } | 1036 } |
1036 | 1037 |
1037 return nil | 1038 return nil |
1038 } | 1039 } |
1039 | 1040 |
1040 // mergeCharClass makes re1 = re1|re2. | 1041 // mergeCharClass makes dst = dst|src. |
1041 // The caller must ensure that re1.Op > re2.Op. | 1042 // The caller must ensure that dst.Op >= src.Op, |
r
2011/06/30 03:06:02
i'd expect re2.Op > re1.Op
| |
1042 func mergeCharClass(re1, re2 *Regexp) { | 1043 // to reduce the amount of copying. |
1043 » switch re1.Op { | 1044 func mergeCharClass(dst, src *Regexp) { |
1045 » switch dst.Op { | |
1044 case OpAnyChar: | 1046 case OpAnyChar: |
1045 » » // re2 doesn't add anything. | 1047 » » // src doesn't add anything. |
1046 case OpAnyCharNotNL: | 1048 case OpAnyCharNotNL: |
1047 » » // re2 might add \n | 1049 » » // src might add \n |
1048 » » if matchRune(re2, '\n') { | 1050 » » if matchRune(src, '\n') { |
1049 » » » re1.Op = OpAnyChar | 1051 » » » dst.Op = OpAnyChar |
1050 } | 1052 } |
1051 case OpCharClass: | 1053 case OpCharClass: |
1052 » » // re2 is simpler, so either literal or char class | 1054 » » // src is simpler, so either literal or char class |
1053 » » if re2.Op == OpLiteral { | 1055 » » if src.Op == OpLiteral { |
1054 » » » re1.Rune = appendRange(re1.Rune, re2.Rune[0], re2.Rune[0 ]) | 1056 » » » dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0 ]) |
1055 } else { | 1057 } else { |
1056 » » » re1.Rune = appendClass(re1.Rune, re2.Rune) | 1058 » » » dst.Rune = appendClass(dst.Rune, src.Rune) |
1057 } | 1059 } |
1058 case OpLiteral: | 1060 case OpLiteral: |
1059 // both literal | 1061 // both literal |
1060 » » if re2.Rune[0] == re1.Rune[0] { | 1062 » » if src.Rune[0] == dst.Rune[0] { |
1061 break | 1063 break |
1062 } | 1064 } |
1063 » » re1.Op = OpCharClass | 1065 » » dst.Op = OpCharClass |
1064 » » re1.Rune = append(re1.Rune, re1.Rune[0]) | 1066 » » dst.Rune = append(dst.Rune, dst.Rune[0]) |
1065 » » re1.Rune = appendRange(re1.Rune, re2.Rune[0], re2.Rune[0]) | 1067 » » dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0]) |
1066 } | 1068 } |
1067 } | 1069 } |
1068 | 1070 |
1069 // If the top of the stack is an element followed by an opVerticalBar | 1071 // If the top of the stack is an element followed by an opVerticalBar |
1070 // swapVerticalBar swaps the two and returns true. | 1072 // swapVerticalBar swaps the two and returns true. |
1071 // Otherwise it returns false. | 1073 // Otherwise it returns false. |
1072 func (p *parser) swapVerticalBar() bool { | 1074 func (p *parser) swapVerticalBar() bool { |
1073 // If above and below vertical bar are literal or char class, | 1075 // If above and below vertical bar are literal or char class, |
1074 // can merge into a single char class. | 1076 // can merge into a single char class. |
1075 n := len(p.stack) | 1077 n := len(p.stack) |
(...skipping 711 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1787 return c - '0' | 1789 return c - '0' |
1788 } | 1790 } |
1789 if 'a' <= c && c <= 'f' { | 1791 if 'a' <= c && c <= 'f' { |
1790 return c - 'a' + 10 | 1792 return c - 'a' + 10 |
1791 } | 1793 } |
1792 if 'A' <= c && c <= 'F' { | 1794 if 'A' <= c && c <= 'F' { |
1793 return c - 'A' + 10 | 1795 return c - 'A' + 10 |
1794 } | 1796 } |
1795 return -1 | 1797 return -1 |
1796 } | 1798 } |
LEFT | RIGHT |