re2/parse.cc - Issue 1174041: code review 1174041: re2: memory diet

Side by Side Diff: re2/parse.cc

Issue 1174041: code review 1174041: re2: memory diet (Closed)

Patch Set: code review 1174041: re2: memory diet Created 14 years, 11 months ago

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2006 The RE2 Authors. All Rights Reserved.	1 // Copyright 2006 The RE2 Authors. All Rights Reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 // Regular expression parser.	5 // Regular expression parser.

6	6

7 // The parser is a simple precedence-based parser with a	7 // The parser is a simple precedence-based parser with a

8 // manual stack. The parsing work is done by the methods	8 // manual stack. The parsing work is done by the methods

9 // of the ParseState class. The Regexp::Parse function is	9 // of the ParseState class. The Regexp::Parse function is

10 // essentially just a lexer that calls the ParseState method	10 // essentially just a lexer that calls the ParseState method

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
95 bool DoLeftParenNoCapture();	95 bool DoLeftParenNoCapture();

96	96

97 // Processes a vertical bar in the input.	97 // Processes a vertical bar in the input.

98 bool DoVerticalBar();	98 bool DoVerticalBar();

99	99

100 // Processes a right parenthesis in the input.	100 // Processes a right parenthesis in the input.

101 bool DoRightParen();	101 bool DoRightParen();

102	102

103 // Processes the end of input, returning the final regexp.	103 // Processes the end of input, returning the final regexp.

104 Regexp* DoFinish();	104 Regexp* DoFinish();

	105 ··

	106 // Finishes the regexp if necessary, preparing it for use

	107 // in a more complicated expression.

	108 // If it is a CharClassBuilder, converts into a CharClass.

	109 Regexp* FinishRegexp(Regexp*);

105	110

106 // These routines don't manipulate the parse stack	111 // These routines don't manipulate the parse stack

107 // directly, but they do need to look at flags_.	112 // directly, but they do need to look at flags_.

108 // ParseCharClass also manipulates the internals of Regexp	113 // ParseCharClass also manipulates the internals of Regexp

109 // while creating *out_re.	114 // while creating *out_re.

110	115

111 // Parse a character class into *out_re.	116 // Parse a character class into *out_re.

112 // Removes parsed text from s.	117 // Removes parsed text from s.

113 bool ParseCharClass(StringPiece* s, Regexp** out_re,	118 bool ParseCharClass(StringPiece* s, Regexp** out_re,

114 RegexpStatus* status);	119 RegexpStatus* status);

(...skipping 52 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
167 // Cleans up by freeing all the regexps on the stack.	172 // Cleans up by freeing all the regexps on the stack.

168 Regexp::ParseState::~ParseState() {	173 Regexp::ParseState::~ParseState() {

169 Regexp* next;	174 Regexp* next;

170 for (Regexp* re = stacktop_; re != NULL; re = next) {	175 for (Regexp* re = stacktop_; re != NULL; re = next) {

171 next = re->down_;	176 next = re->down_;

172 re->down_ = NULL;	177 re->down_ = NULL;

173 re->Decref();	178 re->Decref();

174 }	179 }

175 }	180 }

176	181

	182 // Finishes the regexp if necessary, preparing it for use in

	183 // a more complex expression.

	184 // If it is a CharClassBuilder, converts into a CharClass.

	185 Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) {

	186 if (re == NULL)

	187 return NULL;

	188 re->down_ = NULL;

	189

	190 if (re->op_ == kRegexpCharClass && re->ccb_ != NULL) {

	191 CharClassBuilder* ccb = re->ccb_;

	192 re->ccb_ = NULL;

	193 re->cc_ = ccb->GetCharClass();

	194 delete ccb;

	195 }

	196

	197 return re;

	198 }

	199

177 // Pushes the given regular expression onto the stack.	200 // Pushes the given regular expression onto the stack.

178 // Could check for too much memory used here.	201 // Could check for too much memory used here.

179 bool Regexp::ParseState::PushRegexp(Regexp* re) {	202 bool Regexp::ParseState::PushRegexp(Regexp* re) {

180 MaybeConcatString(-1, NoParseFlags);	203 MaybeConcatString(-1, NoParseFlags);

181	204

182 // Special case: a character class of one character is just	205 // Special case: a character class of one character is just

183 // a literal. This is a common idiom for escaping	206 // a literal. This is a common idiom for escaping

184 // single characters (e.g., [.] instead of \.), and some	207 // single characters (e.g., [.] instead of \.), and some

185 // analysis does better with fewer character classes.	208 // analysis does better with fewer character classes.

186 // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding.	209 // Similarly, [Aa] can be rewritten as a literal A with ASCII case folding.

187 if (re->op_ == kRegexpCharClass) {	210 if (re->op_ == kRegexpCharClass) {

188 if (re->cc_->size() == 1) {	211 if (re->ccb_->size() == 1) {

189 Rune r = re->cc_->begin()->lo;	212 Rune r = re->ccb_->begin()->lo;

190 re->Decref();	213 re->Decref();

191 re = new Regexp(kRegexpLiteral, flags_);	214 re = new Regexp(kRegexpLiteral, flags_);

192 re->rune_ = r;	215 re->rune_ = r;

193 } else if (re->cc_->size() == 2) {	216 } else if (re->ccb_->size() == 2) {

194 Rune r = re->cc_->begin()->lo;	217 Rune r = re->ccb_->begin()->lo;

195 if ('A' <= r && r <= 'Z' && re->cc_->Contains(r + 'a' - 'A')) {	218 if ('A' <= r && r <= 'Z' && re->ccb_->Contains(r + 'a' - 'A')) {

196 re->Decref();	219 re->Decref();

197 re = new Regexp(kRegexpLiteral, flags_ \| FoldCase);	220 re = new Regexp(kRegexpLiteral, flags_ \| FoldCase);

198 re->rune_ = r + 'a' - 'A';	221 re->rune_ = r + 'a' - 'A';

199 }	222 }

200 }	223 }

201 }	224 }

202	225

203 if (!IsMarker(re->op()))	226 if (!IsMarker(re->op()))

204 re->simple_ = re->ComputeSimple();	227 re->simple_ = re->ComputeSimple();

205 re->down_ = stacktop_;	228 re->down_ = stacktop_;

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
272 Rune CycleFoldRune(Rune r) {	295 Rune CycleFoldRune(Rune r) {

273 CaseFold* f = LookupCaseFold(r);	296 CaseFold* f = LookupCaseFold(r);

274 if (f == NULL \|\| r < f->lo)	297 if (f == NULL \|\| r < f->lo)

275 return r;	298 return r;

276 return ApplyFold(f, r);	299 return ApplyFold(f, r);

277 }	300 }

278	301

279 // Add lo-hi to the class, along with their fold-equivalent characters.	302 // Add lo-hi to the class, along with their fold-equivalent characters.

280 // If lo-hi is already in the class, assume that the fold-equivalent	303 // If lo-hi is already in the class, assume that the fold-equivalent

281 // chars are there too, so there's no work to do.	304 // chars are there too, so there's no work to do.

282 static void AddFoldedRange(CharClass* cc, Rune lo, Rune hi, int depth) {	305 static void AddFoldedRange(CharClassBuilder* cc, Rune lo, Rune hi, int depth) {

283 // AddFoldedRange calls itself recursively for each rune in the fold cycle.	306 // AddFoldedRange calls itself recursively for each rune in the fold cycle.

284 // Most folding cycles are small: there aren't any bigger than four in the	307 // Most folding cycles are small: there aren't any bigger than four in the

285 // current Unicode tables. make_unicode_casefold.py checks that	308 // current Unicode tables. make_unicode_casefold.py checks that

286 // the cycles are not too long, and we double-check here using depth.	309 // the cycles are not too long, and we double-check here using depth.

287 if (depth > 10) {	310 if (depth > 10) {

288 LOG(DFATAL) << "AddFoldedRange recurses too much.";	311 LOG(DFATAL) << "AddFoldedRange recurses too much.";

289 return;	312 return;

290 }	313 }

291	314

292 if (!cc->AddRange(lo, hi)) // lo-hi was already there? we're done	315 if (!cc->AddRange(lo, hi)) // lo-hi was already there? we're done

(...skipping 35 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
328 // Pick up where this fold left off.	351 // Pick up where this fold left off.

329 lo = f->hi + 1;	352 lo = f->hi + 1;

330 }	353 }

331 }	354 }

332	355

333 // Pushes the literal rune r onto the stack.	356 // Pushes the literal rune r onto the stack.

334 bool Regexp::ParseState::PushLiteral(Rune r) {	357 bool Regexp::ParseState::PushLiteral(Rune r) {

335 // Do case folding if needed.	358 // Do case folding if needed.

336 if ((flags_ & FoldCase) && CycleFoldRune(r) != r) {	359 if ((flags_ & FoldCase) && CycleFoldRune(r) != r) {

337 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);	360 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);

	361 re->ccb_ = new CharClassBuilder;

338 Rune r1 = r;	362 Rune r1 = r;

339 do {	363 do {

340 if (!(flags_ & NeverNL) \|\| r != '\n') {	364 if (!(flags_ & NeverNL) \|\| r != '\n') {

341 re->cc()->AddRange(r, r);	365 re->ccb_->AddRange(r, r);

342 }	366 }

343 r = CycleFoldRune(r);	367 r = CycleFoldRune(r);

344 } while (r != r1);	368 } while (r != r1);

345 re->cc()->RemoveAbove(rune_max_);	369 re->ccb_->RemoveAbove(rune_max_);

346 return PushRegexp(re);	370 return PushRegexp(re);

347 }	371 }

348	372

349 // Exclude newline if applicable.	373 // Exclude newline if applicable.

350 if ((flags_ & NeverNL) && r == '\n')	374 if ((flags_ & NeverNL) && r == '\n')

351 return PushRegexp(new Regexp(kRegexpNoMatch, flags_));	375 return PushRegexp(new Regexp(kRegexpNoMatch, flags_));

352	376

353 // No fancy stuff worked. Ordinary literal.	377 // No fancy stuff worked. Ordinary literal.

354 if (MaybeConcatString(r, flags_))	378 if (MaybeConcatString(r, flags_))

355 return true;	379 return true;

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
387 }	411 }

388 return PushSimpleOp(kRegexpEndLine);	412 return PushSimpleOp(kRegexpEndLine);

389 }	413 }

390	414

391 // Pushes a . onto the stack.	415 // Pushes a . onto the stack.

392 bool Regexp::ParseState::PushDot() {	416 bool Regexp::ParseState::PushDot() {

393 if ((flags_ & DotNL) && !(flags_ & NeverNL))	417 if ((flags_ & DotNL) && !(flags_ & NeverNL))

394 return PushSimpleOp(kRegexpAnyChar);	418 return PushSimpleOp(kRegexpAnyChar);

395 // Rewrite . into [^\n]	419 // Rewrite . into [^\n]

396 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);	420 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);

397 re->cc_->AddRange(0, '\n' - 1);	421 re->ccb_ = new CharClassBuilder;

398 re->cc_->AddRange('\n' + 1, rune_max_);	422 re->ccb_->AddRange(0, '\n' - 1);

	423 re->ccb_->AddRange('\n' + 1, rune_max_);

399 return PushRegexp(re);	424 return PushRegexp(re);

400 }	425 }

401	426

402 // Pushes a regexp with the given op (and no args) onto the stack.	427 // Pushes a regexp with the given op (and no args) onto the stack.

403 bool Regexp::ParseState::PushSimpleOp(RegexpOp op) {	428 bool Regexp::ParseState::PushSimpleOp(RegexpOp op) {

404 Regexp* re = new Regexp(op, flags_);	429 Regexp* re = new Regexp(op, flags_);

405 return PushRegexp(re);	430 return PushRegexp(re);

406 }	431 }

407	432

408 // Pushes a repeat operator regexp onto the stack.	433 // Pushes a repeat operator regexp onto the stack.

409 // A valid argument for the operator must already be on the stack.	434 // A valid argument for the operator must already be on the stack.

410 // The char c is the name of the operator, for use in error messages.	435 // The char c is the name of the operator, for use in error messages.

411 bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,	436 bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,

412 bool nongreedy) {	437 bool nongreedy) {

413 if (stacktop_ == NULL \|\| IsMarker(stacktop_->op())) {	438 if (stacktop_ == NULL \|\| IsMarker(stacktop_->op())) {

414 status_->set_code(kRegexpRepeatArgument);	439 status_->set_code(kRegexpRepeatArgument);

415 status_->set_error_arg(s);	440 status_->set_error_arg(s);

416 return false;	441 return false;

417 }	442 }

418 Regexp::ParseFlags fl = flags_;	443 Regexp::ParseFlags fl = flags_;

419 if (nongreedy)	444 if (nongreedy)

420 fl = fl ^ NonGreedy;	445 fl = fl ^ NonGreedy;

421 Regexp* re = new Regexp(op, fl);	446 Regexp* re = new Regexp(op, fl);

422 re->AllocSub(1);	447 re->AllocSub(1);

423 re->sub_[0] = stacktop_;	448 re->down_ = stacktop_->down_;

	449 re->sub()[0] = FinishRegexp(stacktop_);

424 re->simple_ = re->ComputeSimple();	450 re->simple_ = re->ComputeSimple();

425 re->down_ = stacktop_->down_;

426 stacktop_ = re;	451 stacktop_ = re;

427 return true;	452 return true;

428 }	453 }

429	454

430 // Pushes a repetition regexp onto the stack.	455 // Pushes a repetition regexp onto the stack.

431 // A valid argument for the operator must already be on the stack.	456 // A valid argument for the operator must already be on the stack.

432 bool Regexp::ParseState::PushRepetition(int min, int max,	457 bool Regexp::ParseState::PushRepetition(int min, int max,

433 const StringPiece& s,	458 const StringPiece& s,

434 bool nongreedy) {	459 bool nongreedy) {

435 if ((max != -1 && max < min) \|\| max > 1000) {	460 if ((max != -1 && max < min) \|\| max > 1000) {

436 status_->set_code(kRegexpRepeatSize);	461 status_->set_code(kRegexpRepeatSize);

437 status_->set_error_arg(s);	462 status_->set_error_arg(s);

438 return false;	463 return false;

439 }	464 }

440 if (stacktop_ == NULL \|\| IsMarker(stacktop_->op())) {	465 if (stacktop_ == NULL \|\| IsMarker(stacktop_->op())) {

441 status_->set_code(kRegexpRepeatArgument);	466 status_->set_code(kRegexpRepeatArgument);

442 status_->set_error_arg(s);	467 status_->set_error_arg(s);

443 return false;	468 return false;

444 }	469 }

445 Regexp::ParseFlags fl = flags_;	470 Regexp::ParseFlags fl = flags_;

446 if (nongreedy)	471 if (nongreedy)

447 fl = fl ^ NonGreedy;	472 fl = fl ^ NonGreedy;

448 Regexp* re = new Regexp(kRegexpRepeat, fl);	473 Regexp* re = new Regexp(kRegexpRepeat, fl);

449 re->min_ = min;	474 re->min_ = min;

450 re->max_ = max;	475 re->max_ = max;

451 re->AllocSub(1);	476 re->AllocSub(1);

452 re->sub_[0] = stacktop_;	477 re->down_ = stacktop_->down_;

	478 re->sub()[0] = FinishRegexp(stacktop_);

453 re->simple_ = re->ComputeSimple();	479 re->simple_ = re->ComputeSimple();

454	480

455 re->down_ = stacktop_->down_;

456 stacktop_ = re;	481 stacktop_ = re;

457 return true;	482 return true;

458 }	483 }

459	484

460 // Pseudo-operators marking left paren and pipe operators	485 // Pseudo-operators - only on parse stack.

461 // in the regexp stack.	486

	487 // Markers for left paren and pipe operators in the regexp stack.

462 const RegexpOp kLeftParen = static_cast<RegexpOp>(kMaxRegexpOp+1);	488 const RegexpOp kLeftParen = static_cast<RegexpOp>(kMaxRegexpOp+1);

463 const RegexpOp kVerticalBar = static_cast<RegexpOp>(kMaxRegexpOp+2);	489 const RegexpOp kVerticalBar = static_cast<RegexpOp>(kMaxRegexpOp+2);

464	490

465 // Checks whether a particular regexp op is a marker.	491 // Checks whether a particular regexp op is a marker.

466 bool Regexp::ParseState::IsMarker(RegexpOp op) {	492 bool Regexp::ParseState::IsMarker(RegexpOp op) {

467 return op >= kLeftParen;	493 return op >= kLeftParen;

468 }	494 }

469	495

470 // Processes a left parenthesis in the input.	496 // Processes a left parenthesis in the input.

471 // Pushes a marker onto the stack.	497 // Pushes a marker onto the stack.

472 bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {	498 bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {

473 Regexp* re = new Regexp(kLeftParen, flags_);	499 Regexp* re = new Regexp(kLeftParen, flags_);

474 re->cap_ = ++ncap_;	500 re->cap_ = ++ncap_;

475 if (name.data() != NULL)	501 if (name.data() != NULL)

476 re->name_ = new string(name.as_string());	502 re->name_ = new string(name.as_string());

477 return PushRegexp(re);	503 return PushRegexp(re);

478 }	504 }

479	505

480 // Pushes a non-capturing marker onto the stack.	506 // Pushes a non-capturing marker onto the stack.

481 bool Regexp::ParseState::DoLeftParenNoCapture() {	507 bool Regexp::ParseState::DoLeftParenNoCapture() {

482 Regexp* re = new Regexp(kLeftParen, flags_);	508 Regexp* re = new Regexp(kLeftParen, flags_);

483 re->cap_ = -1;	509 re->cap_ = -1;

484 return PushRegexp(re);	510 return PushRegexp(re);

485 }	511 }

486	512

487 // Adds r to cc, along with r's upper case if foldascii is set.	513 // Adds r to cc, along with r's upper case if foldascii is set.

488 static void AddLiteral(CharClass* cc, Rune r, bool foldascii) {	514 static void AddLiteral(CharClassBuilder* cc, Rune r, bool foldascii) {

489 cc->AddRange(r, r);	515 cc->AddRange(r, r);

490 if (foldascii && 'a' <= r && r <= 'z')	516 if (foldascii && 'a' <= r && r <= 'z')

491 cc->AddRange(r + 'A' - 'a', r + 'A' - 'a');	517 cc->AddRange(r + 'A' - 'a', r + 'A' - 'a');

492 }	518 }

493	519

494 // Processes a vertical bar in the input.	520 // Processes a vertical bar in the input.

495 bool Regexp::ParseState::DoVerticalBar() {	521 bool Regexp::ParseState::DoVerticalBar() {

496 MaybeConcatString(-1, NoParseFlags);	522 MaybeConcatString(-1, NoParseFlags);

497 DoConcatenation();	523 DoConcatenation();

498	524

499 // Below the vertical bar is a list to alternate.	525 // Below the vertical bar is a list to alternate.

500 // Above the vertical bar is a list to concatenate.	526 // Above the vertical bar is a list to concatenate.

501 // We just did the concatenation, so either swap	527 // We just did the concatenation, so either swap

502 // the result below the vertical bar or push a new	528 // the result below the vertical bar or push a new

503 // vertical bar on the stack.	529 // vertical bar on the stack.

504 Regexp* r1;	530 Regexp* r1;

505 Regexp* r2;	531 Regexp* r2;

506 if ((r1 = stacktop_) != NULL &&	532 if ((r1 = stacktop_) != NULL &&

507 (r2 = stacktop_->down_) != NULL &&	533 (r2 = stacktop_->down_) != NULL &&

508 r2->op() == kVerticalBar) {	534 r2->op() == kVerticalBar) {

509 // If above and below vertical bar are literal or char class,	535 // If above and below vertical bar are literal or char class,

510 // can merge into a single char class.	536 // can merge into a single char class.

511 Regexp* r3;	537 Regexp* r3;

512 if ((r1->op() == kRegexpLiteral \|\|	538 if ((r1->op() == kRegexpLiteral \|\|

513 r1->op() == kRegexpCharClass \|\|	539 r1->op() == kRegexpCharClass \|\|

514 r1->op() == kRegexpAnyChar) &&	540 r1->op() == kRegexpAnyChar) &&

515 (r3 = r2->down_) != NULL) {	541 (r3 = r2->down_) != NULL) {

	542 Rune rune;

516 switch (r3->op()) {	543 switch (r3->op()) {

517 case kRegexpLiteral: // convert to char class	544 case kRegexpLiteral: // convert to char class

	545 rune = r3->rune_;

518 r3->op_ = kRegexpCharClass;	546 r3->op_ = kRegexpCharClass;

519 r3->cc_ = new CharClass;	547 r3->cc_ = NULL;

520 AddLiteral(r3->cc_, r3->rune_, r3->parse_flags_ & Regexp::FoldCase);	548 r3->ccb_ = new CharClassBuilder;

	549 AddLiteral(r3->ccb_, rune, r3->parse_flags_ & Regexp::FoldCase);

521 // fall through	550 // fall through

522 case kRegexpCharClass:	551 case kRegexpCharClass:

523 if (r1->op() == kRegexpLiteral)	552 if (r1->op() == kRegexpLiteral)

524 AddLiteral(r3->cc_, r1->rune_, r1->parse_flags_ & Regexp::FoldCase);	553 AddLiteral(r3->ccb_, r1->rune_, r1->parse_flags_ & Regexp::FoldCase) ;

525 else if (r1->op() == kRegexpCharClass)	554 else if (r1->op() == kRegexpCharClass)

526 r3->cc_->AddCharClass(r1->cc());	555 r3->ccb_->AddCharClass(r1->ccb_);

527 if (r1->op() == kRegexpAnyChar \|\| r3->cc_->full()) {	556 if (r1->op() == kRegexpAnyChar \|\| r3->ccb_->full()) {

	557 delete r3->ccb_;

	558 r3->ccb_ = NULL;

528 r3->op_ = kRegexpAnyChar;	559 r3->op_ = kRegexpAnyChar;

529 delete r3->cc_;

530 r3->cc_ = NULL;

531 }	560 }

532 // fall through	561 // fall through

533 case kRegexpAnyChar:	562 case kRegexpAnyChar:

534 // pop r1	563 // pop r1

535 stacktop_ = r2;	564 stacktop_ = r2;

536 r1->Decref();	565 r1->Decref();

537 return true;	566 return true;

538 default:	567 default:

539 break;	568 break;

540 }	569 }

(...skipping 31 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
572	601

573 // Restore flags from when paren opened.	602 // Restore flags from when paren opened.

574 Regexp* re = r2;	603 Regexp* re = r2;

575 flags_ = re->parse_flags();	604 flags_ = re->parse_flags();

576	605

577 // Rewrite LeftParen as capture if needed.	606 // Rewrite LeftParen as capture if needed.

578 if (re->cap_ > 0) {	607 if (re->cap_ > 0) {

579 re->op_ = kRegexpCapture;	608 re->op_ = kRegexpCapture;

580 // re->cap_ is already set	609 // re->cap_ is already set

581 re->AllocSub(1);	610 re->AllocSub(1);

582 re->sub_[0] = r1;	611 re->sub()[0] = FinishRegexp(r1);

583 re->simple_ = re->ComputeSimple();	612 re->simple_ = re->ComputeSimple();

584 } else {	613 } else {

585 re->Decref();	614 re->Decref();

586 re = r1;	615 re = r1;

587 }	616 }

588 return PushRegexp(re);	617 return PushRegexp(re);

589 }	618 }

590	619

591 // Processes the end of input, returning the final regexp.	620 // Processes the end of input, returning the final regexp.

592 Regexp* Regexp::ParseState::DoFinish() {	621 Regexp* Regexp::ParseState::DoFinish() {

593 DoAlternation();	622 DoAlternation();

594 Regexp* re = stacktop_;	623 Regexp* re = stacktop_;

595 if (re != NULL && re->down_ != NULL) {	624 if (re != NULL && re->down_ != NULL) {

596 status_->set_code(kRegexpMissingParen);	625 status_->set_code(kRegexpMissingParen);

597 status_->set_error_arg(whole_regexp_);	626 status_->set_error_arg(whole_regexp_);

598 return NULL;	627 return NULL;

599 }	628 }

600 stacktop_ = NULL;	629 stacktop_ = NULL;

601 return re;	630 return FinishRegexp(re);

602 }	631 }

603	632

604 // Collapse the regexps on top of the stack, down to the	633 // Collapse the regexps on top of the stack, down to the

605 // first marker, into a new op node (op == kRegexpAlternate	634 // first marker, into a new op node (op == kRegexpAlternate

606 // or op == kRegexpConcat).	635 // or op == kRegexpConcat).

607 void Regexp::ParseState::DoCollapse(RegexpOp op) {	636 void Regexp::ParseState::DoCollapse(RegexpOp op) {

608 // Scan backward to marker, counting children of composite.	637 // Scan backward to marker, counting children of composite.

609 int n = 0;	638 int n = 0;

610 Regexp* next = NULL;	639 Regexp* next = NULL;

611 Regexp* sub;	640 Regexp* sub;

612 for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {	641 for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {

613 next = sub->down_;	642 next = sub->down_;

614 if (sub->op_ == op)	643 if (sub->op_ == op)

615 n += sub->nsub_;	644 n += sub->nsub_;

616 else	645 else

617 n++;	646 n++;

618 }	647 }

619	648

620 // If there's just one child, leave it alone.	649 // If there's just one child, leave it alone.

621 // (Concat of one thing is that one thing; alternate of one thing is same.)	650 // (Concat of one thing is that one thing; alternate of one thing is same.)

622 if (stacktop_ != NULL && stacktop_->down_ == next)	651 if (stacktop_ != NULL && stacktop_->down_ == next)

623 return;	652 return;

624	653

625 // Construct op (alternation or concatenation), flattening op of op.	654 // Construct op (alternation or concatenation), flattening op of op.

626 Regexp* re = new Regexp(op, flags_);	655 Regexp** subs = new Regexp*[n];

627 re->AllocSub(n);

628 next = NULL;	656 next = NULL;

	657 int i = n;

629 for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {	658 for (sub = stacktop_; sub != NULL && !IsMarker(sub->op()); sub = next) {

630 next = sub->down_; // in case we sub->Decref().	659 next = sub->down_;

631 if (sub->op_ == op) {	660 if (sub->op_ == op) {

	661 Regexp** sub_subs = sub->sub();

632 for (int k = sub->nsub_ - 1; k >= 0; k--)	662 for (int k = sub->nsub_ - 1; k >= 0; k--)

633 re->sub_[--n] = sub->sub_[k]->Incref();	663 subs[--i] = sub_subs[k]->Incref();

634 sub->Decref();	664 sub->Decref();

635 } else {	665 } else {

636 re->sub_[--n] = sub;	666 subs[--i] = FinishRegexp(sub);

637 }	667 }

638 }	668 }

	669 ··

	670 Regexp* re = ConcatOrAlternate(op, subs, n, flags_);

	671 delete[] subs;

639 re->simple_ = re->ComputeSimple();	672 re->simple_ = re->ComputeSimple();

640 re->down_ = next;	673 re->down_ = next;

641 stacktop_ = re;	674 stacktop_ = re;

642 }	675 }

643	676

644 // Finishes the current concatenation,	677 // Finishes the current concatenation,

645 // collapsing it into a single regexp on the stack.	678 // collapsing it into a single regexp on the stack.

646 void Regexp::ParseState::DoConcatenation() {	679 void Regexp::ParseState::DoConcatenation() {

647 Regexp* r1 = stacktop_;	680 Regexp* r1 = stacktop_;

648 if (r1 == NULL \|\| IsMarker(r1->op())) {	681 if (r1 == NULL \|\| IsMarker(r1->op())) {

649 // empty concatenation is special case	682 // empty concatenation is special case

650 Regexp* re = new Regexp(kRegexpEmptyMatch, flags_);	683 Regexp* re = new Regexp(kRegexpEmptyMatch, flags_);

651 PushRegexp(re);	684 PushRegexp(re);

652 }	685 }

653 DoCollapse(kRegexpConcat);	686 DoCollapse(kRegexpConcat);

654 }	687 }

655	688

656 // Finishes the current alternation,	689 // Finishes the current alternation,

657 // collapsing it to a single regexp on the stack.	690 // collapsing it to a single regexp on the stack.

658 void Regexp::ParseState::DoAlternation() {	691 void Regexp::ParseState::DoAlternation() {

659 DoVerticalBar();	692 DoVerticalBar();

660 // Now stack top is kVerticalBar.	693 // Now stack top is kVerticalBar.

661 Regexp* r1 = stacktop_;	694 Regexp* r1 = stacktop_;

662 stacktop_ = r1->down_;	695 stacktop_ = r1->down_;

663 r1->Decref();	696 r1->Decref();

664 DoCollapse(kRegexpAlternate);	697 DoCollapse(kRegexpAlternate);

665 }	698 }

666	699

667 // Incremental conversion of concatenated literals into strings.	700 // Incremental conversion of concatenated literals into strings.

668 // If top of stack is literal, literal or literal, string or string, literal	701 // If top of stack is (literal, literal) or (literal, string) or (string, litera l)

669 // or string, string, collapse into single string.	702 // or (string, string), collapse into single string.

670 // Don't walk down the stack -- the parser calls this frequently	703 // Don't walk down the stack -- the parser calls this frequently

671 // enough that below the bottom two is known to be collapsed.	704 // enough that below the bottom two is known to be collapsed.

672 // Only called when another regexp is about to be pushed	705 // Only called when another regexp is about to be pushed

673 // on the stack, so that the topmost literal is not being considered.	706 // on the stack, so that the topmost literal is not being considered.

674 // (Otherwise ab* would turn into (ab)*.)	707 // (Otherwise ab* would turn into (ab)*.)

675 // If r >= 0, consider pushing a literal r on the stack.	708 // If r >= 0, consider pushing a literal r on the stack.

676 // Return whether that happened.	709 // Return whether that happened.

677 bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {	710 bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {

678 Regexp* re1;	711 Regexp* re1;

679 Regexp* re2;	712 Regexp* re2;

680 if ((re1 = stacktop_) == NULL \|\| (re2 = re1->down_) == NULL)	713 if ((re1 = stacktop_) == NULL \|\| (re2 = re1->down_) == NULL)

681 return false;	714 return false;

682	715

683 if (re1->op_ != kRegexpLiteral && re1->op_ != kRegexpLiteralString)	716 if (re1->op_ != kRegexpLiteral && re1->op_ != kRegexpLiteralString)

684 return false;	717 return false;

685 if (re2->op_ != kRegexpLiteral && re2->op_ != kRegexpLiteralString)	718 if (re2->op_ != kRegexpLiteral && re2->op_ != kRegexpLiteralString)

686 return false;	719 return false;

687 if ((re1->parse_flags_ & FoldCase) != (re2->parse_flags_ & FoldCase))	720 if ((re1->parse_flags_ & FoldCase) != (re2->parse_flags_ & FoldCase))

688 return false;	721 return false;

689	722

690 if (re2->op_ == kRegexpLiteral) {	723 if (re2->op_ == kRegexpLiteral) {

691 // convert into string	724 // convert into string

	725 Rune rune = re2->rune_;

692 re2->op_ = kRegexpLiteralString;	726 re2->op_ = kRegexpLiteralString;

693 re2->AddRuneToString(re2->rune_);	727 re2->nrunes_ = 0;

	728 re2->runes_ = NULL;

	729 re2->AddRuneToString(rune);

694 }	730 }

695	731

696 // push re1 into re2.	732 // push re1 into re2.

697 if (re1->op_ == kRegexpLiteral) {	733 if (re1->op_ == kRegexpLiteral) {

698 re2->AddRuneToString(re1->rune_);	734 re2->AddRuneToString(re1->rune_);

699 } else {	735 } else {

700 for (int i = 0; i < re1->nrunes_; i++)	736 for (int i = 0; i < re1->nrunes_; i++)

701 re2->AddRuneToString(re1->runes_[i]);	737 re2->AddRuneToString(re1->runes_[i]);

702 re1->nrunes_ = 0;	738 re1->nrunes_ = 0;

703 delete[] re1->runes_;	739 delete[] re1->runes_;

(...skipping 272 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
976	1012

977 BadEscape:	1013 BadEscape:

978 // Unrecognized escape sequence.	1014 // Unrecognized escape sequence.

979 status->set_code(kRegexpBadEscape);	1015 status->set_code(kRegexpBadEscape);

980 status->set_error_arg(StringPiece(begin, s->data() - begin));	1016 status->set_error_arg(StringPiece(begin, s->data() - begin));

981 return false;	1017 return false;

982 }	1018 }

983	1019

984 // Add a range to the character class, but exclude newline if asked.	1020 // Add a range to the character class, but exclude newline if asked.

985 // Also handle case folding.	1021 // Also handle case folding.

986 static void AddRange(CharClass* cc, Rune lo, Rune hi,	1022 static void AddRange(CharClassBuilder* cc, Rune lo, Rune hi,

987 Regexp::ParseFlags parse_flags) {	1023 Regexp::ParseFlags parse_flags) {

988	1024

989 // Take out \n if the flags say so.	1025 // Take out \n if the flags say so.

990 bool cutnl = !(parse_flags & Regexp::ClassNL) \|\|	1026 bool cutnl = !(parse_flags & Regexp::ClassNL) \|\|

991 (parse_flags & Regexp::NeverNL);	1027 (parse_flags & Regexp::NeverNL);

992 if (cutnl && lo <= '\n' && '\n' <= hi) {	1028 if (cutnl && lo <= '\n' && '\n' <= hi) {

993 if (lo < '\n')	1029 if (lo < '\n')

994 AddRange(cc, lo, '\n' - 1, parse_flags);	1030 AddRange(cc, lo, '\n' - 1, parse_flags);

995 if (hi > '\n')	1031 if (hi > '\n')

996 AddRange(cc, '\n' + 1, hi, parse_flags);	1032 AddRange(cc, '\n' + 1, hi, parse_flags);

(...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1030	1066

1031 // Look for a Unicode group with the given name (e.g., "Han")	1067 // Look for a Unicode group with the given name (e.g., "Han")

1032 static UGroup* LookupUnicodeGroup(const StringPiece& name) {	1068 static UGroup* LookupUnicodeGroup(const StringPiece& name) {

1033 // Special case: "Any" means any.	1069 // Special case: "Any" means any.

1034 if (name == "Any")	1070 if (name == "Any")

1035 return &anygroup;	1071 return &anygroup;

1036 return LookupGroup(name, unicode_groups, num_unicode_groups);	1072 return LookupGroup(name, unicode_groups, num_unicode_groups);

1037 }	1073 }

1038	1074

1039 // Add a UGroup to the character class.	1075 // Add a UGroup to the character class.

1040 static void AddUGroup(CharClass cc, UGroup g,	1076 static void AddUGroup(CharClassBuilder cc, UGroup g,

1041 Regexp::ParseFlags parse_flags) {	1077 Regexp::ParseFlags parse_flags) {

1042 for (int i = 0; i < g->nr16; i++) {	1078 for (int i = 0; i < g->nr16; i++) {

1043 AddRange(cc, g->r16[i].lo, g->r16[i].hi, parse_flags);	1079 AddRange(cc, g->r16[i].lo, g->r16[i].hi, parse_flags);

1044 }	1080 }

1045 for (int i = 0; i < g->nr32; i++) {	1081 for (int i = 0; i < g->nr32; i++) {

1046 AddRange(cc, g->r32[i].lo, g->r32[i].hi, parse_flags);	1082 AddRange(cc, g->r32[i].lo, g->r32[i].hi, parse_flags);

1047 }	1083 }

1048 }	1084 }

1049	1085

1050 // Add the negation of a UGroup to the character class.	1086 // Add the negation of a UGroup to the character class.

1051 static void AddNegatedUGroup(CharClass* cc, UGroup *g,	1087 static void AddNegatedUGroup(CharClassBuilder* cc, UGroup *g,

1052 Regexp::ParseFlags parse_flags) {	1088 Regexp::ParseFlags parse_flags) {

1053 int next = 0;	1089 int next = 0;

1054 for (int i = 0; i < g->nr16; i++) {	1090 for (int i = 0; i < g->nr16; i++) {

1055 if (next < g->r16[i].lo)	1091 if (next < g->r16[i].lo)

1056 AddRange(cc, next, g->r16[i].lo - 1, parse_flags);	1092 AddRange(cc, next, g->r16[i].lo - 1, parse_flags);

1057 next = g->r16[i].hi + 1;	1093 next = g->r16[i].hi + 1;

1058 }	1094 }

1059 for (int i = 0; i < g->nr32; i++) {	1095 for (int i = 0; i < g->nr32; i++) {

1060 if (next < g->r32[i].lo)	1096 if (next < g->r32[i].lo)

1061 AddRange(cc, next, g->r32[i].lo - 1, parse_flags);	1097 AddRange(cc, next, g->r32[i].lo - 1, parse_flags);

(...skipping 26 matching lines...) Expand all Loading...
1088	1124

1089 enum ParseStatus {	1125 enum ParseStatus {

1090 kParseOk, // Did some parsing.	1126 kParseOk, // Did some parsing.

1091 kParseError, // Found an error.	1127 kParseError, // Found an error.

1092 kParseNothing, // Decided not to parse.	1128 kParseNothing, // Decided not to parse.

1093 };	1129 };

1094	1130

1095 // Maybe parses a Unicode character group like \p{Han} or \P{Han}	1131 // Maybe parses a Unicode character group like \p{Han} or \P{Han}

1096 // (the latter is a negated group).	1132 // (the latter is a negated group).

1097 ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,	1133 ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,

1098 CharClass *cc,	1134 CharClassBuilder *cc,

1099 RegexpStatus* status) {	1135 RegexpStatus* status) {

1100 // Decide whether to parse.	1136 // Decide whether to parse.

1101 if (!(parse_flags & Regexp::UnicodeGroups))	1137 if (!(parse_flags & Regexp::UnicodeGroups))

1102 return kParseNothing;	1138 return kParseNothing;

1103 if (s->size() < 2 \|\| (*s)[0] != '\\')	1139 if (s->size() < 2 \|\| (*s)[0] != '\\')

1104 return kParseNothing;	1140 return kParseNothing;

1105 Rune c = (*s)[1];	1141 Rune c = (*s)[1];

1106 if (c != 'p' && c != 'P')	1142 if (c != 'p' && c != 'P')

1107 return kParseNothing;	1143 return kParseNothing;

1108	1144

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1153 AddUGroup(cc, g, parse_flags);	1189 AddUGroup(cc, g, parse_flags);

1154 else	1190 else

1155 AddNegatedUGroup(cc, g, parse_flags);	1191 AddNegatedUGroup(cc, g, parse_flags);

1156 return kParseOk;	1192 return kParseOk;

1157 }	1193 }

1158	1194

1159 // Parses a character class name like [:alnum:].	1195 // Parses a character class name like [:alnum:].

1160 // Sets *s to span the remainder of the string.	1196 // Sets *s to span the remainder of the string.

1161 // Adds the ranges corresponding to the class to ranges.	1197 // Adds the ranges corresponding to the class to ranges.

1162 static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,	1198 static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,

1163 CharClass *cc,	1199 CharClassBuilder *cc,

1164 RegexpStatus* status) {	1200 RegexpStatus* status) {

1165 // Check begins with [:	1201 // Check begins with [:

1166 const char* p = s->data();	1202 const char* p = s->data();

1167 const char* ep = s->data() + s->size();	1203 const char* ep = s->data() + s->size();

1168 if (ep - p < 2 \|\| p[0] != '[' \|\| p[1] != ':')	1204 if (ep - p < 2 \|\| p[0] != '[' \|\| p[1] != ':')

1169 return kParseNothing;	1205 return kParseNothing;

1170	1206

1171 // Look for closing :].	1207 // Look for closing :].

1172 const char* q;	1208 const char* q;

1173 for (q = p+2; q <= ep-2 && (q != ':' \|\| (q+1) != ']'); q++)	1209 for (q = p+2; q <= ep-2 && (q != ':' \|\| (q+1) != ']'); q++)

(...skipping 76 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1250 RegexpStatus* status) {	1286 RegexpStatus* status) {

1251 StringPiece whole_class = *s;	1287 StringPiece whole_class = *s;

1252 if (s->size() == 0 \|\| (*s)[0] != '[') {	1288 if (s->size() == 0 \|\| (*s)[0] != '[') {

1253 // Caller checked this.	1289 // Caller checked this.

1254 status->set_code(kRegexpInternalError);	1290 status->set_code(kRegexpInternalError);

1255 status->set_error_arg(NULL);	1291 status->set_error_arg(NULL);

1256 return false;	1292 return false;

1257 }	1293 }

1258 bool negated = false;	1294 bool negated = false;

1259 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);	1295 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);

	1296 re->ccb_ = new CharClassBuilder;

1260 s->remove_prefix(1); // '['	1297 s->remove_prefix(1); // '['

1261 if (s->size() > 0 && (*s)[0] == '^') {	1298 if (s->size() > 0 && (*s)[0] == '^') {

1262 s->remove_prefix(1); // '^'	1299 s->remove_prefix(1); // '^'

1263 negated = true;	1300 negated = true;

1264 if (!(flags_ & ClassNL) \|\| (flags_ & NeverNL)) {	1301 if (!(flags_ & ClassNL) \|\| (flags_ & NeverNL)) {

1265 // If NL can't match implicitly, then pretend	1302 // If NL can't match implicitly, then pretend

1266 // negated classes include a leading \n.	1303 // negated classes include a leading \n.

1267 re->cc_->AddRange('\n', '\n');	1304 re->ccb_->AddRange('\n', '\n');

1268 }	1305 }

1269 }	1306 }

1270 bool first = true; // ] is okay as first char in class	1307 bool first = true; // ] is okay as first char in class

1271 while (s->size() > 0 && ((*s)[0] != ']' \|\| first)) {	1308 while (s->size() > 0 && ((*s)[0] != ']' \|\| first)) {

1272 // - is only okay unescaped as first or last in class.	1309 // - is only okay unescaped as first or last in class.

1273 // Except that Perl allows - anywhere.	1310 // Except that Perl allows - anywhere.

1274 if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&	1311 if ((*s)[0] == '-' && !first && !(flags_&PerlX) &&

1275 (s->size() == 1 \|\| (*s)[1] != ']')) {	1312 (s->size() == 1 \|\| (*s)[1] != ']')) {

1276 StringPiece t = *s;	1313 StringPiece t = *s;

1277 t.remove_prefix(1); // '-'	1314 t.remove_prefix(1); // '-'

1278 Rune r;	1315 Rune r;

1279 int n = StringPieceToRune(&r, &t, status);	1316 int n = StringPieceToRune(&r, &t, status);

1280 if (n < 0) {	1317 if (n < 0) {

1281 re->Decref();	1318 re->Decref();

1282 return false;	1319 return false;

1283 }	1320 }

1284 status->set_code(kRegexpBadCharRange);	1321 status->set_code(kRegexpBadCharRange);

1285 status->set_error_arg(StringPiece(s->data(), 1+n));	1322 status->set_error_arg(StringPiece(s->data(), 1+n));

1286 re->Decref();	1323 re->Decref();

1287 return false;	1324 return false;

1288 }	1325 }

1289 first = false;	1326 first = false;

1290	1327

1291 // Look for [:alnum:] etc.	1328 // Look for [:alnum:] etc.

1292 if (s->size() > 2 && (s)[0] == '[' && (s)[1] == ':') {	1329 if (s->size() > 2 && (s)[0] == '[' && (s)[1] == ':') {

1293 switch (ParseCCName(s, flags_, re->cc_, status)) {	1330 switch (ParseCCName(s, flags_, re->ccb_, status)) {

1294 case kParseOk:	1331 case kParseOk:

1295 continue;	1332 continue;

1296 case kParseError:	1333 case kParseError:

1297 re->Decref();	1334 re->Decref();

1298 return false;	1335 return false;

1299 case kParseNothing:	1336 case kParseNothing:

1300 break;	1337 break;

1301 }	1338 }

1302 }	1339 }

1303	1340

1304 // Look for Unicode character group like \p{Han}	1341 // Look for Unicode character group like \p{Han}

1305 if (s->size() > 2 &&	1342 if (s->size() > 2 &&

1306 (*s)[0] == '\\' &&	1343 (*s)[0] == '\\' &&

1307 ((s)[1] == 'p' \|\| (s)[1] == 'P')) {	1344 ((s)[1] == 'p' \|\| (s)[1] == 'P')) {

1308 switch (ParseUnicodeGroup(s, flags_, re->cc_, status)) {	1345 switch (ParseUnicodeGroup(s, flags_, re->ccb_, status)) {

1309 case kParseOk:	1346 case kParseOk:

1310 continue;	1347 continue;

1311 case kParseError:	1348 case kParseError:

1312 re->Decref();	1349 re->Decref();

1313 return false;	1350 return false;

1314 case kParseNothing:	1351 case kParseNothing:

1315 break;	1352 break;

1316 }	1353 }

1317 }	1354 }

1318	1355

1319 // Look for Perl character class symbols (extension).	1356 // Look for Perl character class symbols (extension).

1320 UGroup *g = MaybeParsePerlCCEscape(s, flags_);	1357 UGroup *g = MaybeParsePerlCCEscape(s, flags_);

1321 if (g != NULL) {	1358 if (g != NULL) {

1322 AddUGroup(re->cc_, g, flags_);	1359 AddUGroup(re->ccb_, g, flags_);

1323 continue;	1360 continue;

1324 }	1361 }

1325	1362

1326 // Otherwise assume single character or simple range.	1363 // Otherwise assume single character or simple range.

1327 RuneRange rr;	1364 RuneRange rr;

1328 if (!ParseCCRange(s, &rr, whole_class, status)) {	1365 if (!ParseCCRange(s, &rr, whole_class, status)) {

1329 re->Decref();	1366 re->Decref();

1330 return false;	1367 return false;

1331 }	1368 }

1332 // AddRange is usually called in response to a class like	1369 // AddRange is usually called in response to a class like

1333 // \p{Foo} or [[:foo:]]; for those, it filters \n out unless	1370 // \p{Foo} or [[:foo:]]; for those, it filters \n out unless

1334 // Regexp::ClassNL is set. In an explicit range or singleton	1371 // Regexp::ClassNL is set. In an explicit range or singleton

1335 // like we just parsed, we do not filter \n out, so set ClassNL	1372 // like we just parsed, we do not filter \n out, so set ClassNL

1336 // in the flags.	1373 // in the flags.

1337 AddRange(re->cc_, rr.lo, rr.hi, flags_ \| Regexp::ClassNL);	1374 AddRange(re->ccb_, rr.lo, rr.hi, flags_ \| Regexp::ClassNL);

1338 }	1375 }

1339 if (s->size() == 0) {	1376 if (s->size() == 0) {

1340 status->set_code(kRegexpMissingBracket);	1377 status->set_code(kRegexpMissingBracket);

1341 status->set_error_arg(whole_class);	1378 status->set_error_arg(whole_class);

1342 re->Decref();	1379 re->Decref();

1343 return false;	1380 return false;

1344 }	1381 }

1345 s->remove_prefix(1); // ']'	1382 s->remove_prefix(1); // ']'

1346	1383

1347 if (negated)	1384 if (negated)

1348 re->cc_->Negate();	1385 re->ccb_->Negate();

1349 re->cc_->RemoveAbove(rune_max_);	1386 re->ccb_->RemoveAbove(rune_max_);

1350	1387

1351 *out_re = re;	1388 *out_re = re;

1352 return true;	1389 return true;

1353 }	1390 }

1354	1391

1355 // Is this a valid capture name? [A-Za-z0-9_]+	1392 // Is this a valid capture name? [A-Za-z0-9_]+

1356 // PCRE limits names to 32 bytes.	1393 // PCRE limits names to 32 bytes.

1357 // Python rejects names starting with digits.	1394 // Python rejects names starting with digits.

1358 // We don't enforce either of those.	1395 // We don't enforce either of those.

1359 static bool IsValidCaptureName(const StringPiece& name) {	1396 static bool IsValidCaptureName(const StringPiece& name) {

(...skipping 385 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1745 return NULL;	1782 return NULL;

1746 if (!ps.PushLiteral(r))	1783 if (!ps.PushLiteral(r))

1747 return NULL;	1784 return NULL;

1748 }	1785 }

1749 break;	1786 break;

1750 }	1787 }

1751 }	1788 }

1752	1789

1753 if (t[1] == 'p' \|\| t[1] == 'P') {	1790 if (t[1] == 'p' \|\| t[1] == 'P') {

1754 Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);	1791 Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);

1755 switch (ParseUnicodeGroup(&t, ps.flags(), re->cc_, status)) {	1792 re->ccb_ = new CharClassBuilder;

	1793 switch (ParseUnicodeGroup(&t, ps.flags(), re->ccb_, status)) {

1756 case kParseOk:	1794 case kParseOk:

1757 if (!ps.PushRegexp(re))	1795 if (!ps.PushRegexp(re))

1758 return NULL;	1796 return NULL;

1759 goto Break2;	1797 goto Break2;

1760 case kParseError:	1798 case kParseError:

1761 re->Decref();	1799 re->Decref();

1762 return NULL;	1800 return NULL;

1763 case kParseNothing:	1801 case kParseNothing:

1764 re->Decref();	1802 re->Decref();

1765 break;	1803 break;

1766 }	1804 }

1767 }	1805 }

1768	1806

1769 UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());	1807 UGroup *g = MaybeParsePerlCCEscape(&t, ps.flags());

1770 if (g != NULL) {	1808 if (g != NULL) {

1771 Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);	1809 Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);

1772 AddUGroup(re->cc_, g, ps.flags());	1810 re->ccb_ = new CharClassBuilder;

	1811 AddUGroup(re->ccb_, g, ps.flags());

1773 if (!ps.PushRegexp(re))	1812 if (!ps.PushRegexp(re))

1774 return NULL;	1813 return NULL;

1775 break;	1814 break;

1776 }	1815 }

1777	1816

1778 Rune r;	1817 Rune r;

1779 if (!ParseEscape(&t, &r, status, ps.rune_max()))	1818 if (!ParseEscape(&t, &r, status, ps.rune_max()))

1780 return NULL;	1819 return NULL;

1781 if (!ps.PushLiteral(r))	1820 if (!ps.PushLiteral(r))

1782 return NULL;	1821 return NULL;

1783 break;	1822 break;

1784 }	1823 }

1785 }	1824 }

1786 Break2:	1825 Break2:

1787 lastunary = isunary;	1826 lastunary = isunary;

1788 }	1827 }

1789 return ps.DoFinish();	1828 return ps.DoFinish();

1790 }	1829 }

1791	1830

1792 } // namespace re2	1831 } // namespace re2

OLD	NEW

« no previous file with comments | « no previous file | re2/re2.h » ('j') | no next file with comments »