Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(50)

Delta Between Two Patch Sets: re2/dfa.cc

Issue 1129043: code review 1129043: re2: memory diet part 2 (Closed)
Left Patch Set: Created 14 years, 10 months ago
Right Patch Set: code review 1129043: re2: memory diet part 2 Created 14 years, 10 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « re2/compile.cc ('k') | re2/nfa.cc » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. 1 // Copyright 2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // A DFA (deterministic finite automaton)-based regular expression search. 5 // A DFA (deterministic finite automaton)-based regular expression search.
6 // 6 //
7 // The DFA search has two main parts: the construction of the automaton, 7 // The DFA search has two main parts: the construction of the automaton,
8 // which is represented by a graph of State structures, and the execution 8 // which is represented by a graph of State structures, and the execution
9 // of the automaton over a given input string. 9 // of the automaton over a given input string.
10 // 10 //
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 // difficult to mark them as such. 88 // difficult to mark them as such.
89 class Workq; 89 class Workq;
90 class RWLocker; 90 class RWLocker;
91 class StateSaver; 91 class StateSaver;
92 92
93 // A single DFA state. The DFA is represented as a graph of these 93 // A single DFA state. The DFA is represented as a graph of these
94 // States, linked by the next_ pointers. If in state s and reading 94 // States, linked by the next_ pointers. If in state s and reading
95 // byte c, the next state should be s->next_[c]. 95 // byte c, the next state should be s->next_[c].
96 struct State { 96 struct State {
97 inline bool IsMatch() const { return flag_ & kFlagMatch; } 97 inline bool IsMatch() const { return flag_ & kFlagMatch; }
98 uint32* inst_; // Instruction pointers in the state. 98 int* inst_; // Instruction pointers in the state.
99 int ninst_; // # of inst_ pointers. 99 int ninst_; // # of inst_ pointers.
100 uint flag_; // Empty string bitfield flags in effect on the way 100 uint flag_; // Empty string bitfield flags in effect on the way
101 // into this state, along with kFlagMatch if this 101 // into this state, along with kFlagMatch if this
102 // is a matching state. 102 // is a matching state.
103 State** next_; // Outgoing arrows from State, 103 State** next_; // Outgoing arrows from State,
104 // one per input byte class 104 // one per input byte class
105 }; 105 };
106 106
107 enum { 107 enum {
108 kByteEndText = 256, // imaginary byte at end of text 108 kByteEndText = 256, // imaginary byte at end of text
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
173 // cache_mutex_.r <= L < mutex_ 173 // cache_mutex_.r <= L < mutex_
174 // After: cache_mutex_.w <= L < mutex_ 174 // After: cache_mutex_.w <= L < mutex_
175 void ResetCache(RWLocker* cache_lock); 175 void ResetCache(RWLocker* cache_lock);
176 176
177 // Looks up and returns the State corresponding to a Workq. 177 // Looks up and returns the State corresponding to a Workq.
178 // L >= mutex_ 178 // L >= mutex_
179 State* WorkqToCachedState(Workq* q, uint flag); 179 State* WorkqToCachedState(Workq* q, uint flag);
180 180
181 // Looks up and returns a State matching the inst, ninst, and flag. 181 // Looks up and returns a State matching the inst, ninst, and flag.
182 // L >= mutex_ 182 // L >= mutex_
183 State* CachedState(uint32* inst, int ninst, uint flag); 183 State* CachedState(int* inst, int ninst, uint flag);
184 184
185 // Clear the cache entirely. 185 // Clear the cache entirely.
186 // Must hold cache_mutex_.w or be in destructor. 186 // Must hold cache_mutex_.w or be in destructor.
187 void ClearCache(); 187 void ClearCache();
188 188
189 // Converts a State into a Workq: the opposite of WorkqToCachedState. 189 // Converts a State into a Workq: the opposite of WorkqToCachedState.
190 // L >= mutex_ 190 // L >= mutex_
191 static void StateToWorkq(State* s, Workq* q); 191 static void StateToWorkq(State* s, Workq* q);
192 192
193 // Runs a State on a given byte, returning the next state. 193 // Runs a State on a given byte, returning the next state.
194 State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ 194 State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_
195 State* RunStateOnByte(State*, int); // L >= mutex_ 195 State* RunStateOnByte(State*, int); // L >= mutex_
196 196
197 // Runs a Workq on a given byte followed by a set of empty-string flags, 197 // Runs a Workq on a given byte followed by a set of empty-string flags,
198 // producing a new Workq in nq. If a match instruction is encountered, 198 // producing a new Workq in nq. If a match instruction is encountered,
199 // sets *ismatch to true. 199 // sets *ismatch to true.
200 // L >= mutex_ 200 // L >= mutex_
201 void RunWorkqOnByte(Workq* q, Workq* nq, 201 void RunWorkqOnByte(Workq* q, Workq* nq,
202 int c, uint flag, bool* ismatch, 202 int c, uint flag, bool* ismatch,
203 Prog::MatchKind kind, 203 Prog::MatchKind kind,
204 uint32 new_byte_loop); 204 int new_byte_loop);
205 205
206 // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. 206 // Runs a Workq on a set of empty-string flags, producing a new Workq in nq.
207 // L >= mutex_ 207 // L >= mutex_
208 void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag); 208 void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag);
209 209
210 // Adds the instruction id to the Workq, following empty arrows 210 // Adds the instruction id to the Workq, following empty arrows
211 // according to flag. 211 // according to flag.
212 // L >= mutex_ 212 // L >= mutex_
213 void AddToQueue(Workq* q, int id, uint flag); 213 void AddToQueue(Workq* q, int id, uint flag);
214 214
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
310 Prog* prog_; // The regular expression program to run. 310 Prog* prog_; // The regular expression program to run.
311 Prog::MatchKind kind_; // The kind of DFA. 311 Prog::MatchKind kind_; // The kind of DFA.
312 int start_unanchored_; // start of unanchored program 312 int start_unanchored_; // start of unanchored program
313 bool init_failed_; // initialization failed (out of memory) 313 bool init_failed_; // initialization failed (out of memory)
314 314
315 Mutex mutex_; // mutex_ >= cache_mutex_.r 315 Mutex mutex_; // mutex_ >= cache_mutex_.r
316 316
317 // Scratch areas, protected by mutex_. 317 // Scratch areas, protected by mutex_.
318 Workq* q0_; // Two pre-allocated work queues. 318 Workq* q0_; // Two pre-allocated work queues.
319 Workq* q1_; 319 Workq* q1_;
320 uint32* astack_; // Pre-allocated stack for AddToQueue 320 int* astack_; // Pre-allocated stack for AddToQueue
321 int nastack_; 321 int nastack_;
322 322
323 // State* cache. Many threads use and add to the cache simultaneously, 323 // State* cache. Many threads use and add to the cache simultaneously,
324 // holding cache_mutex_ for reading and mutex_ (above) when adding. 324 // holding cache_mutex_ for reading and mutex_ (above) when adding.
325 // If the cache fills and needs to be discarded, the discarding is done 325 // If the cache fills and needs to be discarded, the discarding is done
326 // while holding cache_mutex_ for writing, to avoid interrupting other 326 // while holding cache_mutex_ for writing, to avoid interrupting other
327 // readers. Any State* pointers are only valid while cache_mutex_ 327 // readers. Any State* pointers are only valid while cache_mutex_
328 // is held. 328 // is held.
329 Mutex cache_mutex_; 329 Mutex cache_mutex_;
330 int64 mem_budget_; // Total memory budget for all States. 330 int64 mem_budget_; // Total memory budget for all States.
331 int64 state_budget_; // Amount of memory remaining for new States. 331 int64 state_budget_; // Amount of memory remaining for new States.
332 StateSet state_cache_; // All States computed so far. 332 StateSet state_cache_; // All States computed so far.
333 StartInfo start_[kMaxStart]; 333 StartInfo start_[kMaxStart];
334 bool cache_warned_; // have printed to LOG(INFO) about the cache 334 bool cache_warned_; // have printed to LOG(INFO) about the cache
335 }; 335 };
336 336
337 // Shorthand for casting to uint8*. 337 // Shorthand for casting to uint8*.
338 static inline const uint8* BytePtr(const void* v) { 338 static inline const uint8* BytePtr(const void* v) {
339 return reinterpret_cast<const uint8*>(v); 339 return reinterpret_cast<const uint8*>(v);
340 } 340 }
341 341
342 // Work queues 342 // Work queues
343 343
344 // Marks separate thread groups of different priority 344 // Marks separate thread groups of different priority
345 // in the work queue when in leftmost-longest matching mode. 345 // in the work queue when in leftmost-longest matching mode.
346 #define Mark static_cast<uint32>(-1) 346 #define Mark (-1)
347 347
348 // Internally, the DFA uses a sparse array of 348 // Internally, the DFA uses a sparse array of
349 // program instruction pointers as a work queue. 349 // program instruction pointers as a work queue.
350 // In leftmost longest mode, marks separate sections 350 // In leftmost longest mode, marks separate sections
351 // of workq that started executing at different 351 // of workq that started executing at different
352 // locations in the string (earlier locations first). 352 // locations in the string (earlier locations first).
353 class DFA::Workq : public SparseSet { 353 class DFA::Workq : public SparseSet {
354 public: 354 public:
355 // Constructor: n is number of normal slots, maxmark number of mark slots. 355 // Constructor: n is number of normal slots, maxmark number of mark slots.
356 Workq(int n, int maxmark) : 356 Workq(int n, int maxmark) :
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 start_unanchored_ = NULL; 415 start_unanchored_ = NULL;
416 if (kind_ == Prog::kLongestMatch) { 416 if (kind_ == Prog::kLongestMatch) {
417 nmark = prog->size(); 417 nmark = prog->size();
418 start_unanchored_ = prog->start_unanchored(); 418 start_unanchored_ = prog->start_unanchored();
419 } 419 }
420 nastack_ = 2 * prog->size() + nmark; 420 nastack_ = 2 * prog->size() + nmark;
421 421
422 // Account for space needed for DFA, q0, q1, astack. 422 // Account for space needed for DFA, q0, q1, astack.
423 mem_budget_ -= sizeof(DFA); 423 mem_budget_ -= sizeof(DFA);
424 mem_budget_ -= (prog_->size() + nmark) * 424 mem_budget_ -= (prog_->size() + nmark) *
425 (sizeof(int)+sizeof(int)+sizeof(uint32)) * 2; // q0, q1 425 (sizeof(int)+sizeof(int)) * 2; // q0, q1
426 mem_budget_ -= nastack_ * sizeof(uint32); // astack 426 mem_budget_ -= nastack_ * sizeof(int); // astack
427 if (mem_budget_ < 0) { 427 if (mem_budget_ < 0) {
428 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", 428 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
429 prog_->size(), max_mem); 429 prog_->size(), max_mem);
430 init_failed_ = true; 430 init_failed_ = true;
431 return; 431 return;
432 } 432 }
433 433
434 state_budget_ = mem_budget_; 434 state_budget_ = mem_budget_;
435 435
436 // Make sure there is a reasonable amount of working room left. 436 // Make sure there is a reasonable amount of working room left.
437 // At minimum, the search requires room for two states in order 437 // At minimum, the search requires room for two states in order
438 // to limp along, restarting frequently. We'll get better performance 438 // to limp along, restarting frequently. We'll get better performance
439 // if there is room for a larger number of states, say 20. 439 // if there is room for a larger number of states, say 20.
440 int one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(uint32) + 440 int one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) +
441 (prog_->bytemap_range()+1)*sizeof(State*); 441 (prog_->bytemap_range()+1)*sizeof(State*);
442 if (state_budget_ < 20*one_state) { 442 if (state_budget_ < 20*one_state) {
443 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", 443 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld",
444 prog_->size(), max_mem); 444 prog_->size(), max_mem);
445 init_failed_ = true; 445 init_failed_ = true;
446 return; 446 return;
447 } 447 }
448 448
449 q0_ = new Workq(prog->size(), nmark); 449 q0_ = new Workq(prog->size(), nmark);
450 q1_ = new Workq(prog->size(), nmark); 450 q1_ = new Workq(prog->size(), nmark);
451 astack_ = new uint32[nastack_]; 451 astack_ = new int[nastack_];
452 } 452 }
453 453
454 DFA::~DFA() { 454 DFA::~DFA() {
455 delete q0_; 455 delete q0_;
456 delete q1_; 456 delete q1_;
457 delete[] astack_; 457 delete[] astack_;
458 ClearCache(); 458 ClearCache();
459 } 459 }
460 460
461 // In the DFA state graph, s->next[c] == NULL means that the 461 // In the DFA state graph, s->next[c] == NULL means that the
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
516 // 516 //
517 // DFA state graph construction. 517 // DFA state graph construction.
518 // 518 //
519 // The DFA state graph is a heavily-linked collection of State* structures. 519 // The DFA state graph is a heavily-linked collection of State* structures.
520 // The state_cache_ is a set of all the State structures ever allocated, 520 // The state_cache_ is a set of all the State structures ever allocated,
521 // so that if the same state is reached by two different paths, 521 // so that if the same state is reached by two different paths,
522 // the same State structure can be used. This reduces allocation 522 // the same State structure can be used. This reduces allocation
523 // requirements and also avoids duplication of effort across the two 523 // requirements and also avoids duplication of effort across the two
524 // identical states. 524 // identical states.
525 // 525 //
526 // A State is defined by an ordered list of uint32 instruction ids and a flag wo rd. 526 // A State is defined by an ordered list of instruction ids and a flag word.
527 // 527 //
528 // The choice of an ordered list of instructions differs from a typical 528 // The choice of an ordered list of instructions differs from a typical
529 // textbook DFA implementation, which would use an unordered set. 529 // textbook DFA implementation, which would use an unordered set.
530 // Textbook descriptions, however, only care about whether 530 // Textbook descriptions, however, only care about whether
531 // the DFA matches, not where it matches in the text. To decide where the 531 // the DFA matches, not where it matches in the text. To decide where the
532 // DFA matches, we need to mimic the behavior of the dominant backtracking 532 // DFA matches, we need to mimic the behavior of the dominant backtracking
533 // implementations like PCRE, which try one possible regular expression 533 // implementations like PCRE, which try one possible regular expression
534 // execution, then another, then another, stopping when one of them succeeds. 534 // execution, then another, then another, stopping when one of them succeeds.
535 // The DFA execution tries these many executions in parallel, representing 535 // The DFA execution tries these many executions in parallel, representing
536 // each by an instruction id. These pointers are ordered in the State.inst_ 536 // each by an instruction id. These pointers are ordered in the State.inst_
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
571 // If one is found, returns it. If one is not found, allocates one, 571 // If one is found, returns it. If one is not found, allocates one,
572 // inserts it in the cache, and returns it. 572 // inserts it in the cache, and returns it.
573 DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { 573 DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) {
574 if (DEBUG_MODE) 574 if (DEBUG_MODE)
575 mutex_.AssertHeld(); 575 mutex_.AssertHeld();
576 576
577 // Construct array of instruction ids for the new state. 577 // Construct array of instruction ids for the new state.
578 // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: 578 // Only ByteRange, EmptyWidth, and Match instructions are useful to keep:
579 // those are the only operators with any effect in 579 // those are the only operators with any effect in
580 // RunWorkqOnEmptyString or RunWorkqOnByte. 580 // RunWorkqOnEmptyString or RunWorkqOnByte.
581 uint32* inst = new uint32[q->size()]; 581 int* inst = new int[q->size()];
582 int n = 0; 582 int n = 0;
583 uint needflags = 0; // flags needed by kInstEmptyWidth instructions 583 uint needflags = 0; // flags needed by kInstEmptyWidth instructions
584 bool sawmatch = false; // whether queue contains guaranteed kInstMatch 584 bool sawmatch = false; // whether queue contains guaranteed kInstMatch
585 if (DebugDFA) 585 if (DebugDFA)
586 fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); 586 fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag);
587 for (Workq::iterator it = q->begin(); it != q->end(); ++it) { 587 for (Workq::iterator it = q->begin(); it != q->end(); ++it) {
588 int id = *it; 588 int id = *it;
589 if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id))) 589 if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id)))
590 break; 590 break;
591 if (q->is_mark(id)) { 591 if (q->is_mark(id)) {
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
657 // if the state is *not* a matching state. 657 // if the state is *not* a matching state.
658 if (n == 0 && flag == 0) { 658 if (n == 0 && flag == 0) {
659 delete[] inst; 659 delete[] inst;
660 return DeadState; 660 return DeadState;
661 } 661 }
662 662
663 // If we're in longest match mode, the state is a sequence of 663 // If we're in longest match mode, the state is a sequence of
664 // unordered state sets separated by Marks. Sort each set 664 // unordered state sets separated by Marks. Sort each set
665 // to canonicalize, to reduce the number of distinct sets stored. 665 // to canonicalize, to reduce the number of distinct sets stored.
666 if (kind_ == Prog::kLongestMatch) { 666 if (kind_ == Prog::kLongestMatch) {
667 uint32* ip = inst; 667 int* ip = inst;
668 uint32* ep = ip + n; 668 int* ep = ip + n;
669 while (ip < ep) { 669 while (ip < ep) {
670 uint32* markp = ip; 670 int* markp = ip;
671 while (markp < ep && *markp != Mark) 671 while (markp < ep && *markp != Mark)
672 markp++; 672 markp++;
673 sort(ip, markp); 673 sort(ip, markp);
674 if (markp < ep) 674 if (markp < ep)
675 markp++; 675 markp++;
676 ip = markp; 676 ip = markp;
677 } 677 }
678 } 678 }
679 679
680 // Save the needed empty-width flags in the top bits for use later. 680 // Save the needed empty-width flags in the top bits for use later.
681 flag |= needflags << kFlagNeedShift; 681 flag |= needflags << kFlagNeedShift;
682 682
683 State* state = CachedState(inst, n, flag); 683 State* state = CachedState(inst, n, flag);
684 delete[] inst; 684 delete[] inst;
685 return state; 685 return state;
686 } 686 }
687 687
688 // Looks in the State cache for a State matching inst, ninst, flag. 688 // Looks in the State cache for a State matching inst, ninst, flag.
689 // If one is found, returns it. If one is not found, allocates one, 689 // If one is found, returns it. If one is not found, allocates one,
690 // inserts it in the cache, and returns it. 690 // inserts it in the cache, and returns it.
691 DFA::State* DFA::CachedState(uint32* inst, int ninst, uint flag) { 691 DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) {
692 if (DEBUG_MODE) 692 if (DEBUG_MODE)
693 mutex_.AssertHeld(); 693 mutex_.AssertHeld();
694 694
695 // Look in the cache for a pre-existing state. 695 // Look in the cache for a pre-existing state.
696 State state = { inst, ninst, flag, NULL }; 696 State state = { inst, ninst, flag, NULL };
697 StateSet::iterator it = state_cache_.find(&state); 697 StateSet::iterator it = state_cache_.find(&state);
698 if (it != state_cache_.end()) { 698 if (it != state_cache_.end()) {
699 if (DebugDFA) 699 if (DebugDFA)
700 fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); 700 fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str());
701 return *it; 701 return *it;
702 } 702 }
703 703
704 // Must have enough memory for new state. 704 // Must have enough memory for new state.
705 // In addition to what we're going to allocate, 705 // In addition to what we're going to allocate,
706 // the state cache hash table seems to incur about 32 bytes per 706 // the state cache hash table seems to incur about 32 bytes per
707 // State*, empirically. 707 // State*, empirically.
708 const int kStateCacheOverhead = 32; 708 const int kStateCacheOverhead = 32;
709 int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot 709 int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot
710 int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(uint32); 710 int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int);
711 if (mem_budget_ < mem + kStateCacheOverhead) { 711 if (mem_budget_ < mem + kStateCacheOverhead) {
712 mem_budget_ = -1; 712 mem_budget_ = -1;
713 return NULL; 713 return NULL;
714 } 714 }
715 mem_budget_ -= mem + kStateCacheOverhead; 715 mem_budget_ -= mem + kStateCacheOverhead;
716 716
717 // Allocate new state, along with room for next and inst. 717 // Allocate new state, along with room for next and inst.
718 char* space = new char[mem]; 718 char* space = new char[mem];
719 State* s = reinterpret_cast<State*>(space); 719 State* s = reinterpret_cast<State*>(space);
720 s->next_ = reinterpret_cast<State**>(s + 1); 720 s->next_ = reinterpret_cast<State**>(s + 1);
721 s->inst_ = reinterpret_cast<uint32*>(s->next_ + nnext); 721 s->inst_ = reinterpret_cast<int*>(s->next_ + nnext);
722 memset(s->next_, 0, nnext*sizeof s->next_[0]); 722 memset(s->next_, 0, nnext*sizeof s->next_[0]);
723 memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); 723 memmove(s->inst_, inst, ninst*sizeof s->inst_[0]);
724 s->ninst_ = ninst; 724 s->ninst_ = ninst;
725 s->flag_ = flag; 725 s->flag_ = flag;
726 if (DebugDFA) 726 if (DebugDFA)
727 fprintf(stderr, " -> %s\n", DumpState(s).c_str()); 727 fprintf(stderr, " -> %s\n", DumpState(s).c_str());
728 728
729 // Put state in cache and return it. 729 // Put state in cache and return it.
730 state_cache_.insert(s); 730 state_cache_.insert(s);
731 return s; 731 return s;
(...skipping 27 matching lines...) Expand all
759 // Adds ip to the work queue, following empty arrows according to flag 759 // Adds ip to the work queue, following empty arrows according to flag
760 // and expanding kInstAlt instructions (two-target gotos). 760 // and expanding kInstAlt instructions (two-target gotos).
761 void DFA::AddToQueue(Workq* q, int id, uint flag) { 761 void DFA::AddToQueue(Workq* q, int id, uint flag) {
762 762
763 // Use astack_ to hold our stack of states yet to process. 763 // Use astack_ to hold our stack of states yet to process.
764 // It is sized to have room for nastack_ == 2*prog->size() + nmark 764 // It is sized to have room for nastack_ == 2*prog->size() + nmark
765 // instructions, which is enough: each instruction can be 765 // instructions, which is enough: each instruction can be
766 // processed by the switch below only once, and the processing 766 // processed by the switch below only once, and the processing
767 // pushes at most two instructions plus maybe a mark. 767 // pushes at most two instructions plus maybe a mark.
768 // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.) 768 // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.)
769 uint32* stk = astack_; 769 int* stk = astack_;
770 int nstk = 0; 770 int nstk = 0;
771 771
772 stk[nstk++] = id; 772 stk[nstk++] = id;
773 while (nstk > 0) { 773 while (nstk > 0) {
774 DCHECK_LE(nstk, nastack_); 774 DCHECK_LE(nstk, nastack_);
775 id = stk[--nstk]; 775 id = stk[--nstk];
776 776
777 if (id == Mark) { 777 if (id == Mark) {
778 q->mark(); 778 q->mark();
779 continue; 779 continue;
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
856 } 856 }
857 } 857 }
858 858
859 // Runs the work queue, processing the single byte c followed by any empty 859 // Runs the work queue, processing the single byte c followed by any empty
860 // strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine, 860 // strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine,
861 // means to match c$. Sets the bool *ismatch to true if the end of the 861 // means to match c$. Sets the bool *ismatch to true if the end of the
862 // regular expression program has been reached (the regexp has matched). 862 // regular expression program has been reached (the regexp has matched).
863 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, 863 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq,
864 int c, uint flag, bool* ismatch, 864 int c, uint flag, bool* ismatch,
865 Prog::MatchKind kind, 865 Prog::MatchKind kind,
866 uint32 new_byte_loop) { 866 int new_byte_loop) {
867 if (DEBUG_MODE) 867 if (DEBUG_MODE)
868 mutex_.AssertHeld(); 868 mutex_.AssertHeld();
869 869
870 newq->clear(); 870 newq->clear();
871 for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { 871 for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) {
872 if (oldq->is_mark(*i)) { 872 if (oldq->is_mark(*i)) {
873 if (*ismatch) 873 if (*ismatch)
874 return; 874 return;
875 newq->mark(); 875 newq->mark();
876 continue; 876 continue;
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after
1139 // Recreates and returns a state equivalent to the 1139 // Recreates and returns a state equivalent to the
1140 // original state passed to the constructor. 1140 // original state passed to the constructor.
1141 // Returns NULL if the cache has filled, but 1141 // Returns NULL if the cache has filled, but
1142 // since the DFA guarantees to have room in the cache 1142 // since the DFA guarantees to have room in the cache
1143 // for a couple states, should never return NULL 1143 // for a couple states, should never return NULL
1144 // if used right after ResetCache. 1144 // if used right after ResetCache.
1145 State* Restore(); 1145 State* Restore();
1146 1146
1147 private: 1147 private:
1148 DFA* dfa_; // the DFA to use 1148 DFA* dfa_; // the DFA to use
1149 uint32* inst_; // saved info from State 1149 int* inst_; // saved info from State
1150 int ninst_; 1150 int ninst_;
1151 uint flag_; 1151 uint flag_;
1152 bool is_special_; // whether original state was special 1152 bool is_special_; // whether original state was special
1153 State* special_; // if is_special_, the original state 1153 State* special_; // if is_special_, the original state
1154 1154
1155 DISALLOW_EVIL_CONSTRUCTORS(StateSaver); 1155 DISALLOW_EVIL_CONSTRUCTORS(StateSaver);
1156 }; 1156 };
1157 1157
1158 DFA::StateSaver::StateSaver(DFA* dfa, State* state) { 1158 DFA::StateSaver::StateSaver(DFA* dfa, State* state) {
1159 dfa_ = dfa; 1159 dfa_ = dfa;
1160 if (state <= SpecialStateMax) { 1160 if (state <= SpecialStateMax) {
1161 inst_ = NULL; 1161 inst_ = NULL;
1162 ninst_ = 0; 1162 ninst_ = 0;
1163 flag_ = 0; 1163 flag_ = 0;
1164 is_special_ = true; 1164 is_special_ = true;
1165 special_ = state; 1165 special_ = state;
1166 return; 1166 return;
1167 } 1167 }
1168 is_special_ = false; 1168 is_special_ = false;
1169 special_ = NULL; 1169 special_ = NULL;
1170 flag_ = state->flag_; 1170 flag_ = state->flag_;
1171 ninst_ = state->ninst_; 1171 ninst_ = state->ninst_;
1172 inst_ = new uint32[ninst_]; 1172 inst_ = new int[ninst_];
1173 memmove(inst_, state->inst_, ninst_*sizeof inst_[0]); 1173 memmove(inst_, state->inst_, ninst_*sizeof inst_[0]);
1174 } 1174 }
1175 1175
1176 DFA::StateSaver::~StateSaver() { 1176 DFA::StateSaver::~StateSaver() {
1177 if (!is_special_) 1177 if (!is_special_)
1178 delete[] inst_; 1178 delete[] inst_;
1179 } 1179 }
1180 1180
1181 DFA::State* DFA::StateSaver::Restore() { 1181 DFA::State* DFA::StateSaver::Restore() {
1182 if (is_special_) 1182 if (is_special_)
(...skipping 831 matching lines...) Expand 10 before | Expand all | Expand 10 after
2014 if (dfa_longest_ == NULL) { 2014 if (dfa_longest_ == NULL) {
2015 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); 2015 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2);
2016 delete_dfa_ = DeleteDFA; 2016 delete_dfa_ = DeleteDFA;
2017 } 2017 }
2018 dfa = dfa_longest_; 2018 dfa = dfa_longest_;
2019 } 2019 }
2020 return dfa->PossibleMatchRange(min, max, maxlen); 2020 return dfa->PossibleMatchRange(min, max, maxlen);
2021 } 2021 }
2022 2022
2023 } // namespace re2 2023 } // namespace re2
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b