LEFT | RIGHT |
1 // Copyright 2008 The RE2 Authors. All Rights Reserved. | 1 // Copyright 2008 The RE2 Authors. All Rights Reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // A DFA (deterministic finite automaton)-based regular expression search. | 5 // A DFA (deterministic finite automaton)-based regular expression search. |
6 // | 6 // |
7 // The DFA search has two main parts: the construction of the automaton, | 7 // The DFA search has two main parts: the construction of the automaton, |
8 // which is represented by a graph of State structures, and the execution | 8 // which is represented by a graph of State structures, and the execution |
9 // of the automaton over a given input string. | 9 // of the automaton over a given input string. |
10 // | 10 // |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 // difficult to mark them as such. | 88 // difficult to mark them as such. |
89 class Workq; | 89 class Workq; |
90 class RWLocker; | 90 class RWLocker; |
91 class StateSaver; | 91 class StateSaver; |
92 | 92 |
93 // A single DFA state. The DFA is represented as a graph of these | 93 // A single DFA state. The DFA is represented as a graph of these |
94 // States, linked by the next_ pointers. If in state s and reading | 94 // States, linked by the next_ pointers. If in state s and reading |
95 // byte c, the next state should be s->next_[c]. | 95 // byte c, the next state should be s->next_[c]. |
96 struct State { | 96 struct State { |
97 inline bool IsMatch() const { return flag_ & kFlagMatch; } | 97 inline bool IsMatch() const { return flag_ & kFlagMatch; } |
98 uint32* inst_; // Instruction pointers in the state. | 98 int* inst_; // Instruction pointers in the state. |
99 int ninst_; // # of inst_ pointers. | 99 int ninst_; // # of inst_ pointers. |
100 uint flag_; // Empty string bitfield flags in effect on the way | 100 uint flag_; // Empty string bitfield flags in effect on the way |
101 // into this state, along with kFlagMatch if this | 101 // into this state, along with kFlagMatch if this |
102 // is a matching state. | 102 // is a matching state. |
103 State** next_; // Outgoing arrows from State, | 103 State** next_; // Outgoing arrows from State, |
104 // one per input byte class | 104 // one per input byte class |
105 }; | 105 }; |
106 | 106 |
107 enum { | 107 enum { |
108 kByteEndText = 256, // imaginary byte at end of text | 108 kByteEndText = 256, // imaginary byte at end of text |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
173 // cache_mutex_.r <= L < mutex_ | 173 // cache_mutex_.r <= L < mutex_ |
174 // After: cache_mutex_.w <= L < mutex_ | 174 // After: cache_mutex_.w <= L < mutex_ |
175 void ResetCache(RWLocker* cache_lock); | 175 void ResetCache(RWLocker* cache_lock); |
176 | 176 |
177 // Looks up and returns the State corresponding to a Workq. | 177 // Looks up and returns the State corresponding to a Workq. |
178 // L >= mutex_ | 178 // L >= mutex_ |
179 State* WorkqToCachedState(Workq* q, uint flag); | 179 State* WorkqToCachedState(Workq* q, uint flag); |
180 | 180 |
181 // Looks up and returns a State matching the inst, ninst, and flag. | 181 // Looks up and returns a State matching the inst, ninst, and flag. |
182 // L >= mutex_ | 182 // L >= mutex_ |
183 State* CachedState(uint32* inst, int ninst, uint flag); | 183 State* CachedState(int* inst, int ninst, uint flag); |
184 | 184 |
185 // Clear the cache entirely. | 185 // Clear the cache entirely. |
186 // Must hold cache_mutex_.w or be in destructor. | 186 // Must hold cache_mutex_.w or be in destructor. |
187 void ClearCache(); | 187 void ClearCache(); |
188 | 188 |
189 // Converts a State into a Workq: the opposite of WorkqToCachedState. | 189 // Converts a State into a Workq: the opposite of WorkqToCachedState. |
190 // L >= mutex_ | 190 // L >= mutex_ |
191 static void StateToWorkq(State* s, Workq* q); | 191 static void StateToWorkq(State* s, Workq* q); |
192 | 192 |
193 // Runs a State on a given byte, returning the next state. | 193 // Runs a State on a given byte, returning the next state. |
194 State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ | 194 State* RunStateOnByteUnlocked(State*, int); // cache_mutex_.r <= L < mutex_ |
195 State* RunStateOnByte(State*, int); // L >= mutex_ | 195 State* RunStateOnByte(State*, int); // L >= mutex_ |
196 | 196 |
197 // Runs a Workq on a given byte followed by a set of empty-string flags, | 197 // Runs a Workq on a given byte followed by a set of empty-string flags, |
198 // producing a new Workq in nq. If a match instruction is encountered, | 198 // producing a new Workq in nq. If a match instruction is encountered, |
199 // sets *ismatch to true. | 199 // sets *ismatch to true. |
200 // L >= mutex_ | 200 // L >= mutex_ |
201 void RunWorkqOnByte(Workq* q, Workq* nq, | 201 void RunWorkqOnByte(Workq* q, Workq* nq, |
202 int c, uint flag, bool* ismatch, | 202 int c, uint flag, bool* ismatch, |
203 Prog::MatchKind kind, | 203 Prog::MatchKind kind, |
204 uint32 new_byte_loop); | 204 int new_byte_loop); |
205 | 205 |
206 // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. | 206 // Runs a Workq on a set of empty-string flags, producing a new Workq in nq. |
207 // L >= mutex_ | 207 // L >= mutex_ |
208 void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag); | 208 void RunWorkqOnEmptyString(Workq* q, Workq* nq, uint flag); |
209 | 209 |
210 // Adds the instruction id to the Workq, following empty arrows | 210 // Adds the instruction id to the Workq, following empty arrows |
211 // according to flag. | 211 // according to flag. |
212 // L >= mutex_ | 212 // L >= mutex_ |
213 void AddToQueue(Workq* q, int id, uint flag); | 213 void AddToQueue(Workq* q, int id, uint flag); |
214 | 214 |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
310 Prog* prog_; // The regular expression program to run. | 310 Prog* prog_; // The regular expression program to run. |
311 Prog::MatchKind kind_; // The kind of DFA. | 311 Prog::MatchKind kind_; // The kind of DFA. |
312 int start_unanchored_; // start of unanchored program | 312 int start_unanchored_; // start of unanchored program |
313 bool init_failed_; // initialization failed (out of memory) | 313 bool init_failed_; // initialization failed (out of memory) |
314 | 314 |
315 Mutex mutex_; // mutex_ >= cache_mutex_.r | 315 Mutex mutex_; // mutex_ >= cache_mutex_.r |
316 | 316 |
317 // Scratch areas, protected by mutex_. | 317 // Scratch areas, protected by mutex_. |
318 Workq* q0_; // Two pre-allocated work queues. | 318 Workq* q0_; // Two pre-allocated work queues. |
319 Workq* q1_; | 319 Workq* q1_; |
320 uint32* astack_; // Pre-allocated stack for AddToQueue | 320 int* astack_; // Pre-allocated stack for AddToQueue |
321 int nastack_; | 321 int nastack_; |
322 | 322 |
323 // State* cache. Many threads use and add to the cache simultaneously, | 323 // State* cache. Many threads use and add to the cache simultaneously, |
324 // holding cache_mutex_ for reading and mutex_ (above) when adding. | 324 // holding cache_mutex_ for reading and mutex_ (above) when adding. |
325 // If the cache fills and needs to be discarded, the discarding is done | 325 // If the cache fills and needs to be discarded, the discarding is done |
326 // while holding cache_mutex_ for writing, to avoid interrupting other | 326 // while holding cache_mutex_ for writing, to avoid interrupting other |
327 // readers. Any State* pointers are only valid while cache_mutex_ | 327 // readers. Any State* pointers are only valid while cache_mutex_ |
328 // is held. | 328 // is held. |
329 Mutex cache_mutex_; | 329 Mutex cache_mutex_; |
330 int64 mem_budget_; // Total memory budget for all States. | 330 int64 mem_budget_; // Total memory budget for all States. |
331 int64 state_budget_; // Amount of memory remaining for new States. | 331 int64 state_budget_; // Amount of memory remaining for new States. |
332 StateSet state_cache_; // All States computed so far. | 332 StateSet state_cache_; // All States computed so far. |
333 StartInfo start_[kMaxStart]; | 333 StartInfo start_[kMaxStart]; |
334 bool cache_warned_; // have printed to LOG(INFO) about the cache | 334 bool cache_warned_; // have printed to LOG(INFO) about the cache |
335 }; | 335 }; |
336 | 336 |
337 // Shorthand for casting to uint8*. | 337 // Shorthand for casting to uint8*. |
338 static inline const uint8* BytePtr(const void* v) { | 338 static inline const uint8* BytePtr(const void* v) { |
339 return reinterpret_cast<const uint8*>(v); | 339 return reinterpret_cast<const uint8*>(v); |
340 } | 340 } |
341 | 341 |
342 // Work queues | 342 // Work queues |
343 | 343 |
344 // Marks separate thread groups of different priority | 344 // Marks separate thread groups of different priority |
345 // in the work queue when in leftmost-longest matching mode. | 345 // in the work queue when in leftmost-longest matching mode. |
346 #define Mark static_cast<uint32>(-1) | 346 #define Mark (-1) |
347 | 347 |
348 // Internally, the DFA uses a sparse array of | 348 // Internally, the DFA uses a sparse array of |
349 // program instruction pointers as a work queue. | 349 // program instruction pointers as a work queue. |
350 // In leftmost longest mode, marks separate sections | 350 // In leftmost longest mode, marks separate sections |
351 // of workq that started executing at different | 351 // of workq that started executing at different |
352 // locations in the string (earlier locations first). | 352 // locations in the string (earlier locations first). |
353 class DFA::Workq : public SparseSet { | 353 class DFA::Workq : public SparseSet { |
354 public: | 354 public: |
355 // Constructor: n is number of normal slots, maxmark number of mark slots. | 355 // Constructor: n is number of normal slots, maxmark number of mark slots. |
356 Workq(int n, int maxmark) : | 356 Workq(int n, int maxmark) : |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
415 start_unanchored_ = NULL; | 415 start_unanchored_ = NULL; |
416 if (kind_ == Prog::kLongestMatch) { | 416 if (kind_ == Prog::kLongestMatch) { |
417 nmark = prog->size(); | 417 nmark = prog->size(); |
418 start_unanchored_ = prog->start_unanchored(); | 418 start_unanchored_ = prog->start_unanchored(); |
419 } | 419 } |
420 nastack_ = 2 * prog->size() + nmark; | 420 nastack_ = 2 * prog->size() + nmark; |
421 | 421 |
422 // Account for space needed for DFA, q0, q1, astack. | 422 // Account for space needed for DFA, q0, q1, astack. |
423 mem_budget_ -= sizeof(DFA); | 423 mem_budget_ -= sizeof(DFA); |
424 mem_budget_ -= (prog_->size() + nmark) * | 424 mem_budget_ -= (prog_->size() + nmark) * |
425 (sizeof(int)+sizeof(int)+sizeof(uint32)) * 2; // q0, q1 | 425 (sizeof(int)+sizeof(int)) * 2; // q0, q1 |
426 mem_budget_ -= nastack_ * sizeof(uint32); // astack | 426 mem_budget_ -= nastack_ * sizeof(int); // astack |
427 if (mem_budget_ < 0) { | 427 if (mem_budget_ < 0) { |
428 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", | 428 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", |
429 prog_->size(), max_mem); | 429 prog_->size(), max_mem); |
430 init_failed_ = true; | 430 init_failed_ = true; |
431 return; | 431 return; |
432 } | 432 } |
433 | 433 |
434 state_budget_ = mem_budget_; | 434 state_budget_ = mem_budget_; |
435 | 435 |
436 // Make sure there is a reasonable amount of working room left. | 436 // Make sure there is a reasonable amount of working room left. |
437 // At minimum, the search requires room for two states in order | 437 // At minimum, the search requires room for two states in order |
438 // to limp along, restarting frequently. We'll get better performance | 438 // to limp along, restarting frequently. We'll get better performance |
439 // if there is room for a larger number of states, say 20. | 439 // if there is room for a larger number of states, say 20. |
440 int one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(uint32) + | 440 int one_state = sizeof(State) + (prog_->size()+nmark)*sizeof(int) + |
441 (prog_->bytemap_range()+1)*sizeof(State*); | 441 (prog_->bytemap_range()+1)*sizeof(State*); |
442 if (state_budget_ < 20*one_state) { | 442 if (state_budget_ < 20*one_state) { |
443 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", | 443 LOG(INFO) << StringPrintf("DFA out of memory: prog size %lld mem %lld", |
444 prog_->size(), max_mem); | 444 prog_->size(), max_mem); |
445 init_failed_ = true; | 445 init_failed_ = true; |
446 return; | 446 return; |
447 } | 447 } |
448 | 448 |
449 q0_ = new Workq(prog->size(), nmark); | 449 q0_ = new Workq(prog->size(), nmark); |
450 q1_ = new Workq(prog->size(), nmark); | 450 q1_ = new Workq(prog->size(), nmark); |
451 astack_ = new uint32[nastack_]; | 451 astack_ = new int[nastack_]; |
452 } | 452 } |
453 | 453 |
454 DFA::~DFA() { | 454 DFA::~DFA() { |
455 delete q0_; | 455 delete q0_; |
456 delete q1_; | 456 delete q1_; |
457 delete[] astack_; | 457 delete[] astack_; |
458 ClearCache(); | 458 ClearCache(); |
459 } | 459 } |
460 | 460 |
461 // In the DFA state graph, s->next[c] == NULL means that the | 461 // In the DFA state graph, s->next[c] == NULL means that the |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
516 // | 516 // |
517 // DFA state graph construction. | 517 // DFA state graph construction. |
518 // | 518 // |
519 // The DFA state graph is a heavily-linked collection of State* structures. | 519 // The DFA state graph is a heavily-linked collection of State* structures. |
520 // The state_cache_ is a set of all the State structures ever allocated, | 520 // The state_cache_ is a set of all the State structures ever allocated, |
521 // so that if the same state is reached by two different paths, | 521 // so that if the same state is reached by two different paths, |
522 // the same State structure can be used. This reduces allocation | 522 // the same State structure can be used. This reduces allocation |
523 // requirements and also avoids duplication of effort across the two | 523 // requirements and also avoids duplication of effort across the two |
524 // identical states. | 524 // identical states. |
525 // | 525 // |
526 // A State is defined by an ordered list of uint32 instruction ids and a flag wo
rd. | 526 // A State is defined by an ordered list of instruction ids and a flag word. |
527 // | 527 // |
528 // The choice of an ordered list of instructions differs from a typical | 528 // The choice of an ordered list of instructions differs from a typical |
529 // textbook DFA implementation, which would use an unordered set. | 529 // textbook DFA implementation, which would use an unordered set. |
530 // Textbook descriptions, however, only care about whether | 530 // Textbook descriptions, however, only care about whether |
531 // the DFA matches, not where it matches in the text. To decide where the | 531 // the DFA matches, not where it matches in the text. To decide where the |
532 // DFA matches, we need to mimic the behavior of the dominant backtracking | 532 // DFA matches, we need to mimic the behavior of the dominant backtracking |
533 // implementations like PCRE, which try one possible regular expression | 533 // implementations like PCRE, which try one possible regular expression |
534 // execution, then another, then another, stopping when one of them succeeds. | 534 // execution, then another, then another, stopping when one of them succeeds. |
535 // The DFA execution tries these many executions in parallel, representing | 535 // The DFA execution tries these many executions in parallel, representing |
536 // each by an instruction id. These pointers are ordered in the State.inst_ | 536 // each by an instruction id. These pointers are ordered in the State.inst_ |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
571 // If one is found, returns it. If one is not found, allocates one, | 571 // If one is found, returns it. If one is not found, allocates one, |
572 // inserts it in the cache, and returns it. | 572 // inserts it in the cache, and returns it. |
573 DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { | 573 DFA::State* DFA::WorkqToCachedState(Workq* q, uint flag) { |
574 if (DEBUG_MODE) | 574 if (DEBUG_MODE) |
575 mutex_.AssertHeld(); | 575 mutex_.AssertHeld(); |
576 | 576 |
577 // Construct array of instruction ids for the new state. | 577 // Construct array of instruction ids for the new state. |
578 // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: | 578 // Only ByteRange, EmptyWidth, and Match instructions are useful to keep: |
579 // those are the only operators with any effect in | 579 // those are the only operators with any effect in |
580 // RunWorkqOnEmptyString or RunWorkqOnByte. | 580 // RunWorkqOnEmptyString or RunWorkqOnByte. |
581 uint32* inst = new uint32[q->size()]; | 581 int* inst = new int[q->size()]; |
582 int n = 0; | 582 int n = 0; |
583 uint needflags = 0; // flags needed by kInstEmptyWidth instructions | 583 uint needflags = 0; // flags needed by kInstEmptyWidth instructions |
584 bool sawmatch = false; // whether queue contains guaranteed kInstMatch | 584 bool sawmatch = false; // whether queue contains guaranteed kInstMatch |
585 if (DebugDFA) | 585 if (DebugDFA) |
586 fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); | 586 fprintf(stderr, "WorkqToCachedState %s [%#x]", DumpWorkq(q).c_str(), flag); |
587 for (Workq::iterator it = q->begin(); it != q->end(); ++it) { | 587 for (Workq::iterator it = q->begin(); it != q->end(); ++it) { |
588 int id = *it; | 588 int id = *it; |
589 if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id))) | 589 if (sawmatch && (kind_ == Prog::kFirstMatch || q->is_mark(id))) |
590 break; | 590 break; |
591 if (q->is_mark(id)) { | 591 if (q->is_mark(id)) { |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
657 // if the state is *not* a matching state. | 657 // if the state is *not* a matching state. |
658 if (n == 0 && flag == 0) { | 658 if (n == 0 && flag == 0) { |
659 delete[] inst; | 659 delete[] inst; |
660 return DeadState; | 660 return DeadState; |
661 } | 661 } |
662 | 662 |
663 // If we're in longest match mode, the state is a sequence of | 663 // If we're in longest match mode, the state is a sequence of |
664 // unordered state sets separated by Marks. Sort each set | 664 // unordered state sets separated by Marks. Sort each set |
665 // to canonicalize, to reduce the number of distinct sets stored. | 665 // to canonicalize, to reduce the number of distinct sets stored. |
666 if (kind_ == Prog::kLongestMatch) { | 666 if (kind_ == Prog::kLongestMatch) { |
667 uint32* ip = inst; | 667 int* ip = inst; |
668 uint32* ep = ip + n; | 668 int* ep = ip + n; |
669 while (ip < ep) { | 669 while (ip < ep) { |
670 uint32* markp = ip; | 670 int* markp = ip; |
671 while (markp < ep && *markp != Mark) | 671 while (markp < ep && *markp != Mark) |
672 markp++; | 672 markp++; |
673 sort(ip, markp); | 673 sort(ip, markp); |
674 if (markp < ep) | 674 if (markp < ep) |
675 markp++; | 675 markp++; |
676 ip = markp; | 676 ip = markp; |
677 } | 677 } |
678 } | 678 } |
679 | 679 |
680 // Save the needed empty-width flags in the top bits for use later. | 680 // Save the needed empty-width flags in the top bits for use later. |
681 flag |= needflags << kFlagNeedShift; | 681 flag |= needflags << kFlagNeedShift; |
682 | 682 |
683 State* state = CachedState(inst, n, flag); | 683 State* state = CachedState(inst, n, flag); |
684 delete[] inst; | 684 delete[] inst; |
685 return state; | 685 return state; |
686 } | 686 } |
687 | 687 |
688 // Looks in the State cache for a State matching inst, ninst, flag. | 688 // Looks in the State cache for a State matching inst, ninst, flag. |
689 // If one is found, returns it. If one is not found, allocates one, | 689 // If one is found, returns it. If one is not found, allocates one, |
690 // inserts it in the cache, and returns it. | 690 // inserts it in the cache, and returns it. |
691 DFA::State* DFA::CachedState(uint32* inst, int ninst, uint flag) { | 691 DFA::State* DFA::CachedState(int* inst, int ninst, uint flag) { |
692 if (DEBUG_MODE) | 692 if (DEBUG_MODE) |
693 mutex_.AssertHeld(); | 693 mutex_.AssertHeld(); |
694 | 694 |
695 // Look in the cache for a pre-existing state. | 695 // Look in the cache for a pre-existing state. |
696 State state = { inst, ninst, flag, NULL }; | 696 State state = { inst, ninst, flag, NULL }; |
697 StateSet::iterator it = state_cache_.find(&state); | 697 StateSet::iterator it = state_cache_.find(&state); |
698 if (it != state_cache_.end()) { | 698 if (it != state_cache_.end()) { |
699 if (DebugDFA) | 699 if (DebugDFA) |
700 fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); | 700 fprintf(stderr, " -cached-> %s\n", DumpState(*it).c_str()); |
701 return *it; | 701 return *it; |
702 } | 702 } |
703 | 703 |
704 // Must have enough memory for new state. | 704 // Must have enough memory for new state. |
705 // In addition to what we're going to allocate, | 705 // In addition to what we're going to allocate, |
706 // the state cache hash table seems to incur about 32 bytes per | 706 // the state cache hash table seems to incur about 32 bytes per |
707 // State*, empirically. | 707 // State*, empirically. |
708 const int kStateCacheOverhead = 32; | 708 const int kStateCacheOverhead = 32; |
709 int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot | 709 int nnext = prog_->bytemap_range() + 1; // + 1 for kByteEndText slot |
710 int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(uint32); | 710 int mem = sizeof(State) + nnext*sizeof(State*) + ninst*sizeof(int); |
711 if (mem_budget_ < mem + kStateCacheOverhead) { | 711 if (mem_budget_ < mem + kStateCacheOverhead) { |
712 mem_budget_ = -1; | 712 mem_budget_ = -1; |
713 return NULL; | 713 return NULL; |
714 } | 714 } |
715 mem_budget_ -= mem + kStateCacheOverhead; | 715 mem_budget_ -= mem + kStateCacheOverhead; |
716 | 716 |
717 // Allocate new state, along with room for next and inst. | 717 // Allocate new state, along with room for next and inst. |
718 char* space = new char[mem]; | 718 char* space = new char[mem]; |
719 State* s = reinterpret_cast<State*>(space); | 719 State* s = reinterpret_cast<State*>(space); |
720 s->next_ = reinterpret_cast<State**>(s + 1); | 720 s->next_ = reinterpret_cast<State**>(s + 1); |
721 s->inst_ = reinterpret_cast<uint32*>(s->next_ + nnext); | 721 s->inst_ = reinterpret_cast<int*>(s->next_ + nnext); |
722 memset(s->next_, 0, nnext*sizeof s->next_[0]); | 722 memset(s->next_, 0, nnext*sizeof s->next_[0]); |
723 memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); | 723 memmove(s->inst_, inst, ninst*sizeof s->inst_[0]); |
724 s->ninst_ = ninst; | 724 s->ninst_ = ninst; |
725 s->flag_ = flag; | 725 s->flag_ = flag; |
726 if (DebugDFA) | 726 if (DebugDFA) |
727 fprintf(stderr, " -> %s\n", DumpState(s).c_str()); | 727 fprintf(stderr, " -> %s\n", DumpState(s).c_str()); |
728 | 728 |
729 // Put state in cache and return it. | 729 // Put state in cache and return it. |
730 state_cache_.insert(s); | 730 state_cache_.insert(s); |
731 return s; | 731 return s; |
(...skipping 27 matching lines...) Expand all Loading... |
759 // Adds ip to the work queue, following empty arrows according to flag | 759 // Adds ip to the work queue, following empty arrows according to flag |
760 // and expanding kInstAlt instructions (two-target gotos). | 760 // and expanding kInstAlt instructions (two-target gotos). |
761 void DFA::AddToQueue(Workq* q, int id, uint flag) { | 761 void DFA::AddToQueue(Workq* q, int id, uint flag) { |
762 | 762 |
763 // Use astack_ to hold our stack of states yet to process. | 763 // Use astack_ to hold our stack of states yet to process. |
764 // It is sized to have room for nastack_ == 2*prog->size() + nmark | 764 // It is sized to have room for nastack_ == 2*prog->size() + nmark |
765 // instructions, which is enough: each instruction can be | 765 // instructions, which is enough: each instruction can be |
766 // processed by the switch below only once, and the processing | 766 // processed by the switch below only once, and the processing |
767 // pushes at most two instructions plus maybe a mark. | 767 // pushes at most two instructions plus maybe a mark. |
768 // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.) | 768 // (If we're using marks, nmark == prog->size(); otherwise nmark == 0.) |
769 uint32* stk = astack_; | 769 int* stk = astack_; |
770 int nstk = 0; | 770 int nstk = 0; |
771 | 771 |
772 stk[nstk++] = id; | 772 stk[nstk++] = id; |
773 while (nstk > 0) { | 773 while (nstk > 0) { |
774 DCHECK_LE(nstk, nastack_); | 774 DCHECK_LE(nstk, nastack_); |
775 id = stk[--nstk]; | 775 id = stk[--nstk]; |
776 | 776 |
777 if (id == Mark) { | 777 if (id == Mark) { |
778 q->mark(); | 778 q->mark(); |
779 continue; | 779 continue; |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
856 } | 856 } |
857 } | 857 } |
858 | 858 |
859 // Runs the work queue, processing the single byte c followed by any empty | 859 // Runs the work queue, processing the single byte c followed by any empty |
860 // strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine, | 860 // strings indicated by flag. For example, c == 'a' and flag == kEmptyEndLine, |
861 // means to match c$. Sets the bool *ismatch to true if the end of the | 861 // means to match c$. Sets the bool *ismatch to true if the end of the |
862 // regular expression program has been reached (the regexp has matched). | 862 // regular expression program has been reached (the regexp has matched). |
863 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, | 863 void DFA::RunWorkqOnByte(Workq* oldq, Workq* newq, |
864 int c, uint flag, bool* ismatch, | 864 int c, uint flag, bool* ismatch, |
865 Prog::MatchKind kind, | 865 Prog::MatchKind kind, |
866 uint32 new_byte_loop) { | 866 int new_byte_loop) { |
867 if (DEBUG_MODE) | 867 if (DEBUG_MODE) |
868 mutex_.AssertHeld(); | 868 mutex_.AssertHeld(); |
869 | 869 |
870 newq->clear(); | 870 newq->clear(); |
871 for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { | 871 for (Workq::iterator i = oldq->begin(); i != oldq->end(); ++i) { |
872 if (oldq->is_mark(*i)) { | 872 if (oldq->is_mark(*i)) { |
873 if (*ismatch) | 873 if (*ismatch) |
874 return; | 874 return; |
875 newq->mark(); | 875 newq->mark(); |
876 continue; | 876 continue; |
(...skipping 262 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1139 // Recreates and returns a state equivalent to the | 1139 // Recreates and returns a state equivalent to the |
1140 // original state passed to the constructor. | 1140 // original state passed to the constructor. |
1141 // Returns NULL if the cache has filled, but | 1141 // Returns NULL if the cache has filled, but |
1142 // since the DFA guarantees to have room in the cache | 1142 // since the DFA guarantees to have room in the cache |
1143 // for a couple states, should never return NULL | 1143 // for a couple states, should never return NULL |
1144 // if used right after ResetCache. | 1144 // if used right after ResetCache. |
1145 State* Restore(); | 1145 State* Restore(); |
1146 | 1146 |
1147 private: | 1147 private: |
1148 DFA* dfa_; // the DFA to use | 1148 DFA* dfa_; // the DFA to use |
1149 uint32* inst_; // saved info from State | 1149 int* inst_; // saved info from State |
1150 int ninst_; | 1150 int ninst_; |
1151 uint flag_; | 1151 uint flag_; |
1152 bool is_special_; // whether original state was special | 1152 bool is_special_; // whether original state was special |
1153 State* special_; // if is_special_, the original state | 1153 State* special_; // if is_special_, the original state |
1154 | 1154 |
1155 DISALLOW_EVIL_CONSTRUCTORS(StateSaver); | 1155 DISALLOW_EVIL_CONSTRUCTORS(StateSaver); |
1156 }; | 1156 }; |
1157 | 1157 |
1158 DFA::StateSaver::StateSaver(DFA* dfa, State* state) { | 1158 DFA::StateSaver::StateSaver(DFA* dfa, State* state) { |
1159 dfa_ = dfa; | 1159 dfa_ = dfa; |
1160 if (state <= SpecialStateMax) { | 1160 if (state <= SpecialStateMax) { |
1161 inst_ = NULL; | 1161 inst_ = NULL; |
1162 ninst_ = 0; | 1162 ninst_ = 0; |
1163 flag_ = 0; | 1163 flag_ = 0; |
1164 is_special_ = true; | 1164 is_special_ = true; |
1165 special_ = state; | 1165 special_ = state; |
1166 return; | 1166 return; |
1167 } | 1167 } |
1168 is_special_ = false; | 1168 is_special_ = false; |
1169 special_ = NULL; | 1169 special_ = NULL; |
1170 flag_ = state->flag_; | 1170 flag_ = state->flag_; |
1171 ninst_ = state->ninst_; | 1171 ninst_ = state->ninst_; |
1172 inst_ = new uint32[ninst_]; | 1172 inst_ = new int[ninst_]; |
1173 memmove(inst_, state->inst_, ninst_*sizeof inst_[0]); | 1173 memmove(inst_, state->inst_, ninst_*sizeof inst_[0]); |
1174 } | 1174 } |
1175 | 1175 |
1176 DFA::StateSaver::~StateSaver() { | 1176 DFA::StateSaver::~StateSaver() { |
1177 if (!is_special_) | 1177 if (!is_special_) |
1178 delete[] inst_; | 1178 delete[] inst_; |
1179 } | 1179 } |
1180 | 1180 |
1181 DFA::State* DFA::StateSaver::Restore() { | 1181 DFA::State* DFA::StateSaver::Restore() { |
1182 if (is_special_) | 1182 if (is_special_) |
(...skipping 831 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2014 if (dfa_longest_ == NULL) { | 2014 if (dfa_longest_ == NULL) { |
2015 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); | 2015 dfa_longest_ = new DFA(this, Prog::kLongestMatch, dfa_mem_/2); |
2016 delete_dfa_ = DeleteDFA; | 2016 delete_dfa_ = DeleteDFA; |
2017 } | 2017 } |
2018 dfa = dfa_longest_; | 2018 dfa = dfa_longest_; |
2019 } | 2019 } |
2020 return dfa->PossibleMatchRange(min, max, maxlen); | 2020 return dfa->PossibleMatchRange(min, max, maxlen); |
2021 } | 2021 } |
2022 | 2022 |
2023 } // namespace re2 | 2023 } // namespace re2 |
LEFT | RIGHT |