OLD | NEW |
1 // © 2016 and later: Unicode, Inc. and others. | 1 // © 2016 and later: Unicode, Inc. and others. |
2 // License & terms of use: http://www.unicode.org/copyright.html | 2 // License & terms of use: http://www.unicode.org/copyright.html |
3 // | 3 // |
4 // rbbisetb.h | 4 // rbbisetb.h |
5 /* | 5 /* |
6 ********************************************************************** | 6 ********************************************************************** |
7 * Copyright (c) 2001-2005, International Business Machines | 7 * Copyright (c) 2001-2005, International Business Machines |
8 * Corporation and others. All Rights Reserved. | 8 * Corporation and others. All Rights Reserved. |
9 ********************************************************************** | 9 ********************************************************************** |
10 */ | 10 */ |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
75 // - construct the trie table that maps input characters | 75 // - construct the trie table that maps input characters |
76 // to set numbers in the non-overlapping set of sets. | 76 // to set numbers in the non-overlapping set of sets. |
77 // | 77 // |
78 | 78 |
79 | 79 |
80 class RBBISetBuilder : public UMemory { | 80 class RBBISetBuilder : public UMemory { |
81 public: | 81 public: |
82 RBBISetBuilder(RBBIRuleBuilder *rb); | 82 RBBISetBuilder(RBBIRuleBuilder *rb); |
83 ~RBBISetBuilder(); | 83 ~RBBISetBuilder(); |
84 | 84 |
85 void build(); | 85 void buildRanges(); |
| 86 void buildTrie(); |
86 void addValToSets(UVector *sets, uint32_t val); | 87 void addValToSets(UVector *sets, uint32_t val); |
87 void addValToSet (RBBINode *usetNode, uint32_t val); | 88 void addValToSet (RBBINode *usetNode, uint32_t val); |
88 int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the | 89 int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the |
89 // runtime state machine, which are the same as | 90 // runtime state machine, which are the same as |
90 // columns in the DFA state table | 91 // columns in the DFA state table |
91 int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie. | 92 int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie. |
92 void serializeTrie(uint8_t *where); // write out the serialized Trie. | 93 void serializeTrie(uint8_t *where); // write out the serialized Trie. |
93 UChar32 getFirstChar(int32_t val) const; | 94 UChar32 getFirstChar(int32_t val) const; |
94 UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo | 95 UBool sawBOF() const; // Indicate whether any references to the {bof} pseudo |
95 // character were encountered. | 96 // character were encountered. |
| 97 /** merge two character categories that have been identified as having equivalent behavior. |
| 98 * The ranges belonging to the right category (table column) will be added to the left. |
| 99 */ |
| 100 void mergeCategories(int32_t left, int32_t right); |
| 101 |
| 102 static constexpr int32_t DICT_BIT = 0x4000; |
| 103 |
96 #ifdef RBBI_DEBUG | 104 #ifdef RBBI_DEBUG |
97 void printSets(); | 105 void printSets(); |
98 void printRanges(); | 106 void printRanges(); |
99 void printRangeGroups(); | 107 void printRangeGroups(); |
100 #else | 108 #else |
101 #define printSets() | 109 #define printSets() |
102 #define printRanges() | 110 #define printRanges() |
103 #define printRangeGroups() | 111 #define printRangeGroups() |
104 #endif | 112 #endif |
105 | 113 |
(...skipping 22 matching lines...) Expand all Loading... |
128 RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class | 136 RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class |
129 }; | 137 }; |
130 | 138 |
131 | 139 |
132 | 140 |
133 U_NAMESPACE_END | 141 U_NAMESPACE_END |
134 | 142 |
135 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | 143 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |
136 | 144 |
137 #endif | 145 #endif |
OLD | NEW |