OLD | NEW |
1 # | 1 # |
2 # Copyright (C) 2016 and later: Unicode, Inc. and others. | 2 # Copyright (C) 2016 and later: Unicode, Inc. and others. |
3 # License & terms of use: http://www.unicode.org/copyright.html | 3 # License & terms of use: http://www.unicode.org/copyright.html |
4 # Copyright (C) 2002-2016, International Business Machines Corporation | 4 # Copyright (C) 2002-2016, International Business Machines Corporation |
5 # and others. All Rights Reserved. | 5 # and others. All Rights Reserved. |
6 # | 6 # |
7 # file: word_POSIX.txt | 7 # file: word_POSIX.txt |
8 # | 8 # |
9 # ICU Word Break Rules, POSIX locale. | 9 # ICU Word Break Rules, POSIX locale. |
10 # See Unicode Standard Annex #29. | 10 # See Unicode Standard Annex #29. |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
85 $NumericEx = $Numeric ($Extend | $Format | $ZWJ)*; | 85 $NumericEx = $Numeric ($Extend | $Format | $ZWJ)*; |
86 $ExtendNumLetEx = $ExtendNumLet ($Extend | $Format | $ZWJ)*; | 86 $ExtendNumLetEx = $ExtendNumLet ($Extend | $Format | $ZWJ)*; |
87 $Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format | $ZWJ)*; | 87 $Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format | $ZWJ)*; |
88 | 88 |
89 $Ideographic = [\p{Ideographic}]; | 89 $Ideographic = [\p{Ideographic}]; |
90 $HiraganaEx = $Hiragana ($Extend | $Format | $ZWJ)*; | 90 $HiraganaEx = $Hiragana ($Extend | $Format | $ZWJ)*; |
91 $IdeographicEx = $Ideographic ($Extend | $Format | $ZWJ)*; | 91 $IdeographicEx = $Ideographic ($Extend | $Format | $ZWJ)*; |
92 | 92 |
93 ## ------------------------------------------------- | 93 ## ------------------------------------------------- |
94 | 94 |
95 !!forward; | |
96 | |
97 | |
98 # Rule 3 - CR x LF | 95 # Rule 3 - CR x LF |
99 # | 96 # |
100 $CR $LF; | 97 $CR $LF; |
101 | 98 |
102 # Rule 3c ZWJ x (Extended_Pict | EmojiNRK). Precedes WB4, so no intervening Extend chars allowed. | 99 # Rule 3c ZWJ x (Extended_Pict | EmojiNRK). Precedes WB4, so no intervening Extend chars allowed. |
103 # | 100 # |
104 $ZWJ $Extended_Pict; | 101 $ZWJ $Extended_Pict; |
105 | 102 |
106 # Rule 3d - Keep horizontal whitespace together. | 103 # Rule 3d - Keep horizontal whitespace together. |
107 # | 104 # |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
181 # | 178 # |
182 ^$Regional_IndicatorEx $Regional_IndicatorEx; | 179 ^$Regional_IndicatorEx $Regional_IndicatorEx; |
183 | 180 |
184 # special handling for CJK characters: chain for later dictionary segmentation | 181 # special handling for CJK characters: chain for later dictionary segmentation |
185 $HangulSyllable $HangulSyllable {200}; | 182 $HangulSyllable $HangulSyllable {200}; |
186 $KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found | 183 $KanaKanji $KanaKanji {400}; # different rule status if both kana and kanji found |
187 | 184 |
188 # Rule 999 | 185 # Rule 999 |
189 # Match a single code point if no other rule applies. | 186 # Match a single code point if no other rule applies. |
190 .; | 187 .; |
191 | |
192 | |
193 ## ------------------------------------------------- | |
194 | |
195 !!safe_reverse; | |
196 | |
197 # rule 3 | |
198 ($Extend | $Format | $ZWJ)+ .?; | |
199 | |
200 # rule 6 | |
201 ($MidLetter | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* ($Hebrew_Letter | $ALetterPlus); | |
202 | |
203 # rule 7b | |
204 $Double_Quote ($Format | $Extend | $ZWJ)* $Hebrew_Letter; | |
205 | |
206 | |
207 # rule 11 | |
208 ($MidNum | $MidNumLet | $Single_Quote) ($Format | $Extend | $ZWJ)* $Numeric; | |
209 | |
210 # rule 13c | |
211 $Regional_Indicator ($Format | $Extend | $ZWJ)* $Regional_Indicator; | |
212 | |
213 # For dictionary-based break | |
214 $dictionary $dictionary; | |
OLD | NEW |