LEFT | RIGHT |
(no file at all) | |
1 // © 2017 and later: Unicode, Inc. and others. | 1 // © 2017 and later: Unicode, Inc. and others. |
2 // License & terms of use: http://www.unicode.org/copyright.html#License | 2 // License & terms of use: http://www.unicode.org/copyright.html#License |
3 package com.ibm.icu.impl.number; | 3 package com.ibm.icu.impl.number; |
4 | 4 |
5 import com.ibm.icu.text.NumberFormat; | 5 import com.ibm.icu.text.NumberFormat; |
| 6 import com.ibm.icu.text.UnicodeSet; |
6 | 7 |
7 /** | 8 /** |
8 * Performs manipulations on affix patterns: the prefix and suffix strings assoc
iated with a decimal | 9 * Performs manipulations on affix patterns: the prefix and suffix strings assoc
iated with a decimal |
9 * format pattern. For example: | 10 * format pattern. For example: |
10 * | 11 * |
11 * <table> | 12 * <table> |
12 * <tr><th>Affix Pattern</th><th>Example Unescaped (Formatted) String</th></tr> | 13 * <tr> |
13 * <tr><td>abc</td><td>abc</td></tr> | 14 * <th>Affix Pattern</th> |
14 * <tr><td>ab-</td><td>ab−</td></tr> | 15 * <th>Example Unescaped (Formatted) String</th> |
15 * <tr><td>ab'-'</td><td>ab-</td></tr> | 16 * </tr> |
16 * <tr><td>ab''</td><td>ab'</td></tr> | 17 * <tr> |
| 18 * <td>abc</td> |
| 19 * <td>abc</td> |
| 20 * </tr> |
| 21 * <tr> |
| 22 * <td>ab-</td> |
| 23 * <td>ab−</td> |
| 24 * </tr> |
| 25 * <tr> |
| 26 * <td>ab'-'</td> |
| 27 * <td>ab-</td> |
| 28 * </tr> |
| 29 * <tr> |
| 30 * <td>ab''</td> |
| 31 * <td>ab'</td> |
| 32 * </tr> |
17 * </table> | 33 * </table> |
18 * | 34 * |
19 * To manually iterate over tokens in a literal string, use the following patter
n, which is designed | 35 * To manually iterate over tokens in a literal string, use the following patter
n, which is designed to |
20 * to be efficient. | 36 * be efficient. |
21 * | 37 * |
22 * <pre> | 38 * <pre> |
23 * long tag = 0L; | 39 * long tag = 0L; |
24 * while (AffixUtils.hasNext(tag, patternString)) { | 40 * while (AffixUtils.hasNext(tag, patternString)) { |
25 * tag = AffixUtils.nextToken(tag, patternString); | 41 * tag = AffixUtils.nextToken(tag, patternString); |
26 * int typeOrCp = AffixUtils.getTypeOrCp(tag); | 42 * int typeOrCp = AffixUtils.getTypeOrCp(tag); |
27 * switch (typeOrCp) { | 43 * switch (typeOrCp) { |
28 * case AffixUtils.TYPE_MINUS_SIGN: | 44 * case AffixUtils.TYPE_MINUS_SIGN: |
29 * // Current token is a minus sign. | 45 * // Current token is a minus sign. |
30 * break; | 46 * break; |
31 * case AffixUtils.TYPE_PLUS_SIGN: | 47 * case AffixUtils.TYPE_PLUS_SIGN: |
32 * // Current token is a plus sign. | 48 * // Current token is a plus sign. |
33 * break; | 49 * break; |
34 * case AffixUtils.TYPE_PERCENT: | 50 * case AffixUtils.TYPE_PERCENT: |
35 * // Current token is a percent sign. | 51 * // Current token is a percent sign. |
36 * break; | 52 * break; |
37 * // ... other types ... | 53 * // ... other types ... |
38 * default: | 54 * default: |
39 * // Current token is an arbitrary code point. | 55 * // Current token is an arbitrary code point. |
40 * // The variable typeOrCp is the code point. | 56 * // The variable typeOrCp is the code point. |
41 * break; | 57 * break; |
42 * } | 58 * } |
43 * } | 59 * } |
44 * </pre> | 60 * </pre> |
45 */ | 61 */ |
46 public class AffixUtils { | 62 public class AffixUtils { |
47 | 63 |
48 private static final int STATE_BASE = 0; | 64 private static final int STATE_BASE = 0; |
49 private static final int STATE_FIRST_QUOTE = 1; | 65 private static final int STATE_FIRST_QUOTE = 1; |
50 private static final int STATE_INSIDE_QUOTE = 2; | 66 private static final int STATE_INSIDE_QUOTE = 2; |
51 private static final int STATE_AFTER_QUOTE = 3; | 67 private static final int STATE_AFTER_QUOTE = 3; |
52 private static final int STATE_FIRST_CURR = 4; | 68 private static final int STATE_FIRST_CURR = 4; |
53 private static final int STATE_SECOND_CURR = 5; | 69 private static final int STATE_SECOND_CURR = 5; |
54 private static final int STATE_THIRD_CURR = 6; | 70 private static final int STATE_THIRD_CURR = 6; |
55 private static final int STATE_FOURTH_CURR = 7; | 71 private static final int STATE_FOURTH_CURR = 7; |
56 private static final int STATE_FIFTH_CURR = 8; | 72 private static final int STATE_FIFTH_CURR = 8; |
57 private static final int STATE_OVERFLOW_CURR = 9; | 73 private static final int STATE_OVERFLOW_CURR = 9; |
58 | 74 |
59 /** Represents a literal character; the value is stored in the code point fiel
d. */ | 75 /** Represents a literal character; the value is stored in the code point fi
eld. */ |
60 private static final int TYPE_CODEPOINT = 0; | 76 private static final int TYPE_CODEPOINT = 0; |
61 | 77 |
62 /** Represents a minus sign symbol '-'. */ | 78 /** Represents a minus sign symbol '-'. */ |
63 public static final int TYPE_MINUS_SIGN = -1; | 79 public static final int TYPE_MINUS_SIGN = -1; |
64 | 80 |
65 /** Represents a plus sign symbol '+'. */ | 81 /** Represents a plus sign symbol '+'. */ |
66 public static final int TYPE_PLUS_SIGN = -2; | 82 public static final int TYPE_PLUS_SIGN = -2; |
67 | 83 |
68 /** Represents a percent sign symbol '%'. */ | 84 /** Represents a percent sign symbol '%'. */ |
69 public static final int TYPE_PERCENT = -3; | 85 public static final int TYPE_PERCENT = -3; |
70 | 86 |
71 /** Represents a permille sign symbol '‰'. */ | 87 /** Represents a permille sign symbol '‰'. */ |
72 public static final int TYPE_PERMILLE = -4; | 88 public static final int TYPE_PERMILLE = -4; |
73 | 89 |
74 /** Represents a single currency symbol '¤'. */ | 90 /** Represents a single currency symbol '¤'. */ |
75 public static final int TYPE_CURRENCY_SINGLE = -5; | 91 public static final int TYPE_CURRENCY_SINGLE = -5; |
76 | 92 |
77 /** Represents a double currency symbol '¤¤'. */ | 93 /** Represents a double currency symbol '¤¤'. */ |
78 public static final int TYPE_CURRENCY_DOUBLE = -6; | 94 public static final int TYPE_CURRENCY_DOUBLE = -6; |
79 | 95 |
80 /** Represents a triple currency symbol '¤¤¤'. */ | 96 /** Represents a triple currency symbol '¤¤¤'. */ |
81 public static final int TYPE_CURRENCY_TRIPLE = -7; | 97 public static final int TYPE_CURRENCY_TRIPLE = -7; |
82 | 98 |
83 /** Represents a quadruple currency symbol '¤¤¤¤'. */ | 99 /** Represents a quadruple currency symbol '¤¤¤¤'. */ |
84 public static final int TYPE_CURRENCY_QUAD = -8; | 100 public static final int TYPE_CURRENCY_QUAD = -8; |
85 | 101 |
86 /** Represents a quintuple currency symbol '¤¤¤¤¤'. */ | 102 /** Represents a quintuple currency symbol '¤¤¤¤¤'. */ |
87 public static final int TYPE_CURRENCY_QUINT = -9; | 103 public static final int TYPE_CURRENCY_QUINT = -9; |
88 | 104 |
89 /** Represents a sequence of six or more currency symbols. */ | 105 /** Represents a sequence of six or more currency symbols. */ |
90 public static final int TYPE_CURRENCY_OVERFLOW = -15; | 106 public static final int TYPE_CURRENCY_OVERFLOW = -15; |
91 | 107 |
92 public static interface SymbolProvider { | 108 public static interface SymbolProvider { |
93 public CharSequence getSymbol(int type); | 109 public CharSequence getSymbol(int type); |
94 } | 110 } |
95 | 111 |
96 /** | 112 /** |
97 * Estimates the number of code points present in an unescaped version of the
affix pattern string | 113 * Estimates the number of code points present in an unescaped version of th
e affix pattern string |
98 * (one that would be returned by {@link #unescape}), assuming that all interp
olated symbols | 114 * (one that would be returned by {@link #unescape}), assuming that all inte
rpolated symbols consume |
99 * consume one code point and that currencies consume as many code points as t
heir symbol width. | 115 * one code point and that currencies consume as many code points as their s
ymbol width. Used for |
100 * Used for computing padding width. | 116 * computing padding width. |
101 * | 117 * |
102 * @param patternString The original string whose width will be estimated. | 118 * @param patternString |
103 * @return The length of the unescaped string. | 119 * The original string whose width will be estimated. |
104 */ | 120 * @return The length of the unescaped string. |
105 public static int estimateLength(CharSequence patternString) { | 121 */ |
106 if (patternString == null) return 0; | 122 public static int estimateLength(CharSequence patternString) { |
107 int state = STATE_BASE; | 123 if (patternString == null) |
108 int offset = 0; | 124 return 0; |
109 int length = 0; | 125 int state = STATE_BASE; |
110 for (; offset < patternString.length(); ) { | 126 int offset = 0; |
111 int cp = Character.codePointAt(patternString, offset); | 127 int length = 0; |
112 | 128 for (; offset < patternString.length();) { |
113 switch (state) { | 129 int cp = Character.codePointAt(patternString, offset); |
| 130 |
| 131 switch (state) { |
| 132 case STATE_BASE: |
| 133 if (cp == '\'') { |
| 134 // First quote |
| 135 state = STATE_FIRST_QUOTE; |
| 136 } else { |
| 137 // Unquoted symbol |
| 138 length++; |
| 139 } |
| 140 break; |
| 141 case STATE_FIRST_QUOTE: |
| 142 if (cp == '\'') { |
| 143 // Repeated quote |
| 144 length++; |
| 145 state = STATE_BASE; |
| 146 } else { |
| 147 // Quoted code point |
| 148 length++; |
| 149 state = STATE_INSIDE_QUOTE; |
| 150 } |
| 151 break; |
| 152 case STATE_INSIDE_QUOTE: |
| 153 if (cp == '\'') { |
| 154 // End of quoted sequence |
| 155 state = STATE_AFTER_QUOTE; |
| 156 } else { |
| 157 // Quoted code point |
| 158 length++; |
| 159 } |
| 160 break; |
| 161 case STATE_AFTER_QUOTE: |
| 162 if (cp == '\'') { |
| 163 // Double quote inside of quoted sequence |
| 164 length++; |
| 165 state = STATE_INSIDE_QUOTE; |
| 166 } else { |
| 167 // Unquoted symbol |
| 168 length++; |
| 169 } |
| 170 break; |
| 171 default: |
| 172 throw new AssertionError(); |
| 173 } |
| 174 |
| 175 offset += Character.charCount(cp); |
| 176 } |
| 177 |
| 178 switch (state) { |
| 179 case STATE_FIRST_QUOTE: |
| 180 case STATE_INSIDE_QUOTE: |
| 181 throw new IllegalArgumentException("Unterminated quote: \"" + patter
nString + "\""); |
| 182 default: |
| 183 break; |
| 184 } |
| 185 |
| 186 return length; |
| 187 } |
| 188 |
| 189 /** |
| 190 * Takes a string and escapes (quotes) characters that have special meaning
in the affix pattern |
| 191 * syntax. This function does not reverse-lookup symbols. |
| 192 * |
| 193 * <p> |
| 194 * Example input: "-$x"; example output: "'-'$x" |
| 195 * |
| 196 * @param input |
| 197 * The string to be escaped. |
| 198 * @param output |
| 199 * The string builder to which to append the escaped string. |
| 200 * @return The number of chars (UTF-16 code units) appended to the output. |
| 201 */ |
| 202 public static int escape(CharSequence input, StringBuilder output) { |
| 203 if (input == null) |
| 204 return 0; |
| 205 int state = STATE_BASE; |
| 206 int offset = 0; |
| 207 int startLength = output.length(); |
| 208 for (; offset < input.length();) { |
| 209 int cp = Character.codePointAt(input, offset); |
| 210 |
| 211 switch (cp) { |
| 212 case '\'': |
| 213 output.append("''"); |
| 214 break; |
| 215 |
| 216 case '-': |
| 217 case '+': |
| 218 case '%': |
| 219 case '‰': |
| 220 case '¤': |
| 221 if (state == STATE_BASE) { |
| 222 output.append('\''); |
| 223 output.appendCodePoint(cp); |
| 224 state = STATE_INSIDE_QUOTE; |
| 225 } else { |
| 226 output.appendCodePoint(cp); |
| 227 } |
| 228 break; |
| 229 |
| 230 default: |
| 231 if (state == STATE_INSIDE_QUOTE) { |
| 232 output.append('\''); |
| 233 output.appendCodePoint(cp); |
| 234 state = STATE_BASE; |
| 235 } else { |
| 236 output.appendCodePoint(cp); |
| 237 } |
| 238 break; |
| 239 } |
| 240 offset += Character.charCount(cp); |
| 241 } |
| 242 |
| 243 if (state == STATE_INSIDE_QUOTE) { |
| 244 output.append('\''); |
| 245 } |
| 246 |
| 247 return output.length() - startLength; |
| 248 } |
| 249 |
| 250 /** Version of {@link #escape} that returns a String, or null if input is nu
ll. */ |
| 251 public static String escape(CharSequence input) { |
| 252 if (input == null) |
| 253 return null; |
| 254 StringBuilder sb = new StringBuilder(); |
| 255 escape(input, sb); |
| 256 return sb.toString(); |
| 257 } |
| 258 |
| 259 public static final NumberFormat.Field getFieldForType(int type) { |
| 260 switch (type) { |
| 261 case TYPE_MINUS_SIGN: |
| 262 return NumberFormat.Field.SIGN; |
| 263 case TYPE_PLUS_SIGN: |
| 264 return NumberFormat.Field.SIGN; |
| 265 case TYPE_PERCENT: |
| 266 return NumberFormat.Field.PERCENT; |
| 267 case TYPE_PERMILLE: |
| 268 return NumberFormat.Field.PERMILLE; |
| 269 case TYPE_CURRENCY_SINGLE: |
| 270 return NumberFormat.Field.CURRENCY; |
| 271 case TYPE_CURRENCY_DOUBLE: |
| 272 return NumberFormat.Field.CURRENCY; |
| 273 case TYPE_CURRENCY_TRIPLE: |
| 274 return NumberFormat.Field.CURRENCY; |
| 275 case TYPE_CURRENCY_QUAD: |
| 276 return NumberFormat.Field.CURRENCY; |
| 277 case TYPE_CURRENCY_QUINT: |
| 278 return NumberFormat.Field.CURRENCY; |
| 279 case TYPE_CURRENCY_OVERFLOW: |
| 280 return NumberFormat.Field.CURRENCY; |
| 281 default: |
| 282 throw new AssertionError(); |
| 283 } |
| 284 } |
| 285 |
| 286 /** |
| 287 * Executes the unescape state machine. Replaces the unquoted characters "-"
, "+", "%", "‰", and "¤" |
| 288 * with the corresponding symbols provided by the {@link SymbolProvider}, an
d inserts the result into |
| 289 * the NumberStringBuilder at the requested location. |
| 290 * |
| 291 * <p> |
| 292 * Example input: "'-'¤x"; example output: "-$x" |
| 293 * |
| 294 * @param affixPattern |
| 295 * The original string to be unescaped. |
| 296 * @param output |
| 297 * The NumberStringBuilder to mutate with the result. |
| 298 * @param position |
| 299 * The index into the NumberStringBuilder to insert the strin
g. |
| 300 * @param provider |
| 301 * An object to generate locale symbols. |
| 302 * @return The length of the string added to output. |
| 303 */ |
| 304 public static int unescape( |
| 305 CharSequence affixPattern, |
| 306 NumberStringBuilder output, |
| 307 int position, |
| 308 SymbolProvider provider) { |
| 309 assert affixPattern != null; |
| 310 int length = 0; |
| 311 long tag = 0L; |
| 312 while (hasNext(tag, affixPattern)) { |
| 313 tag = nextToken(tag, affixPattern); |
| 314 int typeOrCp = getTypeOrCp(tag); |
| 315 if (typeOrCp == TYPE_CURRENCY_OVERFLOW) { |
| 316 // Don't go to the provider for this special case |
| 317 length += output.insertCodePoint(position + length, 0xFFFD, Numb
erFormat.Field.CURRENCY); |
| 318 } else if (typeOrCp < 0) { |
| 319 length += output.insert(position + length, |
| 320 provider.getSymbol(typeOrCp), |
| 321 getFieldForType(typeOrCp)); |
| 322 } else { |
| 323 length += output.insertCodePoint(position + length, typeOrCp, nu
ll); |
| 324 } |
| 325 } |
| 326 return length; |
| 327 } |
| 328 |
| 329 /** |
| 330 * Same as {@link #unescape}, but only calculates the code point count. Mor
e efficient than |
| 331 * {@link #unescape} if you only need the length but not the string itself. |
| 332 * |
| 333 * @param affixPattern |
| 334 * The original string to be unescaped. |
| 335 * @param provider |
| 336 * An object to generate locale symbols. |
| 337 * @return The number of code points in the unescaped string. |
| 338 */ |
| 339 public static int unescapedCodePointCount(CharSequence affixPattern, SymbolP
rovider provider) { |
| 340 int length = 0; |
| 341 long tag = 0L; |
| 342 while (hasNext(tag, affixPattern)) { |
| 343 tag = nextToken(tag, affixPattern); |
| 344 int typeOrCp = getTypeOrCp(tag); |
| 345 if (typeOrCp == TYPE_CURRENCY_OVERFLOW) { |
| 346 length += 1; |
| 347 } else if (typeOrCp < 0) { |
| 348 CharSequence symbol = provider.getSymbol(typeOrCp); |
| 349 length += Character.codePointCount(symbol, 0, symbol.length()); |
| 350 } else { |
| 351 length += 1; |
| 352 } |
| 353 } |
| 354 return length; |
| 355 } |
| 356 |
| 357 /** |
| 358 * Checks whether the given affix pattern contains at least one token of the
given type, which is one |
| 359 * of the constants "TYPE_" in {@link AffixUtils}. |
| 360 * |
| 361 * @param affixPattern |
| 362 * The affix pattern to check. |
| 363 * @param type |
| 364 * The token type. |
| 365 * @return true if the affix pattern contains the given token type; false ot
herwise. |
| 366 */ |
| 367 public static boolean containsType(CharSequence affixPattern, int type) { |
| 368 if (affixPattern == null || affixPattern.length() == 0) { |
| 369 return false; |
| 370 } |
| 371 long tag = 0L; |
| 372 while (hasNext(tag, affixPattern)) { |
| 373 tag = nextToken(tag, affixPattern); |
| 374 if (getTypeOrCp(tag) == type) { |
| 375 return true; |
| 376 } |
| 377 } |
| 378 return false; |
| 379 } |
| 380 |
| 381 /** |
| 382 * Checks whether the specified affix pattern has any unquoted currency symb
ols ("¤"). |
| 383 * |
| 384 * @param affixPattern |
| 385 * The string to check for currency symbols. |
| 386 * @return true if the literal has at least one unquoted currency symbol; fa
lse otherwise. |
| 387 */ |
| 388 public static boolean hasCurrencySymbols(CharSequence affixPattern) { |
| 389 if (affixPattern == null || affixPattern.length() == 0) |
| 390 return false; |
| 391 long tag = 0L; |
| 392 while (hasNext(tag, affixPattern)) { |
| 393 tag = nextToken(tag, affixPattern); |
| 394 int typeOrCp = getTypeOrCp(tag); |
| 395 if (typeOrCp < 0 && getFieldForType(typeOrCp) == NumberFormat.Field.
CURRENCY) { |
| 396 return true; |
| 397 } |
| 398 } |
| 399 return false; |
| 400 } |
| 401 |
| 402 /** |
| 403 * Replaces all occurrences of tokens with the given type with the given rep
lacement char. |
| 404 * |
| 405 * @param affixPattern |
| 406 * The source affix pattern (does not get modified). |
| 407 * @param type |
| 408 * The token type. |
| 409 * @param replacementChar |
| 410 * The char to substitute in place of chars of the given token ty
pe. |
| 411 * @return A string containing the new affix pattern. |
| 412 */ |
| 413 public static String replaceType(CharSequence affixPattern, int type, char r
eplacementChar) { |
| 414 if (affixPattern == null || affixPattern.length() == 0) |
| 415 return ""; |
| 416 char[] chars = affixPattern.toString().toCharArray(); |
| 417 long tag = 0L; |
| 418 while (hasNext(tag, affixPattern)) { |
| 419 tag = nextToken(tag, affixPattern); |
| 420 if (getTypeOrCp(tag) == type) { |
| 421 int offset = getOffset(tag); |
| 422 chars[offset - 1] = replacementChar; |
| 423 } |
| 424 } |
| 425 return new String(chars); |
| 426 } |
| 427 |
| 428 /** |
| 429 * Appends a new affix pattern with all symbols and code points in the given
"ignorables" UnicodeSet |
| 430 * trimmed from the beginning and end. Similar to calling unescape with a sy
mbol provider that always |
| 431 * returns the empty string. |
| 432 * |
| 433 * <p> |
| 434 * Accepts and returns a StringBuilder, allocating it only if necessary. |
| 435 */ |
| 436 public static StringBuilder trimSymbolsAndIgnorables( |
| 437 CharSequence affixPattern, |
| 438 UnicodeSet ignorables, |
| 439 StringBuilder sb) { |
| 440 assert affixPattern != null; |
| 441 long tag = 0L; |
| 442 int trailingIgnorables = 0; |
| 443 while (hasNext(tag, affixPattern)) { |
| 444 tag = nextToken(tag, affixPattern); |
| 445 int typeOrCp = getTypeOrCp(tag); |
| 446 if (typeOrCp >= 0) { |
| 447 if (!ignorables.contains(typeOrCp)) { |
| 448 if (sb == null) { |
| 449 // Lazy-initialize the StringBuilder |
| 450 sb = new StringBuilder(); |
| 451 } |
| 452 sb.appendCodePoint(typeOrCp); |
| 453 trailingIgnorables = 0; |
| 454 } else if (sb != null && sb.length() > 0) { |
| 455 sb.appendCodePoint(typeOrCp); |
| 456 trailingIgnorables += Character.charCount(typeOrCp); |
| 457 } |
| 458 } |
| 459 } |
| 460 if (trailingIgnorables > 0) { |
| 461 sb.setLength(sb.length() - trailingIgnorables); |
| 462 } |
| 463 return sb; |
| 464 } |
| 465 |
| 466 /** |
| 467 * Returns the next token from the affix pattern. |
| 468 * |
| 469 * @param tag |
| 470 * A bitmask used for keeping track of state from token to token.
The initial value should |
| 471 * be 0L. |
| 472 * @param patternString |
| 473 * The affix pattern. |
| 474 * @return The bitmask tag to pass to the next call of this method to retrie
ve the following token |
| 475 * (never negative), or -1 if there were no more tokens in the affix
pattern. |
| 476 * @see #hasNext |
| 477 */ |
| 478 public static long nextToken(long tag, CharSequence patternString) { |
| 479 int offset = getOffset(tag); |
| 480 int state = getState(tag); |
| 481 for (; offset < patternString.length();) { |
| 482 int cp = Character.codePointAt(patternString, offset); |
| 483 int count = Character.charCount(cp); |
| 484 |
| 485 switch (state) { |
| 486 case STATE_BASE: |
| 487 switch (cp) { |
| 488 case '\'': |
| 489 state = STATE_FIRST_QUOTE; |
| 490 offset += count; |
| 491 // continue to the next code point |
| 492 break; |
| 493 case '-': |
| 494 return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE,
0); |
| 495 case '+': |
| 496 return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0
); |
| 497 case '%': |
| 498 return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0); |
| 499 case '‰': |
| 500 return makeTag(offset + count, TYPE_PERMILLE, STATE_BASE, 0)
; |
| 501 case '¤': |
| 502 state = STATE_FIRST_CURR; |
| 503 offset += count; |
| 504 // continue to the next code point |
| 505 break; |
| 506 default: |
| 507 return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, c
p); |
| 508 } |
| 509 break; |
| 510 case STATE_FIRST_QUOTE: |
| 511 if (cp == '\'') { |
| 512 return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, c
p); |
| 513 } else { |
| 514 return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_
QUOTE, cp); |
| 515 } |
| 516 case STATE_INSIDE_QUOTE: |
| 517 if (cp == '\'') { |
| 518 state = STATE_AFTER_QUOTE; |
| 519 offset += count; |
| 520 // continue to the next code point |
| 521 break; |
| 522 } else { |
| 523 return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_
QUOTE, cp); |
| 524 } |
| 525 case STATE_AFTER_QUOTE: |
| 526 if (cp == '\'') { |
| 527 return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_
QUOTE, cp); |
| 528 } else { |
| 529 state = STATE_BASE; |
| 530 // re-evaluate this code point |
| 531 break; |
| 532 } |
| 533 case STATE_FIRST_CURR: |
| 534 if (cp == '¤') { |
| 535 state = STATE_SECOND_CURR; |
| 536 offset += count; |
| 537 // continue to the next code point |
| 538 break; |
| 539 } else { |
| 540 return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0); |
| 541 } |
| 542 case STATE_SECOND_CURR: |
| 543 if (cp == '¤') { |
| 544 state = STATE_THIRD_CURR; |
| 545 offset += count; |
| 546 // continue to the next code point |
| 547 break; |
| 548 } else { |
| 549 return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0); |
| 550 } |
| 551 case STATE_THIRD_CURR: |
| 552 if (cp == '¤') { |
| 553 state = STATE_FOURTH_CURR; |
| 554 offset += count; |
| 555 // continue to the next code point |
| 556 break; |
| 557 } else { |
| 558 return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0); |
| 559 } |
| 560 case STATE_FOURTH_CURR: |
| 561 if (cp == '¤') { |
| 562 state = STATE_FIFTH_CURR; |
| 563 offset += count; |
| 564 // continue to the next code point |
| 565 break; |
| 566 } else { |
| 567 return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0); |
| 568 } |
| 569 case STATE_FIFTH_CURR: |
| 570 if (cp == '¤') { |
| 571 state = STATE_OVERFLOW_CURR; |
| 572 offset += count; |
| 573 // continue to the next code point |
| 574 break; |
| 575 } else { |
| 576 return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0); |
| 577 } |
| 578 case STATE_OVERFLOW_CURR: |
| 579 if (cp == '¤') { |
| 580 offset += count; |
| 581 // continue to the next code point and loop back to this sta
te |
| 582 break; |
| 583 } else { |
| 584 return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0
); |
| 585 } |
| 586 default: |
| 587 throw new AssertionError(); |
| 588 } |
| 589 } |
| 590 // End of string |
| 591 switch (state) { |
114 case STATE_BASE: | 592 case STATE_BASE: |
115 if (cp == '\'') { | 593 // No more tokens in string. |
116 // First quote | 594 return -1L; |
117 state = STATE_FIRST_QUOTE; | |
118 } else { | |
119 // Unquoted symbol | |
120 length++; | |
121 } | |
122 break; | |
123 case STATE_FIRST_QUOTE: | 595 case STATE_FIRST_QUOTE: |
124 if (cp == '\'') { | |
125 // Repeated quote | |
126 length++; | |
127 state = STATE_BASE; | |
128 } else { | |
129 // Quoted code point | |
130 length++; | |
131 state = STATE_INSIDE_QUOTE; | |
132 } | |
133 break; | |
134 case STATE_INSIDE_QUOTE: | 596 case STATE_INSIDE_QUOTE: |
135 if (cp == '\'') { | 597 // For consistent behavior with the JDK and ICU 58, throw an excepti
on here. |
136 // End of quoted sequence | 598 throw new IllegalArgumentException( |
137 state = STATE_AFTER_QUOTE; | 599 "Unterminated quote in pattern affix: \"" + patternString +
"\""); |
138 } else { | |
139 // Quoted code point | |
140 length++; | |
141 } | |
142 break; | |
143 case STATE_AFTER_QUOTE: | 600 case STATE_AFTER_QUOTE: |
144 if (cp == '\'') { | 601 // No more tokens in string. |
145 // Double quote inside of quoted sequence | 602 return -1L; |
146 length++; | 603 case STATE_FIRST_CURR: |
147 state = STATE_INSIDE_QUOTE; | 604 return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0); |
148 } else { | 605 case STATE_SECOND_CURR: |
149 // Unquoted symbol | 606 return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0); |
150 length++; | 607 case STATE_THIRD_CURR: |
151 } | 608 return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0); |
152 break; | 609 case STATE_FOURTH_CURR: |
| 610 return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0); |
| 611 case STATE_FIFTH_CURR: |
| 612 return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0); |
| 613 case STATE_OVERFLOW_CURR: |
| 614 return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); |
153 default: | 615 default: |
154 throw new AssertionError(); | 616 throw new AssertionError(); |
155 } | 617 } |
156 | 618 } |
157 offset += Character.charCount(cp); | 619 |
158 } | 620 /** |
159 | 621 * Returns whether the affix pattern string has any more tokens to be retrie
ved from a call to |
160 switch (state) { | 622 * {@link #nextToken}. |
161 case STATE_FIRST_QUOTE: | 623 * |
162 case STATE_INSIDE_QUOTE: | 624 * @param tag |
163 throw new IllegalArgumentException("Unterminated quote: \"" + patternStr
ing + "\""); | 625 * The bitmask tag of the previous token, as returned by {@link #
nextToken}. |
164 default: | 626 * @param string |
165 break; | 627 * The affix pattern. |
166 } | 628 * @return true if there are more tokens to consume; false otherwise. |
167 | 629 */ |
168 return length; | 630 public static boolean hasNext(long tag, CharSequence string) { |
169 } | 631 assert tag >= 0; |
170 | 632 int state = getState(tag); |
171 /** | |
172 * Takes a string and escapes (quotes) characters that have special meaning in
the affix pattern | |
173 * syntax. This function does not reverse-lookup symbols. | |
174 * | |
175 * <p>Example input: "-$x"; example output: "'-'$x" | |
176 * | |
177 * @param input The string to be escaped. | |
178 * @param output The string builder to which to append the escaped string. | |
179 * @return The number of chars (UTF-16 code units) appended to the output. | |
180 */ | |
181 public static int escape(CharSequence input, StringBuilder output) { | |
182 if (input == null) return 0; | |
183 int state = STATE_BASE; | |
184 int offset = 0; | |
185 int startLength = output.length(); | |
186 for (; offset < input.length(); ) { | |
187 int cp = Character.codePointAt(input, offset); | |
188 | |
189 switch (cp) { | |
190 case '\'': | |
191 output.append("''"); | |
192 break; | |
193 | |
194 case '-': | |
195 case '+': | |
196 case '%': | |
197 case '‰': | |
198 case '¤': | |
199 if (state == STATE_BASE) { | |
200 output.append('\''); | |
201 output.appendCodePoint(cp); | |
202 state = STATE_INSIDE_QUOTE; | |
203 } else { | |
204 output.appendCodePoint(cp); | |
205 } | |
206 break; | |
207 | |
208 default: | |
209 if (state == STATE_INSIDE_QUOTE) { | |
210 output.append('\''); | |
211 output.appendCodePoint(cp); | |
212 state = STATE_BASE; | |
213 } else { | |
214 output.appendCodePoint(cp); | |
215 } | |
216 break; | |
217 } | |
218 offset += Character.charCount(cp); | |
219 } | |
220 | |
221 if (state == STATE_INSIDE_QUOTE) { | |
222 output.append('\''); | |
223 } | |
224 | |
225 return output.length() - startLength; | |
226 } | |
227 | |
228 /** Version of {@link #escape} that returns a String, or null if input is null
. */ | |
229 public static String escape(CharSequence input) { | |
230 if (input == null) return null; | |
231 StringBuilder sb = new StringBuilder(); | |
232 escape(input, sb); | |
233 return sb.toString(); | |
234 } | |
235 | |
236 public static final NumberFormat.Field getFieldForType(int type) { | |
237 switch (type) { | |
238 case TYPE_MINUS_SIGN: | |
239 return NumberFormat.Field.SIGN; | |
240 case TYPE_PLUS_SIGN: | |
241 return NumberFormat.Field.SIGN; | |
242 case TYPE_PERCENT: | |
243 return NumberFormat.Field.PERCENT; | |
244 case TYPE_PERMILLE: | |
245 return NumberFormat.Field.PERMILLE; | |
246 case TYPE_CURRENCY_SINGLE: | |
247 return NumberFormat.Field.CURRENCY; | |
248 case TYPE_CURRENCY_DOUBLE: | |
249 return NumberFormat.Field.CURRENCY; | |
250 case TYPE_CURRENCY_TRIPLE: | |
251 return NumberFormat.Field.CURRENCY; | |
252 case TYPE_CURRENCY_QUAD: | |
253 return NumberFormat.Field.CURRENCY; | |
254 case TYPE_CURRENCY_QUINT: | |
255 return NumberFormat.Field.CURRENCY; | |
256 case TYPE_CURRENCY_OVERFLOW: | |
257 return NumberFormat.Field.CURRENCY; | |
258 default: | |
259 throw new AssertionError(); | |
260 } | |
261 } | |
262 | |
263 /** | |
264 * Executes the unescape state machine. Replaces the unquoted characters "-",
"+", "%", "‰", and | |
265 * "¤" with the corresponding symbols provided by the {@link SymbolProvider},
and inserts the | |
266 * result into the NumberStringBuilder at the requested location. | |
267 * | |
268 * <p>Example input: "'-'¤x"; example output: "-$x" | |
269 * | |
270 * @param affixPattern The original string to be unescaped. | |
271 * @param output The NumberStringBuilder to mutate with the result. | |
272 * @param position The index into the NumberStringBuilder to insert the st
ring. | |
273 * @param provider An object to generate locale symbols. | |
274 * @return The length of the string added to output. | |
275 */ | |
276 public static int unescape( | |
277 CharSequence affixPattern, | |
278 NumberStringBuilder output, | |
279 int position, | |
280 SymbolProvider provider) { | |
281 assert affixPattern != null; | |
282 int length = 0; | |
283 long tag = 0L; | |
284 while (hasNext(tag, affixPattern)) { | |
285 tag = nextToken(tag, affixPattern); | |
286 int typeOrCp = getTypeOrCp(tag); | |
287 if (typeOrCp == TYPE_CURRENCY_OVERFLOW) { | |
288 // Don't go to the provider for this special case | |
289 length += output.insertCodePoint(position + length, 0xFFFD, NumberFormat
.Field.CURRENCY); | |
290 } else if (typeOrCp < 0) { | |
291 length += output.insert(position + length, provider.getSymbol(typeOrCp),
getFieldForType(typeOrCp)); | |
292 } else { | |
293 length += output.insertCodePoint(position + length, typeOrCp, null); | |
294 } | |
295 } | |
296 return length; | |
297 } | |
298 | |
299 /** | |
299 * Same as {@link #unescape}, but only calculates the code point count. More
efficient than {@link #unescape} | |
301 * if you only need the length but not the string itself. | |
302 * | |
303 * @param affixPattern The original string to be unescaped. | |
304 * @param provider An object to generate locale symbols. | |
305 * @return The number of code points in the unescaped string. | |
306 */ | |
307 public static int unescapedCodePointCount(CharSequence affixPattern, SymbolPro
vider provider) { | |
308 int length = 0; | |
309 long tag = 0L; | |
310 while (hasNext(tag, affixPattern)) { | |
311 tag = nextToken(tag, affixPattern); | |
312 int typeOrCp = getTypeOrCp(tag); | |
313 if (typeOrCp == TYPE_CURRENCY_OVERFLOW) { | |
314 length += 1; | |
315 } else if (typeOrCp < 0) { | |
316 CharSequence symbol = provider.getSymbol(typeOrCp); | |
317 length += Character.codePointCount(symbol, 0, symbol.length()); | |
318 } else { | |
319 length += 1; | |
320 } | |
321 } | |
322 return length; | |
323 } | |
324 | |
325 /** | |
326 * Checks whether the given affix pattern contains at least one token of the g
iven type, which is | |
327 * one of the constants "TYPE_" in {@link AffixUtils}. | |
328 * | |
329 * @param affixPattern The affix pattern to check. | |
330 * @param type The token type. | |
331 * @return true if the affix pattern contains the given token type; false othe
rwise. | |
332 */ | |
333 public static boolean containsType(CharSequence affixPattern, int type) { | |
334 if (affixPattern == null || affixPattern.length() == 0) { | |
335 return false; | |
336 } | |
337 long tag = 0L; | |
338 while (hasNext(tag, affixPattern)) { | |
339 tag = nextToken(tag, affixPattern); | |
340 if (getTypeOrCp(tag) == type) { | |
341 return true; | |
342 } | |
343 } | |
344 return false; | |
345 } | |
346 | |
347 /** | |
348 * Checks whether the specified affix pattern has any unquoted currency symbol
s ("¤"). | |
349 * | |
350 * @param affixPattern The string to check for currency symbols. | |
351 * @return true if the literal has at least one unquoted currency symbol; fals
e otherwise. | |
352 */ | |
353 public static boolean hasCurrencySymbols(CharSequence affixPattern) { | |
354 if (affixPattern == null || affixPattern.length() == 0) return false; | |
355 long tag = 0L; | |
356 while (hasNext(tag, affixPattern)) { | |
357 tag = nextToken(tag, affixPattern); | |
358 int typeOrCp = getTypeOrCp(tag); | |
359 if (typeOrCp < 0 && getFieldForType(typeOrCp) == NumberFormat.Field.CURREN
CY) { | |
360 return true; | |
361 } | |
362 } | |
363 return false; | |
364 } | |
365 | |
366 /** | |
367 * Replaces all occurrences of tokens with the given type with the given repla
cement char. | |
368 * | |
369 * @param affixPattern The source affix pattern (does not get modified). | |
370 * @param type The token type. | |
371 * @param replacementChar The char to substitute in place of chars of the give
n token type. | |
372 * @return A string containing the new affix pattern. | |
373 */ | |
374 public static String replaceType(CharSequence affixPattern, int type, char rep
lacementChar) { | |
375 if (affixPattern == null || affixPattern.length() == 0) return ""; | |
376 char[] chars = affixPattern.toString().toCharArray(); | |
377 long tag = 0L; | |
378 while (hasNext(tag, affixPattern)) { | |
379 tag = nextToken(tag, affixPattern); | |
380 if (getTypeOrCp(tag) == type) { | |
381 int offset = getOffset(tag); | 633 int offset = getOffset(tag); |
382 chars[offset - 1] = replacementChar; | 634 // Special case: the last character in string is an end quote. |
383 } | 635 if (state == STATE_INSIDE_QUOTE |
384 } | 636 && offset == string.length() - 1 |
385 return new String(chars); | 637 && string.charAt(offset) == '\'') { |
386 } | 638 return false; |
387 | 639 } else if (state != STATE_BASE) { |
388 /** | 640 return true; |
389 * Returns the next token from the affix pattern. | 641 } else { |
390 * | 642 return offset < string.length(); |
391 * @param tag A bitmask used for keeping track of state from token to token. T
he initial value | 643 } |
392 * should be 0L. | 644 } |
393 * @param patternString The affix pattern. | 645 |
394 * @return The bitmask tag to pass to the next call of this method to retrieve
the following token | 646 /** |
395 * (never negative), or -1 if there were no more tokens in the affix patte
rn. | 647 * This function helps determine the identity of the token consumed by {@lin
k #nextToken}. Converts |
396 * @see #hasNext | 648 * from a bitmask tag, based on a call to {@link #nextToken}, to its corresp
onding symbol type or |
397 */ | 649 * code point. |
398 public static long nextToken(long tag, CharSequence patternString) { | 650 * |
399 int offset = getOffset(tag); | 651 * @param tag |
400 int state = getState(tag); | 652 * The bitmask tag of the current token, as returned by {@link #n
extToken}. |
401 for (; offset < patternString.length(); ) { | 653 * @return If less than zero, a symbol type corresponding to one of the <cod
e>TYPE_</code> constants, |
402 int cp = Character.codePointAt(patternString, offset); | 654 * such as {@link #TYPE_MINUS_SIGN}. If greater than or equal to zer
o, a literal code point. |
403 int count = Character.charCount(cp); | 655 */ |
404 | 656 public static int getTypeOrCp(long tag) { |
405 switch (state) { | 657 assert tag >= 0; |
406 case STATE_BASE: | 658 int type = getType(tag); |
407 switch (cp) { | 659 return (type == TYPE_CODEPOINT) ? getCodePoint(tag) : -type; |
408 case '\'': | 660 } |
409 state = STATE_FIRST_QUOTE; | 661 |
410 offset += count; | 662 /** |
411 // continue to the next code point | 663 * Encodes the given values into a 64-bit tag. |
412 break; | 664 * |
413 case '-': | 665 * <ul> |
414 return makeTag(offset + count, TYPE_MINUS_SIGN, STATE_BASE, 0); | 666 * <li>Bits 0-31 => offset (int32) |
415 case '+': | 667 * <li>Bits 32-35 => type (uint4) |
416 return makeTag(offset + count, TYPE_PLUS_SIGN, STATE_BASE, 0); | 668 * <li>Bits 36-39 => state (uint4) |
417 case '%': | 669 * <li>Bits 40-60 => code point (uint21) |
418 return makeTag(offset + count, TYPE_PERCENT, STATE_BASE, 0); | 670 * <li>Bits 61-63 => unused |
419 case '‰': | 671 * </ul> |
420 return makeTag(offset + count, TYPE_PERMILLE, STATE_BASE, 0); | 672 */ |
421 case '¤': | 673 private static long makeTag(int offset, int type, int state, int cp) { |
422 state = STATE_FIRST_CURR; | 674 long tag = 0L; |
423 offset += count; | 675 tag |= offset; |
424 // continue to the next code point | 676 tag |= (-(long) type) << 32; |
425 break; | 677 tag |= ((long) state) << 36; |
426 default: | 678 tag |= ((long) cp) << 40; |
427 return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp); | 679 assert tag >= 0; |
428 } | 680 return tag; |
429 break; | 681 } |
430 case STATE_FIRST_QUOTE: | 682 |
431 if (cp == '\'') { | 683 static int getOffset(long tag) { |
432 return makeTag(offset + count, TYPE_CODEPOINT, STATE_BASE, cp); | 684 return (int) (tag & 0xffffffff); |
433 } else { | 685 } |
434 return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, c
p); | 686 |
435 } | 687 static int getType(long tag) { |
436 case STATE_INSIDE_QUOTE: | 688 return (int) ((tag >>> 32) & 0xf); |
437 if (cp == '\'') { | 689 } |
438 state = STATE_AFTER_QUOTE; | 690 |
439 offset += count; | 691 static int getState(long tag) { |
440 // continue to the next code point | 692 return (int) ((tag >>> 36) & 0xf); |
441 break; | 693 } |
442 } else { | 694 |
443 return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, c
p); | 695 static int getCodePoint(long tag) { |
444 } | 696 return (int) (tag >>> 40); |
445 case STATE_AFTER_QUOTE: | 697 } |
446 if (cp == '\'') { | |
447 return makeTag(offset + count, TYPE_CODEPOINT, STATE_INSIDE_QUOTE, c
p); | |
448 } else { | |
449 state = STATE_BASE; | |
450 // re-evaluate this code point | |
451 break; | |
452 } | |
453 case STATE_FIRST_CURR: | |
454 if (cp == '¤') { | |
455 state = STATE_SECOND_CURR; | |
456 offset += count; | |
457 // continue to the next code point | |
458 break; | |
459 } else { | |
460 return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0); | |
461 } | |
462 case STATE_SECOND_CURR: | |
463 if (cp == '¤') { | |
464 state = STATE_THIRD_CURR; | |
465 offset += count; | |
466 // continue to the next code point | |
467 break; | |
468 } else { | |
469 return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0); | |
470 } | |
471 case STATE_THIRD_CURR: | |
472 if (cp == '¤') { | |
473 state = STATE_FOURTH_CURR; | |
474 offset += count; | |
475 // continue to the next code point | |
476 break; | |
477 } else { | |
478 return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0); | |
479 } | |
480 case STATE_FOURTH_CURR: | |
481 if (cp == '¤') { | |
482 state = STATE_FIFTH_CURR; | |
483 offset += count; | |
484 // continue to the next code point | |
485 break; | |
486 } else { | |
487 return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0); | |
488 } | |
489 case STATE_FIFTH_CURR: | |
490 if (cp == '¤') { | |
491 state = STATE_OVERFLOW_CURR; | |
492 offset += count; | |
493 // continue to the next code point | |
494 break; | |
495 } else { | |
496 return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0); | |
497 } | |
498 case STATE_OVERFLOW_CURR: | |
499 if (cp == '¤') { | |
500 offset += count; | |
501 // continue to the next code point and loop back to this state | |
502 break; | |
503 } else { | |
504 return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); | |
505 } | |
506 default: | |
507 throw new AssertionError(); | |
508 } | |
509 } | |
510 // End of string | |
511 switch (state) { | |
512 case STATE_BASE: | |
513 // No more tokens in string. | |
514 return -1L; | |
515 case STATE_FIRST_QUOTE: | |
516 case STATE_INSIDE_QUOTE: | |
517 // For consistent behavior with the JDK and ICU 58, throw an exception h
ere. | |
518 throw new IllegalArgumentException( | |
519 "Unterminated quote in pattern affix: \"" + patternString + "\""); | |
520 case STATE_AFTER_QUOTE: | |
521 // No more tokens in string. | |
522 return -1L; | |
523 case STATE_FIRST_CURR: | |
524 return makeTag(offset, TYPE_CURRENCY_SINGLE, STATE_BASE, 0); | |
525 case STATE_SECOND_CURR: | |
526 return makeTag(offset, TYPE_CURRENCY_DOUBLE, STATE_BASE, 0); | |
527 case STATE_THIRD_CURR: | |
528 return makeTag(offset, TYPE_CURRENCY_TRIPLE, STATE_BASE, 0); | |
529 case STATE_FOURTH_CURR: | |
530 return makeTag(offset, TYPE_CURRENCY_QUAD, STATE_BASE, 0); | |
531 case STATE_FIFTH_CURR: | |
532 return makeTag(offset, TYPE_CURRENCY_QUINT, STATE_BASE, 0); | |
533 case STATE_OVERFLOW_CURR: | |
534 return makeTag(offset, TYPE_CURRENCY_OVERFLOW, STATE_BASE, 0); | |
535 default: | |
536 throw new AssertionError(); | |
537 } | |
538 } | |
539 | |
540 /** | |
541 * Returns whether the affix pattern string has any more tokens to be retrieve
d from a call to | |
542 * {@link #nextToken}. | |
543 * | |
544 * @param tag The bitmask tag of the previous token, as returned by {@link #ne
xtToken}. | |
545 * @param string The affix pattern. | |
546 * @return true if there are more tokens to consume; false otherwise. | |
547 */ | |
548 public static boolean hasNext(long tag, CharSequence string) { | |
549 assert tag >= 0; | |
550 int state = getState(tag); | |
551 int offset = getOffset(tag); | |
552 // Special case: the last character in string is an end quote. | |
553 if (state == STATE_INSIDE_QUOTE | |
554 && offset == string.length() - 1 | |
555 && string.charAt(offset) == '\'') { | |
556 return false; | |
557 } else if (state != STATE_BASE) { | |
558 return true; | |
559 } else { | |
560 return offset < string.length(); | |
561 } | |
562 } | |
563 | |
564 /** | |
565 * This function helps determine the identity of the token consumed by {@link
#nextToken}. | |
566 * Converts from a bitmask tag, based on a call to {@link #nextToken}, to its
corresponding symbol | |
567 * type or code point. | |
568 * | |
569 * @param tag The bitmask tag of the current token, as returned by {@link #nex
tToken}. | |
570 * @return If less than zero, a symbol type corresponding to one of the <code>
TYPE_</code> | |
571 * constants, such as {@link #TYPE_MINUS_SIGN}. If greater than or equal t
o zero, a literal | |
572 * code point. | |
573 */ | |
574 public static int getTypeOrCp(long tag) { | |
575 assert tag >= 0; | |
576 int type = getType(tag); | |
577 return (type == TYPE_CODEPOINT) ? getCodePoint(tag) : -type; | |
578 } | |
579 | |
580 /** | |
581 * Encodes the given values into a 64-bit tag. | |
582 * | |
583 * <ul> | |
584 * <li>Bits 0-31 => offset (int32) | |
585 * <li>Bits 32-35 => type (uint4) | |
586 * <li>Bits 36-39 => state (uint4) | |
587 * <li>Bits 40-60 => code point (uint21) | |
588 * <li>Bits 61-63 => unused | |
589 * </ul> | |
590 */ | |
591 private static long makeTag(int offset, int type, int state, int cp) { | |
592 long tag = 0L; | |
593 tag |= offset; | |
594 tag |= (-(long) type) << 32; | |
595 tag |= ((long) state) << 36; | |
596 tag |= ((long) cp) << 40; | |
597 assert tag >= 0; | |
598 return tag; | |
599 } | |
600 | |
601 static int getOffset(long tag) { | |
602 return (int) (tag & 0xffffffff); | |
603 } | |
604 | |
605 static int getType(long tag) { | |
606 return (int) ((tag >>> 32) & 0xf); | |
607 } | |
608 | |
609 static int getState(long tag) { | |
610 return (int) ((tag >>> 36) & 0xf); | |
611 } | |
612 | |
613 static int getCodePoint(long tag) { | |
614 return (int) (tag >>> 40); | |
615 } | |
616 } | 698 } |
LEFT | RIGHT |