LEFT | RIGHT |
1 // © 2017 and later: Unicode, Inc. and others. | 1 // © 2017 and later: Unicode, Inc. and others. |
2 // License & terms of use: http://www.unicode.org/copyright.html#License | 2 // License & terms of use: http://www.unicode.org/copyright.html#License |
3 package com.ibm.icu.impl.number; | 3 package com.ibm.icu.impl.number; |
4 | 4 |
5 import java.math.BigDecimal; | 5 import java.math.BigDecimal; |
| 6 import java.math.MathContext; |
6 import java.text.ParseException; | 7 import java.text.ParseException; |
7 import java.text.ParsePosition; | 8 import java.text.ParsePosition; |
8 import java.util.HashSet; | 9 import java.util.HashSet; |
9 import java.util.Iterator; | 10 import java.util.Iterator; |
10 import java.util.Set; | 11 import java.util.Set; |
11 import java.util.concurrent.ConcurrentHashMap; | 12 import java.util.concurrent.ConcurrentHashMap; |
12 | 13 |
13 import com.ibm.icu.impl.StandardPlural; | 14 import com.ibm.icu.impl.StandardPlural; |
14 import com.ibm.icu.impl.TextTrieMap; | 15 import com.ibm.icu.impl.TextTrieMap; |
15 import com.ibm.icu.impl.number.Parse.ParseMode; | 16 import com.ibm.icu.impl.number.Parse.ParseMode; |
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
111 /** | 112 /** |
112 * Whether to ignore the fractional part of numbers. For example, parses "12
3.4" to "123" | 113 * Whether to ignore the fractional part of numbers. For example, parses "12
3.4" to "123" |
113 * instead of "123.4". | 114 * instead of "123.4". |
114 * | 115 * |
115 * @param parseIntegerOnly true to parse integers only; false to parse integ
ers with their | 116 * @param parseIntegerOnly true to parse integers only; false to parse integ
ers with their |
116 * fraction parts | 117 * fraction parts |
117 * @return The property bag, for chaining. | 118 * @return The property bag, for chaining. |
118 */ | 119 */ |
119 public IProperties setParseIntegerOnly(boolean parseIntegerOnly); | 120 public IProperties setParseIntegerOnly(boolean parseIntegerOnly); |
120 | 121 |
121 boolean DEFAULT_PARSE_IGNORE_EXPONENT = false; | 122 boolean DEFAULT_PARSE_NO_EXPONENT = false; |
122 | 123 |
123 /** @see #setParseIgnoreExponent */ | 124 /** @see #setParseNoExponent */ |
124 public boolean getParseIgnoreExponent(); | 125 public boolean getParseNoExponent(); |
125 | 126 |
126 /** | 127 /** |
127 * Whether to ignore the exponential part of numbers. For example, parses "1
23E4" to "123" | 128 * Whether to ignore the exponential part of numbers. For example, parses "1
23E4" to "123" |
128 * instead of "1230000". | 129 * instead of "1230000". |
129 * | 130 * |
130 * @param parseIgnoreExponent true to ignore exponents; false to parse them. | 131 * @param parseIgnoreExponent true to ignore exponents; false to parse them. |
131 * @return The property bag, for chaining. | 132 * @return The property bag, for chaining. |
132 */ | 133 */ |
133 public IProperties setParseIgnoreExponent(boolean parseIgnoreExponent); | 134 public IProperties setParseNoExponent(boolean parseIgnoreExponent); |
134 | 135 |
135 boolean DEFAULT_DECIMAL_PATTERN_MATCH_REQUIRED = false; | 136 boolean DEFAULT_DECIMAL_PATTERN_MATCH_REQUIRED = false; |
136 | 137 |
137 /** @see #setDecimalPatternMatchRequired */ | 138 /** @see #setDecimalPatternMatchRequired */ |
138 public boolean getDecimalPatternMatchRequired(); | 139 public boolean getDecimalPatternMatchRequired(); |
139 | 140 |
140 /** | 141 /** |
141 * Whether to require that a decimal point be present. If a decimal point is
not present, the | 142 * Whether to require that the presence of a decimal point matches the pattern
. If a decimal point |
142 * parse will not succeed: null will be returned from <code>parse()</code>,
and an error index | 143 * is not present, but the pattern contained a decimal point, parse will not
succeed: null will |
143 * will be set in the {@link ParsePosition}. | 144 * be returned from <code>parse()</code>, and an error index will be set in
the {@link |
| 145 * ParsePosition}. |
144 * | 146 * |
145 * @param decimalPatternMatchRequired true to set an error if decimal is not
present | 147 * @param decimalPatternMatchRequired true to set an error if decimal is not
present |
146 * @return The property bag, for chaining. | 148 * @return The property bag, for chaining. |
147 */ | 149 */ |
148 public IProperties setDecimalPatternMatchRequired(boolean decimalPatternMatc
hRequired); | 150 public IProperties setDecimalPatternMatchRequired(boolean decimalPatternMatc
hRequired); |
149 | 151 |
150 ParseMode DEFAULT_PARSE_MODE = ParseMode.LENIENT; | 152 ParseMode DEFAULT_PARSE_MODE = null; |
151 | 153 |
152 /** @see #setParseMode */ | 154 /** @see #setParseMode */ |
153 public ParseMode getParseMode(); | 155 public ParseMode getParseMode(); |
154 | 156 |
155 /** | 157 /** |
156 * Controls certain rules for how strict this parser is when reading strings
. See {@link | 158 * Controls certain rules for how strict this parser is when reading strings
. See {@link |
157 * ParseMode#LENIENT} and {@link ParseMode#STRICT}. | 159 * ParseMode#LENIENT} and {@link ParseMode#STRICT}. |
158 * | 160 * |
159 * @param parseMode Either {@link ParseMode#LENIENT} or {@link ParseMode#STR
ICT}. | 161 * @param parseMode Either {@link ParseMode#LENIENT} or {@link ParseMode#STR
ICT}. |
160 * @return The property bag, for chaining. | 162 * @return The property bag, for chaining. |
161 */ | 163 */ |
162 public IProperties setParseMode(ParseMode parseMode); | 164 public IProperties setParseMode(ParseMode parseMode); |
163 | |
164 // boolean DEFAULT_PARSE_CURRENCY = false; | |
165 // | |
166 // /** @see #setParseCurrency */ | |
167 // public boolean getParseCurrency(); | |
168 // | |
169 // /** | |
170 // * Whether to parse currency codes and currency names in the string. | |
171 // * | |
172 // * <p>Due to the large number of possible currencies, enabling this op
tion may impact the | |
173 // * runtime of the parse operation. | |
174 // * | |
175 // * @param parseCurrency true to parse arbitrary currency codes and cur
rency names; false to | |
176 // * disable. (Default is false) | |
177 // * @return The property bag, for chaining. | |
178 // */ | |
179 // public IProperties setParseCurrency(boolean parseCurrency); | |
180 | 165 |
181 boolean DEFAULT_PARSE_TO_BIG_DECIMAL = false; | 166 boolean DEFAULT_PARSE_TO_BIG_DECIMAL = false; |
182 | 167 |
183 /** @see #setParseToBigDecimal */ | 168 /** @see #setParseToBigDecimal */ |
184 public boolean getParseToBigDecimal(); | 169 public boolean getParseToBigDecimal(); |
185 | 170 |
186 /** | 171 /** |
187 * Whether to always return a BigDecimal from {@link Parse#parse} and all ot
her parse methods. | 172 * Whether to always return a BigDecimal from {@link Parse#parse} and all ot
her parse methods. |
188 * By default, a Long or a BigInteger are returned when possible. | 173 * By default, a Long or a BigInteger are returned when possible. |
189 * | 174 * |
(...skipping 14 matching lines...) Expand all Loading... |
204 * symbol. Grouping separators, decimal separators, and padding are always c
ase-sensitive. | 189 * symbol. Grouping separators, decimal separators, and padding are always c
ase-sensitive. |
205 * Currencies are always case-insensitive. | 190 * Currencies are always case-insensitive. |
206 * | 191 * |
207 * <p>This setting is ignored in fast mode. In fast mode, strings are always
compared in a | 192 * <p>This setting is ignored in fast mode. In fast mode, strings are always
compared in a |
208 * case-sensitive way. | 193 * case-sensitive way. |
209 * | 194 * |
210 * @param parseCaseSensitive true to be case-sensitive when parsing; false t
o allow any case. | 195 * @param parseCaseSensitive true to be case-sensitive when parsing; false t
o allow any case. |
211 * @return The property bag, for chaining. | 196 * @return The property bag, for chaining. |
212 */ | 197 */ |
213 public IProperties setParseCaseSensitive(boolean parseCaseSensitive); | 198 public IProperties setParseCaseSensitive(boolean parseCaseSensitive); |
214 | |
215 // boolean DEFAULT_PARSE_STRICT = false; | |
216 // | |
217 // /** @see #setParseStrict */ | |
218 // public boolean getParseStrict(); | |
219 // | |
220 // public IProperties setParseStrict(boolean parseStrict); | |
221 // | |
222 // boolean DEFAULT_PARSE_FAST = true; | |
223 // | |
224 // /** @see #setParseFastMode */ | |
225 // public boolean getParseFastMode(); | |
226 // | |
227 // public IProperties setParseFastMode(boolean parseFastMode); | |
228 } | 199 } |
229 | 200 |
230 /** | 201 /** |
231 * @see #parse(String, ParsePosition, ParseMode, boolean, boolean, IProperties
, | 202 * @see #parse(String, ParsePosition, ParseMode, boolean, boolean, IProperties
, |
232 * DecimalFormatSymbols) | 203 * DecimalFormatSymbols) |
233 */ | 204 */ |
234 private static enum StateName { | 205 private static enum StateName { |
235 BEFORE_PREFIX, | 206 BEFORE_PREFIX, |
236 AFTER_PREFIX, | 207 AFTER_PREFIX, |
237 AFTER_INTEGER_DIGIT, | 208 AFTER_INTEGER_DIGIT, |
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
298 EXPONENT | 269 EXPONENT |
299 } | 270 } |
300 | 271 |
301 /** | 272 /** |
302 * Holds a snapshot in time of a single parse path. This includes the digits s
een so far, the | 273 * Holds a snapshot in time of a single parse path. This includes the digits s
een so far, the |
303 * current state name, and other properties like the grouping separator used o
n this parse path, | 274 * current state name, and other properties like the grouping separator used o
n this parse path, |
304 * details about the exponent and negative signs, etc. | 275 * details about the exponent and negative signs, etc. |
305 */ | 276 */ |
306 private static class StateItem { | 277 private static class StateItem { |
307 // Parser state: | 278 // Parser state: |
| 279 // The "trailingCount" is used to keep track of how many characters from the
end of the string |
| 280 // are ignorable and should be removed from the parse position should this i
tem be accepted. |
308 // The "score" is used to help rank two otherwise equivalent parse paths. Cu
rrently, the only | 281 // The "score" is used to help rank two otherwise equivalent parse paths. Cu
rrently, the only |
309 // function giving points to the score is prefix/suffix. | 282 // function giving points to the score is prefix/suffix. |
310 StateName name; | 283 StateName name; |
| 284 int trailingCount; |
311 int score; | 285 int score; |
312 | 286 |
313 // Numerical value: | 287 // Numerical value: |
314 FormatQuantity4 fq = new FormatQuantity4(); | 288 FormatQuantity4 fq = new FormatQuantity4(); |
315 int numDigits; | 289 int numDigits; |
316 int trailingZeros; | 290 int trailingZeros; |
317 int exponent; | 291 int exponent; |
318 | 292 |
319 // Other items that we've seen: | 293 // Other items that we've seen: |
320 int groupingCp; | 294 int groupingCp; |
(...skipping 25 matching lines...) Expand all Loading... |
346 DigitType currentDigitType; | 320 DigitType currentDigitType; |
347 | 321 |
348 /** | 322 /** |
349 * Clears the instance so that it can be re-used. | 323 * Clears the instance so that it can be re-used. |
350 * | 324 * |
351 * @return Myself, for chaining. | 325 * @return Myself, for chaining. |
352 */ | 326 */ |
353 StateItem clear() { | 327 StateItem clear() { |
354 // Parser state: | 328 // Parser state: |
355 name = StateName.BEFORE_PREFIX; | 329 name = StateName.BEFORE_PREFIX; |
| 330 trailingCount = 0; |
356 score = 0; | 331 score = 0; |
357 | 332 |
358 // Numerical value: | 333 // Numerical value: |
359 fq.clear(); | 334 fq.clear(); |
360 numDigits = 0; | 335 numDigits = 0; |
361 trailingZeros = 0; | 336 trailingZeros = 0; |
362 exponent = 0; | 337 exponent = 0; |
363 | 338 |
364 // Other items we've seen: | 339 // Other items we've seen: |
365 groupingCp = -1; | 340 groupingCp = -1; |
(...skipping 19 matching lines...) Expand all Loading... |
385 currentCurrencyTrieState = null; | 360 currentCurrencyTrieState = null; |
386 currentDigitTrieState = null; | 361 currentDigitTrieState = null; |
387 currentDigitType = null; | 362 currentDigitType = null; |
388 | 363 |
389 return this; | 364 return this; |
390 } | 365 } |
391 | 366 |
392 /** | 367 /** |
393 * Sets the internal value of this instance equal to another instance. | 368 * Sets the internal value of this instance equal to another instance. |
394 * | 369 * |
| 370 * <p>newName and trailing are required as parameters to this function because
every time a code |
| 371 * point is consumed and a state item is copied, both of the corresponding f
ields should be |
| 372 * updated; it would be an error if they weren't updated. |
| 373 * |
395 * @param other The instance to copy from. | 374 * @param other The instance to copy from. |
| 375 * @param newName The state name that the new copy should take on. |
| 376 * @param trailing If non-negative, record this code point as trailing; if negat
ive, reset the |
| 377 * trailing count to zero. |
396 * @return Myself, for chaining. | 378 * @return Myself, for chaining. |
397 */ | 379 */ |
398 StateItem copyFrom(StateItem other) { | 380 StateItem copyFrom(StateItem other, StateName newName, int trailing) { |
399 // Parser state: | 381 // Parser state: |
400 name = other.name; | 382 name = newName; |
401 score = other.score; | 383 score = other.score; |
| 384 |
| 385 // Either reset trailingCount or add the width of the current code point. |
| 386 trailingCount = (trailing < 0) ? 0 : other.trailingCount + Character.charC
ount(trailing); |
402 | 387 |
403 // Numerical value: | 388 // Numerical value: |
404 fq.copyFrom(other.fq); | 389 fq.copyFrom(other.fq); |
405 numDigits = other.numDigits; | 390 numDigits = other.numDigits; |
406 trailingZeros = other.trailingZeros; | 391 trailingZeros = other.trailingZeros; |
407 exponent = other.exponent; | 392 exponent = other.exponent; |
408 | 393 |
409 // Other items we've seen: | 394 // Other items we've seen: |
410 groupingCp = other.groupingCp; | 395 groupingCp = other.groupingCp; |
411 groupingWidths = other.groupingWidths; | 396 groupingWidths = other.groupingWidths; |
(...skipping 25 matching lines...) Expand all Loading... |
437 /** | 422 /** |
438 * Adds a digit to the internal representation of this instance. | 423 * Adds a digit to the internal representation of this instance. |
439 * | 424 * |
440 * @param digit The digit that was read from the string. | 425 * @param digit The digit that was read from the string. |
441 * @param type Whether the digit occurred after the decimal point. | 426 * @param type Whether the digit occurred after the decimal point. |
442 */ | 427 */ |
443 void appendDigit(byte digit, DigitType type) { | 428 void appendDigit(byte digit, DigitType type) { |
444 if (type == DigitType.EXPONENT) { | 429 if (type == DigitType.EXPONENT) { |
445 int newExponent = exponent * 10 + digit; | 430 int newExponent = exponent * 10 + digit; |
446 if (newExponent < exponent) { | 431 if (newExponent < exponent) { |
447 // overflow: count as infinity. | 432 // overflow |
448 sawInfinity = true; | 433 exponent = Integer.MAX_VALUE; |
449 } else { | 434 } else { |
450 exponent = newExponent; | 435 exponent = newExponent; |
451 } | 436 } |
452 } else { | 437 } else { |
453 numDigits++; | 438 numDigits++; |
454 if (type == DigitType.FRACTION && digit == 0) { | 439 if (type == DigitType.FRACTION && digit == 0) { |
455 trailingZeros++; | 440 trailingZeros++; |
456 } else if (type == DigitType.FRACTION) { | 441 } else if (type == DigitType.FRACTION) { |
457 fq.appendDigit(digit, trailingZeros, false); | 442 fq.appendDigit(digit, trailingZeros, false); |
458 trailingZeros = 0; | 443 trailingZeros = 0; |
(...skipping 23 matching lines...) Expand all Loading... |
482 if (sawNegative) { | 467 if (sawNegative) { |
483 return Double.NEGATIVE_INFINITY; | 468 return Double.NEGATIVE_INFINITY; |
484 } else { | 469 } else { |
485 return Double.POSITIVE_INFINITY; | 470 return Double.POSITIVE_INFINITY; |
486 } | 471 } |
487 } | 472 } |
488 if (fq.isZero() && sawNegative) { | 473 if (fq.isZero() && sawNegative) { |
489 return -0.0; | 474 return -0.0; |
490 } | 475 } |
491 | 476 |
| 477 // Check for exponent overflow |
| 478 boolean forceBigDecimal = properties.getParseToBigDecimal(); |
| 479 if (exponent == Integer.MAX_VALUE) { |
| 480 if (sawNegativeExponent && sawNegative) { |
| 481 return -0.0; |
| 482 } else if (sawNegativeExponent) { |
| 483 return 0.0; |
| 484 } else if (sawNegative) { |
| 485 return Double.NEGATIVE_INFINITY; |
| 486 } else { |
| 487 return Double.POSITIVE_INFINITY; |
| 488 } |
| 489 } else if (exponent > 1000) { |
| 490 // BigDecimals can handle huge values better than BigIntegers. |
| 491 forceBigDecimal = true; |
| 492 } |
| 493 |
492 // Multipliers must be applied in reverse. | 494 // Multipliers must be applied in reverse. |
493 BigDecimal multiplier = properties.getMultiplier(); | 495 BigDecimal multiplier = properties.getMultiplier(); |
494 if (properties.getMagnitudeMultiplier() != 0) { | 496 if (properties.getMagnitudeMultiplier() != 0) { |
495 if (multiplier == null) multiplier = BigDecimal.ONE; | 497 if (multiplier == null) multiplier = BigDecimal.ONE; |
496 multiplier = multiplier.scaleByPowerOfTen(properties.getMagnitudeMultipl
ier()); | 498 multiplier = multiplier.scaleByPowerOfTen(properties.getMagnitudeMultipl
ier()); |
497 } | 499 } |
498 boolean forceBigDecimal = properties.getParseToBigDecimal(); | |
499 int delta = (sawNegativeExponent ? -1 : 1) * exponent; | 500 int delta = (sawNegativeExponent ? -1 : 1) * exponent; |
| 501 |
| 502 // We need to use a math context in order to prevent non-terminating decim
al expansions. |
| 503 // This is only used when dividing by the multiplier. |
| 504 MathContext mc = RoundingUtils.getMathContextOr16Digits(properties); |
500 | 505 |
501 // Construct the output number. | 506 // Construct the output number. |
502 // This is the only step during fast-mode parsing that incurs object creat
ions. | 507 // This is the only step during fast-mode parsing that incurs object creat
ions. |
503 BigDecimal result = fq.toBigDecimal(); | 508 BigDecimal result = fq.toBigDecimal(); |
504 if (sawNegative) result = result.negate(); | 509 if (sawNegative) result = result.negate(); |
505 result = result.scaleByPowerOfTen(delta); | 510 result = result.scaleByPowerOfTen(delta); |
506 if (multiplier != null) result = result.divide(multiplier); | 511 if (multiplier != null) { |
| 512 result = result.divide(multiplier, mc); |
| 513 } |
507 result = result.stripTrailingZeros(); | 514 result = result.stripTrailingZeros(); |
508 if (forceBigDecimal || result.scale() > 0) { | 515 if (forceBigDecimal || result.scale() > 0) { |
509 return result; | 516 return result; |
510 } else if (-result.scale() + result.precision() <= 18) { | 517 } else if (-result.scale() + result.precision() <= 18) { |
511 return result.longValueExact(); | 518 return result.longValueExact(); |
512 } else { | 519 } else { |
513 return result.toBigIntegerExact(); | 520 return result.toBigIntegerExact(); |
514 } | 521 } |
515 } | 522 } |
516 | 523 |
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
699 static final AffixHolder EMPTY_NEGATIVE = new AffixHolder("", "", true, true
); | 706 static final AffixHolder EMPTY_NEGATIVE = new AffixHolder("", "", true, true
); |
700 static final AffixHolder DEFAULT_POSITIVE = new AffixHolder("+", "", false,
false); | 707 static final AffixHolder DEFAULT_POSITIVE = new AffixHolder("+", "", false,
false); |
701 static final AffixHolder DEFAULT_NEGATIVE = new AffixHolder("-", "", false,
true); | 708 static final AffixHolder DEFAULT_NEGATIVE = new AffixHolder("-", "", false,
true); |
702 | 709 |
703 static void addToState(ParserState state, IProperties properties) { | 710 static void addToState(ParserState state, IProperties properties) { |
704 AffixHolder pp = fromPropertiesPositivePattern(properties); | 711 AffixHolder pp = fromPropertiesPositivePattern(properties); |
705 AffixHolder np = fromPropertiesNegativePattern(properties); | 712 AffixHolder np = fromPropertiesNegativePattern(properties); |
706 AffixHolder ps = fromPropertiesPositiveString(properties); | 713 AffixHolder ps = fromPropertiesPositiveString(properties); |
707 AffixHolder ns = fromPropertiesNegativeString(properties); | 714 AffixHolder ns = fromPropertiesNegativeString(properties); |
708 if (pp == null && ps == null) { | 715 if (pp == null && ps == null) { |
709 if (properties.getAlwaysShowPlusSign()) { | 716 if (properties.getPlusSignAlwaysShown()) { |
710 state.affixHolders.add(DEFAULT_POSITIVE); | 717 state.affixHolders.add(DEFAULT_POSITIVE); |
711 } else { | 718 } else { |
712 state.affixHolders.add(EMPTY_POSITIVE); | 719 state.affixHolders.add(EMPTY_POSITIVE); |
713 } | 720 } |
714 } else { | 721 } else { |
715 if (pp != null) state.affixHolders.add(pp); | 722 if (pp != null) state.affixHolders.add(pp); |
716 if (ps != null) state.affixHolders.add(ps); | 723 if (ps != null) state.affixHolders.add(ps); |
717 } | 724 } |
718 if (np == null && ns == null) { | 725 if (np == null && ns == null) { |
719 state.affixHolders.add(DEFAULT_NEGATIVE); | 726 state.affixHolders.add(DEFAULT_NEGATIVE); |
720 } else { | 727 } else { |
721 if (np != null) state.affixHolders.add(np); | 728 if (np != null) state.affixHolders.add(np); |
722 if (ns != null) state.affixHolders.add(ns); | 729 if (ns != null) state.affixHolders.add(ns); |
723 } | 730 } |
724 } | 731 } |
725 | 732 |
726 static AffixHolder fromPropertiesPositivePattern(IProperties properties) { | 733 static AffixHolder fromPropertiesPositivePattern(IProperties properties) { |
727 CharSequence ppp = properties.getPositivePrefixPattern(); | 734 String ppp = properties.getPositivePrefixPattern(); |
728 CharSequence psp = properties.getPositiveSuffixPattern(); | 735 String psp = properties.getPositiveSuffixPattern(); |
729 return getInstance(ppp, psp, false, false); | 736 return getInstance(ppp, psp, false, false); |
730 } | 737 } |
731 | 738 |
732 static AffixHolder fromPropertiesNegativePattern(IProperties properties) { | 739 static AffixHolder fromPropertiesNegativePattern(IProperties properties) { |
733 CharSequence npp = properties.getNegativePrefixPattern(); | 740 String npp = properties.getNegativePrefixPattern(); |
734 CharSequence nsp = properties.getNegativeSuffixPattern(); | 741 String nsp = properties.getNegativeSuffixPattern(); |
735 return getInstance(npp, nsp, false, true); | 742 return getInstance(npp, nsp, false, true); |
736 } | 743 } |
737 | 744 |
738 static AffixHolder fromPropertiesPositiveString(IProperties properties) { | 745 static AffixHolder fromPropertiesPositiveString(IProperties properties) { |
739 CharSequence pp = properties.getPositivePrefix(); | 746 String pp = properties.getPositivePrefix(); |
740 CharSequence ps = properties.getPositiveSuffix(); | 747 String ps = properties.getPositiveSuffix(); |
741 return getInstance(pp, ps, true, false); | 748 return getInstance(pp, ps, true, false); |
742 } | 749 } |
743 | 750 |
744 static AffixHolder fromPropertiesNegativeString(IProperties properties) { | 751 static AffixHolder fromPropertiesNegativeString(IProperties properties) { |
745 CharSequence np = properties.getNegativePrefix(); | 752 String np = properties.getNegativePrefix(); |
746 CharSequence ns = properties.getNegativeSuffix(); | 753 String ns = properties.getNegativeSuffix(); |
747 return getInstance(np, ns, true, true); | 754 return getInstance(np, ns, true, true); |
748 } | 755 } |
749 | 756 |
750 static AffixHolder getInstance( | 757 static AffixHolder getInstance(String p, String s, boolean strings, boolean
negative) { |
751 CharSequence p, CharSequence s, boolean strings, boolean negative) { | |
752 if (p == null && s == null) return null; | 758 if (p == null && s == null) return null; |
753 if (p == null) p = ""; | 759 if (p == null) p = ""; |
754 if (s == null) s = ""; | 760 if (s == null) s = ""; |
755 if (p.length() == 0 && s.length() == 0) return negative ? EMPTY_NEGATIVE :
EMPTY_POSITIVE; | 761 if (p.length() == 0 && s.length() == 0) return negative ? EMPTY_NEGATIVE :
EMPTY_POSITIVE; |
756 return new AffixHolder(p.toString(), s.toString(), strings, negative); | 762 return new AffixHolder(p, s, strings, negative); |
757 } | 763 } |
758 | 764 |
759 AffixHolder(String pp, String sp, boolean strings, boolean negative) { | 765 AffixHolder(String pp, String sp, boolean strings, boolean negative) { |
760 this.p = pp; | 766 this.p = pp; |
761 this.s = sp; | 767 this.s = sp; |
762 this.strings = strings; | 768 this.strings = strings; |
763 this.negative = negative; | 769 this.negative = negative; |
764 } | 770 } |
765 | 771 |
766 @Override | 772 @Override |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
799 * A class that holds information about all currency affix patterns for the lo
cale. This allows | 805 * A class that holds information about all currency affix patterns for the lo
cale. This allows |
800 * the parser to accept currencies in any format that are valid for the locale
. | 806 * the parser to accept currencies in any format that are valid for the locale
. |
801 */ | 807 */ |
802 private static class CurrencyAffixPatterns { | 808 private static class CurrencyAffixPatterns { |
803 private final Set<AffixHolder> set = new HashSet<AffixHolder>(); | 809 private final Set<AffixHolder> set = new HashSet<AffixHolder>(); |
804 | 810 |
805 private static final ConcurrentHashMap<ULocale, CurrencyAffixPatterns> curre
ncyAffixPatterns = | 811 private static final ConcurrentHashMap<ULocale, CurrencyAffixPatterns> curre
ncyAffixPatterns = |
806 new ConcurrentHashMap<ULocale, CurrencyAffixPatterns>(); | 812 new ConcurrentHashMap<ULocale, CurrencyAffixPatterns>(); |
807 | 813 |
808 static void addToState(ULocale uloc, ParserState state) { | 814 static void addToState(ULocale uloc, ParserState state) { |
809 if (!currencyAffixPatterns.contains(uloc)) { | 815 if (!currencyAffixPatterns.containsKey(uloc)) { |
810 // There can be multiple threads computing the same CurrencyAffixPattern
s simultaneously, | 816 // There can be multiple threads computing the same CurrencyAffixPattern
s simultaneously, |
811 // but that scenario is harmless. | 817 // but that scenario is harmless. |
812 CurrencyAffixPatterns value = new CurrencyAffixPatterns(uloc); | 818 CurrencyAffixPatterns value = new CurrencyAffixPatterns(uloc); |
813 currencyAffixPatterns.put(uloc, value); | 819 currencyAffixPatterns.put(uloc, value); |
814 } | 820 } |
815 CurrencyAffixPatterns instance = currencyAffixPatterns.get(uloc); | 821 CurrencyAffixPatterns instance = currencyAffixPatterns.get(uloc); |
816 state.affixHolders.addAll(instance.set); | 822 state.affixHolders.addAll(instance.set); |
817 } | 823 } |
818 | 824 |
819 private CurrencyAffixPatterns(ULocale uloc) { | 825 private CurrencyAffixPatterns(ULocale uloc) { |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
943 CharSequence input, | 949 CharSequence input, |
944 ParsePosition ppos, | 950 ParsePosition ppos, |
945 IProperties properties, | 951 IProperties properties, |
946 DecimalFormatSymbols symbols) { | 952 DecimalFormatSymbols symbols) { |
947 StateItem best = _parse(input, ppos, false, properties, symbols); | 953 StateItem best = _parse(input, ppos, false, properties, symbols); |
948 return (best == null) ? null : best.toNumber(properties); | 954 return (best == null) ? null : best.toNumber(properties); |
949 } | 955 } |
950 | 956 |
951 public static CurrencyAmount parseCurrency( | 957 public static CurrencyAmount parseCurrency( |
952 String input, IProperties properties, DecimalFormatSymbols symbols) throws
ParseException { | 958 String input, IProperties properties, DecimalFormatSymbols symbols) throws
ParseException { |
953 ParsePosition ppos = threadLocalParsePosition.get(); | 959 return parseCurrency(input, null, properties, symbols); |
954 ppos.setIndex(0); | |
955 return parseCurrency(input, ppos, properties, symbols); | |
956 } | 960 } |
957 | 961 |
958 public static CurrencyAmount parseCurrency( | 962 public static CurrencyAmount parseCurrency( |
959 CharSequence input, ParsePosition ppos, IProperties properties, DecimalFor
matSymbols symbols) | 963 CharSequence input, ParsePosition ppos, IProperties properties, DecimalFor
matSymbols symbols) |
960 throws ParseException { | 964 throws ParseException { |
| 965 if (ppos == null) { |
| 966 ppos = threadLocalParsePosition.get(); |
| 967 ppos.setIndex(0); |
| 968 ppos.setErrorIndex(-1); |
| 969 } |
961 StateItem best = _parse(input, ppos, true, properties, symbols); | 970 StateItem best = _parse(input, ppos, true, properties, symbols); |
962 return (best == null) ? null : best.toCurrencyAmount(properties); | 971 return (best == null) ? null : best.toCurrencyAmount(properties); |
963 } | 972 } |
964 | 973 |
965 private static StateItem _parse( | 974 private static StateItem _parse( |
966 CharSequence input, | 975 CharSequence input, |
967 ParsePosition ppos, | 976 ParsePosition ppos, |
968 boolean parseCurrency, | 977 boolean parseCurrency, |
969 IProperties properties, | 978 IProperties properties, |
970 DecimalFormatSymbols symbols) { | 979 DecimalFormatSymbols symbols) { |
971 | 980 |
972 if (input == null || ppos == null || properties == null || symbols == null)
{ | 981 if (input == null || ppos == null || properties == null || symbols == null)
{ |
973 throw new IllegalArgumentException("All arguments are required for parse."
); | 982 throw new IllegalArgumentException("All arguments are required for parse."
); |
974 } | 983 } |
975 | 984 |
976 ParseMode mode = properties.getParseMode(); | 985 ParseMode mode = properties.getParseMode(); |
| 986 if (mode == null) mode = ParseMode.LENIENT; |
977 boolean integerOnly = properties.getParseIntegerOnly(); | 987 boolean integerOnly = properties.getParseIntegerOnly(); |
978 boolean ignoreExponent = properties.getParseIgnoreExponent(); | 988 boolean ignoreExponent = properties.getParseNoExponent(); |
979 | 989 |
980 // Set up the initial state | 990 // Set up the initial state |
981 ParserState state = threadLocalParseState.get().clear(); | 991 ParserState state = threadLocalParseState.get().clear(); |
982 state.properties = properties; | 992 state.properties = properties; |
983 state.symbols = symbols; | 993 state.symbols = symbols; |
984 state.mode = mode; | 994 state.mode = mode; |
985 state.parseCurrency = parseCurrency; | 995 state.parseCurrency = parseCurrency; |
986 state.caseSensitive = properties.getParseCaseSensitive(); | 996 state.caseSensitive = properties.getParseCaseSensitive(); |
987 state.decimalCp1 = Character.codePointAt(symbols.getDecimalSeparatorString()
, 0); | 997 state.decimalCp1 = Character.codePointAt(symbols.getDecimalSeparatorString()
, 0); |
988 state.decimalCp2 = Character.codePointAt(symbols.getMonetaryDecimalSeparator
String(), 0); | 998 state.decimalCp2 = Character.codePointAt(symbols.getMonetaryDecimalSeparator
String(), 0); |
989 state.groupingCp1 = Character.codePointAt(symbols.getGroupingSeparatorString
(), 0); | 999 state.groupingCp1 = Character.codePointAt(symbols.getGroupingSeparatorString
(), 0); |
990 state.groupingCp2 = Character.codePointAt(symbols.getMonetaryGroupingSeparat
orString(), 0); | 1000 state.groupingCp2 = Character.codePointAt(symbols.getMonetaryGroupingSeparat
orString(), 0); |
991 state.decimalType1 = SeparatorType.fromCp(state.decimalCp1, mode); | 1001 state.decimalType1 = SeparatorType.fromCp(state.decimalCp1, mode); |
992 state.decimalType2 = SeparatorType.fromCp(state.decimalCp1, mode); | 1002 state.decimalType2 = SeparatorType.fromCp(state.decimalCp2, mode); |
993 state.groupingType1 = SeparatorType.fromCp(state.groupingCp1, mode); | 1003 state.groupingType1 = SeparatorType.fromCp(state.groupingCp1, mode); |
994 state.groupingType2 = SeparatorType.fromCp(state.groupingCp1, mode); | 1004 state.groupingType2 = SeparatorType.fromCp(state.groupingCp2, mode); |
995 StateItem initialStateItem = state.getNext().clear(); | 1005 StateItem initialStateItem = state.getNext().clear(); |
996 initialStateItem.name = StateName.BEFORE_PREFIX; | 1006 initialStateItem.name = StateName.BEFORE_PREFIX; |
997 | 1007 |
998 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1008 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
999 state.digitTrie = makeDigitTrie(symbols.getDigitStringsLocal()); | 1009 state.digitTrie = makeDigitTrie(symbols.getDigitStringsLocal()); |
1000 AffixHolder.addToState(state, properties); | 1010 AffixHolder.addToState(state, properties); |
1001 if (parseCurrency) { | 1011 if (parseCurrency) { |
1002 CurrencyAffixPatterns.addToState(symbols.getULocale(), state); | 1012 CurrencyAffixPatterns.addToState(symbols.getULocale(), state); |
1003 } | 1013 } |
1004 } | 1014 } |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1049 if (!integerOnly) { | 1059 if (!integerOnly) { |
1050 acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item
); | 1060 acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item
); |
1051 if (state.length > 0 && mode == ParseMode.FAST) break; | 1061 if (state.length > 0 && mode == ParseMode.FAST) break; |
1052 } | 1062 } |
1053 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1063 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
1054 acceptPrefix(cp, StateName.AFTER_PREFIX, state, item); | 1064 acceptPrefix(cp, StateName.AFTER_PREFIX, state, item); |
1055 } | 1065 } |
1056 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1066 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1057 acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); | 1067 acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); |
1058 if (state.length > 0 && mode == ParseMode.FAST) break; | 1068 if (state.length > 0 && mode == ParseMode.FAST) break; |
1059 acceptCurrency(cp, StateName.BEFORE_PREFIX, state, item); | 1069 if (parseCurrency) { |
| 1070 acceptCurrency(cp, StateName.BEFORE_PREFIX, state, item); |
| 1071 } |
1060 } | 1072 } |
1061 break; | 1073 break; |
1062 | 1074 |
1063 case AFTER_PREFIX: | 1075 case AFTER_PREFIX: |
1064 // Prefix is consumed | 1076 // Prefix is consumed |
1065 acceptBidi(cp, StateName.AFTER_PREFIX, state, item); | 1077 acceptBidi(cp, StateName.AFTER_PREFIX, state, item); |
1066 acceptPadding(cp, StateName.AFTER_PREFIX, state, item); | 1078 acceptPadding(cp, StateName.AFTER_PREFIX, state, item); |
1067 acceptNan(cp, StateName.BEFORE_SUFFIX, state, item); | 1079 acceptNan(cp, StateName.BEFORE_SUFFIX, state, item); |
1068 acceptInfinity(cp, StateName.BEFORE_SUFFIX, state, item); | 1080 acceptInfinity(cp, StateName.BEFORE_SUFFIX, state, item); |
1069 acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); | 1081 acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); |
1070 if (!integerOnly) { | 1082 if (!integerOnly) { |
1071 acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item
); | 1083 acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item
); |
1072 } | 1084 } |
1073 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1085 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1074 acceptWhitespace(cp, StateName.AFTER_PREFIX, state, item); | 1086 acceptWhitespace(cp, StateName.AFTER_PREFIX, state, item); |
1075 acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); | 1087 acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); |
1076 acceptCurrency(cp, StateName.AFTER_PREFIX, state, item); | 1088 if (parseCurrency) { |
| 1089 acceptCurrency(cp, StateName.AFTER_PREFIX, state, item); |
| 1090 } |
1077 } | 1091 } |
1078 break; | 1092 break; |
1079 | 1093 |
1080 case AFTER_INTEGER_DIGIT: | 1094 case AFTER_INTEGER_DIGIT: |
1081 // Previous character was an integer digit (or grouping/whitespace) | 1095 // Previous character was an integer digit (or grouping/whitespace) |
1082 acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); | 1096 acceptIntegerDigit(cp, StateName.AFTER_INTEGER_DIGIT, state, item); |
1083 if (state.length > 0 && mode == ParseMode.FAST) break; | 1097 if (state.length > 0 && mode == ParseMode.FAST) break; |
1084 if (!integerOnly) { | 1098 if (!integerOnly) { |
1085 acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item
); | 1099 acceptDecimalPoint(cp, StateName.AFTER_FRACTION_DIGIT, state, item
); |
1086 if (state.length > 0 && mode == ParseMode.FAST) break; | 1100 if (state.length > 0 && mode == ParseMode.FAST) break; |
1087 } | 1101 } |
1088 acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); | 1102 acceptGrouping(cp, StateName.AFTER_INTEGER_DIGIT, state, item); |
1089 if (state.length > 0 && mode == ParseMode.FAST) break; | 1103 if (state.length > 0 && mode == ParseMode.FAST) break; |
1090 acceptBidi(cp, StateName.AFTER_INTEGER_DIGIT, state, item); | 1104 acceptBidi(cp, StateName.AFTER_INTEGER_DIGIT, state, item); |
1091 if (state.length > 0 && mode == ParseMode.FAST) break; | 1105 if (state.length > 0 && mode == ParseMode.FAST) break; |
1092 acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); | 1106 acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); |
1093 if (state.length > 0 && mode == ParseMode.FAST) break; | 1107 if (state.length > 0 && mode == ParseMode.FAST) break; |
1094 if (!ignoreExponent) { | 1108 if (!ignoreExponent) { |
1095 acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, st
ate, item); | 1109 acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, st
ate, item); |
1096 if (state.length > 0 && mode == ParseMode.FAST) break; | 1110 if (state.length > 0 && mode == ParseMode.FAST) break; |
1097 } | 1111 } |
1098 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1112 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
1099 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); | 1113 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); |
1100 } | 1114 } |
1101 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1115 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1102 acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); | 1116 acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); |
1103 if (state.length > 0 && mode == ParseMode.FAST) break; | 1117 if (state.length > 0 && mode == ParseMode.FAST) break; |
1104 acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); | 1118 acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, fa
lse); |
| 1119 if (state.length > 0 && mode == ParseMode.FAST) break; |
| 1120 if (parseCurrency) { |
| 1121 acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); |
| 1122 } |
1105 } | 1123 } |
1106 break; | 1124 break; |
1107 | 1125 |
1108 case AFTER_FRACTION_DIGIT: | 1126 case AFTER_FRACTION_DIGIT: |
1109 // We encountered a decimal point | 1127 // We encountered a decimal point |
1110 acceptFractionDigit(cp, StateName.AFTER_FRACTION_DIGIT, state, item)
; | 1128 acceptFractionDigit(cp, StateName.AFTER_FRACTION_DIGIT, state, item)
; |
1111 if (state.length > 0 && mode == ParseMode.FAST) break; | 1129 if (state.length > 0 && mode == ParseMode.FAST) break; |
1112 acceptBidi(cp, StateName.AFTER_FRACTION_DIGIT, state, item); | 1130 acceptBidi(cp, StateName.AFTER_FRACTION_DIGIT, state, item); |
1113 if (state.length > 0 && mode == ParseMode.FAST) break; | 1131 if (state.length > 0 && mode == ParseMode.FAST) break; |
1114 acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); | 1132 acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); |
1115 if (state.length > 0 && mode == ParseMode.FAST) break; | 1133 if (state.length > 0 && mode == ParseMode.FAST) break; |
1116 if (!ignoreExponent) { | 1134 if (!ignoreExponent) { |
1117 acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, st
ate, item); | 1135 acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, st
ate, item); |
1118 if (state.length > 0 && mode == ParseMode.FAST) break; | 1136 if (state.length > 0 && mode == ParseMode.FAST) break; |
1119 } | 1137 } |
1120 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1138 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
1121 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); | 1139 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); |
1122 } | 1140 } |
1123 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1141 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1124 acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); | 1142 acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); |
1125 if (state.length > 0 && mode == ParseMode.FAST) break; | 1143 if (state.length > 0 && mode == ParseMode.FAST) break; |
1126 acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); | 1144 acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, fa
lse); |
| 1145 if (state.length > 0 && mode == ParseMode.FAST) break; |
| 1146 if (parseCurrency) { |
| 1147 acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); |
| 1148 } |
1127 } | 1149 } |
1128 break; | 1150 break; |
1129 | 1151 |
1130 case AFTER_EXPONENT_SEPARATOR: | 1152 case AFTER_EXPONENT_SEPARATOR: |
1131 acceptBidi(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item); | 1153 acceptBidi(cp, StateName.AFTER_EXPONENT_SEPARATOR, state, item); |
1132 acceptMinusOrPlusSign(cp, StateName.AFTER_EXPONENT_SEPARATOR, state,
item, true); | 1154 acceptMinusOrPlusSign(cp, StateName.AFTER_EXPONENT_SEPARATOR, state,
item, true); |
1133 acceptExponentDigit(cp, StateName.AFTER_EXPONENT_DIGIT, state, item)
; | 1155 acceptExponentDigit(cp, StateName.AFTER_EXPONENT_DIGIT, state, item)
; |
1134 break; | 1156 break; |
1135 | 1157 |
1136 case AFTER_EXPONENT_DIGIT: | 1158 case AFTER_EXPONENT_DIGIT: |
1137 acceptBidi(cp, StateName.AFTER_EXPONENT_DIGIT, state, item); | 1159 acceptBidi(cp, StateName.AFTER_EXPONENT_DIGIT, state, item); |
1138 acceptPadding(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item
); | 1160 acceptPadding(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item
); |
1139 acceptExponentDigit(cp, StateName.AFTER_EXPONENT_DIGIT, state, item)
; | 1161 acceptExponentDigit(cp, StateName.AFTER_EXPONENT_DIGIT, state, item)
; |
1140 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1162 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
1141 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); | 1163 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); |
1142 } | 1164 } |
1143 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1165 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1144 acceptWhitespace(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state,
item); | 1166 acceptWhitespace(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state,
item); |
1145 acceptCurrency(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, i
tem); | 1167 acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, fa
lse); |
| 1168 if (parseCurrency) { |
| 1169 acceptCurrency(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state,
item); |
| 1170 } |
1146 } | 1171 } |
1147 break; | 1172 break; |
1148 | 1173 |
1149 case BEFORE_SUFFIX: | 1174 case BEFORE_SUFFIX: |
1150 // Accept whitespace, suffixes, and exponent separators | 1175 // Accept whitespace, suffixes, and exponent separators |
1151 acceptBidi(cp, StateName.BEFORE_SUFFIX, state, item); | 1176 acceptBidi(cp, StateName.BEFORE_SUFFIX, state, item); |
1152 acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); | 1177 acceptPadding(cp, StateName.BEFORE_SUFFIX, state, item); |
1153 if (!ignoreExponent) { | 1178 if (!ignoreExponent) { |
1154 acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, st
ate, item); | 1179 acceptExponentSeparator(cp, StateName.AFTER_EXPONENT_SEPARATOR, st
ate, item); |
1155 } | 1180 } |
1156 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1181 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
1157 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); | 1182 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); |
1158 } | 1183 } |
1159 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1184 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1160 acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); | 1185 acceptWhitespace(cp, StateName.BEFORE_SUFFIX, state, item); |
1161 acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); | 1186 acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX, state, item, fa
lse); |
| 1187 if (parseCurrency) { |
| 1188 acceptCurrency(cp, StateName.BEFORE_SUFFIX, state, item); |
| 1189 } |
1162 } | 1190 } |
1163 break; | 1191 break; |
1164 | 1192 |
1165 case BEFORE_SUFFIX_SEEN_EXPONENT: | 1193 case BEFORE_SUFFIX_SEEN_EXPONENT: |
1166 // Accept whitespace and suffixes but not exponent separators | 1194 // Accept whitespace and suffixes but not exponent separators |
1167 acceptBidi(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); | 1195 acceptBidi(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item); |
1168 acceptPadding(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item
); | 1196 acceptPadding(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, item
); |
1169 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { | 1197 if (mode == ParseMode.LENIENT || mode == ParseMode.STRICT) { |
1170 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); | 1198 acceptSuffix(cp, StateName.AFTER_SUFFIX, state, item); |
1171 } | 1199 } |
1172 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { | 1200 if (mode == ParseMode.LENIENT || mode == ParseMode.FAST) { |
1173 acceptWhitespace(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state,
item); | 1201 acceptWhitespace(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state,
item); |
1174 acceptCurrency(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state, i
tem); | 1202 acceptMinusOrPlusSign(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, s
tate, item, false); |
| 1203 if (parseCurrency) { |
| 1204 acceptCurrency(cp, StateName.BEFORE_SUFFIX_SEEN_EXPONENT, state,
item); |
| 1205 } |
1175 } | 1206 } |
1176 break; | 1207 break; |
1177 | 1208 |
1178 case AFTER_SUFFIX: | 1209 case AFTER_SUFFIX: |
1179 if ((mode == ParseMode.LENIENT || mode == ParseMode.FAST) && parseCu
rrency) { | 1210 if ((mode == ParseMode.LENIENT || mode == ParseMode.FAST) && parseCu
rrency) { |
1180 // Continue traversing in case there is a currency symbol to consu
me | 1211 // Continue traversing in case there is a currency symbol to consu
me |
1181 acceptBidi(cp, StateName.AFTER_SUFFIX, state, item); | 1212 acceptBidi(cp, StateName.AFTER_SUFFIX, state, item); |
1182 acceptPadding(cp, StateName.AFTER_SUFFIX, state, item); | 1213 acceptPadding(cp, StateName.AFTER_SUFFIX, state, item); |
1183 acceptWhitespace(cp, StateName.AFTER_SUFFIX, state, item); | 1214 acceptWhitespace(cp, StateName.AFTER_SUFFIX, state, item); |
1184 acceptCurrency(cp, StateName.AFTER_SUFFIX, state, item); | 1215 acceptMinusOrPlusSign(cp, StateName.AFTER_SUFFIX, state, item, fal
se); |
| 1216 if (parseCurrency) { |
| 1217 acceptCurrency(cp, StateName.AFTER_SUFFIX, state, item); |
| 1218 } |
1185 } | 1219 } |
1186 // Otherwise, do not accept any more characters. | 1220 // Otherwise, do not accept any more characters. |
1187 break; | 1221 break; |
1188 | 1222 |
1189 case INSIDE_CURRENCY: | 1223 case INSIDE_CURRENCY: |
1190 acceptCurrencyOffset(cp, state, item); | 1224 acceptCurrencyOffset(cp, state, item); |
1191 break; | 1225 break; |
1192 | 1226 |
1193 case INSIDE_DIGIT: | 1227 case INSIDE_DIGIT: |
1194 acceptDigitTrieOffset(cp, state, item); | 1228 acceptDigitTrieOffset(cp, state, item); |
1195 break; | 1229 break; |
1196 | 1230 |
1197 case INSIDE_STRING: | 1231 case INSIDE_STRING: |
1198 acceptStringOffset(cp, state, item); | 1232 acceptStringOffset(cp, state, item); |
1199 // Accept arbitrary bidi and whitespace (if lenient) in the middle o
f strings. | 1233 // Accept arbitrary bidi in the middle of strings. |
1200 if (state.length == 0) { | 1234 if (state.length == 0 && UNISET_BIDI.contains(cp)) { |
1201 acceptBidi(cp, StateName.INSIDE_STRING, state, item); | 1235 state.getNext().copyFrom(item, item.name, cp); |
1202 if (mode == ParseMode.LENIENT) { | |
1203 acceptWhitespace(cp, StateName.INSIDE_STRING, state, item); | |
1204 } | |
1205 } | 1236 } |
1206 break; | 1237 break; |
1207 | 1238 |
1208 case INSIDE_AFFIX_PATTERN: | 1239 case INSIDE_AFFIX_PATTERN: |
1209 acceptAffixPatternOffset(cp, state, item); | 1240 acceptAffixPatternOffset(cp, state, item); |
1210 // Accept arbitrary bidi and whitespace (if lenient) in the middle o
f affixes. | 1241 // Accept arbitrary bidi and whitespace (if lenient) in the middle o
f affixes. |
1211 if (state.length == 0) { | 1242 if (state.length == 0 && isIgnorable(cp, state)) { |
1212 acceptBidi(cp, StateName.INSIDE_AFFIX_PATTERN, state, item); | 1243 state.getNext().copyFrom(item, item.name, cp); |
1213 if (mode == ParseMode.LENIENT) { | |
1214 acceptWhitespace(cp, StateName.INSIDE_AFFIX_PATTERN, state, item
); | |
1215 } | |
1216 } | 1244 } |
1217 break; | 1245 break; |
1218 } | 1246 } |
1219 } | 1247 } |
1220 | 1248 |
1221 if (state.length == 0) { | 1249 if (state.length == 0) { |
1222 // No parse paths continue past this point. We have found the longest pa
rsable string | 1250 // No parse paths continue past this point. We have found the longest pa
rsable string |
1223 // from the input. Restore previous state without the offset and break. | 1251 // from the input. Restore previous state without the offset and break. |
1224 state.swapBack(); | 1252 state.swapBack(); |
1225 break; | 1253 break; |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1269 // Has a prefix or suffix that doesn't match | 1297 // Has a prefix or suffix that doesn't match |
1270 if (DEBUGGING) System.out.println("-> rejected due to mismatched pre
fix/suffix"); | 1298 if (DEBUGGING) System.out.println("-> rejected due to mismatched pre
fix/suffix"); |
1271 continue; | 1299 continue; |
1272 } | 1300 } |
1273 | 1301 |
1274 // Check that grouping sizes are valid. | 1302 // Check that grouping sizes are valid. |
1275 int grouping1 = properties.getGroupingSize(); | 1303 int grouping1 = properties.getGroupingSize(); |
1276 int grouping2 = properties.getSecondaryGroupingSize(); | 1304 int grouping2 = properties.getSecondaryGroupingSize(); |
1277 grouping1 = grouping1 > 0 ? grouping1 : grouping2; | 1305 grouping1 = grouping1 > 0 ? grouping1 : grouping2; |
1278 grouping2 = grouping2 > 0 ? grouping2 : grouping1; | 1306 grouping2 = grouping2 > 0 ? grouping2 : grouping1; |
1279 int groupingMin = properties.getMinimumGroupingDigits(); | |
1280 long groupingWidths = item.groupingWidths; | 1307 long groupingWidths = item.groupingWidths; |
1281 int numGroupingRegions = 16 - Long.numberOfLeadingZeros(groupingWidths
) / 4; | 1308 int numGroupingRegions = 16 - Long.numberOfLeadingZeros(groupingWidths
) / 4; |
1282 // If the last grouping is zero, accept strings like "1," but reject s
tring like "1,.23" | 1309 // If the last grouping is zero, accept strings like "1," but reject s
tring like "1,.23" |
1283 if (numGroupingRegions > 1 && (groupingWidths & 0xf) == 0) { | 1310 // Strip off multiple last-groupings to handle cases like "123,," or "
123 " |
| 1311 while (numGroupingRegions > 1 && (groupingWidths & 0xf) == 0) { |
1284 if (item.sawDecimalPoint) { | 1312 if (item.sawDecimalPoint) { |
1285 if (DEBUGGING) System.out.println("-> rejected due to decimal poin
t after grouping"); | 1313 if (DEBUGGING) System.out.println("-> rejected due to decimal poin
t after grouping"); |
1286 continue; | 1314 continue outer; |
1287 } else { | 1315 } else { |
1288 groupingWidths >>>= 4; | 1316 groupingWidths >>>= 4; |
1289 numGroupingRegions--; | 1317 numGroupingRegions--; |
1290 } | 1318 } |
1291 } | 1319 } |
1292 if (grouping1 < 0) { | 1320 if (grouping1 < 0) { |
1293 // OK (no grouping data available) | 1321 // OK (no grouping data available) |
1294 } else if (numGroupingRegions <= 1) { | 1322 } else if (numGroupingRegions <= 1) { |
1295 // OK (no grouping digits) | 1323 // OK (no grouping digits) |
1296 } else if ((groupingWidths & 0xf) != grouping1) { | 1324 } else if ((groupingWidths & 0xf) != grouping1) { |
1297 // First grouping size is invalid | 1325 // First grouping size is invalid |
1298 if (DEBUGGING) System.out.println("-> rejected due to first grouping
violation"); | 1326 if (DEBUGGING) System.out.println("-> rejected due to first grouping
violation"); |
1299 continue; | |
1300 } else if (numGroupingRegions == 2 | |
1301 && groupingMin > 0 | |
1302 && ((groupingWidths >>> 4) & 0xf) < groupingMin) { | |
1303 // String like "1,234" with groupingMin == 2 | |
1304 if (DEBUGGING) System.out.println("-> rejected due to minGrouping vi
olation"); | |
1305 continue; | 1327 continue; |
1306 } else if (((groupingWidths >>> ((numGroupingRegions - 1) * 4)) & 0xf)
> grouping2) { | 1328 } else if (((groupingWidths >>> ((numGroupingRegions - 1) * 4)) & 0xf)
> grouping2) { |
1307 // String like "1234,567" where the highest grouping is too large | 1329 // String like "1234,567" where the highest grouping is too large |
1308 if (DEBUGGING) System.out.println("-> rejected due to final grouping
violation"); | 1330 if (DEBUGGING) System.out.println("-> rejected due to final grouping
violation"); |
1309 continue; | 1331 continue; |
1310 } else { | 1332 } else { |
1311 for (int j = 1; j < numGroupingRegions - 1; j++) { | 1333 for (int j = 1; j < numGroupingRegions - 1; j++) { |
1312 if (((groupingWidths >>> (j * 4)) & 0xf) != grouping2) { | 1334 if (((groupingWidths >>> (j * 4)) & 0xf) != grouping2) { |
1313 // A grouping size somewhere in the middle is invalid | 1335 // A grouping size somewhere in the middle is invalid |
1314 if (DEBUGGING) System.out.println("-> rejected due to inner grou
ping violation"); | 1336 if (DEBUGGING) System.out.println("-> rejected due to inner grou
ping violation"); |
1315 continue outer; | 1337 continue outer; |
1316 } | 1338 } |
1317 } | 1339 } |
1318 } | 1340 } |
1319 } | 1341 } |
1320 | 1342 |
1321 // Optionally require that a decimal point be present. | 1343 // Optionally require that the presence of a decimal point matches the p
attern. |
1322 if (properties.getDecimalPatternMatchRequired() && !item.sawDecimalPoint
) { | 1344 if (properties.getDecimalPatternMatchRequired() |
| 1345 && item.sawDecimalPoint != PositiveDecimalFormat.allowsDecimalPoint(
properties)) { |
1323 if (DEBUGGING) System.out.println("-> rejected due to decimal point vi
olation"); | 1346 if (DEBUGGING) System.out.println("-> rejected due to decimal point vi
olation"); |
1324 continue; | 1347 continue; |
1325 } | 1348 } |
1326 | 1349 |
1327 // When parsing currencies, require that a currency symbol was found. | 1350 // When parsing currencies, require that a currency symbol was found. |
1328 if (parseCurrency && !item.sawCurrency) { | 1351 if (parseCurrency && !item.sawCurrency) { |
1329 if (DEBUGGING) System.out.println("-> rejected due to lack of currency
"); | 1352 if (DEBUGGING) System.out.println("-> rejected due to lack of currency
"); |
1330 continue; | 1353 continue; |
1331 } | 1354 } |
1332 | 1355 |
1333 // If we get here, then this candidate is acceptable. | 1356 // If we get here, then this candidate is acceptable. |
1334 // Use the earliest candidate in the list, or the one with the highest s
core. | 1357 // Use the earliest candidate in the list, or the one with the highest s
core. |
1335 if (best == null) { | 1358 if (best == null) { |
1336 best = item; | 1359 best = item; |
1337 } else if (item.score > best.score) { | 1360 } else if (item.score > best.score) { |
1338 best = item; | 1361 best = item; |
1339 } | 1362 } |
1340 } | 1363 } |
1341 | 1364 |
1342 if (DEBUGGING) { | 1365 if (DEBUGGING) { |
1343 System.out.println("- - - - - - - - - -"); | 1366 System.out.println("- - - - - - - - - -"); |
1344 } | 1367 } |
1345 | 1368 |
1346 if (best != null) { | 1369 if (best != null) { |
1347 ppos.setIndex(offset); | 1370 ppos.setIndex(offset - best.trailingCount); |
1348 return best; | 1371 return best; |
1349 } else { | 1372 } else { |
1350 ppos.setErrorIndex(offset); | 1373 ppos.setErrorIndex(offset); |
1351 return null; | 1374 return null; |
1352 } | 1375 } |
1353 } | 1376 } |
1354 } | 1377 } |
1355 | 1378 |
1356 /** | 1379 /** |
1357 * If <code>cp</code> is whitespace (as determined by the unicode set {@link #
UNISET_WHITESPACE}), | 1380 * If <code>cp</code> is whitespace (as determined by the unicode set {@link #
UNISET_WHITESPACE}), |
1358 * copies <code>item</code> to the new list in <code>state</code> and sets its
state name to | 1381 * copies <code>item</code> to the new list in <code>state</code> and sets its
state name to |
1359 * <code>nextName</code>. | 1382 * <code>nextName</code>. |
1360 * | 1383 * |
1361 * @param cp The code point to check. | 1384 * @param cp The code point to check. |
1362 * @param nextName The new state name if the check passes. | 1385 * @param nextName The new state name if the check passes. |
1363 * @param state The state object to update. | 1386 * @param state The state object to update. |
1364 * @param item The old state leading into the code point. | 1387 * @param item The old state leading into the code point. |
1365 */ | 1388 */ |
1366 private static void acceptWhitespace( | 1389 private static void acceptWhitespace( |
1367 int cp, StateName nextName, ParserState state, StateItem item) { | 1390 int cp, StateName nextName, ParserState state, StateItem item) { |
1368 if (UNISET_WHITESPACE.contains(cp)) { | 1391 if (UNISET_WHITESPACE.contains(cp)) { |
1369 StateItem next = state.getNext().copyFrom(item); | 1392 state.getNext().copyFrom(item, nextName, cp); |
1370 next.name = nextName; | |
1371 } | 1393 } |
1372 } | 1394 } |
1373 | 1395 |
1374 /** | 1396 /** |
1375 * If <code>cp</code> is a bidi control character (as determined by the unicod
e set {@link | 1397 * If <code>cp</code> is a bidi control character (as determined by the unicod
e set {@link |
1376 * #UNISET_BIDI}), copies <code>item</code> to the new list in <code>state</co
de> and sets its | 1398 * #UNISET_BIDI}), copies <code>item</code> to the new list in <code>state</co
de> and sets its |
1377 * state name to <code>nextName</code>. | 1399 * state name to <code>nextName</code>. |
1378 * | 1400 * |
1379 * @param cp The code point to check. | 1401 * @param cp The code point to check. |
1380 * @param nextName The new state name if the check passes. | 1402 * @param nextName The new state name if the check passes. |
1381 * @param state The state object to update. | 1403 * @param state The state object to update. |
1382 * @param item The old state leading into the code point. | 1404 * @param item The old state leading into the code point. |
1383 */ | 1405 */ |
1384 private static void acceptBidi(int cp, StateName nextName, ParserState state,
StateItem item) { | 1406 private static void acceptBidi(int cp, StateName nextName, ParserState state,
StateItem item) { |
1385 if (UNISET_BIDI.contains(cp)) { | 1407 if (UNISET_BIDI.contains(cp)) { |
1386 StateItem next = state.getNext().copyFrom(item); | 1408 state.getNext().copyFrom(item, nextName, cp); |
1387 next.name = nextName; | |
1388 } | 1409 } |
1389 } | 1410 } |
1390 | 1411 |
1391 /** | 1412 /** |
1392 * If <code>cp</code> is a padding character (as determined by {@link ParserSt
ate#paddingCp}), | 1413 * If <code>cp</code> is a padding character (as determined by {@link ParserSt
ate#paddingCp}), |
1393 * copies <code>item</code> to the new list in <code>state</code> and sets its
state name to | 1414 * copies <code>item</code> to the new list in <code>state</code> and sets its
state name to |
1394 * <code>nextName</code>. | 1415 * <code>nextName</code>. |
1395 * | 1416 * |
1396 * @param cp The code point to check. | 1417 * @param cp The code point to check. |
1397 * @param nextName The new state name if the check passes. | 1418 * @param nextName The new state name if the check passes. |
1398 * @param state The state object to update. | 1419 * @param state The state object to update. |
1399 * @param item The old state leading into the code point. | 1420 * @param item The old state leading into the code point. |
1400 */ | 1421 */ |
1401 private static void acceptPadding(int cp, StateName nextName, ParserState stat
e, StateItem item) { | 1422 private static void acceptPadding(int cp, StateName nextName, ParserState stat
e, StateItem item) { |
1402 CharSequence padding = state.properties.getPaddingString(); | 1423 CharSequence padding = state.properties.getPadString(); |
1403 if (padding == null || padding.length() == 0) return; | 1424 if (padding == null || padding.length() == 0) return; |
1404 int referenceCp = Character.codePointAt(padding, 0); | 1425 int referenceCp = Character.codePointAt(padding, 0); |
1405 if (cp == referenceCp) { | 1426 if (cp == referenceCp) { |
1406 StateItem next = state.getNext().copyFrom(item); | 1427 state.getNext().copyFrom(item, nextName, cp); |
1407 next.name = nextName; | |
1408 } | 1428 } |
1409 } | 1429 } |
1410 | 1430 |
1411 private static void acceptIntegerDigit( | 1431 private static void acceptIntegerDigit( |
1412 int cp, StateName nextName, ParserState state, StateItem item) { | 1432 int cp, StateName nextName, ParserState state, StateItem item) { |
1413 acceptDigitHelper(cp, nextName, state, item, DigitType.INTEGER); | 1433 acceptDigitHelper(cp, nextName, state, item, DigitType.INTEGER); |
1414 } | 1434 } |
1415 | 1435 |
1416 private static void acceptFractionDigit( | 1436 private static void acceptFractionDigit( |
1417 int cp, StateName nextName, ParserState state, StateItem item) { | 1437 int cp, StateName nextName, ParserState state, StateItem item) { |
(...skipping 24 matching lines...) Expand all Loading... |
1442 */ | 1462 */ |
1443 private static void acceptDigitHelper( | 1463 private static void acceptDigitHelper( |
1444 int cp, StateName nextName, ParserState state, StateItem item, DigitType t
ype) { | 1464 int cp, StateName nextName, ParserState state, StateItem item, DigitType t
ype) { |
1445 // Check the Unicode digit character property | 1465 // Check the Unicode digit character property |
1446 byte digit = (byte) UCharacter.digit(cp, 10); | 1466 byte digit = (byte) UCharacter.digit(cp, 10); |
1447 StateItem next = null; | 1467 StateItem next = null; |
1448 | 1468 |
1449 // Look for the digit: | 1469 // Look for the digit: |
1450 if (digit >= 0) { | 1470 if (digit >= 0) { |
1451 // Code point is a number | 1471 // Code point is a number |
1452 next = state.getNext().copyFrom(item); | 1472 next = state.getNext().copyFrom(item, nextName, -1); |
1453 next.name = nextName; | |
1454 } | 1473 } |
1455 | 1474 |
1456 // Do not perform the expensive string manipulations in fast mode. | 1475 // Do not perform the expensive string manipulations in fast mode. |
1457 if (digit < 0 && (state.mode == ParseMode.LENIENT || state.mode == ParseMode
.STRICT)) { | 1476 if (digit < 0 && (state.mode == ParseMode.LENIENT || state.mode == ParseMode
.STRICT)) { |
1458 if (state.digitTrie == null) { | 1477 if (state.digitTrie == null) { |
1459 // Check custom digits, all of which are at most one code point | 1478 // Check custom digits, all of which are at most one code point |
1460 for (byte d = 0; d < 10; d++) { | 1479 for (byte d = 0; d < 10; d++) { |
1461 int referenceCp = Character.codePointAt(state.symbols.getDigitStringsL
ocal()[d], 0); | 1480 int referenceCp = Character.codePointAt(state.symbols.getDigitStringsL
ocal()[d], 0); |
1462 if (cp == referenceCp) { | 1481 if (cp == referenceCp) { |
1463 digit = d; | 1482 digit = d; |
1464 next = state.getNext().copyFrom(item); | 1483 next = state.getNext().copyFrom(item, nextName, -1); |
1465 } | 1484 } |
1466 } | 1485 } |
1467 } else { | 1486 } else { |
1468 // Custom digits have more than one code point | 1487 // Custom digits have more than one code point |
1469 acceptDigitTrie(cp, nextName, state, item, type); | 1488 acceptDigitTrie(cp, nextName, state, item, type); |
1470 } | 1489 } |
1471 } | 1490 } |
1472 | 1491 |
1473 // Save state: | 1492 // Save state: |
1474 if (next != null) { | 1493 if (next != null) { |
1475 next.name = nextName; | |
1476 next.appendDigit(digit, type); | 1494 next.appendDigit(digit, type); |
1477 if (type == DigitType.INTEGER && (next.groupingWidths & 0xf) < 15) { | 1495 if (type == DigitType.INTEGER && (next.groupingWidths & 0xf) < 15) { |
1478 next.groupingWidths++; | 1496 next.groupingWidths++; |
1479 } | 1497 } |
1480 } | 1498 } |
1481 } | 1499 } |
1482 | 1500 |
1483 /** | 1501 /** |
1484 * If <code>cp</code> is a sign (as determined by the unicode sets {@link #UNI
SET_PLUS} and {@link | 1502 * If <code>cp</code> is a sign (as determined by the unicode sets {@link #UNI
SET_PLUS} and {@link |
1485 * #UNISET_MINUS}), copies <code>item</code> to the new list in <code>state</c
ode>. Loops back to | 1503 * #UNISET_MINUS}), copies <code>item</code> to the new list in <code>state</c
ode>. Loops back to |
1486 * the same state name. | 1504 * the same state name. |
1487 * | 1505 * |
1488 * @param cp The code point to check. | 1506 * @param cp The code point to check. |
1489 * @param state The state object to update. | 1507 * @param state The state object to update. |
1490 * @param item The old state leading into the code point. | 1508 * @param item The old state leading into the code point. |
1491 */ | 1509 */ |
1492 private static void acceptMinusOrPlusSign( | 1510 private static void acceptMinusOrPlusSign( |
1493 int cp, StateName nextName, ParserState state, StateItem item, boolean exp
onent) { | 1511 int cp, StateName nextName, ParserState state, StateItem item, boolean exp
onent) { |
| 1512 acceptMinusOrPlusSign(cp, nextName, null, state, item, exponent); |
| 1513 } |
| 1514 |
| 1515 private static void acceptMinusOrPlusSign( |
| 1516 int cp, |
| 1517 StateName returnTo1, |
| 1518 StateName returnTo2, |
| 1519 ParserState state, |
| 1520 StateItem item, |
| 1521 boolean exponent) { |
1494 if (UNISET_PLUS.contains(cp)) { | 1522 if (UNISET_PLUS.contains(cp)) { |
1495 StateItem next = state.getNext().copyFrom(item); | 1523 StateItem next = state.getNext().copyFrom(item, returnTo1, -1); |
1496 next.name = nextName; | 1524 next.returnTo1 = returnTo2; |
1497 } else if (UNISET_MINUS.contains(cp)) { | 1525 } else if (UNISET_MINUS.contains(cp)) { |
1498 StateItem next = state.getNext().copyFrom(item); | 1526 StateItem next = state.getNext().copyFrom(item, returnTo1, -1); |
1499 next.name = nextName; | 1527 next.returnTo1 = returnTo2; |
1500 if (exponent) { | 1528 if (exponent) { |
1501 next.sawNegativeExponent = true; | 1529 next.sawNegativeExponent = true; |
1502 } else { | 1530 } else { |
1503 next.sawNegative = true; | 1531 next.sawNegative = true; |
1504 } | 1532 } |
1505 } | 1533 } |
1506 } | 1534 } |
1507 | 1535 |
1508 /** | 1536 /** |
1509 * If <code>cp</code> is a grouping separator (as determined by the unicode se
t {@link | 1537 * If <code>cp</code> is a grouping separator (as determined by the unicode se
t {@link |
(...skipping 25 matching lines...) Expand all Loading... |
1535 return; | 1563 return; |
1536 } | 1564 } |
1537 if (cpType == SeparatorType.PERIOD_LIKE | 1565 if (cpType == SeparatorType.PERIOD_LIKE |
1538 && (state.decimalType1 == SeparatorType.PERIOD_LIKE | 1566 && (state.decimalType1 == SeparatorType.PERIOD_LIKE |
1539 || state.decimalType2 == SeparatorType.PERIOD_LIKE)) { | 1567 || state.decimalType2 == SeparatorType.PERIOD_LIKE)) { |
1540 return; | 1568 return; |
1541 } | 1569 } |
1542 } | 1570 } |
1543 | 1571 |
1544 // A match was found. | 1572 // A match was found. |
1545 StateItem next = state.getNext().copyFrom(item); | 1573 StateItem next = state.getNext().copyFrom(item, nextName, cp); |
1546 next.name = nextName; | |
1547 next.groupingCp = cp; | 1574 next.groupingCp = cp; |
1548 next.groupingWidths <<= 4; | 1575 next.groupingWidths <<= 4; |
1549 } else { | 1576 } else { |
1550 // Have already seen a grouping separator. | 1577 // Have already seen a grouping separator. |
1551 if (cp == item.groupingCp) { | 1578 if (cp == item.groupingCp) { |
1552 StateItem next = state.getNext().copyFrom(item); | 1579 StateItem next = state.getNext().copyFrom(item, nextName, cp); |
1553 next.name = nextName; | |
1554 next.groupingWidths <<= 4; | 1580 next.groupingWidths <<= 4; |
1555 } | 1581 } |
1556 } | 1582 } |
1557 } | 1583 } |
1558 | 1584 |
1559 /** | 1585 /** |
1560 * If <code>cp</code> is a decimal (as determined by the unicode set {@link #U
NISET_DECIMAL}), | 1586 * If <code>cp</code> is a decimal (as determined by the unicode set {@link #U
NISET_DECIMAL}), |
1561 * copies <code>item</code> to the new list in <code>state</code> and goes to
{@link | 1587 * copies <code>item</code> to the new list in <code>state</code> and goes to
{@link |
1562 * StateName#AFTER_FRACTION_DIGIT}. Also accepts if <code>cp</code> is the loc
ale-specific decimal | 1588 * StateName#AFTER_FRACTION_DIGIT}. Also accepts if <code>cp</code> is the loc
ale-specific decimal |
1563 * point in {@link ParserState#decimalCp}, in which case the {@link StateItem#
usesLocaleSymbols} | 1589 * point in {@link ParserState#decimalCp}, in which case the {@link StateItem#
usesLocaleSymbols} |
1564 * flag is also set. | 1590 * flag is also set. |
1565 * | 1591 * |
1566 * @param cp The code point to check. | 1592 * @param cp The code point to check. |
1567 * @param state The state object to update. | 1593 * @param state The state object to update. |
1568 * @param item The old state leading into the code point. | 1594 * @param item The old state leading into the code point. |
1569 */ | 1595 */ |
1570 private static void acceptDecimalPoint( | 1596 private static void acceptDecimalPoint( |
1571 int cp, StateName nextName, ParserState state, StateItem item) { | 1597 int cp, StateName nextName, ParserState state, StateItem item) { |
1572 if (cp == item.groupingCp) { | 1598 if (cp == item.groupingCp) { |
1573 // Don't accept a decimal point that is the same as the grouping separator | 1599 // Don't accept a decimal point that is the same as the grouping separator |
1574 return; | 1600 return; |
1575 } | 1601 } |
1576 | 1602 |
1577 SeparatorType cpType = SeparatorType.fromCp(cp, state.mode); | 1603 SeparatorType cpType = SeparatorType.fromCp(cp, state.mode); |
1578 | 1604 |
1579 // Always accept if exactly the same as the locale symbol. | 1605 // We require that the decimal separator be in the same class as the locale. |
1580 // Otherwise, reject if UNKNOWN, OTHER, the same class as the decimal separa
tor. | 1606 if (cpType != state.decimalType1 && cpType != state.decimalType2) { |
1581 if (cp != state.decimalCp1 && cp != state.decimalCp2) { | 1607 return; |
1582 if (cpType == SeparatorType.UNKNOWN || cpType == SeparatorType.OTHER_GROUP
ING) { | 1608 } |
| 1609 |
| 1610 // If in UNKNOWN or OTHER, require an exact match. |
| 1611 if (cpType == SeparatorType.OTHER_GROUPING || cpType == SeparatorType.UNKNOW
N) { |
| 1612 if (cp != state.decimalCp1 && cp != state.decimalCp2) { |
1583 return; | 1613 return; |
1584 } | 1614 } |
1585 if (cpType == SeparatorType.COMMA_LIKE | |
1586 && (state.groupingType1 == SeparatorType.COMMA_LIKE | |
1587 || state.groupingType2 == SeparatorType.COMMA_LIKE)) { | |
1588 return; | |
1589 } | |
1590 if (cpType == SeparatorType.PERIOD_LIKE | |
1591 && (state.groupingType1 == SeparatorType.PERIOD_LIKE | |
1592 || state.groupingType2 == SeparatorType.PERIOD_LIKE)) { | |
1593 return; | |
1594 } | |
1595 } | 1615 } |
1596 | 1616 |
1597 // A match was found. | 1617 // A match was found. |
1598 StateItem next = state.getNext().copyFrom(item); | 1618 StateItem next = state.getNext().copyFrom(item, nextName, -1); |
1599 next.name = nextName; | |
1600 next.sawDecimalPoint = true; | 1619 next.sawDecimalPoint = true; |
1601 } | 1620 } |
1602 | 1621 |
1603 private static void acceptNan(int cp, StateName nextName, ParserState state, S
tateItem item) { | 1622 private static void acceptNan(int cp, StateName nextName, ParserState state, S
tateItem item) { |
1604 CharSequence nan = state.symbols.getNaN(); | 1623 CharSequence nan = state.symbols.getNaN(); |
1605 long added = acceptString(cp, nextName, null, state, item, nan, 0); | 1624 long added = acceptString(cp, nextName, null, state, item, nan, 0); |
1606 | 1625 |
1607 // Set state in the items that were added by the function call | 1626 // Set state in the items that were added by the function call |
1608 for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { | 1627 for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { |
1609 if (((1L << i) & added) != 0) { | 1628 if (((1L << i) & added) != 0) { |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1713 StateName returnTo2, | 1732 StateName returnTo2, |
1714 ParserState state, | 1733 ParserState state, |
1715 StateItem item, | 1734 StateItem item, |
1716 CharSequence str, | 1735 CharSequence str, |
1717 int offset) { | 1736 int offset) { |
1718 if (str == null || str.length() == 0) return 0L; | 1737 if (str == null || str.length() == 0) return 0L; |
1719 | 1738 |
1720 // Fast path for fast mode | 1739 // Fast path for fast mode |
1721 if (state.mode == ParseMode.FAST && Character.codePointAt(str, offset) != cp
) return 0L; | 1740 if (state.mode == ParseMode.FAST && Character.codePointAt(str, offset) != cp
) return 0L; |
1722 | 1741 |
1723 // Skip over ignorable code points at the beginning of the string. | 1742 // Skip over bidi code points at the beginning of the string. |
1724 // They will be accepted in the main loop. | 1743 // They will be accepted in the main loop. |
1725 int count = 0; | 1744 int count = 0; |
1726 int referenceCp = -1; | 1745 int referenceCp = -1; |
1727 boolean equals = false; | 1746 boolean equals = false; |
1728 for (; offset < str.length(); offset += count) { | 1747 for (; offset < str.length(); offset += count) { |
1729 referenceCp = Character.codePointAt(str, offset); | 1748 referenceCp = Character.codePointAt(str, offset); |
1730 count = Character.charCount(referenceCp); | 1749 count = Character.charCount(referenceCp); |
1731 equals = codePointEquals(cp, referenceCp, state); | 1750 equals = codePointEquals(cp, referenceCp, state); |
1732 if (!isIgnorable(referenceCp, state)) break; | 1751 if (!UNISET_BIDI.contains(cp)) break; |
1733 } | 1752 } |
1734 | 1753 |
1735 if (equals) { | 1754 if (equals) { |
1736 // Matches first code point of the string | 1755 // Matches first code point of the string |
1737 StateItem next = state.getNext().copyFrom(item); | 1756 StateItem next = state.getNext().copyFrom(item, null, cp); |
1738 | 1757 |
1739 // Skip over ignorable code points in the middle of the string. | 1758 // Skip over ignorable code points in the middle of the string. |
1740 // They will be accepted in the main loop. | 1759 // They will be accepted in the main loop. |
1741 offset += count; | 1760 offset += count; |
1742 for (; offset < str.length(); offset += count) { | 1761 for (; offset < str.length(); offset += count) { |
1743 referenceCp = Character.codePointAt(str, offset); | 1762 referenceCp = Character.codePointAt(str, offset); |
1744 count = Character.charCount(referenceCp); | 1763 count = Character.charCount(referenceCp); |
1745 if (!isIgnorable(referenceCp, state)) break; | 1764 if (!UNISET_BIDI.contains(cp)) break; |
1746 } | 1765 } |
1747 | 1766 |
1748 if (offset < str.length()) { | 1767 if (offset < str.length()) { |
1749 // String has more interesting code points. | 1768 // String has more interesting code points. |
1750 next.name = StateName.INSIDE_STRING; | 1769 next.name = StateName.INSIDE_STRING; |
1751 next.returnTo1 = returnTo1; | 1770 next.returnTo1 = returnTo1; |
1752 next.returnTo2 = returnTo2; | 1771 next.returnTo2 = returnTo2; |
1753 next.currentString = str; | 1772 next.currentString = str; |
1754 next.currentOffset = offset; | 1773 next.currentOffset = offset; |
1755 } else { | 1774 } else { |
1756 // We've reached the end of the string. | 1775 // We've reached the end of the string. |
1757 next.name = returnTo1; | 1776 next.name = returnTo1; |
| 1777 next.trailingCount = 0; |
1758 next.returnTo1 = returnTo2; | 1778 next.returnTo1 = returnTo2; |
1759 next.returnTo2 = null; | 1779 next.returnTo2 = null; |
1760 } | 1780 } |
1761 return 1L << state.lastInsertedIndex(); | 1781 return 1L << state.lastInsertedIndex(); |
1762 } | 1782 } |
1763 return 0L; | 1783 return 0L; |
1764 } | 1784 } |
1765 | 1785 |
1766 private static void acceptAffixPatternOffset(int cp, ParserState state, StateI
tem item) { | 1786 private static void acceptAffixPatternOffset(int cp, ParserState state, StateI
tem item) { |
1767 acceptAffixPattern( | 1787 acceptAffixPattern( |
(...skipping 24 matching lines...) Expand all Loading... |
1792 while (hasNext) { | 1812 while (hasNext) { |
1793 tag = AffixPatternUtils.nextToken(tag, str); | 1813 tag = AffixPatternUtils.nextToken(tag, str); |
1794 typeOrCp = AffixPatternUtils.getTypeOrCp(tag); | 1814 typeOrCp = AffixPatternUtils.getTypeOrCp(tag); |
1795 hasNext = AffixPatternUtils.hasNext(tag, str); | 1815 hasNext = AffixPatternUtils.hasNext(tag, str); |
1796 if (typeOrCp < 0 || !isIgnorable(typeOrCp, state)) break; | 1816 if (typeOrCp < 0 || !isIgnorable(typeOrCp, state)) break; |
1797 } | 1817 } |
1798 | 1818 |
1799 // Convert from the returned tag to a code point, string, or currency to che
ck | 1819 // Convert from the returned tag to a code point, string, or currency to che
ck |
1800 int resolvedCp = -1; | 1820 int resolvedCp = -1; |
1801 CharSequence resolvedStr = null; | 1821 CharSequence resolvedStr = null; |
| 1822 boolean resolvedMinusSign = false; |
| 1823 boolean resolvedPlusSign = false; |
1802 boolean resolvedCurrency = false; | 1824 boolean resolvedCurrency = false; |
1803 if (typeOrCp < 0) { | 1825 if (typeOrCp < 0) { |
1804 // Symbol | 1826 // Symbol |
1805 switch (typeOrCp) { | 1827 switch (typeOrCp) { |
1806 case AffixPatternUtils.TYPE_MINUS_SIGN: | 1828 case AffixPatternUtils.TYPE_MINUS_SIGN: |
1807 resolvedStr = state.symbols.getMinusSignString(); | 1829 resolvedMinusSign = true; |
1808 break; | 1830 break; |
1809 case AffixPatternUtils.TYPE_PLUS_SIGN: | 1831 case AffixPatternUtils.TYPE_PLUS_SIGN: |
1810 resolvedStr = state.symbols.getPlusSignString(); | 1832 resolvedPlusSign = true; |
1811 break; | 1833 break; |
1812 case AffixPatternUtils.TYPE_PERCENT: | 1834 case AffixPatternUtils.TYPE_PERCENT: |
1813 resolvedStr = state.symbols.getPercentString(); | 1835 resolvedStr = state.symbols.getPercentString(); |
1814 break; | 1836 break; |
1815 case AffixPatternUtils.TYPE_PERMILLE: | 1837 case AffixPatternUtils.TYPE_PERMILLE: |
1816 resolvedStr = state.symbols.getPerMillString(); | 1838 resolvedStr = state.symbols.getPerMillString(); |
1817 break; | 1839 break; |
1818 case AffixPatternUtils.TYPE_CURRENCY_SINGLE: | 1840 case AffixPatternUtils.TYPE_CURRENCY_SINGLE: |
1819 case AffixPatternUtils.TYPE_CURRENCY_DOUBLE: | 1841 case AffixPatternUtils.TYPE_CURRENCY_DOUBLE: |
1820 case AffixPatternUtils.TYPE_CURRENCY_TRIPLE: | 1842 case AffixPatternUtils.TYPE_CURRENCY_TRIPLE: |
(...skipping 10 matching lines...) Expand all Loading... |
1831 // They will be accepted in the main loop. | 1853 // They will be accepted in the main loop. |
1832 while (hasNext) { | 1854 while (hasNext) { |
1833 long futureTag = AffixPatternUtils.nextToken(tag, str); | 1855 long futureTag = AffixPatternUtils.nextToken(tag, str); |
1834 int futureTypeOrCp = AffixPatternUtils.getTypeOrCp(futureTag); | 1856 int futureTypeOrCp = AffixPatternUtils.getTypeOrCp(futureTag); |
1835 if (futureTypeOrCp < 0 || !isIgnorable(futureTypeOrCp, state)) break; | 1857 if (futureTypeOrCp < 0 || !isIgnorable(futureTypeOrCp, state)) break; |
1836 tag = futureTag; | 1858 tag = futureTag; |
1837 typeOrCp = futureTypeOrCp; | 1859 typeOrCp = futureTypeOrCp; |
1838 hasNext = AffixPatternUtils.hasNext(tag, str); | 1860 hasNext = AffixPatternUtils.hasNext(tag, str); |
1839 } | 1861 } |
1840 | 1862 |
1841 long addedNormal = 0L; | 1863 long added = 0L; |
1842 long addedCurrencyNeeded = 0L; | |
1843 if (resolvedCp >= 0) { | 1864 if (resolvedCp >= 0) { |
1844 // Code point | 1865 // Code point |
1845 if (!codePointEquals(cp, resolvedCp, state)) return 0L; | 1866 if (!codePointEquals(cp, resolvedCp, state)) return 0L; |
1846 StateItem next = state.getNext().copyFrom(item); | 1867 StateItem next = state.getNext().copyFrom(item, null, cp); |
1847 | 1868 |
1848 if (hasNext) { | 1869 if (hasNext) { |
1849 // Additional tokens in affix string. | 1870 // Additional tokens in affix string. |
1850 next.name = StateName.INSIDE_AFFIX_PATTERN; | 1871 next.name = StateName.INSIDE_AFFIX_PATTERN; |
1851 next.returnTo1 = returnTo; | 1872 next.returnTo1 = returnTo; |
1852 } else { | 1873 } else { |
1853 // Reached last token in affix string. | 1874 // Reached last token in affix string. |
1854 next.name = returnTo; | 1875 next.name = returnTo; |
| 1876 next.trailingCount = 0; |
1855 next.returnTo1 = null; | 1877 next.returnTo1 = null; |
1856 } | 1878 } |
1857 addedNormal |= 1L << state.lastInsertedIndex(); | 1879 added |= 1L << state.lastInsertedIndex(); |
| 1880 } |
| 1881 if (resolvedMinusSign || resolvedPlusSign) { |
| 1882 // Sign |
| 1883 if (hasNext) { |
| 1884 acceptMinusOrPlusSign(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, stat
e, item, false); |
| 1885 } else { |
| 1886 acceptMinusOrPlusSign(cp, returnTo, null, state, item, false); |
| 1887 } |
| 1888 // Decide whether to accept a custom string |
| 1889 if (resolvedMinusSign) { |
| 1890 String mss = state.symbols.getMinusSignString(); |
| 1891 int mssCp = Character.codePointAt(mss, 0); |
| 1892 if (mss.length() != Character.charCount(mssCp) || !UNISET_MINUS.contains
(mssCp)) { |
| 1893 resolvedStr = mss; |
| 1894 } |
| 1895 } |
| 1896 if (resolvedPlusSign) { |
| 1897 String pss = state.symbols.getPlusSignString(); |
| 1898 int pssCp = Character.codePointAt(pss, 0); |
| 1899 if (pss.length() != Character.charCount(pssCp) || !UNISET_MINUS.contains
(pssCp)) { |
| 1900 resolvedStr = pss; |
| 1901 } |
| 1902 } |
1858 } | 1903 } |
1859 if (resolvedStr != null) { | 1904 if (resolvedStr != null) { |
1860 // String symbol | 1905 // String symbol |
1861 if (hasNext) { | 1906 if (hasNext) { |
1862 addedNormal |= | 1907 added |= |
1863 acceptString(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, it
em, resolvedStr, 0); | 1908 acceptString(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, it
em, resolvedStr, 0); |
1864 } else { | 1909 } else { |
1865 addedNormal |= acceptString(cp, returnTo, null, state, item, resolvedStr
, 0); | 1910 added |= acceptString(cp, returnTo, null, state, item, resolvedStr, 0); |
1866 } | 1911 } |
1867 } | 1912 } |
1868 if (resolvedCurrency && !item.sawCurrency) { | 1913 if (resolvedCurrency) { |
1869 // Accept from local currency information | 1914 // Currency symbol |
1870 CharSequence str1, str2; | 1915 if (hasNext) { |
1871 Currency currency = state.properties.getCurrency(); | 1916 added |= acceptCurrency(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, st
ate, item); |
1872 if (currency != null) { | |
1873 str1 = currency.getName(state.symbols.getULocale(), Currency.SYMBOL_NAME
, null); | |
1874 str2 = currency.getCurrencyCode(); | |
1875 } else { | 1917 } else { |
1876 str1 = state.symbols.getCurrencySymbol(); | 1918 added |= acceptCurrency(cp, returnTo, null, state, item); |
1877 str2 = state.symbols.getInternationalCurrencySymbol(); | |
1878 } | |
1879 if (hasNext) { | |
1880 addedCurrencyNeeded |= | |
1881 acceptString(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, it
em, str1, 0); | |
1882 addedCurrencyNeeded |= | |
1883 acceptString(cp, StateName.INSIDE_AFFIX_PATTERN, returnTo, state, it
em, str2, 0); | |
1884 } else { | |
1885 addedCurrencyNeeded |= acceptString(cp, returnTo, null, state, item, str
1, 0); | |
1886 addedCurrencyNeeded |= acceptString(cp, returnTo, null, state, item, str
2, 0); | |
1887 } | |
1888 // Accept from CLDR currency data (will not happen unless state.parseCurre
ncy is true) | |
1889 if (hasNext) { | |
1890 addedNormal |= acceptCurrency(cp, StateName.INSIDE_AFFIX_PATTERN, return
To, state, item); | |
1891 } else { | |
1892 addedNormal |= acceptCurrency(cp, returnTo, null, state, item); | |
1893 } | 1919 } |
1894 } | 1920 } |
1895 | 1921 |
1896 // Set state in the items that were added by the function calls | 1922 // Set state in the items that were added by the function calls |
1897 long added = addedNormal | addedCurrencyNeeded; | |
1898 for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { | 1923 for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { |
1899 if (((1L << i) & added) != 0) { | 1924 if (((1L << i) & added) != 0) { |
1900 state.getItem(i).currentAffixPattern = str; | 1925 state.getItem(i).currentAffixPattern = str; |
1901 state.getItem(i).currentStepwiseParserTag = tag; | 1926 state.getItem(i).currentStepwiseParserTag = tag; |
1902 } | |
1903 if (((1L << i) & addedCurrencyNeeded) != 0) { | |
1904 // Save the currency from symbols. | |
1905 state.getItem(i).sawCurrency = true; | |
1906 state.getItem(i).isoCode = state.symbols.getCurrency().getCurrencyCode()
; | |
1907 } | 1927 } |
1908 } | 1928 } |
1909 return added; | 1929 return added; |
1910 } | 1930 } |
1911 | 1931 |
1912 /** | 1932 /** |
1913 * This method can add up to four items to the new list in <code>state</code>. | 1933 * This method can add up to four items to the new list in <code>state</code>. |
1914 * | 1934 * |
1915 * <p>If <code>cp</code> is equal to any known ISO code or long name, copies <
code>item</code> to | 1935 * <p>If <code>cp</code> is equal to any known ISO code or long name, copies <
code>item</code> to |
1916 * the new list in <code>state</code> and sets its ISO code to the correspondi
ng currency. | 1936 * the new list in <code>state</code> and sets its ISO code to the correspondi
ng currency. |
1917 * | 1937 * |
1918 * <p>If <code>cp</code> is the first code point of any ISO code or long name
having more than one | 1938 * <p>If <code>cp</code> is the first code point of any ISO code or long name
having more than one |
1919 * code point in length, copies <code>item</code> to the new list in <code>sta
te</code> along with | 1939 * code point in length, copies <code>item</code> to the new list in <code>sta
te</code> along with |
1920 * an instance of {@link TextTrieMap.ParseState} for tracking the following co
de points. | 1940 * an instance of {@link TextTrieMap.ParseState} for tracking the following co
de points. |
1921 * | 1941 * |
1922 * @param cp The code point to check. | 1942 * @param cp The code point to check. |
1923 * @param state The state object to update. | 1943 * @param state The state object to update. |
1924 * @param item The old state leading into the code point. | 1944 * @param item The old state leading into the code point. |
1925 */ | 1945 */ |
1926 private static void acceptCurrency( | 1946 private static void acceptCurrency( |
1927 int cp, StateName nextName, ParserState state, StateItem item) { | 1947 int cp, StateName nextName, ParserState state, StateItem item) { |
1928 acceptCurrency(cp, nextName, null, state, item); | 1948 acceptCurrency(cp, nextName, null, state, item); |
1929 } | 1949 } |
1930 | 1950 |
1931 private static long acceptCurrency( | 1951 private static long acceptCurrency( |
1932 int cp, StateName returnTo1, StateName returnTo2, ParserState state, State
Item item) { | 1952 int cp, StateName returnTo1, StateName returnTo2, ParserState state, State
Item item) { |
1933 if (item.sawCurrency || !state.parseCurrency) return 0L; | 1953 if (item.sawCurrency) return 0L; |
1934 ULocale uloc = state.symbols.getULocale(); | 1954 long added = 0L; |
1935 TextTrieMap<Currency.CurrencyStringInfo>.ParseState trie1 = | 1955 |
1936 Currency.openParseState(uloc, cp, Currency.LONG_NAME); | 1956 // Accept from local currency information |
1937 TextTrieMap<Currency.CurrencyStringInfo>.ParseState trie2 = | 1957 String str1, str2; |
1938 Currency.openParseState(uloc, cp, Currency.SYMBOL_NAME); | 1958 Currency currency = state.properties.getCurrency(); |
1939 long accepted = 0L; | 1959 if (currency != null) { |
1940 accepted |= acceptCurrencyHelper(cp, returnTo1, returnTo2, state, item, trie
1); | 1960 str1 = currency.getName(state.symbols.getULocale(), Currency.SYMBOL_NAME,
null); |
1941 accepted |= acceptCurrencyHelper(cp, returnTo1, returnTo2, state, item, trie
2); | 1961 str2 = currency.getCurrencyCode(); |
1942 return accepted; | 1962 // TODO: Should we also accept long names? In currency mode, they are in t
he CLDR data. |
| 1963 } else { |
| 1964 currency = state.symbols.getCurrency(); |
| 1965 str1 = state.symbols.getCurrencySymbol(); |
| 1966 str2 = state.symbols.getInternationalCurrencySymbol(); |
| 1967 } |
| 1968 added |= acceptString(cp, returnTo1, returnTo2, state, item, str1, 0); |
| 1969 added |= acceptString(cp, returnTo1, returnTo2, state, item, str2, 0); |
| 1970 for (int i = Long.numberOfTrailingZeros(added); (1L << i) <= added; i++) { |
| 1971 if (((1L << i) & added) != 0) { |
| 1972 state.getItem(i).sawCurrency = true; |
| 1973 state.getItem(i).isoCode = str2; |
| 1974 } |
| 1975 } |
| 1976 |
| 1977 // Accept from CLDR data |
| 1978 if (state.parseCurrency) { |
| 1979 ULocale uloc = state.symbols.getULocale(); |
| 1980 TextTrieMap<Currency.CurrencyStringInfo>.ParseState trie1 = |
| 1981 Currency.openParseState(uloc, cp, Currency.LONG_NAME); |
| 1982 TextTrieMap<Currency.CurrencyStringInfo>.ParseState trie2 = |
| 1983 Currency.openParseState(uloc, cp, Currency.SYMBOL_NAME); |
| 1984 added |= acceptCurrencyHelper(cp, returnTo1, returnTo2, state, item, trie1
); |
| 1985 added |= acceptCurrencyHelper(cp, returnTo1, returnTo2, state, item, trie2
); |
| 1986 } |
| 1987 |
| 1988 return added; |
1943 } | 1989 } |
1944 | 1990 |
1945 /** | 1991 /** |
1946 * If <code>cp</code> is the next code point of any currency, copies <code>ite
m</code> to the new | 1992 * If <code>cp</code> is the next code point of any currency, copies <code>ite
m</code> to the new |
1947 * list in <code>state</code> along with an instance of {@link TextTrieMap.Par
seState} for | 1993 * list in <code>state</code> along with an instance of {@link TextTrieMap.Par
seState} for |
1948 * tracking the following code points. | 1994 * tracking the following code points. |
1949 * | 1995 * |
1950 * <p>This method should only be called in a state following {@link #acceptCur
rency}. | 1996 * <p>This method should only be called in a state following {@link #acceptCur
rency}. |
1951 * | 1997 * |
1952 * @param cp The code point to check. | 1998 * @param cp The code point to check. |
(...skipping 12 matching lines...) Expand all Loading... |
1965 ParserState state, | 2011 ParserState state, |
1966 StateItem item, | 2012 StateItem item, |
1967 TextTrieMap<Currency.CurrencyStringInfo>.ParseState trieState) { | 2013 TextTrieMap<Currency.CurrencyStringInfo>.ParseState trieState) { |
1968 if (trieState == null) return 0L; | 2014 if (trieState == null) return 0L; |
1969 trieState.accept(cp); | 2015 trieState.accept(cp); |
1970 long added = 0L; | 2016 long added = 0L; |
1971 Iterator<Currency.CurrencyStringInfo> currentMatches = trieState.getCurrentM
atches(); | 2017 Iterator<Currency.CurrencyStringInfo> currentMatches = trieState.getCurrentM
atches(); |
1972 if (currentMatches != null) { | 2018 if (currentMatches != null) { |
1973 // Match on current code point | 2019 // Match on current code point |
1974 // TODO: What should happen with multiple currency matches? | 2020 // TODO: What should happen with multiple currency matches? |
1975 StateItem next = state.getNext().copyFrom(item); | 2021 StateItem next = state.getNext().copyFrom(item, returnTo1, -1); |
1976 next.name = returnTo1; | |
1977 next.returnTo1 = returnTo2; | 2022 next.returnTo1 = returnTo2; |
1978 next.returnTo2 = null; | 2023 next.returnTo2 = null; |
1979 next.sawCurrency = true; | 2024 next.sawCurrency = true; |
1980 next.isoCode = currentMatches.next().getISOCode(); | 2025 next.isoCode = currentMatches.next().getISOCode(); |
1981 added |= 1L << state.lastInsertedIndex(); | 2026 added |= 1L << state.lastInsertedIndex(); |
1982 } | 2027 } |
1983 if (!trieState.atEnd()) { | 2028 if (!trieState.atEnd()) { |
1984 // Prepare for matches on future code points | 2029 // Prepare for matches on future code points |
1985 StateItem next = state.getNext().copyFrom(item); | 2030 StateItem next = state.getNext().copyFrom(item, StateName.INSIDE_CURRENCY,
-1); |
1986 next.name = StateName.INSIDE_CURRENCY; | |
1987 next.returnTo1 = returnTo1; | 2031 next.returnTo1 = returnTo1; |
1988 next.returnTo2 = returnTo2; | 2032 next.returnTo2 = returnTo2; |
1989 next.currentCurrencyTrieState = trieState; | 2033 next.currentCurrencyTrieState = trieState; |
1990 added |= 1L << state.lastInsertedIndex(); | 2034 added |= 1L << state.lastInsertedIndex(); |
1991 } | 2035 } |
1992 return added; | 2036 return added; |
1993 } | 2037 } |
1994 | 2038 |
1995 private static long acceptDigitTrie( | 2039 private static long acceptDigitTrie( |
1996 int cp, StateName nextName, ParserState state, StateItem item, DigitType t
ype) { | 2040 int cp, StateName nextName, ParserState state, StateItem item, DigitType t
ype) { |
(...skipping 15 matching lines...) Expand all Loading... |
2012 StateItem item, | 2056 StateItem item, |
2013 DigitType type, | 2057 DigitType type, |
2014 TextTrieMap<Byte>.ParseState trieState) { | 2058 TextTrieMap<Byte>.ParseState trieState) { |
2015 if (trieState == null) return 0L; | 2059 if (trieState == null) return 0L; |
2016 trieState.accept(cp); | 2060 trieState.accept(cp); |
2017 long added = 0L; | 2061 long added = 0L; |
2018 Iterator<Byte> currentMatches = trieState.getCurrentMatches(); | 2062 Iterator<Byte> currentMatches = trieState.getCurrentMatches(); |
2019 if (currentMatches != null) { | 2063 if (currentMatches != null) { |
2020 // Match on current code point | 2064 // Match on current code point |
2021 byte digit = currentMatches.next(); | 2065 byte digit = currentMatches.next(); |
2022 StateItem next = state.getNext().copyFrom(item); | 2066 StateItem next = state.getNext().copyFrom(item, returnTo1, -1); |
2023 next.name = returnTo1; | |
2024 next.returnTo1 = null; | 2067 next.returnTo1 = null; |
2025 next.appendDigit(digit, type); | 2068 next.appendDigit(digit, type); |
2026 added |= 1L << state.lastInsertedIndex(); | 2069 added |= 1L << state.lastInsertedIndex(); |
2027 } | 2070 } |
2028 if (!trieState.atEnd()) { | 2071 if (!trieState.atEnd()) { |
2029 // Prepare for matches on future code points | 2072 // Prepare for matches on future code points |
2030 StateItem next = state.getNext().copyFrom(item); | 2073 StateItem next = state.getNext().copyFrom(item, StateName.INSIDE_DIGIT, -1
); |
2031 next.name = StateName.INSIDE_DIGIT; | |
2032 next.returnTo1 = returnTo1; | 2074 next.returnTo1 = returnTo1; |
2033 next.currentDigitTrieState = trieState; | 2075 next.currentDigitTrieState = trieState; |
2034 next.currentDigitType = type; | 2076 next.currentDigitType = type; |
2035 added |= 1L << state.lastInsertedIndex(); | 2077 added |= 1L << state.lastInsertedIndex(); |
2036 } | 2078 } |
2037 return added; | 2079 return added; |
2038 } | 2080 } |
2039 | 2081 |
2040 /** | 2082 /** |
2041 * Checks whether the two given code points are equal after applying case mapp
ing as requested in | 2083 * Checks whether the two given code points are equal after applying case mapp
ing as requested in |
(...skipping 17 matching lines...) Expand all Loading... |
2059 * @param cp The code point to test. Returns false if cp is negative. | 2101 * @param cp The code point to test. Returns false if cp is negative. |
2060 * @param state The current {@link ParserState}, used for determining strict m
ode. | 2102 * @param state The current {@link ParserState}, used for determining strict m
ode. |
2061 * @return true if cp is bidi or whitespace in lenient mode; false otherwise. | 2103 * @return true if cp is bidi or whitespace in lenient mode; false otherwise. |
2062 */ | 2104 */ |
2063 private static boolean isIgnorable(int cp, ParserState state) { | 2105 private static boolean isIgnorable(int cp, ParserState state) { |
2064 if (cp < 0) return false; | 2106 if (cp < 0) return false; |
2065 if (UNISET_BIDI.contains(cp)) return true; | 2107 if (UNISET_BIDI.contains(cp)) return true; |
2066 return state.mode == ParseMode.LENIENT && UNISET_WHITESPACE.contains(cp); | 2108 return state.mode == ParseMode.LENIENT && UNISET_WHITESPACE.contains(cp); |
2067 } | 2109 } |
2068 } | 2110 } |
LEFT | RIGHT |