LEFT | RIGHT |
(no file at all) | |
| 1 // © 2017 and later: Unicode, Inc. and others. |
| 2 // License & terms of use: http://www.unicode.org/copyright.html#License |
| 3 package com.ibm.icu.impl.number.parse; |
| 4 |
| 5 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; |
| 6 import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key; |
| 7 import com.ibm.icu.lang.UCharacter; |
| 8 import com.ibm.icu.number.Grouper; |
| 9 import com.ibm.icu.text.DecimalFormatSymbols; |
| 10 import com.ibm.icu.text.UnicodeSet; |
| 11 |
| 12 /** |
| 13 * @author sffc |
| 14 * |
| 15 */ |
| 16 public class DecimalMatcher implements NumberParseMatcher { |
| 17 |
| 18 private final boolean requireGroupingMatch; |
| 19 private final boolean groupingDisabled; |
| 20 private final int grouping1; |
| 21 private final int grouping2; |
| 22 private final boolean integerOnly; |
| 23 private final boolean isScientific; |
| 24 |
| 25 // Assumption: these sets all consist of single code points. If this assumpt
ion needs to be broken, |
| 26 // fix getLeadCodePoints() as well as matching logic. Be careful of the perf
ormance impact. |
| 27 private final UnicodeSet groupingUniSet; |
| 28 private final UnicodeSet decimalUniSet; |
| 29 private final UnicodeSet separatorSet; |
| 30 private final UnicodeSet leadSet; |
| 31 private final String[] digitStrings; |
| 32 |
| 33 public static DecimalMatcher getInstance( |
| 34 DecimalFormatSymbols symbols, |
| 35 Grouper grouper, |
| 36 int parseFlags) { |
| 37 // TODO: Cache popular instances? |
| 38 return new DecimalMatcher(symbols, grouper, parseFlags); |
| 39 } |
| 40 |
| 41 private DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int pa
rseFlags) { |
| 42 Key groupingKey, decimalKey; |
| 43 String groupingSeparator, decimalSeparator; |
| 44 if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS)) { |
| 45 groupingSeparator = symbols.getMonetaryGroupingSeparatorString(); |
| 46 decimalSeparator = symbols.getMonetaryDecimalSeparatorString(); |
| 47 } else { |
| 48 groupingSeparator = symbols.getGroupingSeparatorString(); |
| 49 decimalSeparator = symbols.getDecimalSeparatorString(); |
| 50 } |
| 51 |
| 52 // Attempt to find values in the static cache |
| 53 if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS)) { |
| 54 decimalKey = UnicodeSetStaticCache |
| 55 .chooseFrom(decimalSeparator, Key.STRICT_COMMA, Key.STRICT_P
ERIOD); |
| 56 if (decimalKey == Key.STRICT_COMMA) { |
| 57 // Decimal is comma; grouping should be period or custom |
| 58 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, |
| 59 Key.STRICT_PERIOD_OR_OTHER); |
| 60 } else if (decimalKey == Key.STRICT_PERIOD) { |
| 61 // Decimal is period; grouping should be comma or custom |
| 62 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, |
| 63 Key.STRICT_COMMA_OR_OTHER); |
| 64 } else { |
| 65 // Decimal is custom; grouping can be either comma or period or
custom |
| 66 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, |
| 67 Key.STRICT_COMMA_OR_OTHER, |
| 68 Key.STRICT_PERIOD_OR_OTHER); |
| 69 } |
| 70 } else { |
| 71 decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator, Key.
COMMA, Key.PERIOD); |
| 72 if (decimalKey == Key.COMMA) { |
| 73 // Decimal is comma; grouping should be period or custom |
| 74 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, Key.PERIOD_OR_OTHER); |
| 75 } else if (decimalKey == Key.PERIOD) { |
| 76 // Decimal is period; grouping should be comma or custom |
| 77 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, Key.COMMA_OR_OTHER); |
| 78 } else { |
| 79 // Decimal is custom; grouping can be either comma or period or
custom |
| 80 groupingKey = UnicodeSetStaticCache |
| 81 .chooseFrom(groupingSeparator, Key.COMMA_OR_OTHER, Key.P
ERIOD_OR_OTHER); |
| 82 } |
| 83 } |
| 84 |
| 85 // Get the sets from the static cache if they were found |
| 86 UnicodeSet _groupingUniSet = null, _decimalUniSet = null, _separatorSet
= null, _leadSet = null; |
| 87 if (groupingKey != null && decimalKey != null) { |
| 88 _groupingUniSet = UnicodeSetStaticCache.get(groupingKey); |
| 89 _decimalUniSet = UnicodeSetStaticCache.get(decimalKey); |
| 90 Key separatorKey = UnicodeSetStaticCache.unionOf(groupingKey, decima
lKey); |
| 91 if (separatorKey != null) { |
| 92 _separatorSet = UnicodeSetStaticCache.get(separatorKey); |
| 93 Key leadKey = UnicodeSetStaticCache.unionOf(Key.DIGITS, separato
rKey); |
| 94 if (leadKey != null) { |
| 95 _leadSet = UnicodeSetStaticCache.get(leadKey); |
| 96 } |
| 97 } |
| 98 } else if (groupingKey != null) { |
| 99 _groupingUniSet = UnicodeSetStaticCache.get(groupingKey); |
| 100 } else if (decimalKey != null) { |
| 101 _decimalUniSet = UnicodeSetStaticCache.get(decimalKey); |
| 102 } |
| 103 |
| 104 // Finish resolving fallbacks |
| 105 groupingUniSet = _groupingUniSet != null ? _groupingUniSet |
| 106 : new UnicodeSet().add(groupingSeparator.codePointAt(0)).freeze(
); |
| 107 decimalUniSet = _decimalUniSet != null ? _decimalUniSet |
| 108 : new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze()
; |
| 109 separatorSet = _separatorSet != null ? _separatorSet |
| 110 : new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).
freeze(); |
| 111 leadSet = _leadSet; // null if not available |
| 112 |
| 113 int cpZero = symbols.getCodePointZero(); |
| 114 if (cpZero == -1 || !UCharacter.isDigit(cpZero) || UCharacter.digit(cpZe
ro) != 0) { |
| 115 digitStrings = symbols.getDigitStringsLocal(); |
| 116 } else { |
| 117 digitStrings = null; |
| 118 } |
| 119 |
| 120 requireGroupingMatch = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT
_GROUPING_SIZE); |
| 121 groupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_GROUPING_D
ISABLED); |
| 122 grouping1 = grouper.getPrimary(); |
| 123 grouping2 = grouper.getSecondary(); |
| 124 integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY); |
| 125 isScientific = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_DECIMAL_SCIENT
IFIC); |
| 126 } |
| 127 |
| 128 @Override |
| 129 public boolean match(StringSegment segment, ParsedNumber result) { |
| 130 return match(segment, result, false); |
| 131 } |
| 132 |
| 133 public boolean match(StringSegment segment, ParsedNumber result, boolean neg
ativeExponent) { |
| 134 if (result.seenNumber() && !isScientific) { |
| 135 // A number has already been consumed. |
| 136 return false; |
| 137 } |
| 138 |
| 139 int initialOffset = segment.getOffset(); |
| 140 int currGroup = 0; |
| 141 int separator = -1; |
| 142 int lastSeparatorOffset = segment.getOffset(); |
| 143 int exponent = 0; |
| 144 boolean hasPartialPrefix = false; |
| 145 boolean seenBothSeparators = false; |
| 146 while (segment.length() > 0) { |
| 147 hasPartialPrefix = false; |
| 148 |
| 149 // Attempt to match a digit. |
| 150 byte digit = -1; |
| 151 |
| 152 // Try by code point digit value. |
| 153 int cp = segment.getCodePoint(); |
| 154 if (UCharacter.isDigit(cp)) { |
| 155 segment.adjustOffset(Character.charCount(cp)); |
| 156 digit = (byte) UCharacter.digit(cp); |
| 157 } |
| 158 |
| 159 // Try by digit string. |
| 160 if (digit == -1 && digitStrings != null) { |
| 161 for (int i = 0; i < digitStrings.length; i++) { |
| 162 String str = digitStrings[i]; |
| 163 int overlap = segment.getCommonPrefixLength(str); |
| 164 if (overlap == str.length()) { |
| 165 segment.adjustOffset(overlap); |
| 166 digit = (byte) i; |
| 167 break; |
| 168 } else if (overlap == segment.length()) { |
| 169 hasPartialPrefix = true; |
| 170 } |
| 171 } |
| 172 } |
| 173 |
| 174 // If found, save it in the DecimalQuantity or scientific adjustment
. |
| 175 if (digit >= 0) { |
| 176 if (isScientific) { |
| 177 int nextExponent = digit + exponent * 10; |
| 178 if (nextExponent < exponent) { |
| 179 // Overflow |
| 180 exponent = Integer.MAX_VALUE; |
| 181 } else { |
| 182 exponent = nextExponent; |
| 183 } |
| 184 } else { |
| 185 if (result.quantity == null) { |
| 186 result.quantity = new DecimalQuantity_DualStorageBCD(); |
| 187 } |
| 188 result.quantity.appendDigit(digit, 0, true); |
| 189 } |
| 190 result.setCharsConsumed(segment); |
| 191 currGroup++; |
| 192 continue; |
| 193 } |
| 194 |
| 195 // Attempt to match a separator. |
| 196 if (!seenBothSeparators && cp != -1 && separatorSet.contains(cp)) { |
| 197 if (separator == -1) { |
| 198 // First separator; could be either grouping or decimal. |
| 199 separator = cp; |
| 200 if (!groupingDisabled |
| 201 && requireGroupingMatch |
| 202 && groupingUniSet.contains(cp) |
| 203 && (currGroup == 0 || currGroup > grouping2)) { |
| 204 break; |
| 205 } |
| 206 } else if (!groupingDisabled && separator == cp && groupingUniSe
t.contains(cp)) { |
| 207 // Second or later grouping separator. |
| 208 if (requireGroupingMatch && currGroup != grouping2) { |
| 209 break; |
| 210 } |
| 211 } else if (!groupingDisabled && separator != cp && decimalUniSet
.contains(cp)) { |
| 212 // Decimal separator after a grouping separator. |
| 213 if (requireGroupingMatch && currGroup != grouping1) { |
| 214 break; |
| 215 } |
| 216 seenBothSeparators = true; |
| 217 } else { |
| 218 // Invalid separator. |
| 219 break; |
| 220 } |
| 221 currGroup = 0; |
| 222 lastSeparatorOffset = segment.getOffset(); |
| 223 segment.adjustOffset(Character.charCount(cp)); |
| 224 continue; |
| 225 } |
| 226 |
| 227 break; |
| 228 } |
| 229 |
| 230 if (isScientific) { |
| 231 boolean overflow = (exponent == Integer.MAX_VALUE); |
| 232 if (!overflow) { |
| 233 try { |
| 234 result.quantity.adjustMagnitude(negativeExponent ? -exponent
: exponent); |
| 235 } catch (ArithmeticException e) { |
| 236 overflow = true; |
| 237 } |
| 238 } |
| 239 if (overflow) { |
| 240 if (negativeExponent) { |
| 241 // Set to zero |
| 242 result.quantity.clear(); |
| 243 } else { |
| 244 // Set to infinity |
| 245 result.quantity = null; |
| 246 result.flags |= ParsedNumber.FLAG_INFINITY; |
| 247 } |
| 248 } |
| 249 } else if (result.quantity == null) { |
| 250 // No-op: strings that start with a separator without any other digi
ts |
| 251 } else if (seenBothSeparators || (separator != -1 && decimalUniSet.conta
ins(separator))) { |
| 252 // The final separator was a decimal separator. |
| 253 result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR; |
| 254 result.quantity.adjustMagnitude(-currGroup); |
| 255 if (integerOnly) { |
| 256 result.quantity.truncate(); |
| 257 segment.setOffset(lastSeparatorOffset); |
| 258 } |
| 259 } else if (separator != -1 && groupingDisabled) { |
| 260 // The final separator was a grouping separator, but we aren't accep
ting grouping. |
| 261 // Reset the offset to immediately before that grouping separator. |
| 262 result.quantity.adjustMagnitude(-currGroup); |
| 263 result.quantity.truncate(); |
| 264 segment.setOffset(lastSeparatorOffset); |
| 265 } else if (separator != -1 |
| 266 && requireGroupingMatch |
| 267 && groupingUniSet.contains(separator) |
| 268 && currGroup != grouping1) { |
| 269 // The final separator was a grouping separator, and we have a misma
tched grouping size. |
| 270 // Reset the offset to the beginning of the number. |
| 271 // TODO |
| 272 result.quantity.adjustMagnitude(-currGroup); |
| 273 result.quantity.truncate(); |
| 274 segment.setOffset(lastSeparatorOffset); |
| 275 // result.quantity = null; |
| 276 // segment.setOffset(initialOffset); |
| 277 } |
| 278 |
| 279 return segment.length() == 0 || hasPartialPrefix; |
| 280 } |
| 281 |
| 282 @Override |
| 283 public UnicodeSet getLeadCodePoints() { |
| 284 if (digitStrings == null && leadSet != null) { |
| 285 return leadSet; |
| 286 } |
| 287 |
| 288 UnicodeSet leadCodePoints = new UnicodeSet(); |
| 289 // Assumption: the sets are all single code points. |
| 290 leadCodePoints.addAll(UnicodeSetStaticCache.get(Key.DIGITS)); |
| 291 leadCodePoints.addAll(separatorSet); |
| 292 if (digitStrings != null) { |
| 293 for (int i = 0; i < digitStrings.length; i++) { |
| 294 ParsingUtils.putLeadCodePoint(digitStrings[i], leadCodePoints); |
| 295 } |
| 296 } |
| 297 return leadCodePoints.freeze(); |
| 298 } |
| 299 |
| 300 @Override |
| 301 public void postProcess(ParsedNumber result) { |
| 302 // No-op |
| 303 } |
| 304 |
| 305 @Override |
| 306 public String toString() { |
| 307 return "<DecimalMatcher>"; |
| 308 } |
| 309 } |
LEFT | RIGHT |