LEFT | RIGHT |
1 // © 2017 and later: Unicode, Inc. and others. | 1 // © 2017 and later: Unicode, Inc. and others. |
2 // License & terms of use: http://www.unicode.org/copyright.html#License | 2 // License & terms of use: http://www.unicode.org/copyright.html#License |
3 package com.ibm.icu.impl.number.parse; | 3 package com.ibm.icu.impl.number.parse; |
4 | 4 |
5 import java.math.RoundingMode; | |
6 | |
7 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; | 5 import com.ibm.icu.impl.number.DecimalQuantity_DualStorageBCD; |
8 import com.ibm.icu.impl.number.RoundingUtils; | 6 import com.ibm.icu.impl.number.parse.UnicodeSetStaticCache.Key; |
9 import com.ibm.icu.lang.UCharacter; | 7 import com.ibm.icu.lang.UCharacter; |
| 8 import com.ibm.icu.number.Grouper; |
10 import com.ibm.icu.text.DecimalFormatSymbols; | 9 import com.ibm.icu.text.DecimalFormatSymbols; |
11 import com.ibm.icu.text.UnicodeSet; | 10 import com.ibm.icu.text.UnicodeSet; |
12 | 11 |
13 /** | 12 /** |
14 * @author sffc | 13 * @author sffc |
15 * | 14 * |
16 */ | 15 */ |
17 public class DecimalMatcher implements NumberParseMatcher { | 16 public class DecimalMatcher implements NumberParseMatcher { |
18 | 17 |
19 /** | 18 private final boolean requireGroupingMatch; |
20 * @return | 19 private final boolean groupingDisabled; |
21 */ | 20 private final int grouping1; |
22 public static DecimalMatcher getInstance(DecimalFormatSymbols symbols) { | 21 private final int grouping2; |
23 // TODO(sffc): Auto-generated method stub | 22 private final boolean integerOnly; |
24 return new DecimalMatcher(symbols.getDigitStrings(), | 23 private final boolean isScientific; |
25 new UnicodeSet("[,]").freeze(), | 24 |
26 new UnicodeSet("[.]").freeze(), | 25 // Assumption: these sets all consist of single code points. If this assumpt
ion needs to be broken, |
27 false); | 26 // fix getLeadCodePoints() as well as matching logic. Be careful of the perf
ormance impact. |
28 } | |
29 | |
30 public static DecimalMatcher getExponentInstance(DecimalFormatSymbols symbol
s) { | |
31 return new DecimalMatcher(symbols.getDigitStrings(), | |
32 new UnicodeSet("[,]").freeze(), | |
33 new UnicodeSet("[.]").freeze(), | |
34 true); | |
35 } | |
36 | |
37 private final String[] digitStrings; | |
38 private final UnicodeSet groupingUniSet; | 27 private final UnicodeSet groupingUniSet; |
39 private final UnicodeSet decimalUniSet; | 28 private final UnicodeSet decimalUniSet; |
40 private final UnicodeSet separatorSet; | 29 private final UnicodeSet separatorSet; |
41 public boolean requireGroupingMatch = false; | 30 private final UnicodeSet leadSet; |
42 private final int grouping1 = 3; | 31 private final String[] digitStrings; |
43 private final int grouping2 = 3; | 32 |
44 private final boolean isScientific; | 33 public static DecimalMatcher getInstance( |
45 | 34 DecimalFormatSymbols symbols, |
46 private DecimalMatcher( | 35 Grouper grouper, |
47 String[] digitStrings, | 36 int parseFlags) { |
48 UnicodeSet groupingUniSet, | 37 // TODO: Cache popular instances? |
49 UnicodeSet decimalUniSet, | 38 return new DecimalMatcher(symbols, grouper, parseFlags); |
50 boolean isScientific) { | 39 } |
51 this.digitStrings = digitStrings; | 40 |
52 this.groupingUniSet = groupingUniSet; | 41 private DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int pa
rseFlags) { |
53 this.decimalUniSet = decimalUniSet; | 42 Key groupingKey, decimalKey; |
54 separatorSet = groupingUniSet.cloneAsThawed().addAll(decimalUniSet).free
ze(); | 43 String groupingSeparator, decimalSeparator; |
55 this.isScientific = isScientific; | 44 if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS)) { |
| 45 groupingSeparator = symbols.getMonetaryGroupingSeparatorString(); |
| 46 decimalSeparator = symbols.getMonetaryDecimalSeparatorString(); |
| 47 } else { |
| 48 groupingSeparator = symbols.getGroupingSeparatorString(); |
| 49 decimalSeparator = symbols.getDecimalSeparatorString(); |
| 50 } |
| 51 |
| 52 // Attempt to find values in the static cache |
| 53 if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS)) { |
| 54 decimalKey = UnicodeSetStaticCache |
| 55 .chooseFrom(decimalSeparator, Key.STRICT_COMMA, Key.STRICT_P
ERIOD); |
| 56 if (decimalKey == Key.STRICT_COMMA) { |
| 57 // Decimal is comma; grouping should be period or custom |
| 58 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, |
| 59 Key.STRICT_PERIOD_OR_OTHER); |
| 60 } else if (decimalKey == Key.STRICT_PERIOD) { |
| 61 // Decimal is period; grouping should be comma or custom |
| 62 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, |
| 63 Key.STRICT_COMMA_OR_OTHER); |
| 64 } else { |
| 65 // Decimal is custom; grouping can be either comma or period or
custom |
| 66 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, |
| 67 Key.STRICT_COMMA_OR_OTHER, |
| 68 Key.STRICT_PERIOD_OR_OTHER); |
| 69 } |
| 70 } else { |
| 71 decimalKey = UnicodeSetStaticCache.chooseFrom(decimalSeparator, Key.
COMMA, Key.PERIOD); |
| 72 if (decimalKey == Key.COMMA) { |
| 73 // Decimal is comma; grouping should be period or custom |
| 74 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, Key.PERIOD_OR_OTHER); |
| 75 } else if (decimalKey == Key.PERIOD) { |
| 76 // Decimal is period; grouping should be comma or custom |
| 77 groupingKey = UnicodeSetStaticCache.chooseFrom(groupingSeparator
, Key.COMMA_OR_OTHER); |
| 78 } else { |
| 79 // Decimal is custom; grouping can be either comma or period or
custom |
| 80 groupingKey = UnicodeSetStaticCache |
| 81 .chooseFrom(groupingSeparator, Key.COMMA_OR_OTHER, Key.P
ERIOD_OR_OTHER); |
| 82 } |
| 83 } |
| 84 |
| 85 // Get the sets from the static cache if they were found |
| 86 UnicodeSet _groupingUniSet = null, _decimalUniSet = null, _separatorSet
= null, _leadSet = null; |
| 87 if (groupingKey != null && decimalKey != null) { |
| 88 _groupingUniSet = UnicodeSetStaticCache.get(groupingKey); |
| 89 _decimalUniSet = UnicodeSetStaticCache.get(decimalKey); |
| 90 Key separatorKey = UnicodeSetStaticCache.unionOf(groupingKey, decima
lKey); |
| 91 if (separatorKey != null) { |
| 92 _separatorSet = UnicodeSetStaticCache.get(separatorKey); |
| 93 Key leadKey = UnicodeSetStaticCache.unionOf(Key.DIGITS, separato
rKey); |
| 94 if (leadKey != null) { |
| 95 _leadSet = UnicodeSetStaticCache.get(leadKey); |
| 96 } |
| 97 } |
| 98 } else if (groupingKey != null) { |
| 99 _groupingUniSet = UnicodeSetStaticCache.get(groupingKey); |
| 100 } else if (decimalKey != null) { |
| 101 _decimalUniSet = UnicodeSetStaticCache.get(decimalKey); |
| 102 } |
| 103 |
| 104 // Finish resolving fallbacks |
| 105 groupingUniSet = _groupingUniSet != null ? _groupingUniSet |
| 106 : new UnicodeSet().add(groupingSeparator.codePointAt(0)).freeze(
); |
| 107 decimalUniSet = _decimalUniSet != null ? _decimalUniSet |
| 108 : new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze()
; |
| 109 separatorSet = _separatorSet != null ? _separatorSet |
| 110 : new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).
freeze(); |
| 111 leadSet = _leadSet; // null if not available |
| 112 |
| 113 int cpZero = symbols.getCodePointZero(); |
| 114 if (cpZero == -1 || !UCharacter.isDigit(cpZero) || UCharacter.digit(cpZe
ro) != 0) { |
| 115 digitStrings = symbols.getDigitStringsLocal(); |
| 116 } else { |
| 117 digitStrings = null; |
| 118 } |
| 119 |
| 120 requireGroupingMatch = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT
_GROUPING_SIZE); |
| 121 groupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_GROUPING_D
ISABLED); |
| 122 grouping1 = grouper.getPrimary(); |
| 123 grouping2 = grouper.getSecondary(); |
| 124 integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY); |
| 125 isScientific = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_DECIMAL_SCIENT
IFIC); |
56 } | 126 } |
57 | 127 |
58 @Override | 128 @Override |
59 public boolean match(StringSegment segment, ParsedNumber result) { | 129 public boolean match(StringSegment segment, ParsedNumber result) { |
60 if (result.quantity != null && !isScientific) { | 130 return match(segment, result, false); |
| 131 } |
| 132 |
| 133 public boolean match(StringSegment segment, ParsedNumber result, boolean neg
ativeExponent) { |
| 134 if (result.seenNumber() && !isScientific) { |
61 // A number has already been consumed. | 135 // A number has already been consumed. |
62 return false; | 136 return false; |
63 } | 137 } |
64 | 138 |
| 139 int initialOffset = segment.getOffset(); |
65 int currGroup = 0; | 140 int currGroup = 0; |
66 int separator = -1; | 141 int separator = -1; |
67 int lastSeparatorOffset = segment.getOffset(); | 142 int lastSeparatorOffset = segment.getOffset(); |
| 143 int exponent = 0; |
68 boolean hasPartialPrefix = false; | 144 boolean hasPartialPrefix = false; |
69 boolean seenBothSeparators = false; | 145 boolean seenBothSeparators = false; |
70 while (segment.length() > 0) { | 146 while (segment.length() > 0) { |
71 hasPartialPrefix = false; | 147 hasPartialPrefix = false; |
72 | 148 |
73 // Attempt to match a digit. | 149 // Attempt to match a digit. |
74 byte digit = -1; | 150 byte digit = -1; |
75 | 151 |
76 // Try by code point digit value. | 152 // Try by code point digit value. |
77 int cp = segment.getCodePoint(); | 153 int cp = segment.getCodePoint(); |
78 if (UCharacter.isDigit(cp)) { | 154 if (UCharacter.isDigit(cp)) { |
79 segment.adjustOffset(Character.charCount(cp)); | 155 segment.adjustOffset(Character.charCount(cp)); |
80 digit = (byte) UCharacter.digit(cp); | 156 digit = (byte) UCharacter.digit(cp); |
81 } | 157 } |
82 | 158 |
83 // Try by digit string. | 159 // Try by digit string. |
84 if (digit == -1) { | 160 if (digit == -1 && digitStrings != null) { |
85 for (int i = 0; i < digitStrings.length; i++) { | 161 for (int i = 0; i < digitStrings.length; i++) { |
86 String str = digitStrings[i]; | 162 String str = digitStrings[i]; |
87 int overlap = segment.getCommonPrefixLength(str); | 163 int overlap = segment.getCommonPrefixLength(str); |
88 if (overlap == str.length()) { | 164 if (overlap == str.length()) { |
89 segment.adjustOffset(str.length()); | 165 segment.adjustOffset(overlap); |
90 digit = (byte) i; | 166 digit = (byte) i; |
| 167 break; |
91 } else if (overlap == segment.length()) { | 168 } else if (overlap == segment.length()) { |
92 hasPartialPrefix = true; | 169 hasPartialPrefix = true; |
93 } | 170 } |
94 } | 171 } |
95 } | 172 } |
96 | 173 |
97 // If found, save it in the DecimalQuantity or scientific adjustment
. | 174 // If found, save it in the DecimalQuantity or scientific adjustment
. |
98 if (digit >= 0) { | 175 if (digit >= 0) { |
99 if (isScientific) { | 176 if (isScientific) { |
100 result.scientificAdjustment = digit + result.scientificAdjus
tment * 10; | 177 int nextExponent = digit + exponent * 10; |
| 178 if (nextExponent < exponent) { |
| 179 // Overflow |
| 180 exponent = Integer.MAX_VALUE; |
| 181 } else { |
| 182 exponent = nextExponent; |
| 183 } |
101 } else { | 184 } else { |
102 if (result.quantity == null) { | 185 if (result.quantity == null) { |
103 result.quantity = new DecimalQuantity_DualStorageBCD(); | 186 result.quantity = new DecimalQuantity_DualStorageBCD(); |
104 } | 187 } |
105 result.quantity.appendDigit(digit, 0, true); | 188 result.quantity.appendDigit(digit, 0, true); |
106 } | 189 } |
107 result.setCharsConsumed(segment); | 190 result.setCharsConsumed(segment); |
108 currGroup++; | 191 currGroup++; |
109 continue; | 192 continue; |
110 } | 193 } |
111 | 194 |
112 // Attempt to match a separator. | 195 // Attempt to match a separator. |
113 if (!seenBothSeparators && cp != -1 && separatorSet.contains(cp)) { | 196 if (!seenBothSeparators && cp != -1 && separatorSet.contains(cp)) { |
114 if (separator == -1) { | 197 if (separator == -1) { |
115 // First separator; could be either grouping or decimal. | 198 // First separator; could be either grouping or decimal. |
116 separator = cp; | 199 separator = cp; |
117 if (requireGroupingMatch && currGroup == 0) { | 200 if (!groupingDisabled |
| 201 && requireGroupingMatch |
| 202 && groupingUniSet.contains(cp) |
| 203 && (currGroup == 0 || currGroup > grouping2)) { |
118 break; | 204 break; |
119 } | 205 } |
120 } else if (separator == cp && groupingUniSet.contains(cp)) { | 206 } else if (!groupingDisabled && separator == cp && groupingUniSe
t.contains(cp)) { |
121 // Second or later grouping separator. | 207 // Second or later grouping separator. |
122 if (requireGroupingMatch && currGroup != grouping2) { | 208 if (requireGroupingMatch && currGroup != grouping2) { |
123 break; | 209 break; |
124 } | 210 } |
125 } else if (separator != cp && decimalUniSet.contains(cp)) { | 211 } else if (!groupingDisabled && separator != cp && decimalUniSet
.contains(cp)) { |
126 // Decimal separator. | 212 // Decimal separator after a grouping separator. |
127 if (requireGroupingMatch && currGroup != grouping1) { | 213 if (requireGroupingMatch && currGroup != grouping1) { |
128 break; | 214 break; |
129 } | 215 } |
130 seenBothSeparators = true; | 216 seenBothSeparators = true; |
131 } else { | 217 } else { |
132 // Invalid separator. | 218 // Invalid separator. |
133 break; | 219 break; |
134 } | 220 } |
135 currGroup = 0; | 221 currGroup = 0; |
136 lastSeparatorOffset = segment.getOffset(); | 222 lastSeparatorOffset = segment.getOffset(); |
137 segment.adjustOffset(Character.charCount(cp)); | 223 segment.adjustOffset(Character.charCount(cp)); |
138 continue; | 224 continue; |
139 } | 225 } |
140 | 226 |
141 break; | 227 break; |
142 } | 228 } |
143 | 229 |
144 if (seenBothSeparators || (separator != -1 && decimalUniSet.contains(sep
arator))) { | 230 if (isScientific) { |
| 231 boolean overflow = (exponent == Integer.MAX_VALUE); |
| 232 if (!overflow) { |
| 233 try { |
| 234 result.quantity.adjustMagnitude(negativeExponent ? -exponent
: exponent); |
| 235 } catch (ArithmeticException e) { |
| 236 overflow = true; |
| 237 } |
| 238 } |
| 239 if (overflow) { |
| 240 if (negativeExponent) { |
| 241 // Set to zero |
| 242 result.quantity.clear(); |
| 243 } else { |
| 244 // Set to infinity |
| 245 result.quantity = null; |
| 246 result.flags |= ParsedNumber.FLAG_INFINITY; |
| 247 } |
| 248 } |
| 249 } else if (result.quantity == null) { |
| 250 // No-op: strings that start with a separator without any other digi
ts |
| 251 } else if (seenBothSeparators || (separator != -1 && decimalUniSet.conta
ins(separator))) { |
| 252 // The final separator was a decimal separator. |
| 253 result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR; |
145 result.quantity.adjustMagnitude(-currGroup); | 254 result.quantity.adjustMagnitude(-currGroup); |
146 } else if (requireGroupingMatch && separator != -1 && groupingUniSet.con
tains(separator) | 255 if (integerOnly) { |
| 256 result.quantity.truncate(); |
| 257 segment.setOffset(lastSeparatorOffset); |
| 258 } |
| 259 } else if (separator != -1 && groupingDisabled) { |
| 260 // The final separator was a grouping separator, but we aren't accep
ting grouping. |
| 261 // Reset the offset to immediately before that grouping separator. |
| 262 result.quantity.adjustMagnitude(-currGroup); |
| 263 result.quantity.truncate(); |
| 264 segment.setOffset(lastSeparatorOffset); |
| 265 } else if (separator != -1 |
| 266 && requireGroupingMatch |
| 267 && groupingUniSet.contains(separator) |
147 && currGroup != grouping1) { | 268 && currGroup != grouping1) { |
| 269 // The final separator was a grouping separator, and we have a misma
tched grouping size. |
| 270 // Reset the offset to the beginning of the number. |
| 271 // TODO |
148 result.quantity.adjustMagnitude(-currGroup); | 272 result.quantity.adjustMagnitude(-currGroup); |
149 result.quantity.roundToMagnitude(0, RoundingUtils.mathContextUnlimit
ed(RoundingMode.FLOOR)); | 273 result.quantity.truncate(); |
150 segment.setOffset(lastSeparatorOffset); | 274 segment.setOffset(lastSeparatorOffset); |
151 } | 275 // result.quantity = null; |
152 | 276 // segment.setOffset(initialOffset); |
153 return segment.length() == 0 || hasPartialPrefix || segment.isLeadingSur
rogate(); | 277 } |
| 278 |
| 279 return segment.length() == 0 || hasPartialPrefix; |
| 280 } |
| 281 |
| 282 @Override |
| 283 public UnicodeSet getLeadCodePoints() { |
| 284 if (digitStrings == null && leadSet != null) { |
| 285 return leadSet; |
| 286 } |
| 287 |
| 288 UnicodeSet leadCodePoints = new UnicodeSet(); |
| 289 // Assumption: the sets are all single code points. |
| 290 leadCodePoints.addAll(UnicodeSetStaticCache.get(Key.DIGITS)); |
| 291 leadCodePoints.addAll(separatorSet); |
| 292 if (digitStrings != null) { |
| 293 for (int i = 0; i < digitStrings.length; i++) { |
| 294 ParsingUtils.putLeadCodePoint(digitStrings[i], leadCodePoints); |
| 295 } |
| 296 } |
| 297 return leadCodePoints.freeze(); |
154 } | 298 } |
155 | 299 |
156 @Override | 300 @Override |
157 public void postProcess(ParsedNumber result) { | 301 public void postProcess(ParsedNumber result) { |
158 // No-op | 302 // No-op |
159 } | 303 } |
160 | 304 |
161 @Override | 305 @Override |
162 public String toString() { | 306 public String toString() { |
163 return "<MantissaMatcher>"; | 307 return "<DecimalMatcher>"; |
164 } | 308 } |
165 } | 309 } |
LEFT | RIGHT |