OLD | NEW |
(Empty) | |
| 1 // © 2017 and later: Unicode, Inc. and others. |
| 2 // License & terms of use: http://www.unicode.org/copyright.html#License |
| 3 package com.ibm.icu.impl.number.parse; |
| 4 |
| 5 import java.util.ArrayList; |
| 6 import java.util.Collections; |
| 7 import java.util.Comparator; |
| 8 |
| 9 import com.ibm.icu.impl.number.AffixPatternProvider; |
| 10 import com.ibm.icu.impl.number.AffixUtils; |
| 11 import com.ibm.icu.text.UnicodeSet; |
| 12 |
| 13 /** |
| 14 * @author sffc |
| 15 * |
| 16 */ |
| 17 public class AffixMatcher implements NumberParseMatcher { |
| 18 private final String prefix; |
| 19 private final String suffix; |
| 20 private final int flags; |
| 21 |
| 22 /** |
| 23 * Comparator for two AffixMatcher instances which prioritizes longer prefix
es followed by longer suffixes, ensuring |
| 24 * that the longest prefix/suffix pair is always chosen. |
| 25 */ |
| 26 public static final Comparator<AffixMatcher> COMPARATOR = new Comparator<Aff
ixMatcher>() { |
| 27 @Override |
| 28 public int compare(AffixMatcher o1, AffixMatcher o2) { |
| 29 if (o1.prefix.length() != o2.prefix.length()) { |
| 30 return o1.prefix.length() > o2.prefix.length() ? -1 : 1; |
| 31 } else if (o1.suffix.length() != o2.suffix.length()) { |
| 32 return o1.suffix.length() > o2.suffix.length() ? -1 : 1; |
| 33 } else if (!o1.equals(o2)) { |
| 34 // If the prefix and suffix are the same length, arbitrarily bre
ak ties. |
| 35 // We can't return zero unless the elements are equal. |
| 36 return o1.hashCode() > o2.hashCode() ? -1 : 1; |
| 37 } else { |
| 38 return 0; |
| 39 } |
| 40 } |
| 41 }; |
| 42 |
| 43 public static void generateFromAffixPatternProvider( |
| 44 AffixPatternProvider patternInfo, |
| 45 NumberParserImpl output, |
| 46 IgnorablesMatcher ignorables, |
| 47 int parseFlags) { |
| 48 // Lazy-initialize the StringBuilder. |
| 49 StringBuilder sb = null; |
| 50 |
| 51 // Use initial capacity of 6, the highest possible number of AffixMatche
rs. |
| 52 // TODO: Lazy-initialize? |
| 53 ArrayList<AffixMatcher> matchers = new ArrayList<AffixMatcher>(6); |
| 54 |
| 55 sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_PREFIX, ig
norables.getSet(), sb); |
| 56 String posPrefix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parseFlag
s); |
| 57 sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_POS_SUFFIX, ig
norables.getSet(), sb); |
| 58 String posSuffix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parseFlag
s); |
| 59 |
| 60 boolean includeUnpaired = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INC
LUDE_UNPAIRED_AFFIXES); |
| 61 |
| 62 if (!posPrefix.isEmpty() || !posSuffix.isEmpty()) { |
| 63 matchers.add(getInstance(posPrefix, posSuffix, 0)); |
| 64 if (includeUnpaired && !posPrefix.isEmpty() && !posSuffix.isEmpty())
{ |
| 65 matchers.add(getInstance(posPrefix, "", 0)); |
| 66 matchers.add(getInstance("", posSuffix, 0)); |
| 67 } |
| 68 } |
| 69 |
| 70 if (patternInfo.hasNegativeSubpattern()) { |
| 71 sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_PREFIX
, ignorables.getSet(), sb); |
| 72 String negPrefix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parse
Flags); |
| 73 sb = getCleanAffix(patternInfo, AffixPatternProvider.FLAG_NEG_SUFFIX
, ignorables.getSet(), sb); |
| 74 String negSuffix = ParsingUtils.maybeFold(toStringOrEmpty(sb), parse
Flags); |
| 75 |
| 76 if (negPrefix.equals(posPrefix) && negSuffix.equals(posSuffix)) { |
| 77 // No-op: favor the positive AffixMatcher |
| 78 } else if (!negPrefix.isEmpty() || !negSuffix.isEmpty()) { |
| 79 matchers.add(getInstance(negPrefix, negSuffix, ParsedNumber.FLAG
_NEGATIVE)); |
| 80 if (includeUnpaired && !negPrefix.isEmpty() && !negSuffix.isEmpt
y()) { |
| 81 if (!negPrefix.equals(posPrefix)) { |
| 82 matchers.add(getInstance(negPrefix, "", ParsedNumber.FLA
G_NEGATIVE)); |
| 83 } |
| 84 if (!negSuffix.equals(posSuffix)) { |
| 85 matchers.add(getInstance("", negSuffix, ParsedNumber.FLA
G_NEGATIVE)); |
| 86 } |
| 87 } |
| 88 } |
| 89 } |
| 90 |
| 91 // Put the AffixMatchers in order, and then add them to the output. |
| 92 Collections.sort(matchers, COMPARATOR); |
| 93 output.addMatchers(matchers); |
| 94 } |
| 95 |
| 96 private static StringBuilder getCleanAffix( |
| 97 AffixPatternProvider patternInfo, |
| 98 int flag, |
| 99 UnicodeSet ignorables, |
| 100 StringBuilder sb) { |
| 101 if (sb != null) { |
| 102 sb.setLength(0); |
| 103 } |
| 104 if (patternInfo.length(flag) > 0) { |
| 105 sb = AffixUtils.trimSymbolsAndIgnorables(patternInfo.getString(flag)
, ignorables, sb); |
| 106 } |
| 107 return sb; |
| 108 } |
| 109 |
| 110 private static String toStringOrEmpty(StringBuilder sb) { |
| 111 return (sb == null || sb.length() == 0) ? "" : sb.toString(); |
| 112 } |
| 113 |
| 114 private static final AffixMatcher getInstance(String prefix, String suffix,
int flags) { |
| 115 // TODO: Special handling for common cases like both strings empty. |
| 116 return new AffixMatcher(prefix, suffix, flags); |
| 117 } |
| 118 |
| 119 private AffixMatcher(String prefix, String suffix, int flags) { |
| 120 assert prefix != null; |
| 121 assert suffix != null; |
| 122 this.prefix = prefix; |
| 123 this.suffix = suffix; |
| 124 this.flags = flags; |
| 125 } |
| 126 |
| 127 @Override |
| 128 public boolean match(StringSegment segment, ParsedNumber result) { |
| 129 if (!result.seenNumber()) { |
| 130 // Prefix |
| 131 if (result.prefix != null || prefix.length() == 0) { |
| 132 return false; |
| 133 } |
| 134 int overlap = segment.getCommonPrefixLength(prefix); |
| 135 if (overlap == prefix.length()) { |
| 136 result.prefix = prefix; |
| 137 segment.adjustOffset(overlap); |
| 138 result.setCharsConsumed(segment); |
| 139 return false; |
| 140 } else if (overlap == segment.length()) { |
| 141 return true; |
| 142 } |
| 143 |
| 144 } else { |
| 145 // Suffix |
| 146 if (result.suffix != null || suffix.length() == 0 || !prefix.equals(
orEmpty(result.prefix))) { |
| 147 return false; |
| 148 } |
| 149 int overlap = segment.getCommonPrefixLength(suffix); |
| 150 if (overlap == suffix.length()) { |
| 151 result.suffix = suffix; |
| 152 segment.adjustOffset(overlap); |
| 153 result.setCharsConsumed(segment); |
| 154 return false; |
| 155 } else if (overlap == segment.length()) { |
| 156 return true; |
| 157 } |
| 158 } |
| 159 |
| 160 return false; |
| 161 } |
| 162 |
| 163 @Override |
| 164 public UnicodeSet getLeadCodePoints() { |
| 165 UnicodeSet leadCodePoints = new UnicodeSet(); |
| 166 ParsingUtils.putLeadCodePoint(prefix, leadCodePoints); |
| 167 ParsingUtils.putLeadCodePoint(suffix, leadCodePoints); |
| 168 return leadCodePoints.freeze(); |
| 169 } |
| 170 |
| 171 @Override |
| 172 public void postProcess(ParsedNumber result) { |
| 173 // Check to see if our affix is the one that was matched. If so, set the
flags in the result. |
| 174 if (prefix.equals(orEmpty(result.prefix)) && suffix.equals(orEmpty(resul
t.suffix))) { |
| 175 // Fill in the result prefix and suffix with non-null values (empty
string). |
| 176 // Used by strict mode to determine whether an entire affix pair was
matched. |
| 177 result.prefix = prefix; |
| 178 result.suffix = suffix; |
| 179 result.flags |= flags; |
| 180 } |
| 181 } |
| 182 |
| 183 /** |
| 184 * Returns the input string, or "" if input is null. |
| 185 */ |
| 186 static String orEmpty(String str) { |
| 187 return str == null ? "" : str; |
| 188 } |
| 189 |
| 190 /** |
| 191 * Returns the sum of prefix and suffix length in the ParsedNumber. |
| 192 */ |
| 193 public static int affixLength(ParsedNumber o2) { |
| 194 return orEmpty(o2.prefix).length() + orEmpty(o2.suffix).length(); |
| 195 } |
| 196 |
| 197 @Override |
| 198 public boolean equals(Object _other) { |
| 199 if (!(_other instanceof AffixMatcher)) { |
| 200 return false; |
| 201 } |
| 202 AffixMatcher other = (AffixMatcher) _other; |
| 203 return prefix.equals(other.prefix) && suffix.equals(other.suffix) && fla
gs == other.flags; |
| 204 } |
| 205 |
| 206 @Override |
| 207 public int hashCode() { |
| 208 return prefix.hashCode() ^ suffix.hashCode() ^ flags; |
| 209 } |
| 210 |
| 211 @Override |
| 212 public String toString() { |
| 213 boolean isNegative = 0 != (flags & ParsedNumber.FLAG_NEGATIVE); |
| 214 return "<AffixMatcher" + (isNegative ? ":negative " : " ") + prefix + "#
" + suffix + ">"; |
| 215 } |
| 216 } |
OLD | NEW |