OLD | NEW |
1 // © 2016 and later: Unicode, Inc. and others. | 1 // © 2016 and later: Unicode, Inc. and others. |
2 // License & terms of use: http://www.unicode.org/copyright.html#License | 2 // License & terms of use: http://www.unicode.org/copyright.html#License |
3 /** | 3 /** |
4 ******************************************************************************* | 4 ******************************************************************************* |
5 * Copyright (C) 1996-2016, International Business Machines Corporation and | 5 * Copyright (C) 1996-2016, International Business Machines Corporation and |
6 * others. All Rights Reserved. | 6 * others. All Rights Reserved. |
7 ******************************************************************************* | 7 ******************************************************************************* |
8 */ | 8 */ |
9 | 9 |
10 package com.ibm.icu.lang; | 10 package com.ibm.icu.lang; |
11 | 11 |
12 import java.lang.ref.SoftReference; | 12 import java.lang.ref.SoftReference; |
13 import java.util.HashMap; | 13 import java.util.HashMap; |
14 import java.util.Iterator; | 14 import java.util.Iterator; |
15 import java.util.Locale; | 15 import java.util.Locale; |
16 import java.util.Map; | 16 import java.util.Map; |
17 | 17 |
18 import com.ibm.icu.impl.CaseMap; | 18 import com.ibm.icu.impl.CaseMapImpl; |
19 import com.ibm.icu.impl.CaseMap.StringContextIterator; | |
20 import com.ibm.icu.impl.IllegalIcuArgumentException; | 19 import com.ibm.icu.impl.IllegalIcuArgumentException; |
21 import com.ibm.icu.impl.Trie2; | 20 import com.ibm.icu.impl.Trie2; |
22 import com.ibm.icu.impl.UBiDiProps; | 21 import com.ibm.icu.impl.UBiDiProps; |
23 import com.ibm.icu.impl.UCaseProps; | 22 import com.ibm.icu.impl.UCaseProps; |
24 import com.ibm.icu.impl.UCharacterName; | 23 import com.ibm.icu.impl.UCharacterName; |
25 import com.ibm.icu.impl.UCharacterNameChoice; | 24 import com.ibm.icu.impl.UCharacterNameChoice; |
26 import com.ibm.icu.impl.UCharacterProperty; | 25 import com.ibm.icu.impl.UCharacterProperty; |
27 import com.ibm.icu.impl.UCharacterUtility; | 26 import com.ibm.icu.impl.UCharacterUtility; |
28 import com.ibm.icu.impl.UPropertyAliases; | 27 import com.ibm.icu.impl.UPropertyAliases; |
29 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; | 28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; |
30 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; | 29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; |
31 import com.ibm.icu.text.BreakIterator; | 30 import com.ibm.icu.text.BreakIterator; |
| 31 import com.ibm.icu.text.Edits; |
32 import com.ibm.icu.text.Normalizer2; | 32 import com.ibm.icu.text.Normalizer2; |
33 import com.ibm.icu.util.RangeValueIterator; | 33 import com.ibm.icu.util.RangeValueIterator; |
34 import com.ibm.icu.util.ULocale; | 34 import com.ibm.icu.util.ULocale; |
35 import com.ibm.icu.util.ValueIterator; | 35 import com.ibm.icu.util.ValueIterator; |
36 import com.ibm.icu.util.VersionInfo; | 36 import com.ibm.icu.util.VersionInfo; |
37 | 37 |
38 /** | 38 /** |
39 * {@icuenhanced java.lang.Character}.{@icu _usage_} | 39 * {@icuenhanced java.lang.Character}.{@icu _usage_} |
40 * | 40 * |
41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character
} class. | 41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character
} class. |
(...skipping 4826 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4868 | 4868 |
4869 /** | 4869 /** |
4870 * Returns the uppercase version of the argument string. | 4870 * Returns the uppercase version of the argument string. |
4871 * Casing is dependent on the default locale and context-sensitive. | 4871 * Casing is dependent on the default locale and context-sensitive. |
4872 * @param str source string to be performed on | 4872 * @param str source string to be performed on |
4873 * @return uppercase version of the argument string | 4873 * @return uppercase version of the argument string |
4874 * @stable ICU 2.1 | 4874 * @stable ICU 2.1 |
4875 */ | 4875 */ |
4876 public static String toUpperCase(String str) | 4876 public static String toUpperCase(String str) |
4877 { | 4877 { |
4878 return toUpperCase(ULocale.getDefault(), str); | 4878 return toUpperCase(getDefaultCaseLocale(), str); |
4879 } | 4879 } |
4880 | 4880 |
4881 /** | 4881 /** |
4882 * Returns the lowercase version of the argument string. | 4882 * Returns the lowercase version of the argument string. |
4883 * Casing is dependent on the default locale and context-sensitive | 4883 * Casing is dependent on the default locale and context-sensitive |
4884 * @param str source string to be performed on | 4884 * @param str source string to be performed on |
4885 * @return lowercase version of the argument string | 4885 * @return lowercase version of the argument string |
4886 * @stable ICU 2.1 | 4886 * @stable ICU 2.1 |
4887 */ | 4887 */ |
4888 public static String toLowerCase(String str) | 4888 public static String toLowerCase(String str) |
4889 { | 4889 { |
4890 return toLowerCase(ULocale.getDefault(), str); | 4890 return toLowerCase(getDefaultCaseLocale(), str); |
4891 } | 4891 } |
4892 | 4892 |
4893 /** | 4893 /** |
4894 * <p>Returns the titlecase version of the argument string. | 4894 * <p>Returns the titlecase version of the argument string. |
4895 * <p>Position for titlecasing is determined by the argument break | 4895 * <p>Position for titlecasing is determined by the argument break |
4896 * iterator, hence the user can customize his break iterator for | 4896 * iterator, hence the user can customize his break iterator for |
4897 * a specialized titlecasing. In this case only the forward iteration | 4897 * a specialized titlecasing. In this case only the forward iteration |
4898 * needs to be implemented. | 4898 * needs to be implemented. |
4899 * If the break iterator passed in is null, the default Unicode algorithm | 4899 * If the break iterator passed in is null, the default Unicode algorithm |
4900 * will be used to determine the titlecase positions. | 4900 * will be used to determine the titlecase positions. |
4901 * | 4901 * |
4902 * <p>Only positions returned by the break iterator will be title cased, | 4902 * <p>Only positions returned by the break iterator will be title cased, |
4903 * character in between the positions will all be in lower case. | 4903 * character in between the positions will all be in lower case. |
4904 * <p>Casing is dependent on the default locale and context-sensitive | 4904 * <p>Casing is dependent on the default locale and context-sensitive |
4905 * @param str source string to be performed on | 4905 * @param str source string to be performed on |
4906 * @param breakiter break iterator to determine the positions in which | 4906 * @param breakiter break iterator to determine the positions in which |
4907 * the character should be title cased. | 4907 * the character should be title cased. |
4908 * @return lowercase version of the argument string | 4908 * @return lowercase version of the argument string |
4909 * @stable ICU 2.6 | 4909 * @stable ICU 2.6 |
4910 */ | 4910 */ |
4911 public static String toTitleCase(String str, BreakIterator breakiter) | 4911 public static String toTitleCase(String str, BreakIterator breakiter) |
4912 { | 4912 { |
4913 return toTitleCase(ULocale.getDefault(), str, breakiter); | 4913 return toTitleCase(Locale.getDefault(), str, breakiter, 0); |
| 4914 } |
| 4915 |
| 4916 private static int getDefaultCaseLocale() { |
| 4917 return UCaseProps.getCaseLocale(Locale.getDefault()); |
| 4918 } |
| 4919 |
| 4920 private static int getCaseLocale(Locale locale) { |
| 4921 if (locale == null) { |
| 4922 locale = Locale.getDefault(); |
| 4923 } |
| 4924 return UCaseProps.getCaseLocale(locale); |
| 4925 } |
| 4926 |
| 4927 private static int getCaseLocale(ULocale locale) { |
| 4928 if (locale == null) { |
| 4929 locale = ULocale.getDefault(); |
| 4930 } |
| 4931 return UCaseProps.getCaseLocale(locale); |
| 4932 } |
| 4933 |
| 4934 private static String toLowerCase(int caseLocale, String str) { |
| 4935 if (str.length() <= 100) { |
| 4936 if (str.isEmpty()) { |
| 4937 return str; |
| 4938 } |
| 4939 // Collect and apply only changes. |
| 4940 // Good if no or few changes. Bad (slow) if many changes. |
| 4941 Edits edits = new Edits(); |
| 4942 StringBuilder replacementChars = CaseMapImpl.toLower( |
| 4943 caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new String
Builder(), edits); |
| 4944 return applyEdits(str, replacementChars, edits); |
| 4945 } else { |
| 4946 return CaseMapImpl.toLower(caseLocale, 0, str, |
| 4947 new StringBuilder(str.length()), null).toString(); |
| 4948 } |
| 4949 } |
| 4950 |
| 4951 private static String toUpperCase(int caseLocale, String str) { |
| 4952 if (str.length() <= 100) { |
| 4953 if (str.isEmpty()) { |
| 4954 return str; |
| 4955 } |
| 4956 // Collect and apply only changes. |
| 4957 // Good if no or few changes. Bad (slow) if many changes. |
| 4958 Edits edits = new Edits(); |
| 4959 StringBuilder replacementChars = CaseMapImpl.toUpper( |
| 4960 caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new String
Builder(), edits); |
| 4961 return applyEdits(str, replacementChars, edits); |
| 4962 } else { |
| 4963 return CaseMapImpl.toUpper(caseLocale, 0, str, |
| 4964 new StringBuilder(str.length()), null).toString(); |
| 4965 } |
| 4966 } |
| 4967 |
| 4968 private static String toTitleCase(int caseLocale, int options, BreakIterator
titleIter, String str) { |
| 4969 if (str.length() <= 100) { |
| 4970 if (str.isEmpty()) { |
| 4971 return str; |
| 4972 } |
| 4973 // Collect and apply only changes. |
| 4974 // Good if no or few changes. Bad (slow) if many changes. |
| 4975 Edits edits = new Edits(); |
| 4976 StringBuilder replacementChars = CaseMapImpl.toTitle( |
| 4977 caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, title
Iter, str, |
| 4978 new StringBuilder(), edits); |
| 4979 return applyEdits(str, replacementChars, edits); |
| 4980 } else { |
| 4981 return CaseMapImpl.toTitle(caseLocale, options, titleIter, str, |
| 4982 new StringBuilder(str.length()), null).toString(); |
| 4983 } |
| 4984 } |
| 4985 |
| 4986 private static String applyEdits(String str, StringBuilder replacementChars,
Edits edits) { |
| 4987 if (!edits.hasChanges()) { |
| 4988 return str; |
| 4989 } |
| 4990 StringBuilder result = new StringBuilder(str.length() + edits.lengthDelt
a()); |
| 4991 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { |
| 4992 if (ei.hasChange()) { |
| 4993 int i = ei.replacementIndex(); |
| 4994 result.append(replacementChars, i, i + ei.newLength()); |
| 4995 } else { |
| 4996 int i = ei.sourceIndex(); |
| 4997 result.append(str, i, i + ei.oldLength()); |
| 4998 } |
| 4999 } |
| 5000 return result.toString(); |
4914 } | 5001 } |
4915 | 5002 |
4916 /** | 5003 /** |
4917 * Returns the uppercase version of the argument string. | 5004 * Returns the uppercase version of the argument string. |
4918 * Casing is dependent on the argument locale and context-sensitive. | 5005 * Casing is dependent on the argument locale and context-sensitive. |
4919 * @param locale which string is to be converted in | 5006 * @param locale which string is to be converted in |
4920 * @param str source string to be performed on | 5007 * @param str source string to be performed on |
4921 * @return uppercase version of the argument string | 5008 * @return uppercase version of the argument string |
4922 * @stable ICU 2.1 | 5009 * @stable ICU 2.1 |
4923 */ | 5010 */ |
4924 public static String toUpperCase(Locale locale, String str) | 5011 public static String toUpperCase(Locale locale, String str) |
4925 { | 5012 { |
4926 return toUpperCase(ULocale.forLocale(locale), str); | 5013 return toUpperCase(getCaseLocale(locale), str); |
4927 } | 5014 } |
4928 | 5015 |
4929 /** | 5016 /** |
4930 * Returns the uppercase version of the argument string. | 5017 * Returns the uppercase version of the argument string. |
4931 * Casing is dependent on the argument locale and context-sensitive. | 5018 * Casing is dependent on the argument locale and context-sensitive. |
4932 * @param locale which string is to be converted in | 5019 * @param locale which string is to be converted in |
4933 * @param str source string to be performed on | 5020 * @param str source string to be performed on |
4934 * @return uppercase version of the argument string | 5021 * @return uppercase version of the argument string |
4935 * @stable ICU 3.2 | 5022 * @stable ICU 3.2 |
4936 */ | 5023 */ |
4937 public static String toUpperCase(ULocale locale, String str) { | 5024 public static String toUpperCase(ULocale locale, String str) { |
4938 return CaseMap.toUpper(locale, str); | 5025 return toUpperCase(getCaseLocale(locale), str); |
4939 } | 5026 } |
4940 | 5027 |
4941 /** | 5028 /** |
4942 * Returns the lowercase version of the argument string. | 5029 * Returns the lowercase version of the argument string. |
4943 * Casing is dependent on the argument locale and context-sensitive | 5030 * Casing is dependent on the argument locale and context-sensitive |
4944 * @param locale which string is to be converted in | 5031 * @param locale which string is to be converted in |
4945 * @param str source string to be performed on | 5032 * @param str source string to be performed on |
4946 * @return lowercase version of the argument string | 5033 * @return lowercase version of the argument string |
4947 * @stable ICU 2.1 | 5034 * @stable ICU 2.1 |
4948 */ | 5035 */ |
4949 public static String toLowerCase(Locale locale, String str) | 5036 public static String toLowerCase(Locale locale, String str) |
4950 { | 5037 { |
4951 return toLowerCase(ULocale.forLocale(locale), str); | 5038 return toLowerCase(getCaseLocale(locale), str); |
4952 } | 5039 } |
4953 | 5040 |
4954 /** | 5041 /** |
4955 * Returns the lowercase version of the argument string. | 5042 * Returns the lowercase version of the argument string. |
4956 * Casing is dependent on the argument locale and context-sensitive | 5043 * Casing is dependent on the argument locale and context-sensitive |
4957 * @param locale which string is to be converted in | 5044 * @param locale which string is to be converted in |
4958 * @param str source string to be performed on | 5045 * @param str source string to be performed on |
4959 * @return lowercase version of the argument string | 5046 * @return lowercase version of the argument string |
4960 * @stable ICU 3.2 | 5047 * @stable ICU 3.2 |
4961 */ | 5048 */ |
4962 public static String toLowerCase(ULocale locale, String str) { | 5049 public static String toLowerCase(ULocale locale, String str) { |
4963 StringContextIterator iter = new StringContextIterator(str); | 5050 return toLowerCase(getCaseLocale(locale), str); |
4964 StringBuilder result = new StringBuilder(str.length()); | |
4965 int[] locCache = new int[1]; | |
4966 int c; | |
4967 | |
4968 if (locale == null) { | |
4969 locale = ULocale.getDefault(); | |
4970 } | |
4971 locCache[0]=0; | |
4972 | |
4973 while((c=iter.nextCaseMapCP())>=0) { | |
4974 c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCach
e); | |
4975 | |
4976 /* decode the result */ | |
4977 if(c<0) { | |
4978 /* (not) original code point */ | |
4979 c=~c; | |
4980 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { | |
4981 /* mapping already appended to result */ | |
4982 continue; | |
4983 /* } else { append single-code point mapping */ | |
4984 } | |
4985 result.appendCodePoint(c); | |
4986 } | |
4987 return result.toString(); | |
4988 } | 5051 } |
4989 | 5052 |
4990 /** | 5053 /** |
4991 * <p>Returns the titlecase version of the argument string. | 5054 * <p>Returns the titlecase version of the argument string. |
4992 * <p>Position for titlecasing is determined by the argument break | 5055 * <p>Position for titlecasing is determined by the argument break |
4993 * iterator, hence the user can customize his break iterator for | 5056 * iterator, hence the user can customize his break iterator for |
4994 * a specialized titlecasing. In this case only the forward iteration | 5057 * a specialized titlecasing. In this case only the forward iteration |
4995 * needs to be implemented. | 5058 * needs to be implemented. |
4996 * If the break iterator passed in is null, the default Unicode algorithm | 5059 * If the break iterator passed in is null, the default Unicode algorithm |
4997 * will be used to determine the titlecase positions. | 5060 * will be used to determine the titlecase positions. |
4998 * | 5061 * |
4999 * <p>Only positions returned by the break iterator will be title cased, | 5062 * <p>Only positions returned by the break iterator will be title cased, |
5000 * character in between the positions will all be in lower case. | 5063 * character in between the positions will all be in lower case. |
5001 * <p>Casing is dependent on the argument locale and context-sensitive | 5064 * <p>Casing is dependent on the argument locale and context-sensitive |
5002 * @param locale which string is to be converted in | 5065 * @param locale which string is to be converted in |
5003 * @param str source string to be performed on | 5066 * @param str source string to be performed on |
5004 * @param breakiter break iterator to determine the positions in which | 5067 * @param breakiter break iterator to determine the positions in which |
5005 * the character should be title cased. | 5068 * the character should be title cased. |
5006 * @return lowercase version of the argument string | 5069 * @return lowercase version of the argument string |
5007 * @stable ICU 2.6 | 5070 * @stable ICU 2.6 |
5008 */ | 5071 */ |
5009 public static String toTitleCase(Locale locale, String str, | 5072 public static String toTitleCase(Locale locale, String str, |
5010 BreakIterator breakiter) | 5073 BreakIterator breakiter) |
5011 { | 5074 { |
5012 return toTitleCase(ULocale.forLocale(locale), str, breakiter); | 5075 return toTitleCase(locale, str, breakiter, 0); |
5013 } | 5076 } |
5014 | 5077 |
5015 /** | 5078 /** |
5016 * <p>Returns the titlecase version of the argument string. | 5079 * <p>Returns the titlecase version of the argument string. |
5017 * <p>Position for titlecasing is determined by the argument break | 5080 * <p>Position for titlecasing is determined by the argument break |
5018 * iterator, hence the user can customize his break iterator for | 5081 * iterator, hence the user can customize his break iterator for |
5019 * a specialized titlecasing. In this case only the forward iteration | 5082 * a specialized titlecasing. In this case only the forward iteration |
5020 * needs to be implemented. | 5083 * needs to be implemented. |
5021 * If the break iterator passed in is null, the default Unicode algorithm | 5084 * If the break iterator passed in is null, the default Unicode algorithm |
5022 * will be used to determine the titlecase positions. | 5085 * will be used to determine the titlecase positions. |
(...skipping 29 matching lines...) Expand all Loading... |
5052 * @param str source string to be performed on | 5115 * @param str source string to be performed on |
5053 * @param titleIter break iterator to determine the positions in which | 5116 * @param titleIter break iterator to determine the positions in which |
5054 * the character should be title cased. | 5117 * the character should be title cased. |
5055 * @param options bit set to modify the titlecasing operation | 5118 * @param options bit set to modify the titlecasing operation |
5056 * @return lowercase version of the argument string | 5119 * @return lowercase version of the argument string |
5057 * @stable ICU 3.8 | 5120 * @stable ICU 3.8 |
5058 * @see #TITLECASE_NO_LOWERCASE | 5121 * @see #TITLECASE_NO_LOWERCASE |
5059 * @see #TITLECASE_NO_BREAK_ADJUSTMENT | 5122 * @see #TITLECASE_NO_BREAK_ADJUSTMENT |
5060 */ | 5123 */ |
5061 public static String toTitleCase(ULocale locale, String str, | 5124 public static String toTitleCase(ULocale locale, String str, |
5062 BreakIterator titleIter, | 5125 BreakIterator titleIter, int options) { |
5063 int options) { | |
5064 StringContextIterator iter = new StringContextIterator(str); | |
5065 StringBuilder result = new StringBuilder(str.length()); | |
5066 int[] locCache = new int[1]; | |
5067 int c, nc, srcLength = str.length(); | |
5068 | |
5069 if (locale == null) { | |
5070 locale = ULocale.getDefault(); | |
5071 } | |
5072 locCache[0]=0; | |
5073 | |
5074 if(titleIter == null) { | 5126 if(titleIter == null) { |
| 5127 if (locale == null) { |
| 5128 locale = ULocale.getDefault(); |
| 5129 } |
5075 titleIter = BreakIterator.getWordInstance(locale); | 5130 titleIter = BreakIterator.getWordInstance(locale); |
5076 } | 5131 } |
5077 titleIter.setText(str); | 5132 titleIter.setText(str); |
5078 | 5133 return toTitleCase(getCaseLocale(locale), options, titleIter, str); |
5079 int prev, titleStart, index; | |
5080 boolean isFirstIndex; | |
5081 boolean isDutch = locale.getLanguage().equals("nl"); | |
5082 boolean FirstIJ = true; | |
5083 | |
5084 /* set up local variables */ | |
5085 prev=0; | |
5086 isFirstIndex=true; | |
5087 | |
5088 /* titlecasing loop */ | |
5089 while(prev<srcLength) { | |
5090 /* find next index where to titlecase */ | |
5091 if(isFirstIndex) { | |
5092 isFirstIndex=false; | |
5093 index=titleIter.first(); | |
5094 } else { | |
5095 index=titleIter.next(); | |
5096 } | |
5097 if(index==BreakIterator.DONE || index>srcLength) { | |
5098 index=srcLength; | |
5099 } | |
5100 | |
5101 /* | |
5102 * Unicode 4 & 5 section 3.13 Default Case Operations: | |
5103 * | |
5104 * R3 toTitlecase(X): Find the word boundaries based on Unicode Sta
ndard Annex | |
5105 * #29, "Text Boundaries." Between each pair of word boundaries, fin
d the first | |
5106 * cased character F. If F exists, map F to default_title(F); then m
ap each | |
5107 * subsequent character C to default_lower(C). | |
5108 * | |
5109 * In this implementation, segment [prev..index[ into 3 parts: | |
5110 * a) uncased characters (copy as-is) [prev..titleStart[ | |
5111 * b) first case letter (titlecase) [titleStart..titleLimit[ | |
5112 * c) subsequent characters (lowercase) [titleLimit.
.index[ | |
5113 */ | |
5114 if(prev<index) { | |
5115 /* find and copy uncased characters [prev..titleStart[ */ | |
5116 iter.setLimit(index); | |
5117 c=iter.nextCaseMapCP(); | |
5118 if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0 | |
5119 && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) { | |
5120 while((c=iter.nextCaseMapCP())>=0 | |
5121 && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c))
{} | |
5122 titleStart=iter.getCPStart(); | |
5123 if(prev<titleStart) { | |
5124 result.append(str, prev, titleStart); | |
5125 } | |
5126 } else { | |
5127 titleStart=prev; | |
5128 } | |
5129 | |
5130 if(titleStart<index) { | |
5131 FirstIJ = true; | |
5132 /* titlecase c which is from titleStart */ | |
5133 c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale,
locCache); | |
5134 | |
5135 /* decode the result and lowercase up to index */ | |
5136 for(;;) { | |
5137 if(c<0) { | |
5138 /* (not) original code point */ | |
5139 c=~c; | |
5140 result.appendCodePoint(c); | |
5141 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { | |
5142 /* mapping already appended to result */ | |
5143 } else { | |
5144 /* append single-code point mapping */ | |
5145 result.appendCodePoint(c); | |
5146 } | |
5147 | |
5148 if((options&TITLECASE_NO_LOWERCASE)!=0) { | |
5149 /* Optionally just copy the rest of the word unchang
ed. */ | |
5150 | |
5151 int titleLimit=iter.getCPLimit(); | |
5152 if(titleLimit<index) { | |
5153 /* Special Case - Dutch IJ Titlecasing */ | |
5154 if (isDutch && c == 0x0049 && str.charAt(titleLi
mit) == 'j') { | |
5155 result.append('J').append(str, titleLimit +
1, index); | |
5156 } else { | |
5157 result.append(str, titleLimit, index); | |
5158 } | |
5159 } | |
5160 iter.moveToLimit(); | |
5161 break; | |
5162 } else if((nc=iter.nextCaseMapCP())>=0) { | |
5163 if (isDutch && (nc == 0x004A || nc == 0x006A) | |
5164 && (c == 0x0049) && (FirstIJ == true)) { | |
5165 c = 0x004A; /* J */ | |
5166 FirstIJ = false; | |
5167 } else { | |
5168 /* Normal operation: Lowercase the rest of the w
ord. */ | |
5169 c = UCaseProps.INSTANCE.toFullLower(nc, iter, re
sult, locale, | |
5170 locCache); | |
5171 } | |
5172 } else { | |
5173 break; | |
5174 } | |
5175 } | |
5176 } | |
5177 } | |
5178 | |
5179 prev=index; | |
5180 } | |
5181 return result.toString(); | |
5182 } | 5134 } |
5183 | 5135 |
5184 | 5136 |
5185 private static final int BREAK_MASK = | 5137 private static final int BREAK_MASK = |
5186 (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER) | 5138 (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER) |
5187 | (1<<UCharacterCategory.OTHER_LETTER) | 5139 | (1<<UCharacterCategory.OTHER_LETTER) |
5188 | (1<<UCharacterCategory.MODIFIER_LETTER); | 5140 | (1<<UCharacterCategory.MODIFIER_LETTER); |
5189 | 5141 |
5190 /** | 5142 /** |
5191 * Return a string with just the first word titlecased, for menus and UI, et
c. This does not affect most of the string, | 5143 * Return a string with just the first word titlecased, for menus and UI, et
c. This does not affect most of the string, |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5274 * the character should be title cased. | 5226 * the character should be title cased. |
5275 * @param options bit set to modify the titlecasing operation | 5227 * @param options bit set to modify the titlecasing operation |
5276 * @return lowercase version of the argument string | 5228 * @return lowercase version of the argument string |
5277 * @see #TITLECASE_NO_LOWERCASE | 5229 * @see #TITLECASE_NO_LOWERCASE |
5278 * @see #TITLECASE_NO_BREAK_ADJUSTMENT | 5230 * @see #TITLECASE_NO_BREAK_ADJUSTMENT |
5279 * @stable ICU 54 | 5231 * @stable ICU 54 |
5280 */ | 5232 */ |
5281 public static String toTitleCase(Locale locale, String str, | 5233 public static String toTitleCase(Locale locale, String str, |
5282 BreakIterator titleIter, | 5234 BreakIterator titleIter, |
5283 int options) { | 5235 int options) { |
5284 return toTitleCase(ULocale.forLocale(locale), str, titleIter, options); | 5236 if(titleIter == null) { |
| 5237 titleIter = BreakIterator.getWordInstance(locale); |
| 5238 } |
| 5239 titleIter.setText(str); |
| 5240 return toTitleCase(getCaseLocale(locale), options, titleIter, str); |
5285 } | 5241 } |
5286 | 5242 |
5287 /** | 5243 /** |
5288 * {@icu} The given character is mapped to its case folding equivalent accor
ding | 5244 * {@icu} The given character is mapped to its case folding equivalent accor
ding |
5289 * to UnicodeData.txt and CaseFolding.txt; if the character has no case | 5245 * to UnicodeData.txt and CaseFolding.txt; if the character has no case |
5290 * folding equivalent, the character itself is returned. | 5246 * folding equivalent, the character itself is returned. |
5291 * | 5247 * |
5292 * <p>This function only returns the simple, single-code point case mapping. | 5248 * <p>This function only returns the simple, single-code point case mapping. |
5293 * Full case mappings should be used whenever possible because they produce | 5249 * Full case mappings should be used whenever possible because they produce |
5294 * better results by working on whole strings. | 5250 * better results by working on whole strings. |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5391 * foldCase(int ch, boolean defaultmapping). | 5347 * foldCase(int ch, boolean defaultmapping). |
5392 * @param str the String to be converted | 5348 * @param str the String to be converted |
5393 * @param options A bit set for special processing. Currently the recognised
options | 5349 * @param options A bit set for special processing. Currently the recognised
options |
5394 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT | 5350 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT |
5395 * @return the case folding equivalent of the character, if any; otherwise t
he | 5351 * @return the case folding equivalent of the character, if any; otherwise t
he |
5396 * character itself. | 5352 * character itself. |
5397 * @see #foldCase(int, boolean) | 5353 * @see #foldCase(int, boolean) |
5398 * @stable ICU 2.6 | 5354 * @stable ICU 2.6 |
5399 */ | 5355 */ |
5400 public static final String foldCase(String str, int options) { | 5356 public static final String foldCase(String str, int options) { |
5401 StringBuilder result = new StringBuilder(str.length()); | 5357 if (str.length() <= 100) { |
5402 int c, i, length; | 5358 if (str.isEmpty()) { |
5403 | 5359 return str; |
5404 length = str.length(); | |
5405 for(i=0; i<length;) { | |
5406 c=str.codePointAt(i); | |
5407 i+=Character.charCount(c); | |
5408 c = UCaseProps.INSTANCE.toFullFolding(c, result, options); | |
5409 | |
5410 /* decode the result */ | |
5411 if(c<0) { | |
5412 /* (not) original code point */ | |
5413 c=~c; | |
5414 } else if(c<=UCaseProps.MAX_STRING_LENGTH) { | |
5415 /* mapping already appended to result */ | |
5416 continue; | |
5417 /* } else { append single-code point mapping */ | |
5418 } | 5360 } |
5419 result.appendCodePoint(c); | 5361 // Collect and apply only changes. |
| 5362 // Good if no or few changes. Bad (slow) if many changes. |
| 5363 Edits edits = new Edits(); |
| 5364 StringBuilder replacementChars = CaseMapImpl.fold( |
| 5365 options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBu
ilder(), edits); |
| 5366 return applyEdits(str, replacementChars, edits); |
| 5367 } else { |
| 5368 return CaseMapImpl.fold(options, str, new StringBuilder(str.length()
), null).toString(); |
5420 } | 5369 } |
5421 return result.toString(); | |
5422 } | 5370 } |
5423 | 5371 |
5424 /** | 5372 /** |
5425 * {@icu} Returns the numeric value of a Han character. | 5373 * {@icu} Returns the numeric value of a Han character. |
5426 * | 5374 * |
5427 * <p>This returns the value of Han 'numeric' code points, | 5375 * <p>This returns the value of Han 'numeric' code points, |
5428 * including those for zero, ten, hundred, thousand, ten thousand, | 5376 * including those for zero, ten, hundred, thousand, ten thousand, |
5429 * and hundred million. | 5377 * and hundred million. |
5430 * This includes both the standard and 'checkwriting' | 5378 * This includes both the standard and 'checkwriting' |
5431 * characters, the 'big circle' zero character, and the standard | 5379 * characters, the 'big circle' zero character, and the standard |
(...skipping 1078 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6510 // private constructor ----------------------------------------------- | 6458 // private constructor ----------------------------------------------- |
6511 ///CLOVER:OFF | 6459 ///CLOVER:OFF |
6512 /** | 6460 /** |
6513 * Private constructor to prevent instantiation | 6461 * Private constructor to prevent instantiation |
6514 */ | 6462 */ |
6515 private UCharacter() | 6463 private UCharacter() |
6516 { | 6464 { |
6517 } | 6465 } |
6518 ///CLOVER:ON | 6466 ///CLOVER:ON |
6519 } | 6467 } |
OLD | NEW |