LEFT | RIGHT |
(no file at all) | |
1 // Copyright 2012 The Go Authors. All rights reserved. | 1 // Copyright 2012 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 package collate | 5 package collate |
6 | 6 |
7 import ( | 7 import ( |
8 "unicode" | 8 "unicode" |
9 ) | 9 ) |
10 | 10 |
11 // weights holds the decoded weights per collation level. | 11 // weights holds the decoded weights per collation level. |
12 type weights struct { | 12 type weights struct { |
13 primary uint32 | 13 primary uint32 |
14 secondary uint16 | 14 secondary uint16 |
15 tertiary uint8 | 15 tertiary uint8 |
16 // TODO: compute quaternary on the fly or compress this value into 8 bits | 16 // TODO: compute quaternary on the fly or compress this value into 8 bits |
17 // such that weights fit within 64bit. | 17 // such that weights fit within 64bit. |
18 quaternary uint32 | 18 quaternary uint32 |
19 } | 19 } |
20 | 20 |
21 const ( | 21 const ( |
22 defaultSecondary = 0x20 | 22 defaultSecondary = 0x20 |
23 defaultTertiary = 0x2 | 23 defaultTertiary = 0x2 |
24 maxTertiary = 0x1F | 24 maxTertiary = 0x1F |
| 25 maxQuaternary = 0x1FFFFF // 21 bits. |
25 ) | 26 ) |
26 | 27 |
27 // colElem is a representation of a collation element. | 28 // colElem is a representation of a collation element. |
28 // In the typical case, a rune maps to a single collation element. If a rune | 29 // In the typical case, a rune maps to a single collation element. If a rune |
29 // can be the start of a contraction or expands into multiple collation elements, | 30 // can be the start of a contraction or expands into multiple collation elements, |
30 // then the colElem that is associated with a rune will have a special form to represent | 31 // then the colElem that is associated with a rune will have a special form to represent |
31 // such m to n mappings. Such special colElems have a value >= 0x80000000. | 32 // such m to n mappings. Such special colElems have a value >= 0x80000000. |
32 type colElem uint32 | 33 type colElem uint32 |
33 | 34 |
34 const ( | 35 const ( |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
138 maxUnified = 0x9FFF | 139 maxUnified = 0x9FFF |
139 minCompatibility = 0xF900 | 140 minCompatibility = 0xF900 |
140 maxCompatibility = 0xFAFF | 141 maxCompatibility = 0xFAFF |
141 minRare = 0x3400 | 142 minRare = 0x3400 |
142 maxRare = 0x4DBF | 143 maxRare = 0x4DBF |
143 ) | 144 ) |
144 const ( | 145 const ( |
145 commonUnifiedOffset = 0xFB40 | 146 commonUnifiedOffset = 0xFB40 |
146 rareUnifiedOffset = 0x1FB40 | 147 rareUnifiedOffset = 0x1FB40 |
147 otherOffset = 0x4FB40 | 148 otherOffset = 0x4FB40 |
148 » maxPrimary = otherOffset + unicode.MaxRune | 149 » illegalOffset = otherOffset + unicode.MaxRune |
| 150 » maxPrimary = illegalOffset + 1 |
149 ) | 151 ) |
150 | 152 |
151 // implicitPrimary returns the primary weight for a rune | 153 // implicitPrimary returns the primary weight for a rune |
152 // for which there is no entry for the rune in the collation table. | 154 // for which there is no entry for the rune in the collation table. |
153 // We take a different approach from the one specified in | 155 // We take a different approach from the one specified in |
154 // http://unicode.org/reports/tr10/#Implicit_Weights, | 156 // http://unicode.org/reports/tr10/#Implicit_Weights, |
155 // but preserve the resulting relative ordering of the runes. | 157 // but preserve the resulting relative ordering of the runes. |
156 func implicitPrimary(r rune) int { | 158 func implicitPrimary(r rune) int { |
157 if unicode.Is(unicode.Ideographic, r) { | 159 if unicode.Is(unicode.Ideographic, r) { |
158 if r >= minUnified && r <= maxUnified { | 160 if r >= minUnified && r <= maxUnified { |
159 // The most common case for CJK. | 161 // The most common case for CJK. |
160 return int(r) + commonUnifiedOffset | 162 return int(r) + commonUnifiedOffset |
161 } | 163 } |
162 if r >= minCompatibility && r <= maxCompatibility { | 164 if r >= minCompatibility && r <= maxCompatibility { |
163 // This will typically not hit. The DUCET explicitly specifies mappings | 165 // This will typically not hit. The DUCET explicitly specifies mappings |
164 // for all characters that do not decompose. | 166 // for all characters that do not decompose. |
165 return int(r) + commonUnifiedOffset | 167 return int(r) + commonUnifiedOffset |
166 } | 168 } |
167 return int(r) + rareUnifiedOffset | 169 return int(r) + rareUnifiedOffset |
168 } | 170 } |
169 return int(r) + otherOffset | 171 return int(r) + otherOffset |
170 } | 172 } |
LEFT | RIGHT |