OLD | NEW |
1 // Copyright 2009 The Go Authors. All rights reserved. | 1 // Copyright 2009 The Go Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style | 2 // Use of this source code is governed by a BSD-style |
3 // license that can be found in the LICENSE file. | 3 // license that can be found in the LICENSE file. |
4 | 4 |
5 // The bytes package implements functions for the manipulation of byte slices. | 5 // The bytes package implements functions for the manipulation of byte slices. |
6 // Analagous to the facilities of the strings package. | 6 // Analogous to the facilities of the strings package. |
7 package bytes | 7 package bytes |
8 | 8 |
9 import ( | 9 import ( |
10 "unicode" | 10 "unicode" |
11 "utf8" | 11 "utf8" |
12 ) | 12 ) |
13 | 13 |
14 // Compare returns an integer comparing the two byte arrays lexicographically. | 14 // Compare returns an integer comparing the two byte arrays lexicographically. |
15 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b | 15 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b |
16 func Compare(a, b []byte) int { | 16 func Compare(a, b []byte) int { |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
120 } | 120 } |
121 c := sep[0] | 121 c := sep[0] |
122 for i := len(s) - n; i >= 0; i-- { | 122 for i := len(s) - n; i >= 0; i-- { |
123 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { | 123 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) { |
124 return i | 124 return i |
125 } | 125 } |
126 } | 126 } |
127 return -1 | 127 return -1 |
128 } | 128 } |
129 | 129 |
// IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.
// It returns the byte index of the first occurrence in s of the given rune.
// It returns -1 if rune is not present in s.
func IndexRune(s []byte, rune int) int {
	// A code point below utf8.RuneSelf is encoded as exactly one byte, and
	// that byte value never appears inside a multi-byte sequence, so a plain
	// byte scan gives the same answer without decoding anything.
	if 0 <= rune && rune < utf8.RuneSelf {
		for i, b := range s {
			if int(b) == rune {
				return i
			}
		}
		return -1
	}
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRune(s[i:])
		// Convert explicitly so the comparison stays well-typed regardless of
		// the integer type DecodeRune uses for its result.
		if int(r) == rune {
			return i
		}
		// Advance by the width of the code point just decoded.
		i += size
	}
	return -1
}
| 143 |
130 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. | 144 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points. |
131 // It returns the byte index of the first occurrence in s of any of the Unicode | 145 // It returns the byte index of the first occurrence in s of any of the Unicode |
132 // code points in chars. It returns -1 if chars is empty or if there is no code | 146 // code points in chars. It returns -1 if chars is empty or if there is no code |
133 // point in common. | 147 // point in common. |
134 func IndexAny(s []byte, chars string) int { | 148 func IndexAny(s []byte, chars string) int { |
135 if len(chars) > 0 { | 149 if len(chars) > 0 { |
136 var rune, width int | 150 var rune, width int |
137 for i := 0; i < len(s); i += width { | 151 for i := 0; i < len(s); i += width { |
138 rune = int(s[i]) | 152 rune = int(s[i]) |
139 if rune < utf8.RuneSelf { | 153 if rune < utf8.RuneSelf { |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
195 // n > 0: at most n subslices; the last subslice will be the unsplit remainder
. | 209 // n > 0: at most n subslices; the last subslice will be the unsplit remainder
. |
196 // n == 0: the result is nil (zero subslices) | 210 // n == 0: the result is nil (zero subslices) |
197 // n < 0: all subslices | 211 // n < 0: all subslices |
198 func SplitAfter(s, sep []byte, n int) [][]byte { | 212 func SplitAfter(s, sep []byte, n int) [][]byte { |
199 return genSplit(s, sep, len(sep), n) | 213 return genSplit(s, sep, len(sep), n) |
200 } | 214 } |
201 | 215 |
202 // Fields splits the array s around each instance of one or more consecutive whi
te space | 216 // Fields splits the array s around each instance of one or more consecutive whi
te space |
203 // characters, returning a slice of subarrays of s or an empty list if s contain
s only white space. | 217 // characters, returning a slice of subarrays of s or an empty list if s contain
s only white space. |
204 func Fields(s []byte) [][]byte { | 218 func Fields(s []byte) [][]byte { |
| 219 return FieldsFunc(s, unicode.IsSpace) |
| 220 } |
| 221 |
| 222 // FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points. |
| 223 // It splits the array s at each run of code points c satisfying f(c) and |
| 224 // returns a slice of subarrays of s. If all code points in s satisfy f(c), or |
 | 225 // len(s) == 0, an empty slice is returned. |
| 226 func FieldsFunc(s []byte, f func(int) bool) [][]byte { |
205 n := 0 | 227 n := 0 |
206 inField := false | 228 inField := false |
207 for i := 0; i < len(s); { | 229 for i := 0; i < len(s); { |
208 rune, size := utf8.DecodeRune(s[i:]) | 230 rune, size := utf8.DecodeRune(s[i:]) |
209 wasInField := inField | 231 wasInField := inField |
210 » » inField = !unicode.IsSpace(rune) | 232 » » inField = !f(rune) |
211 if inField && !wasInField { | 233 if inField && !wasInField { |
212 n++ | 234 n++ |
213 } | 235 } |
214 i += size | 236 i += size |
215 } | 237 } |
216 | 238 |
217 a := make([][]byte, n) | 239 a := make([][]byte, n) |
218 na := 0 | 240 na := 0 |
219 fieldStart := -1 | 241 fieldStart := -1 |
220 for i := 0; i <= len(s) && na < n; { | 242 for i := 0; i <= len(s) && na < n; { |
221 rune, size := utf8.DecodeRune(s[i:]) | 243 rune, size := utf8.DecodeRune(s[i:]) |
222 » » if fieldStart < 0 && size > 0 && !unicode.IsSpace(rune) { | 244 » » if fieldStart < 0 && size > 0 && !f(rune) { |
223 fieldStart = i | 245 fieldStart = i |
224 i += size | 246 i += size |
225 continue | 247 continue |
226 } | 248 } |
227 » » if fieldStart >= 0 && (size == 0 || unicode.IsSpace(rune)) { | 249 » » if fieldStart >= 0 && (size == 0 || f(rune)) { |
228 a[na] = s[fieldStart:i] | 250 a[na] = s[fieldStart:i] |
229 na++ | 251 na++ |
230 fieldStart = -1 | 252 fieldStart = -1 |
231 } | 253 } |
232 if size == 0 { | 254 if size == 0 { |
233 break | 255 break |
234 } | 256 } |
235 i += size | 257 i += size |
236 } | 258 } |
237 return a[0:na] | 259 return a[0:na] |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
330 | 352 |
331 // ToUpper returns a copy of the byte array s with all Unicode letters mapped to
their upper case. | 353 // ToUpper returns a copy of the byte array s with all Unicode letters mapped to
their upper case. |
332 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } | 354 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) } |
333 | 355 |
334 // ToUpper returns a copy of the byte array s with all Unicode letters mapped to
their lower case. | 356 // ToUpper returns a copy of the byte array s with all Unicode letters mapped to
their lower case. |
335 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } | 357 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) } |
336 | 358 |
337 // ToTitle returns a copy of the byte array s with all Unicode letters mapped to
their title case. | 359 // ToTitle returns a copy of the byte array s with all Unicode letters mapped to
their title case. |
338 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } | 360 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) } |
339 | 361 |
| 362 // ToUpperSpecial returns a copy of the byte array s with all Unicode letters ma
pped to their |
| 363 // upper case, giving priority to the special casing rules. |
| 364 func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte { |
| 365 return Map(func(r int) int { return _case.ToUpper(r) }, s) |
| 366 } |
| 367 |
| 368 // ToLowerSpecial returns a copy of the byte array s with all Unicode letters ma
pped to their |
| 369 // lower case, giving priority to the special casing rules. |
| 370 func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte { |
| 371 return Map(func(r int) int { return _case.ToLower(r) }, s) |
| 372 } |
| 373 |
| 374 // ToTitleSpecial returns a copy of the byte array s with all Unicode letters ma
pped to their |
| 375 // title case, giving priority to the special casing rules. |
| 376 func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte { |
| 377 return Map(func(r int) int { return _case.ToTitle(r) }, s) |
| 378 } |
| 379 |
| 380 |
340 // isSeparator reports whether the rune could mark a word boundary. | 381 // isSeparator reports whether the rune could mark a word boundary. |
341 // TODO: update when package unicode captures more of the properties. | 382 // TODO: update when package unicode captures more of the properties. |
342 func isSeparator(rune int) bool { | 383 func isSeparator(rune int) bool { |
343 // ASCII alphanumerics and underscore are not separators | 384 // ASCII alphanumerics and underscore are not separators |
344 if rune <= 0x7F { | 385 if rune <= 0x7F { |
345 switch { | 386 switch { |
346 case '0' <= rune && rune <= '9': | 387 case '0' <= rune && rune <= '9': |
347 return false | 388 return false |
348 case 'a' <= rune && rune <= 'z': | 389 case 'a' <= rune && rune <= 'z': |
349 return false | 390 return false |
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
599 } else { | 640 } else { |
600 j += Index(s[start:], old) | 641 j += Index(s[start:], old) |
601 } | 642 } |
602 w += copy(t[w:], s[start:j]) | 643 w += copy(t[w:], s[start:j]) |
603 w += copy(t[w:], new) | 644 w += copy(t[w:], new) |
604 start = j + len(old) | 645 start = j + len(old) |
605 } | 646 } |
606 w += copy(t[w:], s[start:]) | 647 w += copy(t[w:], s[start:]) |
607 return t[0:w] | 648 return t[0:w] |
608 } | 649 } |
OLD | NEW |