src/pkg/bytes/bytes.go - Issue 1696062: code review 1696062: bytes: add IndexRune, FieldsFunc and To*Special

Side by Side Diff: src/pkg/bytes/bytes.go

Issue 1696062: code review 1696062: bytes: add IndexRune, FieldsFunc and To*Special (Closed)

Patch Set: code review 1696062: bytes: add IndexRune, FieldsFunc and To*Special Created 14 years, 7 months ago

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2009 The Go Authors. All rights reserved.	1 // Copyright 2009 The Go Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 // The bytes package implements functions for the manipulation of byte slices.	5 // The bytes package implements functions for the manipulation of byte slices.

6 // Analagous to the facilities of the strings package.	6 // Analogous to the facilities of the strings package.

7 package bytes	7 package bytes

8	8

9 import (	9 import (

10 "unicode"	10 "unicode"

11 "utf8"	11 "utf8"

12 )	12 )

13	13

14 // Compare returns an integer comparing the two byte arrays lexicographically.	14 // Compare returns an integer comparing the two byte arrays lexicographically.

15 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b	15 // The result will be 0 if a==b, -1 if a < b, and +1 if a > b

16 func Compare(a, b []byte) int {	16 func Compare(a, b []byte) int {

(...skipping 103 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
120 }	120 }

121 c := sep[0]	121 c := sep[0]

122 for i := len(s) - n; i >= 0; i-- {	122 for i := len(s) - n; i >= 0; i-- {

123 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {	123 if s[i] == c && (n == 1 || Equal(s[i:i+n], sep)) {

124 return i	124 return i

125 }	125 }

126 }	126 }

127 return -1	127 return -1

128 }	128 }

129	129

	130 // IndexRune interprets s as a sequence of UTF-8-encoded Unicode code points.

	131 // It returns the byte index of the first occurrence in s of the given rune.

	132 // It returns -1 if rune is not present in s.

	133 func IndexRune(s []byte, rune int) int {

	134 for i := 0; i < len(s); {

	135 r, size := utf8.DecodeRune(s[i:])

	136 if r == rune {

	137 return i

	138 }

	139 i += size

	140 }

	141 return -1

	142 }

	143

130 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.	144 // IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.

131 // It returns the byte index of the first occurrence in s of any of the Unicode	145 // It returns the byte index of the first occurrence in s of any of the Unicode

132 // code points in chars. It returns -1 if chars is empty or if there is no code	146 // code points in chars. It returns -1 if chars is empty or if there is no code

133 // point in common.	147 // point in common.

134 func IndexAny(s []byte, chars string) int {	148 func IndexAny(s []byte, chars string) int {

135 if len(chars) > 0 {	149 if len(chars) > 0 {

136 var rune, width int	150 var rune, width int

137 for i := 0; i < len(s); i += width {	151 for i := 0; i < len(s); i += width {

138 rune = int(s[i])	152 rune = int(s[i])

139 if rune < utf8.RuneSelf {	153 if rune < utf8.RuneSelf {

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
195 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.	209 // n > 0: at most n subslices; the last subslice will be the unsplit remainder.

196 // n == 0: the result is nil (zero subslices)	210 // n == 0: the result is nil (zero subslices)

197 // n < 0: all subslices	211 // n < 0: all subslices

198 func SplitAfter(s, sep []byte, n int) [][]byte {	212 func SplitAfter(s, sep []byte, n int) [][]byte {

199 return genSplit(s, sep, len(sep), n)	213 return genSplit(s, sep, len(sep), n)

200 }	214 }

201	215

202 // Fields splits the array s around each instance of one or more consecutive white space	216 // Fields splits the array s around each instance of one or more consecutive white space

203 // characters, returning a slice of subarrays of s or an empty list if s contains only white space.	217 // characters, returning a slice of subarrays of s or an empty list if s contains only white space.

204 func Fields(s []byte) [][]byte {	218 func Fields(s []byte) [][]byte {

	219 return FieldsFunc(s, unicode.IsSpace)

	220 }

	221

	222 // FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.

	223 // It splits the array s at each run of code points c satisfying f(c) and

	224 // returns a slice of subarrays of s. If all code points in s satisfy f(c), an

	225 // empty slice is returned.

	226 func FieldsFunc(s []byte, f func(int) bool) [][]byte {

205 n := 0	227 n := 0

206 inField := false	228 inField := false

207 for i := 0; i < len(s); {	229 for i := 0; i < len(s); {

208 rune, size := utf8.DecodeRune(s[i:])	230 rune, size := utf8.DecodeRune(s[i:])

209 wasInField := inField	231 wasInField := inField

210 » » inField = !unicode.IsSpace(rune)	232 » » inField = !f(rune)

211 if inField && !wasInField {	233 if inField && !wasInField {

212 n++	234 n++

213 }	235 }

214 i += size	236 i += size

215 }	237 }

216	238

217 a := make([][]byte, n)	239 a := make([][]byte, n)

218 na := 0	240 na := 0

219 fieldStart := -1	241 fieldStart := -1

220 for i := 0; i <= len(s) && na < n; {	242 for i := 0; i <= len(s) && na < n; {

221 rune, size := utf8.DecodeRune(s[i:])	243 rune, size := utf8.DecodeRune(s[i:])

222 » » if fieldStart < 0 && size > 0 && !unicode.IsSpace(rune) {	244 » » if fieldStart < 0 && size > 0 && !f(rune) {

223 fieldStart = i	245 fieldStart = i

224 i += size	246 i += size

225 continue	247 continue

226 }	248 }

227 » » if fieldStart >= 0 && (size == 0 || unicode.IsSpace(rune)) {	249 » » if fieldStart >= 0 && (size == 0 || f(rune)) {

228 a[na] = s[fieldStart:i]	250 a[na] = s[fieldStart:i]

229 na++	251 na++

230 fieldStart = -1	252 fieldStart = -1

231 }	253 }

232 if size == 0 {	254 if size == 0 {

233 break	255 break

234 }	256 }

235 i += size	257 i += size

236 }	258 }

237 return a[0:na]	259 return a[0:na]

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
330	352

331 // ToUpper returns a copy of the byte array s with all Unicode letters mapped to their upper case.	353 // ToUpper returns a copy of the byte array s with all Unicode letters mapped to their upper case.

332 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }	354 func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }

333	355

334 // ToLower returns a copy of the byte array s with all Unicode letters mapped to their lower case.	356 // ToLower returns a copy of the byte array s with all Unicode letters mapped to their lower case.

335 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }	357 func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }

336	358

337 // ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case.	359 // ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case.

338 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }	360 func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }

339	361

	362 // ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their

	363 // upper case, giving priority to the special casing rules.

	364 func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {

	365 return Map(func(r int) int { return _case.ToUpper(r) }, s)

	366 }

	367

	368 // ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their

	369 // lower case, giving priority to the special casing rules.

	370 func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {

	371 return Map(func(r int) int { return _case.ToLower(r) }, s)

	372 }

	373

	374 // ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their

	375 // title case, giving priority to the special casing rules.

	376 func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {

	377 return Map(func(r int) int { return _case.ToTitle(r) }, s)

	378 }

	379

	380

340 // isSeparator reports whether the rune could mark a word boundary.	381 // isSeparator reports whether the rune could mark a word boundary.

341 // TODO: update when package unicode captures more of the properties.	382 // TODO: update when package unicode captures more of the properties.

342 func isSeparator(rune int) bool {	383 func isSeparator(rune int) bool {

343 // ASCII alphanumerics and underscore are not separators	384 // ASCII alphanumerics and underscore are not separators

344 if rune <= 0x7F {	385 if rune <= 0x7F {

345 switch {	386 switch {

346 case '0' <= rune && rune <= '9':	387 case '0' <= rune && rune <= '9':

347 return false	388 return false

348 case 'a' <= rune && rune <= 'z':	389 case 'a' <= rune && rune <= 'z':

349 return false	390 return false

(...skipping 249 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
599 } else {	640 } else {

600 j += Index(s[start:], old)	641 j += Index(s[start:], old)

601 }	642 }

602 w += copy(t[w:], s[start:j])	643 w += copy(t[w:], s[start:j])

603 w += copy(t[w:], new)	644 w += copy(t[w:], new)

604 start = j + len(old)	645 start = j + len(old)

605 }	646 }

606 w += copy(t[w:], s[start:])	647 w += copy(t[w:], s[start:])

607 return t[0:w]	648 return t[0:w]

608 }	649 }

OLD	NEW

« no previous file with comments | « no previous file | src/pkg/bytes/bytes_test.go » ('j') | no next file with comments »