Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(40)

Delta Between Two Patch Sets: src/pkg/index/suffixarray/suffixarray.go

Issue 4715041: code review 4715041: go/printer: changed max. number of newlines from 3 to 2 (Closed)
Left Patch Set: Created 13 years, 8 months ago
Right Patch Set: diff -r 43f78423340b https://go.googlecode.com/hg/ Created 13 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Right: Side by side diff | Download
« no previous file with change/comment | « src/pkg/index/suffixarray/qsufsort.go ('k') | src/pkg/index/suffixarray/suffixarray_test.go » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
(no file at all)
1 // Copyright 2010 The Go Authors. All rights reserved. 1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 // Package suffixarray implements substring search in logarithmic time using 5 // Package suffixarray implements substring search in logarithmic time using
6 // an in-memory suffix array. 6 // an in-memory suffix array.
7 // 7 //
8 // Example use: 8 // Example use:
9 // 9 //
10 // // create index for some data 10 // // create index for some data
11 // index := suffixarray.New(data) 11 // index := suffixarray.New(data)
12 // 12 //
13 // // lookup byte slice s 13 // // lookup byte slice s
14 // offsets1 := index.Lookup(s, -1) // the list of all indices where s occur s in data 14 // offsets1 := index.Lookup(s, -1) // the list of all indices where s occur s in data
15 // offsets2 := index.Lookup(s, 3) // the list of at most 3 indices where s occurs in data 15 // offsets2 := index.Lookup(s, 3) // the list of at most 3 indices where s occurs in data
16 // 16 //
17 package suffixarray 17 package suffixarray
18 18
19 import ( 19 import (
20 "bytes" 20 "bytes"
21 "regexp" 21 "regexp"
22 "sort" 22 "sort"
23 ) 23 )
24 24
25
26 // Index implements a suffix array for fast substring search. 25 // Index implements a suffix array for fast substring search.
27 type Index struct { 26 type Index struct {
28 data []byte 27 data []byte
29 sa []int // suffix array for data 28 sa []int // suffix array for data
30 } 29 }
31 30
32
33 // New creates a new Index for data. 31 // New creates a new Index for data.
34 // Index creation time is O(N*log(N)) for N = len(data). 32 // Index creation time is O(N*log(N)) for N = len(data).
35 func New(data []byte) *Index { 33 func New(data []byte) *Index {
36 return &Index{data, qsufsort(data)} 34 return &Index{data, qsufsort(data)}
37 } 35 }
38
39 36
40 // Bytes returns the data over which the index was created. 37 // Bytes returns the data over which the index was created.
41 // It must not be modified. 38 // It must not be modified.
42 // 39 //
43 func (x *Index) Bytes() []byte { 40 func (x *Index) Bytes() []byte {
44 return x.data 41 return x.data
45 } 42 }
46 43
47
48 func (x *Index) at(i int) []byte { 44 func (x *Index) at(i int) []byte {
49 return x.data[x.sa[i]:] 45 return x.data[x.sa[i]:]
50 } 46 }
51
52 47
53 // lookupAll returns a slice into the matching region of the index. 48 // lookupAll returns a slice into the matching region of the index.
54 // The runtime is O(log(N)*len(s)). 49 // The runtime is O(log(N)*len(s)).
55 func (x *Index) lookupAll(s []byte) []int { 50 func (x *Index) lookupAll(s []byte) []int {
56 // find matching suffix index range [i:j] 51 // find matching suffix index range [i:j]
57 // find the first index where s would be the prefix 52 // find the first index where s would be the prefix
58 i := sort.Search(len(x.sa), func(i int) bool { return bytes.Compare(x.at (i), s) >= 0 }) 53 i := sort.Search(len(x.sa), func(i int) bool { return bytes.Compare(x.at (i), s) >= 0 })
59 // starting at i, find the first index at which s is not a prefix 54 // starting at i, find the first index at which s is not a prefix
60 j := i + sort.Search(len(x.sa)-i, func(j int) bool { return !bytes.HasPr efix(x.at(j+i), s) }) 55 j := i + sort.Search(len(x.sa)-i, func(j int) bool { return !bytes.HasPr efix(x.at(j+i), s) })
61 return x.sa[i:j] 56 return x.sa[i:j]
62 } 57 }
63
64 58
65 // Lookup returns an unsorted list of at most n indices where the byte string s 59 // Lookup returns an unsorted list of at most n indices where the byte string s
66 // occurs in the indexed data. If n < 0, all occurrences are returned. 60 // occurs in the indexed data. If n < 0, all occurrences are returned.
67 // The result is nil if s is empty, s is not found, or n == 0. 61 // The result is nil if s is empty, s is not found, or n == 0.
68 // Lookup time is O(log(N)*len(s) + len(result)) where N is the 62 // Lookup time is O(log(N)*len(s) + len(result)) where N is the
69 // size of the indexed data. 63 // size of the indexed data.
70 // 64 //
71 func (x *Index) Lookup(s []byte, n int) (result []int) { 65 func (x *Index) Lookup(s []byte, n int) (result []int) {
72 if len(s) > 0 && n != 0 { 66 if len(s) > 0 && n != 0 {
73 matches := x.lookupAll(s) 67 matches := x.lookupAll(s)
74 if len(matches) < n || n < 0 { 68 if len(matches) < n || n < 0 {
75 n = len(matches) 69 n = len(matches)
76 } 70 }
77 if n > 0 { 71 if n > 0 {
78 result = make([]int, n) 72 result = make([]int, n)
79 copy(result, matches) 73 copy(result, matches)
80 } 74 }
81 } 75 }
82 return 76 return
83 } 77 }
84
85 78
86 // FindAllIndex returns a sorted list of non-overlapping matches of the 79 // FindAllIndex returns a sorted list of non-overlapping matches of the
87 // regular expression r, where a match is a pair of indices specifying 80 // regular expression r, where a match is a pair of indices specifying
88 // the matched slice of x.Bytes(). If n < 0, all matches are returned 81 // the matched slice of x.Bytes(). If n < 0, all matches are returned
89 // in successive order. Otherwise, at most n matches are returned and 82 // in successive order. Otherwise, at most n matches are returned and
90 // they may not be successive. The result is nil if there are no matches, 83 // they may not be successive. The result is nil if there are no matches,
91 // or if n == 0. 84 // or if n == 0.
92 // 85 //
93 func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) { 86 func (x *Index) FindAllIndex(r *regexp.Regexp, n int) (result [][]int) {
94 // a non-empty literal prefix is used to determine possible 87 // a non-empty literal prefix is used to determine possible
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 // found all matches or there's no chance to find more 172 // found all matches or there's no chance to find more
180 // (n and n1 can be negative) 173 // (n and n1 can be negative)
181 break 174 break
182 } 175 }
183 } 176 }
184 if len(result) == 0 { 177 if len(result) == 0 {
185 result = nil 178 result = nil
186 } 179 }
187 return 180 return
188 } 181 }
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b