Left: | ||
Right: |
OLD | NEW |
---|---|
(Empty) | |
1 # Copyright (C) 2012 Google Inc. | |
2 # | |
3 # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 # you may not use this file except in compliance with the License. | |
5 # You may obtain a copy of the License at | |
6 # | |
7 # http://www.apache.org/licenses/LICENSE-2.0 | |
8 # | |
9 # Unless required by applicable law or agreed to in writing, software | |
10 # distributed under the License is distributed on an "AS IS" BASIS, | |
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 # See the License for the specific language governing permissions and | |
13 # limitations under the License. | |
14 | |
15 import difflib | |
16 | |
17 class PseudoPatienceSequenceMatcher(difflib.SequenceMatcher): | |
18 """Provides s SequenceMatcher that prefers longer "first" matches to longer | |
M-A
2012/10/05 19:30:11
remove extra 's'
macourteau1
2012/10/05 19:33:16
Done.
| |
19 "second" matches. | |
20 """ | |
21 | |
22 def __init__(self, isjunk=None, a='', b=''): | |
M-A
2012/10/05 19:30:11
Remove lines 21-24, they are not needed.
macourteau1
2012/10/05 19:33:16
Done.
| |
23 difflib.SequenceMatcher.__init__(self, isjunk, a, b) | |
24 | |
25 def get_matching_blocks(self): | |
26 """Return list of triples describing matching subsequences. | |
M-A
2012/10/05 19:30:11
Returns
macourteau1
2012/10/05 19:33:16
Done.
| |
27 | |
28 Each triple is of the form (i, j, n), and means that a[i:i+n] == b[j:j+n]. | |
29 The triples are monotonically increasing in i and j. | |
30 | |
31 The last triple is a dummy, and has the value (len(a), len(b), 0). It is the | |
32 only triple with n == 0. If (i, j, n) and (i', j', n') are adjacent triples | |
33 in the list, and the second is not the last triple in the list, then | |
34 i+n != i' or j+n != j'; in other words, adjacent triples always describe | |
35 non-adjacent equal blocks. | |
36 """ | |
37 matches = difflib.SequenceMatcher.get_matching_blocks(self) | |
38 | |
39 # Check if there's a match at the beginning of the current region, and | |
40 # insert a new Match object at the beginning of |matches| if necessary. | |
41 if matches[0].a != matches[0].b: | |
42 match_length = 0 | |
43 index = matches[0].a | |
44 while self.a[index + match_length] == self.b[index + match_length]: | |
45 match_length += 1 | |
46 | |
47 if match_length: | |
48 matches[0] = difflib.Match(index + match_length, | |
49 matches[0].b + match_length, | |
50 matches[0].size - match_length) | |
51 if matches[0].size == 0: | |
52 matches[0] = difflib.Match(index, index, match_length) | |
53 else: | |
54 matches.insert(0, difflib.Match(index, index, match_length)) | |
55 | |
56 if len(matches) < 2: | |
57 return matches | |
58 | |
59 # For all pairs of Match objects, prefer a longer |first| Match if the end | |
60 # of the first match is the same as the beginning of the second match. | |
61 for index in xrange(len(matches) - 2): | |
62 first = matches[index] | |
63 second = matches[index + 1] | |
64 while True: | |
65 if (self.a[first.a + first.size] == self.b[first.b + first.size] and | |
66 self.a[second.a] == self.b[second.b]): | |
67 first = difflib.Match(first.a, first.b, first.size + 1) | |
68 second = difflib.Match(second.a + 1, second.b + 1, second.size - 1) | |
69 else: | |
70 break | |
71 matches[index] = first | |
72 matches[index + 1] = second | |
73 | |
74 return matches | |
OLD | NEW |