| LEFT | RIGHT |
|---|---|
| 1 """Filename matching with shell patterns. | 1 """Filename matching with shell patterns. |
| 2 | 2 |
| 3 fnmatch(FILENAME, PATTERN) matches according to the local convention. | 3 fnmatch(FILENAME, PATTERN) matches according to the local convention. |
| 4 fnmatchcase(FILENAME, PATTERN) always takes case into account. | 4 fnmatchcase(FILENAME, PATTERN) always takes case into account. |
| 5 | 5 |
| 6 The functions operate by translating the pattern into a regular | 6 The functions operate by translating the pattern into a regular |
| 7 expression. They cache the compiled regular expressions for speed. | 7 expression. They cache the compiled regular expressions for speed. |
| 8 | 8 |
| 9 The function translate(PATTERN) returns a regular expression | 9 The function translate(PATTERN) returns a regular expression |
| 10 corresponding to PATTERN. (It does not compile it.) | 10 corresponding to PATTERN. (It does not compile it.) |
| 11 """ | 11 """ |
| 12 | 12 |
| 13 import re | 13 import re |
| 14 import sys | |
| 15 | 14 |
| 16 __all__ = ["filter", "fnmatch","fnmatchcase","translate"] | 15 __all__ = ["filter", "fnmatch","fnmatchcase","translate"] |
| 17 | 16 |
| 18 _cache = {} | 17 _cache = {} |
| 19 | 18 |
| 20 def fnmatch(name, pat): | 19 def fnmatch(name, pat): |
| 21 """Test whether FILENAME matches PATTERN. | 20 """Test whether FILENAME matches PATTERN. |
| 22 | 21 |
| 23 Patterns are Unix shell style: | 22 Patterns are Unix shell style: |
| 24 | 23 |
| 25 * matches everything | 24 * matches everything |
| 26 ? matches any single character | 25 ? matches any single character |
| 27 [seq] matches any character in seq | 26 [seq] matches any character in seq |
| 28 [!seq] matches any char not in seq | 27 [!seq] matches any char not in seq |
| 29 | 28 |
| 30 An initial period in FILENAME is not special. | 29 An initial period in FILENAME is not special. |
| 31 Both FILENAME and PATTERN are first case-normalized | 30 Both FILENAME and PATTERN are first case-normalized |
| 32 if the operating system requires it. | 31 if the operating system requires it. |
| 33 If you don't want this, use fnmatchcase(FILENAME, PATTERN). | 32 If you don't want this, use fnmatchcase(FILENAME, PATTERN). |
| 34 """ | 33 """ |
| 35 | 34 |
| 36 import os | 35 import os |
| 37 name = os.path.normcase(name) | 36 name = os.path.normcase(name) |
| 38 pat = os.path.normcase(pat) | 37 pat = os.path.normcase(pat) |
| 39 return fnmatchcase(name, pat) | 38 return fnmatchcase(name, pat) |
| 40 | 39 |
| | 40 def _compile_pattern(pat): |
| | 41 regex = _cache.get(pat) |
| | 42 if regex is None: |
| | 43 if isinstance(pat, bytes): |
| | 44 pat_str = str(pat, 'ISO-8859-1') |
| | 45 res_str = translate(pat_str) |
| | 46 res = bytes(res_str, 'ISO-8859-1') |
| | 47 else: |
| | 48 res = translate(pat) |
| | 49 _cache[pat] = regex = re.compile(res) |
| | 50 return regex.match |
| | 51 |
| 41 def filter(names, pat): | 52 def filter(names, pat): |
|
GvR
2008/08/22 19:04:51
I wonder if we shouldn't also support isinstance(p
| |
| 42 """Return the subset of the list NAMES that match PAT""" | 53 """Return the subset of the list NAMES that match PAT""" |
| 43 import os,posixpath | 54 import os,posixpath |
| 44 result=[] | 55 result = [] |
| 45 pat=os.path.normcase(pat) | 56 pat = os.path.normcase(pat) |
| 46 if not pat in _cache: | 57 match = _compile_pattern(pat) |
| 47 res = translate(pat) | 58 if os.path is posixpath: |
| 48 _cache[pat] = re.compile(res) | 59 # normcase on posix is NOP. Optimize it away from the loop. |
| | 60 for name in names: |
| | 61 if match(name): |
| | 62 result.append(name) |
| 49 else: | 63 else: |
| 50 res = None | 64 for name in names: |
| 51 match=_cache[pat].match | 65 if match(os.path.normcase(name)): |
| 52 match_bytes = None | |
| 53 for name in names: | |
| 54 if isinstance(name, bytes): | |
| 55 if not match_bytes: | |
| 56 # create match regex for bytes string | |
| 57 charset = sys.getfilesystemencoding() | |
| 58 pat = pat.encode(charset) | |
| 59 if not pat in _cache: | |
| 60 if res is None: | |
| 61 res = translate(pat) | |
| 62 res = res.encode(charset) | |
| 63 _cache[pat] = re.compile(res) | |
| 64 match_bytes = _cache[pat].match | |
| 65 if match_bytes(os.path.normcase(name)): | |
| 66 result.append(name) | |
| 67 elif match(os.path.normcase(name)): | |
| 68 result.append(name) | 66 result.append(name) |
| 69 return result | 67 return result |
| 70 | 68 |
| 71 def fnmatchcase(name, pat): | 69 def fnmatchcase(name, pat): |
| 72 """Test whether FILENAME matches PATTERN, including case. | 70 """Test whether FILENAME matches PATTERN, including case. |
| 73 | 71 |
| 74 This is a version of fnmatch() which doesn't case-normalize | 72 This is a version of fnmatch() which doesn't case-normalize |
| 75 its arguments. | 73 its arguments. |
| 76 """ | 74 """ |
| 77 | 75 |
| 78 if not pat in _cache: | 76 match = _compile_pattern(pat) |
| 79 res = translate(pat) | 77 return match(name) is not None |
| 80 _cache[pat] = re.compile(res) | |
| 81 return _cache[pat].match(name) is not None | |
| 82 | 78 |
| 83 def translate(pat): | 79 def translate(pat): |
| 84 """Translate a shell PATTERN to a regular expression. | 80 """Translate a shell PATTERN to a regular expression. |
| 85 | 81 |
| 86 There is no way to quote meta-characters. | 82 There is no way to quote meta-characters. |
| 87 """ | 83 """ |
| 88 | 84 |
| 89 i, n = 0, len(pat) | 85 i, n = 0, len(pat) |
| 90 res = '' | 86 res = '' |
| 91 while i < n: | 87 while i < n: |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 109 stuff = pat[i:j].replace('\\','\\\\') | 105 stuff = pat[i:j].replace('\\','\\\\') |
| 110 i = j+1 | 106 i = j+1 |
| 111 if stuff[0] == '!': | 107 if stuff[0] == '!': |
| 112 stuff = '^' + stuff[1:] | 108 stuff = '^' + stuff[1:] |
| 113 elif stuff[0] == '^': | 109 elif stuff[0] == '^': |
| 114 stuff = '\\' + stuff | 110 stuff = '\\' + stuff |
| 115 res = '%s[%s]' % (res, stuff) | 111 res = '%s[%s]' % (res, stuff) |
| 116 else: | 112 else: |
| 117 res = res + re.escape(c) | 113 res = res + re.escape(c) |
| 118 return res + "$" | 114 return res + "$" |
| LEFT | RIGHT |