Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 """Common operations on Posix pathnames. | 1 """Common operations on Posix pathnames. |
2 | 2 |
3 Instead of importing this module directly, import os and refer to | 3 Instead of importing this module directly, import os and refer to |
4 this module as os.path. The "os.path" name is an alias for this | 4 this module as os.path. The "os.path" name is an alias for this |
5 module on Posix systems; on other systems (e.g. Mac, Windows), | 5 module on Posix systems; on other systems (e.g. Mac, Windows), |
6 os.path provides the same operations in a manner specific to that | 6 os.path provides the same operations in a manner specific to that |
7 platform, and is an alias to another module (e.g. macpath, ntpath). | 7 platform, and is an alias to another module (e.g. macpath, ntpath). |
8 | 8 |
9 Some of this can actually be useful on non-Posix systems too, e.g. | 9 Some of this can actually be useful on non-Posix systems too, e.g. |
10 for manipulation of the pathname component of URLs. | 10 for manipulation of the pathname component of URLs. |
11 """ | 11 """ |
12 | 12 |
13 import os | 13 import os |
14 import sys | 14 import sys |
15 import stat | 15 import stat |
16 import genericpath | 16 import genericpath |
17 from genericpath import * | 17 from genericpath import * |
18 | 18 |
19 __all__ = ["normcase","isabs","join","splitdrive","split","splitext", | 19 __all__ = ["normcase","isabs","join","splitdrive","split","splitext", |
20 "basename","dirname","commonprefix","getsize","getmtime", | 20 "basename","dirname","commonprefix","getsize","getmtime", |
21 "getatime","getctime","islink","exists","lexists","isdir","isfile", | 21 "getatime","getctime","islink","exists","lexists","isdir","isfile", |
22 "ismount", "expanduser","expandvars","normpath","abspath", | 22 "ismount", "expanduser","expandvars","normpath","abspath", |
23 "samefile","sameopenfile","samestat", | 23 "samefile","sameopenfile","samestat", |
24 "curdir","pardir","sep","pathsep","defpath","altsep","extsep", | 24 "curdir","pardir","sep","pathsep","defpath","altsep","extsep", |
25 "devnull","realpath","supports_unicode_filenames","relpath"] | 25 "devnull","realpath","supports_unicode_filenames","relpath"] |
26 | 26 |
27 # strings representing various path-related bits and pieces | 27 # Strings representing various path-related bits and pieces. |
28 # These are primarily for export; internally, they are hardcoded. | |
28 curdir = '.' | 29 curdir = '.' |
29 pardir = '..' | 30 pardir = '..' |
30 extsep = '.' | 31 extsep = '.' |
31 sep = '/' | 32 sep = '/' |
32 pathsep = ':' | 33 pathsep = ':' |
33 defpath = ':/bin:/usr/bin' | 34 defpath = ':/bin:/usr/bin' |
34 altsep = None | 35 altsep = None |
35 devnull = '/dev/null' | 36 devnull = '/dev/null' |
36 | 37 |
38 def _get_sep(path): | |
39 if isinstance(path, bytes): | |
40 return b'/' | |
41 else: | |
42 return '/' | |
43 | |
37 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. | 44 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. |
38 # On MS-DOS this may also turn slashes into backslashes; however, other | 45 # On MS-DOS this may also turn slashes into backslashes; however, other |
39 # normalizations (such as optimizing '../' away) are not allowed | 46 # normalizations (such as optimizing '../' away) are not allowed |
40 # (another function should be defined to do that). | 47 # (another function should be defined to do that). |
41 | 48 |
42 def normcase(s): | 49 def normcase(s): |
43 """Normalize case of pathname. Has no effect under Posix""" | 50 """Normalize case of pathname. Has no effect under Posix""" |
51 # TODO: on Mac OS X, this should really return s.lower(). | |
44 return s | 52 return s |
45 | 53 |
46 | 54 |
47 # Return whether a path is absolute. | 55 # Return whether a path is absolute. |
48 # Trivial in Posix, harder on the Mac or MS-DOS. | 56 # Trivial in Posix, harder on the Mac or MS-DOS. |
49 | 57 |
50 def isabs(s): | 58 def isabs(s): |
51 """Test whether a path is absolute""" | 59 """Test whether a path is absolute""" |
52 return s.startswith('/') | 60 sep = _get_sep(s) |
61 return s.startswith(sep) | |
53 | 62 |
54 | 63 |
55 # Join pathnames. | 64 # Join pathnames. |
56 # Ignore the previous parts if a part is absolute. | 65 # Ignore the previous parts if a part is absolute. |
57 # Insert a '/' unless the first part is empty or already ends in '/'. | 66 # Insert a '/' unless the first part is empty or already ends in '/'. |
58 | 67 |
59 def join(a, *p): | 68 def join(a, *p): |
60 """Join two or more pathname components, inserting '/' as needed. | 69 """Join two or more pathname components, inserting '/' as needed. |
61 If any component is an absolute path, all previous path components | 70 If any component is an absolute path, all previous path components |
62 will be discarded.""" | 71 will be discarded.""" |
63 use_bytes = any( isinstance(part, bytes) for part in (a,)+p ) | 72 sep = _get_sep(a) |
GvR
2008/08/22 19:04:51
Style nit: don't put spaces within the ( parentheses… [comment truncated]
| |
64 path = a | 73 path = a |
65 sep = '/' | |
66 if use_bytes: | |
GvR
2008/08/22 19:04:51
Again, I'd recommend changing this so that the arg… [comment truncated]
| |
67 charset = sys.getfilesystemencoding() | |
68 sep = sep.encode(charset) | |
69 if use_bytes and isinstance(path, str): | |
70 path = path.encode(charset) | |
71 for b in p: | 74 for b in p: |
72 if use_bytes and isinstance(b, str): | |
73 b = b.encode(charset) | |
74 if b.startswith(sep): | 75 if b.startswith(sep): |
75 path = b | 76 path = b |
76 elif path == '' or path.endswith(sep): | 77 elif not path or path.endswith(sep): |
77 path += b | 78 path += b |
78 else: | 79 else: |
79 path += sep + b | 80 path += sep + b |
80 return path | 81 return path |
81 | 82 |
82 | 83 |
83 # Split a path in head (everything up to the last '/') and tail (the | 84 # Split a path in head (everything up to the last '/') and tail (the |
84 # rest). If the path ends in '/', tail will be empty. If there is no | 85 # rest). If the path ends in '/', tail will be empty. If there is no |
85 # '/' in the path, head will be empty. | 86 # '/' in the path, head will be empty. |
86 # Trailing '/'es are stripped from head unless it is the root. | 87 # Trailing '/'es are stripped from head unless it is the root. |
87 | 88 |
88 def split(p): | 89 def split(p): |
89 """Split a pathname. Returns tuple "(head, tail)" where "tail" is | 90 """Split a pathname. Returns tuple "(head, tail)" where "tail" is |
90 everything after the final slash. Either part may be empty.""" | 91 everything after the final slash. Either part may be empty.""" |
91 i = p.rfind('/') + 1 | 92 sep = _get_sep(p) |
93 i = p.rfind(sep) + 1 | |
92 head, tail = p[:i], p[i:] | 94 head, tail = p[:i], p[i:] |
93 if head and head != '/'*len(head): | 95 if head and head != sep*len(head): |
94 head = head.rstrip('/') | 96 head = head.rstrip(sep) |
95 return head, tail | 97 return head, tail |
96 | 98 |
97 | 99 |
98 # Split a path in root and extension. | 100 # Split a path in root and extension. |
99 # The extension is everything starting at the last dot in the last | 101 # The extension is everything starting at the last dot in the last |
100 # pathname component; the root is everything before that. | 102 # pathname component; the root is everything before that. |
101 # It is always true that root + ext == p. | 103 # It is always true that root + ext == p. |
102 | 104 |
103 def splitext(p): | 105 def splitext(p): |
104 return genericpath._splitext(p, sep, altsep, extsep) | 106 if isinstance(p, bytes): |
107 sep = b'/' | |
108 extsep = b'.' | |
109 else: | |
110 sep = '/' | |
111 extsep = '.' | |
112 return genericpath._splitext(p, sep, None, extsep) | |
105 splitext.__doc__ = genericpath._splitext.__doc__ | 113 splitext.__doc__ = genericpath._splitext.__doc__ |
106 | 114 |
107 # Split a pathname into a drive specification and the rest of the | 115 # Split a pathname into a drive specification and the rest of the |
108 # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. | 116 # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. |
109 | 117 |
110 def splitdrive(p): | 118 def splitdrive(p): |
111 """Split a pathname into drive and path. On Posix, drive is always | 119 """Split a pathname into drive and path. On Posix, drive is always |
112 empty.""" | 120 empty.""" |
113 return '', p | 121 return p[:0], p |
114 | 122 |
115 | 123 |
116 # Return the tail (basename) part of a path, same as split(path)[1]. | 124 # Return the tail (basename) part of a path, same as split(path)[1]. |
117 | 125 |
118 def basename(p): | 126 def basename(p): |
119 """Returns the final component of a pathname""" | 127 """Returns the final component of a pathname""" |
120 i = p.rfind('/') + 1 | 128 sep = _get_sep(p) |
129 i = p.rfind(sep) + 1 | |
121 return p[i:] | 130 return p[i:] |
122 | 131 |
123 | 132 |
124 # Return the head (dirname) part of a path, same as split(path)[0]. | 133 # Return the head (dirname) part of a path, same as split(path)[0]. |
125 | 134 |
126 def dirname(p): | 135 def dirname(p): |
127 """Returns the directory component of a pathname""" | 136 """Returns the directory component of a pathname""" |
128 i = p.rfind('/') + 1 | 137 sep = _get_sep(p) |
138 i = p.rfind(sep) + 1 | |
129 head = p[:i] | 139 head = p[:i] |
130 if head and head != '/'*len(head): | 140 if head and head != sep*len(head): |
131 head = head.rstrip('/') | 141 head = head.rstrip(sep) |
132 return head | 142 return head |
133 | 143 |
134 | 144 |
135 # Is a path a symbolic link? | 145 # Is a path a symbolic link? |
136 # This will always return false on systems where os.lstat doesn't exist. | 146 # This will always return false on systems where os.lstat doesn't exist. |
137 | 147 |
138 def islink(path): | 148 def islink(path): |
139 """Test whether a path is a symbolic link""" | 149 """Test whether a path is a symbolic link""" |
140 try: | 150 try: |
141 st = os.lstat(path) | 151 st = os.lstat(path) |
(...skipping 40 matching lines...) | |
182 s1.st_dev == s2.st_dev | 192 s1.st_dev == s2.st_dev |
183 | 193 |
184 | 194 |
185 # Is a path a mount point? | 195 # Is a path a mount point? |
186 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) | 196 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) |
187 | 197 |
188 def ismount(path): | 198 def ismount(path): |
189 """Test whether a path is a mount point""" | 199 """Test whether a path is a mount point""" |
190 try: | 200 try: |
191 s1 = os.lstat(path) | 201 s1 = os.lstat(path) |
192 s2 = os.lstat(join(path, '..')) | 202 if isinstance(path, bytes): |
203 parent = join(path, b'..') | |
204 else: | |
205 parent = join(path, '..') | |
206 s2 = os.lstat(parent) | |
193 except os.error: | 207 except os.error: |
194 return False # It doesn't exist -- so not a mount point :-) | 208 return False # It doesn't exist -- so not a mount point :-) |
195 dev1 = s1.st_dev | 209 dev1 = s1.st_dev |
196 dev2 = s2.st_dev | 210 dev2 = s2.st_dev |
197 if dev1 != dev2: | 211 if dev1 != dev2: |
198 return True # path/.. on a different device as path | 212 return True # path/.. on a different device as path |
199 ino1 = s1.st_ino | 213 ino1 = s1.st_ino |
200 ino2 = s2.st_ino | 214 ino2 = s2.st_ino |
201 if ino1 == ino2: | 215 if ino1 == ino2: |
202 return True # path/.. is the same i-node as path | 216 return True # path/.. is the same i-node as path |
203 return False | 217 return False |
204 | 218 |
205 | 219 |
206 # Expand paths beginning with '~' or '~user'. | 220 # Expand paths beginning with '~' or '~user'. |
207 # '~' means $HOME; '~user' means that user's home directory. | 221 # '~' means $HOME; '~user' means that user's home directory. |
208 # If the path doesn't begin with '~', or if the user or $HOME is unknown, | 222 # If the path doesn't begin with '~', or if the user or $HOME is unknown, |
209 # the path is returned unchanged (leaving error reporting to whatever | 223 # the path is returned unchanged (leaving error reporting to whatever |
210 # function is called with the expanded path as argument). | 224 # function is called with the expanded path as argument). |
211 # See also module 'glob' for expansion of *, ? and [...] in pathnames. | 225 # See also module 'glob' for expansion of *, ? and [...] in pathnames. |
212 # (A function should also be defined to do full *sh-style environment | 226 # (A function should also be defined to do full *sh-style environment |
213 # variable expansion.) | 227 # variable expansion.) |
214 | 228 |
215 def expanduser(path): | 229 def expanduser(path): |
216 """Expand ~ and ~user constructions. If user or $HOME is unknown, | 230 """Expand ~ and ~user constructions. If user or $HOME is unknown, |
217 do nothing.""" | 231 do nothing.""" |
218 if not path.startswith('~'): | 232 if isinstance(path, bytes): |
233 tilde = b'~' | |
234 else: | |
235 tilde = '~' | |
236 if not path.startswith(tilde): | |
219 return path | 237 return path |
220 i = path.find('/', 1) | 238 sep = _get_sep(path) |
239 i = path.find(sep, 1) | |
221 if i < 0: | 240 if i < 0: |
222 i = len(path) | 241 i = len(path) |
223 if i == 1: | 242 if i == 1: |
224 if 'HOME' not in os.environ: | 243 if 'HOME' not in os.environ: |
225 import pwd | 244 import pwd |
226 userhome = pwd.getpwuid(os.getuid()).pw_dir | 245 userhome = pwd.getpwuid(os.getuid()).pw_dir |
227 else: | 246 else: |
228 userhome = os.environ['HOME'] | 247 userhome = os.environ['HOME'] |
229 else: | 248 else: |
230 import pwd | 249 import pwd |
250 name = path[1:i] | |
251 if isinstance(name, bytes): | |
252 name = str(name, 'ASCII') | |
231 try: | 253 try: |
232 pwent = pwd.getpwnam(path[1:i]) | 254 pwent = pwd.getpwnam(name) |
233 except KeyError: | 255 except KeyError: |
234 return path | 256 return path |
235 userhome = pwent.pw_dir | 257 userhome = pwent.pw_dir |
236 userhome = userhome.rstrip('/') | 258 if isinstance(path, bytes): |
259 userhome = userhome.encode(sys.getfilesystemencoding()) | |
260 userhome = userhome.rstrip(sep) | |
237 return userhome + path[i:] | 261 return userhome + path[i:] |
238 | 262 |
239 | 263 |
240 # Expand paths containing shell variable substitutions. | 264 # Expand paths containing shell variable substitutions. |
241 # This expands the forms $variable and ${variable} only. | 265 # This expands the forms $variable and ${variable} only. |
242 # Non-existent variables are left unchanged. | 266 # Non-existent variables are left unchanged. |
243 | 267 |
244 _varprog = None | 268 _varprog = None |
269 _varprogb = None | |
245 | 270 |
246 def expandvars(path): | 271 def expandvars(path): |
247 """Expand shell variables of form $var and ${var}. Unknown variables | 272 """Expand shell variables of form $var and ${var}. Unknown variables |
248 are left unchanged.""" | 273 are left unchanged.""" |
249 global _varprog | 274 global _varprog, _varprogb |
250 if '$' not in path: | 275 if isinstance(path, bytes): |
251 return path | 276 if b'$' not in path: |
252 if not _varprog: | 277 return path |
253 import re | 278 if not _varprogb: |
254 _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) | 279 import re |
280 _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII) | |
281 search = _varprogb.search | |
282 start = b'{' | |
283 end = b'}' | |
284 else: | |
285 if '$' not in path: | |
286 return path | |
287 if not _varprog: | |
288 import re | |
289 _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) | |
290 search = _varprog.search | |
291 start = '{' | |
292 end = '}' | |
255 i = 0 | 293 i = 0 |
256 while True: | 294 while True: |
257 m = _varprog.search(path, i) | 295 m = search(path, i) |
258 if not m: | 296 if not m: |
259 break | 297 break |
260 i, j = m.span(0) | 298 i, j = m.span(0) |
261 name = m.group(1) | 299 name = m.group(1) |
262 if name.startswith('{') and name.endswith('}'): | 300 if name.startswith(start) and name.endswith(end): |
263 name = name[1:-1] | 301 name = name[1:-1] |
302 if isinstance(name, bytes): | |
303 name = str(name, 'ASCII') | |
264 if name in os.environ: | 304 if name in os.environ: |
265 tail = path[j:] | 305 tail = path[j:] |
266 path = path[:i] + os.environ[name] | 306 value = os.environ[name] |
307 if isinstance(path, bytes): | |
308 value = value.encode('ASCII') | |
309 path = path[:i] + value | |
267 i = len(path) | 310 i = len(path) |
268 path += tail | 311 path += tail |
269 else: | 312 else: |
270 i = j | 313 i = j |
271 return path | 314 return path |
272 | 315 |
273 | 316 |
274 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. | 317 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. |
275 # It should be understood that this may change the meaning of the path | 318 # It should be understood that this may change the meaning of the path |
276 # if it contains symbolic links! | 319 # if it contains symbolic links! |
277 | 320 |
278 def normpath(path): | 321 def normpath(path): |
279 """Normalize path, eliminating double slashes, etc.""" | 322 """Normalize path, eliminating double slashes, etc.""" |
280 if path == '': | 323 if isinstance(path, bytes): |
281 return '.' | 324 sep = b'/' |
282 initial_slashes = path.startswith('/') | 325 empty = b'' |
326 dot = b'.' | |
327 dotdot = b'..' | |
328 else: | |
329 sep = '/' | |
330 empty = '' | |
331 dot = '.' | |
332 dotdot = '..' | |
333 if path == empty: | |
334 return dot | |
335 initial_slashes = path.startswith(sep) | |
283 # POSIX allows one or two initial slashes, but treats three or more | 336 # POSIX allows one or two initial slashes, but treats three or more |
284 # as single slash. | 337 # as single slash. |
285 if (initial_slashes and | 338 if (initial_slashes and |
286 path.startswith('//') and not path.startswith('///')): | 339 path.startswith(sep*2) and not path.startswith(sep*3)): |
287 initial_slashes = 2 | 340 initial_slashes = 2 |
288 comps = path.split('/') | 341 comps = path.split(sep) |
289 new_comps = [] | 342 new_comps = [] |
290 for comp in comps: | 343 for comp in comps: |
291 if comp in ('', '.'): | 344 if comp in (empty, dot): |
292 continue | 345 continue |
293 if (comp != '..' or (not initial_slashes and not new_comps) or | 346 if (comp != dotdot or (not initial_slashes and not new_comps) or |
294 (new_comps and new_comps[-1] == '..')): | 347 (new_comps and new_comps[-1] == dotdot)): |
295 new_comps.append(comp) | 348 new_comps.append(comp) |
296 elif new_comps: | 349 elif new_comps: |
297 new_comps.pop() | 350 new_comps.pop() |
298 comps = new_comps | 351 comps = new_comps |
299 path = '/'.join(comps) | 352 path = sep.join(comps) |
300 if initial_slashes: | 353 if initial_slashes: |
301 path = '/'*initial_slashes + path | 354 path = sep*initial_slashes + path |
302 return path or '.' | 355 return path or dot |
303 | 356 |
304 | 357 |
305 def abspath(path): | 358 def abspath(path): |
306 """Return an absolute path.""" | 359 """Return an absolute path.""" |
307 if not isabs(path): | 360 if not isabs(path): |
308 path = join(os.getcwd(), path) | 361 if isinstance(path, bytes): |
362 cwd = os.getcwdb() | |
363 else: | |
364 cwd = os.getcwd() | |
365 path = join(cwd, path) | |
309 return normpath(path) | 366 return normpath(path) |
310 | 367 |
311 | 368 |
312 # Return a canonical path (i.e. the absolute location of a file on the | 369 # Return a canonical path (i.e. the absolute location of a file on the |
313 # filesystem). | 370 # filesystem). |
314 | 371 |
315 def realpath(filename): | 372 def realpath(filename): |
316 """Return the canonical path of the specified filename, eliminating any | 373 """Return the canonical path of the specified filename, eliminating any |
317 symbolic links encountered in the path.""" | 374 symbolic links encountered in the path.""" |
375 if isinstance(filename, bytes): | |
376 sep = b'/' | |
377 empty = b'' | |
378 else: | |
379 sep = '/' | |
380 empty = '' | |
318 if isabs(filename): | 381 if isabs(filename): |
319 bits = ['/'] + filename.split('/')[1:] | 382 bits = [sep] + filename.split(sep)[1:] |
320 else: | 383 else: |
321 bits = [''] + filename.split('/') | 384 bits = [empty] + filename.split(sep) |
322 | 385 |
323 for i in range(2, len(bits)+1): | 386 for i in range(2, len(bits)+1): |
324 component = join(*bits[0:i]) | 387 component = join(*bits[0:i]) |
325 # Resolve symbolic links. | 388 # Resolve symbolic links. |
326 if islink(component): | 389 if islink(component): |
327 resolved = _resolve_link(component) | 390 resolved = _resolve_link(component) |
328 if resolved is None: | 391 if resolved is None: |
329 # Infinite loop -- return original component + rest of the path | 392 # Infinite loop -- return original component + rest of the path |
330 return abspath(join(*([component] + bits[i:]))) | 393 return abspath(join(*([component] + bits[i:]))) |
331 else: | 394 else: |
(...skipping 18 matching lines...) | |
350 resolved = os.readlink(path) | 413 resolved = os.readlink(path) |
351 if not isabs(resolved): | 414 if not isabs(resolved): |
352 dir = dirname(path) | 415 dir = dirname(path) |
353 path = normpath(join(dir, resolved)) | 416 path = normpath(join(dir, resolved)) |
354 else: | 417 else: |
355 path = normpath(resolved) | 418 path = normpath(resolved) |
356 return path | 419 return path |
357 | 420 |
358 supports_unicode_filenames = False | 421 supports_unicode_filenames = False |
359 | 422 |
360 def relpath(path, start=curdir): | 423 def relpath(path, start=None): |
361 """Return a relative version of a path""" | 424 """Return a relative version of a path""" |
362 | 425 |
363 if not path: | 426 if not path: |
364 raise ValueError("no path specified") | 427 raise ValueError("no path specified") |
428 | |
429 if isinstance(path, bytes): | |
430 curdir = b'.' | |
431 sep = b'/' | |
432 pardir = b'..' | |
433 else: | |
434 curdir = '.' | |
435 sep = '/' | |
436 pardir = '..' | |
437 | |
438 if start is None: | |
439 start = curdir | |
365 | 440 |
366 start_list = abspath(start).split(sep) | 441 start_list = abspath(start).split(sep) |
367 path_list = abspath(path).split(sep) | 442 path_list = abspath(path).split(sep) |
368 | 443 |
369 # Work out how much of the filepath is shared by start and path. | 444 # Work out how much of the filepath is shared by start and path. |
370 i = len(commonprefix([start_list, path_list])) | 445 i = len(commonprefix([start_list, path_list])) |
371 | 446 |
372 rel_list = [pardir] * (len(start_list)-i) + path_list[i:] | 447 rel_list = [pardir] * (len(start_list)-i) + path_list[i:] |
373 if not rel_list: | 448 if not rel_list: |
374 return curdir | 449 return curdir |
375 return join(*rel_list) | 450 return join(*rel_list) |
LEFT | RIGHT |