Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(82)

Side by Side Diff: Lib/posixpath.py

Issue 3055: combined patches from http://bugs.python.org/issue3187 (Closed) SVN Base: http://svn.python.org/view/*checkout*/python/branches/py3k/
Patch Set: More complete patch from Victor. Created 1 year, 2 months ago. Downloaded from: http://bugs.python.org/file11667/python3_bytes_filename-2.patch
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLD NEW
1 """Common operations on Posix pathnames. 1 """Common operations on Posix pathnames.
2 2
3 Instead of importing this module directly, import os and refer to 3 Instead of importing this module directly, import os and refer to
4 this module as os.path. The "os.path" name is an alias for this 4 this module as os.path. The "os.path" name is an alias for this
5 module on Posix systems; on other systems (e.g. Mac, Windows), 5 module on Posix systems; on other systems (e.g. Mac, Windows),
6 os.path provides the same operations in a manner specific to that 6 os.path provides the same operations in a manner specific to that
7 platform, and is an alias to another module (e.g. macpath, ntpath). 7 platform, and is an alias to another module (e.g. macpath, ntpath).
8 8
9 Some of this can actually be useful on non-Posix systems too, e.g. 9 Some of this can actually be useful on non-Posix systems too, e.g.
10 for manipulation of the pathname component of URLs. 10 for manipulation of the pathname component of URLs.
11 """ 11 """
12 12
13 import os 13 import os
14 import stat 14 import stat
15 import genericpath 15 import genericpath
16 from genericpath import * 16 from genericpath import *
17 import sys
17 18
18 __all__ = ["normcase","isabs","join","splitdrive","split","splitext", 19 __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
19 "basename","dirname","commonprefix","getsize","getmtime", 20 "basename","dirname","commonprefix","getsize","getmtime",
20 "getatime","getctime","islink","exists","lexists","isdir","isfile", 21 "getatime","getctime","islink","exists","lexists","isdir","isfile",
21 "ismount", "expanduser","expandvars","normpath","abspath", 22 "ismount", "expanduser","expandvars","normpath","abspath",
22 "samefile","sameopenfile","samestat", 23 "samefile","sameopenfile","samestat",
23 "curdir","pardir","sep","pathsep","defpath","altsep","extsep", 24 "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
24 "devnull","realpath","supports_unicode_filenames","relpath"] 25 "devnull","realpath","supports_unicode_filenames","relpath"]
25 26
26 # strings representing various path-related bits and pieces 27 # strings representing various path-related bits and pieces
27 curdir = '.' 28 curdir = '.'
28 pardir = '..' 29 pardir = '..'
29 extsep = '.' 30 extsep = '.'
30 sep = '/' 31 sep = '/'
31 pathsep = ':' 32 pathsep = ':'
32 defpath = ':/bin:/usr/bin' 33 defpath = ':/bin:/usr/bin'
33 altsep = None 34 altsep = None
34 devnull = '/dev/null' 35 devnull = '/dev/null'
36
37 def _get_sep(path):
38 if isinstance(path, (bytes, bytearray)):
39 return b'/'
40 else:
41 return '/'
35 42
36 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. 43 # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
37 # On MS-DOS this may also turn slashes into backslashes; however, other 44 # On MS-DOS this may also turn slashes into backslashes; however, other
38 # normalizations (such as optimizing '../' away) are not allowed 45 # normalizations (such as optimizing '../' away) are not allowed
39 # (another function should be defined to do that). 46 # (another function should be defined to do that).
40 47
41 def normcase(s): 48 def normcase(s):
42 """Normalize case of pathname. Has no effect under Posix""" 49 """Normalize case of pathname. Has no effect under Posix"""
43 return s 50 return s
44 51
45 52
46 # Return whether a path is absolute. 53 # Return whether a path is absolute.
47 # Trivial in Posix, harder on the Mac or MS-DOS. 54 # Trivial in Posix, harder on the Mac or MS-DOS.
48 55
49 def isabs(s): 56 def isabs(s):
50 """Test whether a path is absolute""" 57 """Test whether a path is absolute"""
51 return s.startswith('/') 58 sep = _get_sep(s)
59 return s.startswith(sep)
52 60
53 61
54 # Join pathnames. 62 # Join pathnames.
55 # Ignore the previous parts if a part is absolute. 63 # Ignore the previous parts if a part is absolute.
56 # Insert a '/' unless the first part is empty or already ends in '/'. 64 # Insert a '/' unless the first part is empty or already ends in '/'.
57 65
58 def join(a, *p): 66 def join(a, *p):
59 """Join two or more pathname components, inserting '/' as needed. 67 """Join two or more pathname components, inserting '/' as needed.
60 If any component is an absolute path, all previous path components 68 If any component is an absolute path, all previous path components
61 will be discarded.""" 69 will be discarded."""
70 sep = _get_sep(a)
62 path = a 71 path = a
63 for b in p: 72 for b in p:
64 if b.startswith('/'): 73 if b.startswith(sep):
65 path = b 74 path = b
66 elif path == '' or path.endswith('/'): 75 elif not path or path.endswith(sep):
67 path += b 76 path += b
68 else: 77 else:
69 path += '/' + b 78 path += sep + b
70 return path 79 return path
71 80
72 81
73 # Split a path in head (everything up to the last '/') and tail (the 82 # Split a path in head (everything up to the last '/') and tail (the
74 # rest). If the path ends in '/', tail will be empty. If there is no 83 # rest). If the path ends in '/', tail will be empty. If there is no
75 # '/' in the path, head will be empty. 84 # '/' in the path, head will be empty.
76 # Trailing '/'es are stripped from head unless it is the root. 85 # Trailing '/'es are stripped from head unless it is the root.
77 86
78 def split(p): 87 def split(p):
79 """Split a pathname. Returns tuple "(head, tail)" where "tail" is 88 """Split a pathname. Returns tuple "(head, tail)" where "tail" is
80 everything after the final slash. Either part may be empty.""" 89 everything after the final slash. Either part may be empty."""
81 i = p.rfind('/') + 1 90 sep = _get_sep(p)
91 i = p.rfind(sep) + 1
82 head, tail = p[:i], p[i:] 92 head, tail = p[:i], p[i:]
83 if head and head != '/'*len(head): 93 if head and head != sep*len(head):
84 head = head.rstrip('/') 94 head = head.rstrip(sep)
85 return head, tail 95 return head, tail
86 96
87 97
88 # Split a path in root and extension. 98 # Split a path in root and extension.
89 # The extension is everything starting at the last dot in the last 99 # The extension is everything starting at the last dot in the last
90 # pathname component; the root is everything before that. 100 # pathname component; the root is everything before that.
91 # It is always true that root + ext == p. 101 # It is always true that root + ext == p.
92 102
93 def splitext(p): 103 def splitext(p):
94 return genericpath._splitext(p, sep, altsep, extsep) 104 return genericpath._splitext(p, sep, altsep, extsep)
95 splitext.__doc__ = genericpath._splitext.__doc__ 105 splitext.__doc__ = genericpath._splitext.__doc__
96 106
97 # Split a pathname into a drive specification and the rest of the 107 # Split a pathname into a drive specification and the rest of the
98 # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. 108 # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty.
99 109
100 def splitdrive(p): 110 def splitdrive(p):
101 """Split a pathname into drive and path. On Posix, drive is always 111 """Split a pathname into drive and path. On Posix, drive is always
102 empty.""" 112 empty."""
103 return '', p 113 if isinstance(p, (bytes, bytearray)):
114 return b'', p
115 else:
116 return '', p
104 117
105 118
106 # Return the tail (basename) part of a path, same as split(path)[1]. 119 # Return the tail (basename) part of a path, same as split(path)[1].
107 120
108 def basename(p): 121 def basename(p):
109 """Returns the final component of a pathname""" 122 """Returns the final component of a pathname"""
110 i = p.rfind('/') + 1 123 sep = _get_sep(p)
124 i = p.rfind(sep) + 1
111 return p[i:] 125 return p[i:]
112 126
113 127
114 # Return the head (dirname) part of a path, same as split(path)[0]. 128 # Return the head (dirname) part of a path, same as split(path)[0].
115 129
116 def dirname(p): 130 def dirname(p):
117 """Returns the directory component of a pathname""" 131 """Returns the directory component of a pathname"""
118 i = p.rfind('/') + 1 132 sep = _get_sep(p)
133 i = p.rfind(sep) + 1
119 head = p[:i] 134 head = p[:i]
120 if head and head != '/'*len(head): 135 if head and head != sep*len(head):
121 head = head.rstrip('/') 136 head = head.rstrip(sep)
122 return head 137 return head
123 138
124 139
125 # Is a path a symbolic link? 140 # Is a path a symbolic link?
126 # This will always return false on systems where os.lstat doesn't exist. 141 # This will always return false on systems where os.lstat doesn't exist.
127 142
128 def islink(path): 143 def islink(path):
129 """Test whether a path is a symbolic link""" 144 """Test whether a path is a symbolic link"""
130 try: 145 try:
131 st = os.lstat(path) 146 st = os.lstat(path)
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
172 s1.st_dev == s2.st_dev 187 s1.st_dev == s2.st_dev
173 188
174 189
175 # Is a path a mount point? 190 # Is a path a mount point?
176 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) 191 # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)
177 192
178 def ismount(path): 193 def ismount(path):
179 """Test whether a path is a mount point""" 194 """Test whether a path is a mount point"""
180 try: 195 try:
181 s1 = os.lstat(path) 196 s1 = os.lstat(path)
182 s2 = os.lstat(join(path, '..')) 197 if isinstance(path, (bytes, bytearray)):
198 parent = join(path, b'..')
199 else:
200 parent = join(path, '..')
201 s2 = os.lstat(parent)
183 except os.error: 202 except os.error:
184 return False # It doesn't exist -- so not a mount point :-) 203 return False # It doesn't exist -- so not a mount point :-)
185 dev1 = s1.st_dev 204 dev1 = s1.st_dev
186 dev2 = s2.st_dev 205 dev2 = s2.st_dev
187 if dev1 != dev2: 206 if dev1 != dev2:
188 return True # path/.. on a different device than path 207 return True # path/.. on a different device than path
189 ino1 = s1.st_ino 208 ino1 = s1.st_ino
190 ino2 = s2.st_ino 209 ino2 = s2.st_ino
191 if ino1 == ino2: 210 if ino1 == ino2:
192 return True # path/.. is the same i-node as path 211 return True # path/.. is the same i-node as path
193 return False 212 return False
194 213
195 214
196 # Expand paths beginning with '~' or '~user'. 215 # Expand paths beginning with '~' or '~user'.
197 # '~' means $HOME; '~user' means that user's home directory. 216 # '~' means $HOME; '~user' means that user's home directory.
198 # If the path doesn't begin with '~', or if the user or $HOME is unknown, 217 # If the path doesn't begin with '~', or if the user or $HOME is unknown,
199 # the path is returned unchanged (leaving error reporting to whatever 218 # the path is returned unchanged (leaving error reporting to whatever
200 # function is called with the expanded path as argument). 219 # function is called with the expanded path as argument).
201 # See also module 'glob' for expansion of *, ? and [...] in pathnames. 220 # See also module 'glob' for expansion of *, ? and [...] in pathnames.
202 # (A function should also be defined to do full *sh-style environment 221 # (A function should also be defined to do full *sh-style environment
203 # variable expansion.) 222 # variable expansion.)
204 223
205 def expanduser(path): 224 def expanduser(path):
206 """Expand ~ and ~user constructions. If user or $HOME is unknown, 225 """Expand ~ and ~user constructions. If user or $HOME is unknown,
207 do nothing.""" 226 do nothing."""
208 if not path.startswith('~'): 227 if isinstance(path, (bytes, bytearray)):
228 tilde = b'~'
229 else:
230 tilde = '~'
231 if not path.startswith(tilde):
209 return path 232 return path
210 i = path.find('/', 1) 233 sep = _get_sep(path)
234 i = path.find(sep, 1)
211 if i < 0: 235 if i < 0:
212 i = len(path) 236 i = len(path)
213 if i == 1: 237 if i == 1:
214 if 'HOME' not in os.environ: 238 if 'HOME' not in os.environ:
215 import pwd 239 import pwd
216 userhome = pwd.getpwuid(os.getuid()).pw_dir 240 userhome = pwd.getpwuid(os.getuid()).pw_dir
217 else: 241 else:
218 userhome = os.environ['HOME'] 242 userhome = os.environ['HOME']
219 else: 243 else:
220 import pwd 244 import pwd
245 name = path[1:i]
246 if isinstance(name, (bytes, bytearray)):
247 name = str(name, 'ASCII')
221 try: 248 try:
222 pwent = pwd.getpwnam(path[1:i]) 249 pwent = pwd.getpwnam(name)
223 except KeyError: 250 except KeyError:
224 return path 251 return path
225 userhome = pwent.pw_dir 252 userhome = pwent.pw_dir
226 userhome = userhome.rstrip('/') 253 if isinstance(path, (bytes, bytearray)):
254 userhome = userhome.encode(sys.getfilesystemencoding())
255 userhome = userhome.rstrip(sep)
227 return userhome + path[i:] 256 return userhome + path[i:]
228 257
229 258
230 # Expand paths containing shell variable substitutions. 259 # Expand paths containing shell variable substitutions.
231 # This expands the forms $variable and ${variable} only. 260 # This expands the forms $variable and ${variable} only.
232 # Non-existent variables are left unchanged. 261 # Non-existent variables are left unchanged.
233 262
234 _varprog = None 263 _varprog = None
264 _varprogb = None
235 265
236 def expandvars(path): 266 def expandvars(path):
237 """Expand shell variables of form $var and ${var}. Unknown variables 267 """Expand shell variables of form $var and ${var}. Unknown variables
238 are left unchanged.""" 268 are left unchanged."""
239 global _varprog 269 global _varprog, _varprogb
240 if '$' not in path: 270 if isinstance(path, (bytes, bytearray)):
241 return path 271 if b'$' not in path:
242 if not _varprog: 272 return path
243 import re 273 if not _varprogb:
244 _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII) 274 import re
275 _varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
276 search = _varprogb.search
277 start = b'{'
278 end = b'}'
279 else:
280 if '$' not in path:
281 return path
282 if not _varprog:
283 import re
284 _varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
285 search = _varprog.search
286 start = '{'
287 end = '}'
245 i = 0 288 i = 0
246 while True: 289 while True:
247 m = _varprog.search(path, i) 290 m = search(path, i)
248 if not m: 291 if not m:
249 break 292 break
250 i, j = m.span(0) 293 i, j = m.span(0)
251 name = m.group(1) 294 name = m.group(1)
252 if name.startswith('{') and name.endswith('}'): 295 if name.startswith(start) and name.endswith(end):
253 name = name[1:-1] 296 name = name[1:-1]
297 if isinstance(name, (bytes, bytearray)):
298 name = str(name, 'ASCII')
254 if name in os.environ: 299 if name in os.environ:
255 tail = path[j:] 300 tail = path[j:]
256 path = path[:i] + os.environ[name] 301 value = os.environ[name]
302 if isinstance(path, (bytes, bytearray)):
303 value = value.encode('ASCII')
304 path = path[:i] + value
257 i = len(path) 305 i = len(path)
258 path += tail 306 path += tail
259 else: 307 else:
260 i = j 308 i = j
261 return path 309 return path
262 310
263 311
264 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. 312 # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
265 # It should be understood that this may change the meaning of the path 313 # It should be understood that this may change the meaning of the path
266 # if it contains symbolic links! 314 # if it contains symbolic links!
267 315
268 def normpath(path): 316 def normpath(path):
269 """Normalize path, eliminating double slashes, etc.""" 317 """Normalize path, eliminating double slashes, etc."""
270 if path == '': 318 if isinstance(path, (bytes, bytearray)):
271 return '.' 319 sep = b'/'
272 initial_slashes = path.startswith('/') 320 empty = b''
321 dot = b'.'
322 dotdot = b'..'
323 else:
324 sep = '/'
325 empty = ''
326 dot = '.'
327 dotdot = '..'
328 if path == empty:
329 return dot
330 initial_slashes = path.startswith(sep)
273 # POSIX allows one or two initial slashes, but treats three or more 331 # POSIX allows one or two initial slashes, but treats three or more
274 # as single slash. 332 # as single slash.
275 if (initial_slashes and 333 if (initial_slashes and
276 path.startswith('//') and not path.startswith('///')): 334 path.startswith(sep*2) and not path.startswith(sep*3)):
277 initial_slashes = 2 335 initial_slashes = 2
278 comps = path.split('/') 336 comps = path.split(sep)
279 new_comps = [] 337 new_comps = []
280 for comp in comps: 338 for comp in comps:
281 if comp in ('', '.'): 339 if comp in (empty, dot):
282 continue 340 continue
283 if (comp != '..' or (not initial_slashes and not new_comps) or 341 if (comp != dotdot or (not initial_slashes and not new_comps) or
284 (new_comps and new_comps[-1] == '..')): 342 (new_comps and new_comps[-1] == dotdot)):
285 new_comps.append(comp) 343 new_comps.append(comp)
286 elif new_comps: 344 elif new_comps:
287 new_comps.pop() 345 new_comps.pop()
288 comps = new_comps 346 comps = new_comps
289 path = '/'.join(comps) 347 path = sep.join(comps)
290 if initial_slashes: 348 if initial_slashes:
291 path = '/'*initial_slashes + path 349 path = sep*initial_slashes + path
292 return path or '.' 350 return path or dot
293 351
294 352
295 def abspath(path): 353 def abspath(path):
296 """Return an absolute path.""" 354 """Return an absolute path."""
297 if not isabs(path): 355 if not isabs(path):
298 path = join(os.getcwd(), path) 356 if isinstance(path, (bytes, bytearray)):
357 cwd = os.getcwdb()
358 else:
359 cwd = os.getcwd()
360 path = join(cwd, path)
299 return normpath(path) 361 return normpath(path)
300 362
301 363
302 # Return a canonical path (i.e. the absolute location of a file on the 364 # Return a canonical path (i.e. the absolute location of a file on the
303 # filesystem). 365 # filesystem).
304 366
305 def realpath(filename): 367 def realpath(filename):
306 """Return the canonical path of the specified filename, eliminating any 368 """Return the canonical path of the specified filename, eliminating any
307 symbolic links encountered in the path.""" 369 symbolic links encountered in the path."""
370 if isinstance(filename, (bytes, bytearray)):
371 sep = b'/'
372 empty = b''
373 else:
374 sep = '/'
375 empty = ''
308 if isabs(filename): 376 if isabs(filename):
309 bits = ['/'] + filename.split('/')[1:] 377 bits = [sep] + filename.split(sep)[1:]
310 else: 378 else:
311 bits = [''] + filename.split('/') 379 bits = [empty] + filename.split(sep)
312 380
313 for i in range(2, len(bits)+1): 381 for i in range(2, len(bits)+1):
314 component = join(*bits[0:i]) 382 component = join(*bits[0:i])
315 # Resolve symbolic links. 383 # Resolve symbolic links.
316 if islink(component): 384 if islink(component):
317 resolved = _resolve_link(component) 385 resolved = _resolve_link(component)
318 if resolved is None: 386 if resolved is None:
319 # Infinite loop -- return original component + rest of the path 387 # Infinite loop -- return original component + rest of the path
320 return abspath(join(*([component] + bits[i:]))) 388 return abspath(join(*([component] + bits[i:])))
321 else: 389 else:
(...skipping 18 matching lines...) Expand all
340 resolved = os.readlink(path) 408 resolved = os.readlink(path)
341 if not isabs(resolved): 409 if not isabs(resolved):
342 dir = dirname(path) 410 dir = dirname(path)
343 path = normpath(join(dir, resolved)) 411 path = normpath(join(dir, resolved))
344 else: 412 else:
345 path = normpath(resolved) 413 path = normpath(resolved)
346 return path 414 return path
347 415
348 supports_unicode_filenames = False 416 supports_unicode_filenames = False
349 417
350 def relpath(path, start=curdir): 418 def relpath(path, start=None):
351 """Return a relative version of a path""" 419 """Return a relative version of a path"""
352 420
353 if not path: 421 if not path:
354 raise ValueError("no path specified") 422 raise ValueError("no path specified")
355 423
424 if isinstance(path, (bytes, bytearray)):
425 curdir = b'.'
426 sep = b'/'
427 pardir = b'..'
428 else:
429 curdir = '.'
430 sep = '/'
431 pardir = '..'
432
433 if start is None:
434 start = curdir
435
356 start_list = abspath(start).split(sep) 436 start_list = abspath(start).split(sep)
357 path_list = abspath(path).split(sep) 437 path_list = abspath(path).split(sep)
358 438
359 # Work out how much of the filepath is shared by start and path. 439 # Work out how much of the filepath is shared by start and path.
360 i = len(commonprefix([start_list, path_list])) 440 i = len(commonprefix([start_list, path_list]))
361 441
362 rel_list = [pardir] * (len(start_list)-i) + path_list[i:] 442 rel_list = [pardir] * (len(start_list)-i) + path_list[i:]
363 if not rel_list: 443 if not rel_list:
364 return curdir 444 return curdir
365 return join(*rel_list) 445 return join(*rel_list)
OLD NEW

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld r497