Left: | ||
Right: |
OLD | NEW |
---|---|
1 """Guess the MIME type of a file. | 1 """Guess the MIME type of a file. |
2 | 2 |
3 This module defines two useful functions: | 3 This module defines three useful functions: |
4 | 4 |
5 guess_type(url, strict=1) -- guess the MIME type and encoding of a URL. | 5 guess_type(url, strict=True) -- guess the MIME type and encoding of a URL. |
6 | 6 |
7 guess_extension(type, strict=1) -- guess the extension for a given MIME type. | 7 guess_extension(type, strict=True) -- guess the extension for a given MIME type. |
8 | |
9 guess_all_extensions(type, strict=True) -- list all guessed extensions for a type. | |
8 | 10 |
9 It also contains the following, for tuning the behavior: | 11 It also contains the following, for tuning the behavior: |
10 | 12 |
13 Classes: | |
14 | |
15 MimeTypes -- The type mapping data store. Use this if you plan to change the | |
16 default type mappings, and call its methods in lieu of the above functions. | |
17 | |
18 Functions: | |
19 | |
20 read_mime_types(file) -- parse one file in Apache mime.types format, returning | |
21 the resulting mapping from extensions to types. | |
22 | |
23 init() -- reset the module singleton to default values. Should only be used | |
24 with care, because it could break other code using the module. | |
25 | |
11 Data: | 26 Data: |
12 | 27 |
13 knownfiles -- list of files to parse | 28 These mappings should be treated as read-only. Altering them could break any |
14 inited -- flag set when init() has been called | 29 code which makes assumptions about their content. |
30 | |
15 suffix_map -- dictionary mapping suffixes to suffixes | 31 suffix_map -- dictionary mapping suffixes to suffixes |
16 encodings_map -- dictionary mapping suffixes to encodings | 32 encodings_map -- dictionary mapping suffixes to encodings |
17 types_map -- dictionary mapping suffixes to types | 33 types_map -- dictionary mapping suffixes to types |
18 | |
19 Functions: | |
20 | |
21 init([files]) -- parse a list of files, default knownfiles | |
22 read_mime_types(file) -- parse one file, return a dictionary or None | |
23 """ | 34 """ |
24 | 35 |
25 import os | |
26 import posixpath | 36 import posixpath |
27 import urllib | 37 import urllib |
28 | 38 |
29 __all__ = [ | 39 __all__ = [ |
30 "guess_type","guess_extension","guess_all_extensions", | 40 "MimeTypes", "guess_type", "guess_extension", "guess_all_extensions", |
31 "add_type","read_mime_types","init" | 41 "add_type", "add_types", "read_mime_types", "knownfiles", "inited", |
42 "init", "suffix_map", "encodings_map", "types_map", "common_types", | |
Jacob Rus
2009/08/12 18:10:33
Actually, I'm thinking that I shouldn't add much i
| |
32 ] | 43 ] |
33 | 44 |
34 knownfiles = [ | 45 knownfiles = [ |
35 "/etc/mime.types", | 46 # later files will override type mappings from earlier ones |
36 "/etc/httpd/mime.types", # Mac OS X | 47 "/usr/local/lib/netscape/mime.types", # Netscape |
37 "/etc/httpd/conf/mime.types", # Apache | |
38 "/etc/apache/mime.types", # Apache 1 | 48 "/etc/apache/mime.types", # Apache 1 |
39 "/etc/apache2/mime.types", # Apache 2 | 49 "/etc/httpd/conf/mime.types", # Apache 1.3 |
40 "/usr/local/etc/httpd/conf/mime.types", | |
41 "/usr/local/lib/netscape/mime.types", | |
42 "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 | 50 "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 |
43 "/usr/local/etc/mime.types", # Apache 1.3 | 51 "/usr/local/etc/mime.types", # Apache 1.3 |
44 ] | 52 "/etc/httpd/mime.types", # early Mac OS X |
45 | 53 "/usr/local/etc/httpd/conf/mime.types", |
46 inited = False | 54 "/etc/mime.types", # Linux |
47 _db = None | 55 "/etc/apache2/mime.types", # Apache 2 |
48 | 56 "/usr/local/apache2/conf/mime.types", # Apache 2 |
57 ] | |
Jacob Rus
2009/08/12 18:15:26
I reordered these so that more recent Apache versi
| |
49 | 58 |
50 class MimeTypes: | 59 class MimeTypes: |
51 """MIME-types datastore. | 60 """MIME-types datastore. |
52 | 61 |
53 This datastore can handle information from mime.types-style files | 62 This datastore can handle information from mime.types-style files |
54 and supports basic determination of MIME type from a filename or | 63 and supports basic determination of MIME type from a filename or |
55 URL, and can guess a reasonable extension given a MIME type. | 64 URL, and can guess a reasonable extension given a MIME type. |
56 """ | 65 """ |
57 | |
58 def __init__(self, filenames=(), strict=True): | 66 def __init__(self, filenames=(), strict=True): |
59 if not inited: | 67 self.encodings_map = _encodings_map.copy() |
60 init() | 68 self.suffix_map = _suffix_map.copy() |
61 self.encodings_map = encodings_map.copy() | |
62 self.suffix_map = suffix_map.copy() | |
63 self.types_map = ({}, {}) # dict for (non-strict, strict) | 69 self.types_map = ({}, {}) # dict for (non-strict, strict) |
64 self.types_map_inv = ({}, {}) | 70 self.types_map_inv = ({}, {}) |
65 for (ext, type) in types_map.items(): | 71 self.filenames = filenames |
66 self.add_type(type, ext, True) | 72 self._needs_init = False |
67 for (ext, type) in common_types.items(): | 73 self.add_types(_strict_types, True) |
68 self.add_type(type, ext, False) | 74 self.add_types(_lenient_types, False) |
69 for name in filenames: | 75 self._needs_init = True |
70 self.read(name, strict) | 76 |
77 def _lazy_init(self): | |
78 if self._needs_init: | |
79 self._needs_init = False | |
80 for filename in self.filenames: | |
81 try: | |
82 fp = open(filename) | |
83 except IOError: | |
84 continue | |
85 self.add_types(_parse_mimetypes(fp), strict) | |
86 fp.close() | |
87 | |
Jacob Rus
2009/08/12 18:10:33
I don't actually want to do a lazy init, I don't t
| |
88 def add_types(self, type_pairs, strict=True): | |
Jacob Rus
2009/08/12 18:15:26
I think I'd actually rather call this method 'regi
| |
89 """Add a set of mappings from extension -> MIME type | |
90 ········ | |
91 Takes in an iterable of (extension, type) pairs. For known extensions, | |
92 the new types will replace the old ones. For known types, the | |
93 extension will be added to the list of known extensions. | |
94 ········ | |
95 If `strict' is true, information will be added to list of standard | |
96 types, else to the list of non-standard types. | |
97 """ | |
98 self._lazy_init() | |
99 for ext, type in type_pairs: | |
100 self.types_map[strict][ext] = type | |
101 exts = self.types_map_inv[strict].setdefault(type, []) | |
102 if ext not in exts: | |
103 exts.append(ext) | |
71 | 104 |
72 def add_type(self, type, ext, strict=True): | 105 def add_type(self, type, ext, strict=True): |
73 """Add a mapping between a type and an extension. | 106 """Add a mapping between a type and an extension. |
74 | 107 |
75 When the extension is already known, the new | 108 Passes a single-element dictionary to add_types.· |
76 type will replace the old one. When the type | |
77 is already known the extension will be added | |
78 to the list of known extensions. | |
79 | |
80 If strict is true, information will be added to | |
81 list of standard types, else to the list of non-standard | |
82 types. | |
83 """ | 109 """ |
84 self.types_map[strict][ext] = type | 110 self.add_types([(type, ext)], strict) |
85 exts = self.types_map_inv[strict].setdefault(type, []) | |
86 if ext not in exts: | |
87 exts.append(ext) | |
88 | 111 |
89 def guess_type(self, url, strict=True): | 112 def guess_type(self, url, strict=True): |
90 """Guess the type of a file based on its URL. | 113 """Guess the type of a file based on its URL. |
91 | 114 |
92 Return value is a tuple (type, encoding) where type is None if | 115 Return value is a tuple (type, encoding) where type is None if |
93 the type can't be guessed (no or unknown suffix) or a string | 116 the type can't be guessed (no or unknown suffix) or a string |
94 of the form type/subtype, usable for a MIME Content-type | 117 of the form type/subtype, usable for a MIME Content-type |
95 header; and encoding is None for no encoding or the name of | 118 header; and encoding is None for no encoding or the name of |
96 the program used to encode (e.g. compress or gzip). The | 119 the program used to encode (e.g. compress or gzip). Encoding |
97 mappings are table driven. Encoding suffixes are case | 120 suffixes are case sensitive; type suffixes are first tried case |
98 sensitive; type suffixes are first tried case sensitive, then | 121 sensitive, then case insensitive. |
99 case insensitive. | |
100 | 122 |
101 The suffixes .tgz, .taz and .tz (case sensitive!) are all | 123 The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped |
102 mapped to '.tar.gz'. (This is table-driven too, using the | 124 to '.tar.gz'. (This is table-driven too, using self.suffix_map.) |
103 dictionary suffix_map.) | |
104 | 125 |
105 Optional `strict' argument when False adds a bunch of commonly found, | 126 Optional `strict' argument when False adds commonly found but |
106 but non-standard types. | 127 non-standard types. |
107 """ | 128 """ |
129 self._lazy_init() | |
108 scheme, url = urllib.splittype(url) | 130 scheme, url = urllib.splittype(url) |
109 if scheme == 'data': | 131 if scheme == 'data': |
110 # syntax of data URLs: | 132 # syntax of data URLs: |
111 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data | 133 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data |
112 # mediatype := [ type "/" subtype ] *( ";" parameter ) | 134 # mediatype := [ type "/" subtype ] *( ";" parameter ) |
113 # data := *urlchar | 135 # data := *urlchar |
114 # parameter := attribute "=" value | 136 # parameter := attribute "=" value |
115 # type/subtype defaults to "text/plain" | 137 if ',' not in url: |
116 comma = url.find(',') | 138 return None, None # bad data URL |
117 if comma < 0: | 139 type = url.split(',', 1)[0].split(';', 1)[0] # up to first , or ; |
118 # bad data URL | |
119 return None, None | |
120 semi = url.find(';', 0, comma) | |
121 if semi >= 0: | |
122 type = url[:semi] | |
123 else: | |
124 type = url[:comma] | |
125 if '=' in type or '/' not in type: | 140 if '=' in type or '/' not in type: |
126 type = 'text/plain' | 141 type = 'text/plain' # type/subtype defaults to "text/plain" |
127 return type, None # never compressed, so encoding is None | 142 return type, None # data URLs are never compressed |
128 base, ext = posixpath.splitext(url) | 143 base, ext = posixpath.splitext(url) |
129 while ext in self.suffix_map: | 144 base, ext = posixpath.splitext(base + self.suffix_map.get(ext, ext)) |
130 base, ext = posixpath.splitext(base + self.suffix_map[ext]) | 145 encoding = self.encodings_map.get(ext) |
131 if ext in self.encodings_map: | 146 if encoding: |
132 encoding = self.encodings_map[ext] | |
133 base, ext = posixpath.splitext(base) | 147 base, ext = posixpath.splitext(base) |
134 else: | 148 loose_map, strict_map = self.types_map |
135 encoding = None | 149 type = strict_map.get(ext) or strict_map.get(ext.lower()) |
136 types_map = self.types_map[True] | 150 if not strict: |
137 if ext in types_map: | 151 type = type or loose_map.get(ext) or loose_map.get(ext.lower()) |
138 return types_map[ext], encoding | 152 return type, encoding |
139 elif ext.lower() in types_map: | |
140 return types_map[ext.lower()], encoding | |
141 elif strict: | |
142 return None, encoding | |
143 types_map = self.types_map[False] | |
144 if ext in types_map: | |
145 return types_map[ext], encoding | |
146 elif ext.lower() in types_map: | |
147 return types_map[ext.lower()], encoding | |
148 else: | |
149 return None, encoding | |
150 | 153 |
151 def guess_all_extensions(self, type, strict=True): | 154 def guess_all_extensions(self, type, strict=True): |
152 """Guess the extensions for a file based on its MIME type. | 155 """Guess the extensions for a file based on its MIME type. |
153 | 156 |
154 Return value is a list of strings giving the possible filename | 157 Return value is a list of strings giving the possible filename |
155 extensions, including the leading dot ('.'). The extension is not | 158 extensions, including the leading dot ('.'). The extension is not |
156 guaranteed to have been associated with any particular data stream, | 159 guaranteed to have been associated with any particular data stream, |
157 but would be mapped to the MIME type `type' by guess_type(). | 160 but would be mapped to the MIME type `type' by guess_type(). |
158 | 161 |
159 Optional `strict' argument when false adds a bunch of commonly found, | 162 Optional `strict' argument when False adds commonly found but |
160 but non-standard types. | 163 non-standard types. |
161 """ | 164 """ |
165 self._lazy_init() | |
162 type = type.lower() | 166 type = type.lower() |
163 extensions = self.types_map_inv[True].get(type, []) | 167 extensions = self.types_map_inv[True].get(type, []) |
164 if not strict: | 168 if not strict: |
165 for ext in self.types_map_inv[False].get(type, []): | 169 for ext in self.types_map_inv[False].get(type, []): |
166 if ext not in extensions: | 170 if ext not in extensions: |
167 extensions.append(ext) | 171 extensions.append(ext) |
168 return extensions | 172 return extensions |
169 | 173 |
170 def guess_extension(self, type, strict=True): | 174 def guess_extension(self, type, strict=True): |
171 """Guess the extension for a file based on its MIME type. | 175 """Guess the extension for a file based on its MIME type. |
172 | 176 |
173 Return value is a string giving a filename extension, | 177 Return value is a string giving a filename extension, |
174 including the leading dot ('.'). The extension is not | 178 including the leading dot ('.'). The extension is not |
175 guaranteed to have been associated with any particular data | 179 guaranteed to have been associated with any particular data |
176 stream, but would be mapped to the MIME type `type' by | 180 stream, but would be mapped to the MIME type `type' by |
177 guess_type(). If no extension can be guessed for `type', None | 181 guess_type(). If no extension can be guessed for `type', None |
178 is returned. | 182 is returned. |
179 | 183 |
180 Optional `strict' argument when false adds a bunch of commonly found, | 184 Optional `strict' argument when False adds commonly found but |
181 but non-standard types. | 185 non-standard types. |
182 """ | 186 """ |
183 extensions = self.guess_all_extensions(type, strict) | 187 extensions = self.guess_all_extensions(type, strict) |
184 if not extensions: | 188 return extensions[0] if extensions else None |
185 return None | |
186 return extensions[0] | |
187 | 189 |
188 def read(self, filename, strict=True): | 190 def read(self, filename, strict=True): |
191 """Read a single mime.types-format file, specified by pathname, and | |
192 add its contents to the type mappings. Originally called at instance | |
193 initialization, but now only included for backwards compatibility and | |
194 internally unused. | |
189 """ | 195 """ |
190 Read a single mime.types-format file, specified by pathname. | 196 with open(filename) as fp: |
191 | 197 self.add_types(_parse_mimetypes(fp), strict) |
192 If strict is true, information will be added to | 198 |
193 list of standard types, else to the list of non-standard | 199 def readfp(self, fp, strict=True): |
194 types. | 200 """Read a single mime.types-format file. Originally a helper for the |
201 `read' method, now only included for backwards compatibility and· | |
202 unused internally. | |
195 """ | 203 """ |
204 self.add_types(_parse_mimetypes(fp), strict) | |
205 | |
206 def _parse_mimetypes(types_lines): | |
207 """Parse Apache mime.types format. | |
208 ···· | |
209 Takes an iterable of lines. Returns an iterable of (ext, type) pairs. | |
210 """ | |
211 for line in types_lines: | |
212 words = line.split('#', 1)[0].split() | |
213 if not words: | |
214 continue | |
215 type = words[0] | |
216 for extension in words[1:]: | |
217 yield ('.' + extension, type) | |
218 | |
219 def read_mime_types(filename): | |
220 """Read MIME types from a file. | |
221 | |
222 Read a file in the format of Apache's mime.types, and returns a mapping | |
223 from extensions to MIME types. | |
224 """ | |
225 # Included for backwards compatibility | |
226 try: | |
196 fp = open(filename) | 227 fp = open(filename) |
197 self.readfp(fp, strict) | |
198 fp.close() | |
199 | |
200 def readfp(self, fp, strict=True): | |
201 """ | |
202 Read a single mime.types-format file. | |
203 | |
204 If strict is true, information will be added to | |
205 list of standard types, else to the list of non-standard | |
206 types. | |
207 """ | |
208 while 1: | |
209 line = fp.readline() | |
210 if not line: | |
211 break | |
212 words = line.split() | |
213 for i in range(len(words)): | |
214 if words[i][0] == '#': | |
215 del words[i:] | |
216 break | |
217 if not words: | |
218 continue | |
219 type, suffixes = words[0], words[1:] | |
220 for suff in suffixes: | |
221 self.add_type(type, '.' + suff, strict) | |
222 | |
Jacob Rus
2009/08/12 18:10:33
The way all the functions below (I removed them) w
| |
223 def guess_type(url, strict=True): | |
224 """Guess the type of a file based on its URL. | |
225 | |
226 Return value is a tuple (type, encoding) where type is None if the | |
227 type can't be guessed (no or unknown suffix) or a string of the | |
228 form type/subtype, usable for a MIME Content-type header; and | |
229 encoding is None for no encoding or the name of the program used | |
230 to encode (e.g. compress or gzip). The mappings are table | |
231 driven. Encoding suffixes are case sensitive; type suffixes are | |
232 first tried case sensitive, then case insensitive. | |
233 | |
234 The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped | |
235 to ".tar.gz". (This is table-driven too, using the dictionary | |
236 suffix_map). | |
237 | |
238 Optional `strict' argument when false adds a bunch of commonly found, but | |
239 non-standard types. | |
240 """ | |
241 if _db is None: | |
242 init() | |
243 return _db.guess_type(url, strict) | |
244 | |
245 | |
246 def guess_all_extensions(type, strict=True): | |
247 """Guess the extensions for a file based on its MIME type. | |
248 | |
249 Return value is a list of strings giving the possible filename | |
250 extensions, including the leading dot ('.'). The extension is not | |
251 guaranteed to have been associated with any particular data | |
252 stream, but would be mapped to the MIME type `type' by | |
253 guess_type(). If no extension can be guessed for `type', None | |
254 is returned. | |
255 | |
256 Optional `strict' argument when false adds a bunch of commonly found, | |
257 but non-standard types. | |
258 """ | |
259 if _db is None: | |
260 init() | |
261 return _db.guess_all_extensions(type, strict) | |
262 | |
263 def guess_extension(type, strict=True): | |
264 """Guess the extension for a file based on its MIME type. | |
265 | |
266 Return value is a string giving a filename extension, including the | |
267 leading dot ('.'). The extension is not guaranteed to have been | |
268 associated with any particular data stream, but would be mapped to the | |
269 MIME type `type' by guess_type(). If no extension can be guessed for | |
270 `type', None is returned. | |
271 | |
272 Optional `strict' argument when false adds a bunch of commonly found, | |
273 but non-standard types. | |
274 """ | |
275 if _db is None: | |
276 init() | |
277 return _db.guess_extension(type, strict) | |
278 | |
279 def add_type(type, ext, strict=True): | |
280 """Add a mapping between a type and an extension. | |
281 | |
282 When the extension is already known, the new | |
283 type will replace the old one. When the type | |
284 is already known the extension will be added | |
285 to the list of known extensions. | |
286 | |
287 If strict is true, information will be added to | |
288 list of standard types, else to the list of non-standard | |
289 types. | |
290 """ | |
291 if _db is None: | |
292 init() | |
293 return _db.add_type(type, ext, strict) | |
294 | |
295 | |
296 def init(files=None): | |
297 global suffix_map, types_map, encodings_map, common_types | |
298 global inited, _db | |
299 inited = True # so that MimeTypes.__init__() doesn't call us again | |
300 db = MimeTypes() | |
301 if files is None: | |
302 files = knownfiles | |
303 for file in files: | |
304 if os.path.isfile(file): | |
305 db.readfp(open(file)) | |
306 encodings_map = db.encodings_map | |
307 suffix_map = db.suffix_map | |
308 types_map = db.types_map[True] | |
309 common_types = db.types_map[False] | |
310 # Make the DB a global variable now that it is fully initialized | |
311 _db = db | |
312 | |
313 | |
314 def read_mime_types(file): | |
315 try: | |
316 f = open(file) | |
317 except IOError: | 228 except IOError: |
318 return None | 229 return None |
319 db = MimeTypes() | 230 return dict(_parse_mimetypes(fp)) |
320 db.readfp(f, True) | 231 |
321 return db.types_map[True] | 232 _suffix_map = { |
322 | 233 '.tgz': '.tar.gz', |
323 | 234 '.taz': '.tar.gz', |
324 def _default_mime_types(): | 235 '.tz': '.tar.gz', |
325 global suffix_map | 236 '.tbz2': '.tar.bz2', |
326 global encodings_map | 237 } |
327 global types_map | 238 _encodings_map = { |
328 global common_types | 239 '.gz': 'gzip', |
329 | 240 '.Z': 'compress', |
330 suffix_map = { | 241 '.bz2': 'bzip2', |
331 '.tgz': '.tar.gz', | 242 } |
Jacob Rus
2009/08/12 18:10:33
Previously, anyone who redefined suffix_map would
| |
332 '.taz': '.tar.gz', | 243 _strict_types = [ |
333 '.tz': '.tar.gz', | 244 # IANA registry: http://www.isi.edu/in-notes/iana/assignments/media-types |
334 '.tbz2': '.tar.bz2', | 245 # Apache: http://svn.apache.org/repos/asf/httpd/httpd/trunk/docs/conf/mime.types |
335 } | 246 ···· |
336 | 247 # later rows override previous rows for the extension -> type map, but add |
337 encodings_map = { | 248 # themselves to the end of the list for the type -> extension map |
338 '.gz': 'gzip', | 249 ('.a' , 'application/octet-stream'), |
339 '.Z': 'compress', | 250 ('.ai' , 'application/postscript'), |
340 '.bz2': 'bzip2', | 251 ('.aif' , 'audio/x-aiff'), |
341 } | 252 ('.aifc' , 'audio/x-aiff'), |
342 | 253 ('.aiff' , 'audio/x-aiff'), |
343 # Before adding new types, make sure they are either registered with IANA, | 254 ('.au' , 'audio/basic'), |
344 # at http://www.isi.edu/in-notes/iana/assignments/media-types | 255 ('.avi' , 'video/x-msvideo'), |
345 # or extensions, i.e. using the x- prefix | 256 ('.bat' , 'text/plain'), |
346 | 257 ('.bcpio' , 'application/x-bcpio'), |
347 # If you add to these, please keep them sorted! | 258 ('.bin' , 'application/octet-stream'), |
348 types_map = { | 259 ('.bmp' , 'image/x-ms-bmp'), |
349 '.a' : 'application/octet-stream', | 260 ('.c' , 'text/plain'), |
350 '.ai' : 'application/postscript', | 261 ('.cdf' , 'application/x-cdf'), |
351 '.aif' : 'audio/x-aiff', | 262 ('.cdf' , 'application/x-netcdf'), # canonical |
352 '.aifc' : 'audio/x-aiff', | 263 ('.cpio' , 'application/x-cpio'), |
353 '.aiff' : 'audio/x-aiff', | 264 ('.csh' , 'application/x-csh'), |
354 '.au' : 'audio/basic', | 265 ('.css' , 'text/css'), |
355 '.avi' : 'video/x-msvideo', | 266 ('.dll' , 'application/octet-stream'), |
356 '.bat' : 'text/plain', | 267 ('.doc' , 'application/msword'), |
357 '.bcpio' : 'application/x-bcpio', | 268 ('.dot' , 'application/msword'), |
358 '.bin' : 'application/octet-stream', | 269 ('.dvi' , 'application/x-dvi'), |
359 '.bmp' : 'image/x-ms-bmp', | 270 ('.eml' , 'message/rfc822'), |
360 '.c' : 'text/plain', | 271 ('.eps' , 'application/postscript'), |
361 # Duplicates :( | 272 ('.etx' , 'text/x-setext'), |
362 '.cdf' : 'application/x-cdf', | 273 ('.exe' , 'application/octet-stream'), |
363 '.cdf' : 'application/x-netcdf', | 274 ('.gif' , 'image/gif'), |
364 '.cpio' : 'application/x-cpio', | 275 ('.gtar' , 'application/x-gtar'), |
365 '.csh' : 'application/x-csh', | 276 ('.h' , 'text/plain'), |
366 '.css' : 'text/css', | 277 ('.hdf' , 'application/x-hdf'), |
367 '.dll' : 'application/octet-stream', | 278 ('.htm' , 'text/html'), |
368 '.doc' : 'application/msword', | 279 ('.html' , 'text/html'), |
369 '.dot' : 'application/msword', | 280 ('.ief' , 'image/ief'), |
370 '.dvi' : 'application/x-dvi', | 281 ('.jpe' , 'image/jpeg'), |
371 '.eml' : 'message/rfc822', | 282 ('.jpeg' , 'image/jpeg'), |
372 '.eps' : 'application/postscript', | 283 ('.jpg' , 'image/jpeg'), |
373 '.etx' : 'text/x-setext', | 284 ('.js' , 'application/x-javascript'), |
374 '.exe' : 'application/octet-stream', | 285 ('.ksh' , 'text/plain'), |
375 '.gif' : 'image/gif', | 286 ('.latex' , 'application/x-latex'), |
376 '.gtar' : 'application/x-gtar', | 287 ('.m1v' , 'video/mpeg'), |
377 '.h' : 'text/plain', | 288 ('.man' , 'application/x-troff-man'), |
378 '.hdf' : 'application/x-hdf', | 289 ('.me' , 'application/x-troff-me'), |
379 '.htm' : 'text/html', | 290 ('.mht' , 'message/rfc822'), |
380 '.html' : 'text/html', | 291 ('.mhtml' , 'message/rfc822'), |
381 '.ief' : 'image/ief', | 292 ('.mif' , 'application/x-mif'), |
382 '.jpe' : 'image/jpeg', | 293 ('.mov' , 'video/quicktime'), |
383 '.jpeg' : 'image/jpeg', | 294 ('.movie' , 'video/x-sgi-movie'), |
384 '.jpg' : 'image/jpeg', | 295 ('.mp2' , 'audio/mpeg'), |
385 '.js' : 'application/x-javascript', | 296 ('.mp3' , 'audio/mpeg'), |
386 '.ksh' : 'text/plain', | 297 ('.mp4' , 'video/mp4'), |
387 '.latex' : 'application/x-latex', | 298 ('.mpa' , 'video/mpeg'), |
388 '.m1v' : 'video/mpeg', | 299 ('.mpe' , 'video/mpeg'), |
389 '.man' : 'application/x-troff-man', | 300 ('.mpeg' , 'video/mpeg'), |
390 '.me' : 'application/x-troff-me', | 301 ('.mpg' , 'video/mpeg'), |
391 '.mht' : 'message/rfc822', | 302 ('.ms' , 'application/x-troff-ms'), |
392 '.mhtml' : 'message/rfc822', | 303 ('.nc' , 'application/x-netcdf'), |
393 '.mif' : 'application/x-mif', | 304 ('.nws' , 'message/rfc822'), |
394 '.mov' : 'video/quicktime', | 305 ('.o' , 'application/octet-stream'), |
395 '.movie' : 'video/x-sgi-movie', | 306 ('.obj' , 'application/octet-stream'), |
396 '.mp2' : 'audio/mpeg', | 307 ('.oda' , 'application/oda'), |
397 '.mp3' : 'audio/mpeg', | 308 ('.p12' , 'application/x-pkcs12'), |
398 '.mp4' : 'video/mp4', | 309 ('.p7c' , 'application/pkcs7-mime'), |
399 '.mpa' : 'video/mpeg', | 310 ('.pbm' , 'image/x-portable-bitmap'), |
400 '.mpe' : 'video/mpeg', | 311 ('.pdf' , 'application/pdf'), |
401 '.mpeg' : 'video/mpeg', | 312 ('.pfx' , 'application/x-pkcs12'), |
402 '.mpg' : 'video/mpeg', | 313 ('.pgm' , 'image/x-portable-graymap'), |
403 '.ms' : 'application/x-troff-ms', | 314 ('.pl' , 'text/plain'), |
404 '.nc' : 'application/x-netcdf', | 315 ('.png' , 'image/png'), |
405 '.nws' : 'message/rfc822', | 316 ('.pnm' , 'image/x-portable-anymap'), |
406 '.o' : 'application/octet-stream', | 317 ('.pot' , 'application/vnd.ms-powerpoint'), |
407 '.obj' : 'application/octet-stream', | 318 ('.ppa' , 'application/vnd.ms-powerpoint'), |
408 '.oda' : 'application/oda', | 319 ('.ppm' , 'image/x-portable-pixmap'), |
409 '.p12' : 'application/x-pkcs12', | 320 ('.pps' , 'application/vnd.ms-powerpoint'), |
410 '.p7c' : 'application/pkcs7-mime', | 321 ('.ppt' , 'application/vnd.ms-powerpoint'), |
411 '.pbm' : 'image/x-portable-bitmap', | 322 ('.ps' , 'application/postscript'), |
412 '.pdf' : 'application/pdf', | 323 ('.pwz' , 'application/vnd.ms-powerpoint'), |
413 '.pfx' : 'application/x-pkcs12', | 324 ('.py' , 'text/x-python'), |
414 '.pgm' : 'image/x-portable-graymap', | 325 ('.pyc' , 'application/x-python-code'), |
415 '.pl' : 'text/plain', | 326 ('.pyo' , 'application/x-python-code'), |
416 '.png' : 'image/png', | 327 ('.qt' , 'video/quicktime'), |
417 '.pnm' : 'image/x-portable-anymap', | 328 ('.ra' , 'audio/x-pn-realaudio'), |
418 '.pot' : 'application/vnd.ms-powerpoint', | 329 ('.ram' , 'application/x-pn-realaudio'), |
419 '.ppa' : 'application/vnd.ms-powerpoint', | 330 ('.ras' , 'image/x-cmu-raster'), |
420 '.ppm' : 'image/x-portable-pixmap', | 331 ('.rdf' , 'application/xml'), |
421 '.pps' : 'application/vnd.ms-powerpoint', | 332 ('.rgb' , 'image/x-rgb'), |
422 '.ppt' : 'application/vnd.ms-powerpoint', | 333 ('.roff' , 'application/x-troff'), |
423 '.ps' : 'application/postscript', | 334 ('.rtx' , 'text/richtext'), |
424 '.pwz' : 'application/vnd.ms-powerpoint', | 335 ('.sgm' , 'text/x-sgml'), |
425 '.py' : 'text/x-python', | 336 ('.sgml' , 'text/x-sgml'), |
426 '.pyc' : 'application/x-python-code', | 337 ('.sh' , 'application/x-sh'), |
427 '.pyo' : 'application/x-python-code', | 338 ('.shar' , 'application/x-shar'), |
428 '.qt' : 'video/quicktime', | 339 ('.snd' , 'audio/basic'), |
429 '.ra' : 'audio/x-pn-realaudio', | 340 ('.so' , 'application/octet-stream'), |
430 '.ram' : 'application/x-pn-realaudio', | 341 ('.src' , 'application/x-wais-source'), |
431 '.ras' : 'image/x-cmu-raster', | 342 ('.sv4cpio', 'application/x-sv4cpio'), |
432 '.rdf' : 'application/xml', | 343 ('.sv4crc' , 'application/x-sv4crc'), |
433 '.rgb' : 'image/x-rgb', | 344 ('.swf' , 'application/x-shockwave-flash'), |
434 '.roff' : 'application/x-troff', | 345 ('.t' , 'application/x-troff'), |
435 '.rtx' : 'text/richtext', | 346 ('.tar' , 'application/x-tar'), |
436 '.sgm' : 'text/x-sgml', | 347 ('.tcl' , 'application/x-tcl'), |
437 '.sgml' : 'text/x-sgml', | 348 ('.tex' , 'application/x-tex'), |
438 '.sh' : 'application/x-sh', | 349 ('.texi' , 'application/x-texinfo'), |
439 '.shar' : 'application/x-shar', | 350 ('.texinfo', 'application/x-texinfo'), |
440 '.snd' : 'audio/basic', | 351 ('.tif' , 'image/tiff'), |
441 '.so' : 'application/octet-stream', | 352 ('.tiff' , 'image/tiff'), |
442 '.src' : 'application/x-wais-source', | 353 ('.tr' , 'application/x-troff'), |
443 '.sv4cpio': 'application/x-sv4cpio', | 354 ('.tsv' , 'text/tab-separated-values'), |
444 '.sv4crc' : 'application/x-sv4crc', | 355 ('.txt' , 'text/plain'), |
445 '.swf' : 'application/x-shockwave-flash', | 356 ('.ustar' , 'application/x-ustar'), |
446 '.t' : 'application/x-troff', | 357 ('.vcf' , 'text/x-vcard'), |
447 '.tar' : 'application/x-tar', | 358 ('.wav' , 'audio/x-wav'), |
448 '.tcl' : 'application/x-tcl', | 359 ('.wiz' , 'application/msword'), |
449 '.tex' : 'application/x-tex', | 360 ('.wsdl' , 'application/xml'), |
450 '.texi' : 'application/x-texinfo', | 361 ('.xbm' , 'image/x-xbitmap'), |
451 '.texinfo': 'application/x-texinfo', | 362 ('.xlb' , 'application/vnd.ms-excel'), |
452 '.tif' : 'image/tiff', | 363 ('.xls' , 'application/excel'), |
453 '.tiff' : 'image/tiff', | 364 ('.xls' , 'application/vnd.ms-excel'), # canonical |
454 '.tr' : 'application/x-troff', | 365 ('.xml' , 'text/xml'), |
455 '.tsv' : 'text/tab-separated-values', | 366 ('.xpdl' , 'application/xml'), |
456 '.txt' : 'text/plain', | 367 ('.xpm' , 'image/x-xpixmap'), |
457 '.ustar' : 'application/x-ustar', | 368 ('.xsl' , 'application/xml'), |
458 '.vcf' : 'text/x-vcard', | 369 ('.xwd' , 'image/x-xwindowdump'), |
459 '.wav' : 'audio/x-wav', | 370 ('.zip' , 'application/zip'), |
460 '.wiz' : 'application/msword', | 371 ] |
Jacob Rus
2009/08/12 18:10:33
These have been changed to a list of tuples so tha
| |
461 '.wsdl' : 'application/xml', | 372 _lenient_types = [ |
462 '.xbm' : 'image/x-xbitmap', | |
463 '.xlb' : 'application/vnd.ms-excel', | |
464 # Duplicates :( | |
465 '.xls' : 'application/excel', | |
466 '.xls' : 'application/vnd.ms-excel', | |
467 '.xml' : 'text/xml', | |
468 '.xpdl' : 'application/xml', | |
469 '.xpm' : 'image/x-xpixmap', | |
470 '.xsl' : 'application/xml', | |
471 '.xwd' : 'image/x-xwindowdump', | |
472 '.zip' : 'application/zip', | |
473 } | |
474 | |
475 # These are non-standard types, commonly found in the wild. They will | 373 # These are non-standard types, commonly found in the wild. They will |
476 # only match if strict=0 flag is given to the API methods. | 374 # only match if strict=0 flag is given to the API methods. |
477 | 375 ('.jpg' , 'image/jpg'), |
478 # Please sort these too | 376 ('.mid' , 'audio/midi'), |
479 common_types = { | 377 ('.midi', 'audio/midi'), |
480 '.jpg' : 'image/jpg', | 378 ('.pct' , 'image/pict'), |
481 '.mid' : 'audio/midi', | 379 ('.pic' , 'image/pict'), |
482 '.midi': 'audio/midi', | 380 ('.pict', 'image/pict'), |
483 '.pct' : 'image/pict', | 381 ('.rtf' , 'application/rtf'), |
484 '.pic' : 'image/pict', | 382 ('.xul' , 'text/xul'), |
485 '.pict': 'image/pict', | 383 ] |
Jacob Rus
2009/08/12 18:10:33
These should be removed altogether, because the on… [comment truncated]
| |
486 '.rtf' : 'application/rtf', | 384 |
487 '.xul' : 'text/xul' | 385 def _init_singleton(filenames=None): |
488 } | 386 global _db, guess_type, guess_extension, guess_all_extensions, add_type |
489 | 387 global add_types, encodings_map, suffix_map, types_map, common_types |
490 | 388 if filenames is None: |
491 _default_mime_types() | 389 filenames = knownfiles |
492 | 390 _db = MimeTypes(filenames=filenames) |
391 guess_type = _db.guess_type | |
392 guess_all_extensions = _db.guess_all_extensions | |
393 guess_extension = _db.guess_extension | |
394 add_type = _db.add_type | |
395 add_types = _db.add_types | |
396 encodings_map = _db.encodings_map | |
397 suffix_map = _db.suffix_map | |
398 types_map, common_types = _db.types_map | |
399 | |
400 # inited and init (the next 14 LOC) included for backwards compatibility. | |
401 global inited | |
402 inited = True | |
403 init = _init_singleton | |
404 init.__doc__ = """Re-initialize the module. | |
405 | |
406 Specifically, re-initialize singleton MimeTypes object and global functions | |
407 and objects. | |
408 | |
409 This should be used with care, because resetting the module to defaults could | |
410 break other third-party code which has changed the contents of these objects | |
411 and has expectations about their state. | |
412 | |
413 To make custom MIME type mappings, use a separate MimeTypes instance instead. | |
414 """ | |
415 | |
416 _init_singleton() # initialize the module | |
493 | 417 |
494 if __name__ == '__main__': | 418 if __name__ == '__main__': |
495 import sys | 419 import sys |
496 import getopt | 420 from getopt import getopt, GetoptError |
497 | 421 |
498 USAGE = """\ | 422 USAGE = """\ |
499 Usage: mimetypes.py [options] type | 423 Usage: mimetypes.py [options] type |
500 | 424 |
501 Options: | 425 Options: |
502 --help / -h -- print this message and exit | 426 --help / -h -- print this message and exit |
503 --lenient / -l -- additionally search of some common, but non-standard | 427 --lenient / -l -- additionally search common but non-standard types. |
504 types. | |
505 --extension / -e -- guess extension instead of type | 428 --extension / -e -- guess extension instead of type |
506 | 429 |
507 More than one type argument may be given. | 430 More than one type argument may be given. |
508 """ | 431 """ |
509 | 432 try: |
510 def usage(code, msg=''): | 433 opts, args = getopt(sys.argv[1:], 'hle', |
434 ['help', 'lenient', 'extension']) | |
435 except GetoptError, msg: | |
436 print msg | |
511 print USAGE | 437 print USAGE |
512 if msg: print msg | 438 sys.exit(1) |
513 sys.exit(code) | 439 |
514 | 440 opts = set(opt for opt, arg in opts) |
515 try: | 441 if '-h' in opts or '--help' in opts: |
516 opts, args = getopt.getopt(sys.argv[1:], 'hle', | 442 print USAGE |
517 ['help', 'lenient', 'extension']) | 443 sys.exit(0) |
518 except getopt.error, msg: | 444 if '-l' in opts or '--lenient' in opts: |
519 usage(1, msg) | 445 strict = False |
520 | 446 else: |
521 strict = 1 | 447 strict = True |
Jacob Rus
2009/08/12 18:10:33
It's probably unnecessary, but I tried to shorten… [comment truncated]
| |
522 extension = 0 | |
523 for opt, arg in opts: | |
524 if opt in ('-h', '--help'): | |
525 usage(0) | |
526 elif opt in ('-l', '--lenient'): | |
527 strict = 0 | |
528 elif opt in ('-e', '--extension'): | |
529 extension = 1 | |
530 for gtype in args: | 448 for gtype in args: |
531 if extension: | 449 if '-e' in opts or '--extension' in opts: |
532 guess = guess_extension(gtype, strict) | 450 guess = guess_extension(gtype, strict) |
533 if not guess: print "I don't know anything about type", gtype | 451 if not guess: print "I don't know anything about type", gtype |
534 else: print guess | 452 else: print guess |
535 else: | 453 else: |
536 guess, encoding = guess_type(gtype, strict) | 454 guess, encoding = guess_type(gtype, strict) |
537 if not guess: print "I don't know anything about type", gtype | 455 if not guess: print "I don't know anything about type", gtype |
538 else: print 'type:', guess, 'encoding:', encoding | 456 else: print 'type:', guess, 'encoding:', encoding |
OLD | NEW |