OLD | NEW |
1 """HTTP cookie handling for web clients. | 1 """HTTP cookie handling for web clients. |
2 | 2 |
3 This module has (now fairly distant) origins in Gisle Aas' Perl module | 3 This module has (now fairly distant) origins in Gisle Aas' Perl module |
4 HTTP::Cookies, from the libwww-perl library. | 4 HTTP::Cookies, from the libwww-perl library. |
5 | 5 |
6 Docstrings, comments and debug strings in this code refer to the | 6 Docstrings, comments and debug strings in this code refer to the |
7 attributes of the HTTP cookie system as cookie-attributes, to distinguish | 7 attributes of the HTTP cookie system as cookie-attributes, to distinguish |
8 them clearly from Python attributes. | 8 them clearly from Python attributes. |
9 | 9 |
10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not | 10 Class diagram (note that BSDDBCookieJar and the MSIE* classes are not |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
114 | 114 |
115 """ | 115 """ |
116 if t is None: t = time.time() | 116 if t is None: t = time.time() |
117 year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] | 117 year, mon, mday, hour, min, sec, wday = time.gmtime(t)[:7] |
118 return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( | 118 return "%s %02d-%s-%04d %02d:%02d:%02d GMT" % ( |
119 DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec) | 119 DAYS[wday], mday, MONTHS[mon-1], year, hour, min, sec) |
120 | 120 |
121 | 121 |
122 UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} | 122 UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None} |
123 | 123 |
124 TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$") | 124 TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII) |
125 def offset_from_tz_string(tz): | 125 def offset_from_tz_string(tz): |
126 offset = None | 126 offset = None |
127 if tz in UTC_ZONES: | 127 if tz in UTC_ZONES: |
128 offset = 0 | 128 offset = 0 |
129 else: | 129 else: |
130 m = TIMEZONE_RE.search(tz) | 130 m = TIMEZONE_RE.search(tz) |
131 if m: | 131 if m: |
132 offset = 3600 * int(m.group(2)) | 132 offset = 3600 * int(m.group(2)) |
133 if m.group(3): | 133 if m.group(3): |
134 offset = offset + 60 * int(m.group(3)) | 134 offset = offset + 60 * int(m.group(3)) |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
184 tz = tz.upper() | 184 tz = tz.upper() |
185 offset = offset_from_tz_string(tz) | 185 offset = offset_from_tz_string(tz) |
186 if offset is None: | 186 if offset is None: |
187 return None | 187 return None |
188 t = t - offset | 188 t = t - offset |
189 | 189 |
190 return t | 190 return t |
191 | 191 |
192 STRICT_DATE_RE = re.compile( | 192 STRICT_DATE_RE = re.compile( |
193 r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " | 193 r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) " |
194 "(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$") | 194 r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII) |
195 WEEKDAY_RE = re.compile( | 195 WEEKDAY_RE = re.compile( |
196 r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I) | 196 r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII) |
197 LOOSE_HTTP_DATE_RE = re.compile( | 197 LOOSE_HTTP_DATE_RE = re.compile( |
198 r"""^ | 198 r"""^ |
199 (\d\d?) # day | 199 (\d\d?) # day |
200 (?:\s+|[-\/]) | 200 (?:\s+|[-\/]) |
201 (\w+) # month | 201 (\w+) # month |
202 (?:\s+|[-\/]) | 202 (?:\s+|[-\/]) |
203 (\d+) # year | 203 (\d+) # year |
204 (?: | 204 (?: |
205 (?:\s+|:) # separator before clock | 205 (?:\s+|:) # separator before clock |
206 (\d\d?):(\d\d) # hour:min | 206 (\d\d?):(\d\d) # hour:min |
207 (?::(\d\d))? # optional seconds | 207 (?::(\d\d))? # optional seconds |
208 )? # optional clock | 208 )? # optional clock |
209 \s* | 209 \s* |
210 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone | 210 ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone |
211 \s* | 211 \s* |
212 (?:\(\w+\))? # ASCII representation of timezone in parens. | 212 (?:\(\w+\))? # ASCII representation of timezone in parens. |
213 \s*$""", re.X) | 213 \s*$""", re.X | re.ASCII) |
214 def http2time(text): | 214 def http2time(text): |
215 """Returns time in seconds since epoch of time represented by a string. | 215 """Returns time in seconds since epoch of time represented by a string. |
216 | 216 |
217 Return value is an integer. | 217 Return value is an integer. |
218 | 218 |
219 None is returned if the format of str is unrecognized, the time is outside | 219 None is returned if the format of str is unrecognized, the time is outside |
220 the representable range, or the timezone string is not recognized. If the | 220 the representable range, or the timezone string is not recognized. If the |
221 string contains no timezone, UTC is assumed. | 221 string contains no timezone, UTC is assumed. |
222 | 222 |
223 The timezone in the string may be numerical (like "-0800" or "+0100") or a | 223 The timezone in the string may be numerical (like "-0800" or "+0100") or a |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
275 [-\/]? | 275 [-\/]? |
276 (\d\d?) # day | 276 (\d\d?) # day |
277 (?: | 277 (?: |
278 (?:\s+|[-:Tt]) # separator before clock | 278 (?:\s+|[-:Tt]) # separator before clock |
279 (\d\d?):?(\d\d) # hour:min | 279 (\d\d?):?(\d\d) # hour:min |
280 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) | 280 (?::?(\d\d(?:\.\d*)?))? # optional seconds (and fractional) |
281 )? # optional clock | 281 )? # optional clock |
282 \s* | 282 \s* |
283 ([-+]?\d\d?:?(:?\d\d)? | 283 ([-+]?\d\d?:?(:?\d\d)? |
284 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) | 284 |Z|z)? # timezone (Z is "zero meridian", i.e. GMT) |
285 \s*$""", re.X) | 285 \s*$""", re.X | re.ASCII) |
286 def iso2time(text): | 286 def iso2time(text): |
287 """ | 287 """ |
288 As for http2time, but parses the ISO 8601 formats: | 288 As for http2time, but parses the ISO 8601 formats: |
289 | 289 |
290 1994-02-03 14:15:29 -0100 -- ISO 8601 format | 290 1994-02-03 14:15:29 -0100 -- ISO 8601 format |
291 1994-02-03 14:15:29 -- zone is optional | 291 1994-02-03 14:15:29 -- zone is optional |
292 1994-02-03 -- only date | 292 1994-02-03 -- only date |
293 1994-02-03T14:15:29 -- Use T as separator | 293 1994-02-03T14:15:29 -- Use T as separator |
294 19940203T141529Z -- ISO 8601 compact format | 294 19940203T141529Z -- ISO 8601 compact format |
295 19940203 -- only date | 295 19940203 -- only date |
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
482 pairs.append((k, v)) | 482 pairs.append((k, v)) |
483 | 483 |
484 if pairs: | 484 if pairs: |
485 if not version_set: | 485 if not version_set: |
486 pairs.append(("version", "0")) | 486 pairs.append(("version", "0")) |
487 result.append(pairs) | 487 result.append(pairs) |
488 | 488 |
489 return result | 489 return result |
490 | 490 |
491 | 491 |
492 IPV4_RE = re.compile(r"\.\d+$") | 492 IPV4_RE = re.compile(r"\.\d+$", re.ASCII) |
493 def is_HDN(text): | 493 def is_HDN(text): |
494 """Return True if text is a host domain name.""" | 494 """Return True if text is a host domain name.""" |
495 # XXX | 495 # XXX |
496 # This may well be wrong. Which RFC is HDN defined in, if any (for | 496 # This may well be wrong. Which RFC is HDN defined in, if any (for |
497 # the purposes of RFC 2965)? | 497 # the purposes of RFC 2965)? |
498 # For the current implementation, what about IPv6? Remember to look | 498 # For the current implementation, what about IPv6? Remember to look |
499 # at other uses of IPV4_RE also, if change this. | 499 # at other uses of IPV4_RE also, if change this. |
500 if IPV4_RE.search(text): | 500 if IPV4_RE.search(text): |
501 return False | 501 return False |
502 if text == "": | 502 if text == "": |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
567 # equal IP addresses | 567 # equal IP addresses |
568 return True | 568 return True |
569 return False | 569 return False |
570 initial_dot = B.startswith(".") | 570 initial_dot = B.startswith(".") |
571 if initial_dot and A.endswith(B): | 571 if initial_dot and A.endswith(B): |
572 return True | 572 return True |
573 if not initial_dot and A == B: | 573 if not initial_dot and A == B: |
574 return True | 574 return True |
575 return False | 575 return False |
576 | 576 |
577 cut_port_re = re.compile(r":\d+$") | 577 cut_port_re = re.compile(r":\d+$", re.ASCII) |
578 def request_host(request): | 578 def request_host(request): |
579 """Return request-host, as defined by RFC 2965. | 579 """Return request-host, as defined by RFC 2965. |
580 | 580 |
581 Variation from RFC: returned value is lowercased, for convenient | 581 Variation from RFC: returned value is lowercased, for convenient |
582 comparison. | 582 comparison. |
583 | 583 |
584 """ | 584 """ |
585 url = request.get_full_url() | 585 url = request.get_full_url() |
586 host = urllib.parse.urlparse(url)[1] | 586 host = urllib.parse.urlparse(url)[1] |
587 if host == "": | 587 if host == "": |
(...skipping 612 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1200 You may not need to know about this class: try | 1200 You may not need to know about this class: try |
1201 urllib.request.build_opener(HTTPCookieProcessor).open(url). | 1201 urllib.request.build_opener(HTTPCookieProcessor).open(url). |
1202 """ | 1202 """ |
1203 | 1203 |
1204 non_word_re = re.compile(r"\W") | 1204 non_word_re = re.compile(r"\W") |
1205 quote_re = re.compile(r"([\"\\])") | 1205 quote_re = re.compile(r"([\"\\])") |
1206 strict_domain_re = re.compile(r"\.?[^.]*") | 1206 strict_domain_re = re.compile(r"\.?[^.]*") |
1207 domain_re = re.compile(r"[^.]*") | 1207 domain_re = re.compile(r"[^.]*") |
1208 dots_re = re.compile(r"^\.+") | 1208 dots_re = re.compile(r"^\.+") |
1209 | 1209 |
1210 magic_re = r"^\#LWP-Cookies-(\d+\.\d+)" | 1210 magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII) |
1211 | 1211 |
1212 def __init__(self, policy=None): | 1212 def __init__(self, policy=None): |
1213 if policy is None: | 1213 if policy is None: |
1214 policy = DefaultCookiePolicy() | 1214 policy = DefaultCookiePolicy() |
1215 self._policy = policy | 1215 self._policy = policy |
1216 | 1216 |
1217 self._cookies_lock = _threading.RLock() | 1217 self._cookies_lock = _threading.RLock() |
1218 self._cookies = {} | 1218 self._cookies = {} |
1219 | 1219 |
1220 def set_policy(self, policy): | 1220 def set_policy(self, policy): |
(...skipping 628 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1849 # There really isn't an LWP Cookies 2.0 format, but this indicates | 1849 # There really isn't an LWP Cookies 2.0 format, but this indicates |
1850 # that there is extra information in here (domain_dot and | 1850 # that there is extra information in here (domain_dot and |
1851 # port_spec) while still being compatible with libwww-perl, I hope. | 1851 # port_spec) while still being compatible with libwww-perl, I hope. |
1852 f.write("#LWP-Cookies-2.0\n") | 1852 f.write("#LWP-Cookies-2.0\n") |
1853 f.write(self.as_lwp_str(ignore_discard, ignore_expires)) | 1853 f.write(self.as_lwp_str(ignore_discard, ignore_expires)) |
1854 finally: | 1854 finally: |
1855 f.close() | 1855 f.close() |
1856 | 1856 |
1857 def _really_load(self, f, filename, ignore_discard, ignore_expires): | 1857 def _really_load(self, f, filename, ignore_discard, ignore_expires): |
1858 magic = f.readline() | 1858 magic = f.readline() |
1859 if not re.search(self.magic_re, magic): | 1859 if not self.magic_re.search(magic): |
1860 msg = ("%r does not look like a Set-Cookie3 (LWP) format " | 1860 msg = ("%r does not look like a Set-Cookie3 (LWP) format " |
1861 "file" % filename) | 1861 "file" % filename) |
1862 raise LoadError(msg) | 1862 raise LoadError(msg) |
1863 | 1863 |
1864 now = time.time() | 1864 now = time.time() |
1865 | 1865 |
1866 header = "Set-Cookie3:" | 1866 header = "Set-Cookie3:" |
1867 boolean_attrs = ("port_spec", "path_spec", "domain_dot", | 1867 boolean_attrs = ("port_spec", "path_spec", "domain_dot", |
1868 "secure", "discard") | 1868 "secure", "discard") |
1869 value_attrs = ("version", | 1869 value_attrs = ("version", |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1958 specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the | 1958 specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the |
1959 domain as set in the HTTP header started with a dot (yes, I'm aware some | 1959 domain as set in the HTTP header started with a dot (yes, I'm aware some |
1960 domains in Netscape files start with a dot and some don't -- trust me, you | 1960 domains in Netscape files start with a dot and some don't -- trust me, you |
1961 really don't want to know any more about this). | 1961 really don't want to know any more about this). |
1962 | 1962 |
1963 Note that though Mozilla and Netscape use the same format, they use | 1963 Note that though Mozilla and Netscape use the same format, they use |
1964 slightly different headers. The class saves cookies using the Netscape | 1964 slightly different headers. The class saves cookies using the Netscape |
1965 header by default (Mozilla can cope with that). | 1965 header by default (Mozilla can cope with that). |
1966 | 1966 |
1967 """ | 1967 """ |
1968 magic_re = "#( Netscape)? HTTP Cookie File" | 1968 magic_re = re.compile("#( Netscape)? HTTP Cookie File") |
1969 header = """\ | 1969 header = """\ |
1970 # Netscape HTTP Cookie File | 1970 # Netscape HTTP Cookie File |
1971 # http://www.netscape.com/newsref/std/cookie_spec.html | 1971 # http://www.netscape.com/newsref/std/cookie_spec.html |
1972 # This is a generated file! Do not edit. | 1972 # This is a generated file! Do not edit. |
1973 | 1973 |
1974 """ | 1974 """ |
1975 | 1975 |
1976 def _really_load(self, f, filename, ignore_discard, ignore_expires): | 1976 def _really_load(self, f, filename, ignore_discard, ignore_expires): |
1977 now = time.time() | 1977 now = time.time() |
1978 | 1978 |
1979 magic = f.readline() | 1979 magic = f.readline() |
1980 if not re.search(self.magic_re, magic): | 1980 if not self.magic_re.search(magic): |
1981 f.close() | 1981 f.close() |
1982 raise LoadError( | 1982 raise LoadError( |
1983 "%r does not look like a Netscape format cookies file" % | 1983 "%r does not look like a Netscape format cookies file" % |
1984 filename) | 1984 filename) |
1985 | 1985 |
1986 try: | 1986 try: |
1987 while 1: | 1987 while 1: |
1988 line = f.readline() | 1988 line = f.readline() |
1989 if line == "": break | 1989 if line == "": break |
1990 | 1990 |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2069 value = cookie.name | 2069 value = cookie.name |
2070 else: | 2070 else: |
2071 name = cookie.name | 2071 name = cookie.name |
2072 value = cookie.value | 2072 value = cookie.value |
2073 f.write( | 2073 f.write( |
2074 "\t".join([cookie.domain, initial_dot, cookie.path, | 2074 "\t".join([cookie.domain, initial_dot, cookie.path, |
2075 secure, expires, name, value])+ | 2075 secure, expires, name, value])+ |
2076 "\n") | 2076 "\n") |
2077 finally: | 2077 finally: |
2078 f.close() | 2078 f.close() |
OLD | NEW |