| OLD | NEW |
| 1 /* | 1 /* |
| 2 | 2 |
| 3 Unicode implementation based on original code by Fredrik Lundh, | 3 Unicode implementation based on original code by Fredrik Lundh, |
| 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the | 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the |
| 5 Unicode Integration Proposal (see file Misc/unicode.txt). | 5 Unicode Integration Proposal (see file Misc/unicode.txt). |
| 6 | 6 |
| 7 Major speed upgrades to the method implementations at the Reykjavik | 7 Major speed upgrades to the method implementations at the Reykjavik |
| 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. | 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. |
| 9 | 9 |
| 10 Copyright (c) Corporation for National Research Initiatives. | 10 Copyright (c) Corporation for National Research Initiatives. |
| (...skipping 2395 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2406 else if (byteorder == 1) { | 2406 else if (byteorder == 1) { |
| 2407 /* force BE */ | 2407 /* force BE */ |
| 2408 iorder[0] = 3; | 2408 iorder[0] = 3; |
| 2409 iorder[1] = 2; | 2409 iorder[1] = 2; |
| 2410 iorder[2] = 1; | 2410 iorder[2] = 1; |
| 2411 iorder[3] = 0; | 2411 iorder[3] = 0; |
| 2412 } | 2412 } |
| 2413 | 2413 |
| 2414 while (size-- > 0) { | 2414 while (size-- > 0) { |
| 2415 Py_UCS4 ch = *s++; | 2415 Py_UCS4 ch = *s++; |
| 2416 |
| 2416 #ifndef Py_UNICODE_WIDE | 2417 #ifndef Py_UNICODE_WIDE |
| 2417 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { | 2418 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { |
| 2418 Py_UCS4 ch2 = *s; | 2419 Py_UCS4 ch2 = *s; |
| 2419 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { | 2420 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { |
| 2420 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; | 2421 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; |
| 2421 s++; | 2422 s++; |
| 2422 size--; | 2423 size--; |
| 2423 } | 2424 } |
| 2424 } | 2425 } |
| 2425 #endif | 2426 #endif |
| (...skipping 5084 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 7510 if (repr == NULL) | 7511 if (repr == NULL) |
| 7511 return NULL; | 7512 return NULL; |
| 7512 | 7513 |
| 7513 p = PyUnicode_AS_UNICODE(repr); | 7514 p = PyUnicode_AS_UNICODE(repr); |
| 7514 | 7515 |
| 7515 /* Add quote */ | 7516 /* Add quote */ |
| 7516 *p++ = (findchar(s, size, '\'') && | 7517 *p++ = (findchar(s, size, '\'') && |
| 7517 !findchar(s, size, '"')) ? '"' : '\''; | 7518 !findchar(s, size, '"')) ? '"' : '\''; |
| 7518 while (size-- > 0) { | 7519 while (size-- > 0) { |
| 7519 Py_UNICODE ch = *s++; | 7520 Py_UNICODE ch = *s++; |
| 7520 | |
| 7521 /* Escape quotes and backslashes */ | 7521 /* Escape quotes and backslashes */ |
| 7522 if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { | 7522 if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { |
| 7523 *p++ = '\\'; | 7523 *p++ = '\\'; |
| 7524 *p++ = ch; | 7524 *p++ = ch; |
| 7525 continue; | 7525 continue; |
| 7526 } | 7526 } |
| 7527 | 7527 |
| 7528 #ifdef Py_UNICODE_WIDE | 7528 » /* Map special whitespace to '\t', '\n', '\r' */ |
| 7529 /* Map 21-bit characters to '\U00xxxxxx' */ | 7529 if (ch == '\t') { |
| 7530 else if (ch >= 0x10000) { | |
| 7531 *p++ = '\\'; | |
| 7532 *p++ = 'U'; | |
| 7533 *p++ = hexdigits[(ch >> 28) & 0x0000000F]; | |
| 7534 *p++ = hexdigits[(ch >> 24) & 0x0000000F]; | |
| 7535 *p++ = hexdigits[(ch >> 20) & 0x0000000F]; | |
| 7536 *p++ = hexdigits[(ch >> 16) & 0x0000000F]; | |
| 7537 *p++ = hexdigits[(ch >> 12) & 0x0000000F]; | |
| 7538 *p++ = hexdigits[(ch >> 8) & 0x0000000F]; | |
| 7539 *p++ = hexdigits[(ch >> 4) & 0x0000000F]; | |
| 7540 *p++ = hexdigits[ch & 0x0000000F]; | |
| 7541 » continue; | |
| 7542 } | |
| 7543 #else | |
| 7544 » /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ | |
| 7545 » else if (ch >= 0xD800 && ch < 0xDC00) { | |
| 7546 » Py_UNICODE ch2; | |
| 7547 » Py_UCS4 ucs; | |
| 7548 | |
| 7549 » ch2 = *s++; | |
| 7550 » size--; | |
| 7551 » if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { | |
| 7552 » » ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; | |
| 7553 » » *p++ = '\\'; | |
| 7554 » » *p++ = 'U'; | |
| 7555 » » *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; | |
| 7556 » » *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; | |
| 7557 » » *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; | |
| 7558 » » *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; | |
| 7559 » » *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; | |
| 7560 » » *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; | |
| 7561 » » *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; | |
| 7562 » » *p++ = hexdigits[ucs & 0x0000000F]; | |
| 7563 » » continue; | |
| 7564 » } | |
| 7565 » /* Fall through: isolated surrogates are copied as-is */ | |
| 7566 » s--; | |
| 7567 » size++; | |
| 7568 » } | |
| 7569 #endif | |
| 7570 | |
| 7571 /* Map 16-bit characters to '\uxxxx' */ | |
| 7572 if (ch >= 256) { | |
| 7573 *p++ = '\\'; | |
| 7574 *p++ = 'u'; | |
| 7575 *p++ = hexdigits[(ch >> 12) & 0x000F]; | |
| 7576 *p++ = hexdigits[(ch >> 8) & 0x000F]; | |
| 7577 *p++ = hexdigits[(ch >> 4) & 0x000F]; | |
| 7578 *p++ = hexdigits[ch & 0x000F]; | |
| 7579 } | |
| 7580 | |
| 7581 /* Map special whitespace to '\t', '\n', '\r' */ | |
| 7582 else if (ch == '\t') { | |
| 7583 *p++ = '\\'; | 7530 *p++ = '\\'; |
| 7584 *p++ = 't'; | 7531 *p++ = 't'; |
| 7585 } | 7532 } |
| 7586 else if (ch == '\n') { | 7533 else if (ch == '\n') { |
| 7587 *p++ = '\\'; | 7534 *p++ = '\\'; |
| 7588 *p++ = 'n'; | 7535 *p++ = 'n'; |
| 7589 } | 7536 } |
| 7590 else if (ch == '\r') { | 7537 else if (ch == '\r') { |
| 7591 *p++ = '\\'; | 7538 *p++ = '\\'; |
| 7592 *p++ = 'r'; | 7539 *p++ = 'r'; |
| 7593 } | 7540 } |
| 7594 | 7541 |
| 7595 /* Map non-printable US ASCII to '\xhh' */ | 7542 /* Map non-printable US ASCII to '\xhh' */ |
| 7596 else if (ch < ' ' || ch >= 0x7F) { | 7543 else if (ch < ' ' || ch == 0x7F) { |
| 7597 *p++ = '\\'; | 7544 *p++ = '\\'; |
| 7598 *p++ = 'x'; | 7545 *p++ = 'x'; |
| 7599 *p++ = hexdigits[(ch >> 4) & 0x000F]; | 7546 *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| 7600 *p++ = hexdigits[ch & 0x000F]; | 7547 *p++ = hexdigits[ch & 0x000F]; |
| 7601 } | 7548 } |
| 7602 | 7549 |
| 7603 /* Copy everything else as-is */ | 7550 /* Copy ASCII characters as-is */ |
| 7604 else | 7551 else if (ch < 0x7F) { |
| 7605 *p++ = (char) ch; | 7552 *p++ = ch; |
| 7553 } |
| 7554 |
| 7555 » /* Non-ASCII characters */ |
| 7556 else { |
| 7557 Py_UCS4 ucs = ch; |
| 7558 |
| 7559 #ifndef Py_UNICODE_WIDE |
| 7560 Py_UNICODE ch2 = 0; |
| 7561 /* Get code point from surrogate pair */ |
| 7562 if (size > 0) { |
| 7563 ch2 = *s; |
| 7564 if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00 |
| 7565 && ch2 <= 0xDFFF) { |
| 7566 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) |
| 7567 + 0x00010000; |
| 7568 s++; |
| 7569 size--; |
| 7570 } |
| 7571 } |
| 7572 #endif |
| 7573 /* Map Unicode whitespace and control characters |
| 7574 (categories Z* and C* except ASCII space) |
| 7575 */ |
| 7576 if (Py_UNICODE_ISHEXESCAPED(ucs)) { |
| 7577 » /* Map 8-bit characters to '\xhh' */ |
| 7578 » » if (ucs <= 0xff) { |
| 7579 » » *p++ = '\\'; |
| 7580 » » *p++ = 'x'; |
| 7581 » » *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| 7582 » » *p++ = hexdigits[ch & 0x000F]; |
| 7583 » » } |
| 7584 /* Map 21-bit characters to '\U00xxxxxx' */ |
| 7585 else if (ucs >= 0x10000) { |
| 7586 *p++ = '\\'; |
| 7587 *p++ = 'U'; |
| 7588 *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; |
| 7589 *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; |
| 7590 *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; |
| 7591 *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; |
| 7592 *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; |
| 7593 *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; |
| 7594 *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; |
| 7595 *p++ = hexdigits[ucs & 0x0000000F]; |
| 7596 } |
| 7597 /* Map 16-bit characters to '\uxxxx' */ |
| 7598 else { |
| 7599 *p++ = '\\'; |
| 7600 *p++ = 'u'; |
| 7601 *p++ = hexdigits[(ucs >> 12) & 0x000F]; |
| 7602 *p++ = hexdigits[(ucs >> 8) & 0x000F]; |
| 7603 *p++ = hexdigits[(ucs >> 4) & 0x000F]; |
| 7604 *p++ = hexdigits[ucs & 0x000F]; |
| 7605 } |
| 7606 } |
| 7607 /* Copy characters as-is */ |
| 7608 else { |
| 7609 *p++ = ch; |
| 7610 #ifndef Py_UNICODE_WIDE |
| 7611 if (ucs >= 0x10000) |
| 7612 *p++ = ch2; |
| 7613 #endif |
| 7614 } |
| 7615 } |
| 7606 } | 7616 } |
| 7607 /* Add quote */ | 7617 /* Add quote */ |
| 7608 *p++ = PyUnicode_AS_UNICODE(repr)[0]; | 7618 *p++ = PyUnicode_AS_UNICODE(repr)[0]; |
| 7609 | 7619 |
| 7610 *p = '\0'; | 7620 *p = '\0'; |
| 7611 _PyUnicode_Resize(&repr, p - PyUnicode_AS_UNICODE(repr)); | 7621 _PyUnicode_Resize(&repr, p - PyUnicode_AS_UNICODE(repr)); |
| 7612 return repr; | 7622 return repr; |
| 7613 } | 7623 } |
| 7614 | 7624 |
| 7615 PyDoc_STRVAR(rfind__doc__, | 7625 PyDoc_STRVAR(rfind__doc__, |
| (...skipping 1910 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 9526 } | 9536 } |
| 9527 #endif | 9537 #endif |
| 9528 | 9538 |
| 9529 | 9539 |
| 9530 /* | 9540 /* |
| 9531 Local variables: | 9541 Local variables: |
| 9532 c-basic-offset: 4 | 9542 c-basic-offset: 4 |
| 9533 indent-tabs-mode: nil | 9543 indent-tabs-mode: nil |
| 9534 End: | 9544 End: |
| 9535 */ | 9545 */ |
| OLD | NEW |