| Index: Objects/unicodeobject.c |
| =================================================================== |
| --- Objects/unicodeobject.c (revision 62744) |
| +++ Objects/unicodeobject.c (working copy) |
| @@ -2413,6 +2413,7 @@ |
| while (size-- > 0) { |
| Py_UCS4 ch = *s++; |
| + |
| #ifndef Py_UNICODE_WIDE |
| if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { |
| Py_UCS4 ch2 = *s; |
| @@ -7517,7 +7518,6 @@ |
| !findchar(s, size, '"')) ? '"' : '\''; |
| while (size-- > 0) { |
| Py_UNICODE ch = *s++; |
| - |
| /* Escape quotes and backslashes */ |
| if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { |
| *p++ = '\\'; |
| @@ -7525,62 +7525,9 @@ |
| continue; |
| } |
| -#ifdef Py_UNICODE_WIDE |
| - /* Map 21-bit characters to '\U00xxxxxx' */ |
| - else if (ch >= 0x10000) { |
| + /* Map special whitespace to '\t', '\n', '\r' */ |
| + if (ch == '\t') { |
| *p++ = '\\'; |
| - *p++ = 'U'; |
| - *p++ = hexdigits[(ch >> 28) & 0x0000000F]; |
| - *p++ = hexdigits[(ch >> 24) & 0x0000000F]; |
| - *p++ = hexdigits[(ch >> 20) & 0x0000000F]; |
| - *p++ = hexdigits[(ch >> 16) & 0x0000000F]; |
| - *p++ = hexdigits[(ch >> 12) & 0x0000000F]; |
| - *p++ = hexdigits[(ch >> 8) & 0x0000000F]; |
| - *p++ = hexdigits[(ch >> 4) & 0x0000000F]; |
| - *p++ = hexdigits[ch & 0x0000000F]; |
| - continue; |
| - } |
| -#else |
| - /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ |
| - else if (ch >= 0xD800 && ch < 0xDC00) { |
| - Py_UNICODE ch2; |
| - Py_UCS4 ucs; |
| - |
| - ch2 = *s++; |
| - size--; |
| - if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { |
| - ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; |
| - *p++ = '\\'; |
| - *p++ = 'U'; |
| - *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; |
| - *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; |
| - *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; |
| - *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; |
| - *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; |
| - *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; |
| - *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; |
| - *p++ = hexdigits[ucs & 0x0000000F]; |
| - continue; |
| - } |
| - /* Fall through: isolated surrogates are copied as-is */ |
| - s--; |
| - size++; |
| - } |
| -#endif |
| - |
| - /* Map 16-bit characters to '\uxxxx' */ |
| - if (ch >= 256) { |
| - *p++ = '\\'; |
| - *p++ = 'u'; |
| - *p++ = hexdigits[(ch >> 12) & 0x000F]; |
| - *p++ = hexdigits[(ch >> 8) & 0x000F]; |
| - *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| - *p++ = hexdigits[ch & 0x000F]; |
| - } |
| - |
| - /* Map special whitespace to '\t', \n', '\r' */ |
| - else if (ch == '\t') { |
| - *p++ = '\\'; |
| *p++ = 't'; |
| } |
| else if (ch == '\n') { |
| @@ -7593,16 +7540,79 @@ |
| } |
| /* Map non-printable US ASCII to '\xhh' */ |
| - else if (ch < ' ' || ch >= 0x7F) { |
| + else if (ch < ' ' || ch == 0x7F) { |
| *p++ = '\\'; |
| *p++ = 'x'; |
| *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| *p++ = hexdigits[ch & 0x000F]; |
| } |
| - /* Copy everything else as-is */ |
| - else |
| - *p++ = (char) ch; |
| + /* Copy ASCII characters as-is */ |
| + else if (ch < 0x7F) { |
| + *p++ = ch; |
| + } |
| + |
| + /* Non-ASCII characters */ |
| + else { |
| + Py_UCS4 ucs = ch; |
| + |
| +#ifndef Py_UNICODE_WIDE |
| + Py_UNICODE ch2 = 0; |
| + /* Get code point from surrogate pair */ |
| + if (size > 0) { |
| + ch2 = *s; |
| + if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00 |
| + && ch2 <= 0xDFFF) { |
| + ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) |
| + + 0x00010000; |
| + s++; |
| + size--; |
| + } |
| + } |
| +#endif |
| + /* Map Unicode whitespace and control characters |
| + (categories Z* and C* except ASCII space) |
| + */ |
| + if (Py_UNICODE_ISHEXESCAPED(ucs)) { |
| + /* Map 8-bit characters to '\xhh' */ |
| + if (ucs <= 0xff) { |
| + *p++ = '\\'; |
| + *p++ = 'x'; |
| + *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| + *p++ = hexdigits[ch & 0x000F]; |
| + } |
| + /* Map 21-bit characters to '\U00xxxxxx' */ |
| + else if (ucs >= 0x10000) { |
| + *p++ = '\\'; |
| + *p++ = 'U'; |
| + *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; |
| + *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; |
| + *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; |
| + *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; |
| + *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; |
| + *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; |
| + *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; |
| + *p++ = hexdigits[ucs & 0x0000000F]; |
| + } |
| + /* Map 16-bit characters to '\uxxxx' */ |
| + else { |
| + *p++ = '\\'; |
| + *p++ = 'u'; |
| + *p++ = hexdigits[(ucs >> 12) & 0x000F]; |
| + *p++ = hexdigits[(ucs >> 8) & 0x000F]; |
| + *p++ = hexdigits[(ucs >> 4) & 0x000F]; |
| + *p++ = hexdigits[ucs & 0x000F]; |
| + } |
| + } |
| + /* Copy characters as-is */ |
| + else { |
| + *p++ = ch; |
| +#ifndef Py_UNICODE_WIDE |
| + if (ucs >= 0x10000) |
| + *p++ = ch2; |
| +#endif |
| + } |
| + } |
| } |
| /* Add quote */ |
| *p++ = PyUnicode_AS_UNICODE(repr)[0]; |