Index: Objects/unicodeobject.c =================================================================== --- Objects/unicodeobject.c (revision 62744) +++ Objects/unicodeobject.c (working copy) @@ -2413,6 +2413,7 @@ while (size-- > 0) { Py_UCS4 ch = *s++; + #ifndef Py_UNICODE_WIDE if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { Py_UCS4 ch2 = *s; @@ -7517,7 +7518,6 @@ !findchar(s, size, '"')) ? '"' : '\''; while (size-- > 0) { Py_UNICODE ch = *s++; - /* Escape quotes and backslashes */ if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { *p++ = '\\'; @@ -7525,62 +7525,9 @@ continue; } -#ifdef Py_UNICODE_WIDE - /* Map 21-bit characters to '\U00xxxxxx' */ - else if (ch >= 0x10000) { + /* Map special whitespace to '\t', '\n', '\r' */ + if (ch == '\t') { *p++ = '\\'; - *p++ = 'U'; - *p++ = hexdigits[(ch >> 28) & 0x0000000F]; - *p++ = hexdigits[(ch >> 24) & 0x0000000F]; - *p++ = hexdigits[(ch >> 20) & 0x0000000F]; - *p++ = hexdigits[(ch >> 16) & 0x0000000F]; - *p++ = hexdigits[(ch >> 12) & 0x0000000F]; - *p++ = hexdigits[(ch >> 8) & 0x0000000F]; - *p++ = hexdigits[(ch >> 4) & 0x0000000F]; - *p++ = hexdigits[ch & 0x0000000F]; - continue; - } -#else - /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ - else if (ch >= 0xD800 && ch < 0xDC00) { - Py_UNICODE ch2; - Py_UCS4 ucs; - - ch2 = *s++; - size--; - if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { - ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; - *p++ = '\\'; - *p++ = 'U'; - *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; - *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; - *p++ = hexdigits[ucs & 0x0000000F]; - continue; - } - /* Fall through: isolated surrogates are copied as-is */ - s--; - size++; - } -#endif - - /* Map 16-bit characters to '\uxxxx' */ - if (ch >= 256) { - *p++ = '\\'; - *p++ = 'u'; - 
*p++ = hexdigits[(ch >> 12) & 0x000F]; - *p++ = hexdigits[(ch >> 8) & 0x000F]; - *p++ = hexdigits[(ch >> 4) & 0x000F]; - *p++ = hexdigits[ch & 0x000F]; - } - - /* Map special whitespace to '\t', \n', '\r' */ - else if (ch == '\t') { - *p++ = '\\'; *p++ = 't'; } else if (ch == '\n') { @@ -7593,16 +7540,79 @@ } /* Map non-printable US ASCII to '\xhh' */ - else if (ch < ' ' || ch >= 0x7F) { + else if (ch < ' ' || ch == 0x7F) { *p++ = '\\'; *p++ = 'x'; *p++ = hexdigits[(ch >> 4) & 0x000F]; *p++ = hexdigits[ch & 0x000F]; } - /* Copy everything else as-is */ - else - *p++ = (char) ch; + /* Copy ASCII characters as-is */ + else if (ch < 0x7F) { + *p++ = ch; + } + + /* Non-ASCII characters */ + else { + Py_UCS4 ucs = ch; + +#ifndef Py_UNICODE_WIDE + Py_UNICODE ch2 = 0; + /* Get code point from surrogate pair */ + if (size > 0) { + ch2 = *s; + if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00 + && ch2 <= 0xDFFF) { + ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + + 0x00010000; + s++; + size--; + } + } +#endif + /* Map Unicode whitespace and control characters + (categories Z* and C* except ASCII space) + */ + if (Py_UNICODE_ISHEXESCAPED(ucs)) { + /* Map 8-bit characters to '\xhh' */ + if (ucs <= 0xff) { + *p++ = '\\'; + *p++ = 'x'; + *p++ = hexdigits[(ch >> 4) & 0x000F]; + *p++ = hexdigits[ch & 0x000F]; + } + /* Map 21-bit characters to '\U00xxxxxx' */ + else if (ucs >= 0x10000) { + *p++ = '\\'; + *p++ = 'U'; + *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; + *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; + *p++ = hexdigits[ucs & 0x0000000F]; + } + /* Map 16-bit characters to '\uxxxx' */ + else { + *p++ = '\\'; + *p++ = 'u'; + *p++ = hexdigits[(ucs >> 12) & 0x000F]; + *p++ = hexdigits[(ucs >> 8) & 0x000F]; + *p++ = hexdigits[(ucs >> 4) & 0x000F]; + *p++ = 
hexdigits[ucs & 0x000F]; + } + } + /* Copy characters as-is */ + else { + *p++ = ch; +#ifndef Py_UNICODE_WIDE + if (ucs >= 0x10000) + *p++ = ch2; +#endif + } + } } /* Add quote */ *p++ = PyUnicode_AS_UNICODE(repr)[0];