Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(5)

Unified Diff: Objects/unicodeobject.c

Issue 767: [issue2630] repr() should not escape non-ASCII characters (Closed) SVN Base: http://svn.python.org/view/*checkout*/python/branches/py3k/
Patch Set: Created 1 year, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: Objects/unicodeobject.c
===================================================================
--- Objects/unicodeobject.c (revision 62744)
+++ Objects/unicodeobject.c (working copy)
@@ -2413,6 +2413,7 @@
while (size-- > 0) {
Py_UCS4 ch = *s++;
+
#ifndef Py_UNICODE_WIDE
if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
Py_UCS4 ch2 = *s;
@@ -7517,7 +7518,6 @@
!findchar(s, size, '"')) ? '"' : '\'';
while (size-- > 0) {
Py_UNICODE ch = *s++;
-
/* Escape quotes and backslashes */
if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) {
*p++ = '\\';
@@ -7525,62 +7525,9 @@
continue;
}
-#ifdef Py_UNICODE_WIDE
- /* Map 21-bit characters to '\U00xxxxxx' */
- else if (ch >= 0x10000) {
+ /* Map special whitespace to '\t', '\n', '\r' */
+ if (ch == '\t') {
*p++ = '\\';
- *p++ = 'U';
- *p++ = hexdigits[(ch >> 28) & 0x0000000F];
- *p++ = hexdigits[(ch >> 24) & 0x0000000F];
- *p++ = hexdigits[(ch >> 20) & 0x0000000F];
- *p++ = hexdigits[(ch >> 16) & 0x0000000F];
- *p++ = hexdigits[(ch >> 12) & 0x0000000F];
- *p++ = hexdigits[(ch >> 8) & 0x0000000F];
- *p++ = hexdigits[(ch >> 4) & 0x0000000F];
- *p++ = hexdigits[ch & 0x0000000F];
- continue;
- }
-#else
- /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
- else if (ch >= 0xD800 && ch < 0xDC00) {
- Py_UNICODE ch2;
- Py_UCS4 ucs;
-
- ch2 = *s++;
- size--;
- if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
- ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
- *p++ = '\\';
- *p++ = 'U';
- *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
- *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
- *p++ = hexdigits[ucs & 0x0000000F];
- continue;
- }
- /* Fall through: isolated surrogates are copied as-is */
- s--;
- size++;
- }
-#endif
-
- /* Map 16-bit characters to '\uxxxx' */
- if (ch >= 256) {
- *p++ = '\\';
- *p++ = 'u';
- *p++ = hexdigits[(ch >> 12) & 0x000F];
- *p++ = hexdigits[(ch >> 8) & 0x000F];
- *p++ = hexdigits[(ch >> 4) & 0x000F];
- *p++ = hexdigits[ch & 0x000F];
- }
-
- /* Map special whitespace to '\t', \n', '\r' */
- else if (ch == '\t') {
- *p++ = '\\';
*p++ = 't';
}
else if (ch == '\n') {
@@ -7593,16 +7540,79 @@
}
/* Map non-printable US ASCII to '\xhh' */
- else if (ch < ' ' || ch >= 0x7F) {
+ else if (ch < ' ' || ch == 0x7F) {
*p++ = '\\';
*p++ = 'x';
*p++ = hexdigits[(ch >> 4) & 0x000F];
*p++ = hexdigits[ch & 0x000F];
}
- /* Copy everything else as-is */
- else
- *p++ = (char) ch;
+ /* Copy ASCII characters as-is */
+ else if (ch < 0x7F) {
+ *p++ = ch;
+ }
+
+ /* Non-ASCII characters */
+ else {
+ Py_UCS4 ucs = ch;
+
+#ifndef Py_UNICODE_WIDE
+ Py_UNICODE ch2 = 0;
+ /* Get code point from surrogate pair */
+ if (size > 0) {
+ ch2 = *s;
+ if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
+ && ch2 <= 0xDFFF) {
+ ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF))
+ + 0x00010000;
+ s++;
+ size--;
+ }
+ }
+#endif
+ /* Map Unicode whitespace and control characters
+ (categories Z* and C* except ASCII space)
+ */
+ if (Py_UNICODE_ISHEXESCAPED(ucs)) {
+ /* Map 8-bit characters to '\xhh' */
+ if (ucs <= 0xff) {
+ *p++ = '\\';
+ *p++ = 'x';
+ *p++ = hexdigits[(ch >> 4) & 0x000F];
+ *p++ = hexdigits[ch & 0x000F];
+ }
+ /* Map 21-bit characters to '\U00xxxxxx' */
+ else if (ucs >= 0x10000) {
+ *p++ = '\\';
+ *p++ = 'U';
+ *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
+ *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
+ *p++ = hexdigits[ucs & 0x0000000F];
+ }
+ /* Map 16-bit characters to '\uxxxx' */
+ else {
+ *p++ = '\\';
+ *p++ = 'u';
+ *p++ = hexdigits[(ucs >> 12) & 0x000F];
+ *p++ = hexdigits[(ucs >> 8) & 0x000F];
+ *p++ = hexdigits[(ucs >> 4) & 0x000F];
+ *p++ = hexdigits[ucs & 0x000F];
+ }
+ }
+ /* Copy characters as-is */
+ else {
+ *p++ = ch;
+#ifndef Py_UNICODE_WIDE
+ if (ucs >= 0x10000)
+ *p++ = ch2;
+#endif
+ }
+ }
}
/* Add quote */
*p++ = PyUnicode_AS_UNICODE(repr)[0];

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld r483