| OLD | NEW |
| 1 /* | 1 /* |
| 2 | 2 |
| 3 Unicode implementation based on original code by Fredrik Lundh, | 3 Unicode implementation based on original code by Fredrik Lundh, |
| 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the | 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the |
| 5 Unicode Integration Proposal (see file Misc/unicode.txt). | 5 Unicode Integration Proposal (see file Misc/unicode.txt). |
| 6 | 6 |
| 7 Major speed upgrades to the method implementations at the Reykjavik | 7 Major speed upgrades to the method implementations at the Reykjavik |
| 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. | 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. |
| 9 | 9 |
| 10 Copyright (c) Corporation for National Research Initiatives. | 10 Copyright (c) Corporation for National Research Initiatives. |
| 11 | 11 |
| 12 -------------------------------------------------------------------- | 12 -------------------------------------------------------------------- |
| 13 The original string type implementation is: | 13 The original string type implementation is: |
| 14 | 14 |
| 15 Copyright (c) 1999 by Secret Labs AB | 15 Copyright (c) 1999 by Secret Labs AB |
| 16 Copyright (c) 1999 by Fredrik Lundh | 16 Copyright (c) 1999 by Fredrik Lundh |
| 17 | 17 |
| 18 By obtaining, using, and/or copying this software and/or its | 18 By obtaining, using, and/or copying this software and/or its |
| 19 associated documentation, you agree that you have read, understood, | 19 associated documentation, you agree that you have read, understood, |
| 20 and will comply with the following terms and conditions: | 20 and will comply with the following terms and conditions: |
| 21 | 21 |
| 22 Permission to use, copy, modify, and distribute this software and its | 22 Permission to use, copy, modify, and distribute this software and its |
| 23 associated documentation for any purpose and without fee is hereby | 23 associated documentation for any purpose and without fee is hereby |
| 24 granted, provided that the above copyright notice appears in all | 24 granted, provided that the above copyright notice appears in all |
| 25 copies, and that both that copyright notice and this permission notice | 25 copies, and that both that copyright notice and this permission notice |
| 26 appear in supporting documentation, and that the name of Secret Labs | 26 appear in supporting documentation, and that the name of Secret Labs |
| 27 AB or the author not be used in advertising or publicity pertaining to | 27 AB or the author not be used in advertising or publicity pertaining to |
| 28 distribution of the software without specific, written prior | 28 distribution of the software without specific, written prior |
| 29 permission. | 29 permission. |
| 30 | 30 |
| 31 SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO | 31 SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO |
| 32 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND | 32 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND |
| 33 FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR | 33 FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR |
| 34 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 34 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 35 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 35 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 36 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT | 36 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT |
| 37 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 37 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 38 -------------------------------------------------------------------- | 38 -------------------------------------------------------------------- |
| 39 | 39 |
| 40 */ | 40 */ |
| 41 | 41 |
| 42 #define PY_SSIZE_T_CLEAN | 42 #define PY_SSIZE_T_CLEAN |
| 43 #include "Python.h" | 43 #include "Python.h" |
| 44 #include "bytes_methods.h" | 44 #include "bytes_methods.h" |
| 45 | 45 |
| 46 #include "unicodeobject.h" | 46 #include "unicodeobject.h" |
| 47 #include "ucnhash.h" | 47 #include "ucnhash.h" |
| 48 | 48 |
| 49 #include "formatter_unicode.h" | 49 #include "formatter_unicode.h" |
| 50 | 50 |
| (...skipping 2315 matching lines...) Show 10 above Show 10 below |
| 2366 int iorder[] = {0, 1, 2, 3}; | 2366 int iorder[] = {0, 1, 2, 3}; |
| 2367 #else | 2367 #else |
| 2368 int iorder[] = {3, 2, 1, 0}; | 2368 int iorder[] = {3, 2, 1, 0}; |
| 2369 #endif | 2369 #endif |
| 2370 | 2370 |
| 2371 #define STORECHAR(CH) \ | 2371 #define STORECHAR(CH) \ |
| 2372 do { \ | 2372 do { \ |
| 2373 p[iorder[3]] = ((CH) >> 24) & 0xff; \ | 2373 p[iorder[3]] = ((CH) >> 24) & 0xff; \ |
| 2374 p[iorder[2]] = ((CH) >> 16) & 0xff; \ | 2374 p[iorder[2]] = ((CH) >> 16) & 0xff; \ |
| 2375 p[iorder[1]] = ((CH) >> 8) & 0xff; \ | 2375 p[iorder[1]] = ((CH) >> 8) & 0xff; \ |
| 2376 p[iorder[0]] = (CH) & 0xff; \ | 2376 p[iorder[0]] = (CH) & 0xff; \ |
| 2377 p += 4; \ | 2377 p += 4; \ |
| 2378 } while(0) | 2378 } while(0) |
| 2379 | 2379 |
| 2380 /* In narrow builds we can output surrogate pairs as one codepoint, | 2380 /* In narrow builds we can output surrogate pairs as one codepoint, |
| 2381 so we need less space. */ | 2381 so we need less space. */ |
| 2382 #ifndef Py_UNICODE_WIDE | 2382 #ifndef Py_UNICODE_WIDE |
| 2383 for (i = pairs = 0; i < size-1; i++) | 2383 for (i = pairs = 0; i < size-1; i++) |
| 2384 if (0xD800 <= s[i] && s[i] <= 0xDBFF && | 2384 if (0xD800 <= s[i] && s[i] <= 0xDBFF && |
| 2385 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF) | 2385 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF) |
| 2386 pairs++; | 2386 pairs++; |
| 2387 #endif | 2387 #endif |
| 2388 v = PyBytes_FromStringAndSize(NULL, | 2388 v = PyBytes_FromStringAndSize(NULL, |
| 2389 4 * (size - pairs + (byteorder == 0))); | 2389 4 * (size - pairs + (byteorder == 0))); |
| 2390 if (v == NULL) | 2390 if (v == NULL) |
| 2391 return NULL; | 2391 return NULL; |
| 2392 | 2392 |
| 2393 p = (unsigned char *)PyBytes_AS_STRING(v); | 2393 p = (unsigned char *)PyBytes_AS_STRING(v); |
| 2394 if (byteorder == 0) | 2394 if (byteorder == 0) |
| 2395 STORECHAR(0xFEFF); | 2395 STORECHAR(0xFEFF); |
| 2396 if (size == 0) | 2396 if (size == 0) |
| 2397 goto done; | 2397 goto done; |
| 2398 | 2398 |
| 2399 if (byteorder == -1) { | 2399 if (byteorder == -1) { |
| 2400 /* force LE */ | 2400 /* force LE */ |
| 2401 iorder[0] = 0; | 2401 iorder[0] = 0; |
| 2402 iorder[1] = 1; | 2402 iorder[1] = 1; |
| 2403 iorder[2] = 2; | 2403 iorder[2] = 2; |
| 2404 iorder[3] = 3; | 2404 iorder[3] = 3; |
| 2405 } | 2405 } |
| 2406 else if (byteorder == 1) { | 2406 else if (byteorder == 1) { |
| 2407 /* force BE */ | 2407 /* force BE */ |
| 2408 iorder[0] = 3; | 2408 iorder[0] = 3; |
| 2409 iorder[1] = 2; | 2409 iorder[1] = 2; |
| 2410 iorder[2] = 1; | 2410 iorder[2] = 1; |
| 2411 iorder[3] = 0; | 2411 iorder[3] = 0; |
| 2412 } | 2412 } |
| 2413 | 2413 |
| 2414 while (size-- > 0) { | 2414 while (size-- > 0) { |
| 2415 Py_UCS4 ch = *s++; | 2415 Py_UCS4 ch = *s++; |
| 2416 |
| 2416 #ifndef Py_UNICODE_WIDE | 2417 #ifndef Py_UNICODE_WIDE |
| 2417 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { | 2418 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { |
| 2418 Py_UCS4 ch2 = *s; | 2419 Py_UCS4 ch2 = *s; |
| 2419 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { | 2420 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { |
| 2420 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; | 2421 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; |
| 2421 s++; | 2422 s++; |
| 2422 size--; | 2423 size--; |
| 2423 } | 2424 } |
| 2424 } | 2425 } |
| 2425 #endif | 2426 #endif |
| 2426 STORECHAR(ch); | 2427 STORECHAR(ch); |
| 2427 } | 2428 } |
| 2428 | 2429 |
| 2429 done: | 2430 done: |
| 2430 result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_SIZE(v)); | 2431 result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_SIZE(v)); |
| 2431 Py_DECREF(v); | 2432 Py_DECREF(v); |
| 2432 return result; | 2433 return result; |
| 2433 #undef STORECHAR | 2434 #undef STORECHAR |
| 2434 } | 2435 } |
| 2435 | 2436 |
| 2436 PyObject *PyUnicode_AsUTF32String(PyObject *unicode) | 2437 PyObject *PyUnicode_AsUTF32String(PyObject *unicode) |
| 2437 { | 2438 { |
| 2438 if (!PyUnicode_Check(unicode)) { | 2439 if (!PyUnicode_Check(unicode)) { |
| 2439 PyErr_BadArgument(); | 2440 PyErr_BadArgument(); |
| 2440 return NULL; | 2441 return NULL; |
| 2441 } | 2442 } |
| 2442 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode), | 2443 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode), |
| 2443 PyUnicode_GET_SIZE(unicode), | 2444 PyUnicode_GET_SIZE(unicode), |
| 2444 NULL, | 2445 NULL, |
| 2445 0); | 2446 0); |
| 2446 } | 2447 } |
| 2447 | 2448 |
| 2448 /* --- UTF-16 Codec ------------------------------------------------------- */ | 2449 /* --- UTF-16 Codec ------------------------------------------------------- */ |
| 2449 | 2450 |
| 2450 PyObject * | 2451 PyObject * |
| 2451 PyUnicode_DecodeUTF16(const char *s, | 2452 PyUnicode_DecodeUTF16(const char *s, |
| 2452 Py_ssize_t size, | 2453 Py_ssize_t size, |
| 2453 const char *errors, | 2454 const char *errors, |
| 2454 int *byteorder) | 2455 int *byteorder) |
| 2455 { | 2456 { |
| 2456 return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL); | 2457 return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL); |
| 2457 } | 2458 } |
| 2458 | 2459 |
| 2459 PyObject * | 2460 PyObject * |
| 2460 PyUnicode_DecodeUTF16Stateful(const char *s, | 2461 PyUnicode_DecodeUTF16Stateful(const char *s, |
| 2461 Py_ssize_t size, | 2462 Py_ssize_t size, |
| 2462 const char *errors, | 2463 const char *errors, |
| 2463 int *byteorder, | 2464 int *byteorder, |
| 2464 Py_ssize_t *consumed) | 2465 Py_ssize_t *consumed) |
| 2465 { | 2466 { |
| (...skipping 5004 matching lines...) Show 10 above Show 10 below |
| 7470 | 7471 |
| 7471 Py_DECREF(str1); | 7472 Py_DECREF(str1); |
| 7472 Py_DECREF(str2); | 7473 Py_DECREF(str2); |
| 7473 return result; | 7474 return result; |
| 7474 } | 7475 } |
| 7475 | 7476 |
| 7476 static | 7477 static |
| 7477 PyObject *unicode_repr(PyObject *unicode) | 7478 PyObject *unicode_repr(PyObject *unicode) |
| 7478 { | 7479 { |
| 7479 PyObject *repr; | 7480 PyObject *repr; |
| 7480 Py_UNICODE *p; | 7481 Py_UNICODE *p; |
| 7481 Py_UNICODE *s = PyUnicode_AS_UNICODE(unicode); | 7482 Py_UNICODE *s = PyUnicode_AS_UNICODE(unicode); |
| 7482 Py_ssize_t size = PyUnicode_GET_SIZE(unicode); | 7483 Py_ssize_t size = PyUnicode_GET_SIZE(unicode); |
| 7483 | 7484 |
| 7484 /* XXX(nnorwitz): rather than over-allocating, it would be | 7485 /* XXX(nnorwitz): rather than over-allocating, it would be |
| 7485 better to choose a different scheme. Perhaps scan the | 7486 better to choose a different scheme. Perhaps scan the |
| 7486 first N-chars of the string and allocate based on that size. | 7487 first N-chars of the string and allocate based on that size. |
| 7487 */ | 7488 */ |
| 7488 /* Initial allocation is based on the longest-possible unichr | 7489 /* Initial allocation is based on the longest-possible unichr |
| 7489 escape. | 7490 escape. |
| 7490 | 7491 |
| 7491 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source | 7492 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source |
| 7492 unichr, so in this case it's the longest unichr escape. In | 7493 unichr, so in this case it's the longest unichr escape. In |
| 7493 narrow (UTF-16) builds this is five chars per source unichr | 7494 narrow (UTF-16) builds this is five chars per source unichr |
| 7494 since there are two unichrs in the surrogate pair, so in narrow | 7495 since there are two unichrs in the surrogate pair, so in narrow |
| 7495 (UTF-16) builds it's not the longest unichr escape. | 7496 (UTF-16) builds it's not the longest unichr escape. |
| 7496 | 7497 |
| 7497 In wide or narrow builds '\uxxxx' is 6 chars per source unichr, | 7498 In wide or narrow builds '\uxxxx' is 6 chars per source unichr, |
| 7498 so in the narrow (UTF-16) build case it's the longest unichr | 7499 so in the narrow (UTF-16) build case it's the longest unichr |
| 7499 escape. | 7500 escape. |
| 7500 */ | 7501 */ |
| 7501 | 7502 |
| 7502 repr = PyUnicode_FromUnicode(NULL, | 7503 repr = PyUnicode_FromUnicode(NULL, |
| 7503 2 /* quotes */ | 7504 2 /* quotes */ |
| 7504 #ifdef Py_UNICODE_WIDE | 7505 #ifdef Py_UNICODE_WIDE |
| 7505 + 10*size | 7506 + 10*size |
| 7506 #else | 7507 #else |
| 7507 + 6*size | 7508 + 6*size |
| 7508 #endif | 7509 #endif |
| 7509 + 1); | 7510 + 1); |
| 7510 if (repr == NULL) | 7511 if (repr == NULL) |
| 7511 return NULL; | 7512 return NULL; |
| 7512 | 7513 |
| 7513 p = PyUnicode_AS_UNICODE(repr); | 7514 p = PyUnicode_AS_UNICODE(repr); |
| 7514 | 7515 |
| 7515 /* Add quote */ | 7516 /* Add quote */ |
| 7516 *p++ = (findchar(s, size, '\'') && | 7517 *p++ = (findchar(s, size, '\'') && |
| 7517 !findchar(s, size, '"')) ? '"' : '\''; | 7518 !findchar(s, size, '"')) ? '"' : '\''; |
| 7518 while (size-- > 0) { | 7519 while (size-- > 0) { |
| 7519 Py_UNICODE ch = *s++; | 7520 Py_UNICODE ch = *s++; |
| 7520 | |
| 7521 /* Escape quotes and backslashes */ | 7521 /* Escape quotes and backslashes */ |
| 7522 if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { | 7522 if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { |
| 7523 *p++ = '\\'; | 7523 *p++ = '\\'; |
| 7524 *p++ = ch; | 7524 *p++ = ch; |
| 7525 continue; | 7525 continue; |
| 7526 } | 7526 } |
| 7527 | 7527 |
| 7528 #ifdef Py_UNICODE_WIDE | 7528 /* Map special whitespace to '\t', \n', '\r' */ |
| 7529 /* Map 21-bit characters to '\U00xxxxxx' */ | 7529 if (ch == '\t') { |
| 7530 else if (ch >= 0x10000) { | |
| 7531 *p++ = '\\'; | |
| 7532 *p++ = 'U'; | |
| 7533 *p++ = hexdigits[(ch >> 28) & 0x0000000F]; | |
| 7534 *p++ = hexdigits[(ch >> 24) & 0x0000000F]; | |
| 7535 *p++ = hexdigits[(ch >> 20) & 0x0000000F]; | |
| 7536 *p++ = hexdigits[(ch >> 16) & 0x0000000F]; | |
| 7537 *p++ = hexdigits[(ch >> 12) & 0x0000000F]; | |
| 7538 *p++ = hexdigits[(ch >> 8) & 0x0000000F]; | |
| 7539 *p++ = hexdigits[(ch >> 4) & 0x0000000F]; | |
| 7540 *p++ = hexdigits[ch & 0x0000000F]; | |
| 7541 continue; | |
| 7542 } | |
| 7543 #else | |
| 7544 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */ | |
| 7545 else if (ch >= 0xD800 && ch < 0xDC00) { | |
| 7546 Py_UNICODE ch2; | |
| 7547 Py_UCS4 ucs; | |
| 7548 | |
| 7549 ch2 = *s++; | |
| 7550 size--; | |
| 7551 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) { | |
| 7552 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000; | |
| 7553 *p++ = '\\'; | |
| 7554 *p++ = 'U'; | |
| 7555 *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; | |
| 7556 *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; | |
| 7557 *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; | |
| 7558 *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; | |
| 7559 *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; | |
| 7560 *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; | |
| 7561 *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; | |
| 7562 *p++ = hexdigits[ucs & 0x0000000F]; | |
| 7563 continue; | |
| 7564 } | |
| 7565 /* Fall through: isolated surrogates are copied as-is */ | |
| 7566 s--; | |
| 7567 size++; | |
| 7568 } | |
| 7569 #endif | |
| 7570 | |
| 7571 /* Map 16-bit characters to '\uxxxx' */ | |
| 7572 if (ch >= 256) { | |
| 7573 *p++ = '\\'; | |
| 7574 *p++ = 'u'; | |
| 7575 *p++ = hexdigits[(ch >> 12) & 0x000F]; | |
| 7576 *p++ = hexdigits[(ch >> 8) & 0x000F]; | |
| 7577 *p++ = hexdigits[(ch >> 4) & 0x000F]; | |
| 7578 *p++ = hexdigits[ch & 0x000F]; | |
| 7579 } | |
| 7580 | |
| 7581 /* Map special whitespace to '\t', \n', '\r' */ | |
| 7582 else if (ch == '\t') { | |
| 7583 *p++ = '\\'; | 7530 *p++ = '\\'; |
| 7584 *p++ = 't'; | 7531 *p++ = 't'; |
| 7585 } | 7532 } |
| 7586 else if (ch == '\n') { | 7533 else if (ch == '\n') { |
| 7587 *p++ = '\\'; | 7534 *p++ = '\\'; |
| 7588 *p++ = 'n'; | 7535 *p++ = 'n'; |
| 7589 } | 7536 } |
| 7590 else if (ch == '\r') { | 7537 else if (ch == '\r') { |
| 7591 *p++ = '\\'; | 7538 *p++ = '\\'; |
| 7592 *p++ = 'r'; | 7539 *p++ = 'r'; |
| 7593 } | 7540 } |
| 7594 | 7541 |
| 7595 /* Map non-printable US ASCII to '\xhh' */ | 7542 /* Map non-printable US ASCII to '\xhh' */ |
| 7596 else if (ch < ' ' || ch >= 0x7F) { | 7543 else if (ch < ' ' || ch == 0x7F) { |
| 7597 *p++ = '\\'; | 7544 *p++ = '\\'; |
| 7598 *p++ = 'x'; | 7545 *p++ = 'x'; |
| 7599 *p++ = hexdigits[(ch >> 4) & 0x000F]; | 7546 *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| 7600 *p++ = hexdigits[ch & 0x000F]; | 7547 *p++ = hexdigits[ch & 0x000F]; |
| 7601 } | 7548 } |
| 7602 | 7549 |
| 7603 /* Copy everything else as-is */ | 7550 /* Copy ASCII characters as-is */ |
| 7604 else | 7551 else if (ch < 0x7F) { |
| 7605 *p++ = (char) ch; | 7552 *p++ = ch; |
| 7553 } |
| 7554 |
| 7555 /* Non-ASCII characters */ |
| 7556 else { |
| 7557 Py_UCS4 ucs = ch; |
| 7558 |
| 7559 #ifndef Py_UNICODE_WIDE |
| 7560 Py_UNICODE ch2 = 0; |
| 7561 /* Get code point from surrogate pair */ |
| 7562 if (size > 0) { |
| 7563 ch2 = *s; |
| 7564 if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00 |
| 7565 && ch2 <= 0xDFFF) { |
| 7566 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) |
| 7567 + 0x00010000; |
| 7568 s++; |
| 7569 size--; |
| 7570 } |
| 7571 } |
| 7572 #endif |
| 7573 /* Map Unicode whitespace and control characters |
| 7574 (categories Z* and C* except ASCII space) |
| 7575 */ |
| 7576 if (Py_UNICODE_ISHEXESCAPED(ucs)) { |
| 7577 /* Map 8-bit characters to '\xhh' */ |
| 7578 if (ucs <= 0xff) { |
| 7579 *p++ = '\\'; |
| 7580 *p++ = 'x'; |
| 7581 *p++ = hexdigits[(ch >> 4) & 0x000F]; |
| 7582 *p++ = hexdigits[ch & 0x000F]; |
| 7583 } |
| 7584 /* Map 21-bit characters to '\U00xxxxxx' */ |
| 7585 else if (ucs >= 0x10000) { |
| 7586 *p++ = '\\'; |
| 7587 *p++ = 'U'; |
| 7588 *p++ = hexdigits[(ucs >> 28) & 0x0000000F]; |
| 7589 *p++ = hexdigits[(ucs >> 24) & 0x0000000F]; |
| 7590 *p++ = hexdigits[(ucs >> 20) & 0x0000000F]; |
| 7591 *p++ = hexdigits[(ucs >> 16) & 0x0000000F]; |
| 7592 *p++ = hexdigits[(ucs >> 12) & 0x0000000F]; |
| 7593 *p++ = hexdigits[(ucs >> 8) & 0x0000000F]; |
| 7594 *p++ = hexdigits[(ucs >> 4) & 0x0000000F]; |
| 7595 *p++ = hexdigits[ucs & 0x0000000F]; |
| 7596 } |
| 7597 /* Map 16-bit characters to '\uxxxx' */ |
| 7598 else { |
| 7599 *p++ = '\\'; |
| 7600 *p++ = 'u'; |
| 7601 *p++ = hexdigits[(ucs >> 12) & 0x000F]; |
| 7602 *p++ = hexdigits[(ucs >> 8) & 0x000F]; |
| 7603 *p++ = hexdigits[(ucs >> 4) & 0x000F]; |
| 7604 *p++ = hexdigits[ucs & 0x000F]; |
| 7605 } |
| 7606 } |
| 7607 /* Copy characters as-is */ |
| 7608 else { |
| 7609 *p++ = ch; |
| 7610 #ifndef Py_UNICODE_WIDE |
| 7611 if (ucs >= 0x10000) |
| 7612 *p++ = ch2; |
| 7613 #endif |
| 7614 } |
| 7615 } |
| 7606 } | 7616 } |
| 7607 /* Add quote */ | 7617 /* Add quote */ |
| 7608 *p++ = PyUnicode_AS_UNICODE(repr)[0]; | 7618 *p++ = PyUnicode_AS_UNICODE(repr)[0]; |
| 7609 | 7619 |
| 7610 *p = '\0'; | 7620 *p = '\0'; |
| 7611 _PyUnicode_Resize(&repr, p - PyUnicode_AS_UNICODE(repr)); | 7621 _PyUnicode_Resize(&repr, p - PyUnicode_AS_UNICODE(repr)); |
| 7612 return repr; | 7622 return repr; |
| 7613 } | 7623 } |
| 7614 | 7624 |
| 7615 PyDoc_STRVAR(rfind__doc__, | 7625 PyDoc_STRVAR(rfind__doc__, |
| 7616 "S.rfind(sub [,start [,end]]) -> int\n\ | 7626 "S.rfind(sub [,start [,end]]) -> int\n\ |
| 7617 \n\ | 7627 \n\ |
| 7618 Return the highest index in S where substring sub is found,\n\ | 7628 Return the highest index in S where substring sub is found,\n\ |
| 7619 such that sub is contained within s[start:end]. Optional\n\ | 7629 such that sub is contained within s[start:end]. Optional\n\ |
| 7620 arguments start and end are interpreted as in slice notation.\n\ | 7630 arguments start and end are interpreted as in slice notation.\n\ |
| 7621 \n\ | 7631 \n\ |
| 7622 Return -1 on failure."); | 7632 Return -1 on failure."); |
| 7623 | 7633 |
| 7624 static PyObject * | 7634 static PyObject * |
| 7625 unicode_rfind(PyUnicodeObject *self, PyObject *args) | 7635 unicode_rfind(PyUnicodeObject *self, PyObject *args) |
| 7626 { | 7636 { |
| 7627 PyObject *substring; | 7637 PyObject *substring; |
| 7628 Py_ssize_t start; | 7638 Py_ssize_t start; |
| 7629 Py_ssize_t end; | 7639 Py_ssize_t end; |
| 7630 Py_ssize_t result; | 7640 Py_ssize_t result; |
| 7631 | 7641 |
| 7632 if (!_ParseTupleFinds(args, &substring, &start, &end)) | 7642 if (!_ParseTupleFinds(args, &substring, &start, &end)) |
| 7633 return NULL; | 7643 return NULL; |
| 7634 | 7644 |
| 7635 result = stringlib_rfind_slice( | 7645 result = stringlib_rfind_slice( |
| 7636 PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), | 7646 PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), |
| 7637 PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), | 7647 PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), |
| 7638 start, end | 7648 start, end |
| 7639 ); | 7649 ); |
| 7640 | 7650 |
| 7641 Py_DECREF(substring); | 7651 Py_DECREF(substring); |
| 7642 | 7652 |
| 7643 return PyLong_FromSsize_t(result); | 7653 return PyLong_FromSsize_t(result); |
| 7644 } | 7654 } |
| 7645 | 7655 |
| 7646 PyDoc_STRVAR(rindex__doc__, | 7656 PyDoc_STRVAR(rindex__doc__, |
| 7647 "S.rindex(sub [,start [,end]]) -> int\n\ | 7657 "S.rindex(sub [,start [,end]]) -> int\n\ |
| 7648 \n\ | 7658 \n\ |
| 7649 Like S.rfind() but raise ValueError when the substring is not found."); | 7659 Like S.rfind() but raise ValueError when the substring is not found."); |
| 7650 | 7660 |
| 7651 static PyObject * | 7661 static PyObject * |
| 7652 unicode_rindex(PyUnicodeObject *self, PyObject *args) | 7662 unicode_rindex(PyUnicodeObject *self, PyObject *args) |
| 7653 { | 7663 { |
| 7654 PyObject *substring; | 7664 PyObject *substring; |
| 7655 Py_ssize_t start; | 7665 Py_ssize_t start; |
| (...skipping 1830 matching lines...) Show 10 above Show 10 below |
| 9486 while ((*u++ = *s2++)); | 9496 while ((*u++ = *s2++)); |
| 9487 return s1; | 9497 return s1; |
| 9488 } | 9498 } |
| 9489 | 9499 |
| 9490 Py_UNICODE* | 9500 Py_UNICODE* |
| 9491 Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) | 9501 Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) |
| 9492 { | 9502 { |
| 9493 Py_UNICODE *u = s1; | 9503 Py_UNICODE *u = s1; |
| 9494 while ((*u++ = *s2++)) | 9504 while ((*u++ = *s2++)) |
| 9495 if (n-- == 0) | 9505 if (n-- == 0) |
| 9496 break; | 9506 break; |
| 9497 return s1; | 9507 return s1; |
| 9498 } | 9508 } |
| 9499 | 9509 |
| 9500 int | 9510 int |
| 9501 Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) | 9511 Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) |
| 9502 { | 9512 { |
| 9503 while (*s1 && *s2 && *s1 == *s2) | 9513 while (*s1 && *s2 && *s1 == *s2) |
| 9504 s1++, s2++; | 9514 s1++, s2++; |
| 9505 if (*s1 && *s2) | 9515 if (*s1 && *s2) |
| 9506 return (*s1 < *s2) ? -1 : +1; | 9516 return (*s1 < *s2) ? -1 : +1; |
| 9507 if (*s1) | 9517 if (*s1) |
| 9508 return 1; | 9518 return 1; |
| 9509 if (*s2) | 9519 if (*s2) |
| 9510 return -1; | 9520 return -1; |
| 9511 return 0; | 9521 return 0; |
| 9512 } | 9522 } |
| 9513 | 9523 |
| 9514 Py_UNICODE* | 9524 Py_UNICODE* |
| 9515 Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) | 9525 Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) |
| 9516 { | 9526 { |
| 9517 const Py_UNICODE *p; | 9527 const Py_UNICODE *p; |
| 9518 for (p = s; *p; p++) | 9528 for (p = s; *p; p++) |
| 9519 if (*p == c) | 9529 if (*p == c) |
| 9520 return (Py_UNICODE*)p; | 9530 return (Py_UNICODE*)p; |
| 9521 return NULL; | 9531 return NULL; |
| 9522 } | 9532 } |
| 9523 | 9533 |
| 9524 | 9534 |
| 9525 #ifdef __cplusplus | 9535 #ifdef __cplusplus |
| 9526 } | 9536 } |
| 9527 #endif | 9537 #endif |