Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(8)

Side by Side Diff: Objects/unicodeobject.c

Issue 767: [issue2630] repr() should not escape non-ASCII characters (Closed) SVN Base: http://svn.python.org/view/*checkout*/python/branches/py3k/
Patch Set: Created 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
OLD NEW
1 /* 1 /*
2 2
3 Unicode implementation based on original code by Fredrik Lundh, 3 Unicode implementation based on original code by Fredrik Lundh,
4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
5 Unicode Integration Proposal (see file Misc/unicode.txt). 5 Unicode Integration Proposal (see file Misc/unicode.txt).
6 6
7 Major speed upgrades to the method implementations at the Reykjavik 7 Major speed upgrades to the method implementations at the Reykjavik
8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
9 9
10 Copyright (c) Corporation for National Research Initiatives. 10 Copyright (c) Corporation for National Research Initiatives.
11 11
12 -------------------------------------------------------------------- 12 --------------------------------------------------------------------
13 The original string type implementation is: 13 The original string type implementation is:
14 14
15 Copyright (c) 1999 by Secret Labs AB 15 Copyright (c) 1999 by Secret Labs AB
16 Copyright (c) 1999 by Fredrik Lundh 16 Copyright (c) 1999 by Fredrik Lundh
17 17
18 By obtaining, using, and/or copying this software and/or its 18 By obtaining, using, and/or copying this software and/or its
19 associated documentation, you agree that you have read, understood, 19 associated documentation, you agree that you have read, understood,
20 and will comply with the following terms and conditions: 20 and will comply with the following terms and conditions:
21 21
22 Permission to use, copy, modify, and distribute this software and its 22 Permission to use, copy, modify, and distribute this software and its
23 associated documentation for any purpose and without fee is hereby 23 associated documentation for any purpose and without fee is hereby
24 granted, provided that the above copyright notice appears in all 24 granted, provided that the above copyright notice appears in all
25 copies, and that both that copyright notice and this permission notice 25 copies, and that both that copyright notice and this permission notice
26 appear in supporting documentation, and that the name of Secret Labs 26 appear in supporting documentation, and that the name of Secret Labs
27 AB or the author not be used in advertising or publicity pertaining to 27 AB or the author not be used in advertising or publicity pertaining to
28 distribution of the software without specific, written prior 28 distribution of the software without specific, written prior
29 permission. 29 permission.
30 30
31 SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO 31 SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
32 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 32 THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
33 FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR 33 FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
34 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 34 ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
35 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 35 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
36 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 36 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
37 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 37 OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
38 -------------------------------------------------------------------- 38 --------------------------------------------------------------------
39 39
40 */ 40 */
41 41
42 #define PY_SSIZE_T_CLEAN 42 #define PY_SSIZE_T_CLEAN
43 #include "Python.h" 43 #include "Python.h"
44 #include "bytes_methods.h" 44 #include "bytes_methods.h"
45 45
46 #include "unicodeobject.h" 46 #include "unicodeobject.h"
47 #include "ucnhash.h" 47 #include "ucnhash.h"
48 48
49 #include "formatter_unicode.h" 49 #include "formatter_unicode.h"
50 50
(...skipping 2315 matching lines...) Show 10 above Show 10 below
2366 int iorder[] = {0, 1, 2, 3}; 2366 int iorder[] = {0, 1, 2, 3};
2367 #else 2367 #else
2368 int iorder[] = {3, 2, 1, 0}; 2368 int iorder[] = {3, 2, 1, 0};
2369 #endif 2369 #endif
2370 2370
2371 #define STORECHAR(CH) \ 2371 #define STORECHAR(CH) \
2372 do { \ 2372 do { \
2373 p[iorder[3]] = ((CH) >> 24) & 0xff; \ 2373 p[iorder[3]] = ((CH) >> 24) & 0xff; \
2374 p[iorder[2]] = ((CH) >> 16) & 0xff; \ 2374 p[iorder[2]] = ((CH) >> 16) & 0xff; \
2375 p[iorder[1]] = ((CH) >> 8) & 0xff; \ 2375 p[iorder[1]] = ((CH) >> 8) & 0xff; \
2376 p[iorder[0]] = (CH) & 0xff; \ 2376 p[iorder[0]] = (CH) & 0xff; \
2377 p += 4; \ 2377 p += 4; \
2378 } while(0) 2378 } while(0)
2379 2379
2380 /* In narrow builds we can output surrogate pairs as one codepoint, 2380 /* In narrow builds we can output surrogate pairs as one codepoint,
2381 so we need less space. */ 2381 so we need less space. */
2382 #ifndef Py_UNICODE_WIDE 2382 #ifndef Py_UNICODE_WIDE
2383 for (i = pairs = 0; i < size-1; i++) 2383 for (i = pairs = 0; i < size-1; i++)
2384 if (0xD800 <= s[i] && s[i] <= 0xDBFF && 2384 if (0xD800 <= s[i] && s[i] <= 0xDBFF &&
2385 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF) 2385 0xDC00 <= s[i+1] && s[i+1] <= 0xDFFF)
2386 pairs++; 2386 pairs++;
2387 #endif 2387 #endif
2388 v = PyBytes_FromStringAndSize(NULL, 2388 v = PyBytes_FromStringAndSize(NULL,
2389 4 * (size - pairs + (byteorder == 0))); 2389 4 * (size - pairs + (byteorder == 0)));
2390 if (v == NULL) 2390 if (v == NULL)
2391 return NULL; 2391 return NULL;
2392 2392
2393 p = (unsigned char *)PyBytes_AS_STRING(v); 2393 p = (unsigned char *)PyBytes_AS_STRING(v);
2394 if (byteorder == 0) 2394 if (byteorder == 0)
2395 STORECHAR(0xFEFF); 2395 STORECHAR(0xFEFF);
2396 if (size == 0) 2396 if (size == 0)
2397 goto done; 2397 goto done;
2398 2398
2399 if (byteorder == -1) { 2399 if (byteorder == -1) {
2400 /* force LE */ 2400 /* force LE */
2401 iorder[0] = 0; 2401 iorder[0] = 0;
2402 iorder[1] = 1; 2402 iorder[1] = 1;
2403 iorder[2] = 2; 2403 iorder[2] = 2;
2404 iorder[3] = 3; 2404 iorder[3] = 3;
2405 } 2405 }
2406 else if (byteorder == 1) { 2406 else if (byteorder == 1) {
2407 /* force BE */ 2407 /* force BE */
2408 iorder[0] = 3; 2408 iorder[0] = 3;
2409 iorder[1] = 2; 2409 iorder[1] = 2;
2410 iorder[2] = 1; 2410 iorder[2] = 1;
2411 iorder[3] = 0; 2411 iorder[3] = 0;
2412 } 2412 }
2413 2413
2414 while (size-- > 0) { 2414 while (size-- > 0) {
2415 Py_UCS4 ch = *s++; 2415 Py_UCS4 ch = *s++;
2416
2416 #ifndef Py_UNICODE_WIDE 2417 #ifndef Py_UNICODE_WIDE
2417 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) { 2418 if (0xD800 <= ch && ch <= 0xDBFF && size > 0) {
2418 Py_UCS4 ch2 = *s; 2419 Py_UCS4 ch2 = *s;
2419 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { 2420 if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
2420 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000; 2421 ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
2421 s++; 2422 s++;
2422 size--; 2423 size--;
2423 } 2424 }
2424 } 2425 }
2425 #endif 2426 #endif
2426 STORECHAR(ch); 2427 STORECHAR(ch);
2427 } 2428 }
2428 2429
2429 done: 2430 done:
2430 result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_SIZE(v)); 2431 result = PyString_FromStringAndSize(PyBytes_AS_STRING(v), Py_SIZE(v));
2431 Py_DECREF(v); 2432 Py_DECREF(v);
2432 return result; 2433 return result;
2433 #undef STORECHAR 2434 #undef STORECHAR
2434 } 2435 }
2435 2436
2436 PyObject *PyUnicode_AsUTF32String(PyObject *unicode) 2437 PyObject *PyUnicode_AsUTF32String(PyObject *unicode)
2437 { 2438 {
2438 if (!PyUnicode_Check(unicode)) { 2439 if (!PyUnicode_Check(unicode)) {
2439 PyErr_BadArgument(); 2440 PyErr_BadArgument();
2440 return NULL; 2441 return NULL;
2441 } 2442 }
2442 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode), 2443 return PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(unicode),
2443 PyUnicode_GET_SIZE(unicode), 2444 PyUnicode_GET_SIZE(unicode),
2444 NULL, 2445 NULL,
2445 0); 2446 0);
2446 } 2447 }
2447 2448
2448 /* --- UTF-16 Codec ------------------------------------------------------- */ 2449 /* --- UTF-16 Codec ------------------------------------------------------- */
2449 2450
2450 PyObject * 2451 PyObject *
2451 PyUnicode_DecodeUTF16(const char *s, 2452 PyUnicode_DecodeUTF16(const char *s,
2452 Py_ssize_t size, 2453 Py_ssize_t size,
2453 const char *errors, 2454 const char *errors,
2454 int *byteorder) 2455 int *byteorder)
2455 { 2456 {
2456 return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL); 2457 return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
2457 } 2458 }
2458 2459
2459 PyObject * 2460 PyObject *
2460 PyUnicode_DecodeUTF16Stateful(const char *s, 2461 PyUnicode_DecodeUTF16Stateful(const char *s,
2461 Py_ssize_t size, 2462 Py_ssize_t size,
2462 const char *errors, 2463 const char *errors,
2463 int *byteorder, 2464 int *byteorder,
2464 Py_ssize_t *consumed) 2465 Py_ssize_t *consumed)
2465 { 2466 {
(...skipping 5004 matching lines...) Show 10 above Show 10 below
7470 7471
7471 Py_DECREF(str1); 7472 Py_DECREF(str1);
7472 Py_DECREF(str2); 7473 Py_DECREF(str2);
7473 return result; 7474 return result;
7474 } 7475 }
7475 7476
7476 static 7477 static
7477 PyObject *unicode_repr(PyObject *unicode) 7478 PyObject *unicode_repr(PyObject *unicode)
7478 { 7479 {
7479 PyObject *repr; 7480 PyObject *repr;
7480 Py_UNICODE *p; 7481 Py_UNICODE *p;
7481 Py_UNICODE *s = PyUnicode_AS_UNICODE(unicode); 7482 Py_UNICODE *s = PyUnicode_AS_UNICODE(unicode);
7482 Py_ssize_t size = PyUnicode_GET_SIZE(unicode); 7483 Py_ssize_t size = PyUnicode_GET_SIZE(unicode);
7483 7484
7484 /* XXX(nnorwitz): rather than over-allocating, it would be 7485 /* XXX(nnorwitz): rather than over-allocating, it would be
7485 better to choose a different scheme. Perhaps scan the 7486 better to choose a different scheme. Perhaps scan the
7486 first N-chars of the string and allocate based on that size. 7487 first N-chars of the string and allocate based on that size.
7487 */ 7488 */
7488 /* Initial allocation is based on the longest-possible unichr 7489 /* Initial allocation is based on the longest-possible unichr
7489 escape. 7490 escape.
7490 7491
7491 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source 7492 In wide (UTF-32) builds '\U00xxxxxx' is 10 chars per source
7492 unichr, so in this case it's the longest unichr escape. In 7493 unichr, so in this case it's the longest unichr escape. In
7493 narrow (UTF-16) builds this is five chars per source unichr 7494 narrow (UTF-16) builds this is five chars per source unichr
7494 since there are two unichrs in the surrogate pair, so in narrow 7495 since there are two unichrs in the surrogate pair, so in narrow
7495 (UTF-16) builds it's not the longest unichr escape. 7496 (UTF-16) builds it's not the longest unichr escape.
7496 7497
7497 In wide or narrow builds '\uxxxx' is 6 chars per source unichr, 7498 In wide or narrow builds '\uxxxx' is 6 chars per source unichr,
7498 so in the narrow (UTF-16) build case it's the longest unichr 7499 so in the narrow (UTF-16) build case it's the longest unichr
7499 escape. 7500 escape.
7500 */ 7501 */
7501 7502
7502 repr = PyUnicode_FromUnicode(NULL, 7503 repr = PyUnicode_FromUnicode(NULL,
7503 2 /* quotes */ 7504 2 /* quotes */
7504 #ifdef Py_UNICODE_WIDE 7505 #ifdef Py_UNICODE_WIDE
7505 + 10*size 7506 + 10*size
7506 #else 7507 #else
7507 + 6*size 7508 + 6*size
7508 #endif 7509 #endif
7509 + 1); 7510 + 1);
7510 if (repr == NULL) 7511 if (repr == NULL)
7511 return NULL; 7512 return NULL;
7512 7513
7513 p = PyUnicode_AS_UNICODE(repr); 7514 p = PyUnicode_AS_UNICODE(repr);
7514 7515
7515 /* Add quote */ 7516 /* Add quote */
7516 *p++ = (findchar(s, size, '\'') && 7517 *p++ = (findchar(s, size, '\'') &&
7517 !findchar(s, size, '"')) ? '"' : '\''; 7518 !findchar(s, size, '"')) ? '"' : '\'';
7518 while (size-- > 0) { 7519 while (size-- > 0) {
7519 Py_UNICODE ch = *s++; 7520 Py_UNICODE ch = *s++;
7520
7521 /* Escape quotes and backslashes */ 7521 /* Escape quotes and backslashes */
7522 if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) { 7522 if ((ch == PyUnicode_AS_UNICODE(repr)[0]) || (ch == '\\')) {
7523 *p++ = '\\'; 7523 *p++ = '\\';
7524 *p++ = ch; 7524 *p++ = ch;
7525 continue; 7525 continue;
7526 } 7526 }
7527 7527
7528 #ifdef Py_UNICODE_WIDE 7528 /* Map special whitespace to '\t', \n', '\r' */
7529 /* Map 21-bit characters to '\U00xxxxxx' */ 7529 if (ch == '\t') {
7530 else if (ch >= 0x10000) {
7531 *p++ = '\\';
7532 *p++ = 'U';
7533 *p++ = hexdigits[(ch >> 28) & 0x0000000F];
7534 *p++ = hexdigits[(ch >> 24) & 0x0000000F];
7535 *p++ = hexdigits[(ch >> 20) & 0x0000000F];
7536 *p++ = hexdigits[(ch >> 16) & 0x0000000F];
7537 *p++ = hexdigits[(ch >> 12) & 0x0000000F];
7538 *p++ = hexdigits[(ch >> 8) & 0x0000000F];
7539 *p++ = hexdigits[(ch >> 4) & 0x0000000F];
7540 *p++ = hexdigits[ch & 0x0000000F];
7541 continue;
7542 }
7543 #else
7544 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
7545 else if (ch >= 0xD800 && ch < 0xDC00) {
7546 Py_UNICODE ch2;
7547 Py_UCS4 ucs;
7548
7549 ch2 = *s++;
7550 size--;
7551 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
7552 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
7553 *p++ = '\\';
7554 *p++ = 'U';
7555 *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
7556 *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
7557 *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
7558 *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
7559 *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
7560 *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
7561 *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
7562 *p++ = hexdigits[ucs & 0x0000000F];
7563 continue;
7564 }
7565 /* Fall through: isolated surrogates are copied as-is */
7566 s--;
7567 size++;
7568 }
7569 #endif
7570
7571 /* Map 16-bit characters to '\uxxxx' */
7572 if (ch >= 256) {
7573 *p++ = '\\';
7574 *p++ = 'u';
7575 *p++ = hexdigits[(ch >> 12) & 0x000F];
7576 *p++ = hexdigits[(ch >> 8) & 0x000F];
7577 *p++ = hexdigits[(ch >> 4) & 0x000F];
7578 *p++ = hexdigits[ch & 0x000F];
7579 }
7580
7581 /* Map special whitespace to '\t', \n', '\r' */
7582 else if (ch == '\t') {
7583 *p++ = '\\'; 7530 *p++ = '\\';
7584 *p++ = 't'; 7531 *p++ = 't';
7585 } 7532 }
7586 else if (ch == '\n') { 7533 else if (ch == '\n') {
7587 *p++ = '\\'; 7534 *p++ = '\\';
7588 *p++ = 'n'; 7535 *p++ = 'n';
7589 } 7536 }
7590 else if (ch == '\r') { 7537 else if (ch == '\r') {
7591 *p++ = '\\'; 7538 *p++ = '\\';
7592 *p++ = 'r'; 7539 *p++ = 'r';
7593 } 7540 }
7594 7541
7595 /* Map non-printable US ASCII to '\xhh' */ 7542 /* Map non-printable US ASCII to '\xhh' */
7596 else if (ch < ' ' || ch >= 0x7F) { 7543 else if (ch < ' ' || ch == 0x7F) {
7597 *p++ = '\\'; 7544 *p++ = '\\';
7598 *p++ = 'x'; 7545 *p++ = 'x';
7599 *p++ = hexdigits[(ch >> 4) & 0x000F]; 7546 *p++ = hexdigits[(ch >> 4) & 0x000F];
7600 *p++ = hexdigits[ch & 0x000F]; 7547 *p++ = hexdigits[ch & 0x000F];
7601 } 7548 }
7602 7549
7603 /* Copy everything else as-is */ 7550 /* Copy ASCII characters as-is */
7604 else 7551 else if (ch < 0x7F) {
7605 *p++ = (char) ch; 7552 *p++ = ch;
7553 }
7554
7555 /* Non-ASCII characters */
7556 else {
7557 Py_UCS4 ucs = ch;
7558
7559 #ifndef Py_UNICODE_WIDE
7560 Py_UNICODE ch2 = 0;
7561 /* Get code point from surrogate pair */
7562 if (size > 0) {
7563 ch2 = *s;
7564 if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
7565 && ch2 <= 0xDFFF) {
7566 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF))
7567 + 0x00010000;
7568 s++;
7569 size--;
7570 }
7571 }
7572 #endif
7573 /* Map Unicode whitespace and control characters
7574 (categories Z* and C* except ASCII space)
7575 */
7576 if (Py_UNICODE_ISHEXESCAPED(ucs)) {
7577 /* Map 8-bit characters to '\xhh' */
7578 if (ucs <= 0xff) {
7579 *p++ = '\\';
7580 *p++ = 'x';
7581 *p++ = hexdigits[(ch >> 4) & 0x000F];
7582 *p++ = hexdigits[ch & 0x000F];
7583 }
7584 /* Map 21-bit characters to '\U00xxxxxx' */
7585 else if (ucs >= 0x10000) {
7586 *p++ = '\\';
7587 *p++ = 'U';
7588 *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
7589 *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
7590 *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
7591 *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
7592 *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
7593 *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
7594 *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
7595 *p++ = hexdigits[ucs & 0x0000000F];
7596 }
7597 /* Map 16-bit characters to '\uxxxx' */
7598 else {
7599 *p++ = '\\';
7600 *p++ = 'u';
7601 *p++ = hexdigits[(ucs >> 12) & 0x000F];
7602 *p++ = hexdigits[(ucs >> 8) & 0x000F];
7603 *p++ = hexdigits[(ucs >> 4) & 0x000F];
7604 *p++ = hexdigits[ucs & 0x000F];
7605 }
7606 }
7607 /* Copy characters as-is */
7608 else {
7609 *p++ = ch;
7610 #ifndef Py_UNICODE_WIDE
7611 if (ucs >= 0x10000)
7612 *p++ = ch2;
7613 #endif
7614 }
7615 }
7606 } 7616 }
7607 /* Add quote */ 7617 /* Add quote */
7608 *p++ = PyUnicode_AS_UNICODE(repr)[0]; 7618 *p++ = PyUnicode_AS_UNICODE(repr)[0];
7609 7619
7610 *p = '\0'; 7620 *p = '\0';
7611 _PyUnicode_Resize(&repr, p - PyUnicode_AS_UNICODE(repr)); 7621 _PyUnicode_Resize(&repr, p - PyUnicode_AS_UNICODE(repr));
7612 return repr; 7622 return repr;
7613 } 7623 }
7614 7624
7615 PyDoc_STRVAR(rfind__doc__, 7625 PyDoc_STRVAR(rfind__doc__,
7616 "S.rfind(sub [,start [,end]]) -> int\n\ 7626 "S.rfind(sub [,start [,end]]) -> int\n\
7617 \n\ 7627 \n\
7618 Return the highest index in S where substring sub is found,\n\ 7628 Return the highest index in S where substring sub is found,\n\
7619 such that sub is contained within s[start:end]. Optional\n\ 7629 such that sub is contained within s[start:end]. Optional\n\
7620 arguments start and end are interpreted as in slice notation.\n\ 7630 arguments start and end are interpreted as in slice notation.\n\
7621 \n\ 7631 \n\
7622 Return -1 on failure."); 7632 Return -1 on failure.");
7623 7633
7624 static PyObject * 7634 static PyObject *
7625 unicode_rfind(PyUnicodeObject *self, PyObject *args) 7635 unicode_rfind(PyUnicodeObject *self, PyObject *args)
7626 { 7636 {
7627 PyObject *substring; 7637 PyObject *substring;
7628 Py_ssize_t start; 7638 Py_ssize_t start;
7629 Py_ssize_t end; 7639 Py_ssize_t end;
7630 Py_ssize_t result; 7640 Py_ssize_t result;
7631 7641
7632 if (!_ParseTupleFinds(args, &substring, &start, &end)) 7642 if (!_ParseTupleFinds(args, &substring, &start, &end))
7633 return NULL; 7643 return NULL;
7634 7644
7635 result = stringlib_rfind_slice( 7645 result = stringlib_rfind_slice(
7636 PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self), 7646 PyUnicode_AS_UNICODE(self), PyUnicode_GET_SIZE(self),
7637 PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring), 7647 PyUnicode_AS_UNICODE(substring), PyUnicode_GET_SIZE(substring),
7638 start, end 7648 start, end
7639 ); 7649 );
7640 7650
7641 Py_DECREF(substring); 7651 Py_DECREF(substring);
7642 7652
7643 return PyLong_FromSsize_t(result); 7653 return PyLong_FromSsize_t(result);
7644 } 7654 }
7645 7655
7646 PyDoc_STRVAR(rindex__doc__, 7656 PyDoc_STRVAR(rindex__doc__,
7647 "S.rindex(sub [,start [,end]]) -> int\n\ 7657 "S.rindex(sub [,start [,end]]) -> int\n\
7648 \n\ 7658 \n\
7649 Like S.rfind() but raise ValueError when the substring is not found."); 7659 Like S.rfind() but raise ValueError when the substring is not found.");
7650 7660
7651 static PyObject * 7661 static PyObject *
7652 unicode_rindex(PyUnicodeObject *self, PyObject *args) 7662 unicode_rindex(PyUnicodeObject *self, PyObject *args)
7653 { 7663 {
7654 PyObject *substring; 7664 PyObject *substring;
7655 Py_ssize_t start; 7665 Py_ssize_t start;
(...skipping 1830 matching lines...) Show 10 above Show 10 below
9486 while ((*u++ = *s2++)); 9496 while ((*u++ = *s2++));
9487 return s1; 9497 return s1;
9488 } 9498 }
9489 9499
9490 Py_UNICODE* 9500 Py_UNICODE*
9491 Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n) 9501 Py_UNICODE_strncpy(Py_UNICODE *s1, const Py_UNICODE *s2, size_t n)
9492 { 9502 {
9493 Py_UNICODE *u = s1; 9503 Py_UNICODE *u = s1;
9494 while ((*u++ = *s2++)) 9504 while ((*u++ = *s2++))
9495 if (n-- == 0) 9505 if (n-- == 0)
9496 break; 9506 break;
9497 return s1; 9507 return s1;
9498 } 9508 }
9499 9509
9500 int 9510 int
9501 Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2) 9511 Py_UNICODE_strcmp(const Py_UNICODE *s1, const Py_UNICODE *s2)
9502 { 9512 {
9503 while (*s1 && *s2 && *s1 == *s2) 9513 while (*s1 && *s2 && *s1 == *s2)
9504 s1++, s2++; 9514 s1++, s2++;
9505 if (*s1 && *s2) 9515 if (*s1 && *s2)
9506 return (*s1 < *s2) ? -1 : +1; 9516 return (*s1 < *s2) ? -1 : +1;
9507 if (*s1) 9517 if (*s1)
9508 return 1; 9518 return 1;
9509 if (*s2) 9519 if (*s2)
9510 return -1; 9520 return -1;
9511 return 0; 9521 return 0;
9512 } 9522 }
9513 9523
9514 Py_UNICODE* 9524 Py_UNICODE*
9515 Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c) 9525 Py_UNICODE_strchr(const Py_UNICODE *s, Py_UNICODE c)
9516 { 9526 {
9517 const Py_UNICODE *p; 9527 const Py_UNICODE *p;
9518 for (p = s; *p; p++) 9528 for (p = s; *p; p++)
9519 if (*p == c) 9529 if (*p == c)
9520 return (Py_UNICODE*)p; 9530 return (Py_UNICODE*)p;
9521 return NULL; 9531 return NULL;
9522 } 9532 }
9523 9533
9524 9534
9525 #ifdef __cplusplus 9535 #ifdef __cplusplus
9526 } 9536 }
9527 #endif 9537 #endif