| Left: | ||
| Right: |
| OLD | NEW |
|---|---|
| 1 #include "Python.h" | 1 #include "Python.h" |
| 2 #include "structmember.h" | |
| 2 | 3 |
| 3 #define DEFAULT_ENCODING "utf-8" | 4 #define DEFAULT_ENCODING "utf-8" |
| 5 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) | |
| 6 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) | |
| 7 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) | |
| 8 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) | |
| 9 | |
| 10 static PyTypeObject PyScannerType; | |
| 11 static PyTypeObject PyEncoderType; | |
| 12 | |
| 13 typedef struct _PyScannerObject { | |
| 14 PyObject_HEAD | |
| 15 PyObject *encoding; | |
| 16 PyObject *strict; | |
| 17 PyObject *object_hook; | |
| 18 PyObject *parse_float; | |
| 19 PyObject *parse_int; | |
| 20 PyObject *parse_constant; | |
| 21 } PyScannerObject; | |
| 22 | |
| 23 static PyMemberDef scanner_members[] = { | |
| 24 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, | |
| 25 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, | |
| 26 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, | |
| 27 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, | |
| 28 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, | |
| 29 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, | |
| 30 {NULL} | |
| 31 }; | |
| 32 | |
| 33 typedef struct _PyEncoderObject { | |
| 34 PyObject_HEAD | |
| 35 PyObject *markers; | |
| 36 PyObject *defaultfn; | |
| 37 PyObject *encoder; | |
| 38 PyObject *indent; | |
| 39 PyObject *key_separator; | |
| 40 PyObject *item_separator; | |
| 41 PyObject *sort_keys; | |
| 42 PyObject *skipkeys; | |
| 43 int fast_encode; | |
| 44 int allow_nan; | |
| 45 } PyEncoderObject; | |
| 46 | |
| 47 static PyMemberDef encoder_members[] = { | |
| 48 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, | |
| 49 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, | |
| 50 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, | |
| 51 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, | |
| 52 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, | |
| 53 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, | |
| 54 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, | |
| 55 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, | |
| 56 {NULL} | |
| 57 }; | |
| 58 | |
| 59 static Py_ssize_t | |
| 60 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); | |
| 61 static PyObject * | |
| 62 ascii_escape_unicode(PyObject *pystr); | |
| 63 static PyObject * | |
| 64 ascii_escape_str(PyObject *pystr); | |
| 65 static PyObject * | |
| 66 py_encode_basestring_ascii(PyObject* self, PyObject *pystr); | |
| 67 void init_json(void); | |
| 68 static PyObject * | |
| 69 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); | |
| 70 static PyObject * | |
| 71 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); | |
| 72 static PyObject * | |
| 73 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); | |
| 74 static int | |
| 75 scanner_init(PyObject *self, PyObject *args, PyObject *kwds); | |
| 76 static void | |
| 77 scanner_dealloc(PyObject *self); | |
| 78 static int | |
| 79 encoder_init(PyObject *self, PyObject *args, PyObject *kwds); | |
| 80 static void | |
| 81 encoder_dealloc(PyObject *self); | |
| 82 static int | |
| 83 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); | |
| 84 static int | |
| 85 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); | |
| 86 static int | |
| 87 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); | |
| 88 static PyObject * | |
| 89 _encoded_const(PyObject *const); | |
| 90 static void | |
| 91 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); | |
| 92 static PyObject * | |
| 93 encoder_encode_string(PyEncoderObject *s, PyObject *obj); | |
| 94 static int | |
| 95 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); | |
| 96 static PyObject * | |
| 97 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); | |
| 98 static PyObject * | |
| 99 encoder_encode_float(PyEncoderObject *s, PyObject *obj); | |
| 100 | |
| 4 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') | 101 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') |
| 102 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) | |
| 103 | |
| 5 #define MIN_EXPANSION 6 | 104 #define MIN_EXPANSION 6 |
| 6 | 105 |
| 7 #ifdef Py_UNICODE_WIDE | 106 #ifdef Py_UNICODE_WIDE |
| 8 #define MAX_EXPANSION (2 * MIN_EXPANSION) | 107 #define MAX_EXPANSION (2 * MIN_EXPANSION) |
| 9 #else | 108 #else |
| 10 #define MAX_EXPANSION MIN_EXPANSION | 109 #define MAX_EXPANSION MIN_EXPANSION |
| 11 #endif | 110 #endif |
| 111 | |
| 112 static int | |
| 113 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) | |
| 114 { | |
| 115 *size_ptr = PyInt_AsSsize_t(o); | |
| 116 if (*size_ptr == -1 && PyErr_Occurred()); | |
| 117 return 1; | |
| 118 return 0; | |
| 119 } | |
| 120 | |
| 121 static PyObject * | |
| 122 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) | |
| 123 { | |
| 124 return PyInt_FromSsize_t(*size_ptr); | |
| 125 } | |
| 12 | 126 |
| 13 static Py_ssize_t | 127 static Py_ssize_t |
| 14 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) | 128 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) |
| 15 { | 129 { |
| 16 Py_UNICODE x; | |
| 17 output[chars++] = '\\'; | 130 output[chars++] = '\\'; |
| 18 switch (c) { | 131 switch (c) { |
| 19 case '\\': output[chars++] = (char)c; break; | 132 case '\\': output[chars++] = (char)c; break; |
| 20 case '"': output[chars++] = (char)c; break; | 133 case '"': output[chars++] = (char)c; break; |
| 21 case '\b': output[chars++] = 'b'; break; | 134 case '\b': output[chars++] = 'b'; break; |
| 22 case '\f': output[chars++] = 'f'; break; | 135 case '\f': output[chars++] = 'f'; break; |
| 23 case '\n': output[chars++] = 'n'; break; | 136 case '\n': output[chars++] = 'n'; break; |
| 24 case '\r': output[chars++] = 'r'; break; | 137 case '\r': output[chars++] = 'r'; break; |
| 25 case '\t': output[chars++] = 't'; break; | 138 case '\t': output[chars++] = 't'; break; |
| 26 default: | 139 default: |
| 27 #ifdef Py_UNICODE_WIDE | 140 #ifdef Py_UNICODE_WIDE |
| 28 if (c >= 0x10000) { | 141 if (c >= 0x10000) { |
| 29 /* UTF-16 surrogate pair */ | 142 /* UTF-16 surrogate pair */ |
| 30 Py_UNICODE v = c - 0x10000; | 143 Py_UNICODE v = c - 0x10000; |
| 31 c = 0xd800 | ((v >> 10) & 0x3ff); | 144 c = 0xd800 | ((v >> 10) & 0x3ff); |
| 32 output[chars++] = 'u'; | 145 output[chars++] = 'u'; |
| 33 x = (c & 0xf000) >> 12; | 146 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; |
| 34 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 147 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; |
| 35 x = (c & 0x0f00) >> 8; | 148 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; |
| 36 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 149 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; |
| 37 x = (c & 0x00f0) >> 4; | |
| 38 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
| 39 x = (c & 0x000f); | |
| 40 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
| 41 c = 0xdc00 | (v & 0x3ff); | 150 c = 0xdc00 | (v & 0x3ff); |
| 42 output[chars++] = '\\'; | 151 output[chars++] = '\\'; |
| 43 } | 152 } |
| 44 #endif | 153 #endif |
| 45 output[chars++] = 'u'; | 154 output[chars++] = 'u'; |
| 46 x = (c & 0xf000) >> 12; | 155 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; |
| 47 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 156 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; |
| 48 x = (c & 0x0f00) >> 8; | 157 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; |
| 49 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 158 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; |
| 50 x = (c & 0x00f0) >> 4; | |
| 51 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
| 52 x = (c & 0x000f); | |
| 53 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
| 54 } | 159 } |
| 55 return chars; | 160 return chars; |
| 56 } | 161 } |
| 57 | 162 |
| 58 static PyObject * | 163 static PyObject * |
| 59 ascii_escape_unicode(PyObject *pystr) | 164 ascii_escape_unicode(PyObject *pystr) |
| 60 { | 165 { |
| 61 Py_ssize_t i; | 166 Py_ssize_t i; |
| 62 Py_ssize_t input_chars; | 167 Py_ssize_t input_chars; |
| 63 Py_ssize_t output_size; | 168 Py_ssize_t output_size; |
| 64 Py_ssize_t chars; | 169 Py_ssize_t chars; |
| 65 PyObject *rval; | 170 PyObject *rval; |
| 66 char *output; | 171 char *output; |
| 67 Py_UNICODE *input_unicode; | 172 Py_UNICODE *input_unicode; |
| 68 | 173 |
| 69 input_chars = PyUnicode_GET_SIZE(pystr); | 174 input_chars = PyUnicode_GET_SIZE(pystr); |
| 70 input_unicode = PyUnicode_AS_UNICODE(pystr); | 175 input_unicode = PyUnicode_AS_UNICODE(pystr); |
| 176 | |
| 71 /* One char input can be up to 6 chars output, estimate 4 of these */ | 177 /* One char input can be up to 6 chars output, estimate 4 of these */ |
| 72 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | 178 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; |
| 73 rval = PyString_FromStringAndSize(NULL, output_size); | 179 rval = PyString_FromStringAndSize(NULL, output_size); |
| 74 if (rval == NULL) { | 180 if (rval == NULL) { |
| 75 return NULL; | 181 return NULL; |
| 76 } | 182 } |
| 77 output = PyString_AS_STRING(rval); | 183 output = PyString_AS_STRING(rval); |
| 78 chars = 0; | 184 chars = 0; |
| 79 output[chars++] = '"'; | 185 output[chars++] = '"'; |
| 80 for (i = 0; i < input_chars; i++) { | 186 for (i = 0; i < input_chars; i++) { |
| 81 Py_UNICODE c = input_unicode[i]; | 187 Py_UNICODE c = input_unicode[i]; |
| 82 if (S_CHAR(c)) { | 188 if (S_CHAR(c)) { |
| 83 output[chars++] = (char)c; | 189 output[chars++] = (char)c; |
| 84 } | 190 } |
| 85 » else { | 191 else { |
| 86 chars = ascii_escape_char(c, output, chars); | 192 chars = ascii_escape_char(c, output, chars); |
| 87 } | 193 } |
| 88 if (output_size - chars < (1 + MAX_EXPANSION)) { | 194 if (output_size - chars < (1 + MAX_EXPANSION)) { |
| 89 /* There's more than four, so let's resize by a lot */ | 195 /* There's more than four, so let's resize by a lot */ |
| 90 output_size *= 2; | 196 output_size *= 2; |
|
Martin v. Löwis
2009/01/04 13:22:29
You might want to check for integer overflow here.
bob.ippolito
2009/01/05 01:28:19
_PyString_Resize checks for integer overflow, so i… [comment truncated in this excerpt]
| |
| 91 /* This is an upper bound */ | 197 /* This is an upper bound */ |
| 92 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { | 198 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { |
| 93 output_size = 2 + (input_chars * MAX_EXPANSION); | 199 output_size = 2 + (input_chars * MAX_EXPANSION); |
| 94 } | 200 } |
| 95 if (_PyString_Resize(&rval, output_size) == -1) { | 201 if (_PyString_Resize(&rval, output_size) == -1) { |
| 96 return NULL; | 202 return NULL; |
| 97 } | 203 } |
| 98 output = PyString_AS_STRING(rval); | 204 output = PyString_AS_STRING(rval); |
| 99 } | 205 } |
| 100 } | 206 } |
| 101 output[chars++] = '"'; | 207 output[chars++] = '"'; |
| 102 if (_PyString_Resize(&rval, chars) == -1) { | 208 if (_PyString_Resize(&rval, chars) == -1) { |
| 103 return NULL; | 209 return NULL; |
| 104 } | 210 } |
| 105 return rval; | 211 return rval; |
| 106 } | 212 } |
| 107 | 213 |
| 108 static PyObject * | 214 static PyObject * |
| 109 ascii_escape_str(PyObject *pystr) | 215 ascii_escape_str(PyObject *pystr) |
|
Martin v. Löwis
2009/01/04 13:22:29
Please attach a comment to each function, telling… [comment truncated in this excerpt]
bob.ippolito
2009/01/05 01:28:19
Done in the next patch
| |
| 110 { | 216 { |
| 111 Py_ssize_t i; | 217 Py_ssize_t i; |
| 112 Py_ssize_t input_chars; | 218 Py_ssize_t input_chars; |
| 113 Py_ssize_t output_size; | 219 Py_ssize_t output_size; |
| 114 Py_ssize_t chars; | 220 Py_ssize_t chars; |
| 115 PyObject *rval; | 221 PyObject *rval; |
| 116 char *output; | 222 char *output; |
| 117 char *input_str; | 223 char *input_str; |
| 118 | 224 |
| 119 input_chars = PyString_GET_SIZE(pystr); | 225 input_chars = PyString_GET_SIZE(pystr); |
| 120 input_str = PyString_AS_STRING(pystr); | 226 input_str = PyString_AS_STRING(pystr); |
| 121 /* One char input can be up to 6 chars output, estimate 4 of these */ | 227 |
| 122 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | 228 /* Fast path for a string that's already ASCII */ |
| 229 for (i = 0; i < input_chars; i++) { | |
| 230 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; | |
| 231 if (!S_CHAR(c)) { | |
| 232 /* If we have to escape something, scan the string for unicode */ | |
| 233 Py_ssize_t j; | |
| 234 for (j = i; j < input_chars; j++) { | |
| 235 c = (Py_UNICODE)(unsigned char)input_str[j]; | |
| 236 if (c > 0x7f) { | |
| 237 /* We hit a non-ASCII character, bail to unicode mode */ | |
| 238 PyObject *uni; | |
| 239 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); | |
| 240 if (uni == NULL) { | |
| 241 return NULL; | |
| 242 } | |
| 243 rval = ascii_escape_unicode(uni); | |
| 244 Py_DECREF(uni); | |
| 245 return rval; | |
| 246 } | |
| 247 } | |
| 248 break; | |
| 249 } | |
| 250 } | |
| 251 | |
| 252 if (i == input_chars) { | |
| 253 /* Input is already ASCII */ | |
| 254 output_size = 2 + input_chars; | |
| 255 } | |
| 256 else { | |
| 257 /* One char input can be up to 6 chars output, estimate 4 of these */ | |
| 258 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | |
| 259 } | |
| 123 rval = PyString_FromStringAndSize(NULL, output_size); | 260 rval = PyString_FromStringAndSize(NULL, output_size); |
| 124 if (rval == NULL) { | 261 if (rval == NULL) { |
| 125 return NULL; | 262 return NULL; |
| 126 } | 263 } |
| 127 output = PyString_AS_STRING(rval); | 264 output = PyString_AS_STRING(rval); |
| 128 chars = 0; | 265 output[0] = '"'; |
| 129 output[chars++] = '"'; | 266 |
| 130 for (i = 0; i < input_chars; i++) { | 267 /* We know that everything up to i is ASCII already */ |
| 131 Py_UNICODE c = (Py_UNICODE)input_str[i]; | 268 chars = i + 1; |
| 269 memcpy(&output[1], input_str, i); | |
| 270 | |
| 271 for (; i < input_chars; i++) { | |
| 272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; | |
| 132 if (S_CHAR(c)) { | 273 if (S_CHAR(c)) { |
| 133 output[chars++] = (char)c; | 274 output[chars++] = (char)c; |
| 134 } | 275 } |
| 135 » else if (c > 0x7F) { | 276 else { |
| 136 /* We hit a non-ASCII character, bail to unicode mode */ | |
| 137 PyObject *uni; | |
| 138 Py_DECREF(rval); | |
| 139 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); | |
| 140 if (uni == NULL) { | |
| 141 return NULL; | |
| 142 } | |
| 143 rval = ascii_escape_unicode(uni); | |
| 144 Py_DECREF(uni); | |
| 145 return rval; | |
| 146 } | |
| 147 » else { | |
| 148 chars = ascii_escape_char(c, output, chars); | 277 chars = ascii_escape_char(c, output, chars); |
| 149 } | 278 } |
| 150 /* An ASCII char can't possibly expand to a surrogate! */ | 279 /* An ASCII char can't possibly expand to a surrogate! */ |
| 151 if (output_size - chars < (1 + MIN_EXPANSION)) { | 280 if (output_size - chars < (1 + MIN_EXPANSION)) { |
| 152 /* There's more than four, so let's resize by a lot */ | 281 /* There's more than four, so let's resize by a lot */ |
| 153 output_size *= 2; | 282 output_size *= 2; |
| 154 if (output_size > 2 + (input_chars * MIN_EXPANSION)) { | 283 if (output_size > 2 + (input_chars * MIN_EXPANSION)) { |
| 155 output_size = 2 + (input_chars * MIN_EXPANSION); | 284 output_size = 2 + (input_chars * MIN_EXPANSION); |
| 156 } | 285 } |
| 157 if (_PyString_Resize(&rval, output_size) == -1) { | 286 if (_PyString_Resize(&rval, output_size) == -1) { |
| 158 return NULL; | 287 return NULL; |
| 159 } | 288 } |
| 160 output = PyString_AS_STRING(rval); | 289 output = PyString_AS_STRING(rval); |
| 161 } | 290 } |
| 162 } | 291 } |
| 163 output[chars++] = '"'; | 292 output[chars++] = '"'; |
| 164 if (_PyString_Resize(&rval, chars) == -1) { | 293 if (_PyString_Resize(&rval, chars) == -1) { |
| 165 return NULL; | 294 return NULL; |
| 166 } | 295 } |
| 167 return rval; | 296 return rval; |
| 168 } | 297 } |
| 169 | 298 |
| 170 void | 299 static void |
| 171 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) | 300 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) |
| 172 { | 301 { |
| 173 static PyObject *errmsg_fn = NULL; | 302 static PyObject *errmsg_fn = NULL; |
| 174 PyObject *pymsg; | 303 PyObject *pymsg; |
| 175 if (errmsg_fn == NULL) { | 304 if (errmsg_fn == NULL) { |
| 176 PyObject *decoder = PyImport_ImportModule("json.decoder"); | 305 PyObject *decoder = PyImport_ImportModule("json.decoder"); |
| 177 if (decoder == NULL) | 306 if (decoder == NULL) |
| 178 return; | 307 return; |
| 179 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); | 308 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); |
| 309 Py_DECREF(decoder); | |
| 180 if (errmsg_fn == NULL) | 310 if (errmsg_fn == NULL) |
| 181 return; | 311 return; |
| 182 Py_DECREF(decoder); | |
| 183 } | 312 } |
| 184 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end); | 313 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); |
| 185 if (pymsg) { | 314 if (pymsg) { |
| 186 PyErr_SetObject(PyExc_ValueError, pymsg); | 315 PyErr_SetObject(PyExc_ValueError, pymsg); |
| 187 Py_DECREF(pymsg); | 316 Py_DECREF(pymsg); |
| 188 } | 317 } |
| 189 /* | |
| 190 | |
| 191 def linecol(doc, pos): | |
| 192 lineno = doc.count('\n', 0, pos) + 1 | |
| 193 if lineno == 1: | |
| 194 colno = pos | |
| 195 else: | |
| 196 colno = pos - doc.rindex('\n', 0, pos) | |
| 197 return lineno, colno | |
| 198 | |
| 199 def errmsg(msg, doc, pos, end=None): | |
| 200 lineno, colno = linecol(doc, pos) | |
| 201 if end is None: | |
| 202 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) | |
| 203 endlineno, endcolno = linecol(doc, end) | |
| 204 return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( | |
| 205 msg, lineno, colno, endlineno, endcolno, pos, end) | |
| 206 | |
| 207 */ | |
| 208 } | 318 } |
| 209 | 319 |
| 210 static PyObject * | 320 static PyObject * |
| 211 join_list_unicode(PyObject *lst) | 321 join_list_string(PyObject *lst) |
| 212 { | 322 { |
| 213 static PyObject *ustr = NULL; | 323 static PyObject *joinfn = NULL; |
| 214 static PyObject *joinstr = NULL; | 324 if (joinfn == NULL) { |
| 215 if (ustr == NULL) { | 325 PyObject *ustr = PyString_FromStringAndSize(NULL, 0); |
| 216 Py_UNICODE c = 0; | 326 if (ustr == NULL) |
| 217 ustr = PyUnicode_FromUnicode(&c, 0); | 327 return NULL; |
| 328 | |
| 329 joinfn = PyObject_GetAttrString(ustr, "join"); | |
| 330 Py_DECREF(ustr); | |
| 331 if (joinfn == NULL) | |
| 332 return NULL; | |
| 218 } | 333 } |
| 219 if (joinstr == NULL) { | 334 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); |
| 220 joinstr = PyString_InternFromString("join"); | |
| 221 } | |
| 222 if (joinstr == NULL || ustr == NULL) { | |
| 223 return NULL; | |
| 224 } | |
| 225 return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL); | |
| 226 } | 335 } |
| 227 | 336 |
| 228 static PyObject * | 337 static PyObject * |
| 229 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict) | 338 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { |
| 339 PyObject *tpl; | |
| 340 PyObject *pyidx; | |
| 341 /* | |
| 342 steal a reference to rval, returns (rval, idx) | |
| 343 */ | |
| 344 if (rval == NULL) { | |
| 345 return NULL; | |
| 346 } | |
| 347 pyidx = PyInt_FromSsize_t(idx); | |
| 348 if (pyidx == NULL) { | |
| 349 Py_DECREF(rval); | |
| 350 return NULL; | |
| 351 } | |
| 352 tpl = PyTuple_New(2); | |
| 353 if (tpl == NULL) { | |
| 354 Py_DECREF(pyidx); | |
| 355 Py_DECREF(rval); | |
| 356 return NULL; | |
| 357 } | |
| 358 PyTuple_SET_ITEM(tpl, 0, rval); | |
| 359 PyTuple_SET_ITEM(tpl, 1, pyidx); | |
| 360 return tpl; | |
| 361 } | |
| 362 | |
| 363 static PyObject * | |
| 364 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) | |
| 230 { | 365 { |
| 231 PyObject *rval; | 366 PyObject *rval; |
| 232 Py_ssize_t len = PyString_GET_SIZE(pystr); | 367 Py_ssize_t len = PyString_GET_SIZE(pystr); |
| 233 Py_ssize_t begin = end - 1; | 368 Py_ssize_t begin = end - 1; |
| 234 Py_ssize_t next = begin; | 369 Py_ssize_t next = begin; |
| 370 int has_unicode = 0; | |
| 235 char *buf = PyString_AS_STRING(pystr); | 371 char *buf = PyString_AS_STRING(pystr); |
| 236 PyObject *chunks = PyList_New(0); | 372 PyObject *chunks = PyList_New(0); |
| 237 if (chunks == NULL) { | 373 if (chunks == NULL) { |
| 238 goto bail; | 374 goto bail; |
| 239 } | 375 } |
| 240 if (end < 0 || len <= end) { | 376 if (end < 0 || len <= end) { |
| 241 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); | 377 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); |
| 242 goto bail; | 378 goto bail; |
| 243 } | 379 } |
| 244 while (1) { | 380 while (1) { |
| 245 /* Find the end of the string or the next escape */ | 381 /* Find the end of the string or the next escape */ |
| 246 Py_UNICODE c = 0; | 382 Py_UNICODE c = 0; |
| 247 PyObject *chunk = NULL; | 383 PyObject *chunk = NULL; |
| 248 for (next = end; next < len; next++) { | 384 for (next = end; next < len; next++) { |
| 249 c = buf[next]; | 385 c = (unsigned char)buf[next]; |
| 250 if (c == '"' || c == '\\') { | 386 if (c == '"' || c == '\\') { |
| 251 break; | 387 break; |
| 252 } | 388 } |
| 253 else if (strict && c <= 0x1f) { | 389 else if (strict && c <= 0x1f) { |
| 254 raise_errmsg("Invalid control character at", pystr, next); | 390 raise_errmsg("Invalid control character at", pystr, next); |
| 255 goto bail; | 391 goto bail; |
| 392 } | |
| 393 else if (c > 0x7f) { | |
| 394 has_unicode = 1; | |
| 256 } | 395 } |
| 257 } | 396 } |
| 258 if (!(c == '"' || c == '\\')) { | 397 if (!(c == '"' || c == '\\')) { |
| 259 raise_errmsg("Unterminated string starting at", pystr, begin); | 398 raise_errmsg("Unterminated string starting at", pystr, begin); |
| 260 goto bail; | 399 goto bail; |
| 261 } | 400 } |
| 262 /* Pick up this chunk if it's not zero length */ | 401 /* Pick up this chunk if it's not zero length */ |
| 263 if (next != end) { | 402 if (next != end) { |
| 264 PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end); | 403 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); |
| 265 if (strchunk == NULL) { | 404 if (strchunk == NULL) { |
| 266 goto bail; | 405 goto bail; |
| 267 } | 406 } |
| 268 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); | 407 if (has_unicode) { |
| 269 Py_DECREF(strchunk); | 408 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); |
| 270 if (chunk == NULL) { | 409 Py_DECREF(strchunk); |
| 271 goto bail; | 410 if (chunk == NULL) { |
| 411 goto bail; | |
| 412 } | |
| 413 } | |
| 414 else { | |
| 415 chunk = strchunk; | |
| 272 } | 416 } |
| 273 if (PyList_Append(chunks, chunk)) { | 417 if (PyList_Append(chunks, chunk)) { |
| 274 Py_DECREF(chunk); | 418 Py_DECREF(chunk); |
| 275 goto bail; | 419 goto bail; |
| 276 } | 420 } |
| 277 Py_DECREF(chunk); | 421 Py_DECREF(chunk); |
| 278 } | 422 } |
| 279 next++; | 423 next++; |
| 280 if (c == '"') { | 424 if (c == '"') { |
| 281 end = next; | 425 end = next; |
| (...skipping 26 matching lines...) | |
| 308 else { | 452 else { |
| 309 c = 0; | 453 c = 0; |
| 310 next++; | 454 next++; |
| 311 end = next + 4; | 455 end = next + 4; |
| 312 if (end >= len) { | 456 if (end >= len) { |
| 313 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); | 457 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); |
| 314 goto bail; | 458 goto bail; |
| 315 } | 459 } |
| 316 /* Decode 4 hex digits */ | 460 /* Decode 4 hex digits */ |
| 317 for (; next < end; next++) { | 461 for (; next < end; next++) { |
| 318 Py_ssize_t shl = (end - next - 1) << 2; | |
| 319 Py_UNICODE digit = buf[next]; | 462 Py_UNICODE digit = buf[next]; |
| 463 c <<= 4; | |
| 320 switch (digit) { | 464 switch (digit) { |
| 321 case '0': case '1': case '2': case '3': case '4': | 465 case '0': case '1': case '2': case '3': case '4': |
| 322 case '5': case '6': case '7': case '8': case '9': | 466 case '5': case '6': case '7': case '8': case '9': |
| 323 c |= (digit - '0') << shl; break; | 467 c |= (digit - '0'); break; |
| 324 case 'a': case 'b': case 'c': case 'd': case 'e': | 468 case 'a': case 'b': case 'c': case 'd': case 'e': |
| 325 case 'f': | 469 case 'f': |
| 326 c |= (digit - 'a' + 10) << shl; break; | 470 c |= (digit - 'a' + 10); break; |
| 327 case 'A': case 'B': case 'C': case 'D': case 'E': | 471 case 'A': case 'B': case 'C': case 'D': case 'E': |
| 328 case 'F': | 472 case 'F': |
| 329 c |= (digit - 'A' + 10) << shl; break; | 473 c |= (digit - 'A' + 10); break; |
| 330 default: | 474 default: |
| 331 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 475 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
| 332 goto bail; | 476 goto bail; |
| 333 } | 477 } |
| 334 } | 478 } |
| 335 #ifdef Py_UNICODE_WIDE | 479 #ifdef Py_UNICODE_WIDE |
| 336 /* Surrogate pair */ | 480 /* Surrogate pair */ |
| 337 if (c >= 0xd800 && c <= 0xdbff) { | 481 if ((c & 0xfc00) == 0xd800) { |
| 338 Py_UNICODE c2 = 0; | 482 Py_UNICODE c2 = 0; |
| 339 if (end + 6 >= len) { | 483 if (end + 6 >= len) { |
| 340 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 484 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
| 341 end - 5); | 485 goto bail; |
| 342 } | 486 } |
| 343 if (buf[next++] != '\\' || buf[next++] != 'u') { | 487 if (buf[next++] != '\\' || buf[next++] != 'u') { |
| 344 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 488 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
| 345 end - 5); | 489 goto bail; |
| 346 } | 490 } |
| 347 end += 6; | 491 end += 6; |
| 348 /* Decode 4 hex digits */ | 492 /* Decode 4 hex digits */ |
| 349 for (; next < end; next++) { | 493 for (; next < end; next++) { |
| 350 Py_ssize_t shl = (end - next - 1) << 2; | 494 c2 <<= 4; |
| 351 Py_UNICODE digit = buf[next]; | 495 Py_UNICODE digit = buf[next]; |
| 352 switch (digit) { | 496 switch (digit) { |
| 353 case '0': case '1': case '2': case '3': case '4': | 497 case '0': case '1': case '2': case '3': case '4': |
| 354 case '5': case '6': case '7': case '8': case '9': | 498 case '5': case '6': case '7': case '8': case '9': |
| 355 c2 |= (digit - '0') << shl; break; | 499 c2 |= (digit - '0'); break; |
| 356 case 'a': case 'b': case 'c': case 'd': case 'e': | 500 case 'a': case 'b': case 'c': case 'd': case 'e': |
| 357 case 'f': | 501 case 'f': |
| 358 c2 |= (digit - 'a' + 10) << shl; break; | 502 c2 |= (digit - 'a' + 10); break; |
| 359 case 'A': case 'B': case 'C': case 'D': case 'E': | 503 case 'A': case 'B': case 'C': case 'D': case 'E': |
| 360 case 'F': | 504 case 'F': |
| 361 c2 |= (digit - 'A' + 10) << shl; break; | 505 c2 |= (digit - 'A' + 10); break; |
| 362 default: | 506 default: |
| 363 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 507 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
| 364 goto bail; | 508 goto bail; |
| 365 } | 509 } |
| 366 } | 510 } |
| 511 if ((c2 & 0xfc00) != 0xdc00) { | |
| 512 raise_errmsg("Unpaired high surrogate", pystr, end - 5); | |
| 513 goto bail; | |
| 514 } | |
| 367 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); | 515 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); |
| 516 } | |
| 517 else if ((c & 0xfc00) == 0xdc00) { | |
| 518 raise_errmsg("Unpaired low surrogate", pystr, end - 5); | |
| 519 goto bail; | |
| 368 } | 520 } |
| 369 #endif | 521 #endif |
| 370 } | 522 } |
| 371 chunk = PyUnicode_FromUnicode(&c, 1); | 523 if (c > 0x7f) { |
| 372 if (chunk == NULL) { | 524 has_unicode = 1; |
| 373 goto bail; | 525 } |
| 526 if (has_unicode) { | |
| 527 chunk = PyUnicode_FromUnicode(&c, 1); | |
| 528 if (chunk == NULL) { | |
| 529 goto bail; | |
| 530 } | |
| 531 } | |
| 532 else { | |
| 533 char c_char = Py_CHARMASK(c); | |
| 534 chunk = PyString_FromStringAndSize(&c_char, 1); | |
| 535 if (chunk == NULL) { | |
| 536 goto bail; | |
| 537 } | |
| 374 } | 538 } |
| 375 if (PyList_Append(chunks, chunk)) { | 539 if (PyList_Append(chunks, chunk)) { |
| 376 Py_DECREF(chunk); | 540 Py_DECREF(chunk); |
| 377 goto bail; | 541 goto bail; |
| 378 } | 542 } |
| 379 Py_DECREF(chunk); | 543 Py_DECREF(chunk); |
| 380 } | 544 } |
| 381 | 545 |
| 382 rval = join_list_unicode(chunks); | 546 rval = join_list_string(chunks); |
| 383 if (rval == NULL) { | 547 if (rval == NULL) { |
| 384 goto bail; | 548 goto bail; |
| 385 } | 549 } |
| 386 Py_CLEAR(chunks); | 550 Py_CLEAR(chunks); |
| 387 return Py_BuildValue("(Nn)", rval, end); | 551 *next_end_ptr = end; |
| 552 return rval; | |
| 388 bail: | 553 bail: |
| 554 *next_end_ptr = -1; | |
| 389 Py_XDECREF(chunks); | 555 Py_XDECREF(chunks); |
| 390 return NULL; | 556 return NULL; |
| 391 } | 557 } |
| 392 | 558 |
| 393 | 559 |
| 394 static PyObject * | 560 static PyObject * |
| 395 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict) | 561 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) |
| 396 { | 562 { |
| 397 PyObject *rval; | 563 PyObject *rval; |
| 398 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); | 564 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); |
| 399 Py_ssize_t begin = end - 1; | 565 Py_ssize_t begin = end - 1; |
| 400 Py_ssize_t next = begin; | 566 Py_ssize_t next = begin; |
| 401 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); | 567 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); |
| 402 PyObject *chunks = PyList_New(0); | 568 PyObject *chunks = PyList_New(0); |
| 403 if (chunks == NULL) { | 569 if (chunks == NULL) { |
| 404 goto bail; | 570 goto bail; |
| 405 } | 571 } |
| (...skipping 63 matching lines...) | |
| 469 else { | 635 else { |
| 470 c = 0; | 636 c = 0; |
| 471 next++; | 637 next++; |
| 472 end = next + 4; | 638 end = next + 4; |
| 473 if (end >= len) { | 639 if (end >= len) { |
| 474 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); | 640 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); |
| 475 goto bail; | 641 goto bail; |
| 476 } | 642 } |
| 477 /* Decode 4 hex digits */ | 643 /* Decode 4 hex digits */ |
| 478 for (; next < end; next++) { | 644 for (; next < end; next++) { |
| 479 Py_ssize_t shl = (end - next - 1) << 2; | |
| 480 Py_UNICODE digit = buf[next]; | 645 Py_UNICODE digit = buf[next]; |
| 646 c <<= 4; | |
| 481 switch (digit) { | 647 switch (digit) { |
| 482 case '0': case '1': case '2': case '3': case '4': | 648 case '0': case '1': case '2': case '3': case '4': |
| 483 case '5': case '6': case '7': case '8': case '9': | 649 case '5': case '6': case '7': case '8': case '9': |
| 484 c |= (digit - '0') << shl; break; | 650 c |= (digit - '0'); break; |
| 485 case 'a': case 'b': case 'c': case 'd': case 'e': | 651 case 'a': case 'b': case 'c': case 'd': case 'e': |
| 486 case 'f': | 652 case 'f': |
| 487 c |= (digit - 'a' + 10) << shl; break; | 653 c |= (digit - 'a' + 10); break; |
| 488 case 'A': case 'B': case 'C': case 'D': case 'E': | 654 case 'A': case 'B': case 'C': case 'D': case 'E': |
| 489 case 'F': | 655 case 'F': |
| 490 c |= (digit - 'A' + 10) << shl; break; | 656 c |= (digit - 'A' + 10); break; |
| 491 default: | 657 default: |
| 492 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 658 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
| 493 goto bail; | 659 goto bail; |
| 494 } | 660 } |
| 495 } | 661 } |
| 496 #ifdef Py_UNICODE_WIDE | 662 #ifdef Py_UNICODE_WIDE |
| 497 /* Surrogate pair */ | 663 /* Surrogate pair */ |
| 498 if (c >= 0xd800 && c <= 0xdbff) { | 664 if ((c & 0xfc00) == 0xd800) { |
| 499 Py_UNICODE c2 = 0; | 665 Py_UNICODE c2 = 0; |
| 500 if (end + 6 >= len) { | 666 if (end + 6 >= len) { |
| 501 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 667 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
| 502 end - 5); | 668 goto bail; |
| 503 } | 669 } |
| 504 if (buf[next++] != '\\' || buf[next++] != 'u') { | 670 if (buf[next++] != '\\' || buf[next++] != 'u') { |
| 505 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 671 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
| 506 end - 5); | 672 goto bail; |
| 507 } | 673 } |
| 508 end += 6; | 674 end += 6; |
| 509 /* Decode 4 hex digits */ | 675 /* Decode 4 hex digits */ |
| 510 for (; next < end; next++) { | 676 for (; next < end; next++) { |
| 511 Py_ssize_t shl = (end - next - 1) << 2; | 677 c2 <<= 4; |
| 512 Py_UNICODE digit = buf[next]; | 678 Py_UNICODE digit = buf[next]; |
| 513 switch (digit) { | 679 switch (digit) { |
| 514 case '0': case '1': case '2': case '3': case '4': | 680 case '0': case '1': case '2': case '3': case '4': |
| 515 case '5': case '6': case '7': case '8': case '9': | 681 case '5': case '6': case '7': case '8': case '9': |
| 516 c2 |= (digit - '0') << shl; break; | 682 c2 |= (digit - '0'); break; |
| 517 case 'a': case 'b': case 'c': case 'd': case 'e': | 683 case 'a': case 'b': case 'c': case 'd': case 'e': |
| 518 case 'f': | 684 case 'f': |
| 519 c2 |= (digit - 'a' + 10) << shl; break; | 685 c2 |= (digit - 'a' + 10); break; |
| 520 case 'A': case 'B': case 'C': case 'D': case 'E': | 686 case 'A': case 'B': case 'C': case 'D': case 'E': |
| 521 case 'F': | 687 case 'F': |
| 522 c2 |= (digit - 'A' + 10) << shl; break; | 688 c2 |= (digit - 'A' + 10); break; |
| 523 default: | 689 default: |
| 524 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 690 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
| 525 goto bail; | 691 goto bail; |
| 526 } | 692 } |
| 527 } | 693 } |
| 694 if ((c2 & 0xfc00) != 0xdc00) { | |
| 695 raise_errmsg("Unpaired high surrogate", pystr, end - 5); | |
| 696 goto bail; | |
| 697 } | |
| 528 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); | 698 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); |
| 699 } | |
| 700 else if ((c & 0xfc00) == 0xdc00) { | |
| 701 raise_errmsg("Unpaired low surrogate", pystr, end - 5); | |
| 702 goto bail; | |
| 529 } | 703 } |
| 530 #endif | 704 #endif |
| 531 } | 705 } |
| 532 chunk = PyUnicode_FromUnicode(&c, 1); | 706 chunk = PyUnicode_FromUnicode(&c, 1); |
| 533 if (chunk == NULL) { | 707 if (chunk == NULL) { |
| 534 goto bail; | 708 goto bail; |
| 535 } | 709 } |
| 536 if (PyList_Append(chunks, chunk)) { | 710 if (PyList_Append(chunks, chunk)) { |
| 537 Py_DECREF(chunk); | 711 Py_DECREF(chunk); |
| 538 goto bail; | 712 goto bail; |
| 539 } | 713 } |
| 540 Py_DECREF(chunk); | 714 Py_DECREF(chunk); |
| 541 } | 715 } |
| 542 | 716 |
| 543 rval = join_list_unicode(chunks); | 717 rval = join_list_string(chunks); |
| 544 if (rval == NULL) { | 718 if (rval == NULL) { |
| 545 goto bail; | 719 goto bail; |
| 546 } | 720 } |
| 547 Py_CLEAR(chunks); | 721 Py_DECREF(chunks); |
| 548 return Py_BuildValue("(Nn)", rval, end); | 722 *next_end_ptr = end; |
| 723 return rval; | |
| 549 bail: | 724 bail: |
| 725 *next_end_ptr = -1; | |
| 550 Py_XDECREF(chunks); | 726 Py_XDECREF(chunks); |
| 551 return NULL; | 727 return NULL; |
| 552 } | 728 } |
| 553 | 729 |
| 554 PyDoc_STRVAR(pydoc_scanstring, | 730 PyDoc_STRVAR(pydoc_scanstring, |
| 555 "scanstring(basestring, end, encoding) -> (str, end)\n"); | 731 "scanstring(basestring, end, encoding) -> (str, end)\n" |
| 732 "\n" | |
| 733 "..." | |
|
Martin v. Löwis
2009/01/04 13:22:29
Some text should probably be added here.
bob.ippolito
2009/01/05 01:28:19
Done in the next patch.
| |
| 734 ); | |
| 556 | 735 |
| 557 static PyObject * | 736 static PyObject * |
| 558 py_scanstring(PyObject* self, PyObject *args) | 737 py_scanstring(PyObject* self, PyObject *args) |
| 559 { | 738 { |
| 560 PyObject *pystr; | 739 PyObject *pystr; |
| 740 PyObject *rval; | |
| 561 Py_ssize_t end; | 741 Py_ssize_t end; |
| 742 Py_ssize_t next_end = -1; | |
| 562 char *encoding = NULL; | 743 char *encoding = NULL; |
| 563 int strict = 0; | 744 int strict = 0; |
| 564 if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &strict)) { | 745 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { |
| 565 return NULL; | 746 return NULL; |
| 566 } | 747 } |
| 567 if (encoding == NULL) { | 748 if (encoding == NULL) { |
| 568 encoding = DEFAULT_ENCODING; | 749 encoding = DEFAULT_ENCODING; |
| 569 } | 750 } |
| 570 if (PyString_Check(pystr)) { | 751 if (PyString_Check(pystr)) { |
| 571 return scanstring_str(pystr, end, encoding, strict); | 752 rval = scanstring_str(pystr, end, encoding, strict, &next_end); |
| 572 } | 753 } |
| 573 else if (PyUnicode_Check(pystr)) { | 754 else if (PyUnicode_Check(pystr)) { |
| 574 return scanstring_unicode(pystr, end, strict); | 755 rval = scanstring_unicode(pystr, end, strict, &next_end); |
| 575 } | 756 } |
| 576 else { | 757 else { |
| 577 PyErr_Format(PyExc_TypeError, | 758 PyErr_Format(PyExc_TypeError, |
| 578 "first argument must be a string or unicode, not %.80s", | 759 "first argument must be a string, not %.80s", |
| 579 Py_TYPE(pystr)->tp_name); | 760 Py_TYPE(pystr)->tp_name); |
| 580 return NULL; | 761 return NULL; |
| 581 } | 762 } |
| 763 return _build_rval_index_tuple(rval, next_end); | |
| 582 } | 764 } |
| 583 | 765 |
| 584 PyDoc_STRVAR(pydoc_encode_basestring_ascii, | 766 PyDoc_STRVAR(pydoc_encode_basestring_ascii, |
| 585 "encode_basestring_ascii(basestring) -> str\n"); | 767 "encode_basestring_ascii(basestring) -> str\n" |
| 768 "\n" | |
| 769 "..." | |
| 770 ); | |
| 586 | 771 |
| 587 static PyObject * | 772 static PyObject * |
| 588 py_encode_basestring_ascii(PyObject* self, PyObject *pystr) | 773 py_encode_basestring_ascii(PyObject* self, PyObject *pystr) |
| 589 { | 774 { |
| 590 /* METH_O */ | 775 /* METH_O */ |
| 591 if (PyString_Check(pystr)) { | 776 if (PyString_Check(pystr)) { |
| 592 return ascii_escape_str(pystr); | 777 return ascii_escape_str(pystr); |
| 593 } | 778 } |
| 594 else if (PyUnicode_Check(pystr)) { | 779 else if (PyUnicode_Check(pystr)) { |
| 595 return ascii_escape_unicode(pystr); | 780 return ascii_escape_unicode(pystr); |
| 596 } | 781 } |
| 597 else { | 782 else { |
| 598 PyErr_Format(PyExc_TypeError, | 783 PyErr_Format(PyExc_TypeError, |
| 599 "first argument must be a string or unicode, not %.80s", | 784 "first argument must be a string, not %.80s", |
| 600 Py_TYPE(pystr)->tp_name); | 785 Py_TYPE(pystr)->tp_name); |
| 601 return NULL; | 786 return NULL; |
| 602 } | 787 } |
| 603 } | 788 } |
| 604 | 789 |
| 790 static void | |
| 791 scanner_dealloc(PyObject *self) | |
| 792 { | |
| 793 PyScannerObject *s; | |
| 794 assert(PyScanner_Check(self)); | |
| 795 s = (PyScannerObject *)self; | |
| 796 Py_CLEAR(s->encoding); | |
| 797 Py_CLEAR(s->strict); | |
| 798 Py_CLEAR(s->object_hook); | |
| 799 Py_CLEAR(s->parse_float); | |
| 800 Py_CLEAR(s->parse_int); | |
| 801 Py_CLEAR(s->parse_constant); | |
| 802 self->ob_type->tp_free(self); | |
| 803 } | |
| 804 | |
| 805 static PyObject * | |
| 806 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | |
| 807 char *str = PyString_AS_STRING(pystr); | |
| 808 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | |
| 809 PyObject *rval = PyDict_New(); | |
| 810 PyObject *key = NULL; | |
| 811 PyObject *val = NULL; | |
| 812 char *encoding = PyString_AS_STRING(s->encoding); | |
| 813 int strict = PyObject_IsTrue(s->strict); | |
| 814 Py_ssize_t next_idx; | |
| 815 if (rval == NULL) | |
| 816 return NULL; | |
| 817 | |
| 818 /* skip whitespace after { */ | |
| 819 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 820 | |
| 821 /* only loop if the object is non-empty */ | |
| 822 if (idx <= end_idx && str[idx] != '}') { | |
| 823 while (idx <= end_idx) { | |
| 824 /* read key */ | |
| 825 if (str[idx] != '"') { | |
| 826 raise_errmsg("Expecting property name", pystr, idx); | |
| 827 goto bail; | |
| 828 } | |
| 829 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); | |
| 830 if (key == NULL) | |
| 831 goto bail; | |
| 832 idx = next_idx; | |
| 833 | |
| 834 /* skip whitespace between key and : delimiter, read :, skip whitespace */ | |
| 835 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 836 if (idx > end_idx || str[idx] != ':') { | |
| 837 raise_errmsg("Expecting : delimiter", pystr, idx); | |
| 838 goto bail; | |
| 839 } | |
| 840 idx++; | |
| 841 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 842 | |
| 843 /* read any JSON data type */ | |
| 844 val = scan_once_str(s, pystr, idx, &next_idx); | |
| 845 if (val == NULL) | |
| 846 goto bail; | |
| 847 | |
| 848 if (PyDict_SetItem(rval, key, val) == -1) | |
| 849 goto bail; | |
| 850 | |
| 851 Py_CLEAR(key); | |
| 852 Py_CLEAR(val); | |
| 853 idx = next_idx; | |
| 854 | |
| 855 /* skip whitespace before } or , */ | |
| 856 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 857 | |
| 858 /* bail if the object is closed or we didn't get the , delimiter */ | |
| 859 if (idx > end_idx) break; | |
| 860 if (str[idx] == '}') { | |
| 861 break; | |
| 862 } | |
| 863 else if (str[idx] != ',') { | |
| 864 raise_errmsg("Expecting , delimiter", pystr, idx); | |
| 865 goto bail; | |
| 866 } | |
| 867 idx++; | |
| 868 | |
| 869 /* skip whitespace after , delimiter */ | |
| 870 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 871 } | |
| 872 } | |
| 873 /* verify that idx < end_idx, str[idx] should be '}' */ | |
| 874 if (idx > end_idx || str[idx] != '}') { | |
| 875 raise_errmsg("Expecting object", pystr, end_idx); | |
| 876 goto bail; | |
| 877 } | |
| 878 /* if object_hook is not None: rval = object_hook(rval) */ | |
| 879 if (s->object_hook != Py_None) { | |
| 880 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); | |
| 881 if (val == NULL) | |
| 882 goto bail; | |
| 883 Py_DECREF(rval); | |
| 884 rval = val; | |
| 885 val = NULL; | |
| 886 } | |
| 887 *next_idx_ptr = idx + 1; | |
| 888 return rval; | |
| 889 bail: | |
| 890 Py_XDECREF(key); | |
| 891 Py_XDECREF(val); | |
| 892 Py_DECREF(rval); | |
| 893 return NULL; | |
| 894 } | |
| 895 | |
| 896 static PyObject * | |
| 897 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | |
| 898 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
| 899 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | |
| 900 PyObject *val = NULL; | |
| 901 PyObject *rval = PyDict_New(); | |
| 902 PyObject *key = NULL; | |
| 903 int strict = PyObject_IsTrue(s->strict); | |
| 904 Py_ssize_t next_idx; | |
| 905 if (rval == NULL) | |
| 906 return NULL; | |
| 907 | |
| 908 /* skip whitespace after { */ | |
| 909 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 910 | |
| 911 /* only loop if the object is non-empty */ | |
| 912 if (idx <= end_idx && str[idx] != '}') { | |
| 913 while (idx <= end_idx) { | |
| 914 /* read key */ | |
| 915 if (str[idx] != '"') { | |
| 916 raise_errmsg("Expecting property name", pystr, idx); | |
| 917 goto bail; | |
| 918 } | |
| 919 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); | |
| 920 if (key == NULL) | |
| 921 goto bail; | |
| 922 idx = next_idx; | |
| 923 | |
| 924 /* skip whitespace between key and : delimiter, read :, skip whitespace */ | |
| 925 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 926 if (idx > end_idx || str[idx] != ':') { | |
| 927 raise_errmsg("Expecting : delimiter", pystr, idx); | |
| 928 goto bail; | |
| 929 } | |
| 930 idx++; | |
| 931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 932 | |
| 933 /* read any JSON term */ | |
| 934 val = scan_once_unicode(s, pystr, idx, &next_idx); | |
| 935 if (val == NULL) | |
| 936 goto bail; | |
| 937 | |
| 938 if (PyDict_SetItem(rval, key, val) == -1) | |
| 939 goto bail; | |
| 940 | |
| 941 Py_CLEAR(key); | |
| 942 Py_CLEAR(val); | |
| 943 idx = next_idx; | |
| 944 | |
| 945 /* skip whitespace before } or , */ | |
| 946 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 947 | |
| 948 /* bail if the object is closed or we didn't get the , delimiter */ | |
| 949 if (idx > end_idx) break; | |
| 950 if (str[idx] == '}') { | |
| 951 break; | |
| 952 } | |
| 953 else if (str[idx] != ',') { | |
| 954 raise_errmsg("Expecting , delimiter", pystr, idx); | |
| 955 goto bail; | |
| 956 } | |
| 957 idx++; | |
| 958 | |
| 959 /* skip whitespace after , delimiter */ | |
| 960 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 961 } | |
| 962 } | |
| 963 | |
| 964 /* verify that idx < end_idx, str[idx] should be '}' */ | |
| 965 if (idx > end_idx || str[idx] != '}') { | |
| 966 raise_errmsg("Expecting object", pystr, end_idx); | |
| 967 goto bail; | |
| 968 } | |
| 969 | |
| 970 /* if object_hook is not None: rval = object_hook(rval) */ | |
| 971 if (s->object_hook != Py_None) { | |
| 972 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); | |
| 973 if (val == NULL) | |
| 974 goto bail; | |
| 975 Py_DECREF(rval); | |
| 976 rval = val; | |
| 977 val = NULL; | |
| 978 } | |
| 979 *next_idx_ptr = idx + 1; | |
| 980 return rval; | |
| 981 bail: | |
| 982 Py_XDECREF(key); | |
| 983 Py_XDECREF(val); | |
| 984 Py_DECREF(rval); | |
| 985 return NULL; | |
| 986 } | |
| 987 | |
| 988 static PyObject * | |
| 989 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | |
| 990 char *str = PyString_AS_STRING(pystr); | |
| 991 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | |
| 992 PyObject *val = NULL; | |
| 993 PyObject *rval = PyList_New(0); | |
| 994 Py_ssize_t next_idx; | |
| 995 if (rval == NULL) | |
| 996 return NULL; | |
| 997 | |
| 998 /* skip whitespace after [ */ | |
| 999 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 1000 | |
| 1001 /* only loop if the array is non-empty */ | |
| 1002 if (idx <= end_idx && str[idx] != ']') { | |
| 1003 while (idx <= end_idx) { | |
| 1004 | |
| 1005 /* read any JSON term and de-tuplefy the (rval, idx) */ | |
| 1006 val = scan_once_str(s, pystr, idx, &next_idx); | |
| 1007 if (val == NULL) | |
| 1008 goto bail; | |
| 1009 | |
| 1010 if (PyList_Append(rval, val) == -1) | |
| 1011 goto bail; | |
| 1012 | |
| 1013 Py_CLEAR(val); | |
| 1014 idx = next_idx; | |
| 1015 | |
| 1016 /* skip whitespace between term and , */ | |
| 1017 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 1018 | |
| 1019 /* bail if the array is closed or we didn't get the , delimiter */ | |
| 1020 if (idx > end_idx) break; | |
| 1021 if (str[idx] == ']') { | |
| 1022 break; | |
| 1023 } | |
| 1024 else if (str[idx] != ',') { | |
| 1025 raise_errmsg("Expecting , delimiter", pystr, idx); | |
| 1026 goto bail; | |
| 1027 } | |
| 1028 idx++; | |
| 1029 | |
| 1030 /* skip whitespace after , */ | |
| 1031 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 1032 } | |
| 1033 } | |
| 1034 | |
| 1035 /* verify that idx < end_idx, str[idx] should be ']' */ | |
| 1036 if (idx > end_idx || str[idx] != ']') { | |
| 1037 raise_errmsg("Expecting object", pystr, end_idx); | |
| 1038 goto bail; | |
| 1039 } | |
| 1040 *next_idx_ptr = idx + 1; | |
| 1041 return rval; | |
| 1042 bail: | |
| 1043 Py_XDECREF(val); | |
| 1044 Py_DECREF(rval); | |
| 1045 return NULL; | |
| 1046 } | |
| 1047 | |
| 1048 static PyObject * | |
| 1049 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | |
| 1050 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
| 1051 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | |
| 1052 PyObject *val = NULL; | |
| 1053 PyObject *rval = PyList_New(0); | |
| 1054 Py_ssize_t next_idx; | |
| 1055 if (rval == NULL) | |
| 1056 return NULL; | |
| 1057 | |
| 1058 /* skip whitespace after [ */ | |
| 1059 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 1060 | |
| 1061 /* only loop if the array is non-empty */ | |
| 1062 if (idx <= end_idx && str[idx] != ']') { | |
| 1063 while (idx <= end_idx) { | |
| 1064 | |
| 1065 /* read any JSON term */ | |
| 1066 val = scan_once_unicode(s, pystr, idx, &next_idx); | |
| 1067 if (val == NULL) | |
| 1068 goto bail; | |
| 1069 | |
| 1070 if (PyList_Append(rval, val) == -1) | |
| 1071 goto bail; | |
| 1072 | |
| 1073 Py_CLEAR(val); | |
| 1074 idx = next_idx; | |
| 1075 | |
| 1076 /* skip whitespace between term and , */ | |
| 1077 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 1078 | |
| 1079 /* bail if the array is closed or we didn't get the , delimiter */ | |
| 1080 if (idx > end_idx) break; | |
| 1081 if (str[idx] == ']') { | |
| 1082 break; | |
| 1083 } | |
| 1084 else if (str[idx] != ',') { | |
| 1085 raise_errmsg("Expecting , delimiter", pystr, idx); | |
| 1086 goto bail; | |
| 1087 } | |
| 1088 idx++; | |
| 1089 | |
| 1090 /* skip whitespace after , */ | |
| 1091 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
| 1092 } | |
| 1093 } | |
| 1094 | |
| 1095 /* verify that idx < end_idx, str[idx] should be ']' */ | |
| 1096 if (idx > end_idx || str[idx] != ']') { | |
| 1097 raise_errmsg("Expecting object", pystr, end_idx); | |
| 1098 goto bail; | |
| 1099 } | |
| 1100 *next_idx_ptr = idx + 1; | |
| 1101 return rval; | |
| 1102 bail: | |
| 1103 Py_XDECREF(val); | |
| 1104 Py_DECREF(rval); | |
| 1105 return NULL; | |
| 1106 } | |
| 1107 | |
| 1108 static PyObject * | |
| 1109 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | |
| 1110 PyObject *cstr; | |
| 1111 PyObject *rval; | |
| 1112 /* constant is "NaN", "Infinity", or "-Infinity" */ | |
| 1113 cstr = PyString_InternFromString(constant); | |
| 1114 if (cstr == NULL) | |
| 1115 return NULL; | |
| 1116 | |
| 1117 /* rval = parse_constant(constant) */ | |
| 1118 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); | |
| 1119 idx += PyString_GET_SIZE(cstr); | |
| 1120 Py_DECREF(cstr); | |
| 1121 *next_idx_ptr = idx; | |
| 1122 return rval; | |
| 1123 } | |
| 1124 | |
| 1125 static PyObject * | |
| 1126 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { | |
| 1127 char *str = PyString_AS_STRING(pystr); | |
| 1128 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | |
| 1129 Py_ssize_t idx = start; | |
| 1130 int is_float = 0; | |
| 1131 PyObject *rval; | |
| 1132 PyObject *numstr; | |
| 1133 | |
| 1134 /* read a sign if it's there, make sure it's not the end of the string */ | |
| 1135 if (str[idx] == '-') { | |
| 1136 idx++; | |
| 1137 if (idx > end_idx) { | |
| 1138 PyErr_SetNone(PyExc_StopIteration); | |
| 1139 return NULL; | |
| 1140 } | |
| 1141 } | |
| 1142 | |
| 1143 /* read as many integer digits as we find as long as it doesn't start with 0 */ | |
| 1144 if (str[idx] >= '1' && str[idx] <= '9') { | |
| 1145 idx++; | |
| 1146 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
| 1147 } | |
| 1148 /* if it starts with 0 we only expect one integer digit */ | |
| 1149 else if (str[idx] == '0') { | |
| 1150 idx++; | |
| 1151 } | |
| 1152 /* no integer digits, error */ | |
| 1153 else { | |
| 1154 PyErr_SetNone(PyExc_StopIteration); | |
| 1155 return NULL; | |
| 1156 } | |
| 1157 | |
| 1158 /* if the next char is '.' followed by a digit then read all float digits */ | |
| 1159 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { | |
| 1160 is_float = 1; | |
| 1161 idx += 2; | |
| 1162 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
| 1163 } | |
| 1164 | |
| 1165 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ | |
| 1166 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { | |
| 1167 | |
| 1168 /* save the index of the 'e' or 'E' just in case we need to backtrack */ | |
| 1169 Py_ssize_t e_start = idx; | |
| 1170 idx++; | |
| 1171 | |
| 1172 /* read an exponent sign if present */ | |
| 1173 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; | |
| 1174 | |
| 1175 /* read all digits */ | |
| 1176 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
| 1177 | |
| 1178 /* if we got a digit, then parse as float. if not, backtrack */ | |
| 1179 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { | |
| 1180 is_float = 1; | |
| 1181 } | |
| 1182 else { | |
| 1183 idx = e_start; | |
| 1184 } | |
| 1185 } | |
| 1186 | |
| 1187 /* copy the section we determined to be a number */ | |
| 1188 numstr = PyString_FromStringAndSize(&str[start], idx - start); | |
| 1189 if (numstr == NULL) | |
| 1190 return NULL; | |
| 1191 if (is_float) { | |
| 1192 /* parse as a float using a fast path if available, otherwise call user defined method */ | |
| 1193 if (s->parse_float != (PyObject *)&PyFloat_Type) { | |
| 1194 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); | |
| 1195 } | |
| 1196 else { | |
| 1197 rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); | |
| 1198 } | |
| 1199 } | |
| 1200 else { | |
| 1201 /* parse as an int using a fast path if available, otherwise call user defined method */ | |
| 1202 if (s->parse_int != (PyObject *)&PyInt_Type) { | |
| 1203 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); | |
| 1204 } | |
| 1205 else { | |
| 1206 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); | |
| 1207 } | |
| 1208 } | |
| 1209 Py_DECREF(numstr); | |
| 1210 *next_idx_ptr = idx; | |
| 1211 return rval; | |
| 1212 } | |
| 1213 | |
| 1214 static PyObject * | |
| 1215 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { | |
| 1216 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
| 1217 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | |
| 1218 Py_ssize_t idx = start; | |
| 1219 int is_float = 0; | |
| 1220 PyObject *rval; | |
| 1221 PyObject *numstr; | |
| 1222 | |
| 1223 /* read a sign if it's there, make sure it's not the end of the string */ | |
| 1224 if (str[idx] == '-') { | |
| 1225 idx++; | |
| 1226 if (idx > end_idx) { | |
| 1227 PyErr_SetNone(PyExc_StopIteration); | |
| 1228 return NULL; | |
| 1229 } | |
| 1230 } | |
| 1231 | |
| 1232 /* read as many integer digits as we find as long as it doesn't start with 0 */ | |
| 1233 if (str[idx] >= '1' && str[idx] <= '9') { | |
| 1234 idx++; | |
| 1235 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
| 1236 } | |
| 1237 /* if it starts with 0 we only expect one integer digit */ | |
| 1238 else if (str[idx] == '0') { | |
| 1239 idx++; | |
| 1240 } | |
| 1241 /* no integer digits, error */ | |
| 1242 else { | |
| 1243 PyErr_SetNone(PyExc_StopIteration); | |
| 1244 return NULL; | |
| 1245 } | |
| 1246 | |
| 1247 /* if the next char is '.' followed by a digit then read all float digits */ | |
| 1248 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { | |
| 1249 is_float = 1; | |
| 1250 idx += 2; | |
| 1251 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
| 1252 } | |
| 1253 | |
| 1254 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ | |
| 1255 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { | |
| 1256 Py_ssize_t e_start = idx; | |
| 1257 idx++; | |
| 1258 | |
| 1259 /* read an exponent sign if present */ | |
| 1260 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; | |
| 1261 | |
| 1262 /* read all digits */ | |
| 1263 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
| 1264 | |
| 1265 /* if we got a digit, then parse as float. if not, backtrack */ | |
| 1266 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { | |
| 1267 is_float = 1; | |
| 1268 } | |
| 1269 else { | |
| 1270 idx = e_start; | |
| 1271 } | |
| 1272 } | |
| 1273 | |
| 1274 /* copy the section we determined to be a number */ | |
| 1275 numstr = PyUnicode_FromUnicode(&str[start], idx - start); | |
| 1276 if (numstr == NULL) | |
| 1277 return NULL; | |
| 1278 if (is_float) { | |
| 1279 /* parse as a float using a fast path if available, otherwise call user defined method */ | |
| 1280 if (s->parse_float != (PyObject *)&PyFloat_Type) { | |
| 1281 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); | |
| 1282 } | |
| 1283 else { | |
| 1284 rval = PyFloat_FromString(numstr, NULL); | |
| 1285 } | |
| 1286 } | |
| 1287 else { | |
| 1288 /* no fast path for unicode -> int, just call */ | |
| 1289 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); | |
| 1290 } | |
| 1291 Py_DECREF(numstr); | |
| 1292 *next_idx_ptr = idx; | |
| 1293 return rval; | |
| 1294 } | |
| 1295 | |
| 1296 static PyObject * | |
| 1297 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) | |
| 1298 { | |
| 1299 char *str = PyString_AS_STRING(pystr); | |
| 1300 Py_ssize_t length = PyString_GET_SIZE(pystr); | |
| 1301 if (idx >= length) { | |
| 1302 PyErr_SetNone(PyExc_StopIteration); | |
| 1303 return NULL; | |
| 1304 } | |
| 1305 switch (str[idx]) { | |
| 1306 case '"': | |
| 1307 /* string */ | |
| 1308 return scanstring_str(pystr, idx + 1, | |
| 1309 PyString_AS_STRING(s->encoding), | |
| 1310 PyObject_IsTrue(s->strict), | |
| 1311 next_idx_ptr); | |
| 1312 case '{': | |
| 1313 /* object */ | |
| 1314 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); | |
| 1315 case '[': | |
| 1316 /* array */ | |
| 1317 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); | |
| 1318 case 'n': | |
| 1319 /* null */ | |
| 1320 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { | |
|
Martin v. Löwis
2009/01/04 13:22:29
Is this really faster than a strncmp?
bob.ippolito
2009/01/05 01:28:19
Probably not, but strncmp doesn't work for PyUnico… [reply truncated in the review view]
| |
| 1321 Py_INCREF(Py_None); | |
| 1322 *next_idx_ptr = idx + 4; | |
| 1323 return Py_None; | |
| 1324 } | |
| 1325 break; | |
| 1326 case 't': | |
| 1327 /* true */ | |
| 1328 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { | |
| 1329 Py_INCREF(Py_True); | |
| 1330 *next_idx_ptr = idx + 4; | |
| 1331 return Py_True; | |
| 1332 } | |
| 1333 break; | |
| 1334 case 'f': | |
| 1335 /* false */ | |
| 1336 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { | |
| 1337 Py_INCREF(Py_False); | |
| 1338 *next_idx_ptr = idx + 5; | |
| 1339 return Py_False; | |
| 1340 } | |
| 1341 break; | |
| 1342 case 'N': | |
| 1343 /* NaN */ | |
| 1344 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { | |
| 1345 return _parse_constant(s, "NaN", idx, next_idx_ptr); | |
| 1346 } | |
| 1347 break; | |
| 1348 case 'I': | |
| 1349 /* Infinity */ | |
| 1350 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { | |
| 1351 return _parse_constant(s, "Infinity", idx, next_idx_ptr); | |
| 1352 } | |
| 1353 break; | |
| 1354 case '-': | |
| 1355 /* -Infinity */ | |
| 1356 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { | |
| 1357 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); | |
| 1358 } | |
| 1359 break; | |
| 1360 } | |
| 1361 /* Didn't find a string, object, array, or named constant. Look for a number. */ | |
| 1362 return _match_number_str(s, pystr, idx, next_idx_ptr); | |
| 1363 } | |
| 1364 | |
| 1365 static PyObject * | |
| 1366 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) | |
| 1367 { | |
| 1368 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
| 1369 Py_ssize_t length = PyUnicode_GET_SIZE(pystr); | |
| 1370 if (idx >= length) { | |
| 1371 PyErr_SetNone(PyExc_StopIteration); | |
| 1372 return NULL; | |
| 1373 } | |
| 1374 switch (str[idx]) { | |
| 1375 case '"': | |
| 1376 /* string */ | |
| 1377 return scanstring_unicode(pystr, idx + 1, | |
| 1378 PyObject_IsTrue(s->strict), | |
| 1379 next_idx_ptr); | |
| 1380 case '{': | |
| 1381 /* object */ | |
| 1382 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); | |
| 1383 case '[': | |
| 1384 /* array */ | |
| 1385 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); | |
| 1386 case 'n': | |
| 1387 /* null */ | |
| 1388 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { | |
| 1389 Py_INCREF(Py_None); | |
| 1390 *next_idx_ptr = idx + 4; | |
| 1391 return Py_None; | |
| 1392 } | |
| 1393 break; | |
| 1394 case 't': | |
| 1395 /* true */ | |
| 1396 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { | |
| 1397 Py_INCREF(Py_True); | |
| 1398 *next_idx_ptr = idx + 4; | |
| 1399 return Py_True; | |
| 1400 } | |
| 1401 break; | |
| 1402 case 'f': | |
| 1403 /* false */ | |
| 1404 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { | |
| 1405 Py_INCREF(Py_False); | |
| 1406 *next_idx_ptr = idx + 5; | |
| 1407 return Py_False; | |
| 1408 } | |
| 1409 break; | |
| 1410 case 'N': | |
| 1411 /* NaN */ | |
| 1412 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { | |
| 1413 return _parse_constant(s, "NaN", idx, next_idx_ptr); | |
| 1414 } | |
| 1415 break; | |
| 1416 case 'I': | |
| 1417 /* Infinity */ | |
| 1418 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { | |
| 1419 return _parse_constant(s, "Infinity", idx, next_idx_ptr); | |
| 1420 } | |
| 1421 break; | |
| 1422 case '-': | |
| 1423 /* -Infinity */ | |
| 1424 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { | |
| 1425 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); | |
| 1426 } | |
| 1427 break; | |
| 1428 } | |
| 1429 /* Didn't find a string, object, array, or named constant. Look for a number. */ | |
| 1430 return _match_number_unicode(s, pystr, idx, next_idx_ptr); | |
| 1431 } | |
| 1432 | |
| 1433 static PyObject * | |
| 1434 scanner_call(PyObject *self, PyObject *args, PyObject *kwds) | |
| 1435 { | |
| 1436 PyObject *pystr; | |
| 1437 PyObject *rval; | |
| 1438 Py_ssize_t idx; | |
| 1439 Py_ssize_t next_idx = -1; | |
| 1440 static char *kwlist[] = {"string", "idx", NULL}; | |
| 1441 PyScannerObject *s; | |
| 1442 assert(PyScanner_Check(self)); | |
| 1443 s = (PyScannerObject *)self; | |
| 1444 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) | |
| 1445 return NULL; | |
| 1446 | |
| 1447 if (PyString_Check(pystr)) { | |
| 1448 rval = scan_once_str(s, pystr, idx, &next_idx); | |
| 1449 } | |
| 1450 else if (PyUnicode_Check(pystr)) { | |
| 1451 rval = scan_once_unicode(s, pystr, idx, &next_idx); | |
| 1452 } | |
| 1453 else { | |
| 1454 PyErr_Format(PyExc_TypeError, | |
| 1455 "first argument must be a string, not %.80s", | |
| 1456 Py_TYPE(pystr)->tp_name); | |
| 1457 return NULL; | |
| 1458 } | |
| 1459 return _build_rval_index_tuple(rval, next_idx); | |
| 1460 } | |
| 1461 | |
| 1462 static int | |
| 1463 scanner_init(PyObject *self, PyObject *args, PyObject *kwds) | |
| 1464 { | |
| 1465 PyObject *ctx; | |
| 1466 static char *kwlist[] = {"context", NULL}; | |
| 1467 PyScannerObject *s; | |
| 1468 | |
| 1469 assert(PyScanner_Check(self)); | |
| 1470 s = (PyScannerObject *)self; | |
| 1471 | |
| 1472 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx) ) | |
| 1473 return -1; | |
| 1474 | |
| 1475 s->encoding = NULL; | |
| 1476 s->strict = NULL; | |
| 1477 s->object_hook = NULL; | |
| 1478 s->parse_float = NULL; | |
| 1479 s->parse_int = NULL; | |
| 1480 s->parse_constant = NULL; | |
| 1481 | |
| 1482 /* PyString_AS_STRING is used on encoding */ | |
| 1483 s->encoding = PyObject_GetAttrString(ctx, "encoding"); | |
| 1484 if (s->encoding == Py_None) { | |
| 1485 Py_DECREF(Py_None); | |
| 1486 s->encoding = PyString_InternFromString(DEFAULT_ENCODING); | |
| 1487 } | |
| 1488 else if (PyUnicode_Check(s->encoding)) { | |
| 1489 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); | |
| 1490 Py_DECREF(s->encoding); | |
| 1491 s->encoding = tmp; | |
| 1492 } | |
| 1493 if (s->encoding == NULL || !PyString_Check(s->encoding)) | |
| 1494 goto bail; | |
| 1495 | |
| 1496 /* All of these will fail "gracefully" so we don't need to verify them */ | |
| 1497 s->strict = PyObject_GetAttrString(ctx, "strict"); | |
| 1498 if (s->strict == NULL) | |
| 1499 goto bail; | |
| 1500 s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); | |
| 1501 if (s->object_hook == NULL) | |
| 1502 goto bail; | |
| 1503 s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); | |
| 1504 if (s->parse_float == NULL) | |
| 1505 goto bail; | |
| 1506 s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); | |
| 1507 if (s->parse_int == NULL) | |
| 1508 goto bail; | |
| 1509 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); | |
| 1510 if (s->parse_constant == NULL) | |
| 1511 goto bail; | |
| 1512 | |
| 1513 return 0; | |
| 1514 | |
| 1515 bail: | |
| 1516 Py_CLEAR(s->encoding); | |
| 1517 Py_CLEAR(s->strict); | |
| 1518 Py_CLEAR(s->object_hook); | |
| 1519 Py_CLEAR(s->parse_float); | |
| 1520 Py_CLEAR(s->parse_int); | |
| 1521 Py_CLEAR(s->parse_constant); | |
| 1522 return -1; | |
| 1523 } | |
| 1524 | |
| 1525 PyDoc_STRVAR(scanner_doc, "JSON scanner object"); | |
| 1526 | |
| 1527 static | |
| 1528 PyTypeObject PyScannerType = { | |
|
Martin v. Löwis
2009/01/04 13:22:29
I think scanner objects should participate in cycl
bob.ippolito
2009/01/05 01:28:19
I don't think it's possible to cause a cycle using
| |
| 1529 PyObject_HEAD_INIT(0) | |
| 1530 0, /* tp_internal */ | |
| 1531 "make_scanner", /* tp_name */ | |
| 1532 sizeof(PyScannerObject), /* tp_basicsize */ | |
| 1533 0, /* tp_itemsize */ | |
| 1534 scanner_dealloc, /* tp_dealloc */ | |
| 1535 0, /* tp_print */ | |
| 1536 0, /* tp_getattr */ | |
| 1537 0, /* tp_setattr */ | |
| 1538 0, /* tp_compare */ | |
| 1539 0, /* tp_repr */ | |
| 1540 0, /* tp_as_number */ | |
| 1541 0, /* tp_as_sequence */ | |
| 1542 0, /* tp_as_mapping */ | |
| 1543 0, /* tp_hash */ | |
| 1544 scanner_call, /* tp_call */ | |
| 1545 0, /* tp_str */ | |
| 1546 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ | |
| 1547 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ | |
| 1548 0, /* tp_as_buffer */ | |
| 1549 Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
| 1550 scanner_doc, /* tp_doc */ | |
| 1551 0, /* tp_traverse */ | |
| 1552 0, /* tp_clear */ | |
| 1553 0, /* tp_richcompare */ | |
| 1554 0, /* tp_weaklistoffset */ | |
| 1555 0, /* tp_iter */ | |
| 1556 0, /* tp_iternext */ | |
| 1557 0, /* tp_methods */ | |
| 1558 scanner_members, /* tp_members */ | |
| 1559 0, /* tp_getset */ | |
| 1560 0, /* tp_base */ | |
| 1561 0, /* tp_dict */ | |
| 1562 0, /* tp_descr_get */ | |
| 1563 0, /* tp_descr_set */ | |
| 1564 0, /* tp_dictoffset */ | |
| 1565 scanner_init, /* tp_init */ | |
| 1566 0,/* PyType_GenericAlloc, */ /* tp_alloc */ | |
| 1567 0,/* PyType_GenericNew, */ /* tp_new */ | |
| 1568 0,/* _PyObject_Del, */ /* tp_free */ | |
| 1569 }; | |
| 1570 | |
| 1571 static int | |
| 1572 encoder_init(PyObject *self, PyObject *args, PyObject *kwds) | |
| 1573 { | |
| 1574 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_sep arator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; | |
| 1575 | |
| 1576 PyEncoderObject *s; | |
| 1577 PyObject *allow_nan; | |
| 1578 | |
| 1579 assert(PyEncoder_Check(self)); | |
| 1580 s = (PyEncoderObject *)self; | |
| 1581 | |
| 1582 s->markers = NULL; | |
| 1583 s->defaultfn = NULL; | |
| 1584 s->encoder = NULL; | |
| 1585 s->indent = NULL; | |
| 1586 s->key_separator = NULL; | |
| 1587 s->item_separator = NULL; | |
| 1588 s->sort_keys = NULL; | |
| 1589 s->skipkeys = NULL; | |
| 1590 | |
| 1591 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlis t, | |
| 1592 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) | |
| 1593 return -1; | |
| 1594 | |
| 1595 Py_INCREF(s->markers); | |
| 1596 Py_INCREF(s->defaultfn); | |
| 1597 Py_INCREF(s->encoder); | |
| 1598 Py_INCREF(s->indent); | |
| 1599 Py_INCREF(s->key_separator); | |
| 1600 Py_INCREF(s->item_separator); | |
| 1601 Py_INCREF(s->sort_keys); | |
| 1602 Py_INCREF(s->skipkeys); | |
| 1603 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s ->encoder) == (PyCFunction)py_encode_basestring_ascii); | |
| 1604 s->allow_nan = PyObject_IsTrue(allow_nan); | |
| 1605 return 0; | |
| 1606 } | |
| 1607 | |
| 1608 static PyObject * | |
| 1609 encoder_call(PyObject *self, PyObject *args, PyObject *kwds) | |
| 1610 { | |
| 1611 static char *kwlist[] = {"obj", "_current_indent_level", NULL}; | |
| 1612 PyObject *obj; | |
| 1613 PyObject *rval; | |
| 1614 Py_ssize_t indent_level; | |
| 1615 PyEncoderObject *s; | |
| 1616 assert(PyEncoder_Check(self)); | |
| 1617 s = (PyEncoderObject *)self; | |
| 1618 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, | |
| 1619 &obj, _convertPyInt_AsSsize_t, &indent_level)) | |
| 1620 return NULL; | |
| 1621 rval = PyList_New(0); | |
| 1622 if (rval == NULL) | |
| 1623 return NULL; | |
| 1624 if (encoder_listencode_obj(s, rval, obj, indent_level)) { | |
| 1625 Py_DECREF(rval); | |
| 1626 return NULL; | |
| 1627 } | |
| 1628 return rval; | |
| 1629 } | |
| 1630 | |
| 1631 static PyObject * | |
| 1632 _encoded_const(PyObject *obj) | |
| 1633 { | |
| 1634 if (obj == Py_None) { | |
| 1635 static PyObject *s_null = NULL; | |
| 1636 if (s_null == NULL) { | |
| 1637 s_null = PyString_InternFromString("null"); | |
| 1638 } | |
| 1639 Py_INCREF(s_null); | |
| 1640 return s_null; | |
| 1641 } | |
| 1642 else if (obj == Py_True) { | |
| 1643 static PyObject *s_true = NULL; | |
| 1644 if (s_true == NULL) { | |
| 1645 s_true = PyString_InternFromString("true"); | |
| 1646 } | |
| 1647 Py_INCREF(s_true); | |
| 1648 return s_true; | |
| 1649 } | |
| 1650 else if (obj == Py_False) { | |
| 1651 static PyObject *s_false = NULL; | |
| 1652 if (s_false == NULL) { | |
| 1653 s_false = PyString_InternFromString("false"); | |
| 1654 } | |
| 1655 Py_INCREF(s_false); | |
| 1656 return s_false; | |
| 1657 } | |
| 1658 else { | |
| 1659 PyErr_SetString(PyExc_ValueError, "not a const"); | |
| 1660 return NULL; | |
| 1661 } | |
| 1662 } | |
| 1663 | |
| 1664 static PyObject * | |
| 1665 encoder_encode_float(PyEncoderObject *s, PyObject *obj) | |
| 1666 { | |
| 1667 double i = PyFloat_AS_DOUBLE(obj); | |
| 1668 if (!Py_IS_FINITE(i)) { | |
| 1669 if (!s->allow_nan) { | |
| 1670 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); | |
| 1671 return NULL; | |
| 1672 } | |
| 1673 if (i > 0) { | |
| 1674 return PyString_FromString("Infinity"); | |
| 1675 } | |
| 1676 else if (i < 0) { | |
| 1677 return PyString_FromString("-Infinity"); | |
| 1678 } | |
| 1679 else { | |
| 1680 return PyString_FromString("NaN"); | |
| 1681 } | |
| 1682 } | |
| 1683 /* Use a better float format here? */ | |
| 1684 return PyObject_Repr(obj); | |
| 1685 } | |
| 1686 | |
| 1687 static PyObject * | |
| 1688 encoder_encode_string(PyEncoderObject *s, PyObject *obj) | |
| 1689 { | |
| 1690 if (s->fast_encode) | |
| 1691 return py_encode_basestring_ascii(NULL, obj); | |
| 1692 else | |
| 1693 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); | |
| 1694 } | |
| 1695 | |
| 1696 static int | |
| 1697 _steal_list_append(PyObject *lst, PyObject *stolen) | |
| 1698 { | |
| 1699 int rval = PyList_Append(lst, stolen); | |
| 1700 Py_DECREF(stolen); | |
| 1701 return rval; | |
| 1702 } | |
| 1703 | |
| 1704 static int | |
| 1705 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi ze_t indent_level) | |
| 1706 { | |
| 1707 PyObject *newobj; | |
| 1708 int rv; | |
| 1709 | |
| 1710 if (obj == Py_None || obj == Py_True || obj == Py_False) { | |
| 1711 PyObject *cstr = _encoded_const(obj); | |
| 1712 if (cstr == NULL) | |
| 1713 return -1; | |
| 1714 return _steal_list_append(rval, cstr); | |
| 1715 } | |
| 1716 else if (PyString_Check(obj) || PyUnicode_Check(obj)) | |
| 1717 { | |
| 1718 PyObject *encoded = encoder_encode_string(s, obj); | |
| 1719 if (encoded == NULL) | |
| 1720 return -1; | |
| 1721 return _steal_list_append(rval, encoded); | |
| 1722 } | |
| 1723 else if (PyInt_Check(obj) || PyLong_Check(obj)) { | |
| 1724 PyObject *encoded = PyObject_Str(obj); | |
| 1725 if (encoded == NULL) | |
| 1726 return -1; | |
| 1727 return _steal_list_append(rval, encoded); | |
| 1728 } | |
| 1729 else if (PyFloat_Check(obj)) { | |
| 1730 PyObject *encoded = encoder_encode_float(s, obj); | |
| 1731 if (encoded == NULL) | |
| 1732 return -1; | |
| 1733 return _steal_list_append(rval, encoded); | |
| 1734 } | |
| 1735 else if (PyList_Check(obj) || PyTuple_Check(obj)) { | |
| 1736 return encoder_listencode_list(s, rval, obj, indent_level); | |
| 1737 } | |
| 1738 else if (PyDict_Check(obj)) { | |
| 1739 return encoder_listencode_dict(s, rval, obj, indent_level); | |
| 1740 } | |
| 1741 else { | |
| 1742 PyObject *ident = NULL; | |
| 1743 if (s->markers != Py_None) { | |
| 1744 int has_key; | |
| 1745 ident = PyLong_FromVoidPtr(obj); | |
| 1746 if (ident == NULL) | |
| 1747 return -1; | |
| 1748 has_key = PyDict_Contains(s->markers, ident); | |
| 1749 if (has_key) { | |
| 1750 if (has_key != -1) | |
| 1751 PyErr_SetString(PyExc_ValueError, "Circular reference detect ed"); | |
| 1752 Py_DECREF(ident); | |
| 1753 return -1; | |
| 1754 } | |
| 1755 if (PyDict_SetItem(s->markers, ident, obj)) { | |
| 1756 Py_DECREF(ident); | |
| 1757 return -1; | |
| 1758 } | |
| 1759 } | |
| 1760 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); | |
| 1761 if (newobj == NULL) { | |
| 1762 Py_DECREF(ident); | |
| 1763 return -1; | |
| 1764 } | |
| 1765 rv = encoder_listencode_obj(s, rval, newobj, indent_level); | |
| 1766 Py_DECREF(newobj); | |
| 1767 if (rv) { | |
| 1768 Py_DECREF(ident); | |
| 1769 return -1; | |
| 1770 } | |
| 1771 if (ident != NULL) { | |
| 1772 if (PyDict_DelItem(s->markers, ident)) { | |
| 1773 Py_DECREF(ident); | |
| 1774 return -1; | |
| 1775 } | |
| 1776 Py_DECREF(ident); | |
| 1777 } | |
| 1778 return rv; | |
| 1779 } | |
| 1780 } | |
| 1781 | |
| 1782 static int | |
| 1783 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss ize_t indent_level) | |
| 1784 { | |
| 1785 static PyObject *open_dict = NULL; | |
| 1786 static PyObject *close_dict = NULL; | |
| 1787 static PyObject *empty_dict = NULL; | |
| 1788 PyObject *kstr = NULL; | |
| 1789 PyObject *ident = NULL; | |
| 1790 PyObject *key, *value; | |
| 1791 Py_ssize_t pos; | |
| 1792 int skipkeys; | |
| 1793 Py_ssize_t idx; | |
| 1794 | |
| 1795 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { | |
| 1796 open_dict = PyString_InternFromString("{"); | |
| 1797 close_dict = PyString_InternFromString("}"); | |
| 1798 empty_dict = PyString_InternFromString("{}"); | |
| 1799 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) | |
| 1800 return -1; | |
| 1801 } | |
| 1802 if (PyDict_Size(dct) == 0) | |
| 1803 return PyList_Append(rval, empty_dict); | |
| 1804 | |
| 1805 if (s->markers != Py_None) { | |
| 1806 int has_key; | |
| 1807 ident = PyLong_FromVoidPtr(dct); | |
| 1808 if (ident == NULL) | |
| 1809 goto bail; | |
| 1810 has_key = PyDict_Contains(s->markers, ident); | |
| 1811 if (has_key) { | |
| 1812 if (has_key != -1) | |
| 1813 PyErr_SetString(PyExc_ValueError, "Circular reference detected") ; | |
| 1814 goto bail; | |
| 1815 } | |
| 1816 if (PyDict_SetItem(s->markers, ident, dct)) { | |
| 1817 goto bail; | |
| 1818 } | |
| 1819 } | |
| 1820 | |
| 1821 if (PyList_Append(rval, open_dict)) | |
| 1822 goto bail; | |
| 1823 | |
| 1824 if (s->indent != Py_None) { | |
| 1825 /* TODO: DOES NOT RUN */ | |
| 1826 indent_level += 1; | |
| 1827 /* | |
| 1828 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | |
| 1829 separator = _item_separator + newline_indent | |
| 1830 buf += newline_indent | |
| 1831 */ | |
| 1832 } | |
| 1833 | |
| 1834 /* TODO: C speedup not implemented for sort_keys */ | |
| 1835 | |
| 1836 pos = 0; | |
| 1837 skipkeys = PyObject_IsTrue(s->skipkeys); | |
| 1838 idx = 0; | |
| 1839 while (PyDict_Next(dct, &pos, &key, &value)) { | |
| 1840 PyObject *encoded; | |
| 1841 | |
| 1842 if (PyString_Check(key) || PyUnicode_Check(key)) { | |
| 1843 Py_INCREF(key); | |
| 1844 kstr = key; | |
| 1845 } | |
| 1846 else if (PyFloat_Check(key)) { | |
| 1847 kstr = encoder_encode_float(s, key); | |
| 1848 if (kstr == NULL) | |
| 1849 goto bail; | |
| 1850 } | |
| 1851 else if (PyInt_Check(key) || PyLong_Check(key)) { | |
| 1852 kstr = PyObject_Str(key); | |
| 1853 if (kstr == NULL) | |
| 1854 goto bail; | |
| 1855 } | |
| 1856 else if (key == Py_True || key == Py_False || key == Py_None) { | |
| 1857 kstr = _encoded_const(key); | |
| 1858 if (kstr == NULL) | |
| 1859 goto bail; | |
| 1860 } | |
| 1861 else if (skipkeys) { | |
| 1862 continue; | |
| 1863 } | |
| 1864 else { | |
| 1865 /* TODO: include repr of key */ | |
| 1866 PyErr_SetString(PyExc_ValueError, "keys must be a string"); | |
| 1867 goto bail; | |
| 1868 } | |
| 1869 | |
| 1870 if (idx) { | |
| 1871 if (PyList_Append(rval, s->item_separator)) | |
| 1872 goto bail; | |
| 1873 } | |
| 1874 | |
| 1875 encoded = encoder_encode_string(s, kstr); | |
| 1876 Py_CLEAR(kstr); | |
| 1877 if (encoded == NULL) | |
| 1878 goto bail; | |
| 1879 if (PyList_Append(rval, encoded)) { | |
| 1880 Py_DECREF(encoded); | |
| 1881 goto bail; | |
| 1882 } | |
| 1883 Py_DECREF(encoded); | |
| 1884 if (PyList_Append(rval, s->key_separator)) | |
| 1885 goto bail; | |
| 1886 if (encoder_listencode_obj(s, rval, value, indent_level)) | |
| 1887 goto bail; | |
| 1888 idx += 1; | |
| 1889 } | |
| 1890 if (ident != NULL) { | |
| 1891 if (PyDict_DelItem(s->markers, ident)) | |
| 1892 goto bail; | |
| 1893 Py_CLEAR(ident); | |
| 1894 } | |
| 1895 if (s->indent != Py_None) { | |
| 1896 /* TODO: DOES NOT RUN */ | |
| 1897 indent_level -= 1; | |
| 1898 /* | |
| 1899 yield '\n' + (' ' * (_indent * _current_indent_level)) | |
| 1900 */ | |
| 1901 } | |
| 1902 if (PyList_Append(rval, close_dict)) | |
| 1903 goto bail; | |
| 1904 return 0; | |
| 1905 | |
| 1906 bail: | |
| 1907 Py_XDECREF(kstr); | |
| 1908 Py_XDECREF(ident); | |
| 1909 return -1; | |
| 1910 } | |
| 1911 | |
| 1912 | |
| 1913 static int | |
| 1914 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss ize_t indent_level) | |
| 1915 { | |
| 1916 static PyObject *open_array = NULL; | |
| 1917 static PyObject *close_array = NULL; | |
| 1918 static PyObject *empty_array = NULL; | |
| 1919 PyObject *ident = NULL; | |
| 1920 PyObject *s_fast = NULL; | |
| 1921 Py_ssize_t num_items; | |
| 1922 PyObject **seq_items; | |
| 1923 Py_ssize_t i; | |
| 1924 | |
| 1925 if (open_array == NULL || close_array == NULL || empty_array == NULL) { | |
| 1926 open_array = PyString_InternFromString("["); | |
| 1927 close_array = PyString_InternFromString("]"); | |
| 1928 empty_array = PyString_InternFromString("[]"); | |
| 1929 if (open_array == NULL || close_array == NULL || empty_array == NULL) | |
| 1930 return -1; | |
| 1931 } | |
| 1932 ident = NULL; | |
| 1933 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); | |
| 1934 if (s_fast == NULL) | |
| 1935 return -1; | |
| 1936 num_items = PySequence_Fast_GET_SIZE(s_fast); | |
| 1937 if (num_items == 0) { | |
| 1938 Py_DECREF(s_fast); | |
| 1939 return PyList_Append(rval, empty_array); | |
| 1940 } | |
| 1941 | |
| 1942 if (s->markers != Py_None) { | |
| 1943 int has_key; | |
| 1944 ident = PyLong_FromVoidPtr(seq); | |
| 1945 if (ident == NULL) | |
| 1946 goto bail; | |
| 1947 has_key = PyDict_Contains(s->markers, ident); | |
| 1948 if (has_key) { | |
| 1949 if (has_key != -1) | |
| 1950 PyErr_SetString(PyExc_ValueError, "Circular reference detected") ; | |
| 1951 goto bail; | |
| 1952 } | |
| 1953 if (PyDict_SetItem(s->markers, ident, seq)) { | |
| 1954 goto bail; | |
| 1955 } | |
| 1956 } | |
| 1957 | |
| 1958 seq_items = PySequence_Fast_ITEMS(s_fast); | |
| 1959 if (PyList_Append(rval, open_array)) | |
| 1960 goto bail; | |
| 1961 if (s->indent != Py_None) { | |
| 1962 /* TODO: DOES NOT RUN */ | |
| 1963 indent_level += 1; | |
| 1964 /* | |
| 1965 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | |
| 1966 separator = _item_separator + newline_indent | |
| 1967 buf += newline_indent | |
| 1968 */ | |
| 1969 } | |
| 1970 for (i = 0; i < num_items; i++) { | |
| 1971 PyObject *obj = seq_items[i]; | |
| 1972 if (i) { | |
| 1973 if (PyList_Append(rval, s->item_separator)) | |
| 1974 goto bail; | |
| 1975 } | |
| 1976 if (encoder_listencode_obj(s, rval, obj, indent_level)) | |
| 1977 goto bail; | |
| 1978 } | |
| 1979 if (ident != NULL) { | |
| 1980 if (PyDict_DelItem(s->markers, ident)) | |
| 1981 goto bail; | |
| 1982 Py_CLEAR(ident); | |
| 1983 } | |
| 1984 if (s->indent != Py_None) { | |
| 1985 /* TODO: DOES NOT RUN */ | |
| 1986 indent_level -= 1; | |
| 1987 /* | |
| 1988 yield '\n' + (' ' * (_indent * _current_indent_level)) | |
| 1989 */ | |
| 1990 } | |
| 1991 if (PyList_Append(rval, close_array)) | |
| 1992 goto bail; | |
| 1993 Py_DECREF(s_fast); | |
| 1994 return 0; | |
| 1995 | |
| 1996 bail: | |
| 1997 Py_XDECREF(ident); | |
| 1998 Py_DECREF(s_fast); | |
| 1999 return -1; | |
| 2000 } | |
| 2001 | |
| 2002 static void | |
| 2003 encoder_dealloc(PyObject *self) | |
| 2004 { | |
| 2005 PyEncoderObject *s; | |
| 2006 assert(PyEncoder_Check(self)); | |
| 2007 s = (PyEncoderObject *)self; | |
| 2008 Py_CLEAR(s->markers); | |
| 2009 Py_CLEAR(s->defaultfn); | |
| 2010 Py_CLEAR(s->encoder); | |
| 2011 Py_CLEAR(s->indent); | |
| 2012 Py_CLEAR(s->key_separator); | |
| 2013 Py_CLEAR(s->item_separator); | |
| 2014 Py_CLEAR(s->sort_keys); | |
| 2015 Py_CLEAR(s->skipkeys); | |
| 2016 self->ob_type->tp_free(self); | |
| 2017 } | |
| 2018 | |
| 2019 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") ; | |
| 2020 | |
| 2021 static | |
| 2022 PyTypeObject PyEncoderType = { | |
| 2023 PyObject_HEAD_INIT(0) | |
| 2024 0, /* tp_internal */ | |
| 2025 "make_encoder", /* tp_name */ | |
|
Martin v. Löwis
2009/01/04 13:22:29
That is a confusing type name. How about "Encoder"
bob.ippolito
2009/01/05 01:28:19
It's not a type that's ever exposed to user code,
| |
| 2026 sizeof(PyEncoderObject), /* tp_basicsize */ | |
| 2027 0, /* tp_itemsize */ | |
| 2028 encoder_dealloc, /* tp_dealloc */ | |
| 2029 0, /* tp_print */ | |
| 2030 0, /* tp_getattr */ | |
| 2031 0, /* tp_setattr */ | |
| 2032 0, /* tp_compare */ | |
| 2033 0, /* tp_repr */ | |
| 2034 0, /* tp_as_number */ | |
| 2035 0, /* tp_as_sequence */ | |
| 2036 0, /* tp_as_mapping */ | |
| 2037 0, /* tp_hash */ | |
| 2038 encoder_call, /* tp_call */ | |
| 2039 0, /* tp_str */ | |
| 2040 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ | |
| 2041 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ | |
| 2042 0, /* tp_as_buffer */ | |
| 2043 Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
| 2044 encoder_doc, /* tp_doc */ | |
| 2045 0, /* tp_traverse */ | |
| 2046 0, /* tp_clear */ | |
| 2047 0, /* tp_richcompare */ | |
| 2048 0, /* tp_weaklistoffset */ | |
| 2049 0, /* tp_iter */ | |
| 2050 0, /* tp_iternext */ | |
| 2051 0, /* tp_methods */ | |
| 2052 encoder_members, /* tp_members */ | |
| 2053 0, /* tp_getset */ | |
| 2054 0, /* tp_base */ | |
| 2055 0, /* tp_dict */ | |
| 2056 0, /* tp_descr_get */ | |
| 2057 0, /* tp_descr_set */ | |
| 2058 0, /* tp_dictoffset */ | |
| 2059 encoder_init, /* tp_init */ | |
| 2060 0,/* PyType_GenericAlloc, */ /* tp_alloc */ | |
| 2061 0,/* PyType_GenericNew, */ /* tp_new */ | |
| 2062 0,/* _PyObject_Del, */ /* tp_free */ | |
| 2063 }; | |
| 2064 | |
| 605 static PyMethodDef json_methods[] = { | 2065 static PyMethodDef json_methods[] = { |
| 606 {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii, | 2066 {"encode_basestring_ascii", |
| 607 METH_O, pydoc_encode_basestring_ascii}, | 2067 (PyCFunction)py_encode_basestring_ascii, |
| 608 {"scanstring", (PyCFunction)py_scanstring, METH_VARARGS, | 2068 METH_O, |
| 609 pydoc_scanstring}, | 2069 pydoc_encode_basestring_ascii}, |
| 2070 {"scanstring", | |
| 2071 (PyCFunction)py_scanstring, | |
| 2072 METH_VARARGS, | |
| 2073 pydoc_scanstring}, | |
| 610 {NULL, NULL, 0, NULL} | 2074 {NULL, NULL, 0, NULL} |
| 611 }; | 2075 }; |
| 612 | 2076 |
| 613 PyDoc_STRVAR(module_doc, | 2077 PyDoc_STRVAR(module_doc, |
| 614 "json speedups\n"); | 2078 "json speedups\n"); |
| 615 | 2079 |
| 616 void | 2080 void |
| 617 init_json(void) | 2081 init_json(void) |
| 618 { | 2082 { |
| 619 PyObject *m; | 2083 PyObject *m; |
| 2084 PyScannerType.tp_getattro = PyObject_GenericGetAttr; | |
| 2085 PyScannerType.tp_setattro = PyObject_GenericSetAttr; | |
| 2086 PyScannerType.tp_alloc = PyType_GenericAlloc; | |
| 2087 PyScannerType.tp_new = PyType_GenericNew; | |
| 2088 PyScannerType.tp_free = _PyObject_Del; | |
| 2089 if (PyType_Ready(&PyScannerType) < 0) | |
| 2090 return; | |
| 2091 PyEncoderType.tp_getattro = PyObject_GenericGetAttr; | |
| 2092 PyEncoderType.tp_setattro = PyObject_GenericSetAttr; | |
| 2093 PyEncoderType.tp_alloc = PyType_GenericAlloc; | |
| 2094 PyEncoderType.tp_new = PyType_GenericNew; | |
| 2095 PyEncoderType.tp_free = _PyObject_Del; | |
| 2096 if (PyType_Ready(&PyEncoderType) < 0) | |
| 2097 return; | |
| 620 m = Py_InitModule3("_json", json_methods, module_doc); | 2098 m = Py_InitModule3("_json", json_methods, module_doc); |
| 2099 Py_INCREF((PyObject*)&PyScannerType); | |
| 2100 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); | |
| 2101 Py_INCREF((PyObject*)&PyEncoderType); | |
| 2102 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); | |
| 621 } | 2103 } |
| OLD | NEW |