Left: | ||
Right: |
OLD | NEW |
---|---|
1 #include "Python.h" | 1 #include "Python.h" |
2 #include "structmember.h" | |
2 | 3 |
3 #define DEFAULT_ENCODING "utf-8" | 4 #define DEFAULT_ENCODING "utf-8" |
5 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) | |
6 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) | |
7 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) | |
8 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) | |
9 | |
10 static PyTypeObject PyScannerType; | |
11 static PyTypeObject PyEncoderType; | |
12 | |
13 typedef struct _PyScannerObject { | |
14 PyObject_HEAD | |
15 PyObject *encoding; | |
16 PyObject *strict; | |
17 PyObject *object_hook; | |
18 PyObject *parse_float; | |
19 PyObject *parse_int; | |
20 PyObject *parse_constant; | |
21 } PyScannerObject; | |
22 | |
23 static PyMemberDef scanner_members[] = { | |
24 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encod ing"}, | |
25 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, | |
26 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, | |
27 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, | |
28 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "par se_int"}, | |
29 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READ ONLY, "parse_constant"}, | |
30 {NULL} | |
31 }; | |
32 | |
33 typedef struct _PyEncoderObject { | |
34 PyObject_HEAD | |
35 PyObject *markers; | |
36 PyObject *defaultfn; | |
37 PyObject *encoder; | |
38 PyObject *indent; | |
39 PyObject *key_separator; | |
40 PyObject *item_separator; | |
41 PyObject *sort_keys; | |
42 PyObject *skipkeys; | |
43 int fast_encode; | |
44 int allow_nan; | |
45 } PyEncoderObject; | |
46 | |
47 static PyMemberDef encoder_members[] = { | |
48 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers "}, | |
49 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "defau lt"}, | |
50 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder "}, | |
51 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, | |
52 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READON LY, "key_separator"}, | |
53 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READ ONLY, "item_separator"}, | |
54 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sor t_keys"}, | |
55 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipk eys"}, | |
56 {NULL} | |
57 }; | |
58 | |
59 static Py_ssize_t | |
60 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); | |
61 static PyObject * | |
62 ascii_escape_unicode(PyObject *pystr); | |
63 static PyObject * | |
64 ascii_escape_str(PyObject *pystr); | |
65 static PyObject * | |
66 py_encode_basestring_ascii(PyObject* self, PyObject *pystr); | |
67 void init_json(void); | |
68 static PyObject * | |
69 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n ext_idx_ptr); | |
70 static PyObject * | |
71 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ t *next_idx_ptr); | |
72 static PyObject * | |
73 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); | |
74 static int | |
75 scanner_init(PyObject *self, PyObject *args, PyObject *kwds); | |
76 static void | |
77 scanner_dealloc(PyObject *self); | |
78 static int | |
79 encoder_init(PyObject *self, PyObject *args, PyObject *kwds); | |
80 static void | |
81 encoder_dealloc(PyObject *self); | |
82 static int | |
83 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss ize_t indent_level); | |
84 static int | |
85 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi ze_t indent_level); | |
86 static int | |
87 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss ize_t indent_level); | |
88 static PyObject * | |
89 _encoded_const(PyObject *const); | |
90 static void | |
91 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); | |
92 static PyObject * | |
93 encoder_encode_string(PyEncoderObject *s, PyObject *obj); | |
94 static int | |
95 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); | |
96 static PyObject * | |
97 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); | |
98 static PyObject * | |
99 encoder_encode_float(PyEncoderObject *s, PyObject *obj); | |
100 | |
/* True when c can be copied into a JSON string literal unchanged:
 * printable ASCII (' ' through '~') other than backslash and double quote.
 * The argument is parenthesised so that expressions such as `x & 0xff`
 * are classified as a whole; note that it is still evaluated several times,
 * so it must not have side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four whitespace characters tested here: space, tab, LF, CR. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Length of one "\uXXXX" escape: the most output a single 16-bit code
 * unit can produce. */
#define MIN_EXPANSION 6

/* Worst-case output per input character.  On wide (UCS-4) builds a
 * character above 0xFFFF is written as a surrogate pair, i.e. two
 * "\uXXXX" escapes. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
12 | 111 |
112 static int | |
113 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) | |
114 { | |
115 *size_ptr = PyInt_AsSsize_t(o); | |
116 if (*size_ptr == -1 && PyErr_Occurred()); | |
117 return 1; | |
118 return 0; | |
119 } | |
120 | |
121 static PyObject * | |
122 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) | |
123 { | |
124 return PyInt_FromSsize_t(*size_ptr); | |
125 } | |
126 | |
13 static Py_ssize_t | 127 static Py_ssize_t |
14 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) | 128 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) |
15 { | 129 { |
16 Py_UNICODE x; | |
17 output[chars++] = '\\'; | 130 output[chars++] = '\\'; |
18 switch (c) { | 131 switch (c) { |
19 case '\\': output[chars++] = (char)c; break; | 132 case '\\': output[chars++] = (char)c; break; |
20 case '"': output[chars++] = (char)c; break; | 133 case '"': output[chars++] = (char)c; break; |
21 case '\b': output[chars++] = 'b'; break; | 134 case '\b': output[chars++] = 'b'; break; |
22 case '\f': output[chars++] = 'f'; break; | 135 case '\f': output[chars++] = 'f'; break; |
23 case '\n': output[chars++] = 'n'; break; | 136 case '\n': output[chars++] = 'n'; break; |
24 case '\r': output[chars++] = 'r'; break; | 137 case '\r': output[chars++] = 'r'; break; |
25 case '\t': output[chars++] = 't'; break; | 138 case '\t': output[chars++] = 't'; break; |
26 default: | 139 default: |
27 #ifdef Py_UNICODE_WIDE | 140 #ifdef Py_UNICODE_WIDE |
28 if (c >= 0x10000) { | 141 if (c >= 0x10000) { |
29 /* UTF-16 surrogate pair */ | 142 /* UTF-16 surrogate pair */ |
30 Py_UNICODE v = c - 0x10000; | 143 Py_UNICODE v = c - 0x10000; |
31 c = 0xd800 | ((v >> 10) & 0x3ff); | 144 c = 0xd800 | ((v >> 10) & 0x3ff); |
32 output[chars++] = 'u'; | 145 output[chars++] = 'u'; |
33 x = (c & 0xf000) >> 12; | 146 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; |
34 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 147 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; |
35 x = (c & 0x0f00) >> 8; | 148 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; |
36 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 149 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; |
37 x = (c & 0x00f0) >> 4; | |
38 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
39 x = (c & 0x000f); | |
40 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
41 c = 0xdc00 | (v & 0x3ff); | 150 c = 0xdc00 | (v & 0x3ff); |
42 output[chars++] = '\\'; | 151 output[chars++] = '\\'; |
43 } | 152 } |
44 #endif | 153 #endif |
45 output[chars++] = 'u'; | 154 output[chars++] = 'u'; |
46 x = (c & 0xf000) >> 12; | 155 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; |
47 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 156 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; |
48 x = (c & 0x0f00) >> 8; | 157 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; |
49 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | 158 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; |
50 x = (c & 0x00f0) >> 4; | |
51 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
52 x = (c & 0x000f); | |
53 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); | |
54 } | 159 } |
55 return chars; | 160 return chars; |
56 } | 161 } |
57 | 162 |
58 static PyObject * | 163 static PyObject * |
59 ascii_escape_unicode(PyObject *pystr) | 164 ascii_escape_unicode(PyObject *pystr) |
60 { | 165 { |
61 Py_ssize_t i; | 166 Py_ssize_t i; |
62 Py_ssize_t input_chars; | 167 Py_ssize_t input_chars; |
63 Py_ssize_t output_size; | 168 Py_ssize_t output_size; |
64 Py_ssize_t chars; | 169 Py_ssize_t chars; |
65 PyObject *rval; | 170 PyObject *rval; |
66 char *output; | 171 char *output; |
67 Py_UNICODE *input_unicode; | 172 Py_UNICODE *input_unicode; |
68 | 173 |
69 input_chars = PyUnicode_GET_SIZE(pystr); | 174 input_chars = PyUnicode_GET_SIZE(pystr); |
70 input_unicode = PyUnicode_AS_UNICODE(pystr); | 175 input_unicode = PyUnicode_AS_UNICODE(pystr); |
176 | |
71 /* One char input can be up to 6 chars output, estimate 4 of these */ | 177 /* One char input can be up to 6 chars output, estimate 4 of these */ |
72 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | 178 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; |
73 rval = PyString_FromStringAndSize(NULL, output_size); | 179 rval = PyString_FromStringAndSize(NULL, output_size); |
74 if (rval == NULL) { | 180 if (rval == NULL) { |
75 return NULL; | 181 return NULL; |
76 } | 182 } |
77 output = PyString_AS_STRING(rval); | 183 output = PyString_AS_STRING(rval); |
78 chars = 0; | 184 chars = 0; |
79 output[chars++] = '"'; | 185 output[chars++] = '"'; |
80 for (i = 0; i < input_chars; i++) { | 186 for (i = 0; i < input_chars; i++) { |
81 Py_UNICODE c = input_unicode[i]; | 187 Py_UNICODE c = input_unicode[i]; |
82 if (S_CHAR(c)) { | 188 if (S_CHAR(c)) { |
83 output[chars++] = (char)c; | 189 output[chars++] = (char)c; |
84 } | 190 } |
85 » else { | 191 else { |
86 chars = ascii_escape_char(c, output, chars); | 192 chars = ascii_escape_char(c, output, chars); |
87 } | 193 } |
88 if (output_size - chars < (1 + MAX_EXPANSION)) { | 194 if (output_size - chars < (1 + MAX_EXPANSION)) { |
89 /* There's more than four, so let's resize by a lot */ | 195 /* There's more than four, so let's resize by a lot */ |
90 output_size *= 2; | 196 output_size *= 2; |
Martin v. Löwis
2009/01/04 13:22:29
You might want to check for integer overflow here.
bob.ippolito
2009/01/05 01:28:19
_PyString_Resize checks for integer overflow, so i
| |
91 /* This is an upper bound */ | 197 /* This is an upper bound */ |
92 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { | 198 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { |
93 output_size = 2 + (input_chars * MAX_EXPANSION); | 199 output_size = 2 + (input_chars * MAX_EXPANSION); |
94 } | 200 } |
95 if (_PyString_Resize(&rval, output_size) == -1) { | 201 if (_PyString_Resize(&rval, output_size) == -1) { |
96 return NULL; | 202 return NULL; |
97 } | 203 } |
98 output = PyString_AS_STRING(rval); | 204 output = PyString_AS_STRING(rval); |
99 } | 205 } |
100 } | 206 } |
101 output[chars++] = '"'; | 207 output[chars++] = '"'; |
102 if (_PyString_Resize(&rval, chars) == -1) { | 208 if (_PyString_Resize(&rval, chars) == -1) { |
103 return NULL; | 209 return NULL; |
104 } | 210 } |
105 return rval; | 211 return rval; |
106 } | 212 } |
107 | 213 |
108 static PyObject * | 214 static PyObject * |
109 ascii_escape_str(PyObject *pystr) | 215 ascii_escape_str(PyObject *pystr) |
Martin v. Löwis
2009/01/04 13:22:29
Please attach a comment to each function, telling
bob.ippolito
2009/01/05 01:28:19
Done in the next patch
| |
110 { | 216 { |
111 Py_ssize_t i; | 217 Py_ssize_t i; |
112 Py_ssize_t input_chars; | 218 Py_ssize_t input_chars; |
113 Py_ssize_t output_size; | 219 Py_ssize_t output_size; |
114 Py_ssize_t chars; | 220 Py_ssize_t chars; |
115 PyObject *rval; | 221 PyObject *rval; |
116 char *output; | 222 char *output; |
117 char *input_str; | 223 char *input_str; |
118 | 224 |
119 input_chars = PyString_GET_SIZE(pystr); | 225 input_chars = PyString_GET_SIZE(pystr); |
120 input_str = PyString_AS_STRING(pystr); | 226 input_str = PyString_AS_STRING(pystr); |
121 /* One char input can be up to 6 chars output, estimate 4 of these */ | 227 |
122 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | 228 /* Fast path for a string that's already ASCII */ |
229 for (i = 0; i < input_chars; i++) { | |
230 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; | |
231 if (!S_CHAR(c)) { | |
232 /* If we have to escape something, scan the string for unicode */ | |
233 Py_ssize_t j; | |
234 for (j = i; j < input_chars; j++) { | |
235 c = (Py_UNICODE)(unsigned char)input_str[j]; | |
236 if (c > 0x7f) { | |
237 /* We hit a non-ASCII character, bail to unicode mode */ | |
238 PyObject *uni; | |
239 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict") ; | |
240 if (uni == NULL) { | |
241 return NULL; | |
242 } | |
243 rval = ascii_escape_unicode(uni); | |
244 Py_DECREF(uni); | |
245 return rval; | |
246 } | |
247 } | |
248 break; | |
249 } | |
250 } | |
251 | |
252 if (i == input_chars) { | |
253 /* Input is already ASCII */ | |
254 output_size = 2 + input_chars; | |
255 } | |
256 else { | |
257 /* One char input can be up to 6 chars output, estimate 4 of these */ | |
258 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; | |
259 } | |
123 rval = PyString_FromStringAndSize(NULL, output_size); | 260 rval = PyString_FromStringAndSize(NULL, output_size); |
124 if (rval == NULL) { | 261 if (rval == NULL) { |
125 return NULL; | 262 return NULL; |
126 } | 263 } |
127 output = PyString_AS_STRING(rval); | 264 output = PyString_AS_STRING(rval); |
128 chars = 0; | 265 output[0] = '"'; |
129 output[chars++] = '"'; | 266 ···· |
130 for (i = 0; i < input_chars; i++) { | 267 /* We know that everything up to i is ASCII already */ |
131 Py_UNICODE c = (Py_UNICODE)input_str[i]; | 268 chars = i + 1; |
269 memcpy(&output[1], input_str, i); | |
270 | |
271 for (; i < input_chars; i++) { | |
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; | |
132 if (S_CHAR(c)) { | 273 if (S_CHAR(c)) { |
133 output[chars++] = (char)c; | 274 output[chars++] = (char)c; |
134 } | 275 } |
135 » else if (c > 0x7F) { | 276 else { |
136 /* We hit a non-ASCII character, bail to unicode mode */ | |
137 PyObject *uni; | |
138 Py_DECREF(rval); | |
139 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); | |
140 if (uni == NULL) { | |
141 return NULL; | |
142 } | |
143 rval = ascii_escape_unicode(uni); | |
144 Py_DECREF(uni); | |
145 return rval; | |
146 } | |
147 » else { | |
148 chars = ascii_escape_char(c, output, chars); | 277 chars = ascii_escape_char(c, output, chars); |
149 } | 278 } |
150 /* An ASCII char can't possibly expand to a surrogate! */ | 279 /* An ASCII char can't possibly expand to a surrogate! */ |
151 if (output_size - chars < (1 + MIN_EXPANSION)) { | 280 if (output_size - chars < (1 + MIN_EXPANSION)) { |
152 /* There's more than four, so let's resize by a lot */ | 281 /* There's more than four, so let's resize by a lot */ |
153 output_size *= 2; | 282 output_size *= 2; |
154 if (output_size > 2 + (input_chars * MIN_EXPANSION)) { | 283 if (output_size > 2 + (input_chars * MIN_EXPANSION)) { |
155 output_size = 2 + (input_chars * MIN_EXPANSION); | 284 output_size = 2 + (input_chars * MIN_EXPANSION); |
156 } | 285 } |
157 if (_PyString_Resize(&rval, output_size) == -1) { | 286 if (_PyString_Resize(&rval, output_size) == -1) { |
158 return NULL; | 287 return NULL; |
159 } | 288 } |
160 output = PyString_AS_STRING(rval); | 289 output = PyString_AS_STRING(rval); |
161 } | 290 } |
162 } | 291 } |
163 output[chars++] = '"'; | 292 output[chars++] = '"'; |
164 if (_PyString_Resize(&rval, chars) == -1) { | 293 if (_PyString_Resize(&rval, chars) == -1) { |
165 return NULL; | 294 return NULL; |
166 } | 295 } |
167 return rval; | 296 return rval; |
168 } | 297 } |
169 | 298 |
170 void | 299 static void |
171 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) | 300 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) |
172 { | 301 { |
173 static PyObject *errmsg_fn = NULL; | 302 static PyObject *errmsg_fn = NULL; |
174 PyObject *pymsg; | 303 PyObject *pymsg; |
175 if (errmsg_fn == NULL) { | 304 if (errmsg_fn == NULL) { |
176 PyObject *decoder = PyImport_ImportModule("json.decoder"); | 305 PyObject *decoder = PyImport_ImportModule("json.decoder"); |
177 if (decoder == NULL) | 306 if (decoder == NULL) |
178 return; | 307 return; |
179 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); | 308 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); |
309 Py_DECREF(decoder); | |
180 if (errmsg_fn == NULL) | 310 if (errmsg_fn == NULL) |
181 return; | 311 return; |
182 Py_DECREF(decoder); | |
183 } | 312 } |
184 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end); | 313 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_Fro mSsize_t, &end); |
185 if (pymsg) { | 314 if (pymsg) { |
186 PyErr_SetObject(PyExc_ValueError, pymsg); | 315 PyErr_SetObject(PyExc_ValueError, pymsg); |
187 Py_DECREF(pymsg); | 316 Py_DECREF(pymsg); |
188 } | 317 } |
189 /* | |
190 | |
191 def linecol(doc, pos): | |
192 lineno = doc.count('\n', 0, pos) + 1 | |
193 if lineno == 1: | |
194 colno = pos | |
195 else: | |
196 colno = pos - doc.rindex('\n', 0, pos) | |
197 return lineno, colno | |
198 | |
199 def errmsg(msg, doc, pos, end=None): | |
200 lineno, colno = linecol(doc, pos) | |
201 if end is None: | |
202 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) | |
203 endlineno, endcolno = linecol(doc, end) | |
204 return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( | |
205 msg, lineno, colno, endlineno, endcolno, pos, end) | |
206 | |
207 */ | |
208 } | 318 } |
209 | 319 |
210 static PyObject * | 320 static PyObject * |
211 join_list_unicode(PyObject *lst) | 321 join_list_string(PyObject *lst) |
212 { | 322 { |
213 static PyObject *ustr = NULL; | 323 static PyObject *joinfn = NULL; |
214 static PyObject *joinstr = NULL; | 324 if (joinfn == NULL) { |
215 if (ustr == NULL) { | 325 PyObject *ustr = PyString_FromStringAndSize(NULL, 0); |
216 Py_UNICODE c = 0; | 326 if (ustr == NULL) |
217 ustr = PyUnicode_FromUnicode(&c, 0); | 327 return NULL; |
328 ········ | |
329 joinfn = PyObject_GetAttrString(ustr, "join"); | |
330 Py_DECREF(ustr); | |
331 if (joinfn == NULL) | |
332 return NULL; | |
218 } | 333 } |
219 if (joinstr == NULL) { | 334 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); |
220 joinstr = PyString_InternFromString("join"); | |
221 } | |
222 if (joinstr == NULL || ustr == NULL) { | |
223 return NULL; | |
224 } | |
225 return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL); | |
226 } | 335 } |
227 | 336 |
228 static PyObject * | 337 static PyObject * |
229 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict) | 338 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { |
339 PyObject *tpl; | |
340 PyObject *pyidx; | |
341 /* | |
342 steal a reference to rval, returns (rval, idx) | |
343 */ | |
344 if (rval == NULL) { | |
345 return NULL; | |
346 } | |
347 pyidx = PyInt_FromSsize_t(idx); | |
348 if (pyidx == NULL) { | |
349 Py_DECREF(rval); | |
350 return NULL; | |
351 } | |
352 tpl = PyTuple_New(2); | |
353 if (tpl == NULL) { | |
354 Py_DECREF(pyidx); | |
355 Py_DECREF(rval); | |
356 return NULL; | |
357 } | |
358 PyTuple_SET_ITEM(tpl, 0, rval); | |
359 PyTuple_SET_ITEM(tpl, 1, pyidx); | |
360 return tpl; | |
361 } | |
362 | |
363 static PyObject * | |
364 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_s size_t *next_end_ptr) | |
230 { | 365 { |
231 PyObject *rval; | 366 PyObject *rval; |
232 Py_ssize_t len = PyString_GET_SIZE(pystr); | 367 Py_ssize_t len = PyString_GET_SIZE(pystr); |
233 Py_ssize_t begin = end - 1; | 368 Py_ssize_t begin = end - 1; |
234 Py_ssize_t next = begin; | 369 Py_ssize_t next = begin; |
370 int has_unicode = 0; | |
235 char *buf = PyString_AS_STRING(pystr); | 371 char *buf = PyString_AS_STRING(pystr); |
236 PyObject *chunks = PyList_New(0); | 372 PyObject *chunks = PyList_New(0); |
237 if (chunks == NULL) { | 373 if (chunks == NULL) { |
238 goto bail; | 374 goto bail; |
239 } | 375 } |
240 if (end < 0 || len <= end) { | 376 if (end < 0 || len <= end) { |
241 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); | 377 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); |
242 goto bail; | 378 goto bail; |
243 } | 379 } |
244 while (1) { | 380 while (1) { |
245 /* Find the end of the string or the next escape */ | 381 /* Find the end of the string or the next escape */ |
246 Py_UNICODE c = 0; | 382 Py_UNICODE c = 0; |
247 PyObject *chunk = NULL; | 383 PyObject *chunk = NULL; |
248 for (next = end; next < len; next++) { | 384 for (next = end; next < len; next++) { |
249 c = buf[next]; | 385 c = (unsigned char)buf[next]; |
250 if (c == '"' || c == '\\') { | 386 if (c == '"' || c == '\\') { |
251 break; | 387 break; |
252 } | 388 } |
253 else if (strict && c <= 0x1f) { | 389 else if (strict && c <= 0x1f) { |
254 raise_errmsg("Invalid control character at", pystr, next); | 390 raise_errmsg("Invalid control character at", pystr, next); |
255 goto bail; | 391 goto bail; |
256 } | 392 } |
393 else if (c > 0x7f) { | |
394 has_unicode = 1; | |
395 } | |
257 } | 396 } |
258 if (!(c == '"' || c == '\\')) { | 397 if (!(c == '"' || c == '\\')) { |
259 raise_errmsg("Unterminated string starting at", pystr, begin); | 398 raise_errmsg("Unterminated string starting at", pystr, begin); |
260 goto bail; | 399 goto bail; |
261 } | 400 } |
262 /* Pick up this chunk if it's not zero length */ | 401 /* Pick up this chunk if it's not zero length */ |
263 if (next != end) { | 402 if (next != end) { |
264 PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end); | 403 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - en d); |
265 if (strchunk == NULL) { | 404 if (strchunk == NULL) { |
266 goto bail; | 405 goto bail; |
267 } | 406 } |
268 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); | 407 if (has_unicode) { |
269 Py_DECREF(strchunk); | 408 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); |
270 if (chunk == NULL) { | 409 Py_DECREF(strchunk); |
271 goto bail; | 410 if (chunk == NULL) { |
411 goto bail; | |
412 } | |
413 } | |
414 else { | |
415 chunk = strchunk; | |
272 } | 416 } |
273 if (PyList_Append(chunks, chunk)) { | 417 if (PyList_Append(chunks, chunk)) { |
274 Py_DECREF(chunk); | 418 Py_DECREF(chunk); |
275 goto bail; | 419 goto bail; |
276 } | 420 } |
277 Py_DECREF(chunk); | 421 Py_DECREF(chunk); |
278 } | 422 } |
279 next++; | 423 next++; |
280 if (c == '"') { | 424 if (c == '"') { |
281 end = next; | 425 end = next; |
(...skipping 26 matching lines...) Expand all Loading... | |
308 else { | 452 else { |
309 c = 0; | 453 c = 0; |
310 next++; | 454 next++; |
311 end = next + 4; | 455 end = next + 4; |
312 if (end >= len) { | 456 if (end >= len) { |
313 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); | 457 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); |
314 goto bail; | 458 goto bail; |
315 } | 459 } |
316 /* Decode 4 hex digits */ | 460 /* Decode 4 hex digits */ |
317 for (; next < end; next++) { | 461 for (; next < end; next++) { |
318 Py_ssize_t shl = (end - next - 1) << 2; | |
319 Py_UNICODE digit = buf[next]; | 462 Py_UNICODE digit = buf[next]; |
463 c <<= 4; | |
320 switch (digit) { | 464 switch (digit) { |
321 case '0': case '1': case '2': case '3': case '4': | 465 case '0': case '1': case '2': case '3': case '4': |
322 case '5': case '6': case '7': case '8': case '9': | 466 case '5': case '6': case '7': case '8': case '9': |
323 c |= (digit - '0') << shl; break; | 467 c |= (digit - '0'); break; |
324 case 'a': case 'b': case 'c': case 'd': case 'e': | 468 case 'a': case 'b': case 'c': case 'd': case 'e': |
325 case 'f': | 469 case 'f': |
326 c |= (digit - 'a' + 10) << shl; break; | 470 c |= (digit - 'a' + 10); break; |
327 case 'A': case 'B': case 'C': case 'D': case 'E': | 471 case 'A': case 'B': case 'C': case 'D': case 'E': |
328 case 'F': | 472 case 'F': |
329 c |= (digit - 'A' + 10) << shl; break; | 473 c |= (digit - 'A' + 10); break; |
330 default: | 474 default: |
331 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 475 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
332 goto bail; | 476 goto bail; |
333 } | 477 } |
334 } | 478 } |
335 #ifdef Py_UNICODE_WIDE | 479 #ifdef Py_UNICODE_WIDE |
336 /* Surrogate pair */ | 480 /* Surrogate pair */ |
337 if (c >= 0xd800 && c <= 0xdbff) { | 481 if ((c & 0xfc00) == 0xd800) { |
338 Py_UNICODE c2 = 0; | 482 Py_UNICODE c2 = 0; |
339 if (end + 6 >= len) { | 483 if (end + 6 >= len) { |
340 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 484 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
341 end - 5); | 485 goto bail; |
342 } | 486 } |
343 if (buf[next++] != '\\' || buf[next++] != 'u') { | 487 if (buf[next++] != '\\' || buf[next++] != 'u') { |
344 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 488 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
345 end - 5); | 489 goto bail; |
346 } | 490 } |
347 end += 6; | 491 end += 6; |
348 /* Decode 4 hex digits */ | 492 /* Decode 4 hex digits */ |
349 for (; next < end; next++) { | 493 for (; next < end; next++) { |
350 Py_ssize_t shl = (end - next - 1) << 2; | 494 c2 <<= 4; |
351 Py_UNICODE digit = buf[next]; | 495 Py_UNICODE digit = buf[next]; |
352 switch (digit) { | 496 switch (digit) { |
353 case '0': case '1': case '2': case '3': case '4': | 497 case '0': case '1': case '2': case '3': case '4': |
354 case '5': case '6': case '7': case '8': case '9': | 498 case '5': case '6': case '7': case '8': case '9': |
355 c2 |= (digit - '0') << shl; break; | 499 c2 |= (digit - '0'); break; |
356 case 'a': case 'b': case 'c': case 'd': case 'e': | 500 case 'a': case 'b': case 'c': case 'd': case 'e': |
357 case 'f': | 501 case 'f': |
358 c2 |= (digit - 'a' + 10) << shl; break; | 502 c2 |= (digit - 'a' + 10); break; |
359 case 'A': case 'B': case 'C': case 'D': case 'E': | 503 case 'A': case 'B': case 'C': case 'D': case 'E': |
360 case 'F': | 504 case 'F': |
361 c2 |= (digit - 'A' + 10) << shl; break; | 505 c2 |= (digit - 'A' + 10); break; |
362 default: | 506 default: |
363 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 507 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
364 goto bail; | 508 goto bail; |
365 } | 509 } |
366 } | 510 } |
511 if ((c2 & 0xfc00) != 0xdc00) { | |
512 raise_errmsg("Unpaired high surrogate", pystr, end - 5); | |
513 goto bail; | |
514 } | |
367 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); | 515 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); |
368 } | 516 } |
517 else if ((c & 0xfc00) == 0xdc00) { | |
518 raise_errmsg("Unpaired low surrogate", pystr, end - 5); | |
519 goto bail; | |
520 } | |
369 #endif | 521 #endif |
370 } | 522 } |
371 chunk = PyUnicode_FromUnicode(&c, 1); | 523 if (c > 0x7f) { |
372 if (chunk == NULL) { | 524 has_unicode = 1; |
373 goto bail; | 525 } |
526 if (has_unicode) { | |
527 chunk = PyUnicode_FromUnicode(&c, 1); | |
528 if (chunk == NULL) { | |
529 goto bail; | |
530 } | |
531 } | |
532 else { | |
533 char c_char = Py_CHARMASK(c); | |
534 chunk = PyString_FromStringAndSize(&c_char, 1); | |
535 if (chunk == NULL) { | |
536 goto bail; | |
537 } | |
374 } | 538 } |
375 if (PyList_Append(chunks, chunk)) { | 539 if (PyList_Append(chunks, chunk)) { |
376 Py_DECREF(chunk); | 540 Py_DECREF(chunk); |
377 goto bail; | 541 goto bail; |
378 } | 542 } |
379 Py_DECREF(chunk); | 543 Py_DECREF(chunk); |
380 } | 544 } |
381 | 545 |
382 rval = join_list_unicode(chunks); | 546 rval = join_list_string(chunks); |
383 if (rval == NULL) { | 547 if (rval == NULL) { |
384 goto bail; | 548 goto bail; |
385 } | 549 } |
386 Py_CLEAR(chunks); | 550 Py_CLEAR(chunks); |
387 return Py_BuildValue("(Nn)", rval, end); | 551 *next_end_ptr = end; |
552 return rval; | |
388 bail: | 553 bail: |
554 *next_end_ptr = -1; | |
389 Py_XDECREF(chunks); | 555 Py_XDECREF(chunks); |
390 return NULL; | 556 return NULL; |
391 } | 557 } |
392 | 558 |
393 | 559 |
394 static PyObject * | 560 static PyObject * |
395 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict) | 561 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next _end_ptr) |
396 { | 562 { |
397 PyObject *rval; | 563 PyObject *rval; |
398 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); | 564 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); |
399 Py_ssize_t begin = end - 1; | 565 Py_ssize_t begin = end - 1; |
400 Py_ssize_t next = begin; | 566 Py_ssize_t next = begin; |
401 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); | 567 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); |
402 PyObject *chunks = PyList_New(0); | 568 PyObject *chunks = PyList_New(0); |
403 if (chunks == NULL) { | 569 if (chunks == NULL) { |
404 goto bail; | 570 goto bail; |
405 } | 571 } |
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
469 else { | 635 else { |
470 c = 0; | 636 c = 0; |
471 next++; | 637 next++; |
472 end = next + 4; | 638 end = next + 4; |
473 if (end >= len) { | 639 if (end >= len) { |
474 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); | 640 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); |
475 goto bail; | 641 goto bail; |
476 } | 642 } |
477 /* Decode 4 hex digits */ | 643 /* Decode 4 hex digits */ |
478 for (; next < end; next++) { | 644 for (; next < end; next++) { |
479 Py_ssize_t shl = (end - next - 1) << 2; | |
480 Py_UNICODE digit = buf[next]; | 645 Py_UNICODE digit = buf[next]; |
646 c <<= 4; | |
481 switch (digit) { | 647 switch (digit) { |
482 case '0': case '1': case '2': case '3': case '4': | 648 case '0': case '1': case '2': case '3': case '4': |
483 case '5': case '6': case '7': case '8': case '9': | 649 case '5': case '6': case '7': case '8': case '9': |
484 c |= (digit - '0') << shl; break; | 650 c |= (digit - '0'); break; |
485 case 'a': case 'b': case 'c': case 'd': case 'e': | 651 case 'a': case 'b': case 'c': case 'd': case 'e': |
486 case 'f': | 652 case 'f': |
487 c |= (digit - 'a' + 10) << shl; break; | 653 c |= (digit - 'a' + 10); break; |
488 case 'A': case 'B': case 'C': case 'D': case 'E': | 654 case 'A': case 'B': case 'C': case 'D': case 'E': |
489 case 'F': | 655 case 'F': |
490 c |= (digit - 'A' + 10) << shl; break; | 656 c |= (digit - 'A' + 10); break; |
491 default: | 657 default: |
492 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 658 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
493 goto bail; | 659 goto bail; |
494 } | 660 } |
495 } | 661 } |
496 #ifdef Py_UNICODE_WIDE | 662 #ifdef Py_UNICODE_WIDE |
497 /* Surrogate pair */ | 663 /* Surrogate pair */ |
498 if (c >= 0xd800 && c <= 0xdbff) { | 664 if ((c & 0xfc00) == 0xd800) { |
499 Py_UNICODE c2 = 0; | 665 Py_UNICODE c2 = 0; |
500 if (end + 6 >= len) { | 666 if (end + 6 >= len) { |
501 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 667 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
502 end - 5); | 668 goto bail; |
503 } | 669 } |
504 if (buf[next++] != '\\' || buf[next++] != 'u') { | 670 if (buf[next++] != '\\' || buf[next++] != 'u') { |
505 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, | 671 raise_errmsg("Unpaired high surrogate", pystr, end - 5); |
506 end - 5); | 672 goto bail; |
507 } | 673 } |
508 end += 6; | 674 end += 6; |
509 /* Decode 4 hex digits */ | 675 /* Decode 4 hex digits */ |
510 for (; next < end; next++) { | 676 for (; next < end; next++) { |
511 Py_ssize_t shl = (end - next - 1) << 2; | 677 c2 <<= 4; |
512 Py_UNICODE digit = buf[next]; | 678 Py_UNICODE digit = buf[next]; |
513 switch (digit) { | 679 switch (digit) { |
514 case '0': case '1': case '2': case '3': case '4': | 680 case '0': case '1': case '2': case '3': case '4': |
515 case '5': case '6': case '7': case '8': case '9': | 681 case '5': case '6': case '7': case '8': case '9': |
516 c2 |= (digit - '0') << shl; break; | 682 c2 |= (digit - '0'); break; |
517 case 'a': case 'b': case 'c': case 'd': case 'e': | 683 case 'a': case 'b': case 'c': case 'd': case 'e': |
518 case 'f': | 684 case 'f': |
519 c2 |= (digit - 'a' + 10) << shl; break; | 685 c2 |= (digit - 'a' + 10); break; |
520 case 'A': case 'B': case 'C': case 'D': case 'E': | 686 case 'A': case 'B': case 'C': case 'D': case 'E': |
521 case 'F': | 687 case 'F': |
522 c2 |= (digit - 'A' + 10) << shl; break; | 688 c2 |= (digit - 'A' + 10); break; |
523 default: | 689 default: |
524 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); | 690 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); |
525 goto bail; | 691 goto bail; |
526 } | 692 } |
527 } | 693 } |
694 if ((c2 & 0xfc00) != 0xdc00) { | |
695 raise_errmsg("Unpaired high surrogate", pystr, end - 5); | |
696 goto bail; | |
697 } | |
528 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); | 698 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); |
529 } | 699 } |
700 else if ((c & 0xfc00) == 0xdc00) { | |
701 raise_errmsg("Unpaired low surrogate", pystr, end - 5); | |
702 goto bail; | |
703 } | |
530 #endif | 704 #endif |
531 } | 705 } |
532 chunk = PyUnicode_FromUnicode(&c, 1); | 706 chunk = PyUnicode_FromUnicode(&c, 1); |
533 if (chunk == NULL) { | 707 if (chunk == NULL) { |
534 goto bail; | 708 goto bail; |
535 } | 709 } |
536 if (PyList_Append(chunks, chunk)) { | 710 if (PyList_Append(chunks, chunk)) { |
537 Py_DECREF(chunk); | 711 Py_DECREF(chunk); |
538 goto bail; | 712 goto bail; |
539 } | 713 } |
540 Py_DECREF(chunk); | 714 Py_DECREF(chunk); |
541 } | 715 } |
542 | 716 |
543 rval = join_list_unicode(chunks); | 717 rval = join_list_string(chunks); |
544 if (rval == NULL) { | 718 if (rval == NULL) { |
545 goto bail; | 719 goto bail; |
546 } | 720 } |
547 Py_CLEAR(chunks); | 721 Py_DECREF(chunks); |
548 return Py_BuildValue("(Nn)", rval, end); | 722 *next_end_ptr = end; |
723 return rval; | |
549 bail: | 724 bail: |
725 *next_end_ptr = -1; | |
550 Py_XDECREF(chunks); | 726 Py_XDECREF(chunks); |
551 return NULL; | 727 return NULL; |
552 } | 728 } |
553 | 729 |
554 PyDoc_STRVAR(pydoc_scanstring, | 730 PyDoc_STRVAR(pydoc_scanstring, |
555 "scanstring(basestring, end, encoding) -> (str, end)\n"); | 731 "scanstring(basestring, end, encoding) -> (str, end)\n" |
732 "\n" | |
733 "..." | |
Martin v. Löwis
2009/01/04 13:22:29
Some text should probably be added here.
bob.ippolito
2009/01/05 01:28:19
Done in the next patch.
| |
734 ); | |
556 | 735 |
557 static PyObject * | 736 static PyObject * |
558 py_scanstring(PyObject* self, PyObject *args) | 737 py_scanstring(PyObject* self, PyObject *args) |
559 { | 738 { |
560 PyObject *pystr; | 739 PyObject *pystr; |
740 PyObject *rval; | |
561 Py_ssize_t end; | 741 Py_ssize_t end; |
742 Py_ssize_t next_end = -1; | |
562 char *encoding = NULL; | 743 char *encoding = NULL; |
563 int strict = 0; | 744 int strict = 0; |
564 if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &st rict)) { | 745 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsi ze_t, &end, &encoding, &strict)) { |
565 return NULL; | 746 return NULL; |
566 } | 747 } |
567 if (encoding == NULL) { | 748 if (encoding == NULL) { |
568 encoding = DEFAULT_ENCODING; | 749 encoding = DEFAULT_ENCODING; |
569 } | 750 } |
570 if (PyString_Check(pystr)) { | 751 if (PyString_Check(pystr)) { |
571 return scanstring_str(pystr, end, encoding, strict); | 752 rval = scanstring_str(pystr, end, encoding, strict, &next_end); |
572 } | 753 } |
573 else if (PyUnicode_Check(pystr)) { | 754 else if (PyUnicode_Check(pystr)) { |
574 return scanstring_unicode(pystr, end, strict); | 755 rval = scanstring_unicode(pystr, end, strict, &next_end); |
575 } | 756 } |
576 else { | 757 else { |
577 PyErr_Format(PyExc_TypeError, | 758 PyErr_Format(PyExc_TypeError, |
578 "first argument must be a string or unicode, not %.80s", | 759 "first argument must be a string, not %.80s", |
579 Py_TYPE(pystr)->tp_name); | 760 Py_TYPE(pystr)->tp_name); |
580 return NULL; | 761 return NULL; |
581 } | 762 } |
763 return _build_rval_index_tuple(rval, next_end); | |
582 } | 764 } |
583 | 765 |
584 PyDoc_STRVAR(pydoc_encode_basestring_ascii, | 766 PyDoc_STRVAR(pydoc_encode_basestring_ascii, |
585 "encode_basestring_ascii(basestring) -> str\n"); | 767 "encode_basestring_ascii(basestring) -> str\n" |
768 "\n" | |
769 "..." | |
770 ); | |
586 | 771 |
587 static PyObject * | 772 static PyObject * |
588 py_encode_basestring_ascii(PyObject* self, PyObject *pystr) | 773 py_encode_basestring_ascii(PyObject* self, PyObject *pystr) |
589 { | 774 { |
590 /* METH_O */ | 775 /* METH_O */ |
591 if (PyString_Check(pystr)) { | 776 if (PyString_Check(pystr)) { |
592 return ascii_escape_str(pystr); | 777 return ascii_escape_str(pystr); |
593 } | 778 } |
594 else if (PyUnicode_Check(pystr)) { | 779 else if (PyUnicode_Check(pystr)) { |
595 return ascii_escape_unicode(pystr); | 780 return ascii_escape_unicode(pystr); |
596 } | 781 } |
597 else { | 782 else { |
598 PyErr_Format(PyExc_TypeError, | 783 PyErr_Format(PyExc_TypeError, |
599 "first argument must be a string or unicode, not %.80s", | 784 "first argument must be a string, not %.80s", |
600 Py_TYPE(pystr)->tp_name); | 785 Py_TYPE(pystr)->tp_name); |
601 return NULL; | 786 return NULL; |
602 } | 787 } |
603 } | 788 } |
604 | 789 |
790 static void | |
791 scanner_dealloc(PyObject *self) | |
792 { | |
793 PyScannerObject *s; | |
794 assert(PyScanner_Check(self)); | |
795 s = (PyScannerObject *)self; | |
796 Py_CLEAR(s->encoding); | |
797 Py_CLEAR(s->strict); | |
798 Py_CLEAR(s->object_hook); | |
799 Py_CLEAR(s->parse_float); | |
800 Py_CLEAR(s->parse_int); | |
801 Py_CLEAR(s->parse_constant); | |
802 self->ob_type->tp_free(self); | |
803 } | |
804 | |
805 static PyObject * | |
806 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ t *next_idx_ptr) { | |
807 char *str = PyString_AS_STRING(pystr); | |
808 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | |
809 PyObject *rval = PyDict_New(); | |
810 PyObject *key = NULL; | |
811 PyObject *val = NULL; | |
812 char *encoding = PyString_AS_STRING(s->encoding); | |
813 int strict = PyObject_IsTrue(s->strict); | |
814 Py_ssize_t next_idx; | |
815 if (rval == NULL) | |
816 return NULL; | |
817 | |
818 /* skip whitespace after { */ | |
819 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
820 | |
821 /* only loop if the object is non-empty */ | |
822 if (idx <= end_idx && str[idx] != '}') { | |
823 while (idx <= end_idx) { | |
824 /* read key */ | |
825 if (str[idx] != '"') { | |
826 raise_errmsg("Expecting property name", pystr, idx); | |
827 goto bail; | |
828 } | |
829 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); | |
830 if (key == NULL) | |
831 goto bail; | |
832 idx = next_idx; | |
833 ············ | |
834 /* skip whitespace between key and : delimiter, read :, skip whitesp ace */ | |
835 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
836 if (idx > end_idx || str[idx] != ':') { | |
837 raise_errmsg("Expecting : delimiter", pystr, idx); | |
838 goto bail; | |
839 } | |
840 idx++; | |
841 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
842 ············ | |
843 /* read any JSON data type */ | |
844 val = scan_once_str(s, pystr, idx, &next_idx); | |
845 if (val == NULL) | |
846 goto bail; | |
847 | |
848 if (PyDict_SetItem(rval, key, val) == -1) | |
849 goto bail; | |
850 | |
851 Py_CLEAR(key); | |
852 Py_CLEAR(val); | |
853 idx = next_idx; | |
854 ············ | |
855 /* skip whitespace before } or , */ | |
856 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
857 | |
858 /* bail if the object is closed or we didn't get the , delimiter */ | |
859 if (idx > end_idx) break; | |
860 if (str[idx] == '}') { | |
861 break; | |
862 } | |
863 else if (str[idx] != ',') { | |
864 raise_errmsg("Expecting , delimiter", pystr, idx); | |
865 goto bail; | |
866 } | |
867 idx++; | |
868 | |
869 /* skip whitespace after , delimiter */ | |
870 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
871 } | |
872 } | |
873 /* verify that idx < end_idx, str[idx] should be '}' */ | |
874 if (idx > end_idx || str[idx] != '}') { | |
875 raise_errmsg("Expecting object", pystr, end_idx); | |
876 goto bail; | |
877 } | |
878 /* if object_hook is not None: rval = object_hook(rval) */ | |
879 if (s->object_hook != Py_None) { | |
880 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); | |
881 if (val == NULL) | |
882 goto bail; | |
883 Py_DECREF(rval); | |
884 rval = val; | |
885 val = NULL; | |
886 } | |
887 *next_idx_ptr = idx + 1; | |
888 return rval; | |
889 bail: | |
890 Py_XDECREF(key); | |
891 Py_XDECREF(val); | |
892 Py_DECREF(rval); | |
893 return NULL;···· | |
894 } | |
895 | |
896 static PyObject * | |
897 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ss ize_t *next_idx_ptr) { | |
898 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
899 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | |
900 PyObject *val = NULL; | |
901 PyObject *rval = PyDict_New(); | |
902 PyObject *key = NULL; | |
903 int strict = PyObject_IsTrue(s->strict); | |
904 Py_ssize_t next_idx; | |
905 if (rval == NULL) | |
906 return NULL; | |
907 ···· | |
908 /* skip whitespace after { */ | |
909 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
910 | |
911 /* only loop if the object is non-empty */ | |
912 if (idx <= end_idx && str[idx] != '}') { | |
913 while (idx <= end_idx) { | |
914 /* read key */ | |
915 if (str[idx] != '"') { | |
916 raise_errmsg("Expecting property name", pystr, idx); | |
917 goto bail; | |
918 } | |
919 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); | |
920 if (key == NULL) | |
921 goto bail; | |
922 idx = next_idx; | |
923 | |
924 /* skip whitespace between key and : delimiter, read :, skip whitesp ace */ | |
925 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
926 if (idx > end_idx || str[idx] != ':') { | |
927 raise_errmsg("Expecting : delimiter", pystr, idx); | |
928 goto bail; | |
929 } | |
930 idx++; | |
931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
932 ············ | |
933 /* read any JSON term */ | |
934 val = scan_once_unicode(s, pystr, idx, &next_idx); | |
935 if (val == NULL) | |
936 goto bail; | |
937 | |
938 if (PyDict_SetItem(rval, key, val) == -1) | |
939 goto bail; | |
940 | |
941 Py_CLEAR(key); | |
942 Py_CLEAR(val); | |
943 idx = next_idx; | |
944 | |
945 /* skip whitespace before } or , */ | |
946 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
947 | |
948 /* bail if the object is closed or we didn't get the , delimiter */ | |
949 if (idx > end_idx) break; | |
950 if (str[idx] == '}') { | |
951 break; | |
952 } | |
953 else if (str[idx] != ',') { | |
954 raise_errmsg("Expecting , delimiter", pystr, idx); | |
955 goto bail; | |
956 } | |
957 idx++; | |
958 | |
959 /* skip whitespace after , delimiter */ | |
960 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
961 } | |
962 } | |
963 | |
964 /* verify that idx < end_idx, str[idx] should be '}' */ | |
965 if (idx > end_idx || str[idx] != '}') { | |
966 raise_errmsg("Expecting object", pystr, end_idx); | |
967 goto bail; | |
968 } | |
969 | |
970 /* if object_hook is not None: rval = object_hook(rval) */ | |
971 if (s->object_hook != Py_None) { | |
972 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); | |
973 if (val == NULL) | |
974 goto bail; | |
975 Py_DECREF(rval); | |
976 rval = val; | |
977 val = NULL; | |
978 } | |
979 *next_idx_ptr = idx + 1; | |
980 return rval; | |
981 bail: | |
982 Py_XDECREF(key); | |
983 Py_XDECREF(val); | |
984 Py_DECREF(rval); | |
985 return NULL; | |
986 } | |
987 | |
988 static PyObject * | |
989 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { | |
990 char *str = PyString_AS_STRING(pystr); | |
991 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | |
992 PyObject *val = NULL; | |
993 PyObject *rval = PyList_New(0); | |
994 Py_ssize_t next_idx; | |
995 if (rval == NULL) | |
996 return NULL; | |
997 | |
998 /* skip whitespace after [ */ | |
999 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
1000 | |
1001 /* only loop if the array is non-empty */ | |
1002 if (idx <= end_idx && str[idx] != ']') { | |
1003 while (idx <= end_idx) { | |
1004 | |
1005 /* read any JSON term and de-tuplefy the (rval, idx) */ | |
1006 val = scan_once_str(s, pystr, idx, &next_idx); | |
1007 if (val == NULL) | |
1008 goto bail; | |
1009 | |
1010 if (PyList_Append(rval, val) == -1) | |
1011 goto bail; | |
1012 | |
1013 Py_CLEAR(val); | |
1014 idx = next_idx; | |
1015 ············ | |
1016 /* skip whitespace between term and , */ | |
1017 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
1018 | |
1019 /* bail if the array is closed or we didn't get the , delimiter */ | |
1020 if (idx > end_idx) break; | |
1021 if (str[idx] == ']') { | |
1022 break; | |
1023 } | |
1024 else if (str[idx] != ',') { | |
1025 raise_errmsg("Expecting , delimiter", pystr, idx); | |
1026 goto bail; | |
1027 } | |
1028 idx++; | |
1029 ············ | |
1030 /* skip whitespace after , */ | |
1031 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
1032 } | |
1033 } | |
1034 | |
1035 /* verify that idx < end_idx, str[idx] should be ']' */ | |
1036 if (idx > end_idx || str[idx] != ']') { | |
1037 raise_errmsg("Expecting object", pystr, end_idx); | |
1038 goto bail; | |
1039 } | |
1040 *next_idx_ptr = idx + 1; | |
1041 return rval; | |
1042 bail: | |
1043 Py_XDECREF(val); | |
1044 Py_DECREF(rval); | |
1045 return NULL; | |
1046 } | |
1047 | |
1048 static PyObject * | |
1049 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssi ze_t *next_idx_ptr) { | |
1050 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
1051 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | |
1052 PyObject *val = NULL; | |
1053 PyObject *rval = PyList_New(0); | |
1054 Py_ssize_t next_idx; | |
1055 if (rval == NULL) | |
1056 return NULL; | |
1057 | |
1058 /* skip whitespace after [ */ | |
1059 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
1060 | |
1061 /* only loop if the array is non-empty */ | |
1062 if (idx <= end_idx && str[idx] != ']') { | |
1063 while (idx <= end_idx) { | |
1064 | |
1065 /* read any JSON term */ | |
1066 val = scan_once_unicode(s, pystr, idx, &next_idx); | |
1067 if (val == NULL) | |
1068 goto bail; | |
1069 | |
1070 if (PyList_Append(rval, val) == -1) | |
1071 goto bail; | |
1072 | |
1073 Py_CLEAR(val); | |
1074 idx = next_idx; | |
1075 | |
1076 /* skip whitespace between term and , */ | |
1077 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
1078 | |
1079 /* bail if the array is closed or we didn't get the , delimiter */ | |
1080 if (idx > end_idx) break; | |
1081 if (str[idx] == ']') { | |
1082 break; | |
1083 } | |
1084 else if (str[idx] != ',') { | |
1085 raise_errmsg("Expecting , delimiter", pystr, idx); | |
1086 goto bail; | |
1087 } | |
1088 idx++; | |
1089 | |
1090 /* skip whitespace after , */ | |
1091 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; | |
1092 } | |
1093 } | |
1094 | |
1095 /* verify that idx < end_idx, str[idx] should be ']' */ | |
1096 if (idx > end_idx || str[idx] != ']') { | |
1097 raise_errmsg("Expecting object", pystr, end_idx); | |
1098 goto bail; | |
1099 } | |
1100 *next_idx_ptr = idx + 1; | |
1101 return rval; | |
1102 bail: | |
1103 Py_XDECREF(val); | |
1104 Py_DECREF(rval); | |
1105 return NULL; | |
1106 } | |
1107 | |
1108 static PyObject * | |
1109 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t * next_idx_ptr) { | |
1110 PyObject *cstr; | |
1111 PyObject *rval; | |
1112 /* constant is "NaN", "Infinity", or "-Infinity" */ | |
1113 cstr = PyString_InternFromString(constant); | |
1114 if (cstr == NULL) | |
1115 return NULL; | |
1116 | |
1117 /* rval = parse_constant(constant) */ | |
1118 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); | |
1119 idx += PyString_GET_SIZE(cstr); | |
1120 Py_DECREF(cstr); | |
1121 *next_idx_ptr = idx; | |
1122 return rval; | |
1123 } | |
1124 | |
1125 static PyObject * | |
1126 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssiz e_t *next_idx_ptr) { | |
1127 char *str = PyString_AS_STRING(pystr); | |
1128 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; | |
1129 Py_ssize_t idx = start; | |
1130 int is_float = 0; | |
1131 PyObject *rval; | |
1132 PyObject *numstr; | |
1133 ···· | |
1134 /* read a sign if it's there, make sure it's not the end of the string */ | |
1135 if (str[idx] == '-') { | |
1136 idx++; | |
1137 if (idx > end_idx) { | |
1138 PyErr_SetNone(PyExc_StopIteration); | |
1139 return NULL; | |
1140 } | |
1141 } | |
1142 | |
1143 /* read as many integer digits as we find as long as it doesn't start with 0 */ | |
1144 if (str[idx] >= '1' && str[idx] <= '9') { | |
1145 idx++; | |
1146 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
1147 } | |
1148 /* if it starts with 0 we only expect one integer digit */ | |
1149 else if (str[idx] == '0') { | |
1150 idx++; | |
1151 } | |
1152 /* no integer digits, error */ | |
1153 else { | |
1154 PyErr_SetNone(PyExc_StopIteration); | |
1155 return NULL; | |
1156 } | |
1157 ···· | |
1158 /* if the next char is '.' followed by a digit then read all float digits */ | |
1159 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { | |
1160 is_float = 1; | |
1161 idx += 2; | |
1162 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
1163 } | |
1164 | |
1165 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack ) */ | |
1166 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { | |
1167 | |
1168 /* save the index of the 'e' or 'E' just in case we need to backtrack */ | |
1169 Py_ssize_t e_start = idx; | |
1170 idx++; | |
1171 | |
1172 /* read an exponent sign if present */ | |
1173 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; | |
1174 | |
1175 /* read all digits */ | |
1176 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
1177 | |
1178 /* if we got a digit, then parse as float. if not, backtrack */ | |
1179 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { | |
1180 is_float = 1; | |
1181 } | |
1182 else { | |
1183 idx = e_start; | |
1184 } | |
1185 } | |
1186 ···· | |
1187 /* copy the section we determined to be a number */ | |
1188 numstr = PyString_FromStringAndSize(&str[start], idx - start); | |
1189 if (numstr == NULL) | |
1190 return NULL; | |
1191 if (is_float) { | |
1192 /* parse as a float using a fast path if available, otherwise call user defined method */ | |
1193 if (s->parse_float != (PyObject *)&PyFloat_Type) { | |
1194 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); | |
1195 } | |
1196 else { | |
1197 rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr) )); | |
1198 } | |
1199 } | |
1200 else { | |
1201 /* parse as an int using a fast path if available, otherwise call user d efined method */ | |
1202 if (s->parse_int != (PyObject *)&PyInt_Type) { | |
1203 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); | |
1204 } | |
1205 else { | |
1206 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); | |
1207 } | |
1208 } | |
1209 Py_DECREF(numstr); | |
1210 *next_idx_ptr = idx; | |
1211 return rval; | |
1212 } | |
1213 | |
1214 static PyObject * | |
1215 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ ssize_t *next_idx_ptr) { | |
1216 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
1217 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; | |
1218 Py_ssize_t idx = start; | |
1219 int is_float = 0; | |
1220 PyObject *rval; | |
1221 PyObject *numstr; | |
1222 | |
1223 /* read a sign if it's there, make sure it's not the end of the string */ | |
1224 if (str[idx] == '-') { | |
1225 idx++; | |
1226 if (idx > end_idx) { | |
1227 PyErr_SetNone(PyExc_StopIteration); | |
1228 return NULL; | |
1229 } | |
1230 } | |
1231 | |
1232 /* read as many integer digits as we find as long as it doesn't start with 0 */ | |
1233 if (str[idx] >= '1' && str[idx] <= '9') { | |
1234 idx++; | |
1235 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
1236 } | |
1237 /* if it starts with 0 we only expect one integer digit */ | |
1238 else if (str[idx] == '0') { | |
1239 idx++; | |
1240 } | |
1241 /* no integer digits, error */ | |
1242 else { | |
1243 PyErr_SetNone(PyExc_StopIteration); | |
1244 return NULL; | |
1245 } | |
1246 | |
1247 /* if the next char is '.' followed by a digit then read all float digits */ | |
1248 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { | |
1249 is_float = 1; | |
1250 idx += 2; | |
1251 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
1252 } | |
1253 | |
1254 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack ) */ | |
1255 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { | |
1256 Py_ssize_t e_start = idx; | |
1257 idx++; | |
1258 | |
1259 /* read an exponent sign if present */ | |
1260 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; | |
1261 | |
1262 /* read all digits */ | |
1263 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; | |
1264 | |
1265 /* if we got a digit, then parse as float. if not, backtrack */ | |
1266 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { | |
1267 is_float = 1; | |
1268 } | |
1269 else { | |
1270 idx = e_start; | |
1271 } | |
1272 } | |
1273 | |
1274 /* copy the section we determined to be a number */ | |
1275 numstr = PyUnicode_FromUnicode(&str[start], idx - start); | |
1276 if (numstr == NULL) | |
1277 return NULL; | |
1278 if (is_float) { | |
1279 /* parse as a float using a fast path if available, otherwise call user defined method */ | |
1280 if (s->parse_float != (PyObject *)&PyFloat_Type) { | |
1281 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); | |
1282 } | |
1283 else { | |
1284 rval = PyFloat_FromString(numstr, NULL); | |
1285 } | |
1286 } | |
1287 else { | |
1288 /* no fast path for unicode -> int, just call */ | |
1289 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); | |
1290 } | |
1291 Py_DECREF(numstr); | |
1292 *next_idx_ptr = idx; | |
1293 return rval; | |
1294 } | |
1295 | |
1296 static PyObject * | |
1297 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *n ext_idx_ptr) | |
1298 { | |
1299 char *str = PyString_AS_STRING(pystr); | |
1300 Py_ssize_t length = PyString_GET_SIZE(pystr); | |
1301 if (idx >= length) { | |
1302 PyErr_SetNone(PyExc_StopIteration); | |
1303 return NULL; | |
1304 } | |
1305 switch (str[idx]) { | |
1306 case '"': | |
1307 /* string */ | |
1308 return scanstring_str(pystr, idx + 1, | |
1309 PyString_AS_STRING(s->encoding), | |
1310 PyObject_IsTrue(s->strict), | |
1311 next_idx_ptr); | |
1312 case '{': | |
1313 /* object */ | |
1314 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr); | |
1315 case '[': | |
1316 /* array */ | |
1317 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr); | |
1318 case 'n': | |
1319 /* null */ | |
1320 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { | |
Martin v. Löwis
2009/01/04 13:22:29
Is this really faster than a strncmp?
bob.ippolito
2009/01/05 01:28:19
Probably not, but strncmp doesn't work for PyUnico
| |
1321 Py_INCREF(Py_None); | |
1322 *next_idx_ptr = idx + 4; | |
1323 return Py_None; | |
1324 } | |
1325 break; | |
1326 case 't': | |
1327 /* true */ | |
1328 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { | |
1329 Py_INCREF(Py_True); | |
1330 *next_idx_ptr = idx + 4; | |
1331 return Py_True; | |
1332 } | |
1333 break; | |
1334 case 'f': | |
1335 /* false */ | |
1336 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { | |
1337 Py_INCREF(Py_False); | |
1338 *next_idx_ptr = idx + 5; | |
1339 return Py_False; | |
1340 } | |
1341 break; | |
1342 case 'N': | |
1343 /* NaN */ | |
1344 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N' ) { | |
1345 return _parse_constant(s, "NaN", idx, next_idx_ptr); | |
1346 } | |
1347 break; | |
1348 case 'I': | |
1349 /* Infinity */ | |
1350 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { | |
1351 return _parse_constant(s, "Infinity", idx, next_idx_ptr); | |
1352 } | |
1353 break; | |
1354 case '-': | |
1355 /* -Infinity */ | |
1356 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { | |
1357 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); | |
1358 } | |
1359 break; | |
1360 } | |
1361 /* Didn't find a string, object, array, or named constant. Look for a number . */ | |
1362 return _match_number_str(s, pystr, idx, next_idx_ptr); | |
1363 } | |
1364 | |
1365 static PyObject * | |
1366 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_ t *next_idx_ptr) | |
1367 { | |
1368 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); | |
1369 Py_ssize_t length = PyUnicode_GET_SIZE(pystr); | |
1370 if (idx >= length) { | |
1371 PyErr_SetNone(PyExc_StopIteration); | |
1372 return NULL; | |
1373 } | |
1374 switch (str[idx]) { | |
1375 case '"': | |
1376 /* string */ | |
1377 return scanstring_unicode(pystr, idx + 1, | |
1378 PyObject_IsTrue(s->strict), | |
1379 next_idx_ptr); | |
1380 case '{': | |
1381 /* object */ | |
1382 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); | |
1383 case '[': | |
1384 /* array */ | |
1385 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); | |
1386 case 'n': | |
1387 /* null */ | |
1388 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { | |
1389 Py_INCREF(Py_None); | |
1390 *next_idx_ptr = idx + 4; | |
1391 return Py_None; | |
1392 } | |
1393 break; | |
1394 case 't': | |
1395 /* true */ | |
1396 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { | |
1397 Py_INCREF(Py_True); | |
1398 *next_idx_ptr = idx + 4; | |
1399 return Py_True; | |
1400 } | |
1401 break; | |
1402 case 'f': | |
1403 /* false */ | |
1404 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { | |
1405 Py_INCREF(Py_False); | |
1406 *next_idx_ptr = idx + 5; | |
1407 return Py_False; | |
1408 } | |
1409 break; | |
1410 case 'N': | |
1411 /* NaN */ | |
1412 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N' ) { | |
1413 return _parse_constant(s, "NaN", idx, next_idx_ptr); | |
1414 } | |
1415 break; | |
1416 case 'I': | |
1417 /* Infinity */ | |
1418 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { | |
1419 return _parse_constant(s, "Infinity", idx, next_idx_ptr); | |
1420 } | |
1421 break; | |
1422 case '-': | |
1423 /* -Infinity */ | |
1424 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { | |
1425 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); | |
1426 } | |
1427 break; | |
1428 } | |
1429 /* Didn't find a string, object, array, or named constant. Look for a number . */ | |
1430 return _match_number_unicode(s, pystr, idx, next_idx_ptr); | |
1431 } | |
1432 | |
1433 static PyObject * | |
1434 scanner_call(PyObject *self, PyObject *args, PyObject *kwds) | |
1435 { | |
1436 PyObject *pystr; | |
1437 PyObject *rval; | |
1438 Py_ssize_t idx; | |
1439 Py_ssize_t next_idx = -1; | |
1440 static char *kwlist[] = {"string", "idx", NULL}; | |
1441 PyScannerObject *s; | |
1442 assert(PyScanner_Check(self)); | |
1443 s = (PyScannerObject *)self; | |
1444 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr , _convertPyInt_AsSsize_t, &idx)) | |
1445 return NULL; | |
1446 | |
1447 if (PyString_Check(pystr)) { | |
1448 rval = scan_once_str(s, pystr, idx, &next_idx); | |
1449 } | |
1450 else if (PyUnicode_Check(pystr)) { | |
1451 rval = scan_once_unicode(s, pystr, idx, &next_idx); | |
1452 } | |
1453 else { | |
1454 PyErr_Format(PyExc_TypeError, | |
1455 "first argument must be a string, not %.80s", | |
1456 Py_TYPE(pystr)->tp_name); | |
1457 return NULL; | |
1458 } | |
1459 return _build_rval_index_tuple(rval, next_idx); | |
1460 } | |
1461 | |
1462 static int | |
1463 scanner_init(PyObject *self, PyObject *args, PyObject *kwds) | |
1464 { | |
1465 PyObject *ctx; | |
1466 static char *kwlist[] = {"context", NULL}; | |
1467 PyScannerObject *s; | |
1468 | |
1469 assert(PyScanner_Check(self)); | |
1470 s = (PyScannerObject *)self; | |
1471 | |
1472 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx) ) | |
1473 return -1; | |
1474 | |
1475 s->encoding = NULL; | |
1476 s->strict = NULL; | |
1477 s->object_hook = NULL; | |
1478 s->parse_float = NULL; | |
1479 s->parse_int = NULL; | |
1480 s->parse_constant = NULL; | |
1481 | |
1482 /* PyString_AS_STRING is used on encoding */ | |
1483 s->encoding = PyObject_GetAttrString(ctx, "encoding"); | |
1484 if (s->encoding == Py_None) { | |
1485 Py_DECREF(Py_None); | |
1486 s->encoding = PyString_InternFromString(DEFAULT_ENCODING); | |
1487 } | |
1488 else if (PyUnicode_Check(s->encoding)) { | |
1489 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); | |
1490 Py_DECREF(s->encoding); | |
1491 s->encoding = tmp; | |
1492 } | |
1493 if (s->encoding == NULL || !PyString_Check(s->encoding)) | |
1494 goto bail; | |
1495 ···· | |
1496 /* All of these will fail "gracefully" so we don't need to verify them */ | |
1497 s->strict = PyObject_GetAttrString(ctx, "strict"); | |
1498 if (s->strict == NULL) | |
1499 goto bail; | |
1500 s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); | |
1501 if (s->object_hook == NULL) | |
1502 goto bail; | |
1503 s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); | |
1504 if (s->parse_float == NULL) | |
1505 goto bail; | |
1506 s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); | |
1507 if (s->parse_int == NULL) | |
1508 goto bail; | |
1509 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); | |
1510 if (s->parse_constant == NULL) | |
1511 goto bail; | |
1512 ···· | |
1513 return 0; | |
1514 | |
1515 bail: | |
1516 Py_CLEAR(s->encoding); | |
1517 Py_CLEAR(s->strict); | |
1518 Py_CLEAR(s->object_hook); | |
1519 Py_CLEAR(s->parse_float); | |
1520 Py_CLEAR(s->parse_int); | |
1521 Py_CLEAR(s->parse_constant); | |
1522 return -1; | |
1523 } | |
1524 | |
1525 PyDoc_STRVAR(scanner_doc, "JSON scanner object"); | |
1526 | |
1527 static | |
1528 PyTypeObject PyScannerType = { | |
Martin v. Löwis
2009/01/04 13:22:29
I think scanner objects should participate in cycl
bob.ippolito
2009/01/05 01:28:19
I don't think it's possible to cause a cycle using
| |
1529 PyObject_HEAD_INIT(0) | |
1530 0, /* tp_internal */ | |
1531 "make_scanner", /* tp_name */ | |
1532 sizeof(PyScannerObject), /* tp_basicsize */ | |
1533 0, /* tp_itemsize */ | |
1534 scanner_dealloc, /* tp_dealloc */ | |
1535 0, /* tp_print */ | |
1536 0, /* tp_getattr */ | |
1537 0, /* tp_setattr */ | |
1538 0, /* tp_compare */ | |
1539 0, /* tp_repr */ | |
1540 0, /* tp_as_number */ | |
1541 0, /* tp_as_sequence */ | |
1542 0, /* tp_as_mapping */ | |
1543 0, /* tp_hash */ | |
1544 scanner_call, /* tp_call */ | |
1545 0, /* tp_str */ | |
1546 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ | |
1547 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ | |
1548 0, /* tp_as_buffer */ | |
1549 Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
1550 scanner_doc, /* tp_doc */ | |
1551 0, /* tp_traverse */ | |
1552 0, /* tp_clear */ | |
1553 0, /* tp_richcompare */ | |
1554 0, /* tp_weaklistoffset */ | |
1555 0, /* tp_iter */ | |
1556 0, /* tp_iternext */ | |
1557 0, /* tp_methods */ | |
1558 scanner_members, /* tp_members */ | |
1559 0, /* tp_getset */ | |
1560 0, /* tp_base */ | |
1561 0, /* tp_dict */ | |
1562 0, /* tp_descr_get */ | |
1563 0, /* tp_descr_set */ | |
1564 0, /* tp_dictoffset */ | |
1565 scanner_init, /* tp_init */ | |
1566 0,/* PyType_GenericAlloc, */ /* tp_alloc */ | |
1567 0,/* PyType_GenericNew, */ /* tp_new */ | |
1568 0,/* _PyObject_Del, */ /* tp_free */ | |
1569 }; | |
1570 | |
1571 static int | |
1572 encoder_init(PyObject *self, PyObject *args, PyObject *kwds) | |
1573 { | |
1574 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_sep arator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; | |
1575 | |
1576 PyEncoderObject *s; | |
1577 PyObject *allow_nan; | |
1578 | |
1579 assert(PyEncoder_Check(self)); | |
1580 s = (PyEncoderObject *)self; | |
1581 | |
1582 s->markers = NULL; | |
1583 s->defaultfn = NULL; | |
1584 s->encoder = NULL; | |
1585 s->indent = NULL; | |
1586 s->key_separator = NULL; | |
1587 s->item_separator = NULL; | |
1588 s->sort_keys = NULL; | |
1589 s->skipkeys = NULL; | |
1590 ···· | |
1591 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlis t, | |
1592 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan)) | |
1593 return -1; | |
1594 ···· | |
1595 Py_INCREF(s->markers); | |
1596 Py_INCREF(s->defaultfn); | |
1597 Py_INCREF(s->encoder); | |
1598 Py_INCREF(s->indent); | |
1599 Py_INCREF(s->key_separator); | |
1600 Py_INCREF(s->item_separator); | |
1601 Py_INCREF(s->sort_keys); | |
1602 Py_INCREF(s->skipkeys); | |
1603 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s ->encoder) == (PyCFunction)py_encode_basestring_ascii); | |
1604 s->allow_nan = PyObject_IsTrue(allow_nan); | |
1605 return 0; | |
1606 } | |
1607 | |
1608 static PyObject * | |
1609 encoder_call(PyObject *self, PyObject *args, PyObject *kwds) | |
1610 { | |
1611 static char *kwlist[] = {"obj", "_current_indent_level", NULL}; | |
1612 PyObject *obj; | |
1613 PyObject *rval; | |
1614 Py_ssize_t indent_level; | |
1615 PyEncoderObject *s; | |
1616 assert(PyEncoder_Check(self)); | |
1617 s = (PyEncoderObject *)self; | |
1618 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, | |
1619 &obj, _convertPyInt_AsSsize_t, &indent_level)) | |
1620 return NULL; | |
1621 rval = PyList_New(0); | |
1622 if (rval == NULL) | |
1623 return NULL; | |
1624 if (encoder_listencode_obj(s, rval, obj, indent_level)) { | |
1625 Py_DECREF(rval); | |
1626 return NULL; | |
1627 } | |
1628 return rval; | |
1629 } | |
1630 | |
1631 static PyObject * | |
1632 _encoded_const(PyObject *obj) | |
1633 { | |
1634 if (obj == Py_None) { | |
1635 static PyObject *s_null = NULL; | |
1636 if (s_null == NULL) { | |
1637 s_null = PyString_InternFromString("null"); | |
1638 } | |
1639 Py_INCREF(s_null); | |
1640 return s_null; | |
1641 } | |
1642 else if (obj == Py_True) { | |
1643 static PyObject *s_true = NULL; | |
1644 if (s_true == NULL) { | |
1645 s_true = PyString_InternFromString("true"); | |
1646 } | |
1647 Py_INCREF(s_true); | |
1648 return s_true; | |
1649 } | |
1650 else if (obj == Py_False) { | |
1651 static PyObject *s_false = NULL; | |
1652 if (s_false == NULL) { | |
1653 s_false = PyString_InternFromString("false"); | |
1654 } | |
1655 Py_INCREF(s_false); | |
1656 return s_false; | |
1657 } | |
1658 else { | |
1659 PyErr_SetString(PyExc_ValueError, "not a const"); | |
1660 return NULL; | |
1661 } | |
1662 } | |
1663 | |
1664 static PyObject * | |
1665 encoder_encode_float(PyEncoderObject *s, PyObject *obj) | |
1666 { | |
1667 double i = PyFloat_AS_DOUBLE(obj); | |
1668 if (!Py_IS_FINITE(i)) { | |
1669 if (!s->allow_nan) { | |
1670 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); | |
1671 return NULL; | |
1672 } | |
1673 if (i > 0) { | |
1674 return PyString_FromString("Infinity"); | |
1675 } | |
1676 else if (i < 0) { | |
1677 return PyString_FromString("-Infinity"); | |
1678 } | |
1679 else { | |
1680 return PyString_FromString("NaN"); | |
1681 } | |
1682 } | |
1683 /* Use a better float format here? */ | |
1684 return PyObject_Repr(obj); | |
1685 } | |
1686 | |
1687 static PyObject * | |
1688 encoder_encode_string(PyEncoderObject *s, PyObject *obj) | |
1689 { | |
1690 if (s->fast_encode) | |
1691 return py_encode_basestring_ascii(NULL, obj); | |
1692 else | |
1693 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); | |
1694 } | |
1695 | |
1696 static int | |
1697 _steal_list_append(PyObject *lst, PyObject *stolen) | |
1698 { | |
1699 int rval = PyList_Append(lst, stolen); | |
1700 Py_DECREF(stolen); | |
1701 return rval; | |
1702 } | |
1703 | |
1704 static int | |
1705 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssi ze_t indent_level) | |
1706 { | |
1707 PyObject *newobj; | |
1708 int rv; | |
1709 ···· | |
1710 if (obj == Py_None || obj == Py_True || obj == Py_False) { | |
1711 PyObject *cstr = _encoded_const(obj); | |
1712 if (cstr == NULL) | |
1713 return -1; | |
1714 return _steal_list_append(rval, cstr); | |
1715 } | |
1716 else if (PyString_Check(obj) || PyUnicode_Check(obj)) | |
1717 { | |
1718 PyObject *encoded = encoder_encode_string(s, obj); | |
1719 if (encoded == NULL) | |
1720 return -1; | |
1721 return _steal_list_append(rval, encoded); | |
1722 } | |
1723 else if (PyInt_Check(obj) || PyLong_Check(obj)) { | |
1724 PyObject *encoded = PyObject_Str(obj); | |
1725 if (encoded == NULL) | |
1726 return -1; | |
1727 return _steal_list_append(rval, encoded); | |
1728 } | |
1729 else if (PyFloat_Check(obj)) { | |
1730 PyObject *encoded = encoder_encode_float(s, obj); | |
1731 if (encoded == NULL) | |
1732 return -1; | |
1733 return _steal_list_append(rval, encoded); | |
1734 } | |
1735 else if (PyList_Check(obj) || PyTuple_Check(obj)) { | |
1736 return encoder_listencode_list(s, rval, obj, indent_level); | |
1737 } | |
1738 else if (PyDict_Check(obj)) { | |
1739 return encoder_listencode_dict(s, rval, obj, indent_level); | |
1740 } | |
1741 else { | |
1742 PyObject *ident = NULL; | |
1743 if (s->markers != Py_None) { | |
1744 int has_key; | |
1745 ident = PyLong_FromVoidPtr(obj); | |
1746 if (ident == NULL) | |
1747 return -1; | |
1748 has_key = PyDict_Contains(s->markers, ident); | |
1749 if (has_key) { | |
1750 if (has_key != -1) | |
1751 PyErr_SetString(PyExc_ValueError, "Circular reference detect ed"); | |
1752 Py_DECREF(ident); | |
1753 return -1; | |
1754 } | |
1755 if (PyDict_SetItem(s->markers, ident, obj)) { | |
1756 Py_DECREF(ident); | |
1757 return -1; | |
1758 } | |
1759 } | |
1760 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); | |
1761 if (newobj == NULL) { | |
1762 Py_DECREF(ident); | |
1763 return -1; | |
1764 } | |
1765 rv = encoder_listencode_obj(s, rval, newobj, indent_level); | |
1766 Py_DECREF(newobj); | |
1767 if (rv) { | |
1768 Py_DECREF(ident); | |
1769 return -1; | |
1770 } | |
1771 if (ident != NULL) { | |
1772 if (PyDict_DelItem(s->markers, ident)) { | |
1773 Py_DECREF(ident); | |
1774 return -1; | |
1775 } | |
1776 Py_DECREF(ident); | |
1777 } | |
1778 return rv; | |
1779 } | |
1780 } | |
1781 | |
1782 static int | |
1783 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ss ize_t indent_level) | |
1784 { | |
1785 static PyObject *open_dict = NULL; | |
1786 static PyObject *close_dict = NULL; | |
1787 static PyObject *empty_dict = NULL; | |
1788 PyObject *kstr = NULL; | |
1789 PyObject *ident = NULL; | |
1790 PyObject *key, *value; | |
1791 Py_ssize_t pos; | |
1792 int skipkeys; | |
1793 Py_ssize_t idx; | |
1794 ···· | |
1795 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { | |
1796 open_dict = PyString_InternFromString("{"); | |
1797 close_dict = PyString_InternFromString("}"); | |
1798 empty_dict = PyString_InternFromString("{}"); | |
1799 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) | |
1800 return -1; | |
1801 } | |
1802 if (PyDict_Size(dct) == 0) | |
1803 return PyList_Append(rval, empty_dict); | |
1804 ···· | |
1805 if (s->markers != Py_None) { | |
1806 int has_key; | |
1807 ident = PyLong_FromVoidPtr(dct); | |
1808 if (ident == NULL) | |
1809 goto bail; | |
1810 has_key = PyDict_Contains(s->markers, ident); | |
1811 if (has_key) { | |
1812 if (has_key != -1) | |
1813 PyErr_SetString(PyExc_ValueError, "Circular reference detected") ; | |
1814 goto bail; | |
1815 } | |
1816 if (PyDict_SetItem(s->markers, ident, dct)) { | |
1817 goto bail; | |
1818 } | |
1819 } | |
1820 | |
1821 if (PyList_Append(rval, open_dict)) | |
1822 goto bail; | |
1823 | |
1824 if (s->indent != Py_None) { | |
1825 /* TODO: DOES NOT RUN */ | |
1826 indent_level += 1; | |
1827 /* | |
1828 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | |
1829 separator = _item_separator + newline_indent | |
1830 buf += newline_indent | |
1831 */ | |
1832 } | |
1833 | |
1834 /* TODO: C speedup not implemented for sort_keys */ | |
1835 | |
1836 pos = 0; | |
1837 skipkeys = PyObject_IsTrue(s->skipkeys); | |
1838 idx = 0; | |
1839 while (PyDict_Next(dct, &pos, &key, &value)) { | |
1840 PyObject *encoded; | |
1841 | |
1842 if (PyString_Check(key) || PyUnicode_Check(key)) { | |
1843 Py_INCREF(key); | |
1844 kstr = key; | |
1845 } | |
1846 else if (PyFloat_Check(key)) { | |
1847 kstr = encoder_encode_float(s, key); | |
1848 if (kstr == NULL) | |
1849 goto bail; | |
1850 } | |
1851 else if (PyInt_Check(key) || PyLong_Check(key)) { | |
1852 kstr = PyObject_Str(key); | |
1853 if (kstr == NULL) | |
1854 goto bail; | |
1855 } | |
1856 else if (key == Py_True || key == Py_False || key == Py_None) { | |
1857 kstr = _encoded_const(key); | |
1858 if (kstr == NULL) | |
1859 goto bail; | |
1860 } | |
1861 else if (skipkeys) { | |
1862 continue; | |
1863 } | |
1864 else { | |
1865 /* TODO: include repr of key */ | |
1866 PyErr_SetString(PyExc_ValueError, "keys must be a string"); | |
1867 goto bail; | |
1868 } | |
1869 ········ | |
1870 if (idx) { | |
1871 if (PyList_Append(rval, s->item_separator)) | |
1872 goto bail; | |
1873 } | |
1874 ········ | |
1875 encoded = encoder_encode_string(s, kstr); | |
1876 Py_CLEAR(kstr); | |
1877 if (encoded == NULL) | |
1878 goto bail; | |
1879 if (PyList_Append(rval, encoded)) { | |
1880 Py_DECREF(encoded); | |
1881 goto bail; | |
1882 } | |
1883 Py_DECREF(encoded); | |
1884 if (PyList_Append(rval, s->key_separator)) | |
1885 goto bail; | |
1886 if (encoder_listencode_obj(s, rval, value, indent_level)) | |
1887 goto bail; | |
1888 idx += 1; | |
1889 } | |
1890 if (ident != NULL) { | |
1891 if (PyDict_DelItem(s->markers, ident)) | |
1892 goto bail; | |
1893 Py_CLEAR(ident); | |
1894 } | |
1895 if (s->indent != Py_None) { | |
1896 /* TODO: DOES NOT RUN */ | |
1897 indent_level -= 1; | |
1898 /* | |
1899 yield '\n' + (' ' * (_indent * _current_indent_level)) | |
1900 */ | |
1901 } | |
1902 if (PyList_Append(rval, close_dict)) | |
1903 goto bail; | |
1904 return 0; | |
1905 ···· | |
1906 bail: | |
1907 Py_XDECREF(kstr); | |
1908 Py_XDECREF(ident); | |
1909 return -1; | |
1910 } | |
1911 | |
1912 | |
1913 static int | |
1914 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ss ize_t indent_level) | |
1915 { | |
1916 static PyObject *open_array = NULL; | |
1917 static PyObject *close_array = NULL; | |
1918 static PyObject *empty_array = NULL; | |
1919 PyObject *ident = NULL; | |
1920 PyObject *s_fast = NULL; | |
1921 Py_ssize_t num_items; | |
1922 PyObject **seq_items; | |
1923 Py_ssize_t i; | |
1924 ···· | |
1925 if (open_array == NULL || close_array == NULL || empty_array == NULL) { | |
1926 open_array = PyString_InternFromString("["); | |
1927 close_array = PyString_InternFromString("]"); | |
1928 empty_array = PyString_InternFromString("[]"); | |
1929 if (open_array == NULL || close_array == NULL || empty_array == NULL) | |
1930 return -1; | |
1931 } | |
1932 ident = NULL; | |
1933 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); | |
1934 if (s_fast == NULL) | |
1935 return -1; | |
1936 num_items = PySequence_Fast_GET_SIZE(s_fast); | |
1937 if (num_items == 0) { | |
1938 Py_DECREF(s_fast); | |
1939 return PyList_Append(rval, empty_array); | |
1940 } | |
1941 ···· | |
1942 if (s->markers != Py_None) { | |
1943 int has_key; | |
1944 ident = PyLong_FromVoidPtr(seq); | |
1945 if (ident == NULL) | |
1946 goto bail; | |
1947 has_key = PyDict_Contains(s->markers, ident); | |
1948 if (has_key) { | |
1949 if (has_key != -1) | |
1950 PyErr_SetString(PyExc_ValueError, "Circular reference detected") ; | |
1951 goto bail; | |
1952 } | |
1953 if (PyDict_SetItem(s->markers, ident, seq)) { | |
1954 goto bail; | |
1955 } | |
1956 } | |
1957 ···· | |
1958 seq_items = PySequence_Fast_ITEMS(s_fast); | |
1959 if (PyList_Append(rval, open_array)) | |
1960 goto bail; | |
1961 if (s->indent != Py_None) { | |
1962 /* TODO: DOES NOT RUN */ | |
1963 indent_level += 1; | |
1964 /* | |
1965 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) | |
1966 separator = _item_separator + newline_indent | |
1967 buf += newline_indent | |
1968 */ | |
1969 } | |
1970 for (i = 0; i < num_items; i++) { | |
1971 PyObject *obj = seq_items[i]; | |
1972 if (i) { | |
1973 if (PyList_Append(rval, s->item_separator)) | |
1974 goto bail; | |
1975 } | |
1976 if (encoder_listencode_obj(s, rval, obj, indent_level)) | |
1977 goto bail; | |
1978 } | |
1979 if (ident != NULL) { | |
1980 if (PyDict_DelItem(s->markers, ident)) | |
1981 goto bail; | |
1982 Py_CLEAR(ident); | |
1983 } | |
1984 if (s->indent != Py_None) { | |
1985 /* TODO: DOES NOT RUN */ | |
1986 indent_level -= 1; | |
1987 /* | |
1988 yield '\n' + (' ' * (_indent * _current_indent_level)) | |
1989 */ | |
1990 } | |
1991 if (PyList_Append(rval, close_array)) | |
1992 goto bail; | |
1993 Py_DECREF(s_fast); | |
1994 return 0; | |
1995 ···· | |
1996 bail: | |
1997 Py_XDECREF(ident); | |
1998 Py_DECREF(s_fast); | |
1999 return -1; | |
2000 } | |
2001 | |
2002 static void | |
2003 encoder_dealloc(PyObject *self) | |
2004 { | |
2005 PyEncoderObject *s; | |
2006 assert(PyEncoder_Check(self)); | |
2007 s = (PyEncoderObject *)self; | |
2008 Py_CLEAR(s->markers); | |
2009 Py_CLEAR(s->defaultfn); | |
2010 Py_CLEAR(s->encoder); | |
2011 Py_CLEAR(s->indent); | |
2012 Py_CLEAR(s->key_separator); | |
2013 Py_CLEAR(s->item_separator); | |
2014 Py_CLEAR(s->sort_keys); | |
2015 Py_CLEAR(s->skipkeys); | |
2016 self->ob_type->tp_free(self); | |
2017 } | |
2018 | |
2019 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable") ; | |
2020 | |
2021 static | |
2022 PyTypeObject PyEncoderType = { | |
2023 PyObject_HEAD_INIT(0) | |
2024 0, /* tp_internal */ | |
2025 "make_encoder", /* tp_name */ | |
Martin v. Löwis
2009/01/04 13:22:29
That is a confusing type name. How about "Encoder"
bob.ippolito
2009/01/05 01:28:19
It's not a type that's ever exposed to user code,
| |
2026 sizeof(PyEncoderObject), /* tp_basicsize */ | |
2027 0, /* tp_itemsize */ | |
2028 encoder_dealloc, /* tp_dealloc */ | |
2029 0, /* tp_print */ | |
2030 0, /* tp_getattr */ | |
2031 0, /* tp_setattr */ | |
2032 0, /* tp_compare */ | |
2033 0, /* tp_repr */ | |
2034 0, /* tp_as_number */ | |
2035 0, /* tp_as_sequence */ | |
2036 0, /* tp_as_mapping */ | |
2037 0, /* tp_hash */ | |
2038 encoder_call, /* tp_call */ | |
2039 0, /* tp_str */ | |
2040 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ | |
2041 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ | |
2042 0, /* tp_as_buffer */ | |
2043 Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
2044 encoder_doc, /* tp_doc */ | |
2045 0, /* tp_traverse */ | |
2046 0, /* tp_clear */ | |
2047 0, /* tp_richcompare */ | |
2048 0, /* tp_weaklistoffset */ | |
2049 0, /* tp_iter */ | |
2050 0, /* tp_iternext */ | |
2051 0, /* tp_methods */ | |
2052 encoder_members, /* tp_members */ | |
2053 0, /* tp_getset */ | |
2054 0, /* tp_base */ | |
2055 0, /* tp_dict */ | |
2056 0, /* tp_descr_get */ | |
2057 0, /* tp_descr_set */ | |
2058 0, /* tp_dictoffset */ | |
2059 encoder_init, /* tp_init */ | |
2060 0,/* PyType_GenericAlloc, */ /* tp_alloc */ | |
2061 0,/* PyType_GenericNew, */ /* tp_new */ | |
2062 0,/* _PyObject_Del, */ /* tp_free */ | |
2063 }; | |
2064 | |
605 static PyMethodDef json_methods[] = { | 2065 static PyMethodDef json_methods[] = { |
606 {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii, | 2066 {"encode_basestring_ascii", |
607 METH_O, pydoc_encode_basestring_ascii}, | 2067 (PyCFunction)py_encode_basestring_ascii, |
608 {"scanstring", (PyCFunction)py_scanstring, METH_VARARGS, | 2068 METH_O, |
609 pydoc_scanstring}, | 2069 pydoc_encode_basestring_ascii}, |
2070 {"scanstring", | |
2071 (PyCFunction)py_scanstring, | |
2072 METH_VARARGS, | |
2073 pydoc_scanstring}, | |
610 {NULL, NULL, 0, NULL} | 2074 {NULL, NULL, 0, NULL} |
611 }; | 2075 }; |
612 | 2076 |
613 PyDoc_STRVAR(module_doc, | 2077 PyDoc_STRVAR(module_doc, |
614 "json speedups\n"); | 2078 "json speedups\n"); |
615 | 2079 |
616 void | 2080 void |
617 init_json(void) | 2081 init_json(void) |
618 { | 2082 { |
619 PyObject *m; | 2083 PyObject *m; |
2084 PyScannerType.tp_getattro = PyObject_GenericGetAttr; | |
2085 PyScannerType.tp_setattro = PyObject_GenericSetAttr; | |
2086 PyScannerType.tp_alloc = PyType_GenericAlloc; | |
2087 PyScannerType.tp_new = PyType_GenericNew; | |
2088 PyScannerType.tp_free = _PyObject_Del; | |
2089 if (PyType_Ready(&PyScannerType) < 0) | |
2090 return; | |
2091 PyEncoderType.tp_getattro = PyObject_GenericGetAttr; | |
2092 PyEncoderType.tp_setattro = PyObject_GenericSetAttr; | |
2093 PyEncoderType.tp_alloc = PyType_GenericAlloc; | |
2094 PyEncoderType.tp_new = PyType_GenericNew; | |
2095 PyEncoderType.tp_free = _PyObject_Del; | |
2096 if (PyType_Ready(&PyEncoderType) < 0) | |
2097 return; | |
620 m = Py_InitModule3("_json", json_methods, module_doc); | 2098 m = Py_InitModule3("_json", json_methods, module_doc); |
2099 Py_INCREF((PyObject*)&PyScannerType); | |
2100 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); | |
2101 Py_INCREF((PyObject*)&PyEncoderType); | |
2102 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); | |
621 } | 2103 } |
OLD | NEW |