Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(51)

Side by Side Diff: Modules/_json.c

Issue 7311: [issue4136] merge json library with simplejson 2.0.3 Base URL: http://svn.python.org/view/*checkout*/python/trunk/
Patch Set: Created 15 years, 5 months ago, Downloaded from: http://bugs.python.org/file11822/json_issue4136_r66961.diff
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« Lib/json/decoder.py ('K') | « Lib/json/tool.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #include "Python.h" 1 #include "Python.h"
2 #include "structmember.h"
2 3
3 #define DEFAULT_ENCODING "utf-8" 4 #define DEFAULT_ENCODING "utf-8"
5 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
6 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
7 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
8 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
9
10 static PyTypeObject PyScannerType;
11 static PyTypeObject PyEncoderType;
12
13 typedef struct _PyScannerObject {
14 PyObject_HEAD
15 PyObject *encoding;
16 PyObject *strict;
17 PyObject *object_hook;
18 PyObject *parse_float;
19 PyObject *parse_int;
20 PyObject *parse_constant;
21 } PyScannerObject;
22
23 static PyMemberDef scanner_members[] = {
24 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
25 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
26 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
27 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
28 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
29 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
30 {NULL}
31 };
32
33 typedef struct _PyEncoderObject {
34 PyObject_HEAD
35 PyObject *markers;
36 PyObject *defaultfn;
37 PyObject *encoder;
38 PyObject *indent;
39 PyObject *key_separator;
40 PyObject *item_separator;
41 PyObject *sort_keys;
42 PyObject *skipkeys;
43 int fast_encode;
44 int allow_nan;
45 } PyEncoderObject;
46
47 static PyMemberDef encoder_members[] = {
48 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
49 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
50 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
51 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
52 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
53 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
54 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
55 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
56 {NULL}
57 };
58
59 static Py_ssize_t
60 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
61 static PyObject *
62 ascii_escape_unicode(PyObject *pystr);
63 static PyObject *
64 ascii_escape_str(PyObject *pystr);
65 static PyObject *
66 py_encode_basestring_ascii(PyObject* self, PyObject *pystr);
67 void init_json(void);
68 static PyObject *
69 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
70 static PyObject *
71 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
72 static PyObject *
73 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
74 static int
75 scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
76 static void
77 scanner_dealloc(PyObject *self);
78 static int
79 encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
80 static void
81 encoder_dealloc(PyObject *self);
82 static int
83 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
84 static int
85 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
86 static int
87 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
88 static PyObject *
89 _encoded_const(PyObject *const);
90 static void
91 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
92 static PyObject *
93 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
94 static int
95 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
96 static PyObject *
97 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
98 static PyObject *
99 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
100
4 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') 101 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"')
102 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
103
5 #define MIN_EXPANSION 6 104 #define MIN_EXPANSION 6
6 105
7 #ifdef Py_UNICODE_WIDE 106 #ifdef Py_UNICODE_WIDE
8 #define MAX_EXPANSION (2 * MIN_EXPANSION) 107 #define MAX_EXPANSION (2 * MIN_EXPANSION)
9 #else 108 #else
10 #define MAX_EXPANSION MIN_EXPANSION 109 #define MAX_EXPANSION MIN_EXPANSION
11 #endif 110 #endif
12 111
112 static int
113 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
114 {
115 *size_ptr = PyInt_AsSsize_t(o);
116 if (*size_ptr == -1 && PyErr_Occurred());
117 return 1;
118 return 0;
119 }
120
121 static PyObject *
122 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
123 {
124 return PyInt_FromSsize_t(*size_ptr);
125 }
126
13 static Py_ssize_t 127 static Py_ssize_t
14 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) 128 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
15 { 129 {
16 Py_UNICODE x;
17 output[chars++] = '\\'; 130 output[chars++] = '\\';
18 switch (c) { 131 switch (c) {
19 case '\\': output[chars++] = (char)c; break; 132 case '\\': output[chars++] = (char)c; break;
20 case '"': output[chars++] = (char)c; break; 133 case '"': output[chars++] = (char)c; break;
21 case '\b': output[chars++] = 'b'; break; 134 case '\b': output[chars++] = 'b'; break;
22 case '\f': output[chars++] = 'f'; break; 135 case '\f': output[chars++] = 'f'; break;
23 case '\n': output[chars++] = 'n'; break; 136 case '\n': output[chars++] = 'n'; break;
24 case '\r': output[chars++] = 'r'; break; 137 case '\r': output[chars++] = 'r'; break;
25 case '\t': output[chars++] = 't'; break; 138 case '\t': output[chars++] = 't'; break;
26 default: 139 default:
27 #ifdef Py_UNICODE_WIDE 140 #ifdef Py_UNICODE_WIDE
28 if (c >= 0x10000) { 141 if (c >= 0x10000) {
29 /* UTF-16 surrogate pair */ 142 /* UTF-16 surrogate pair */
30 Py_UNICODE v = c - 0x10000; 143 Py_UNICODE v = c - 0x10000;
31 c = 0xd800 | ((v >> 10) & 0x3ff); 144 c = 0xd800 | ((v >> 10) & 0x3ff);
32 output[chars++] = 'u'; 145 output[chars++] = 'u';
33 x = (c & 0xf000) >> 12; 146 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
34 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 147 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
35 x = (c & 0x0f00) >> 8; 148 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
36 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 149 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
37 x = (c & 0x00f0) >> 4;
38 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
39 x = (c & 0x000f);
40 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
41 c = 0xdc00 | (v & 0x3ff); 150 c = 0xdc00 | (v & 0x3ff);
42 output[chars++] = '\\'; 151 output[chars++] = '\\';
43 } 152 }
44 #endif 153 #endif
45 output[chars++] = 'u'; 154 output[chars++] = 'u';
46 x = (c & 0xf000) >> 12; 155 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
47 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 156 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
48 x = (c & 0x0f00) >> 8; 157 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
49 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 158 output[chars++] = "0123456789abcdef"[(c ) & 0xf];
50 x = (c & 0x00f0) >> 4;
51 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
52 x = (c & 0x000f);
53 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
54 } 159 }
55 return chars; 160 return chars;
56 } 161 }
57 162
58 static PyObject * 163 static PyObject *
59 ascii_escape_unicode(PyObject *pystr) 164 ascii_escape_unicode(PyObject *pystr)
60 { 165 {
61 Py_ssize_t i; 166 Py_ssize_t i;
62 Py_ssize_t input_chars; 167 Py_ssize_t input_chars;
63 Py_ssize_t output_size; 168 Py_ssize_t output_size;
64 Py_ssize_t chars; 169 Py_ssize_t chars;
65 PyObject *rval; 170 PyObject *rval;
66 char *output; 171 char *output;
67 Py_UNICODE *input_unicode; 172 Py_UNICODE *input_unicode;
68 173
69 input_chars = PyUnicode_GET_SIZE(pystr); 174 input_chars = PyUnicode_GET_SIZE(pystr);
70 input_unicode = PyUnicode_AS_UNICODE(pystr); 175 input_unicode = PyUnicode_AS_UNICODE(pystr);
176
71 /* One char input can be up to 6 chars output, estimate 4 of these */ 177 /* One char input can be up to 6 chars output, estimate 4 of these */
72 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 178 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
73 rval = PyString_FromStringAndSize(NULL, output_size); 179 rval = PyString_FromStringAndSize(NULL, output_size);
74 if (rval == NULL) { 180 if (rval == NULL) {
75 return NULL; 181 return NULL;
76 } 182 }
77 output = PyString_AS_STRING(rval); 183 output = PyString_AS_STRING(rval);
78 chars = 0; 184 chars = 0;
79 output[chars++] = '"'; 185 output[chars++] = '"';
80 for (i = 0; i < input_chars; i++) { 186 for (i = 0; i < input_chars; i++) {
81 Py_UNICODE c = input_unicode[i]; 187 Py_UNICODE c = input_unicode[i];
82 if (S_CHAR(c)) { 188 if (S_CHAR(c)) {
83 output[chars++] = (char)c; 189 output[chars++] = (char)c;
84 } 190 }
85 » else { 191 else {
86 chars = ascii_escape_char(c, output, chars); 192 chars = ascii_escape_char(c, output, chars);
87 } 193 }
88 if (output_size - chars < (1 + MAX_EXPANSION)) { 194 if (output_size - chars < (1 + MAX_EXPANSION)) {
89 /* There's more than four, so let's resize by a lot */ 195 /* There's more than four, so let's resize by a lot */
90 output_size *= 2; 196 output_size *= 2;
Martin v. Löwis 2009/01/04 13:22:29 You might want to check for integer overflow here.
bob.ippolito 2009/01/05 01:28:19 _PyString_Resize checks for integer overflow, so i
91 /* This is an upper bound */ 197 /* This is an upper bound */
92 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { 198 if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
93 output_size = 2 + (input_chars * MAX_EXPANSION); 199 output_size = 2 + (input_chars * MAX_EXPANSION);
94 } 200 }
95 if (_PyString_Resize(&rval, output_size) == -1) { 201 if (_PyString_Resize(&rval, output_size) == -1) {
96 return NULL; 202 return NULL;
97 } 203 }
98 output = PyString_AS_STRING(rval); 204 output = PyString_AS_STRING(rval);
99 } 205 }
100 } 206 }
101 output[chars++] = '"'; 207 output[chars++] = '"';
102 if (_PyString_Resize(&rval, chars) == -1) { 208 if (_PyString_Resize(&rval, chars) == -1) {
103 return NULL; 209 return NULL;
104 } 210 }
105 return rval; 211 return rval;
106 } 212 }
107 213
108 static PyObject * 214 static PyObject *
109 ascii_escape_str(PyObject *pystr) 215 ascii_escape_str(PyObject *pystr)
Martin v. Löwis 2009/01/04 13:22:29 Please attach a comment to each function, telling
bob.ippolito 2009/01/05 01:28:19 Done in the next patch
110 { 216 {
111 Py_ssize_t i; 217 Py_ssize_t i;
112 Py_ssize_t input_chars; 218 Py_ssize_t input_chars;
113 Py_ssize_t output_size; 219 Py_ssize_t output_size;
114 Py_ssize_t chars; 220 Py_ssize_t chars;
115 PyObject *rval; 221 PyObject *rval;
116 char *output; 222 char *output;
117 char *input_str; 223 char *input_str;
118 224
119 input_chars = PyString_GET_SIZE(pystr); 225 input_chars = PyString_GET_SIZE(pystr);
120 input_str = PyString_AS_STRING(pystr); 226 input_str = PyString_AS_STRING(pystr);
121 /* One char input can be up to 6 chars output, estimate 4 of these */ 227
122 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 228 /* Fast path for a string that's already ASCII */
229 for (i = 0; i < input_chars; i++) {
230 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
231 if (!S_CHAR(c)) {
232 /* If we have to escape something, scan the string for unicode */
233 Py_ssize_t j;
234 for (j = i; j < input_chars; j++) {
235 c = (Py_UNICODE)(unsigned char)input_str[j];
236 if (c > 0x7f) {
237 /* We hit a non-ASCII character, bail to unicode mode */
238 PyObject *uni;
239 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
240 if (uni == NULL) {
241 return NULL;
242 }
243 rval = ascii_escape_unicode(uni);
244 Py_DECREF(uni);
245 return rval;
246 }
247 }
248 break;
249 }
250 }
251
252 if (i == input_chars) {
253 /* Input is already ASCII */
254 output_size = 2 + input_chars;
255 }
256 else {
257 /* One char input can be up to 6 chars output, estimate 4 of these */
258 output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
259 }
123 rval = PyString_FromStringAndSize(NULL, output_size); 260 rval = PyString_FromStringAndSize(NULL, output_size);
124 if (rval == NULL) { 261 if (rval == NULL) {
125 return NULL; 262 return NULL;
126 } 263 }
127 output = PyString_AS_STRING(rval); 264 output = PyString_AS_STRING(rval);
128 chars = 0; 265 output[0] = '"';
129 output[chars++] = '"'; 266 ····
130 for (i = 0; i < input_chars; i++) { 267 /* We know that everything up to i is ASCII already */
131 Py_UNICODE c = (Py_UNICODE)input_str[i]; 268 chars = i + 1;
269 memcpy(&output[1], input_str, i);
270
271 for (; i < input_chars; i++) {
272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
132 if (S_CHAR(c)) { 273 if (S_CHAR(c)) {
133 output[chars++] = (char)c; 274 output[chars++] = (char)c;
134 } 275 }
135 » else if (c > 0x7F) { 276 else {
136 /* We hit a non-ASCII character, bail to unicode mode */
137 PyObject *uni;
138 Py_DECREF(rval);
139 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
140 if (uni == NULL) {
141 return NULL;
142 }
143 rval = ascii_escape_unicode(uni);
144 Py_DECREF(uni);
145 return rval;
146 }
147 » else {
148 chars = ascii_escape_char(c, output, chars); 277 chars = ascii_escape_char(c, output, chars);
149 } 278 }
150 /* An ASCII char can't possibly expand to a surrogate! */ 279 /* An ASCII char can't possibly expand to a surrogate! */
151 if (output_size - chars < (1 + MIN_EXPANSION)) { 280 if (output_size - chars < (1 + MIN_EXPANSION)) {
152 /* There's more than four, so let's resize by a lot */ 281 /* There's more than four, so let's resize by a lot */
153 output_size *= 2; 282 output_size *= 2;
154 if (output_size > 2 + (input_chars * MIN_EXPANSION)) { 283 if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
155 output_size = 2 + (input_chars * MIN_EXPANSION); 284 output_size = 2 + (input_chars * MIN_EXPANSION);
156 } 285 }
157 if (_PyString_Resize(&rval, output_size) == -1) { 286 if (_PyString_Resize(&rval, output_size) == -1) {
158 return NULL; 287 return NULL;
159 } 288 }
160 output = PyString_AS_STRING(rval); 289 output = PyString_AS_STRING(rval);
161 } 290 }
162 } 291 }
163 output[chars++] = '"'; 292 output[chars++] = '"';
164 if (_PyString_Resize(&rval, chars) == -1) { 293 if (_PyString_Resize(&rval, chars) == -1) {
165 return NULL; 294 return NULL;
166 } 295 }
167 return rval; 296 return rval;
168 } 297 }
169 298
170 void 299 static void
171 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) 300 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
172 { 301 {
173 static PyObject *errmsg_fn = NULL; 302 static PyObject *errmsg_fn = NULL;
174 PyObject *pymsg; 303 PyObject *pymsg;
175 if (errmsg_fn == NULL) { 304 if (errmsg_fn == NULL) {
176 PyObject *decoder = PyImport_ImportModule("json.decoder"); 305 PyObject *decoder = PyImport_ImportModule("json.decoder");
177 if (decoder == NULL) 306 if (decoder == NULL)
178 return; 307 return;
179 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); 308 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
309 Py_DECREF(decoder);
180 if (errmsg_fn == NULL) 310 if (errmsg_fn == NULL)
181 return; 311 return;
182 Py_DECREF(decoder);
183 } 312 }
184 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end); 313 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
185 if (pymsg) { 314 if (pymsg) {
186 PyErr_SetObject(PyExc_ValueError, pymsg); 315 PyErr_SetObject(PyExc_ValueError, pymsg);
187 Py_DECREF(pymsg); 316 Py_DECREF(pymsg);
188 } 317 }
189 /*
190
191 def linecol(doc, pos):
192 lineno = doc.count('\n', 0, pos) + 1
193 if lineno == 1:
194 colno = pos
195 else:
196 colno = pos - doc.rindex('\n', 0, pos)
197 return lineno, colno
198
199 def errmsg(msg, doc, pos, end=None):
200 lineno, colno = linecol(doc, pos)
201 if end is None:
202 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
203 endlineno, endcolno = linecol(doc, end)
204 return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
205 msg, lineno, colno, endlineno, endcolno, pos, end)
206
207 */
208 } 318 }
209 319
210 static PyObject * 320 static PyObject *
211 join_list_unicode(PyObject *lst) 321 join_list_string(PyObject *lst)
212 { 322 {
213 static PyObject *ustr = NULL; 323 static PyObject *joinfn = NULL;
214 static PyObject *joinstr = NULL; 324 if (joinfn == NULL) {
215 if (ustr == NULL) { 325 PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
216 Py_UNICODE c = 0; 326 if (ustr == NULL)
217 ustr = PyUnicode_FromUnicode(&c, 0); 327 return NULL;
328 ········
329 joinfn = PyObject_GetAttrString(ustr, "join");
330 Py_DECREF(ustr);
331 if (joinfn == NULL)
332 return NULL;
218 } 333 }
219 if (joinstr == NULL) { 334 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
220 joinstr = PyString_InternFromString("join");
221 }
222 if (joinstr == NULL || ustr == NULL) {
223 return NULL;
224 }
225 return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);
226 } 335 }
227 336
228 static PyObject * 337 static PyObject *
229 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict) 338 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
339 PyObject *tpl;
340 PyObject *pyidx;
341 /*
342 steal a reference to rval, returns (rval, idx)
343 */
344 if (rval == NULL) {
345 return NULL;
346 }
347 pyidx = PyInt_FromSsize_t(idx);
348 if (pyidx == NULL) {
349 Py_DECREF(rval);
350 return NULL;
351 }
352 tpl = PyTuple_New(2);
353 if (tpl == NULL) {
354 Py_DECREF(pyidx);
355 Py_DECREF(rval);
356 return NULL;
357 }
358 PyTuple_SET_ITEM(tpl, 0, rval);
359 PyTuple_SET_ITEM(tpl, 1, pyidx);
360 return tpl;
361 }
362
363 static PyObject *
364 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
230 { 365 {
231 PyObject *rval; 366 PyObject *rval;
232 Py_ssize_t len = PyString_GET_SIZE(pystr); 367 Py_ssize_t len = PyString_GET_SIZE(pystr);
233 Py_ssize_t begin = end - 1; 368 Py_ssize_t begin = end - 1;
234 Py_ssize_t next = begin; 369 Py_ssize_t next = begin;
370 int has_unicode = 0;
235 char *buf = PyString_AS_STRING(pystr); 371 char *buf = PyString_AS_STRING(pystr);
236 PyObject *chunks = PyList_New(0); 372 PyObject *chunks = PyList_New(0);
237 if (chunks == NULL) { 373 if (chunks == NULL) {
238 goto bail; 374 goto bail;
239 } 375 }
240 if (end < 0 || len <= end) { 376 if (end < 0 || len <= end) {
241 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); 377 PyErr_SetString(PyExc_ValueError, "end is out of bounds");
242 goto bail; 378 goto bail;
243 } 379 }
244 while (1) { 380 while (1) {
245 /* Find the end of the string or the next escape */ 381 /* Find the end of the string or the next escape */
246 Py_UNICODE c = 0; 382 Py_UNICODE c = 0;
247 PyObject *chunk = NULL; 383 PyObject *chunk = NULL;
248 for (next = end; next < len; next++) { 384 for (next = end; next < len; next++) {
249 c = buf[next]; 385 c = (unsigned char)buf[next];
250 if (c == '"' || c == '\\') { 386 if (c == '"' || c == '\\') {
251 break; 387 break;
252 } 388 }
253 else if (strict && c <= 0x1f) { 389 else if (strict && c <= 0x1f) {
254 raise_errmsg("Invalid control character at", pystr, next); 390 raise_errmsg("Invalid control character at", pystr, next);
255 goto bail; 391 goto bail;
256 } 392 }
393 else if (c > 0x7f) {
394 has_unicode = 1;
395 }
257 } 396 }
258 if (!(c == '"' || c == '\\')) { 397 if (!(c == '"' || c == '\\')) {
259 raise_errmsg("Unterminated string starting at", pystr, begin); 398 raise_errmsg("Unterminated string starting at", pystr, begin);
260 goto bail; 399 goto bail;
261 } 400 }
262 /* Pick up this chunk if it's not zero length */ 401 /* Pick up this chunk if it's not zero length */
263 if (next != end) { 402 if (next != end) {
264 PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end); 403 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
265 if (strchunk == NULL) { 404 if (strchunk == NULL) {
266 goto bail; 405 goto bail;
267 } 406 }
268 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); 407 if (has_unicode) {
269 Py_DECREF(strchunk); 408 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
270 if (chunk == NULL) { 409 Py_DECREF(strchunk);
271 goto bail; 410 if (chunk == NULL) {
411 goto bail;
412 }
413 }
414 else {
415 chunk = strchunk;
272 } 416 }
273 if (PyList_Append(chunks, chunk)) { 417 if (PyList_Append(chunks, chunk)) {
274 Py_DECREF(chunk); 418 Py_DECREF(chunk);
275 goto bail; 419 goto bail;
276 } 420 }
277 Py_DECREF(chunk); 421 Py_DECREF(chunk);
278 } 422 }
279 next++; 423 next++;
280 if (c == '"') { 424 if (c == '"') {
281 end = next; 425 end = next;
(...skipping 26 matching lines...) Expand all
308 else { 452 else {
309 c = 0; 453 c = 0;
310 next++; 454 next++;
311 end = next + 4; 455 end = next + 4;
312 if (end >= len) { 456 if (end >= len) {
313 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); 457 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
314 goto bail; 458 goto bail;
315 } 459 }
316 /* Decode 4 hex digits */ 460 /* Decode 4 hex digits */
317 for (; next < end; next++) { 461 for (; next < end; next++) {
318 Py_ssize_t shl = (end - next - 1) << 2;
319 Py_UNICODE digit = buf[next]; 462 Py_UNICODE digit = buf[next];
463 c <<= 4;
320 switch (digit) { 464 switch (digit) {
321 case '0': case '1': case '2': case '3': case '4': 465 case '0': case '1': case '2': case '3': case '4':
322 case '5': case '6': case '7': case '8': case '9': 466 case '5': case '6': case '7': case '8': case '9':
323 c |= (digit - '0') << shl; break; 467 c |= (digit - '0'); break;
324 case 'a': case 'b': case 'c': case 'd': case 'e': 468 case 'a': case 'b': case 'c': case 'd': case 'e':
325 case 'f': 469 case 'f':
326 c |= (digit - 'a' + 10) << shl; break; 470 c |= (digit - 'a' + 10); break;
327 case 'A': case 'B': case 'C': case 'D': case 'E': 471 case 'A': case 'B': case 'C': case 'D': case 'E':
328 case 'F': 472 case 'F':
329 c |= (digit - 'A' + 10) << shl; break; 473 c |= (digit - 'A' + 10); break;
330 default: 474 default:
331 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 475 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
332 goto bail; 476 goto bail;
333 } 477 }
334 } 478 }
335 #ifdef Py_UNICODE_WIDE 479 #ifdef Py_UNICODE_WIDE
336 /* Surrogate pair */ 480 /* Surrogate pair */
337 if (c >= 0xd800 && c <= 0xdbff) { 481 if ((c & 0xfc00) == 0xd800) {
338 Py_UNICODE c2 = 0; 482 Py_UNICODE c2 = 0;
339 if (end + 6 >= len) { 483 if (end + 6 >= len) {
340 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, 484 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
341 end - 5); 485 goto bail;
342 } 486 }
343 if (buf[next++] != '\\' || buf[next++] != 'u') { 487 if (buf[next++] != '\\' || buf[next++] != 'u') {
344 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, 488 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
345 end - 5); 489 goto bail;
346 } 490 }
347 end += 6; 491 end += 6;
348 /* Decode 4 hex digits */ 492 /* Decode 4 hex digits */
349 for (; next < end; next++) { 493 for (; next < end; next++) {
350 Py_ssize_t shl = (end - next - 1) << 2; 494 c2 <<= 4;
351 Py_UNICODE digit = buf[next]; 495 Py_UNICODE digit = buf[next];
352 switch (digit) { 496 switch (digit) {
353 case '0': case '1': case '2': case '3': case '4': 497 case '0': case '1': case '2': case '3': case '4':
354 case '5': case '6': case '7': case '8': case '9': 498 case '5': case '6': case '7': case '8': case '9':
355 c2 |= (digit - '0') << shl; break; 499 c2 |= (digit - '0'); break;
356 case 'a': case 'b': case 'c': case 'd': case 'e': 500 case 'a': case 'b': case 'c': case 'd': case 'e':
357 case 'f': 501 case 'f':
358 c2 |= (digit - 'a' + 10) << shl; break; 502 c2 |= (digit - 'a' + 10); break;
359 case 'A': case 'B': case 'C': case 'D': case 'E': 503 case 'A': case 'B': case 'C': case 'D': case 'E':
360 case 'F': 504 case 'F':
361 c2 |= (digit - 'A' + 10) << shl; break; 505 c2 |= (digit - 'A' + 10); break;
362 default: 506 default:
363 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 507 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
364 goto bail; 508 goto bail;
365 } 509 }
366 } 510 }
511 if ((c2 & 0xfc00) != 0xdc00) {
512 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
513 goto bail;
514 }
367 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); 515 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
368 } 516 }
517 else if ((c & 0xfc00) == 0xdc00) {
518 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
519 goto bail;
520 }
369 #endif 521 #endif
370 } 522 }
371 chunk = PyUnicode_FromUnicode(&c, 1); 523 if (c > 0x7f) {
372 if (chunk == NULL) { 524 has_unicode = 1;
373 goto bail; 525 }
526 if (has_unicode) {
527 chunk = PyUnicode_FromUnicode(&c, 1);
528 if (chunk == NULL) {
529 goto bail;
530 }
531 }
532 else {
533 char c_char = Py_CHARMASK(c);
534 chunk = PyString_FromStringAndSize(&c_char, 1);
535 if (chunk == NULL) {
536 goto bail;
537 }
374 } 538 }
375 if (PyList_Append(chunks, chunk)) { 539 if (PyList_Append(chunks, chunk)) {
376 Py_DECREF(chunk); 540 Py_DECREF(chunk);
377 goto bail; 541 goto bail;
378 } 542 }
379 Py_DECREF(chunk); 543 Py_DECREF(chunk);
380 } 544 }
381 545
382 rval = join_list_unicode(chunks); 546 rval = join_list_string(chunks);
383 if (rval == NULL) { 547 if (rval == NULL) {
384 goto bail; 548 goto bail;
385 } 549 }
386 Py_CLEAR(chunks); 550 Py_CLEAR(chunks);
387 return Py_BuildValue("(Nn)", rval, end); 551 *next_end_ptr = end;
552 return rval;
388 bail: 553 bail:
554 *next_end_ptr = -1;
389 Py_XDECREF(chunks); 555 Py_XDECREF(chunks);
390 return NULL; 556 return NULL;
391 } 557 }
392 558
393 559
394 static PyObject * 560 static PyObject *
395 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict) 561 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
396 { 562 {
397 PyObject *rval; 563 PyObject *rval;
398 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); 564 Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
399 Py_ssize_t begin = end - 1; 565 Py_ssize_t begin = end - 1;
400 Py_ssize_t next = begin; 566 Py_ssize_t next = begin;
401 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); 567 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
402 PyObject *chunks = PyList_New(0); 568 PyObject *chunks = PyList_New(0);
403 if (chunks == NULL) { 569 if (chunks == NULL) {
404 goto bail; 570 goto bail;
405 } 571 }
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
469 else { 635 else {
470 c = 0; 636 c = 0;
471 next++; 637 next++;
472 end = next + 4; 638 end = next + 4;
473 if (end >= len) { 639 if (end >= len) {
474 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); 640 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
475 goto bail; 641 goto bail;
476 } 642 }
477 /* Decode 4 hex digits */ 643 /* Decode 4 hex digits */
478 for (; next < end; next++) { 644 for (; next < end; next++) {
479 Py_ssize_t shl = (end - next - 1) << 2;
480 Py_UNICODE digit = buf[next]; 645 Py_UNICODE digit = buf[next];
646 c <<= 4;
481 switch (digit) { 647 switch (digit) {
482 case '0': case '1': case '2': case '3': case '4': 648 case '0': case '1': case '2': case '3': case '4':
483 case '5': case '6': case '7': case '8': case '9': 649 case '5': case '6': case '7': case '8': case '9':
484 c |= (digit - '0') << shl; break; 650 c |= (digit - '0'); break;
485 case 'a': case 'b': case 'c': case 'd': case 'e': 651 case 'a': case 'b': case 'c': case 'd': case 'e':
486 case 'f': 652 case 'f':
487 c |= (digit - 'a' + 10) << shl; break; 653 c |= (digit - 'a' + 10); break;
488 case 'A': case 'B': case 'C': case 'D': case 'E': 654 case 'A': case 'B': case 'C': case 'D': case 'E':
489 case 'F': 655 case 'F':
490 c |= (digit - 'A' + 10) << shl; break; 656 c |= (digit - 'A' + 10); break;
491 default: 657 default:
492 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 658 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
493 goto bail; 659 goto bail;
494 } 660 }
495 } 661 }
496 #ifdef Py_UNICODE_WIDE 662 #ifdef Py_UNICODE_WIDE
497 /* Surrogate pair */ 663 /* Surrogate pair */
498 if (c >= 0xd800 && c <= 0xdbff) { 664 if ((c & 0xfc00) == 0xd800) {
499 Py_UNICODE c2 = 0; 665 Py_UNICODE c2 = 0;
500 if (end + 6 >= len) { 666 if (end + 6 >= len) {
501 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, 667 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
502 end - 5); 668 goto bail;
503 } 669 }
504 if (buf[next++] != '\\' || buf[next++] != 'u') { 670 if (buf[next++] != '\\' || buf[next++] != 'u') {
505 raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr, 671 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
506 end - 5); 672 goto bail;
507 } 673 }
508 end += 6; 674 end += 6;
509 /* Decode 4 hex digits */ 675 /* Decode 4 hex digits */
510 for (; next < end; next++) { 676 for (; next < end; next++) {
511 Py_ssize_t shl = (end - next - 1) << 2; 677 c2 <<= 4;
512 Py_UNICODE digit = buf[next]; 678 Py_UNICODE digit = buf[next];
513 switch (digit) { 679 switch (digit) {
514 case '0': case '1': case '2': case '3': case '4': 680 case '0': case '1': case '2': case '3': case '4':
515 case '5': case '6': case '7': case '8': case '9': 681 case '5': case '6': case '7': case '8': case '9':
516 c2 |= (digit - '0') << shl; break; 682 c2 |= (digit - '0'); break;
517 case 'a': case 'b': case 'c': case 'd': case 'e': 683 case 'a': case 'b': case 'c': case 'd': case 'e':
518 case 'f': 684 case 'f':
519 c2 |= (digit - 'a' + 10) << shl; break; 685 c2 |= (digit - 'a' + 10); break;
520 case 'A': case 'B': case 'C': case 'D': case 'E': 686 case 'A': case 'B': case 'C': case 'D': case 'E':
521 case 'F': 687 case 'F':
522 c2 |= (digit - 'A' + 10) << shl; break; 688 c2 |= (digit - 'A' + 10); break;
523 default: 689 default:
524 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 690 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
525 goto bail; 691 goto bail;
526 } 692 }
527 } 693 }
694 if ((c2 & 0xfc00) != 0xdc00) {
695 raise_errmsg("Unpaired high surrogate", pystr, end - 5);
696 goto bail;
697 }
528 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); 698 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
529 } 699 }
700 else if ((c & 0xfc00) == 0xdc00) {
701 raise_errmsg("Unpaired low surrogate", pystr, end - 5);
702 goto bail;
703 }
530 #endif 704 #endif
531 } 705 }
532 chunk = PyUnicode_FromUnicode(&c, 1); 706 chunk = PyUnicode_FromUnicode(&c, 1);
533 if (chunk == NULL) { 707 if (chunk == NULL) {
534 goto bail; 708 goto bail;
535 } 709 }
536 if (PyList_Append(chunks, chunk)) { 710 if (PyList_Append(chunks, chunk)) {
537 Py_DECREF(chunk); 711 Py_DECREF(chunk);
538 goto bail; 712 goto bail;
539 } 713 }
540 Py_DECREF(chunk); 714 Py_DECREF(chunk);
541 } 715 }
542 716
543 rval = join_list_unicode(chunks); 717 rval = join_list_string(chunks);
544 if (rval == NULL) { 718 if (rval == NULL) {
545 goto bail; 719 goto bail;
546 } 720 }
547 Py_CLEAR(chunks); 721 Py_DECREF(chunks);
548 return Py_BuildValue("(Nn)", rval, end); 722 *next_end_ptr = end;
723 return rval;
549 bail: 724 bail:
725 *next_end_ptr = -1;
550 Py_XDECREF(chunks); 726 Py_XDECREF(chunks);
551 return NULL; 727 return NULL;
552 } 728 }
553 729
554 PyDoc_STRVAR(pydoc_scanstring, 730 PyDoc_STRVAR(pydoc_scanstring,
555 "scanstring(basestring, end, encoding) -> (str, end)\n"); 731 "scanstring(basestring, end, encoding) -> (str, end)\n"
732 "\n"
733 "..."
Martin v. Löwis 2009/01/04 13:22:29 Some text should probably be added here.
bob.ippolito 2009/01/05 01:28:19 Done in the next patch.
734 );
556 735
557 static PyObject * 736 static PyObject *
558 py_scanstring(PyObject* self, PyObject *args) 737 py_scanstring(PyObject* self, PyObject *args)
559 { 738 {
560 PyObject *pystr; 739 PyObject *pystr;
740 PyObject *rval;
561 Py_ssize_t end; 741 Py_ssize_t end;
742 Py_ssize_t next_end = -1;
562 char *encoding = NULL; 743 char *encoding = NULL;
563 int strict = 0; 744 int strict = 0;
564 if (!PyArg_ParseTuple(args, "On|zi:scanstring", &pystr, &end, &encoding, &strict)) { 745 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
565 return NULL; 746 return NULL;
566 } 747 }
567 if (encoding == NULL) { 748 if (encoding == NULL) {
568 encoding = DEFAULT_ENCODING; 749 encoding = DEFAULT_ENCODING;
569 } 750 }
570 if (PyString_Check(pystr)) { 751 if (PyString_Check(pystr)) {
571 return scanstring_str(pystr, end, encoding, strict); 752 rval = scanstring_str(pystr, end, encoding, strict, &next_end);
572 } 753 }
573 else if (PyUnicode_Check(pystr)) { 754 else if (PyUnicode_Check(pystr)) {
574 return scanstring_unicode(pystr, end, strict); 755 rval = scanstring_unicode(pystr, end, strict, &next_end);
575 } 756 }
576 else { 757 else {
577 PyErr_Format(PyExc_TypeError, 758 PyErr_Format(PyExc_TypeError,
578 "first argument must be a string or unicode, not %.80s", 759 "first argument must be a string, not %.80s",
579 Py_TYPE(pystr)->tp_name); 760 Py_TYPE(pystr)->tp_name);
580 return NULL; 761 return NULL;
581 } 762 }
763 return _build_rval_index_tuple(rval, next_end);
582 } 764 }
583 765
584 PyDoc_STRVAR(pydoc_encode_basestring_ascii, 766 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
585 "encode_basestring_ascii(basestring) -> str\n"); 767 "encode_basestring_ascii(basestring) -> str\n"
768 "\n"
769 "..."
770 );
586 771
587 static PyObject * 772 static PyObject *
588 py_encode_basestring_ascii(PyObject* self, PyObject *pystr) 773 py_encode_basestring_ascii(PyObject* self, PyObject *pystr)
589 { 774 {
590 /* METH_O */ 775 /* METH_O */
591 if (PyString_Check(pystr)) { 776 if (PyString_Check(pystr)) {
592 return ascii_escape_str(pystr); 777 return ascii_escape_str(pystr);
593 } 778 }
594 else if (PyUnicode_Check(pystr)) { 779 else if (PyUnicode_Check(pystr)) {
595 return ascii_escape_unicode(pystr); 780 return ascii_escape_unicode(pystr);
596 } 781 }
597 else { 782 else {
598 PyErr_Format(PyExc_TypeError, 783 PyErr_Format(PyExc_TypeError,
599 "first argument must be a string or unicode, not %.80s", 784 "first argument must be a string, not %.80s",
600 Py_TYPE(pystr)->tp_name); 785 Py_TYPE(pystr)->tp_name);
601 return NULL; 786 return NULL;
602 } 787 }
603 } 788 }
604 789
790 static void
791 scanner_dealloc(PyObject *self)
792 {
793 PyScannerObject *s;
794 assert(PyScanner_Check(self));
795 s = (PyScannerObject *)self;
796 Py_CLEAR(s->encoding);
797 Py_CLEAR(s->strict);
798 Py_CLEAR(s->object_hook);
799 Py_CLEAR(s->parse_float);
800 Py_CLEAR(s->parse_int);
801 Py_CLEAR(s->parse_constant);
802 self->ob_type->tp_free(self);
803 }
804
805 static PyObject *
806 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
807 char *str = PyString_AS_STRING(pystr);
808 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
809 PyObject *rval = PyDict_New();
810 PyObject *key = NULL;
811 PyObject *val = NULL;
812 char *encoding = PyString_AS_STRING(s->encoding);
813 int strict = PyObject_IsTrue(s->strict);
814 Py_ssize_t next_idx;
815 if (rval == NULL)
816 return NULL;
817
818 /* skip whitespace after { */
819 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
820
821 /* only loop if the object is non-empty */
822 if (idx <= end_idx && str[idx] != '}') {
823 while (idx <= end_idx) {
824 /* read key */
825 if (str[idx] != '"') {
826 raise_errmsg("Expecting property name", pystr, idx);
827 goto bail;
828 }
829 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
830 if (key == NULL)
831 goto bail;
832 idx = next_idx;
833 ············
834 /* skip whitespace between key and : delimiter, read :, skip whitespace */
835 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
836 if (idx > end_idx || str[idx] != ':') {
837 raise_errmsg("Expecting : delimiter", pystr, idx);
838 goto bail;
839 }
840 idx++;
841 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
842 ············
843 /* read any JSON data type */
844 val = scan_once_str(s, pystr, idx, &next_idx);
845 if (val == NULL)
846 goto bail;
847
848 if (PyDict_SetItem(rval, key, val) == -1)
849 goto bail;
850
851 Py_CLEAR(key);
852 Py_CLEAR(val);
853 idx = next_idx;
854 ············
855 /* skip whitespace before } or , */
856 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
857
858 /* bail if the object is closed or we didn't get the , delimiter */
859 if (idx > end_idx) break;
860 if (str[idx] == '}') {
861 break;
862 }
863 else if (str[idx] != ',') {
864 raise_errmsg("Expecting , delimiter", pystr, idx);
865 goto bail;
866 }
867 idx++;
868
869 /* skip whitespace after , delimiter */
870 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
871 }
872 }
873 /* verify that idx < end_idx, str[idx] should be '}' */
874 if (idx > end_idx || str[idx] != '}') {
875 raise_errmsg("Expecting object", pystr, end_idx);
876 goto bail;
877 }
878 /* if object_hook is not None: rval = object_hook(rval) */
879 if (s->object_hook != Py_None) {
880 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
881 if (val == NULL)
882 goto bail;
883 Py_DECREF(rval);
884 rval = val;
885 val = NULL;
886 }
887 *next_idx_ptr = idx + 1;
888 return rval;
889 bail:
890 Py_XDECREF(key);
891 Py_XDECREF(val);
892 Py_DECREF(rval);
893 return NULL;····
894 }
895
896 static PyObject *
897 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
898 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
899 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
900 PyObject *val = NULL;
901 PyObject *rval = PyDict_New();
902 PyObject *key = NULL;
903 int strict = PyObject_IsTrue(s->strict);
904 Py_ssize_t next_idx;
905 if (rval == NULL)
906 return NULL;
907 ····
908 /* skip whitespace after { */
909 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
910
911 /* only loop if the object is non-empty */
912 if (idx <= end_idx && str[idx] != '}') {
913 while (idx <= end_idx) {
914 /* read key */
915 if (str[idx] != '"') {
916 raise_errmsg("Expecting property name", pystr, idx);
917 goto bail;
918 }
919 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
920 if (key == NULL)
921 goto bail;
922 idx = next_idx;
923
924 /* skip whitespace between key and : delimiter, read :, skip whitespace */
925 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
926 if (idx > end_idx || str[idx] != ':') {
927 raise_errmsg("Expecting : delimiter", pystr, idx);
928 goto bail;
929 }
930 idx++;
931 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
932 ············
933 /* read any JSON term */
934 val = scan_once_unicode(s, pystr, idx, &next_idx);
935 if (val == NULL)
936 goto bail;
937
938 if (PyDict_SetItem(rval, key, val) == -1)
939 goto bail;
940
941 Py_CLEAR(key);
942 Py_CLEAR(val);
943 idx = next_idx;
944
945 /* skip whitespace before } or , */
946 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
947
948 /* bail if the object is closed or we didn't get the , delimiter */
949 if (idx > end_idx) break;
950 if (str[idx] == '}') {
951 break;
952 }
953 else if (str[idx] != ',') {
954 raise_errmsg("Expecting , delimiter", pystr, idx);
955 goto bail;
956 }
957 idx++;
958
959 /* skip whitespace after , delimiter */
960 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
961 }
962 }
963
964 /* verify that idx < end_idx, str[idx] should be '}' */
965 if (idx > end_idx || str[idx] != '}') {
966 raise_errmsg("Expecting object", pystr, end_idx);
967 goto bail;
968 }
969
970 /* if object_hook is not None: rval = object_hook(rval) */
971 if (s->object_hook != Py_None) {
972 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
973 if (val == NULL)
974 goto bail;
975 Py_DECREF(rval);
976 rval = val;
977 val = NULL;
978 }
979 *next_idx_ptr = idx + 1;
980 return rval;
981 bail:
982 Py_XDECREF(key);
983 Py_XDECREF(val);
984 Py_DECREF(rval);
985 return NULL;
986 }
987
988 static PyObject *
989 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
990 char *str = PyString_AS_STRING(pystr);
991 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
992 PyObject *val = NULL;
993 PyObject *rval = PyList_New(0);
994 Py_ssize_t next_idx;
995 if (rval == NULL)
996 return NULL;
997
998 /* skip whitespace after [ */
999 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1000
1001 /* only loop if the array is non-empty */
1002 if (idx <= end_idx && str[idx] != ']') {
1003 while (idx <= end_idx) {
1004
1005 /* read any JSON term and de-tuplefy the (rval, idx) */
1006 val = scan_once_str(s, pystr, idx, &next_idx);
1007 if (val == NULL)
1008 goto bail;
1009
1010 if (PyList_Append(rval, val) == -1)
1011 goto bail;
1012
1013 Py_CLEAR(val);
1014 idx = next_idx;
1015 ············
1016 /* skip whitespace between term and , */
1017 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1018
1019 /* bail if the array is closed or we didn't get the , delimiter */
1020 if (idx > end_idx) break;
1021 if (str[idx] == ']') {
1022 break;
1023 }
1024 else if (str[idx] != ',') {
1025 raise_errmsg("Expecting , delimiter", pystr, idx);
1026 goto bail;
1027 }
1028 idx++;
1029 ············
1030 /* skip whitespace after , */
1031 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1032 }
1033 }
1034
1035 /* verify that idx < end_idx, str[idx] should be ']' */
1036 if (idx > end_idx || str[idx] != ']') {
1037 raise_errmsg("Expecting object", pystr, end_idx);
1038 goto bail;
1039 }
1040 *next_idx_ptr = idx + 1;
1041 return rval;
1042 bail:
1043 Py_XDECREF(val);
1044 Py_DECREF(rval);
1045 return NULL;
1046 }
1047
1048 static PyObject *
1049 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1050 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1051 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1052 PyObject *val = NULL;
1053 PyObject *rval = PyList_New(0);
1054 Py_ssize_t next_idx;
1055 if (rval == NULL)
1056 return NULL;
1057
1058 /* skip whitespace after [ */
1059 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1060
1061 /* only loop if the array is non-empty */
1062 if (idx <= end_idx && str[idx] != ']') {
1063 while (idx <= end_idx) {
1064
1065 /* read any JSON term */
1066 val = scan_once_unicode(s, pystr, idx, &next_idx);
1067 if (val == NULL)
1068 goto bail;
1069
1070 if (PyList_Append(rval, val) == -1)
1071 goto bail;
1072
1073 Py_CLEAR(val);
1074 idx = next_idx;
1075
1076 /* skip whitespace between term and , */
1077 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1078
1079 /* bail if the array is closed or we didn't get the , delimiter */
1080 if (idx > end_idx) break;
1081 if (str[idx] == ']') {
1082 break;
1083 }
1084 else if (str[idx] != ',') {
1085 raise_errmsg("Expecting , delimiter", pystr, idx);
1086 goto bail;
1087 }
1088 idx++;
1089
1090 /* skip whitespace after , */
1091 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1092 }
1093 }
1094
1095 /* verify that idx < end_idx, str[idx] should be ']' */
1096 if (idx > end_idx || str[idx] != ']') {
1097 raise_errmsg("Expecting object", pystr, end_idx);
1098 goto bail;
1099 }
1100 *next_idx_ptr = idx + 1;
1101 return rval;
1102 bail:
1103 Py_XDECREF(val);
1104 Py_DECREF(rval);
1105 return NULL;
1106 }
1107
1108 static PyObject *
1109 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1110 PyObject *cstr;
1111 PyObject *rval;
1112 /* constant is "NaN", "Infinity", or "-Infinity" */
1113 cstr = PyString_InternFromString(constant);
1114 if (cstr == NULL)
1115 return NULL;
1116
1117 /* rval = parse_constant(constant) */
1118 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1119 idx += PyString_GET_SIZE(cstr);
1120 Py_DECREF(cstr);
1121 *next_idx_ptr = idx;
1122 return rval;
1123 }
1124
1125 static PyObject *
1126 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1127 char *str = PyString_AS_STRING(pystr);
1128 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1129 Py_ssize_t idx = start;
1130 int is_float = 0;
1131 PyObject *rval;
1132 PyObject *numstr;
1133 ····
1134 /* read a sign if it's there, make sure it's not the end of the string */
1135 if (str[idx] == '-') {
1136 idx++;
1137 if (idx > end_idx) {
1138 PyErr_SetNone(PyExc_StopIteration);
1139 return NULL;
1140 }
1141 }
1142
1143 /* read as many integer digits as we find as long as it doesn't start with 0 */
1144 if (str[idx] >= '1' && str[idx] <= '9') {
1145 idx++;
1146 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1147 }
1148 /* if it starts with 0 we only expect one integer digit */
1149 else if (str[idx] == '0') {
1150 idx++;
1151 }
1152 /* no integer digits, error */
1153 else {
1154 PyErr_SetNone(PyExc_StopIteration);
1155 return NULL;
1156 }
1157 ····
1158 /* if the next char is '.' followed by a digit then read all float digits */
1159 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1160 is_float = 1;
1161 idx += 2;
1162 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1163 }
1164
1165 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1166 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1167
1168 /* save the index of the 'e' or 'E' just in case we need to backtrack */
1169 Py_ssize_t e_start = idx;
1170 idx++;
1171
1172 /* read an exponent sign if present */
1173 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1174
1175 /* read all digits */
1176 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1177
1178 /* if we got a digit, then parse as float. if not, backtrack */
1179 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1180 is_float = 1;
1181 }
1182 else {
1183 idx = e_start;
1184 }
1185 }
1186 ····
1187 /* copy the section we determined to be a number */
1188 numstr = PyString_FromStringAndSize(&str[start], idx - start);
1189 if (numstr == NULL)
1190 return NULL;
1191 if (is_float) {
1192 /* parse as a float using a fast path if available, otherwise call user defined method */
1193 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1194 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1195 }
1196 else {
1197 rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr)));
1198 }
1199 }
1200 else {
1201 /* parse as an int using a fast path if available, otherwise call user defined method */
1202 if (s->parse_int != (PyObject *)&PyInt_Type) {
1203 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1204 }
1205 else {
1206 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1207 }
1208 }
1209 Py_DECREF(numstr);
1210 *next_idx_ptr = idx;
1211 return rval;
1212 }
1213
1214 static PyObject *
1215 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1216 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1217 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1218 Py_ssize_t idx = start;
1219 int is_float = 0;
1220 PyObject *rval;
1221 PyObject *numstr;
1222
1223 /* read a sign if it's there, make sure it's not the end of the string */
1224 if (str[idx] == '-') {
1225 idx++;
1226 if (idx > end_idx) {
1227 PyErr_SetNone(PyExc_StopIteration);
1228 return NULL;
1229 }
1230 }
1231
1232 /* read as many integer digits as we find as long as it doesn't start with 0 */
1233 if (str[idx] >= '1' && str[idx] <= '9') {
1234 idx++;
1235 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1236 }
1237 /* if it starts with 0 we only expect one integer digit */
1238 else if (str[idx] == '0') {
1239 idx++;
1240 }
1241 /* no integer digits, error */
1242 else {
1243 PyErr_SetNone(PyExc_StopIteration);
1244 return NULL;
1245 }
1246
1247 /* if the next char is '.' followed by a digit then read all float digits */
1248 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1249 is_float = 1;
1250 idx += 2;
1251 while (idx < end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1252 }
1253
1254 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1255 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1256 Py_ssize_t e_start = idx;
1257 idx++;
1258
1259 /* read an exponent sign if present */
1260 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1261
1262 /* read all digits */
1263 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1264
1265 /* if we got a digit, then parse as float. if not, backtrack */
1266 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1267 is_float = 1;
1268 }
1269 else {
1270 idx = e_start;
1271 }
1272 }
1273
1274 /* copy the section we determined to be a number */
1275 numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1276 if (numstr == NULL)
1277 return NULL;
1278 if (is_float) {
1279 /* parse as a float using a fast path if available, otherwise call user defined method */
1280 if (s->parse_float != (PyObject *)&PyFloat_Type) {
1281 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1282 }
1283 else {
1284 rval = PyFloat_FromString(numstr, NULL);
1285 }
1286 }
1287 else {
1288 /* no fast path for unicode -> int, just call */
1289 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1290 }
1291 Py_DECREF(numstr);
1292 *next_idx_ptr = idx;
1293 return rval;
1294 }
1295
1296 static PyObject *
1297 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1298 {
1299 char *str = PyString_AS_STRING(pystr);
1300 Py_ssize_t length = PyString_GET_SIZE(pystr);
1301 if (idx >= length) {
1302 PyErr_SetNone(PyExc_StopIteration);
1303 return NULL;
1304 }
1305 switch (str[idx]) {
1306 case '"':
1307 /* string */
1308 return scanstring_str(pystr, idx + 1,
1309 PyString_AS_STRING(s->encoding),
1310 PyObject_IsTrue(s->strict),
1311 next_idx_ptr);
1312 case '{':
1313 /* object */
1314 return _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1315 case '[':
1316 /* array */
1317 return _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1318 case 'n':
1319 /* null */
1320 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
Martin v. Löwis 2009/01/04 13:22:29 Is this really faster than a strncmp?
bob.ippolito 2009/01/05 01:28:19 Probably not, but strncmp doesn't work for PyUnico
1321 Py_INCREF(Py_None);
1322 *next_idx_ptr = idx + 4;
1323 return Py_None;
1324 }
1325 break;
1326 case 't':
1327 /* true */
1328 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1329 Py_INCREF(Py_True);
1330 *next_idx_ptr = idx + 4;
1331 return Py_True;
1332 }
1333 break;
1334 case 'f':
1335 /* false */
1336 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1337 Py_INCREF(Py_False);
1338 *next_idx_ptr = idx + 5;
1339 return Py_False;
1340 }
1341 break;
1342 case 'N':
1343 /* NaN */
1344 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1345 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1346 }
1347 break;
1348 case 'I':
1349 /* Infinity */
1350 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1351 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1352 }
1353 break;
1354 case '-':
1355 /* -Infinity */
1356 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1357 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1358 }
1359 break;
1360 }
1361 /* Didn't find a string, object, array, or named constant. Look for a number. */
1362 return _match_number_str(s, pystr, idx, next_idx_ptr);
1363 }
1364
1365 static PyObject *
1366 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1367 {
1368 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1369 Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1370 if (idx >= length) {
1371 PyErr_SetNone(PyExc_StopIteration);
1372 return NULL;
1373 }
1374 switch (str[idx]) {
1375 case '"':
1376 /* string */
1377 return scanstring_unicode(pystr, idx + 1,
1378 PyObject_IsTrue(s->strict),
1379 next_idx_ptr);
1380 case '{':
1381 /* object */
1382 return _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1383 case '[':
1384 /* array */
1385 return _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1386 case 'n':
1387 /* null */
1388 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1389 Py_INCREF(Py_None);
1390 *next_idx_ptr = idx + 4;
1391 return Py_None;
1392 }
1393 break;
1394 case 't':
1395 /* true */
1396 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1397 Py_INCREF(Py_True);
1398 *next_idx_ptr = idx + 4;
1399 return Py_True;
1400 }
1401 break;
1402 case 'f':
1403 /* false */
1404 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1405 Py_INCREF(Py_False);
1406 *next_idx_ptr = idx + 5;
1407 return Py_False;
1408 }
1409 break;
1410 case 'N':
1411 /* NaN */
1412 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1413 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1414 }
1415 break;
1416 case 'I':
1417 /* Infinity */
1418 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1419 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1420 }
1421 break;
1422 case '-':
1423 /* -Infinity */
1424 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1425 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1426 }
1427 break;
1428 }
1429 /* Didn't find a string, object, array, or named constant. Look for a number. */
1430 return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1431 }
1432
1433 static PyObject *
1434 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1435 {
1436 PyObject *pystr;
1437 PyObject *rval;
1438 Py_ssize_t idx;
1439 Py_ssize_t next_idx = -1;
1440 static char *kwlist[] = {"string", "idx", NULL};
1441 PyScannerObject *s;
1442 assert(PyScanner_Check(self));
1443 s = (PyScannerObject *)self;
1444 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1445 return NULL;
1446
1447 if (PyString_Check(pystr)) {
1448 rval = scan_once_str(s, pystr, idx, &next_idx);
1449 }
1450 else if (PyUnicode_Check(pystr)) {
1451 rval = scan_once_unicode(s, pystr, idx, &next_idx);
1452 }
1453 else {
1454 PyErr_Format(PyExc_TypeError,
1455 "first argument must be a string, not %.80s",
1456 Py_TYPE(pystr)->tp_name);
1457 return NULL;
1458 }
1459 return _build_rval_index_tuple(rval, next_idx);
1460 }
1461
1462 static int
1463 scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1464 {
1465 PyObject *ctx;
1466 static char *kwlist[] = {"context", NULL};
1467 PyScannerObject *s;
1468
1469 assert(PyScanner_Check(self));
1470 s = (PyScannerObject *)self;
1471
1472 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1473 return -1;
1474
1475 s->encoding = NULL;
1476 s->strict = NULL;
1477 s->object_hook = NULL;
1478 s->parse_float = NULL;
1479 s->parse_int = NULL;
1480 s->parse_constant = NULL;
1481
1482 /* PyString_AS_STRING is used on encoding */
1483 s->encoding = PyObject_GetAttrString(ctx, "encoding");
1484 if (s->encoding == Py_None) {
1485 Py_DECREF(Py_None);
1486 s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1487 }
1488 else if (PyUnicode_Check(s->encoding)) {
1489 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1490 Py_DECREF(s->encoding);
1491 s->encoding = tmp;
1492 }
1493 if (s->encoding == NULL || !PyString_Check(s->encoding))
1494 goto bail;
1495 ····
1496 /* All of these will fail "gracefully" so we don't need to verify them */
1497 s->strict = PyObject_GetAttrString(ctx, "strict");
1498 if (s->strict == NULL)
1499 goto bail;
1500 s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1501 if (s->object_hook == NULL)
1502 goto bail;
1503 s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1504 if (s->parse_float == NULL)
1505 goto bail;
1506 s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1507 if (s->parse_int == NULL)
1508 goto bail;
1509 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1510 if (s->parse_constant == NULL)
1511 goto bail;
1512 ····
1513 return 0;
1514
1515 bail:
1516 Py_CLEAR(s->encoding);
1517 Py_CLEAR(s->strict);
1518 Py_CLEAR(s->object_hook);
1519 Py_CLEAR(s->parse_float);
1520 Py_CLEAR(s->parse_int);
1521 Py_CLEAR(s->parse_constant);
1522 return -1;
1523 }
1524
1525 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1526
1527 static
1528 PyTypeObject PyScannerType = {
Martin v. Löwis 2009/01/04 13:22:29 I think scanner objects should participate in cycl
bob.ippolito 2009/01/05 01:28:19 I don't think it's possible to cause a cycle using
1529 PyObject_HEAD_INIT(0)
1530 0, /* tp_internal */
1531 "make_scanner", /* tp_name */
1532 sizeof(PyScannerObject), /* tp_basicsize */
1533 0, /* tp_itemsize */
1534 scanner_dealloc, /* tp_dealloc */
1535 0, /* tp_print */
1536 0, /* tp_getattr */
1537 0, /* tp_setattr */
1538 0, /* tp_compare */
1539 0, /* tp_repr */
1540 0, /* tp_as_number */
1541 0, /* tp_as_sequence */
1542 0, /* tp_as_mapping */
1543 0, /* tp_hash */
1544 scanner_call, /* tp_call */
1545 0, /* tp_str */
1546 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
1547 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
1548 0, /* tp_as_buffer */
1549 Py_TPFLAGS_DEFAULT, /* tp_flags */
1550 scanner_doc, /* tp_doc */
1551 0, /* tp_traverse */
1552 0, /* tp_clear */
1553 0, /* tp_richcompare */
1554 0, /* tp_weaklistoffset */
1555 0, /* tp_iter */
1556 0, /* tp_iternext */
1557 0, /* tp_methods */
1558 scanner_members, /* tp_members */
1559 0, /* tp_getset */
1560 0, /* tp_base */
1561 0, /* tp_dict */
1562 0, /* tp_descr_get */
1563 0, /* tp_descr_set */
1564 0, /* tp_dictoffset */
1565 scanner_init, /* tp_init */
1566 0,/* PyType_GenericAlloc, */ /* tp_alloc */
1567 0,/* PyType_GenericNew, */ /* tp_new */
1568 0,/* _PyObject_Del, */ /* tp_free */
1569 };
1570
1571 static int
1572 encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1573 {
1574 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1575
1576 PyEncoderObject *s;
1577 PyObject *allow_nan;
1578
1579 assert(PyEncoder_Check(self));
1580 s = (PyEncoderObject *)self;
1581
1582 s->markers = NULL;
1583 s->defaultfn = NULL;
1584 s->encoder = NULL;
1585 s->indent = NULL;
1586 s->key_separator = NULL;
1587 s->item_separator = NULL;
1588 s->sort_keys = NULL;
1589 s->skipkeys = NULL;
1590 ····
1591 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1592 &s->markers, &s->defaultfn, &s->encoder, &s->indent, &s->key_separator, &s->item_separator, &s->sort_keys, &s->skipkeys, &allow_nan))
1593 return -1;
1594 ····
1595 Py_INCREF(s->markers);
1596 Py_INCREF(s->defaultfn);
1597 Py_INCREF(s->encoder);
1598 Py_INCREF(s->indent);
1599 Py_INCREF(s->key_separator);
1600 Py_INCREF(s->item_separator);
1601 Py_INCREF(s->sort_keys);
1602 Py_INCREF(s->skipkeys);
1603 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1604 s->allow_nan = PyObject_IsTrue(allow_nan);
1605 return 0;
1606 }
1607
1608 static PyObject *
1609 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1610 {
1611 static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1612 PyObject *obj;
1613 PyObject *rval;
1614 Py_ssize_t indent_level;
1615 PyEncoderObject *s;
1616 assert(PyEncoder_Check(self));
1617 s = (PyEncoderObject *)self;
1618 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1619 &obj, _convertPyInt_AsSsize_t, &indent_level))
1620 return NULL;
1621 rval = PyList_New(0);
1622 if (rval == NULL)
1623 return NULL;
1624 if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1625 Py_DECREF(rval);
1626 return NULL;
1627 }
1628 return rval;
1629 }
1630
1631 static PyObject *
1632 _encoded_const(PyObject *obj)
1633 {
1634 if (obj == Py_None) {
1635 static PyObject *s_null = NULL;
1636 if (s_null == NULL) {
1637 s_null = PyString_InternFromString("null");
1638 }
1639 Py_INCREF(s_null);
1640 return s_null;
1641 }
1642 else if (obj == Py_True) {
1643 static PyObject *s_true = NULL;
1644 if (s_true == NULL) {
1645 s_true = PyString_InternFromString("true");
1646 }
1647 Py_INCREF(s_true);
1648 return s_true;
1649 }
1650 else if (obj == Py_False) {
1651 static PyObject *s_false = NULL;
1652 if (s_false == NULL) {
1653 s_false = PyString_InternFromString("false");
1654 }
1655 Py_INCREF(s_false);
1656 return s_false;
1657 }
1658 else {
1659 PyErr_SetString(PyExc_ValueError, "not a const");
1660 return NULL;
1661 }
1662 }
1663
1664 static PyObject *
1665 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1666 {
1667 double i = PyFloat_AS_DOUBLE(obj);
1668 if (!Py_IS_FINITE(i)) {
1669 if (!s->allow_nan) {
1670 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1671 return NULL;
1672 }
1673 if (i > 0) {
1674 return PyString_FromString("Infinity");
1675 }
1676 else if (i < 0) {
1677 return PyString_FromString("-Infinity");
1678 }
1679 else {
1680 return PyString_FromString("NaN");
1681 }
1682 }
1683 /* Use a better float format here? */
1684 return PyObject_Repr(obj);
1685 }
1686
1687 static PyObject *
1688 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1689 {
1690 if (s->fast_encode)
1691 return py_encode_basestring_ascii(NULL, obj);
1692 else
1693 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1694 }
1695
1696 static int
1697 _steal_list_append(PyObject *lst, PyObject *stolen)
1698 {
1699 int rval = PyList_Append(lst, stolen);
1700 Py_DECREF(stolen);
1701 return rval;
1702 }
1703
1704 static int
1705 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1706 {
1707 PyObject *newobj;
1708 int rv;
1709 ····
1710 if (obj == Py_None || obj == Py_True || obj == Py_False) {
1711 PyObject *cstr = _encoded_const(obj);
1712 if (cstr == NULL)
1713 return -1;
1714 return _steal_list_append(rval, cstr);
1715 }
1716 else if (PyString_Check(obj) || PyUnicode_Check(obj))
1717 {
1718 PyObject *encoded = encoder_encode_string(s, obj);
1719 if (encoded == NULL)
1720 return -1;
1721 return _steal_list_append(rval, encoded);
1722 }
1723 else if (PyInt_Check(obj) || PyLong_Check(obj)) {
1724 PyObject *encoded = PyObject_Str(obj);
1725 if (encoded == NULL)
1726 return -1;
1727 return _steal_list_append(rval, encoded);
1728 }
1729 else if (PyFloat_Check(obj)) {
1730 PyObject *encoded = encoder_encode_float(s, obj);
1731 if (encoded == NULL)
1732 return -1;
1733 return _steal_list_append(rval, encoded);
1734 }
1735 else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1736 return encoder_listencode_list(s, rval, obj, indent_level);
1737 }
1738 else if (PyDict_Check(obj)) {
1739 return encoder_listencode_dict(s, rval, obj, indent_level);
1740 }
1741 else {
1742 PyObject *ident = NULL;
1743 if (s->markers != Py_None) {
1744 int has_key;
1745 ident = PyLong_FromVoidPtr(obj);
1746 if (ident == NULL)
1747 return -1;
1748 has_key = PyDict_Contains(s->markers, ident);
1749 if (has_key) {
1750 if (has_key != -1)
1751 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1752 Py_DECREF(ident);
1753 return -1;
1754 }
1755 if (PyDict_SetItem(s->markers, ident, obj)) {
1756 Py_DECREF(ident);
1757 return -1;
1758 }
1759 }
1760 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
1761 if (newobj == NULL) {
1762 Py_DECREF(ident);
1763 return -1;
1764 }
1765 rv = encoder_listencode_obj(s, rval, newobj, indent_level);
1766 Py_DECREF(newobj);
1767 if (rv) {
1768 Py_DECREF(ident);
1769 return -1;
1770 }
1771 if (ident != NULL) {
1772 if (PyDict_DelItem(s->markers, ident)) {
1773 Py_DECREF(ident);
1774 return -1;
1775 }
1776 Py_DECREF(ident);
1777 }
1778 return rv;
1779 }
1780 }
1781
1782 static int
1783 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
1784 {
1785 static PyObject *open_dict = NULL;
1786 static PyObject *close_dict = NULL;
1787 static PyObject *empty_dict = NULL;
1788 PyObject *kstr = NULL;
1789 PyObject *ident = NULL;
1790 PyObject *key, *value;
1791 Py_ssize_t pos;
1792 int skipkeys;
1793 Py_ssize_t idx;
1794 ····
1795 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
1796 open_dict = PyString_InternFromString("{");
1797 close_dict = PyString_InternFromString("}");
1798 empty_dict = PyString_InternFromString("{}");
1799 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
1800 return -1;
1801 }
1802 if (PyDict_Size(dct) == 0)
1803 return PyList_Append(rval, empty_dict);
1804 ····
1805 if (s->markers != Py_None) {
1806 int has_key;
1807 ident = PyLong_FromVoidPtr(dct);
1808 if (ident == NULL)
1809 goto bail;
1810 has_key = PyDict_Contains(s->markers, ident);
1811 if (has_key) {
1812 if (has_key != -1)
1813 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1814 goto bail;
1815 }
1816 if (PyDict_SetItem(s->markers, ident, dct)) {
1817 goto bail;
1818 }
1819 }
1820
1821 if (PyList_Append(rval, open_dict))
1822 goto bail;
1823
1824 if (s->indent != Py_None) {
1825 /* TODO: DOES NOT RUN */
1826 indent_level += 1;
1827 /*
1828 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1829 separator = _item_separator + newline_indent
1830 buf += newline_indent
1831 */
1832 }
1833
1834 /* TODO: C speedup not implemented for sort_keys */
1835
1836 pos = 0;
1837 skipkeys = PyObject_IsTrue(s->skipkeys);
1838 idx = 0;
1839 while (PyDict_Next(dct, &pos, &key, &value)) {
1840 PyObject *encoded;
1841
1842 if (PyString_Check(key) || PyUnicode_Check(key)) {
1843 Py_INCREF(key);
1844 kstr = key;
1845 }
1846 else if (PyFloat_Check(key)) {
1847 kstr = encoder_encode_float(s, key);
1848 if (kstr == NULL)
1849 goto bail;
1850 }
1851 else if (PyInt_Check(key) || PyLong_Check(key)) {
1852 kstr = PyObject_Str(key);
1853 if (kstr == NULL)
1854 goto bail;
1855 }
1856 else if (key == Py_True || key == Py_False || key == Py_None) {
1857 kstr = _encoded_const(key);
1858 if (kstr == NULL)
1859 goto bail;
1860 }
1861 else if (skipkeys) {
1862 continue;
1863 }
1864 else {
1865 /* TODO: include repr of key */
1866 PyErr_SetString(PyExc_ValueError, "keys must be a string");
1867 goto bail;
1868 }
1869 ········
1870 if (idx) {
1871 if (PyList_Append(rval, s->item_separator))
1872 goto bail;
1873 }
1874 ········
1875 encoded = encoder_encode_string(s, kstr);
1876 Py_CLEAR(kstr);
1877 if (encoded == NULL)
1878 goto bail;
1879 if (PyList_Append(rval, encoded)) {
1880 Py_DECREF(encoded);
1881 goto bail;
1882 }
1883 Py_DECREF(encoded);
1884 if (PyList_Append(rval, s->key_separator))
1885 goto bail;
1886 if (encoder_listencode_obj(s, rval, value, indent_level))
1887 goto bail;
1888 idx += 1;
1889 }
1890 if (ident != NULL) {
1891 if (PyDict_DelItem(s->markers, ident))
1892 goto bail;
1893 Py_CLEAR(ident);
1894 }
1895 if (s->indent != Py_None) {
1896 /* TODO: DOES NOT RUN */
1897 indent_level -= 1;
1898 /*
1899 yield '\n' + (' ' * (_indent * _current_indent_level))
1900 */
1901 }
1902 if (PyList_Append(rval, close_dict))
1903 goto bail;
1904 return 0;
1905 ····
1906 bail:
1907 Py_XDECREF(kstr);
1908 Py_XDECREF(ident);
1909 return -1;
1910 }
1911
1912
1913 static int
1914 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
1915 {
1916 static PyObject *open_array = NULL;
1917 static PyObject *close_array = NULL;
1918 static PyObject *empty_array = NULL;
1919 PyObject *ident = NULL;
1920 PyObject *s_fast = NULL;
1921 Py_ssize_t num_items;
1922 PyObject **seq_items;
1923 Py_ssize_t i;
1924 ····
1925 if (open_array == NULL || close_array == NULL || empty_array == NULL) {
1926 open_array = PyString_InternFromString("[");
1927 close_array = PyString_InternFromString("]");
1928 empty_array = PyString_InternFromString("[]");
1929 if (open_array == NULL || close_array == NULL || empty_array == NULL)
1930 return -1;
1931 }
1932 ident = NULL;
1933 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1934 if (s_fast == NULL)
1935 return -1;
1936 num_items = PySequence_Fast_GET_SIZE(s_fast);
1937 if (num_items == 0) {
1938 Py_DECREF(s_fast);
1939 return PyList_Append(rval, empty_array);
1940 }
1941 ····
1942 if (s->markers != Py_None) {
1943 int has_key;
1944 ident = PyLong_FromVoidPtr(seq);
1945 if (ident == NULL)
1946 goto bail;
1947 has_key = PyDict_Contains(s->markers, ident);
1948 if (has_key) {
1949 if (has_key != -1)
1950 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1951 goto bail;
1952 }
1953 if (PyDict_SetItem(s->markers, ident, seq)) {
1954 goto bail;
1955 }
1956 }
1957 ····
1958 seq_items = PySequence_Fast_ITEMS(s_fast);
1959 if (PyList_Append(rval, open_array))
1960 goto bail;
1961 if (s->indent != Py_None) {
1962 /* TODO: DOES NOT RUN */
1963 indent_level += 1;
1964 /*
1965 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1966 separator = _item_separator + newline_indent
1967 buf += newline_indent
1968 */
1969 }
1970 for (i = 0; i < num_items; i++) {
1971 PyObject *obj = seq_items[i];
1972 if (i) {
1973 if (PyList_Append(rval, s->item_separator))
1974 goto bail;
1975 }
1976 if (encoder_listencode_obj(s, rval, obj, indent_level))
1977 goto bail;
1978 }
1979 if (ident != NULL) {
1980 if (PyDict_DelItem(s->markers, ident))
1981 goto bail;
1982 Py_CLEAR(ident);
1983 }
1984 if (s->indent != Py_None) {
1985 /* TODO: DOES NOT RUN */
1986 indent_level -= 1;
1987 /*
1988 yield '\n' + (' ' * (_indent * _current_indent_level))
1989 */
1990 }
1991 if (PyList_Append(rval, close_array))
1992 goto bail;
1993 Py_DECREF(s_fast);
1994 return 0;
1995 ····
1996 bail:
1997 Py_XDECREF(ident);
1998 Py_DECREF(s_fast);
1999 return -1;
2000 }
2001
2002 static void
2003 encoder_dealloc(PyObject *self)
2004 {
2005 PyEncoderObject *s;
2006 assert(PyEncoder_Check(self));
2007 s = (PyEncoderObject *)self;
2008 Py_CLEAR(s->markers);
2009 Py_CLEAR(s->defaultfn);
2010 Py_CLEAR(s->encoder);
2011 Py_CLEAR(s->indent);
2012 Py_CLEAR(s->key_separator);
2013 Py_CLEAR(s->item_separator);
2014 Py_CLEAR(s->sort_keys);
2015 Py_CLEAR(s->skipkeys);
2016 self->ob_type->tp_free(self);
2017 }
2018
2019 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2020
2021 static
2022 PyTypeObject PyEncoderType = {
2023 PyObject_HEAD_INIT(0)
2024 0, /* tp_internal */
2025 "make_encoder", /* tp_name */
Martin v. Löwis 2009/01/04 13:22:29 That is a confusing type name. How about "Encoder"
bob.ippolito 2009/01/05 01:28:19 It's not a type that's ever exposed to user code,
2026 sizeof(PyEncoderObject), /* tp_basicsize */
2027 0, /* tp_itemsize */
2028 encoder_dealloc, /* tp_dealloc */
2029 0, /* tp_print */
2030 0, /* tp_getattr */
2031 0, /* tp_setattr */
2032 0, /* tp_compare */
2033 0, /* tp_repr */
2034 0, /* tp_as_number */
2035 0, /* tp_as_sequence */
2036 0, /* tp_as_mapping */
2037 0, /* tp_hash */
2038 encoder_call, /* tp_call */
2039 0, /* tp_str */
2040 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
2041 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
2042 0, /* tp_as_buffer */
2043 Py_TPFLAGS_DEFAULT, /* tp_flags */
2044 encoder_doc, /* tp_doc */
2045 0, /* tp_traverse */
2046 0, /* tp_clear */
2047 0, /* tp_richcompare */
2048 0, /* tp_weaklistoffset */
2049 0, /* tp_iter */
2050 0, /* tp_iternext */
2051 0, /* tp_methods */
2052 encoder_members, /* tp_members */
2053 0, /* tp_getset */
2054 0, /* tp_base */
2055 0, /* tp_dict */
2056 0, /* tp_descr_get */
2057 0, /* tp_descr_set */
2058 0, /* tp_dictoffset */
2059 encoder_init, /* tp_init */
2060 0,/* PyType_GenericAlloc, */ /* tp_alloc */
2061 0,/* PyType_GenericNew, */ /* tp_new */
2062 0,/* _PyObject_Del, */ /* tp_free */
2063 };
2064
605 static PyMethodDef json_methods[] = { 2065 static PyMethodDef json_methods[] = {
606 {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii, 2066 {"encode_basestring_ascii",
607 METH_O, pydoc_encode_basestring_ascii}, 2067 (PyCFunction)py_encode_basestring_ascii,
608 {"scanstring", (PyCFunction)py_scanstring, METH_VARARGS, 2068 METH_O,
609 pydoc_scanstring}, 2069 pydoc_encode_basestring_ascii},
2070 {"scanstring",
2071 (PyCFunction)py_scanstring,
2072 METH_VARARGS,
2073 pydoc_scanstring},
610 {NULL, NULL, 0, NULL} 2074 {NULL, NULL, 0, NULL}
611 }; 2075 };
612 2076
613 PyDoc_STRVAR(module_doc, 2077 PyDoc_STRVAR(module_doc,
614 "json speedups\n"); 2078 "json speedups\n");
615 2079
616 void 2080 void
617 init_json(void) 2081 init_json(void)
618 { 2082 {
619 PyObject *m; 2083 PyObject *m;
2084 PyScannerType.tp_getattro = PyObject_GenericGetAttr;
2085 PyScannerType.tp_setattro = PyObject_GenericSetAttr;
2086 PyScannerType.tp_alloc = PyType_GenericAlloc;
2087 PyScannerType.tp_new = PyType_GenericNew;
2088 PyScannerType.tp_free = _PyObject_Del;
2089 if (PyType_Ready(&PyScannerType) < 0)
2090 return;
2091 PyEncoderType.tp_getattro = PyObject_GenericGetAttr;
2092 PyEncoderType.tp_setattro = PyObject_GenericSetAttr;
2093 PyEncoderType.tp_alloc = PyType_GenericAlloc;
2094 PyEncoderType.tp_new = PyType_GenericNew;
2095 PyEncoderType.tp_free = _PyObject_Del;
2096 if (PyType_Ready(&PyEncoderType) < 0)
2097 return;
620 m = Py_InitModule3("_json", json_methods, module_doc); 2098 m = Py_InitModule3("_json", json_methods, module_doc);
2099 Py_INCREF((PyObject*)&PyScannerType);
2100 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2101 Py_INCREF((PyObject*)&PyEncoderType);
2102 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
621 } 2103 }
OLDNEW
« Lib/json/decoder.py ('K') | « Lib/json/tool.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b