Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1163)

Unified Diff: Modules/_struct.c

Issue 1258041: PEP 3318 - New struct string syntax Base URL: http://svn.python.org/view/*checkout*/python/branches/py3k/
Patch Set: Created 14 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « Lib/test/test_struct.py ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: Modules/_struct.c
===================================================================
--- Modules/_struct.c (revision 81143)
+++ Modules/_struct.c (working copy)
@@ -29,13 +29,44 @@
Py_ssize_t size;
} formatcode;
+/* A left child / right sibling tree used to represent the structure
+ of the format codes. Child nodes are added to represent nested
+ struct formats. */
+
+typedef struct _formattree {
+ Py_ssize_t s_size;
+ Py_ssize_t s_len;
+ formatcode *s_codes;
+ struct _formattree *child;
+ struct _formattree *sibling;
+} formattree;
+
+/* Holds the state of the struct string parser. */
+
+typedef struct _parser_state {
+ const formatdef *byte_fmt;
+ const char *fmt;
+ Py_ssize_t offset;
+ Py_ssize_t error_cnt;
+} parser_state;
+
+#define FormatTree_HasChildren(ft) ((ft)->child != 0)
+#define FormatTree_AppendChild(ft, new_child) \
+ do { \
+ if ((ft)->child == NULL) { \
+ (ft)->child = (new_child); \
+ } else { \
+ formattree *t = (ft)->child; \
+ for ( ; t->sibling != NULL; t = t->sibling); \
+ t->sibling = (new_child); \
+ } \
+ } while (0)
+
/* Struct object interface */
typedef struct {
PyObject_HEAD
- Py_ssize_t s_size;
- Py_ssize_t s_len;
- formatcode *s_codes;
+ formattree *s_tree;
PyObject *s_format;
PyObject *weakreflist; /* List of weak references */
} PyStructObject;
@@ -92,6 +123,62 @@
/* Helper for integer format codes: converts an arbitrary Python object to a
PyLongObject if possible, otherwise fails. Caller should decref. */
+/* Prototypes -- have to put these because the parser is mutually recursive. */
+
+static formattree*
+parse_struct_string(parser_state *state, formattree *root);
+
+static formattree*
+parse_struct(parser_state *state);
+
+static formattree*
+parse_primitive(parser_state *state);
+
+
+static formattree *
+formattree_new(void)
+{
+ formattree * new_tree = PyMem_MALLOC(sizeof(formattree));
+ if (new_tree == NULL) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ new_tree->s_size = 0;
+ new_tree->s_len = 0;
+ new_tree->s_codes = NULL;
+ new_tree->child = NULL;
+ new_tree->sibling = NULL;
+
+ return new_tree;
+}
+
+static void
+formattree_free_r(formattree *tree)
+{
+ formattree *child = tree->child;
+ formattree *del_child = NULL;
+ while (child != NULL) {
+ if (FormatTree_HasChildren(child)) {
+ formattree_free_r(child);
+ } else {
+ PyMem_FREE(child->s_codes);
+ }
+ del_child = child;
+ child = child->sibling;
+ PyMem_FREE(del_child);
+ }
+}
+
+static void
+formattree_free(formattree *tree)
+{
+ formattree_free_r(tree);
+ PyMem_FREE(tree);
+}
+
+/* Helper to get a PyLongObject by hook or by crook. Caller should decref. */
+
static PyObject *
get_pylong(PyObject *v)
{
@@ -1158,28 +1245,87 @@
return size;
}
+static void
+whitespace(parser_state *state)
+{
+ while ( (*state->fmt != '\0') && isspace(Py_CHARMASK(*state->fmt)))
+ state->fmt++;
+}
-/* calculate the size of a format string */
+static int
+match(parser_state *state, char c)
+{
+ if (*state->fmt == c) {
+ state->fmt++;
+ return 0;
+ } else {
+ return -1;
+ }
+}
static int
-prepare_s(PyStructObject *self)
+is_primitive(char c)
{
+ static char *primitive_codes = "xcbB?hHiIlLqQfdspP";
+
+ const char *begin;
+ for (begin = primitive_codes; *begin; ++begin) {
+ if (c == *begin)
+ return 1;
+ }
+ return isdigit(c);
+}
+
+static int
+is_byte_order_marker(char c)
+{
+ static char *byte_order_codes = "<>!=@";
+
+ const char *begin;
+ for (begin = byte_order_codes; *begin; ++begin) {
+ if (c == *begin)
+ return 1;
+ }
+ return 0;
+}
+
+static void
+parse_error(parser_state *state, const char *error_msg)
+{
+ PyErr_SetString(StructError, error_msg);
+ state->error_cnt += 1;
+}
+
+static formattree*
+parse_primitive(parser_state *state)
+{
+ #define Return_NoMemory() \
+ do { \
+ PyErr_NoMemory(); \
+ state->error_cnt += 1; \
+ PyMem_FREE(tree); \
+ return NULL; \
+ } while (0)
+
+ formattree *tree = formattree_new();
+ if (tree == NULL) {
+ return NULL;
+ }
+
const formatdef *f;
const formatdef *e;
formatcode *codes;
const char *s;
- const char *fmt;
char c;
Py_ssize_t size, len, num, itemsize, x;
- fmt = PyBytes_AS_STRING(self->s_format);
-
- f = whichtable((char **)&fmt);
-
- s = fmt;
- size = 0;
+ s = state->fmt;
+ /* Start 'size' at '*offset' instead of 0 so that the proper alignment
+ is maintained. */
+ size = state->offset;
len = 0;
+ f = state->byte_fmt;
while ((c = *s++) != '\0') {
if (isspace(Py_CHARMASK(c)))
continue;
@@ -1188,10 +1334,9 @@
while ('0' <= (c = *s++) && c <= '9') {
x = num*10 + (c - '0');
if (x/10 != num) {
- PyErr_SetString(
- StructError,
+ parse_error(state,
"overflow in item count");
- return -1;
+ return NULL;
}
num = x;
}
@@ -1200,10 +1345,12 @@
}
else
num = 1;
+ if (!is_primitive(c))
+ break;
e = getentry(c, f);
if (e == NULL)
- return -1;
+ return NULL;
switch (c) {
case 's': /* fall through */
@@ -1217,41 +1364,40 @@
x = num * itemsize;
size += x;
if (x/itemsize != num || size < 0) {
- PyErr_SetString(StructError,
- "total struct size too long");
- return -1;
+ parse_error(state, "total struct size too long");
+ return NULL;
}
}
/* check for overflow */
if ((len + 1) > (PY_SSIZE_T_MAX / sizeof(formatcode))) {
- PyErr_NoMemory();
- return -1;
+ Return_NoMemory();
}
- self->s_size = size;
- self->s_len = len;
codes = PyMem_MALLOC((len + 1) * sizeof(formatcode));
if (codes == NULL) {
- PyErr_NoMemory();
- return -1;
+ Return_NoMemory();
}
- self->s_codes = codes;
+ tree->s_codes = codes;
- s = fmt;
- size = 0;
- while ((c = *s++) != '\0') {
+ /* Start 'size' at '*offset' instead of 0 so that the proper alignment
+ is maintained. */
+ size = state->offset;
+ f = state->byte_fmt;
+ while ((c = *state->fmt++) != '\0') {
if (isspace(Py_CHARMASK(c)))
continue;
if ('0' <= c && c <= '9') {
num = c - '0';
- while ('0' <= (c = *s++) && c <= '9')
+ while ('0' <= (c = *state->fmt++) && c <= '9')
num = num*10 + (c - '0');
if (c == '\0')
break;
}
else
num = 1;
+ if (!is_primitive(c))
+ break;
e = getentry(c, f);
@@ -1274,13 +1420,126 @@
}
}
}
+ state->fmt--;
codes->fmtdef = NULL;
codes->offset = size;
codes->size = 0;
+ /* Since we started 'size' at '*offset' to maintain alignment, we
+ need to subtract out '*offset' to compute the true size. */
+ tree->s_size = size - state->offset;
+ tree->s_len = len;
+ state->byte_fmt = f;
- return 0;
+ return tree;
+
+ #undef Return_NoMemory
}
+static formattree*
+parse_struct(parser_state *state)
+{
+ formattree *tree = NULL;
+
+ if (match(state, 'T') == 0) {
+ whitespace(state);
+ if (match(state, '{') == 0) {
+ tree = parse_struct_string(state, NULL);
+ whitespace(state);
+ if (match(state, '}') != 0) {
+ parse_error(state,
+ "missing '}' in struct string");
+ return NULL;
+ }
+ } else {
+ parse_error(state,
+ "missing '{' after 'T' in struct string");
+ return NULL;
+ }
+ }
+
+ return tree;
+}
+
+/* Parses a struct string and builds an intermediate representation of that
+ * string in the form of a formattree.
+ *
+ * NOTE: This function is called recursively and *can* return NULL even
+ * when no errors have occurred. This happens when an empty structure
+ * 'T{}' is encountered.
+ */
+
+static formattree*
+parse_struct_string(parser_state *state, formattree *root)
+{
+ formattree *tree = NULL;
+
+ if (root != NULL) {
+ tree = root;
+ }
+
+ whitespace(state);
+ while (*state->fmt != '\0') {
+ formattree *child = NULL;
+ int n = 0;
+
+ if (is_byte_order_marker(*state->fmt)) {
+ state->byte_fmt = whichtable((char**)&state->fmt);
+ } else if (*state->fmt == '}') {
+ /* Return back to invocation that started parsing the
+ structure. */
+ break;
+ } else if (*state->fmt == 'T') {
+ child = parse_struct(state);
+ n = 1;
+ } else if (is_primitive(*state->fmt)) {
+ child = parse_primitive(state);
+ if (child != NULL) {
+ state->offset += child->s_size;
+ n = child->s_len;
+ }
+ } else {
+ parse_error(state,
+ "unexpected character in struct string");
+ return NULL;
+ }
+
+ if (state->error_cnt > 0) {
+ return NULL;
+ }
+
+ /* If 'child' is NULL then, either an empty structure was encountered
+ * or a byte order change was made.
+ */
+ if (child != NULL) {
+ if (tree == NULL) {
+ tree = formattree_new();
+ if (tree == NULL) {
+ return NULL;
+ }
+ }
+ FormatTree_AppendChild(tree, child);
+ tree->s_size += child->s_size;
+ tree->s_len += n;
+ }
+
+ whitespace(state);
+ }
+ return tree;
+}
+
+static int
+prepare_s(PyStructObject *self)
+{
+ parser_state state = {
+ native_table,
+ PyBytes_AS_STRING(self->s_format),
+ 0,
+ 0
+ };
+ (void) parse_struct_string(&state, self->s_tree);
+ return (state.error_cnt == 0) ? 0 : -1;
+}
+
static PyObject *
s_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
@@ -1293,9 +1552,7 @@
PyStructObject *s = (PyStructObject*)self;
Py_INCREF(Py_None);
s->s_format = Py_None;
- s->s_codes = NULL;
- s->s_size = -1;
- s->s_len = -1;
+ s->s_tree = formattree_new();
}
return self;
}
@@ -1344,22 +1601,35 @@
{
if (s->weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *)s);
- if (s->s_codes != NULL) {
- PyMem_FREE(s->s_codes);
+ if (s->s_tree != NULL) {
+ formattree_free(s->s_tree);
}
Py_XDECREF(s->s_format);
Py_TYPE(s)->tp_free((PyObject *)s);
}
+/* Unpacks the given buffer in accordance with the given formattree. The
+ * formattree should not have any children and should have a sequence of
+ * format codes defined.
+ *
+ * A Tuple object is returned upon success. NULL is returned upon failure.
+ */
+
static PyObject *
-s_unpack_internal(PyStructObject *soself, char *startfrom) {
+s_unpack_primitive(formattree *tree, const char *startfrom)
+{
formatcode *code;
Py_ssize_t i = 0;
- PyObject *result = PyTuple_New(soself->s_len);
+ PyObject *result;
+
+ assert(tree->s_codes != NULL);
+ assert(!FormatTree_HasChildren(tree));
+
+ result = PyTuple_New(tree->s_len);
if (result == NULL)
return NULL;
- for (code = soself->s_codes; code->fmtdef != NULL; code++) {
+ for (code = tree->s_codes; code->fmtdef != NULL; code++) {
PyObject *v;
const formatdef *e = code->fmtdef;
const char *res = startfrom + code->offset;
@@ -1384,7 +1654,64 @@
return NULL;
}
+/* Unpacks the given buffer in accordance with the given formattree. The
+ * formattree specifies how the given character buffer should be unpacked.
+ *
+ * A Tuple object is returned upon success. This Tuple object may have nested
+ * Tuple objects, if the formattree dictacts as such. NULL is returned upon
+ * failure.
+ */
+static PyObject *
+s_unpack_struct(formattree *tree, const char *startfrom)
+{
+ formattree *child = NULL;
+ PyObject *tup = PyTuple_New(0);
+ if (tup == NULL)
+ return NULL;
+
+ for (child = tree->child; child != NULL; child = child->sibling) {
+ PyObject *sub_tup = NULL;
+ PyObject *new_tup = NULL;
+
+ if (FormatTree_HasChildren(child)) {
+ PyObject *wrap_tup = NULL;
+ sub_tup = s_unpack_struct(child, startfrom);
+ if (sub_tup == NULL)
+ goto fail;
+ wrap_tup = PyTuple_New(1);
+ if (wrap_tup == NULL) {
+ Py_DECREF(sub_tup);
+ goto fail;
+ }
+ PyTuple_SetItem(wrap_tup, 0, sub_tup);
+ new_tup = PySequence_Concat(tup, wrap_tup);
+ Py_DECREF(wrap_tup);
+ } else {
+ sub_tup = s_unpack_primitive(child, startfrom);
+ if (sub_tup == NULL)
+ goto fail;
+ new_tup = PySequence_Concat(tup, sub_tup);
+ Py_DECREF(sub_tup);
+ }
+ Py_DECREF(tup);
+ if (new_tup == NULL)
+ goto fail;
+ tup = new_tup;
+ }
+
+ return tup;
+fail:
+ Py_DECREF(tup);
+ return NULL;
+}
+
+static PyObject *
+s_unpack_internal(PyStructObject *soself, char *startfrom)
+{
+ return s_unpack_struct(soself->s_tree, startfrom);
+}
+
PyDoc_STRVAR(s_unpack__doc__,
"S.unpack(buffer) -> (v1, v2, ...)\n\
\n\
@@ -1403,10 +1730,10 @@
assert(soself->s_codes != NULL);
minge 2010/05/20 13:58:04 This should be 'assert(soself->s_tree->s_codes !=
if (PyObject_GetBuffer(input, &vbuf, PyBUF_SIMPLE) < 0)
return NULL;
- if (vbuf.len != soself->s_size) {
+ if (vbuf.len != soself->s_tree->s_size) {
PyErr_Format(StructError,
"unpack requires a bytes argument of length %zd",
- soself->s_size);
+ soself->s_tree->s_size);
PyBuffer_Release(&vbuf);
return NULL;
}
@@ -1445,10 +1772,10 @@
return NULL;
if (offset < 0)
offset += vbuf.len;
- if (offset < 0 || vbuf.len - offset < soself->s_size) {
+ if (offset < 0 || vbuf.len - offset < soself->s_tree->s_size) {
PyErr_Format(StructError,
"unpack_from requires a buffer of at least %zd bytes",
- soself->s_size);
+ soself->s_tree->s_size);
PyBuffer_Release(&vbuf);
return NULL;
}
@@ -1457,28 +1784,28 @@
return result;
}
-
-/*
- * Guts of the pack function.
+/* Packs a non-nested tuples of arguments to the given buffer.
*
- * Takes a struct object, a tuple of arguments, and offset in that tuple of
- * argument for where to start processing the arguments for packing, and a
- * character buffer for writing the packed string. The caller must insure
- * that the buffer may contain the required length for packing the arguments.
- * 0 is returned on success, 1 is returned if there is an error.
- *
+ * Takes a struct object, a tuple of arguments, and a character buffer for
+ * writing the packed string. 0 is returned on success, -1 is returned if
+ * there is an error.
*/
+
static int
-s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char* buf)
+s_pack_primitive(formattree *tree, PyObject *args, char *buf)
{
formatcode *code;
- /* XXX(nnorwitz): why does i need to be a local? can we use
- the offset parameter or do we need the wider width? */
- Py_ssize_t i;
+ Py_ssize_t i = 0;
+ assert(tree->s_codes != NULL);
- memset(buf, '\0', soself->s_size);
- i = offset;
- for (code = soself->s_codes; code->fmtdef != NULL; code++) {
+ if (PyTuple_GET_SIZE(args) != tree->s_len)
+ {
+ PyErr_Format(StructError,
+ "pack requires exactly %zd arguments", tree->s_len);
+ return -1;
+ }
+
+ for (code = tree->s_codes; code->fmtdef != NULL; code++) {
Py_ssize_t n;
PyObject *v = PyTuple_GET_ITEM(args, i++);
const formatdef *e = code->fmtdef;
@@ -1553,6 +1880,68 @@
}
+/* Packs a potentially nested tuple of arguments to the given buffer.
+ *
+ * Takes a struct object, a tuple of arguments, and a character buffer for
+ * writing the packed string. If there are nested structures within the given
+ * structure, then the nested structures are recursively packed. 0 is returned
+ * on success, -1 is returned if there is an error.
+ */
+
+static int
+s_pack_struct(formattree *tree, PyObject *args, char *buf)
+{
+ Py_ssize_t arg_i = 0;
+ formattree *child = NULL;
+
+ if (PyTuple_GET_SIZE(args) != tree->s_len)
+ {
+ PyErr_Format(StructError,
+ "pack requires exactly %zd arguments", tree->s_len);
+ return -1;
+ }
+
+ for (child = tree->child; child != NULL; child = child->sibling) {
+ int ret = 0;
+ if (FormatTree_HasChildren(child)) {
+ /* args[arg_i] */
+ PyObject *arg = PyTuple_GET_ITEM(args, arg_i);
+ if (!PyTuple_Check(arg)) {
+ PyErr_SetString(StructError,
+ "argument for 'T{...}'"
+ " must be a tuple");
+ return -1;
+ }
+ ret = s_pack_struct(child, arg, buf);
+ arg_i += 1;
+ } else {
+ /* args[arg_i : arg_i + n] */
+ PyObject *slice =
+ PyTuple_GetSlice(args, arg_i,
+ arg_i + child->s_len);
+ ret = s_pack_primitive(child, slice, buf);
+ arg_i += child->s_len;
+ Py_DECREF(slice);
+ }
+
+ if (ret < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char *buf)
+{
+ memset(buf, '\0', soself->s_tree->s_size);
+ PyObject *new_args = PyTuple_GetSlice(args, offset, PyTuple_Size(args));
+ int ret = s_pack_struct(soself->s_tree, new_args, buf);
+ Py_DECREF(new_args);
+ return ret;
+}
+
PyDoc_STRVAR(s_pack__doc__,
"S.pack(v1, v2, ...) -> bytes\n\
\n\
@@ -1568,16 +1957,9 @@
/* Validate arguments. */
soself = (PyStructObject *)self;
assert(PyStruct_Check(self));
- assert(soself->s_codes != NULL);
- if (PyTuple_GET_SIZE(args) != soself->s_len)
- {
- PyErr_Format(StructError,
- "pack requires exactly %zd arguments", soself->s_len);
- return NULL;
- }
/* Allocate a new string */
- result = PyBytes_FromStringAndSize((char *)NULL, soself->s_size);
+ result = PyBytes_FromStringAndSize((char *)NULL, soself->s_tree->s_size);
if (result == NULL)
return NULL;
@@ -1608,14 +1990,6 @@
/* Validate arguments. +1 is for the first arg as buffer. */
soself = (PyStructObject *)self;
assert(PyStruct_Check(self));
- assert(soself->s_codes != NULL);
- if (PyTuple_GET_SIZE(args) != (soself->s_len + 2))
- {
- PyErr_Format(StructError,
- "pack_into requires exactly %zd arguments",
- (soself->s_len + 2));
- return NULL;
- }
/* Extract a writable memory buffer from the first argument */
if ( PyObject_AsWriteBuffer(PyTuple_GET_ITEM(args, 0),
@@ -1634,10 +2008,10 @@
offset += buffer_len;
/* Check boundaries */
- if (offset < 0 || (buffer_len - offset) < soself->s_size) {
+ if (offset < 0 || (buffer_len - offset) < soself->s_tree->s_size) {
PyErr_Format(StructError,
"pack_into requires a buffer of at least %zd bytes",
- soself->s_size);
+ soself->s_tree->s_size);
return NULL;
}
@@ -1659,7 +2033,7 @@
static PyObject *
s_get_size(PyStructObject *self, void *unused)
{
- return PyLong_FromSsize_t(self->s_size);
+ return PyLong_FromSsize_t(self->s_tree->s_size);
}
/* List of functions */
@@ -1780,7 +2154,7 @@
PyObject *s_object = cache_struct(fmt);
if (s_object == NULL)
return NULL;
- n = ((PyStructObject *)s_object)->s_size;
+ n = ((PyStructObject *)s_object)->s_tree->s_size;
Py_DECREF(s_object);
return PyLong_FromSsize_t(n);
}
@@ -1914,15 +2288,19 @@
and also as format strings (explained below) to describe the layout of data\n\
in the C struct.\n\
\n\
-The optional first format char indicates byte order, size and alignment:\n\
+The following format chars indicates byte order, size and alignment:\n\
@: native order, size & alignment (default)\n\
=: native order, std. size & alignment\n\
<: little-endian, std. size & alignment\n\
>: big-endian, std. size & alignment\n\
!: same as >\n\
+They may be used anywhere in the format string. The byte order, size and\n\
+alignment dictated by one of these chars is in effect until the next\n\
+such char is encountered.\n\
\n\
-The remaining chars indicate types of args and must match exactly;\n\
-these can be preceded by a decimal repeat count:\n\
+The primitive format chars are used to indicate types of primitive arg\n\
+values and must match exactly; these can be preceded by a decimal repeat\n\
+count:\n\
x: pad byte (no data); c:char; b:signed byte; B:unsigned byte;\n\
?: _Bool (requires C99; if not available, char is used instead)\n\
h:short; H:unsigned short; i:int; I:unsigned int;\n\
@@ -1935,6 +2313,10 @@
q:long long; Q:unsigned long long\n\
Whitespace between formats is ignored.\n\
\n\
+Additionally, there may be nested structures. These are specified by\n\
+surrounding a format string with 'T{' ... '}'. They maybe arbitrarily\n\
+nested.\n\
+\n\
The variable struct.error is an exception raised on errors.\n");
« no previous file with comments | « Lib/test/test_struct.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b