Re: [BUGS] casting strings to multidimensional arrays yields strange results - Mailing list pgsql-patches
| From | Joe Conway |
|---|---|
| Subject | Re: [BUGS] casting strings to multidimensional arrays yields strange results |
| Date | |
| Msg-id | 4111CD22.1010804@joeconway.com Whole thread Raw |
| In response to | Re: [BUGS] casting strings to multidimensional arrays yields strange results (Tom Lane <tgl@sss.pgh.pa.us>) |
| List | pgsql-patches |
Tom Lane wrote:
> Joe Conway <mail@joeconway.com> writes:
>>While looking at it the last day or so, I started to think it might be
>>better to use bison to parse array literals -- or is that a bad idea?
>
> Offhand it doesn't seem like a super-appropriate tool. Once you get
> past the lexical details like quoting, the syntax of array literals
> is not complicated enough to need a bison parser. Also, the issues
> you're facing now like enforcing consistent dimensions are not amenable
> to solution by a context-free grammar --- so you'd still need most of
> the dimension-checking mechanisms.
I'm hesitant to apply the attached this late before the beta without
review, but it seems to take care of the pathological cases I came up
with, doesn't break anything AFAICS, and passes all regression tests. I
guess it can go into beta 2.
Joe
Index: src/backend/utils/adt/arrayfuncs.c
===================================================================
RCS file: /cvsroot/pgsql-server/src/backend/utils/adt/arrayfuncs.c,v
retrieving revision 1.106
diff -c -r1.106 arrayfuncs.c
*** src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 03:29:37 -0000 1.106
--- src/backend/utils/adt/arrayfuncs.c 5 Aug 2004 05:50:07 -0000
***************
*** 351,368 ****
* The syntax for array input is C-like nested curly braces
*-----------------------------------------------------------------------------
*/
static int
ArrayCount(char *str, int *dim, char typdelim)
{
! int nest_level = 0,
! i;
! int ndim = 1,
! temp[MAXDIM],
! nelems[MAXDIM],
! nelems_last[MAXDIM];
! bool scanning_string = false;
! bool eoArray = false;
! char *ptr;
for (i = 0; i < MAXDIM; ++i)
{
--- 351,378 ----
* The syntax for array input is C-like nested curly braces
*-----------------------------------------------------------------------------
*/
+ typedef enum
+ {
+ ARRAY_NO_LEVEL,
+ ARRAY_LEVEL_STARTED,
+ ARRAY_ELEM_STARTED,
+ ARRAY_LEVEL_COMPLETED,
+ ARRAY_LEVEL_DELIMITED
+ } ArrayParseState;
+
static int
ArrayCount(char *str, int *dim, char typdelim)
{
! int nest_level = 0,
! i;
! int ndim = 1,
! temp[MAXDIM],
! nelems[MAXDIM],
! nelems_last[MAXDIM];
! bool scanning_string = false;
! bool eoArray = false;
! char *ptr;
! ArrayParseState parse_state = ARRAY_NO_LEVEL;
for (i = 0; i < MAXDIM; ++i)
{
***************
*** 389,394 ****
--- 399,416 ----
errmsg("malformed array literal: \"%s\"", str)));
break;
case '\\':
+ /*
+ * An escape must be after a level start, within an
+ * element, or after a delimiter. In any case
+ * we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
/* skip the escaped character */
if (*(ptr + 1))
ptr++;
***************
*** 398,408 ****
--- 420,454 ----
errmsg("malformed array literal: \"%s\"", str)));
break;
case '\"':
+ /*
+ * A quote must be after a level start, within an
+ * element, or after a delimiter. In any case
+ * we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
scanning_string = !scanning_string;
break;
case '{':
if (!scanning_string)
{
+ /*
+ * A left brace can occur if no nesting has
+ * occurred yet, after a level start, or
+ * after a delimiter.
+ */
+ if (parse_state != ARRAY_NO_LEVEL &&
+ parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_STARTED;
if (nest_level >= MAXDIM)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
***************
*** 417,422 ****
--- 463,480 ----
case '}':
if (!scanning_string)
{
+ /*
+ * A right brace can occur after a level start,
+ * after an element start, or after a level
+ * completion.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_COMPLETED;
if (nest_level == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
***************
*** 447,455 ****
--- 505,540 ----
default:
if (*ptr == typdelim && !scanning_string)
{
+ /*
+ * Delimiters can occur after an element start
+ * or after a level completion
+ */
+ if (parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_LEVEL_DELIMITED;
+
itemdone = true;
nelems[nest_level - 1]++;
}
+ else if (!isspace(*ptr) && !scanning_string)
+ {
+ /*
+ * Other non-space characters
+ * must be after a level start, within an
+ * element, or after a delimiter. In any case
+ * we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str)));
+ parse_state = ARRAY_ELEM_STARTED;
+ }
break;
}
if (!itemdone)
pgsql-patches by date: