Adjust pg_parse_json() so that it does not directly ereport().
authorRobert Haas <rhaas@postgresql.org>
Mon, 27 Jan 2020 16:03:21 +0000 (11:03 -0500)
committerRobert Haas <rhaas@postgresql.org>
Mon, 27 Jan 2020 16:04:51 +0000 (11:04 -0500)
Instead, it now returns a value indicating either success or the
type of error which occurred. The old behavior is still available
by calling pg_parse_json_or_ereport(). If the new interface is
used, an error can be thrown by passing the return value of
pg_parse_json() to json_ereport_error().

pg_parse_json() can still elog() in can't-happen cases, but it
seems like that issue is best handled separately.

Adjust json_lex() and json_count_array_elements() to return an
error code, too.

This is all in preparation for making the backend's json parser
available to frontend code.

Reviewed and/or tested by Mark Dilger and Andrew Dunstan.

Discussion: http://postgr.es/m/CA+TgmoYfOXhd27MUDGioVh6QtpD0C1K-f6ObSA10AWiHBAL5bA@mail.gmail.com

src/backend/utils/adt/json.c
src/backend/utils/adt/jsonapi.c
src/backend/utils/adt/jsonb.c
src/backend/utils/adt/jsonfuncs.c
src/include/utils/jsonapi.h

index 4be16b5c201a32ca4a316d8487a3b2479034bc32..e73a60ece8a6d11f8d1ffe3987497744bc824367 100644 (file)
@@ -81,7 +81,7 @@ json_in(PG_FUNCTION_ARGS)
 
        /* validate it */
        lex = makeJsonLexContext(result, false);
-       pg_parse_json(lex, &nullSemAction);
+       pg_parse_json_or_ereport(lex, &nullSemAction);
 
        /* Internal representation is the same as text, for now */
        PG_RETURN_TEXT_P(result);
@@ -128,7 +128,7 @@ json_recv(PG_FUNCTION_ARGS)
 
        /* Validate it. */
        lex = makeJsonLexContextCstringLen(str, nbytes, false);
-       pg_parse_json(lex, &nullSemAction);
+       pg_parse_json_or_ereport(lex, &nullSemAction);
 
        PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
 }
@@ -1337,12 +1337,15 @@ json_typeof(PG_FUNCTION_ARGS)
        JsonLexContext *lex;
        JsonTokenType tok;
        char       *type;
+       JsonParseErrorType      result;
 
        json = PG_GETARG_TEXT_PP(0);
        lex = makeJsonLexContext(json, false);
 
        /* Lex exactly one token from the input and check its type. */
-       json_lex(lex);
+       result = json_lex(lex);
+       if (result != JSON_SUCCESS)
+               json_ereport_error(result, lex);
        tok = lex->token_type;
        switch (tok)
        {
index 9e14306b6f6202b25b4acf147f62ae6622be4984..129fbd65d5149f8eb94e44bf7a45aff26b06e4ff 100644 (file)
@@ -35,18 +35,17 @@ typedef enum                                        /* contexts of JSON parser */
        JSON_PARSE_END                          /* saw the end of a document, expect nothing */
 } JsonParseContext;
 
-static inline void json_lex_string(JsonLexContext *lex);
-static inline void json_lex_number(JsonLexContext *lex, char *s,
+static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
+static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
                                                                   bool *num_err, int *total_len);
-static inline void parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
-static void parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
-static void parse_object(JsonLexContext *lex, JsonSemAction *sem);
-static void parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
-static void parse_array(JsonLexContext *lex, JsonSemAction *sem);
-static void report_parse_error(JsonParseContext ctx, JsonLexContext *lex) pg_attribute_noreturn();
-static void report_invalid_token(JsonLexContext *lex) pg_attribute_noreturn();
+static inline JsonParseErrorType parse_scalar(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_object_field(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_object(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_array_element(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType parse_array(JsonLexContext *lex, JsonSemAction *sem);
+static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
 static int     report_json_context(JsonLexContext *lex);
-static char *extract_mb_char(char *s);
+static char *extract_token(JsonLexContext *lex);
 
 /* the null action object used for pure validation */
 JsonSemAction nullSemAction =
@@ -74,13 +73,13 @@ lex_peek(JsonLexContext *lex)
  * move the lexer to the next token if the current look_ahead token matches
  * the parameter token. Otherwise, report an error.
  */
-static inline void
+static inline JsonParseErrorType
 lex_expect(JsonParseContext ctx, JsonLexContext *lex, JsonTokenType token)
 {
        if (lex_peek(lex) == token)
-               json_lex(lex);
+               return json_lex(lex);
        else
-               report_parse_error(ctx, lex);
+               return report_parse_error(ctx, lex);
 }
 
 /* chars to consider as part of an alphanumeric token */
@@ -171,13 +170,16 @@ makeJsonLexContextCstringLen(char *json, int len, bool need_escapes)
  * action routines to be called at appropriate spots during parsing, and a
  * pointer to a state object to be passed to those routines.
  */
-void
+JsonParseErrorType
 pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
 {
        JsonTokenType tok;
+       JsonParseErrorType      result;
 
        /* get the initial token */
-       json_lex(lex);
+       result = json_lex(lex);
+       if (result != JSON_SUCCESS)
+               return result;
 
        tok = lex_peek(lex);
 
@@ -185,17 +187,36 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
        switch (tok)
        {
                case JSON_TOKEN_OBJECT_START:
-                       parse_object(lex, sem);
+                       result = parse_object(lex, sem);
                        break;
                case JSON_TOKEN_ARRAY_START:
-                       parse_array(lex, sem);
+                       result = parse_array(lex, sem);
                        break;
                default:
-                       parse_scalar(lex, sem); /* json can be a bare scalar */
+                       result = parse_scalar(lex, sem); /* json can be a bare scalar */
        }
 
-       lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
+       if (result == JSON_SUCCESS)
+               result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
+
+       return result;
+}
+
+/*
+ * pg_parse_json_or_ereport
+ *
+ * This fuction is like pg_parse_json, except that it does not return a
+ * JsonParseErrorType. Instead, in case of any failure, this function will
+ * ereport(ERROR).
+ */
+void
+pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
+{
+       JsonParseErrorType      result;
 
+       result = pg_parse_json(lex, sem);
+       if (result != JSON_SUCCESS)
+               json_ereport_error(result, lex);
 }
 
 /*
@@ -206,11 +227,12 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
  *
  * Designed to be called from array_start routines.
  */
-int
-json_count_array_elements(JsonLexContext *lex)
+JsonParseErrorType
+json_count_array_elements(JsonLexContext *lex, int *elements)
 {
        JsonLexContext copylex;
        int                     count;
+       JsonParseErrorType      result;
 
        /*
         * It's safe to do this with a shallow copy because the lexical routines
@@ -222,21 +244,32 @@ json_count_array_elements(JsonLexContext *lex)
        copylex.lex_level++;
 
        count = 0;
-       lex_expect(JSON_PARSE_ARRAY_START, &copylex, JSON_TOKEN_ARRAY_START);
+       result = lex_expect(JSON_PARSE_ARRAY_START, &copylex,
+                                               JSON_TOKEN_ARRAY_START);
+       if (result != JSON_SUCCESS)
+               return result;
        if (lex_peek(&copylex) != JSON_TOKEN_ARRAY_END)
        {
                while (1)
                {
                        count++;
-                       parse_array_element(&copylex, &nullSemAction);
+                       result = parse_array_element(&copylex, &nullSemAction);
+                       if (result != JSON_SUCCESS)
+                               return result;
                        if (copylex.token_type != JSON_TOKEN_COMMA)
                                break;
-                       json_lex(&copylex);
+                       result = json_lex(&copylex);
+                       if (result != JSON_SUCCESS)
+                               return result;
                }
        }
-       lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex, JSON_TOKEN_ARRAY_END);
+       result = lex_expect(JSON_PARSE_ARRAY_NEXT, &copylex,
+                                                       JSON_TOKEN_ARRAY_END);
+       if (result != JSON_SUCCESS)
+               return result;
 
-       return count;
+       *elements = count;
+       return JSON_SUCCESS;
 }
 
 /*
@@ -248,25 +281,23 @@ json_count_array_elements(JsonLexContext *lex)
  *       - object ( { } )
  *       - object field
  */
-static inline void
+static inline JsonParseErrorType
 parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
 {
        char       *val = NULL;
        json_scalar_action sfunc = sem->scalar;
        JsonTokenType tok = lex_peek(lex);
+       JsonParseErrorType result;
 
        /* a scalar must be a string, a number, true, false, or null */
        if (tok != JSON_TOKEN_STRING && tok != JSON_TOKEN_NUMBER &&
                tok != JSON_TOKEN_TRUE && tok != JSON_TOKEN_FALSE &&
                tok != JSON_TOKEN_NULL)
-               report_parse_error(JSON_PARSE_VALUE, lex);
+               return report_parse_error(JSON_PARSE_VALUE, lex);
 
        /* if no semantic function, just consume the token */
        if (sfunc == NULL)
-       {
-               json_lex(lex);
-               return;
-       }
+               return json_lex(lex);
 
        /* extract the de-escaped string value, or the raw lexeme */
        if (lex_peek(lex) == JSON_TOKEN_STRING)
@@ -284,13 +315,17 @@ parse_scalar(JsonLexContext *lex, JsonSemAction *sem)
        }
 
        /* consume the token */
-       json_lex(lex);
+       result = json_lex(lex);
+       if (result != JSON_SUCCESS)
+               return result;
 
        /* invoke the callback */
        (*sfunc) (sem->semstate, val, tok);
+
+       return JSON_SUCCESS;
 }
 
-static void
+static JsonParseErrorType
 parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
 {
        /*
@@ -304,14 +339,19 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
        json_ofield_action oend = sem->object_field_end;
        bool            isnull;
        JsonTokenType tok;
+       JsonParseErrorType result;
 
        if (lex_peek(lex) != JSON_TOKEN_STRING)
-               report_parse_error(JSON_PARSE_STRING, lex);
+               return report_parse_error(JSON_PARSE_STRING, lex);
        if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
                fname = pstrdup(lex->strval->data);
-       json_lex(lex);
+       result = json_lex(lex);
+       if (result != JSON_SUCCESS)
+               return result;
 
-       lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
+       result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
+       if (result != JSON_SUCCESS)
+               return result;
 
        tok = lex_peek(lex);
        isnull = tok == JSON_TOKEN_NULL;
@@ -322,20 +362,23 @@ parse_object_field(JsonLexContext *lex, JsonSemAction *sem)
        switch (tok)
        {
                case JSON_TOKEN_OBJECT_START:
-                       parse_object(lex, sem);
+                       result = parse_object(lex, sem);
                        break;
                case JSON_TOKEN_ARRAY_START:
-                       parse_array(lex, sem);
+                       result = parse_array(lex, sem);
                        break;
                default:
-                       parse_scalar(lex, sem);
+                       result = parse_scalar(lex, sem);
        }
+       if (result != JSON_SUCCESS)
+               return result;
 
        if (oend != NULL)
                (*oend) (sem->semstate, fname, isnull);
+       return JSON_SUCCESS;
 }
 
-static void
+static JsonParseErrorType
 parse_object(JsonLexContext *lex, JsonSemAction *sem)
 {
        /*
@@ -345,6 +388,7 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem)
        json_struct_action ostart = sem->object_start;
        json_struct_action oend = sem->object_end;
        JsonTokenType tok;
+       JsonParseErrorType result;
 
        check_stack_depth();
 
@@ -360,40 +404,51 @@ parse_object(JsonLexContext *lex, JsonSemAction *sem)
        lex->lex_level++;
 
        Assert(lex_peek(lex) == JSON_TOKEN_OBJECT_START);
-       json_lex(lex);
+       result = json_lex(lex);
+       if (result != JSON_SUCCESS)
+               return result;
 
        tok = lex_peek(lex);
        switch (tok)
        {
                case JSON_TOKEN_STRING:
-                       parse_object_field(lex, sem);
-                       while (lex_peek(lex) == JSON_TOKEN_COMMA)
+                       result = parse_object_field(lex, sem);
+                       while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
                        {
-                               json_lex(lex);
-                               parse_object_field(lex, sem);
+                               result = json_lex(lex);
+                               if (result != JSON_SUCCESS)
+                                       break;
+                               result = parse_object_field(lex, sem);
                        }
                        break;
                case JSON_TOKEN_OBJECT_END:
                        break;
                default:
                        /* case of an invalid initial token inside the object */
-                       report_parse_error(JSON_PARSE_OBJECT_START, lex);
+                       result = report_parse_error(JSON_PARSE_OBJECT_START, lex);
        }
+       if (result != JSON_SUCCESS)
+               return result;
 
-       lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
+       result = lex_expect(JSON_PARSE_OBJECT_NEXT, lex, JSON_TOKEN_OBJECT_END);
+       if (result != JSON_SUCCESS)
+               return result;
 
        lex->lex_level--;
 
        if (oend != NULL)
                (*oend) (sem->semstate);
+
+       return JSON_SUCCESS;
 }
 
-static void
+static JsonParseErrorType
 parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
 {
        json_aelem_action astart = sem->array_element_start;
        json_aelem_action aend = sem->array_element_end;
        JsonTokenType tok = lex_peek(lex);
+       JsonParseErrorType result;
 
        bool            isnull;
 
@@ -406,20 +461,25 @@ parse_array_element(JsonLexContext *lex, JsonSemAction *sem)
        switch (tok)
        {
                case JSON_TOKEN_OBJECT_START:
-                       parse_object(lex, sem);
+                       result = parse_object(lex, sem);
                        break;
                case JSON_TOKEN_ARRAY_START:
-                       parse_array(lex, sem);
+                       result = parse_array(lex, sem);
                        break;
                default:
-                       parse_scalar(lex, sem);
+                       result = parse_scalar(lex, sem);
        }
 
+       if (result != JSON_SUCCESS)
+               return result;
+
        if (aend != NULL)
                (*aend) (sem->semstate, isnull);
+
+       return JSON_SUCCESS;
 }
 
-static void
+static JsonParseErrorType
 parse_array(JsonLexContext *lex, JsonSemAction *sem)
 {
        /*
@@ -428,6 +488,7 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem)
         */
        json_struct_action astart = sem->array_start;
        json_struct_action aend = sem->array_end;
+       JsonParseErrorType result;
 
        check_stack_depth();
 
@@ -442,35 +503,43 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem)
         */
        lex->lex_level++;
 
-       lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
-       if (lex_peek(lex) != JSON_TOKEN_ARRAY_END)
+       result = lex_expect(JSON_PARSE_ARRAY_START, lex, JSON_TOKEN_ARRAY_START);
+       if (result == JSON_SUCCESS && lex_peek(lex) != JSON_TOKEN_ARRAY_END)
        {
+               result = parse_array_element(lex, sem);
 
-               parse_array_element(lex, sem);
-
-               while (lex_peek(lex) == JSON_TOKEN_COMMA)
+               while (result == JSON_SUCCESS && lex_peek(lex) == JSON_TOKEN_COMMA)
                {
-                       json_lex(lex);
-                       parse_array_element(lex, sem);
+                       result = json_lex(lex);
+                       if (result != JSON_SUCCESS)
+                               break;
+                       result = parse_array_element(lex, sem);
                }
        }
+       if (result != JSON_SUCCESS)
+               return result;
 
-       lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
+       result = lex_expect(JSON_PARSE_ARRAY_NEXT, lex, JSON_TOKEN_ARRAY_END);
+       if (result != JSON_SUCCESS)
+               return result;
 
        lex->lex_level--;
 
        if (aend != NULL)
                (*aend) (sem->semstate);
+
+       return JSON_SUCCESS;
 }
 
 /*
  * Lex one token from the input stream.
  */
-void
+JsonParseErrorType
 json_lex(JsonLexContext *lex)
 {
        char       *s;
        int                     len;
+       JsonParseErrorType      result;
 
        /* Skip leading whitespace. */
        s = lex->token_terminator;
@@ -494,6 +563,7 @@ json_lex(JsonLexContext *lex)
                lex->token_type = JSON_TOKEN_END;
        }
        else
+       {
                switch (*s)
                {
                                /* Single-character token, some kind of punctuation mark. */
@@ -529,12 +599,16 @@ json_lex(JsonLexContext *lex)
                                break;
                        case '"':
                                /* string */
-                               json_lex_string(lex);
+                               result = json_lex_string(lex);
+                               if (result != JSON_SUCCESS)
+                                       return result;
                                lex->token_type = JSON_TOKEN_STRING;
                                break;
                        case '-':
                                /* Negative number. */
-                               json_lex_number(lex, s + 1, NULL, NULL);
+                               result = json_lex_number(lex, s + 1, NULL, NULL);
+                               if (result != JSON_SUCCESS)
+                                       return result;
                                lex->token_type = JSON_TOKEN_NUMBER;
                                break;
                        case '0':
@@ -548,7 +622,9 @@ json_lex(JsonLexContext *lex)
                        case '8':
                        case '9':
                                /* Positive number. */
-                               json_lex_number(lex, s, NULL, NULL);
+                               result = json_lex_number(lex, s, NULL, NULL);
+                               if (result != JSON_SUCCESS)
+                                       return result;
                                lex->token_type = JSON_TOKEN_NUMBER;
                                break;
                        default:
@@ -576,7 +652,7 @@ json_lex(JsonLexContext *lex)
                                        {
                                                lex->prev_token_terminator = lex->token_terminator;
                                                lex->token_terminator = s + 1;
-                                               report_invalid_token(lex);
+                                               return JSON_INVALID_TOKEN;
                                        }
 
                                        /*
@@ -593,21 +669,24 @@ json_lex(JsonLexContext *lex)
                                                else if (memcmp(s, "null", 4) == 0)
                                                        lex->token_type = JSON_TOKEN_NULL;
                                                else
-                                                       report_invalid_token(lex);
+                                                       return JSON_INVALID_TOKEN;
                                        }
                                        else if (p - s == 5 && memcmp(s, "false", 5) == 0)
                                                lex->token_type = JSON_TOKEN_FALSE;
                                        else
-                                               report_invalid_token(lex);
+                                               return JSON_INVALID_TOKEN;
 
                                }
                }                                               /* end of switch */
+       }
+
+       return JSON_SUCCESS;
 }
 
 /*
  * The next token in the input stream is known to be a string; lex it.
  */
-static inline void
+static inline JsonParseErrorType
 json_lex_string(JsonLexContext *lex)
 {
        char       *s;
@@ -628,7 +707,7 @@ json_lex_string(JsonLexContext *lex)
                if (len >= lex->input_length)
                {
                        lex->token_terminator = s;
-                       report_invalid_token(lex);
+                       return JSON_INVALID_TOKEN;
                }
                else if (*s == '"')
                        break;
@@ -637,12 +716,7 @@ json_lex_string(JsonLexContext *lex)
                        /* Per RFC4627, these characters MUST be escaped. */
                        /* Since *s isn't printable, exclude it from the context string */
                        lex->token_terminator = s;
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                        errmsg("invalid input syntax for type %s", "json"),
-                                        errdetail("Character with value 0x%02x must be escaped.",
-                                                          (unsigned char) *s),
-                                        report_json_context(lex)));
+                       return JSON_ESCAPING_REQUIRED;
                }
                else if (*s == '\\')
                {
@@ -652,7 +726,7 @@ json_lex_string(JsonLexContext *lex)
                        if (len >= lex->input_length)
                        {
                                lex->token_terminator = s;
-                               report_invalid_token(lex);
+                               return JSON_INVALID_TOKEN;
                        }
                        else if (*s == 'u')
                        {
@@ -666,7 +740,7 @@ json_lex_string(JsonLexContext *lex)
                                        if (len >= lex->input_length)
                                        {
                                                lex->token_terminator = s;
-                                               report_invalid_token(lex);
+                                               return JSON_INVALID_TOKEN;
                                        }
                                        else if (*s >= '0' && *s <= '9')
                                                ch = (ch * 16) + (*s - '0');
@@ -677,12 +751,7 @@ json_lex_string(JsonLexContext *lex)
                                        else
                                        {
                                                lex->token_terminator = s + pg_mblen(s);
-                                               ereport(ERROR,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("invalid input syntax for type %s",
-                                                                               "json"),
-                                                                errdetail("\"\\u\" must be followed by four hexadecimal digits."),
-                                                                report_json_context(lex)));
+                                               return JSON_UNICODE_ESCAPE_FORMAT;
                                        }
                                }
                                if (lex->strval != NULL)
@@ -693,33 +762,20 @@ json_lex_string(JsonLexContext *lex)
                                        if (ch >= 0xd800 && ch <= 0xdbff)
                                        {
                                                if (hi_surrogate != -1)
-                                                       ereport(ERROR,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("invalid input syntax for type %s",
-                                                                                       "json"),
-                                                                        errdetail("Unicode high surrogate must not follow a high surrogate."),
-                                                                        report_json_context(lex)));
+                                                       return JSON_UNICODE_HIGH_SURROGATE;
                                                hi_surrogate = (ch & 0x3ff) << 10;
                                                continue;
                                        }
                                        else if (ch >= 0xdc00 && ch <= 0xdfff)
                                        {
                                                if (hi_surrogate == -1)
-                                                       ereport(ERROR,
-                                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                        errmsg("invalid input syntax for type %s", "json"),
-                                                                        errdetail("Unicode low surrogate must follow a high surrogate."),
-                                                                        report_json_context(lex)));
+                                                       return JSON_UNICODE_LOW_SURROGATE;
                                                ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
                                                hi_surrogate = -1;
                                        }
 
                                        if (hi_surrogate != -1)
-                                               ereport(ERROR,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("invalid input syntax for type %s", "json"),
-                                                                errdetail("Unicode low surrogate must follow a high surrogate."),
-                                                                report_json_context(lex)));
+                                               return JSON_UNICODE_LOW_SURROGATE;
 
                                        /*
                                         * For UTF8, replace the escape sequence by the actual
@@ -731,11 +787,7 @@ json_lex_string(JsonLexContext *lex)
                                        if (ch == 0)
                                        {
                                                /* We can't allow this, since our TEXT type doesn't */
-                                               ereport(ERROR,
-                                                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
-                                                                errmsg("unsupported Unicode escape sequence"),
-                                                                errdetail("\\u0000 cannot be converted to text."),
-                                                                report_json_context(lex)));
+                                               return JSON_UNICODE_CODE_POINT_ZERO;
                                        }
                                        else if (GetDatabaseEncoding() == PG_UTF8)
                                        {
@@ -753,25 +805,14 @@ json_lex_string(JsonLexContext *lex)
                                                appendStringInfoChar(lex->strval, (char) ch);
                                        }
                                        else
-                                       {
-                                               ereport(ERROR,
-                                                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
-                                                                errmsg("unsupported Unicode escape sequence"),
-                                                                errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
-                                                                report_json_context(lex)));
-                                       }
+                                               return JSON_UNICODE_HIGH_ESCAPE;
 
                                }
                        }
                        else if (lex->strval != NULL)
                        {
                                if (hi_surrogate != -1)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                        errmsg("invalid input syntax for type %s",
-                                                                       "json"),
-                                                        errdetail("Unicode low surrogate must follow a high surrogate."),
-                                                        report_json_context(lex)));
+                                       return JSON_UNICODE_LOW_SURROGATE;
 
                                switch (*s)
                                {
@@ -796,15 +837,10 @@ json_lex_string(JsonLexContext *lex)
                                                appendStringInfoChar(lex->strval, '\t');
                                                break;
                                        default:
-                                               /* Not a valid string escape, so error out. */
+                                               /* Not a valid string escape, so signal error. */
+                                               lex->token_start = s;
                                                lex->token_terminator = s + pg_mblen(s);
-                                               ereport(ERROR,
-                                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                                errmsg("invalid input syntax for type %s",
-                                                                               "json"),
-                                                                errdetail("Escape sequence \"\\%s\" is invalid.",
-                                                                                  extract_mb_char(s)),
-                                                                report_json_context(lex)));
+                                               return JSON_ESCAPING_INVALID;
                                }
                        }
                        else if (strchr("\"\\/bfnrt", *s) == NULL)
@@ -816,24 +852,16 @@ json_lex_string(JsonLexContext *lex)
                                 * replace it with a switch statement, but testing so far has
                                 * shown it's not a performance win.
                                 */
+                               lex->token_start = s;
                                lex->token_terminator = s + pg_mblen(s);
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Escape sequence \"\\%s\" is invalid.",
-                                                                  extract_mb_char(s)),
-                                                report_json_context(lex)));
+                               return JSON_ESCAPING_INVALID;
                        }
 
                }
                else if (lex->strval != NULL)
                {
                        if (hi_surrogate != -1)
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Unicode low surrogate must follow a high surrogate."),
-                                                report_json_context(lex)));
+                               return JSON_UNICODE_LOW_SURROGATE;
 
                        appendStringInfoChar(lex->strval, *s);
                }
@@ -841,15 +869,12 @@ json_lex_string(JsonLexContext *lex)
        }
 
        if (hi_surrogate != -1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                errmsg("invalid input syntax for type %s", "json"),
-                                errdetail("Unicode low surrogate must follow a high surrogate."),
-                                report_json_context(lex)));
+               return JSON_UNICODE_LOW_SURROGATE;
 
        /* Hooray, we found the end of the string! */
        lex->prev_token_terminator = lex->token_terminator;
        lex->token_terminator = s + 1;
+       return JSON_SUCCESS;
 }
 
 /*
@@ -880,7 +905,7 @@ json_lex_string(JsonLexContext *lex)
  * raising an error for a badly-formed number.  Also, if total_len is not NULL
  * the distance from lex->input to the token end+1 is returned to *total_len.
  */
-static inline void
+static inline JsonParseErrorType
 json_lex_number(JsonLexContext *lex, char *s,
                                bool *num_err, int *total_len)
 {
@@ -969,8 +994,10 @@ json_lex_number(JsonLexContext *lex, char *s,
                lex->token_terminator = s;
                /* handle error if any */
                if (error)
-                       report_invalid_token(lex);
+                       return JSON_INVALID_TOKEN;
        }
+
+       return JSON_SUCCESS;
 }
 
 /*
@@ -978,130 +1005,117 @@ json_lex_number(JsonLexContext *lex, char *s,
  *
  * lex->token_start and lex->token_terminator must identify the current token.
  */
-static void
+static JsonParseErrorType
 report_parse_error(JsonParseContext ctx, JsonLexContext *lex)
 {
-       char       *token;
-       int                     toklen;
-
        /* Handle case where the input ended prematurely. */
        if (lex->token_start == NULL || lex->token_type == JSON_TOKEN_END)
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                errmsg("invalid input syntax for type %s", "json"),
-                                errdetail("The input string ended unexpectedly."),
-                                report_json_context(lex)));
+               return JSON_EXPECTED_MORE;
 
-       /* Separate out the current token. */
-       toklen = lex->token_terminator - lex->token_start;
-       token = palloc(toklen + 1);
-       memcpy(token, lex->token_start, toklen);
-       token[toklen] = '\0';
+       /* Otherwise choose the error type based on the parsing context. */
+       switch (ctx)
+       {
+               case JSON_PARSE_END:
+                       return JSON_EXPECTED_END;
+               case JSON_PARSE_VALUE:
+                       return JSON_EXPECTED_JSON;
+               case JSON_PARSE_STRING:
+                       return JSON_EXPECTED_STRING;
+               case JSON_PARSE_ARRAY_START:
+                       return JSON_EXPECTED_ARRAY_FIRST;
+               case JSON_PARSE_ARRAY_NEXT:
+                       return JSON_EXPECTED_ARRAY_NEXT;
+               case JSON_PARSE_OBJECT_START:
+                       return JSON_EXPECTED_OBJECT_FIRST;
+               case JSON_PARSE_OBJECT_LABEL:
+                       return JSON_EXPECTED_COLON;
+               case JSON_PARSE_OBJECT_NEXT:
+                       return JSON_EXPECTED_OBJECT_NEXT;
+               case JSON_PARSE_OBJECT_COMMA:
+                       return JSON_EXPECTED_STRING;
+               default:
+                       elog(ERROR, "unexpected json parse state: %d", ctx);
+       }
+}
 
-       /* Complain, with the appropriate detail message. */
-       if (ctx == JSON_PARSE_END)
+/*
+ * Report a JSON error.
+ */
+void
+json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
+{
+       if (error == JSON_UNICODE_HIGH_ESCAPE ||
+               error == JSON_UNICODE_CODE_POINT_ZERO)
+               ereport(ERROR,
+                               (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+                                errmsg("unsupported Unicode escape sequence"),
+                                errdetail("%s", json_errdetail(error, lex)),
+                                report_json_context(lex)));
+       else
                ereport(ERROR,
                                (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
                                 errmsg("invalid input syntax for type %s", "json"),
-                                errdetail("Expected end of input, but found \"%s\".",
-                                                  token),
+                                errdetail("%s", json_errdetail(error, lex)),
                                 report_json_context(lex)));
-       else
-       {
-               switch (ctx)
-               {
-                       case JSON_PARSE_VALUE:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected JSON value, but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_STRING:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected string, but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_ARRAY_START:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected array element or \"]\", but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_ARRAY_NEXT:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected \",\" or \"]\", but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_OBJECT_START:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected string or \"}\", but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_OBJECT_LABEL:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected \":\", but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_OBJECT_NEXT:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected \",\" or \"}\", but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       case JSON_PARSE_OBJECT_COMMA:
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                                                errmsg("invalid input syntax for type %s", "json"),
-                                                errdetail("Expected string, but found \"%s\".",
-                                                                  token),
-                                                report_json_context(lex)));
-                               break;
-                       default:
-                               elog(ERROR, "unexpected json parse state: %d", ctx);
-               }
-       }
 }
 
 /*
- * Report an invalid input token.
- *
- * lex->token_start and lex->token_terminator must identify the token.
+ * Construct a detail message for a JSON error.
  */
-static void
-report_invalid_token(JsonLexContext *lex)
+char *
+json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
 {
-       char       *token;
-       int                     toklen;
-
-       /* Separate out the offending token. */
-       toklen = lex->token_terminator - lex->token_start;
-       token = palloc(toklen + 1);
-       memcpy(token, lex->token_start, toklen);
-       token[toklen] = '\0';
-
-       ereport(ERROR,
-                       (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-                        errmsg("invalid input syntax for type %s", "json"),
-                        errdetail("Token \"%s\" is invalid.", token),
-                        report_json_context(lex)));
+       switch (error)
+       {
+               case JSON_SUCCESS:
+                       elog(ERROR, "internal error in json parser");
+                       break;
+               case JSON_ESCAPING_INVALID:
+                       return psprintf(_("Escape sequence \"\\%s\" is invalid."),
+                                                       extract_token(lex));
+               case JSON_ESCAPING_REQUIRED:
+                       return psprintf(_("Character with value 0x%02x must be escaped."),
+                                                       (unsigned char) *(lex->token_terminator));
+               case JSON_EXPECTED_END:
+                       return psprintf(_("Expected end of input, but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_ARRAY_FIRST:
+                       return psprintf(_("Expected array element or \"]\", but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_ARRAY_NEXT:
+                       return psprintf(_("Expected \",\" or \"]\", but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_COLON:
+                       return psprintf(_("Expected \":\", but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_JSON:
+                       return psprintf(_("Expected JSON value, but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_MORE:
+                       return _("The input string ended unexpectedly.");
+               case JSON_EXPECTED_OBJECT_FIRST:
+                       return psprintf(_("Expected string or \"}\", but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_OBJECT_NEXT:
+                       return psprintf(_("Expected \",\" or \"}\", but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_EXPECTED_STRING:
+                       return psprintf(_("Expected string, but found \"%s\"."),
+                                                       extract_token(lex));
+               case JSON_INVALID_TOKEN:
+                       return psprintf(_("Token \"%s\" is invalid."),
+                                                       extract_token(lex));
+               case JSON_UNICODE_CODE_POINT_ZERO:
+                       return _("\\u0000 cannot be converted to text.");
+               case JSON_UNICODE_ESCAPE_FORMAT:
+                       return _("\"\\u\" must be followed by four hexadecimal digits.");
+               case JSON_UNICODE_HIGH_ESCAPE:
+                       return _("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.");
+               case JSON_UNICODE_HIGH_SURROGATE:
+                       return _("Unicode high surrogate must not follow a high surrogate.");
+               case JSON_UNICODE_LOW_SURROGATE:
+                       return _("Unicode low surrogate must follow a high surrogate.");
+       }
 }
 
 /*
@@ -1177,18 +1191,15 @@ report_json_context(JsonLexContext *lex)
 }
 
 /*
- * Extract a single, possibly multi-byte char from the input string.
+ * Extract the current token from a lexing context, for error reporting.
  */
 static char *
-extract_mb_char(char *s)
+extract_token(JsonLexContext *lex)
 {
-       char       *res;
-       int                     len;
-
-       len = pg_mblen(s);
-       res = palloc(len + 1);
-       memcpy(res, s, len);
-       res[len] = '\0';
+       int toklen = lex->token_terminator - lex->token_start;
+       char *token = palloc(toklen + 1);
 
-       return res;
+       memcpy(token, lex->token_start, toklen);
+       token[toklen] = '\0';
+       return token;
 }
index c4a4ec78b0ea66938c79a0487261612a741a811d..83d7f68b8219cb982d4937f78332a1899d1c8120 100644 (file)
@@ -272,7 +272,7 @@ jsonb_from_cstring(char *json, int len)
        sem.scalar = jsonb_in_scalar;
        sem.object_field_start = jsonb_in_object_field_start;
 
-       pg_parse_json(lex, &sem);
+       pg_parse_json_or_ereport(lex, &sem);
 
        /* after parsing, the item member has the composed jsonb structure */
        PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
@@ -860,7 +860,7 @@ datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
                                        sem.scalar = jsonb_in_scalar;
                                        sem.object_field_start = jsonb_in_object_field_start;
 
-                                       pg_parse_json(lex, &sem);
+                                       pg_parse_json_or_ereport(lex, &sem);
 
                                }
                                break;
index 2f9955d665ada7ba880790631df281f4e87f26af..9eff506855303e58a445fae340a41498d315054d 100644 (file)
@@ -606,7 +606,7 @@ json_object_keys(PG_FUNCTION_ARGS)
                sem->object_field_start = okeys_object_field_start;
                /* remainder are all NULL, courtesy of palloc0 above */
 
-               pg_parse_json(lex, sem);
+               pg_parse_json_or_ereport(lex, sem);
                /* keys are now in state->result */
 
                pfree(lex->strval->data);
@@ -1000,7 +1000,7 @@ get_worker(text *json,
                sem->array_element_end = get_array_element_end;
        }
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        return state->tresult;
 }
@@ -1148,7 +1148,12 @@ get_array_start(void *state)
                        _state->path_indexes[lex_level] != INT_MIN)
                {
                        /* Negative subscript -- convert to positive-wise subscript */
-                       int                     nelements = json_count_array_elements(_state->lex);
+                       JsonParseErrorType error;
+                       int                     nelements;
+
+                       error = json_count_array_elements(_state->lex, &nelements);
+                       if (error != JSON_SUCCESS)
+                               json_ereport_error(error, _state->lex);
 
                        if (-_state->path_indexes[lex_level] <= nelements)
                                _state->path_indexes[lex_level] += nelements;
@@ -1548,7 +1553,7 @@ json_array_length(PG_FUNCTION_ARGS)
        sem->scalar = alen_scalar;
        sem->array_element_start = alen_array_element_start;
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        PG_RETURN_INT32(state->count);
 }
@@ -1814,7 +1819,7 @@ each_worker(FunctionCallInfo fcinfo, bool as_text)
                                                                                   "json_each temporary cxt",
                                                                                   ALLOCSET_DEFAULT_SIZES);
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        MemoryContextDelete(state->tmp_cxt);
 
@@ -2113,7 +2118,7 @@ elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
                                                                                   "json_array_elements temporary cxt",
                                                                                   ALLOCSET_DEFAULT_SIZES);
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        MemoryContextDelete(state->tmp_cxt);
 
@@ -2485,7 +2490,7 @@ populate_array_json(PopulateArrayContext *ctx, char *json, int len)
        sem.array_element_end = populate_array_element_end;
        sem.scalar = populate_array_scalar;
 
-       pg_parse_json(state.lex, &sem);
+       pg_parse_json_or_ereport(state.lex, &sem);
 
        /* number of dimensions should be already known */
        Assert(ctx->ndims > 0 && ctx->dims);
@@ -3342,7 +3347,7 @@ get_json_object_as_hash(char *json, int len, const char *funcname)
        sem->object_field_start = hash_object_field_start;
        sem->object_field_end = hash_object_field_end;
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        return tab;
 }
@@ -3641,7 +3646,7 @@ populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
 
                state->lex = lex;
 
-               pg_parse_json(lex, sem);
+               pg_parse_json_or_ereport(lex, sem);
        }
        else
        {
@@ -3971,7 +3976,7 @@ json_strip_nulls(PG_FUNCTION_ARGS)
        sem->array_element_start = sn_array_element_start;
        sem->object_field_start = sn_object_field_start;
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data,
                                                                                          state->strval->len));
@@ -5110,7 +5115,7 @@ iterate_json_values(text *json, uint32 flags, void *action_state,
        sem->scalar = iterate_values_scalar;
        sem->object_field_start = iterate_values_object_field_start;
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 }
 
 /*
@@ -5230,7 +5235,7 @@ transform_json_string_values(text *json, void *action_state,
        sem->array_element_start = transform_string_values_array_element_start;
        sem->object_field_start = transform_string_values_object_field_start;
 
-       pg_parse_json(lex, sem);
+       pg_parse_json_or_ereport(lex, sem);
 
        return cstring_to_text_with_len(state->strval->data, state->strval->len);
 }
index bbca121bb72361208e9dda5de2887e665d04dd4d..74dc35c41c627e792ea0e94d94e1a592474a0bec 100644 (file)
@@ -33,6 +33,28 @@ typedef enum
        JSON_TOKEN_END
 } JsonTokenType;
 
+typedef enum
+{
+       JSON_SUCCESS,
+       JSON_ESCAPING_INVALID,
+       JSON_ESCAPING_REQUIRED,
+       JSON_EXPECTED_ARRAY_FIRST,
+       JSON_EXPECTED_ARRAY_NEXT,
+       JSON_EXPECTED_COLON,
+       JSON_EXPECTED_END,
+       JSON_EXPECTED_JSON,
+       JSON_EXPECTED_MORE,
+       JSON_EXPECTED_OBJECT_FIRST,
+       JSON_EXPECTED_OBJECT_NEXT,
+       JSON_EXPECTED_STRING,
+       JSON_INVALID_TOKEN,
+       JSON_UNICODE_CODE_POINT_ZERO,
+       JSON_UNICODE_ESCAPE_FORMAT,
+       JSON_UNICODE_HIGH_ESCAPE,
+       JSON_UNICODE_HIGH_SURROGATE,
+       JSON_UNICODE_LOW_SURROGATE
+} JsonParseErrorType;
+
 
 /*
  * All the fields in this structure should be treated as read-only.
@@ -101,7 +123,14 @@ typedef struct JsonSemAction
  * points to. If the action pointers are NULL the parser
  * does nothing and just continues.
  */
-extern void pg_parse_json(JsonLexContext *lex, JsonSemAction *sem);
+extern JsonParseErrorType pg_parse_json(JsonLexContext *lex,
+                                                                               JsonSemAction *sem);
+
+/*
+ * Same thing, but signal errors via ereport(ERROR) instead of returning
+ * a result code.
+ */
+extern void pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem);
 
 /* the null action object used for pure validation */
 extern JsonSemAction nullSemAction;
@@ -110,8 +139,13 @@ extern JsonSemAction nullSemAction;
  * json_count_array_elements performs a fast secondary parse to determine the
  * number of elements in passed array lex context. It should be called from an
  * array_start action.
+ *
+ * The return value indicates whether any error occurred, while the number
+ * of elements is stored into *elements (but only if the return value is
+ * JSON_SUCCESS).
  */
-extern int     json_count_array_elements(JsonLexContext *lex);
+extern JsonParseErrorType json_count_array_elements(JsonLexContext *lex,
+                                                                                                       int *elements);
 
 /*
  * constructors for JsonLexContext, with or without strval element.
@@ -128,7 +162,13 @@ extern JsonLexContext *makeJsonLexContextCstringLen(char *json,
                                                                                                        bool need_escapes);
 
 /* lex one token */
-extern void json_lex(JsonLexContext *lex);
+extern JsonParseErrorType json_lex(JsonLexContext *lex);
+
+/* report an error during json lexing or parsing */
+extern void json_ereport_error(JsonParseErrorType error, JsonLexContext *lex);
+
+/* construct an error detail string for a json error */
+extern char *json_errdetail(JsonParseErrorType error, JsonLexContext *lex);
 
 /*
  * Utility function to check if a string is a valid JSON number.