Optimize escaping of JSON strings
authorDavid Rowley <drowley@postgresql.org>
Sat, 27 Jul 2024 11:46:07 +0000 (23:46 +1200)
committerDavid Rowley <drowley@postgresql.org>
Sat, 27 Jul 2024 11:46:07 +0000 (23:46 +1200)
There were quite a few places where we either had a non-NUL-terminated
string or a text Datum which we needed to call escape_json() on.  Many of
these places required that a temporary string was created due to the fact
that escape_json() needs a NUL-terminated cstring.  For text types, those
first had to be converted to cstring before calling escape_json() on them.

Here we introduce two new functions to make escaping JSON more optimal:

escape_json_text() can be given a text Datum to append onto the given
buffer.  This is more optimal as it foregoes the need to convert the text
Datum into a cstring.  A temporary allocation is only required if the text
Datum needs to be detoasted.

escape_json_with_len() can be used when the length of the cstring is
already known or the given string isn't NUL-terminated.  Having this
allows various places which were creating a temporary NUL-terminated
string to just call escape_json_with_len() without any temporary memory
allocations.

Discussion: https://postgr.es/m/CAApHDvpLXwMZvbCKcdGfU9XQjGCDm7tFpRdTXuB9PVgpNUYfEQ@mail.gmail.com
Reviewed-by: Melih Mutlu, Heikki Linnakangas
contrib/hstore/hstore_io.c
src/backend/backup/backup_manifest.c
src/backend/utils/adt/json.c
src/backend/utils/adt/jsonb.c
src/backend/utils/adt/jsonfuncs.c
src/backend/utils/adt/jsonpath.c
src/include/utils/json.h

index 999ddad76d977a0b4ec7d7f46172f56dd6d952b1..2125436e40cde79cc23108c8391f2ecf7e95237e 100644 (file)
@@ -1343,23 +1343,20 @@ hstore_to_json_loose(PG_FUNCTION_ARGS)
        int                     count = HS_COUNT(in);
        char       *base = STRPTR(in);
        HEntry     *entries = ARRPTR(in);
-       StringInfoData tmp,
-                               dst;
+       StringInfoData dst;
 
        if (count == 0)
                PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
 
-       initStringInfo(&tmp);
        initStringInfo(&dst);
 
        appendStringInfoChar(&dst, '{');
 
        for (i = 0; i < count; i++)
        {
-               resetStringInfo(&tmp);
-               appendBinaryStringInfo(&tmp, HSTORE_KEY(entries, base, i),
-                                                          HSTORE_KEYLEN(entries, i));
-               escape_json(&dst, tmp.data);
+               escape_json_with_len(&dst,
+                                                        HSTORE_KEY(entries, base, i),
+                                                        HSTORE_KEYLEN(entries, i));
                appendStringInfoString(&dst, ": ");
                if (HSTORE_VALISNULL(entries, i))
                        appendStringInfoString(&dst, "null");
@@ -1372,13 +1369,13 @@ hstore_to_json_loose(PG_FUNCTION_ARGS)
                        appendStringInfoString(&dst, "false");
                else
                {
-                       resetStringInfo(&tmp);
-                       appendBinaryStringInfo(&tmp, HSTORE_VAL(entries, base, i),
-                                                                  HSTORE_VALLEN(entries, i));
-                       if (IsValidJsonNumber(tmp.data, tmp.len))
-                               appendBinaryStringInfo(&dst, tmp.data, tmp.len);
+                       char       *str = HSTORE_VAL(entries, base, i);
+                       int                     len = HSTORE_VALLEN(entries, i);
+
+                       if (IsValidJsonNumber(str, len))
+                               appendBinaryStringInfo(&dst, str, len);
                        else
-                               escape_json(&dst, tmp.data);
+                               escape_json_with_len(&dst, str, len);
                }
 
                if (i + 1 != count)
@@ -1398,32 +1395,28 @@ hstore_to_json(PG_FUNCTION_ARGS)
        int                     count = HS_COUNT(in);
        char       *base = STRPTR(in);
        HEntry     *entries = ARRPTR(in);
-       StringInfoData tmp,
-                               dst;
+       StringInfoData dst;
 
        if (count == 0)
                PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
 
-       initStringInfo(&tmp);
        initStringInfo(&dst);
 
        appendStringInfoChar(&dst, '{');
 
        for (i = 0; i < count; i++)
        {
-               resetStringInfo(&tmp);
-               appendBinaryStringInfo(&tmp, HSTORE_KEY(entries, base, i),
-                                                          HSTORE_KEYLEN(entries, i));
-               escape_json(&dst, tmp.data);
+               escape_json_with_len(&dst,
+                                                        HSTORE_KEY(entries, base, i),
+                                                        HSTORE_KEYLEN(entries, i));
                appendStringInfoString(&dst, ": ");
                if (HSTORE_VALISNULL(entries, i))
                        appendStringInfoString(&dst, "null");
                else
                {
-                       resetStringInfo(&tmp);
-                       appendBinaryStringInfo(&tmp, HSTORE_VAL(entries, base, i),
-                                                                  HSTORE_VALLEN(entries, i));
-                       escape_json(&dst, tmp.data);
+                       escape_json_with_len(&dst,
+                                                                HSTORE_VAL(entries, base, i),
+                                                                HSTORE_VALLEN(entries, i));
                }
 
                if (i + 1 != count)
index b360a135472e9dc159740fb9c9a32975ec36b1bd..4357cfa31de6631b7eb552174e666f5ade663234 100644 (file)
@@ -148,7 +148,7 @@ AddFileToBackupManifest(backup_manifest_info *manifest, Oid spcoid,
                pg_verify_mbstr(PG_UTF8, pathname, pathlen, true))
        {
                appendStringInfoString(&buf, "{ \"Path\": ");
-               escape_json(&buf, pathname);
+               escape_json_with_len(&buf, pathname, pathlen);
                appendStringInfoString(&buf, ", ");
        }
        else
index d719a61f16badd450d37278680f9200b109f6a12..be7bc46038f67c8580362a8edf15c816ef57d960 100644 (file)
@@ -23,6 +23,7 @@
 #include "utils/builtins.h"
 #include "utils/date.h"
 #include "utils/datetime.h"
+#include "utils/fmgroids.h"
 #include "utils/json.h"
 #include "utils/jsonfuncs.h"
 #include "utils/lsyscache.h"
@@ -285,9 +286,16 @@ datum_to_json_internal(Datum val, bool is_null, StringInfo result,
                        pfree(jsontext);
                        break;
                default:
-                       outputstr = OidOutputFunctionCall(outfuncoid, val);
-                       escape_json(result, outputstr);
-                       pfree(outputstr);
+                       /* special-case text types to save useless palloc/memcpy cycles */
+                       if (outfuncoid == F_TEXTOUT || outfuncoid == F_VARCHAROUT ||
+                               outfuncoid == F_BPCHAROUT)
+                               escape_json_text(result, (text *) DatumGetPointer(val));
+                       else
+                       {
+                               outputstr = OidOutputFunctionCall(outfuncoid, val);
+                               escape_json(result, outputstr);
+                               pfree(outputstr);
+                       }
                        break;
        }
 }
@@ -1391,7 +1399,6 @@ json_object(PG_FUNCTION_ARGS)
                                count,
                                i;
        text       *rval;
-       char       *v;
 
        switch (ndims)
        {
@@ -1434,19 +1441,16 @@ json_object(PG_FUNCTION_ARGS)
                                        (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                                         errmsg("null value not allowed for object key")));
 
-               v = TextDatumGetCString(in_datums[i * 2]);
                if (i > 0)
                        appendStringInfoString(&result, ", ");
-               escape_json(&result, v);
+               escape_json_text(&result, (text *) DatumGetPointer(in_datums[i * 2]));
                appendStringInfoString(&result, " : ");
-               pfree(v);
                if (in_nulls[i * 2 + 1])
                        appendStringInfoString(&result, "null");
                else
                {
-                       v = TextDatumGetCString(in_datums[i * 2 + 1]);
-                       escape_json(&result, v);
-                       pfree(v);
+                       escape_json_text(&result,
+                                                        (text *) DatumGetPointer(in_datums[i * 2 + 1]));
                }
        }
 
@@ -1483,7 +1487,6 @@ json_object_two_arg(PG_FUNCTION_ARGS)
                                val_count,
                                i;
        text       *rval;
-       char       *v;
 
        if (nkdims > 1 || nkdims != nvdims)
                ereport(ERROR,
@@ -1512,20 +1515,15 @@ json_object_two_arg(PG_FUNCTION_ARGS)
                                        (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                                         errmsg("null value not allowed for object key")));
 
-               v = TextDatumGetCString(key_datums[i]);
                if (i > 0)
                        appendStringInfoString(&result, ", ");
-               escape_json(&result, v);
+               escape_json_text(&result, (text *) DatumGetPointer(key_datums[i]));
                appendStringInfoString(&result, " : ");
-               pfree(v);
                if (val_nulls[i])
                        appendStringInfoString(&result, "null");
                else
-               {
-                       v = TextDatumGetCString(val_datums[i]);
-                       escape_json(&result, v);
-                       pfree(v);
-               }
+                       escape_json_text(&result,
+                                                        (text *) DatumGetPointer(val_datums[i]));
        }
 
        appendStringInfoChar(&result, '}');
@@ -1541,50 +1539,100 @@ json_object_two_arg(PG_FUNCTION_ARGS)
        PG_RETURN_TEXT_P(rval);
 }
 
+/*
+ * escape_json_char
+ *             Inline helper function for escape_json* functions
+ */
+static pg_attribute_always_inline void
+escape_json_char(StringInfo buf, char c)
+{
+       switch (c)
+       {
+               case '\b':
+                       appendStringInfoString(buf, "\\b");
+                       break;
+               case '\f':
+                       appendStringInfoString(buf, "\\f");
+                       break;
+               case '\n':
+                       appendStringInfoString(buf, "\\n");
+                       break;
+               case '\r':
+                       appendStringInfoString(buf, "\\r");
+                       break;
+               case '\t':
+                       appendStringInfoString(buf, "\\t");
+                       break;
+               case '"':
+                       appendStringInfoString(buf, "\\\"");
+                       break;
+               case '\\':
+                       appendStringInfoString(buf, "\\\\");
+                       break;
+               default:
+                       if ((unsigned char) c < ' ')
+                               appendStringInfo(buf, "\\u%04x", (int) c);
+                       else
+                               appendStringInfoCharMacro(buf, c);
+                       break;
+       }
+}
 
 /*
- * Produce a JSON string literal, properly escaping characters in the text.
+ * escape_json
+ *             Produce a JSON string literal, properly escaping the NUL-terminated
+ *             cstring.
  */
 void
 escape_json(StringInfo buf, const char *str)
 {
-       const char *p;
+       appendStringInfoCharMacro(buf, '"');
+
+       for (; *str != '\0'; str++)
+               escape_json_char(buf, *str);
 
        appendStringInfoCharMacro(buf, '"');
-       for (p = str; *p; p++)
-       {
-               switch (*p)
-               {
-                       case '\b':
-                               appendStringInfoString(buf, "\\b");
-                               break;
-                       case '\f':
-                               appendStringInfoString(buf, "\\f");
-                               break;
-                       case '\n':
-                               appendStringInfoString(buf, "\\n");
-                               break;
-                       case '\r':
-                               appendStringInfoString(buf, "\\r");
-                               break;
-                       case '\t':
-                               appendStringInfoString(buf, "\\t");
-                               break;
-                       case '"':
-                               appendStringInfoString(buf, "\\\"");
-                               break;
-                       case '\\':
-                               appendStringInfoString(buf, "\\\\");
-                               break;
-                       default:
-                               if ((unsigned char) *p < ' ')
-                                       appendStringInfo(buf, "\\u%04x", (int) *p);
-                               else
-                                       appendStringInfoCharMacro(buf, *p);
-                               break;
-               }
-       }
+}
+
+/*
+ * escape_json_with_len
+ *             Produce a JSON string literal, properly escaping the possibly not
+ *             NUL-terminated characters in 'str'.  'len' defines the number of bytes
+ *             from 'str' to process.
+ */
+void
+escape_json_with_len(StringInfo buf, const char *str, int len)
+{
        appendStringInfoCharMacro(buf, '"');
+
+       for (int i = 0; i < len; i++)
+               escape_json_char(buf, str[i]);
+
+       appendStringInfoCharMacro(buf, '"');
+}
+
+/*
+ * escape_json_text
+ *             Append 'txt' onto 'buf' and escape using escape_json_with_len.
+ *
+ * This is more efficient than calling text_to_cstring and appending the
+ * result as that could require an additional palloc and memcpy.
+ */
+void
+escape_json_text(StringInfo buf, const text *txt)
+{
+       /* must cast away the const, unfortunately */
+       text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, txt));
+       int                     len = VARSIZE_ANY_EXHDR(tunpacked);
+       char       *str;
+
+       str = VARDATA_ANY(tunpacked);
+
+       escape_json_with_len(buf, str, len);
+
+       /* pfree any detoasted values */
+       if (tunpacked != txt)
+               pfree(tunpacked);
 }
 
 /* Semantic actions for key uniqueness check */
index e4562b3c6cea765d0b1717a4aad0392c537ea17f..928552d5514065d8fa8c20aab5606b4a0ea4cf30 100644 (file)
@@ -354,7 +354,7 @@ jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
                        appendBinaryStringInfo(out, "null", 4);
                        break;
                case jbvString:
-                       escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
+                       escape_json_with_len(out, scalarVal->val.string.val, scalarVal->val.string.len);
                        break;
                case jbvNumeric:
                        appendStringInfoString(out,
index 1b681eff5f1c02e3443385b9e899f4601c946117..7076b344b7be192be63ad809e2d66365d2fd4a6c 100644 (file)
@@ -3133,18 +3133,6 @@ populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv,
 
                json = jsv->val.json.str;
                Assert(json);
-               if (len >= 0)
-               {
-                       /* Need to copy non-null-terminated string */
-                       str = palloc(len + 1 * sizeof(char));
-                       memcpy(str, json, len);
-                       str[len] = '\0';
-               }
-               else
-               {
-                       /* string is already null-terminated */
-                       str = unconstify(char *, json);
-               }
 
                /* If converting to json/jsonb, make string into valid JSON literal */
                if ((typid == JSONOID || typid == JSONBOID) &&
@@ -3153,12 +3141,24 @@ populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv,
                        StringInfoData buf;
 
                        initStringInfo(&buf);
-                       escape_json(&buf, str);
-                       /* free temporary buffer */
-                       if (str != json)
-                               pfree(str);
+                       if (len >= 0)
+                               escape_json_with_len(&buf, json, len);
+                       else
+                               escape_json(&buf, json);
                        str = buf.data;
                }
+               else if (len >= 0)
+               {
+                       /* create a NUL-terminated version */
+                       str = palloc(len + 1);
+                       memcpy(str, json, len);
+                       str[len] = '\0';
+               }
+               else
+               {
+                       /* string is already NUL-terminated */
+                       str = unconstify(char *, json);
+               }
        }
        else
        {
@@ -5936,7 +5936,7 @@ transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype
        {
                text       *out = _state->action(_state->action_state, token, strlen(token));
 
-               escape_json(_state->strval, text_to_cstring(out));
+               escape_json_text(_state->strval, out);
        }
        else
                appendStringInfoString(_state->strval, token);
index 11e6193e96473263ce07eb7e5f0434fc523da176..0f691bc5f0f9a6d8b48b523ac47ab9fa8cf42602 100644 (file)
@@ -523,6 +523,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 {
        JsonPathItem elem;
        int                     i;
+       int32           len;
+       char       *str;
 
        check_stack_depth();
        CHECK_FOR_INTERRUPTS();
@@ -533,7 +535,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
                        appendStringInfoString(buf, "null");
                        break;
                case jpiString:
-                       escape_json(buf, jspGetString(v, NULL));
+                       str = jspGetString(v, &len);
+                       escape_json_with_len(buf, str, len);
                        break;
                case jpiNumeric:
                        if (jspHasNext(v))
@@ -662,7 +665,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
                case jpiKey:
                        if (inKey)
                                appendStringInfoChar(buf, '.');
-                       escape_json(buf, jspGetString(v, NULL));
+                       str = jspGetString(v, &len);
+                       escape_json_with_len(buf, str, len);
                        break;
                case jpiCurrent:
                        Assert(!inKey);
@@ -674,7 +678,8 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
                        break;
                case jpiVariable:
                        appendStringInfoChar(buf, '$');
-                       escape_json(buf, jspGetString(v, NULL));
+                       str = jspGetString(v, &len);
+                       escape_json_with_len(buf, str, len);
                        break;
                case jpiFilter:
                        appendStringInfoString(buf, "?(");
@@ -732,7 +737,9 @@ printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
 
                        appendStringInfoString(buf, " like_regex ");
 
-                       escape_json(buf, v->content.like_regex.pattern);
+                       escape_json_with_len(buf,
+                                                                v->content.like_regex.pattern,
+                                                                v->content.like_regex.patternlen);
 
                        if (v->content.like_regex.flags)
                        {
index 6d7f1b387d1edadf00e5616da80810a19ca4affd..79c1062e1bf2c985eb578866575242eef0e11c4c 100644 (file)
@@ -18,6 +18,8 @@
 
 /* functions in json.c */
 extern void escape_json(StringInfo buf, const char *str);
+extern void escape_json_with_len(StringInfo buf, const char *str, int len);
+extern void escape_json_text(StringInfo buf, const text *txt);
 extern char *JsonEncodeDateTime(char *buf, Datum value, Oid typid,
                                                                const int *tzp);
 extern bool to_json_is_immutable(Oid typoid);