jsonapi: add lexer option to keep token ownership

author Andrew Dunstan <andrew@dunslane.net>

Wed, 27 Nov 2024 17:05:44 +0000 (12:05 -0500)

committer Andrew Dunstan <andrew@dunslane.net>

Wed, 27 Nov 2024 17:07:14 +0000 (12:07 -0500)
author Andrew Dunstan <andrew@dunslane.net>
Wed, 27 Nov 2024 17:05:44 +0000 (12:05 -0500)
committer Andrew Dunstan <andrew@dunslane.net>
Wed, 27 Nov 2024 17:07:14 +0000 (12:07 -0500)
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c

index ca4ecd571ef6a3df33082b78215430aa5e607470..0e2a82ad7a1fa98945ceba8d64526c2449b1fdc9 100644 (file)
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -161,6 +161,7 @@ struct JsonParserStack
   */
  struct JsonIncrementalState
  {
+   bool        started;
     bool        is_last_chunk;
     bool        partial_completed;
     jsonapi_StrValType partial_token;
@@ -280,6 +281,7 @@ static JsonParseErrorType parse_array_element(JsonLexContext *lex, const JsonSem
  static JsonParseErrorType parse_array(JsonLexContext *lex, const JsonSemAction *sem);
  static JsonParseErrorType report_parse_error(JsonParseContext ctx, JsonLexContext *lex);
  static bool allocate_incremental_state(JsonLexContext *lex);
+static inline void set_fname(JsonLexContext *lex, char *fname);
  
  /* the null action object used for pure validation */
  const JsonSemAction nullSemAction =
@@ -437,7 +439,7 @@ allocate_incremental_state(JsonLexContext *lex)
                *fnull;
  
     lex->inc_state = ALLOC0(sizeof(JsonIncrementalState));
-   pstack = ALLOC(sizeof(JsonParserStack));
+   pstack = ALLOC0(sizeof(JsonParserStack));
     prediction = ALLOC(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
     fnames = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(char *));
     fnull = ALLOC(JS_STACK_CHUNK_SIZE * sizeof(bool));
@@ -464,10 +466,17 @@ allocate_incremental_state(JsonLexContext *lex)
     lex->pstack = pstack;
     lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
     lex->pstack->prediction = prediction;
-   lex->pstack->pred_index = 0;
     lex->pstack->fnames = fnames;
     lex->pstack->fnull = fnull;
  
+   /*
+    * fnames between 0 and lex_level must always be defined so that
+    * freeJsonLexContext() can handle them safely. inc/dec_lex_level() handle
+    * the rest.
+    */
+   Assert(lex->lex_level == 0);
+   lex->pstack->fnames[0] = NULL;
+
     lex->incremental = true;
     return true;
  }
@@ -530,6 +539,25 @@ makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
     return lex;
  }
  
+void
+setJsonLexContextOwnsTokens(JsonLexContext *lex, bool owned_by_context)
+{
+   if (lex->incremental && lex->inc_state->started)
+   {
+       /*
+        * Switching this flag after parsing has already started is a
+        * programming error.
+        */
+       Assert(false);
+       return;
+   }
+
+   if (owned_by_context)
+       lex->flags |= JSONLEX_CTX_OWNS_TOKENS;
+   else
+       lex->flags &= ~JSONLEX_CTX_OWNS_TOKENS;
+}
+
  static inline bool
  inc_lex_level(JsonLexContext *lex)
  {
@@ -569,12 +597,23 @@ inc_lex_level(JsonLexContext *lex)
     }
  
     lex->lex_level += 1;
+
+   if (lex->incremental)
+   {
+       /*
+        * Ensure freeJsonLexContext() remains safe even if no fname is
+        * assigned at this level.
+        */
+       lex->pstack->fnames[lex->lex_level] = NULL;
+   }
+
     return true;
  }
  
  static inline void
  dec_lex_level(JsonLexContext *lex)
  {
+   set_fname(lex, NULL);       /* free the current level's fname, if needed */
     lex->lex_level -= 1;
  }
  
@@ -608,6 +647,15 @@ have_prediction(JsonParserStack *pstack)
  static inline void
  set_fname(JsonLexContext *lex, char *fname)
  {
+   if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
+   {
+       /*
+        * Don't leak prior fnames. If one hasn't been assigned yet,
+        * inc_lex_level ensured that it's NULL (and therefore safe to free).
+        */
+       FREE(lex->pstack->fnames[lex->lex_level]);
+   }
+
     lex->pstack->fnames[lex->lex_level] = fname;
  }
  
@@ -655,8 +703,19 @@ freeJsonLexContext(JsonLexContext *lex)
         jsonapi_termStringInfo(&lex->inc_state->partial_token);
         FREE(lex->inc_state);
         FREE(lex->pstack->prediction);
+
+       if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
+       {
+           int         i;
+
+           /* Clean up any tokens that were left behind. */
+           for (i = 0; i <= lex->lex_level; i++)
+               FREE(lex->pstack->fnames[i]);
+       }
+
         FREE(lex->pstack->fnames);
         FREE(lex->pstack->fnull);
+       FREE(lex->pstack->scalar_val);
         FREE(lex->pstack);
     }
  
@@ -826,6 +885,7 @@ pg_parse_json_incremental(JsonLexContext *lex,
     lex->input = lex->token_terminator = lex->line_start = json;
     lex->input_length = len;
     lex->inc_state->is_last_chunk = is_last;
+   lex->inc_state->started = true;
  
     /* get the initial token */
     result = json_lex(lex);
@@ -1086,6 +1146,17 @@ pg_parse_json_incremental(JsonLexContext *lex,
                         if (sfunc != NULL)
                         {
                             result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
+
+                           /*
+                            * Either ownership of the token passed to the
+                            * callback, or we need to free it now. Either
+                            * way, clear our pointer to it so it doesn't get
+                            * freed in the future.
+                            */
+                           if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
+                               FREE(pstack->scalar_val);
+                           pstack->scalar_val = NULL;
+
                             if (result != JSON_SUCCESS)
                                 return result;
                         }
@@ -1221,11 +1292,17 @@ parse_scalar(JsonLexContext *lex, const JsonSemAction *sem)
     /* consume the token */
     result = json_lex(lex);
     if (result != JSON_SUCCESS)
+   {
+       FREE(val);
         return result;
+   }
  
-   /* invoke the callback */
+   /* invoke the callback, which may take ownership of val */
     result = (*sfunc) (sem->semstate, val, tok);
  
+   if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
+       FREE(val);
+
     return result;
  }
  
@@ -1238,7 +1315,7 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
      * generally call a field name a "key".
      */
  
-   char       *fname = NULL;   /* keep compiler quiet */
+   char       *fname = NULL;
     json_ofield_action ostart = sem->object_field_start;
     json_ofield_action oend = sem->object_field_end;
     bool        isnull;
@@ -1255,11 +1332,17 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
     }
     result = json_lex(lex);
     if (result != JSON_SUCCESS)
+   {
+       FREE(fname);
         return result;
+   }
  
     result = lex_expect(JSON_PARSE_OBJECT_LABEL, lex, JSON_TOKEN_COLON);
     if (result != JSON_SUCCESS)
+   {
+       FREE(fname);
         return result;
+   }
  
     tok = lex_peek(lex);
     isnull = tok == JSON_TOKEN_NULL;
@@ -1268,7 +1351,7 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
     {
         result = (*ostart) (sem->semstate, fname, isnull);
         if (result != JSON_SUCCESS)
-           return result;
+           goto ofield_cleanup;
     }
  
     switch (tok)
@@ -1283,16 +1366,19 @@ parse_object_field(JsonLexContext *lex, const JsonSemAction *sem)
             result = parse_scalar(lex, sem);
     }
     if (result != JSON_SUCCESS)
-       return result;
+       goto ofield_cleanup;
  
     if (oend != NULL)
     {
         result = (*oend) (sem->semstate, fname, isnull);
         if (result != JSON_SUCCESS)
-           return result;
+           goto ofield_cleanup;
     }
  
-   return JSON_SUCCESS;
+ofield_cleanup:
+   if (lex->flags & JSONLEX_CTX_OWNS_TOKENS)
+       FREE(fname);
+   return result;
  }
  
  static JsonParseErrorType
diff --git a/src/include/common/jsonapi.h b/src/include/common/jsonapi.h

index c524ff5be8bfbf525649a1f5b1449302c0e96f4f..167615a5576944f079d515fd234f53e3d0d7f06f 100644 (file)
--- a/src/include/common/jsonapi.h
+++ b/src/include/common/jsonapi.h
@@ -92,9 +92,11 @@ typedef struct JsonIncrementalState JsonIncrementalState;
   * conjunction with token_start.
   *
   * JSONLEX_FREE_STRUCT/STRVAL are used to drive freeJsonLexContext.
+ * JSONLEX_CTX_OWNS_TOKENS is used by setJsonLexContextOwnsTokens.
   */
  #define JSONLEX_FREE_STRUCT            (1 << 0)
  #define JSONLEX_FREE_STRVAL            (1 << 1)
+#define JSONLEX_CTX_OWNS_TOKENS        (1 << 2)
  typedef struct JsonLexContext
  {
     const char *input;
@@ -130,9 +132,10 @@ typedef JsonParseErrorType (*json_scalar_action) (void *state, char *token, Json
   * to doing a pure parse with no side-effects, and is therefore exactly
   * what the json input routines do.
   *
- * The 'fname' and 'token' strings passed to these actions are palloc'd.
- * They are not free'd or used further by the parser, so the action function
- * is free to do what it wishes with them.
+ * By default, the 'fname' and 'token' strings passed to these actions are
+ * palloc'd.  They are not free'd or used further by the parser, so the action
+ * function is free to do what it wishes with them. This behavior may be
+ * modified by setJsonLexContextOwnsTokens().
   *
   * All action functions return JsonParseErrorType.  If the result isn't
   * JSON_SUCCESS, the parse is abandoned and that error code is returned.
@@ -216,6 +219,25 @@ extern JsonLexContext *makeJsonLexContextIncremental(JsonLexContext *lex,
                                                      int encoding,
                                                      bool need_escapes);
  
+/*
+ * Sets whether tokens passed to semantic action callbacks are owned by the
+ * context (in which case, the callback must duplicate the tokens for long-term
+ * storage) or by the callback (in which case, the callback must explicitly
+ * free tokens to avoid leaks).
+ *
+ * By default, this setting is false: the callback owns the tokens that are
+ * passed to it (and if parsing fails between the two object-field callbacks,
+ * the field name token will likely leak). If set to true, tokens will be freed
+ * by the lexer after the callback completes.
+ *
+ * Setting this to true is important for long-lived clients (such as libpq)
+ * that must not leak memory during a parse failure. For a server backend using
+ * memory contexts, or a client application which will exit on parse failure,
+ * this setting is less critical.
+ */
+extern void setJsonLexContextOwnsTokens(JsonLexContext *lex,
+                                       bool owned_by_context);
+
  extern void freeJsonLexContext(JsonLexContext *lex);
  
  /* lex one token */
diff --git a/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl b/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl

index 8cc42e8e292ed91b6536a79e98238c891ebc68ac..0c663b8e689abb5fefd5fc5dd8da08433780cadf 100644 (file)
--- a/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl
+++ b/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl
@@ -13,21 +13,24 @@ use FindBin;
  
  my $test_file = "$FindBin::RealBin/../tiny.json";
  
-my @exes =
-  ("test_json_parser_incremental", "test_json_parser_incremental_shlib");
+my @exes = (
+   [ "test_json_parser_incremental", ],
+   [ "test_json_parser_incremental", "-o", ],
+   [ "test_json_parser_incremental_shlib", ],
+   [ "test_json_parser_incremental_shlib", "-o", ]);
  
  foreach my $exe (@exes)
  {
-   note "testing executable $exe";
+   note "testing executable @$exe";
  
     # Test the  usage error
-   my ($stdout, $stderr) = run_command([ $exe, "-c", 10 ]);
+   my ($stdout, $stderr) = run_command([ @$exe, "-c", 10 ]);
     like($stderr, qr/Usage:/, 'error message if not enough arguments');
  
     # Test that we get success for small chunk sizes from 64 down to 1.
     for (my $size = 64; $size > 0; $size--)
     {
-       ($stdout, $stderr) = run_command([ $exe, "-c", $size, $test_file ]);
+       ($stdout, $stderr) = run_command([ @$exe, "-c", $size, $test_file ]);
  
         like($stdout, qr/SUCCESS/, "chunk size $size: test succeeds");
         is($stderr, "", "chunk size $size: no error output");
diff --git a/src/test/modules/test_json_parser/t/002_inline.pl b/src/test/modules/test_json_parser/t/002_inline.pl

index 5b6c6dc4ae7ea8108a32700fa4dcce44a226c294..71c462b31916988fc092fd13af6e9e40916db738 100644 (file)
--- a/src/test/modules/test_json_parser/t/002_inline.pl
+++ b/src/test/modules/test_json_parser/t/002_inline.pl
@@ -13,7 +13,7 @@ use Test::More;
  use File::Temp qw(tempfile);
  
  my $dir = PostgreSQL::Test::Utils::tempdir;
-my $exe;
+my @exe;
  
  sub test
  {
@@ -35,7 +35,7 @@ sub test
  
     foreach my $size (reverse(1 .. $chunk))
     {
-       my ($stdout, $stderr) = run_command([ $exe, "-c", $size, $fname ]);
+       my ($stdout, $stderr) = run_command([ @exe, "-c", $size, $fname ]);
  
         if (defined($params{error}))
         {
@@ -53,13 +53,16 @@ sub test
     }
  }
  
-my @exes =
-  ("test_json_parser_incremental", "test_json_parser_incremental_shlib");
+my @exes = (
+   [ "test_json_parser_incremental", ],
+   [ "test_json_parser_incremental", "-o", ],
+   [ "test_json_parser_incremental_shlib", ],
+   [ "test_json_parser_incremental_shlib", "-o", ]);
  
  foreach (@exes)
  {
-   $exe = $_;
-   note "testing executable $exe";
+   @exe = @$_;
+   note "testing executable @exe";
  
     test("number", "12345");
     test("string", '"hello"');
diff --git a/src/test/modules/test_json_parser/t/003_test_semantic.pl b/src/test/modules/test_json_parser/t/003_test_semantic.pl

index c11480172d3cd17d0ae67b507648709e75a0417d..c57ccdb660260181da4414f92e420f35ff4e9c52 100644 (file)
--- a/src/test/modules/test_json_parser/t/003_test_semantic.pl
+++ b/src/test/modules/test_json_parser/t/003_test_semantic.pl
@@ -16,14 +16,17 @@ use File::Temp qw(tempfile);
  my $test_file = "$FindBin::RealBin/../tiny.json";
  my $test_out = "$FindBin::RealBin/../tiny.out";
  
-my @exes =
-  ("test_json_parser_incremental", "test_json_parser_incremental_shlib");
+my @exes = (
+   [ "test_json_parser_incremental", ],
+   [ "test_json_parser_incremental", "-o", ],
+   [ "test_json_parser_incremental_shlib", ],
+   [ "test_json_parser_incremental_shlib", "-o", ]);
  
  foreach my $exe (@exes)
  {
-   note "testing executable $exe";
+   note "testing executable @$exe";
  
-   my ($stdout, $stderr) = run_command([ $exe, "-s", $test_file ]);
+   my ($stdout, $stderr) = run_command([ @$exe, "-s", $test_file ]);
  
     is($stderr, "", "no error output");
  
diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c

index 294e5f74eac493ea3dc22e52003cd19577d02f8d..0b02b5203bf2fb1c1eedd018ad2de14e8742b4b2 100644 (file)
--- a/src/test/modules/test_json_parser/test_json_parser_incremental.c
+++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c
@@ -18,6 +18,10 @@
   * If the -s flag is given, the program does semantic processing. This should
   * just mirror back the json, albeit with white space changes.
   *
+ * If the -o flag is given, the JSONLEX_CTX_OWNS_TOKENS flag is set. (This can
+ * be used in combination with a leak sanitizer; without the option, the parser
+ * may leak memory with invalid JSON.)
+ *
   * The argument specifies the file containing the JSON input.
   *
   *-------------------------------------------------------------------------
@@ -72,6 +76,8 @@ static JsonSemAction sem = {
     .scalar = do_scalar
  };
  
+static bool lex_owns_tokens = false;
+
  int
  main(int argc, char **argv)
  {
@@ -88,10 +94,11 @@ main(int argc, char **argv)
     char       *testfile;
     int         c;
     bool        need_strings = false;
+   int         ret = 0;
  
     pg_logging_init(argv[0]);
  
-   while ((c = getopt(argc, argv, "c:s")) != -1)
+   while ((c = getopt(argc, argv, "c:os")) != -1)
     {
         switch (c)
         {
@@ -100,6 +107,9 @@ main(int argc, char **argv)
                 if (chunk_size > BUFSIZE)
                     pg_fatal("chunk size cannot exceed %d", BUFSIZE);
                 break;
+           case 'o':           /* switch token ownership */
+               lex_owns_tokens = true;
+               break;
             case 's':           /* do semantic processing */
                 testsem = &sem;
                 sem.semstate = palloc(sizeof(struct DoState));
@@ -112,7 +122,7 @@ main(int argc, char **argv)
  
     if (optind < argc)
     {
-       testfile = pg_strdup(argv[optind]);
+       testfile = argv[optind];
         optind++;
     }
     else
@@ -122,6 +132,7 @@ main(int argc, char **argv)
     }
  
     makeJsonLexContextIncremental(&lex, PG_UTF8, need_strings);
+   setJsonLexContextOwnsTokens(&lex, lex_owns_tokens);
     initStringInfo(&json);
  
     if ((json_file = fopen(testfile, PG_BINARY_R)) == NULL)
@@ -160,7 +171,8 @@ main(int argc, char **argv)
             if (result != JSON_INCOMPLETE)
             {
                 fprintf(stderr, "%s\n", json_errdetail(result, &lex));
-               exit(1);
+               ret = 1;
+               goto cleanup;
             }
             resetStringInfo(&json);
         }
@@ -172,15 +184,21 @@ main(int argc, char **argv)
             if (result != JSON_SUCCESS)
             {
                 fprintf(stderr, "%s\n", json_errdetail(result, &lex));
-               exit(1);
+               ret = 1;
+               goto cleanup;
             }
             if (!need_strings)
                 printf("SUCCESS!\n");
             break;
         }
     }
+
+cleanup:
     fclose(json_file);
-   exit(0);
+   freeJsonLexContext(&lex);
+   free(json.data);
+
+   return ret;
  }
  
  /*
@@ -230,7 +248,8 @@ do_object_field_start(void *state, char *fname, bool isnull)
  static JsonParseErrorType
  do_object_field_end(void *state, char *fname, bool isnull)
  {
-   /* nothing to do really */
+   if (!lex_owns_tokens)
+       free(fname);
  
     return JSON_SUCCESS;
  }
@@ -291,6 +310,9 @@ do_scalar(void *state, char *token, JsonTokenType tokentype)
     else
         printf("%s", token);
  
+   if (!lex_owns_tokens)
+       free(token);
+
     return JSON_SUCCESS;
  }
  
@@ -343,7 +365,8 @@ usage(const char *progname)
  {
     fprintf(stderr, "Usage: %s [OPTION ...] testfile\n", progname);
     fprintf(stderr, "Options:\n");
-   fprintf(stderr, "  -c chunksize      size of piece fed to parser (default 64)n");
+   fprintf(stderr, "  -c chunksize      size of piece fed to parser (default 64)\n");
+   fprintf(stderr, "  -o                set JSONLEX_CTX_OWNS_TOKENS for leak checking\n");
     fprintf(stderr, "  -s                do semantic processing\n");
  
  }
author	Andrew Dunstan <andrew@dunslane.net>
	Wed, 27 Nov 2024 17:05:44 +0000 (12:05 -0500)
committer	Andrew Dunstan <andrew@dunslane.net>
	Wed, 27 Nov 2024 17:07:14 +0000 (12:07 -0500)
src/common/jsonapi.c		patch \| blob \| blame \| history
src/include/common/jsonapi.h		patch \| blob \| blame \| history
src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl		patch \| blob \| blame \| history
src/test/modules/test_json_parser/t/002_inline.pl		patch \| blob \| blame \| history
src/test/modules/test_json_parser/t/003_test_semantic.pl		patch \| blob \| blame \| history
src/test/modules/test_json_parser/test_json_parser_incremental.c		patch \| blob \| blame \| history