Judge multi statement query using psqlscan.

author Tatsuo Ishii <ishii@sraoss.co.jp>

Sat, 25 Mar 2023 07:21:27 +0000 (16:21 +0900)

committer Tatsuo Ishii <ishii@sraoss.co.jp>

Tue, 28 Mar 2023 08:57:21 +0000 (17:57 +0900)
author Tatsuo Ishii <ishii@sraoss.co.jp>
Sat, 25 Mar 2023 07:21:27 +0000 (16:21 +0900)
committer Tatsuo Ishii <ishii@sraoss.co.jp>
Tue, 28 Mar 2023 08:57:21 +0000 (17:57 +0900)
diff --git a/src/Makefile.am b/src/Makefile.am

index c72c83249c5960e171e9949f730cd95c899ebe22..eb631ef52f18478d7360601d9a0ce8e47e31325f 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -54,7 +54,7 @@ pgpool_SOURCES = main/main.c \
         utils/mmgr/aset.c \
         utils/error/elog.c \
         utils/error/assert.c \
-    utils/pcp/pcp_stream.c \
+       utils/pcp/pcp_stream.c \
         utils/regex_array.c \
         utils/json_writer.c \
         utils/json.c \
@@ -63,7 +63,12 @@ pgpool_SOURCES = main/main.c \
         utils/sha2.c \
         utils/ssl_utils.c \
         utils/statistics.c \
-       utils/pool_health_check_stats.c
+       utils/pool_health_check_stats.c \
+       utils/psqlscan.l \
+       utils/pgstrcasecmp.c
+
+utils/psqlscan.c: utils/psqlscan.l
+       $(LEX) -o'utils/psqlscan.c' $<
  
  DEFS = @DEFS@ \
         -DDEFAULT_CONFIGDIR=\"$(sysconfdir)\" \
diff --git a/src/include/utils/pgstrcasecmp.h b/src/include/utils/pgstrcasecmp.h

new file mode 100644 (file)

index 0000000..78027f4
--- /dev/null
+++ b/src/include/utils/pgstrcasecmp.h
@@ -0,0 +1,22 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgstrcasecmp.h
+ *       Header for src/utils/pgstrcasecmp.c compatibility functions.
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/utils/pgstrcasecmp.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef POOL_PGSTRCASECMP
+#define POOL_PGSTRCASECMP
+
+/*
+ * size_t is needed by pg_strncasecmp() below.  Include it here so that this
+ * header can be included on its own, regardless of include order.
+ */
+#include <stddef.h>
+
+/*
+ * Portable SQL-like case-independent comparisons and conversions.
+ *
+ * pg_strcasecmp    - compare two null-terminated strings ignoring case;
+ *                    returns 0 on match, else the difference of the first
+ *                    differing (case-folded) bytes.
+ * pg_strncasecmp   - same, but examines at most n bytes of each string.
+ * pg_toupper       - fold one byte to upper case (ASCII by arithmetic,
+ *                    high-bit-set bytes via the C library).
+ * pg_tolower       - fold one byte to lower case, same rules.
+ * pg_ascii_toupper - fold 'a'..'z' to upper case, leave all else alone.
+ * pg_ascii_tolower - fold 'A'..'Z' to lower case, leave all else alone.
+ */
+extern int     pg_strcasecmp(const char *s1, const char *s2);
+extern int     pg_strncasecmp(const char *s1, const char *s2, size_t n);
+extern unsigned char pg_toupper(unsigned char ch);
+extern unsigned char pg_tolower(unsigned char ch);
+extern unsigned char pg_ascii_toupper(unsigned char ch);
+extern unsigned char pg_ascii_tolower(unsigned char ch);
+
+#endif                                                 /* POOL_PGSTRCASECMP */
diff --git a/src/include/utils/pqexpbuffer.h b/src/include/utils/pqexpbuffer.h

new file mode 100644 (file)

index 0000000..020e94e
--- /dev/null
+++ b/src/include/utils/pqexpbuffer.h
@@ -0,0 +1,192 @@
+/*-------------------------------------------------------------------------
+ *
+ * pqexpbuffer.h
+ *       Declarations/definitions for "PQExpBuffer" functions.
+ *
+ * PQExpBuffer provides an indefinitely-extensible string data type.
+ * It can be used to buffer either ordinary C strings (null-terminated text)
+ * or arbitrary binary data.  All storage is allocated with malloc().
+ *
+ * This module is essentially the same as the backend's StringInfo data type,
+ * but it is intended for use in frontend libpq and client applications.
+ * Thus, it does not rely on palloc() nor elog().
+ *
+ * It does rely on vsnprintf(); if configure finds that libc doesn't provide
+ * a usable vsnprintf(), then a copy of our own implementation of it will
+ * be linked into libpq.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/interfaces/libpq/pqexpbuffer.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PQEXPBUFFER_H
+#define PQEXPBUFFER_H
+
+/*-------------------------
+ * PQExpBufferData holds information about an extensible string.
+ *             data    is the current buffer for the string (allocated with malloc).
+ *             len             is the current string length.  There is guaranteed to be
+ *                             a terminating '\0' at data[len], although this is not very
+ *                             useful when the string holds binary data rather than text.
+ *             maxlen  is the allocated size in bytes of 'data', i.e. the maximum
+ *                             string size (including the terminating '\0' char) that we can
+ *                             currently store in 'data' without having to reallocate
+ *                             more space.  We must always have maxlen > len.
+ *
+ * An exception occurs if we failed to allocate enough memory for the string
+ * buffer.  In that case data points to a statically allocated empty string,
+ * and len = maxlen = 0.
+ *-------------------------
+ */
+typedef struct PQExpBufferData
+{
+       char       *data;              /* current buffer; a static empty string when broken */
+       size_t          len;            /* current string length; data[len] is '\0' */
+       size_t          maxlen;         /* allocated size of data in bytes; 0 means broken */
+} PQExpBufferData;
+
+typedef PQExpBufferData *PQExpBuffer;
+
+/*------------------------
+ * Test for a broken (out of memory) PQExpBuffer.
+ * When a buffer is "broken", all operations except resetting or deleting it
+ * are no-ops.
+ *------------------------
+ */
+#define PQExpBufferBroken(str) \
+       ((str) == NULL || (str)->maxlen == 0)
+
+/*------------------------
+ * Same, but for use when using a static or local PQExpBufferData struct.
+ * For that, a null-pointer test is useless and may draw compiler warnings.
+ *------------------------
+ */
+#define PQExpBufferDataBroken(buf)     \
+       ((buf).maxlen == 0)
+
+/*------------------------
+ * Initial size of the data buffer in a PQExpBuffer.
+ * NB: this must be large enough to hold error messages that might
+ * be returned by PQrequestCancel().
+ *------------------------
+ */
+#define INITIAL_EXPBUFFER_SIZE 256
+
+/*------------------------
+ * There are two ways to create a PQExpBuffer object initially:
+ *
+ * PQExpBuffer stringptr = createPQExpBuffer();
+ *             Both the PQExpBufferData and the data buffer are malloc'd.
+ *
+ * PQExpBufferData string;
+ * initPQExpBuffer(&string);
+ *             The data buffer is malloc'd but the PQExpBufferData is presupplied.
+ *             This is appropriate if the PQExpBufferData is a field of another
+ *             struct.
+ *-------------------------
+ */
+
+/*------------------------
+ * createPQExpBuffer
+ * Create an empty 'PQExpBufferData' & return a pointer to it.
+ */
+extern PQExpBuffer createPQExpBuffer(void);
+
+/*------------------------
+ * initPQExpBuffer
+ * Initialize a PQExpBufferData struct (with previously undefined contents)
+ * to describe an empty string.
+ */
+extern void initPQExpBuffer(PQExpBuffer str);
+
+/*------------------------
+ * To destroy a PQExpBuffer, use either:
+ *
+ * destroyPQExpBuffer(str);
+ *             free()s both the data buffer and the PQExpBufferData.
+ *             This is the inverse of createPQExpBuffer().
+ *
+ * termPQExpBuffer(str)
+ *             free()s the data buffer but not the PQExpBufferData itself.
+ *             This is the inverse of initPQExpBuffer().
+ *
+ * NOTE: some routines build up a string using PQExpBuffer, and then
+ * release the PQExpBufferData but return the data string itself to their
+ * caller.  At that point the data string looks like a plain malloc'd
+ * string.
+ */
+extern void destroyPQExpBuffer(PQExpBuffer str);
+extern void termPQExpBuffer(PQExpBuffer str);
+
+/*------------------------
+ * resetPQExpBuffer
+ *             Reset a PQExpBuffer to empty
+ *
+ * Note: if possible, a "broken" PQExpBuffer is returned to normal.
+ */
+extern void resetPQExpBuffer(PQExpBuffer str);
+
+/*------------------------
+ * enlargePQExpBuffer
+ * Make sure there is enough space for 'needed' more bytes in the buffer
+ * ('needed' does not include the terminating null).
+ *
+ * Returns 1 if OK, 0 if failed to enlarge buffer.  (In the latter case
+ * the buffer is left in "broken" state.)
+ */
+extern int     enlargePQExpBuffer(PQExpBuffer str, size_t needed);
+
+/*------------------------
+ * printfPQExpBuffer
+ * Format text data under the control of fmt (an sprintf-like format string)
+ * and insert it into str.  More space is allocated to str if necessary.
+ * This is a convenience routine that does the same thing as
+ * resetPQExpBuffer() followed by appendPQExpBuffer().
+ */
+extern void printfPQExpBuffer(PQExpBuffer str, const char *fmt,...) pg_attribute_printf(2, 3);
+
+/*------------------------
+ * appendPQExpBuffer
+ * Format text data under the control of fmt (an sprintf-like format string)
+ * and append it to whatever is already in str.  More space is allocated
+ * to str if necessary.  This is sort of like a combination of sprintf and
+ * strcat.
+ */
+extern void appendPQExpBuffer(PQExpBuffer str, const char *fmt,...) pg_attribute_printf(2, 3);
+
+/*------------------------
+ * appendPQExpBufferVA
+ * Attempt to format data and append it to str.  Returns true if done
+ * (either successful or hard failure), false if need to retry.
+ *
+ * Caution: callers must be sure to preserve their entry-time errno
+ * when looping, in case the fmt contains "%m".
+ */
+extern bool appendPQExpBufferVA(PQExpBuffer str, const char *fmt, va_list args) pg_attribute_printf(2, 0);
+
+/*------------------------
+ * appendPQExpBufferStr
+ * Append the given string to a PQExpBuffer, allocating more space
+ * if necessary.
+ */
+extern void appendPQExpBufferStr(PQExpBuffer str, const char *data);
+
+/*------------------------
+ * appendPQExpBufferChar
+ * Append a single byte to str.
+ * Like appendPQExpBuffer(str, "%c", ch) but much faster.
+ */
+extern void appendPQExpBufferChar(PQExpBuffer str, char ch);
+
+/*------------------------
+ * appendBinaryPQExpBuffer
+ * Append arbitrary binary data to a PQExpBuffer, allocating more space
+ * if necessary.
+ */
+extern void appendBinaryPQExpBuffer(PQExpBuffer str,
+                                                                       const char *data, size_t datalen);
+
+#endif                                                 /* PQEXPBUFFER_H */
diff --git a/src/include/utils/psqlscan.h b/src/include/utils/psqlscan.h

new file mode 100644 (file)

index 0000000..6a90fca
--- /dev/null
+++ b/src/include/utils/psqlscan.h
@@ -0,0 +1,90 @@
+/*-------------------------------------------------------------------------
+ *
+ * psqlscan.h
+ *       lexical scanner for SQL commands
+ *
+ * This lexer used to be part of psql, and that heritage is reflected in
+ * the file name as well as function and typedef names, though it can now
+ * be used by other frontend programs as well.  It's also possible to extend
+ * this lexer with a compatible add-on lexer to handle program-specific
+ * backslash commands.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/fe_utils/psqlscan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PSQLSCAN_H
+#define PSQLSCAN_H
+
+#include "pqexpbuffer.h"
+
+
+/* Abstract type for lexer's internal state */
+typedef struct PsqlScanStateData *PsqlScanState;
+
+/* Termination states for psql_scan() */
+typedef enum
+{
+       PSCAN_SEMICOLON,                        /* found command-ending semicolon */
+       PSCAN_BACKSLASH,                        /* found backslash command */
+       PSCAN_INCOMPLETE,                       /* end of line, SQL statement incomplete */
+       PSCAN_EOL                                       /* end of line, SQL possibly complete */
+} PsqlScanResult;
+
+/*
+ * Prompt type returned by psql_scan() through its "prompt" output argument.
+ *
+ * NOTE(review): the per-member notes below are inferred from the member
+ * names and from the lexer state fields declared in psqlscan_int.h
+ * (xcdepth, dolqstart, paren_depth); confirm them against psqlscan.l.
+ */
+typedef enum _promptStatus
+{
+       PROMPT_READY,
+       PROMPT_CONTINUE,
+       PROMPT_COMMENT,                 /* presumably: inside a slash-star comment */
+       PROMPT_SINGLEQUOTE,             /* presumably: inside a '...' literal */
+       PROMPT_DOUBLEQUOTE,             /* presumably: inside a "..." identifier */
+       PROMPT_DOLLARQUOTE,             /* presumably: inside a $foo$ quoted string */
+       PROMPT_PAREN,                   /* presumably: inside unclosed parentheses */
+       PROMPT_COPY
+} promptStatus_t;
+
+/* Quoting request types for get_variable() callback */
+typedef enum
+{
+       PQUOTE_PLAIN,                           /* just return the actual value */
+       PQUOTE_SQL_LITERAL,                     /* add quotes to make a valid SQL literal */
+       PQUOTE_SQL_IDENT,                       /* quote if needed to make a SQL identifier */
+       PQUOTE_SHELL_ARG                        /* quote if needed to be safe in a shell cmd */
+} PsqlScanQuoteType;
+
+/* Callback functions to be used by the lexer */
+typedef struct PsqlScanCallbacks
+{
+       /* Fetch value of a variable, as a free'able string; NULL if unknown */
+       /* This pointer can be NULL if no variable substitution is wanted */
+       char       *(*get_variable) (const char *varname, PsqlScanQuoteType quote,
+                                                                void *passthrough);
+} PsqlScanCallbacks;
+
+
+extern PsqlScanState psql_scan_create(const PsqlScanCallbacks *callbacks);
+extern void psql_scan_destroy(PsqlScanState state);
+
+extern void psql_scan_set_passthrough(PsqlScanState state, void *passthrough);
+
+extern void psql_scan_setup(PsqlScanState state,
+                                                       const char *line, int line_len,
+                                                       int encoding, bool std_strings);
+extern void psql_scan_finish(PsqlScanState state);
+
+extern PsqlScanResult psql_scan(PsqlScanState state,
+                                                               PQExpBuffer query_buf,
+                                                               promptStatus_t *prompt);
+
+extern void psql_scan_reset(PsqlScanState state);
+
+extern void psql_scan_reselect_sql_lexer(PsqlScanState state);
+
+extern bool psql_scan_in_quote(PsqlScanState state);
+
+#endif                                                 /* PSQLSCAN_H */
diff --git a/src/include/utils/psqlscan_int.h b/src/include/utils/psqlscan_int.h

new file mode 100644 (file)

index 0000000..87debbf
--- /dev/null
+++ b/src/include/utils/psqlscan_int.h
@@ -0,0 +1,157 @@
+/*-------------------------------------------------------------------------
+ *
+ * psqlscan_int.h
+ *       lexical scanner internal declarations
+ *
+ * This file declares the PsqlScanStateData structure used by psqlscan.l
+ * and shared by other lexers compatible with it, such as psqlscanslash.l.
+ *
+ * One difficult aspect of this code is that we need to work in multibyte
+ * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
+ * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
+ * all our lexing rules treat all high-bit-set characters alike, we don't
+ * really need to care whether such a byte is part of a sequence or not.
+ * In an "unsafe" encoding, we still expect the first byte of a multibyte
+ * sequence to be >= 0x80, but later bytes might not be.  If we scan such
+ * a sequence as-is, the lexing rules could easily be fooled into matching
+ * such bytes to ordinary ASCII characters.  Our solution for this is to
+ * substitute 0xFF for each non-first byte within the data presented to flex.
+ * The flex rules will then pass the FF's through unmolested.  The
+ * psqlscan_emit() subroutine is responsible for looking back to the original
+ * string and replacing FF's with the corresponding original bytes.
+ *
+ * Another interesting thing we do here is scan different parts of the same
+ * input with physically separate flex lexers (ie, lexers written in separate
+ * .l files).  We can get away with this because the only part of the
+ * persistent state of a flex lexer that depends on its parsing rule tables
+ * is the start state number, which is easy enough to manage --- usually,
+ * in fact, we just need to set it to INITIAL when changing lexers.  But to
+ * make that work at all, we must use re-entrant lexers, so that all the
+ * relevant state is in the yyscan_t attached to the PsqlScanState;
+ * if we were using lexers with separate static state we would soon end up
+ * with dangling buffer pointers in one or the other.  Also note that this
+ * is unlikely to work very nicely if the lexers aren't all built with the
+ * same flex version, or if they don't use the same flex options.
+ *
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/fe_utils/psqlscan_int.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PSQLSCAN_INT_H
+#define PSQLSCAN_INT_H
+
+#include "utils/psqlscan.h"
+
+/*
+ * These are just to allow this file to be compilable standalone for header
+ * validity checking; in actual use, this file should always be included
+ * from the body of a flex file, where these symbols are already defined.
+ */
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+#ifndef YY_TYPEDEF_YY_SCANNER_T
+#define YY_TYPEDEF_YY_SCANNER_T
+typedef void *yyscan_t;
+#endif
+
+/*
+ * We use a stack of flex buffers to handle substitution of psql variables.
+ * Each stacked buffer contains the as-yet-unread text from one psql variable.
+ * When we pop the stack all the way, we resume reading from the outer buffer
+ * identified by scanbufhandle.
+ */
+typedef struct StackElem
+{
+       YY_BUFFER_STATE buf;            /* flex input control structure */
+       char       *bufstring;          /* data actually being scanned by flex */
+       char       *origstring;         /* copy of original data, if needed */
+       char       *varname;            /* name of variable providing data, or NULL */
+       struct StackElem *next;
+} StackElem;
+
+/*
+ * All working state of the lexer must be stored in PsqlScanStateData
+ * between calls.  This allows us to have multiple open lexer operations,
+ * which is needed for nested include files.  The lexer itself is not
+ * recursive, but it must be re-entrant.
+ */
+typedef struct PsqlScanStateData
+{
+       yyscan_t        scanner;                /* Flex's state for this PsqlScanState */
+
+       PQExpBuffer output_buf;         /* current output buffer */
+
+       StackElem  *buffer_stack;       /* stack of variable expansion buffers */
+
+       /*
+        * These variables always refer to the outer buffer, never to any stacked
+        * variable-expansion buffer.
+        */
+       YY_BUFFER_STATE scanbufhandle;
+       char       *scanbuf;            /* start of outer-level input buffer */
+       const char *scanline;           /* current input line at outer level */
+
+       /* safe_encoding, curline, refline are used by psqlscan_emit() to replace FFs */
+       int                     encoding;               /* encoding being used now */
+       bool            safe_encoding;  /* is current encoding "safe"? */
+       bool            std_strings;    /* are string literals standard? */
+       const char *curline;            /* actual flex input string for cur buf */
+       const char *refline;            /* original data for cur buffer */
+
+       /*
+        * All this state lives across successive input lines, until explicitly
+        * reset by psql_scan_reset.  start_state is adopted by yylex() on entry,
+        * and updated with its finishing state on exit.
+        */
+       int                     start_state;    /* yylex's starting/finishing state */
+       int                     state_before_str_stop;  /* start cond. before end quote */
+       int                     paren_depth;    /* depth of nesting in parentheses */
+       int                     xcdepth;                /* depth of nesting in slash-star comments */
+       char       *dolqstart;          /* current $foo$ quote start string */
+
+       /*
+        * State to track boundaries of BEGIN ... END blocks in function
+        * definitions, so that semicolons do not send query too early.
+        */
+       int                     identifier_count;       /* identifiers since start of statement */
+       char            identifiers[4]; /* records the first few identifiers */
+       int                     begin_depth;    /* depth of begin/end pairs */
+
+       /*
+        * Callback functions provided by the program making use of the lexer,
+        * plus a void* callback passthrough argument.
+        */
+       const PsqlScanCallbacks *callbacks;
+       void       *cb_passthrough;
+} PsqlScanStateData;
+
+
+/*
+ * Functions exported by psqlscan.l, but only meant for use within
+ * compatible lexers.
+ */
+extern void psqlscan_push_new_buffer(PsqlScanState state,
+                                                                        const char *newstr, const char *varname);
+extern void psqlscan_pop_buffer_stack(PsqlScanState state);
+extern void psqlscan_select_top_buffer(PsqlScanState state);
+extern bool psqlscan_var_is_current_source(PsqlScanState state,
+                                                                                  const char *varname);
+extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
+                                                                                          const char *txt, int len,
+                                                                                          char **txtcopy);
+extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
+extern char *psqlscan_extract_substring(PsqlScanState state,
+                                                                               const char *txt, int len);
+extern void psqlscan_escape_variable(PsqlScanState state,
+                                                                        const char *txt, int len,
+                                                                        PsqlScanQuoteType quote);
+extern void psqlscan_test_variable(PsqlScanState state,
+                                                                  const char *txt, int len);
+
+#endif                                                 /* PSQLSCAN_INT_H */
diff --git a/src/protocol/pool_proto_modules.c b/src/protocol/pool_proto_modules.c

index d641cc1379e8b4faecbcab15da73e89c3414a37c..0f9ce21c1e985158e21b51fd895a039248f10c5f 100644 (file)
--- a/src/protocol/pool_proto_modules.c
+++ b/src/protocol/pool_proto_modules.c
@@ -62,6 +62,7 @@
  #include "query_cache/pool_memqcache.h"
  #include "main/pool_internal_comms.h"
  #include "pool_config_variables.h"
+#include "utils/psqlscan.h"
  
  char      *copy_table = NULL;  /* copy table name */
  char      *copy_schema = NULL; /* copy table name */
@@ -104,6 +105,8 @@ static void si_get_snapshot(POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * b
  
  static bool check_transaction_state_and_abort(char *query, Node *node, POOL_CONNECTION * frontend, POOL_CONNECTION_POOL * backend);
  
+static bool multi_statement_query(char *buf);
+
  /*
   * This is the workhorse of processing the pg_terminate_backend function to
   * make sure that the use of function should not trigger the backend node failover.
@@ -196,6 +199,7 @@ SimpleQuery(POOL_CONNECTION * frontend,
         POOL_QUERY_CONTEXT *query_context;
  
         bool            error;
+       bool            use_minimal;
  
         /* Get session context */
         session_context = pool_get_session_context(false);
@@ -256,8 +260,30 @@ SimpleQuery(POOL_CONNECTION * frontend,
         query_context = pool_init_query_context();
         MemoryContext old_context = MemoryContextSwitchTo(query_context->memory_context);
  
+       /*
+        * Check whether the query is multi statement or not.
+        */
+       if (multi_statement_query(contents))
+       {
+               elog(DEBUG5, "multi statement query found");
+               query_context->is_multi_statement = true;
+               use_minimal = false;    /* never use minimal parser */
+       }
+       else
+       {
+               query_context->is_multi_statement = false;
+               /*
+                * Do not use minimal parser if we are in native replication or
+                * snapshot isolation mode.
+                */
+               if (REPLICATION)
+                       use_minimal = false;
+               else
+                       use_minimal = true;
+       }
+
         /* parse SQL string */
-       parse_tree_list = raw_parser(contents, RAW_PARSE_DEFAULT, len, &error, false);
+       parse_tree_list = raw_parser(contents, RAW_PARSE_DEFAULT, len, &error, use_minimal);
  
         if (parse_tree_list == NIL)
         {
@@ -343,19 +369,7 @@ SimpleQuery(POOL_CONNECTION * frontend,
                                                 (errmsg("DB's oid to discard its cache directory: dboid = %d", query_context->dboid)));
                         }
                 }
-
-               /*
-                * Check if multi statement query
-                */
-               if (parse_tree_list && list_length(parse_tree_list) > 1)
-               {
-                       query_context->is_multi_statement = true;
-               }
-               else
-               {
-                       query_context->is_multi_statement = false;
-               }
-
+               
                 /*
                  * check COPY FROM STDIN if true, set copy_* variable
                  */
@@ -4593,3 +4607,66 @@ check_transaction_state_and_abort(char *query, Node *node, POOL_CONNECTION * fro
         }
         return true;
  }
+
+/*
+ * multi_statement_query
+ *
+ * Decide whether the string "queries" contains more than one SQL statement.
+ *
+ * The text is fed to the psql lexer imported from PostgreSQL (psql_scan()),
+ * which stops each time it meets a command-ending semicolon.  Every such
+ * stop counts as one statement.  If the scanner reaches the end of the
+ * input after at least one semicolon and still has text in the line buffer,
+ * that trailing text is taken to be a last statement lacking its ";" and is
+ * counted as well.
+ *
+ * The scanner is set up with encoding 0 and std_strings = true, and without
+ * callbacks, so no variable substitution takes place.
+ *
+ * Returns true if more than one statement was counted, false otherwise.
+ */
+static bool
+multi_statement_query(char *queries)
+{
+       PQExpBufferData linebuf;
+       PsqlScanState scan;
+       promptStatus_t prompt_status;
+       int             nstatements = 0;
+       bool    finished = false;
+
+       /* prepare the line buffer and a scanner that reads from "queries" */
+       initPQExpBuffer(&linebuf);
+       scan = psql_scan_create(NULL);
+       psql_scan_setup(scan, queries, strlen(queries), 0, true);
+
+       while (!finished)
+       {
+               PsqlScanResult result;
+
+               /* each round starts with an empty line buffer */
+               resetPQExpBuffer(&linebuf);
+               result = psql_scan(scan, &linebuf, &prompt_status);
+
+               if (result == PSCAN_SEMICOLON)
+               {
+                       /* a statement terminated by ";" */
+                       nstatements++;
+               }
+               else if (result == PSCAN_INCOMPLETE || result == PSCAN_EOL)
+               {
+                       /*
+                        * End of input.  Leftover text after an earlier ";" is a
+                        * final statement without terminator: count it too.
+                        */
+                       if (nstatements > 0 && linebuf.len > 0)
+                               nstatements++;
+                       finished = true;
+               }
+               /* PSCAN_BACKSLASH or any other result: nothing to count, go on */
+       }
+
+       /* release the line buffer and the scanner */
+       termPQExpBuffer(&linebuf);
+       psql_scan_finish(scan);
+       psql_scan_destroy(scan);
+
+       return nstatements > 1;
+}
diff --git a/src/utils/pgstrcasecmp.c b/src/utils/pgstrcasecmp.c

new file mode 100644 (file)

index 0000000..f8e4a24
--- /dev/null
+++ b/src/utils/pgstrcasecmp.c
@@ -0,0 +1,154 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgstrcasecmp.c
+ *        Portable SQL-like case-independent comparisons and conversions.
+ *
+ * SQL99 specifies Unicode-aware case normalization, which we don't yet
+ * have the infrastructure for.  Instead we use tolower() to provide a
+ * locale-aware translation.  However, there are some locales where this
+ * is not right either (eg, Turkish may do strange things with 'i' and
+ * 'I').  Our current compromise is to use tolower() for characters with
+ * the high bit set, and use an ASCII-only downcasing for 7-bit
+ * characters.
+ *
+ * NB: this code should match downcase_truncate_identifier() in scansup.c.
+ *
+ * We also provide strict ASCII-only case conversion functions, which can
+ * be used to implement C/POSIX case folding semantics no matter what the
+ * C library thinks the locale is.
+ *
+ *
+ * Portions Copyright (c) 2023, PgPool Global Development Group
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ *
+ * src/port/pgstrcasecmp.c
+ *
+ *-------------------------------------------------------------------------
+ */
+//#include "c.h"
+
+#include <ctype.h>
+#include <stddef.h>
+#include "utils/pgstrcasecmp.h"
+#include "parser/pool_parser.h"
+
+/*
+ * pg_strcasecmp
+ *
+ * Compare two null-terminated strings without regard to case.
+ *
+ * Bytes are only folded when they differ: 'A'..'Z' are lowered by
+ * arithmetic, bytes with the high bit set are lowered with tolower() if
+ * isupper() says so.
+ *
+ * Returns 0 when the strings are equal, otherwise the difference between
+ * the first pair of folded bytes that do not match.
+ */
+int
+pg_strcasecmp(const char *s1, const char *s2)
+{
+       for (;; s1++, s2++)
+       {
+               unsigned char lhs = (unsigned char) *s1;
+               unsigned char rhs = (unsigned char) *s2;
+
+               if (lhs == rhs)
+               {
+                       /* identical bytes: done at the terminator, else move on */
+                       if (lhs == 0)
+                               return 0;
+                       continue;
+               }
+
+               if (lhs >= 'A' && lhs <= 'Z')
+                       lhs += 'a' - 'A';
+               else if (IS_HIGHBIT_SET(lhs) && isupper(lhs))
+                       lhs = tolower(lhs);
+
+               if (rhs >= 'A' && rhs <= 'Z')
+                       rhs += 'a' - 'A';
+               else if (IS_HIGHBIT_SET(rhs) && isupper(rhs))
+                       rhs = tolower(rhs);
+
+               if (lhs != rhs)
+                       return (int) lhs - (int) rhs;
+               if (lhs == 0)
+                       return 0;
+       }
+}
+
+/*
+ * pg_strncasecmp
+ *
+ * Compare two strings without regard to case, looking at no more than n
+ * bytes of each.  The strings need not be null-terminated, but a null byte
+ * found in both at the same position ends the comparison early.
+ *
+ * Folding rules are those of pg_strcasecmp(): ASCII by arithmetic,
+ * high-bit-set bytes through isupper()/tolower().
+ *
+ * Returns 0 when the examined bytes are equal, otherwise the difference
+ * between the first pair of folded bytes that do not match.
+ */
+int
+pg_strncasecmp(const char *s1, const char *s2, size_t n)
+{
+       size_t          remaining;
+
+       for (remaining = n; remaining > 0; remaining--, s1++, s2++)
+       {
+               unsigned char lhs = (unsigned char) *s1;
+               unsigned char rhs = (unsigned char) *s2;
+
+               if (lhs == rhs)
+               {
+                       /* identical bytes: stop at a terminator, else move on */
+                       if (lhs == 0)
+                               return 0;
+                       continue;
+               }
+
+               if (lhs >= 'A' && lhs <= 'Z')
+                       lhs += 'a' - 'A';
+               else if (IS_HIGHBIT_SET(lhs) && isupper(lhs))
+                       lhs = tolower(lhs);
+
+               if (rhs >= 'A' && rhs <= 'Z')
+                       rhs += 'a' - 'A';
+               else if (IS_HIGHBIT_SET(rhs) && isupper(rhs))
+                       rhs = tolower(rhs);
+
+               if (lhs != rhs)
+                       return (int) lhs - (int) rhs;
+               if (lhs == 0)
+                       return 0;
+       }
+
+       /* ran out of bytes to examine without finding a difference */
+       return 0;
+}
+
+/*
+ * pg_toupper
+ *
+ * Return the upper-case form of ch.
+ *
+ * 'a'..'z' are converted by arithmetic; a byte with the high bit set is
+ * converted with toupper() when islower() accepts it.  Any other byte is
+ * returned unchanged, so the function may be applied to arbitrary input.
+ * As with all byte-wise folding, the result is of limited meaning for
+ * multibyte character sets.
+ */
+unsigned char
+pg_toupper(unsigned char ch)
+{
+       if (ch >= 'a' && ch <= 'z')
+               return (unsigned char) (ch - 'a' + 'A');
+       if (IS_HIGHBIT_SET(ch) && islower(ch))
+               return (unsigned char) toupper(ch);
+       return ch;
+}
+
+/*
+ * pg_tolower
+ *
+ * Return the lower-case form of ch.
+ *
+ * 'A'..'Z' are converted by arithmetic; a byte with the high bit set is
+ * converted with tolower() when isupper() accepts it.  Any other byte is
+ * returned unchanged, so the function may be applied to arbitrary input.
+ * As with all byte-wise folding, the result is of limited meaning for
+ * multibyte character sets.
+ */
+unsigned char
+pg_tolower(unsigned char ch)
+{
+       if (ch >= 'A' && ch <= 'Z')
+               return (unsigned char) (ch - 'A' + 'a');
+       if (IS_HIGHBIT_SET(ch) && isupper(ch))
+               return (unsigned char) tolower(ch);
+       return ch;
+}
+
+/*
+ * pg_ascii_toupper
+ *
+ * Upper-case ch using C/POSIX locale rules only: 'a'..'z' map to
+ * 'A'..'Z', every other byte is returned as is.  The C library's locale
+ * is never consulted.
+ */
+unsigned char
+pg_ascii_toupper(unsigned char ch)
+{
+       return (ch >= 'a' && ch <= 'z') ? (unsigned char) (ch - 'a' + 'A') : ch;
+}
+
+/*
+ * pg_ascii_tolower
+ *
+ * Lower-case ch using C/POSIX locale rules only: 'A'..'Z' map to
+ * 'a'..'z', every other byte is returned as is.  The C library's locale
+ * is never consulted.
+ */
+unsigned char
+pg_ascii_tolower(unsigned char ch)
+{
+       return (ch >= 'A' && ch <= 'Z') ? (unsigned char) (ch - 'A' + 'a') : ch;
+}
diff --git a/src/utils/pqexpbuffer.c b/src/utils/pqexpbuffer.c

new file mode 100644 (file)

index 0000000..e220cd7
--- /dev/null
+++ b/src/utils/pqexpbuffer.c
@@ -0,0 +1,418 @@
+/*-------------------------------------------------------------------------
+ *
+ * pqexpbuffer.c
+ *
+ * PQExpBuffer provides an indefinitely-extensible string data type.
+ * It can be used to buffer either ordinary C strings (null-terminated text)
+ * or arbitrary binary data.  Upstream allocates all storage with malloc();
+ * this copy #defines malloc/free to palloc/pfree further down.
+ *
+ * This module is essentially the same as the backend's StringInfo data type.
+ * Upstream it targets frontend libpq and client applications and so avoids
+ * palloc(), elog() and psprintf.c (which will exit() on error).
+ *
+ * It does rely on vsnprintf(); if configure finds that libc doesn't provide
+ * a usable vsnprintf(), then a copy of our own implementation of it will
+ * be linked into libpq.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/utils/pqexpbuffer.c (from PostgreSQL src/interfaces/libpq/pqexpbuffer.c)
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <limits.h>
+
+#include "pqexpbuffer.h"
+
+#ifdef WIN32
+#include "win32.h"
+#endif
+
+
+/* All "broken" PQExpBuffers point to this string. */
+static const char oom_buffer[1] = "";
+
+/* Need a char * for unconstify() compatibility */
+static const char *oom_buffer_ptr = oom_buffer;
+
+#define malloc palloc
+#define free   pfree
+
+/*
+ * markPQExpBufferBroken
+ *
+ * Move a PQExpBuffer into the "broken" state unless it is already there.
+ */
+static void
+markPQExpBufferBroken(PQExpBuffer str)
+{
+       bool            owns_storage = (str->data != oom_buffer);
+
+       if (owns_storage)
+               free(str->data);
+
+       /*
+        * oom_buffer stays const so it can live in read-only storage and any
+        * write through a broken buffer fails; hence the cast here.
+        */
+       str->data = unconstify(char *, oom_buffer_ptr);
+       str->maxlen = 0;
+       str->len = 0;
+}
+
+/*
+ * createPQExpBuffer
+ *
+ * Allocate a PQExpBufferData, initialize it as an empty string, and return
+ * it.  If the allocation yields NULL, NULL is returned without initializing.
+ */
+PQExpBuffer
+createPQExpBuffer(void)
+{
+       PQExpBuffer buf = (PQExpBuffer) malloc(sizeof(PQExpBufferData));
+
+       if (buf == NULL)
+               return NULL;
+       initPQExpBuffer(buf);
+       return buf;
+}
+
+/*
+ * initPQExpBuffer
+ *
+ * Make a PQExpBufferData (whose old contents are undefined) describe an
+ * empty string.  Without storage it is parked on oom_buffer, i.e. "broken".
+ */
+void
+initPQExpBuffer(PQExpBuffer str)
+{
+       char       *storage = (char *) malloc(INITIAL_EXPBUFFER_SIZE);
+
+       str->len = 0;
+       if (storage == NULL)
+       {
+               /* const cast is deliberate; see markPQExpBufferBroken */
+               str->data = unconstify(char *, oom_buffer_ptr);
+               str->maxlen = 0;
+               return;
+       }
+       storage[0] = '\0';
+       str->data = storage;
+       str->maxlen = INITIAL_EXPBUFFER_SIZE;
+}
+
+/*
+ * destroyPQExpBuffer(str);
+ *
+ *             Inverse of createPQExpBuffer(): releases the data buffer and
+ *             then the PQExpBufferData itself.  A NULL str is ignored.
+ */
+void
+destroyPQExpBuffer(PQExpBuffer str)
+{
+       if (str == NULL)
+               return;
+
+       termPQExpBuffer(str);
+       free(str);
+}
+
+/*
+ * termPQExpBuffer(str)
+ *             Inverse of initPQExpBuffer(): releases the data buffer only,
+ *             leaving the PQExpBufferData struct itself for the caller.
+ */
+void
+termPQExpBuffer(PQExpBuffer str)
+{
+       if (str->data != oom_buffer)
+               free(str->data);
+       /* park the struct on oom_buffer so that it stays validly empty */
+       str->len = 0;
+       str->maxlen = 0;
+       str->data = unconstify(char *, oom_buffer_ptr);
+}
+
+/*
+ * resetPQExpBuffer
+ *             Empty a PQExpBuffer; a NULL str is ignored.
+ *
+ * Note: a buffer parked on oom_buffer is re-initialized, which returns a
+ * "broken" PQExpBuffer to normal whenever storage can be obtained.
+ */
+void
+resetPQExpBuffer(PQExpBuffer str)
+{
+       if (str == NULL)
+               return;
+
+       if (str->data == oom_buffer)
+       {
+               /* no real storage yet: attempt a fresh initialization */
+               initPQExpBuffer(str);
+               return;
+       }
+
+       str->data[0] = '\0';
+       str->len = 0;
+}
+
+/*
+ * enlargePQExpBuffer
+ * Make sure there is enough space for 'needed' more bytes in the buffer
+ * ('needed' does not include the terminating null).
+ *
+ * Returns 1 if OK, 0 if failed to enlarge buffer.  (In the latter case
+ * the buffer is left in "broken" state.)
+ */
+int
+enlargePQExpBuffer(PQExpBuffer str, size_t needed)
+{
+       size_t          newlen;
+       char       *newdata;
+
+       if (PQExpBufferBroken(str))
+               return 0;                               /* already failed */
+
+       /*
+        * Guard against ridiculous "needed" values, which can occur if we're fed
+        * bogus data.  Without this, we can get an overflow or infinite loop in
+        * the following.
+        */
+       if (needed >= ((size_t) INT_MAX - str->len))
+       {
+               markPQExpBufferBroken(str);
+               return 0;
+       }
+
+       /* Total space required now; the test above keeps it <= INT_MAX. */
+       needed += str->len + 1;
+
+       if (needed <= str->maxlen)
+               return 1;                               /* got enough space already */
+
+       /*
+        * We don't want to allocate just a little more space with each append;
+        * for efficiency, double the buffer size each time it overflows.
+        * Actually, we might need to more than double it if 'needed' is big...
+        */
+       newlen = (str->maxlen > 0) ? (2 * str->maxlen) : 64;
+       while (needed > newlen)
+               newlen = 2 * newlen;
+
+       /*
+        * Clamp to INT_MAX in case we went past it.  Note we are assuming here
+        * that INT_MAX <= UINT_MAX/2, else the above loop could overflow.  We
+        * will still have newlen >= needed.
+        */
+       if (newlen > (size_t) INT_MAX)
+               newlen = (size_t) INT_MAX;
+
+       /* storage came from palloc (see the malloc #define), so use repalloc */
+       newdata = (char *) repalloc(str->data, newlen);
+       if (newdata != NULL)
+       {
+               str->data = newdata;
+               str->maxlen = newlen;
+               return 1;
+       }
+
+       markPQExpBufferBroken(str);
+       return 0;
+}
+
+/*
+ * printfPQExpBuffer
+ * Replace the contents of str with text formatted under the control of fmt
+ * (an sprintf-like format string), growing str as required.  Equivalent to
+ * resetPQExpBuffer() followed by appendPQExpBuffer(); nothing is formatted
+ * if str is still "broken" after the reset.
+ */
+void
+printfPQExpBuffer(PQExpBuffer str, const char *fmt,...)
+{
+       const int       entry_errno = errno;
+       bool            finished = false;
+
+       resetPQExpBuffer(str);
+       if (PQExpBufferBroken(str))
+               return;                                 /* already failed */
+
+       /* Restart the va_list on every pass; false means "enlarged, retry". */
+       while (!finished)
+       {
+               va_list         args;
+
+               errno = entry_errno;    /* fmt may contain %m */
+               va_start(args, fmt);
+               finished = appendPQExpBufferVA(str, fmt, args);
+               va_end(args);
+       }
+}
+
+/*
+ * appendPQExpBuffer
+ *
+ * Append text formatted under the control of fmt (an sprintf-like format
+ * string) to whatever str already holds, growing str as required; roughly
+ * sprintf plus strcat.  Nothing happens if str is already "broken".
+ */
+void
+appendPQExpBuffer(PQExpBuffer str, const char *fmt,...)
+{
+       const int       entry_errno = errno;
+       bool            finished = false;
+
+       if (PQExpBufferBroken(str))
+               return;                                 /* already failed */
+
+       /* Restart the va_list on every pass; false means "enlarged, retry". */
+       while (!finished)
+       {
+               va_list         args;
+
+               errno = entry_errno;    /* fmt may contain %m */
+               va_start(args, fmt);
+               finished = appendPQExpBufferVA(str, fmt, args);
+               va_end(args);
+       }
+}
+
+/*
+ * appendPQExpBufferVA
+ * Shared guts of printfPQExpBuffer/appendPQExpBuffer.
+ *
+ * Make one attempt to format the data and append it to str.
+ *
+ * Returns true when there is nothing more to do (the text was appended, or
+ * a hard failure left str "broken"); returns false when the buffer has been
+ * enlarged and the caller should retry with a freshly started va_list.
+ *
+ * Caution: callers must be sure to preserve their entry-time errno
+ * when looping, in case the fmt contains "%m".
+ */
+bool
+appendPQExpBufferVA(PQExpBuffer str, const char *fmt, va_list args)
+{
+       size_t          room;
+       size_t          wanted;
+       int                     written;
+
+       if (str->maxlen <= str->len + 16)
+       {
+               /*
+                * Hardly any space left, so skip the formatting attempt and just
+                * grow.  The amount is a guess, but enlargePQExpBuffer's
+                * preference for power-of-2 sizes makes it insensitive: the net
+                * effect is to double the buffer before vsnprintf is tried.
+                */
+               wanted = 32;
+       }
+       else
+       {
+               /* Enough room to be worth formatting straight into the buffer. */
+               room = str->maxlen - str->len;
+
+               written = vsnprintf(str->data + str->len, room, fmt, args);
+
+               /*
+                * An error report from vsnprintf is assumed to mean that something
+                * is wrong with the format string, so fail hard instead of
+                * retrying.
+                */
+               if (unlikely(written < 0))
+               {
+                       markPQExpBufferBroken(str);
+                       return true;
+               }
+
+               /* It fit.  Note written does not include the trailing null. */
+               if ((size_t) written < room)
+               {
+                       str->len += written;
+                       return true;
+               }
+
+               /*
+                * Assume a C99-compliant vsnprintf and believe its estimate of the
+                * space required, adding one for the trailing null.  (If the
+                * estimate is wrong the logic still works, at the cost of extra
+                * loops.)  Give up if that would exceed INT_MAX, since str->maxlen
+                * can't represent more than that.
+                */
+               if (unlikely(written > INT_MAX - 1))
+               {
+                       markPQExpBufferBroken(str);
+                       return true;
+               }
+               wanted = written + 1;
+       }
+
+       /* Grow the buffer; a failure here has already marked str broken. */
+       if (!enlargePQExpBuffer(str, wanted))
+               return true;
+
+       return false;                   /* enlarged: caller must retry */
+}
+
+/*
+ * appendPQExpBufferStr: append a null-terminated string, growing str as needed.
+ */
+void
+appendPQExpBufferStr(PQExpBuffer str, const char *data)
+{
+       size_t          datalen = strlen(data);
+
+       appendBinaryPQExpBuffer(str, data, datalen);
+}
+
+/*
+ * appendPQExpBufferChar
+ * Append the single byte ch to str; equivalent to
+ * appendPQExpBuffer(str, "%c", ch) but much faster.
+ */
+void
+appendPQExpBufferChar(PQExpBuffer str, char ch)
+{
+       char       *tail;
+
+       if (!enlargePQExpBuffer(str, 1))
+               return;                                 /* no room for one more byte */
+       tail = str->data + str->len;
+       tail[0] = ch;
+       tail[1] = '\0';
+       str->len++;
+}
+
+/*
+ * appendBinaryPQExpBuffer
+ *
+ * Append datalen bytes of arbitrary binary data to a PQExpBuffer, growing
+ * it as needed.  Nothing is appended if the buffer cannot be enlarged.
+ */
+void
+appendBinaryPQExpBuffer(PQExpBuffer str, const char *data, size_t datalen)
+{
+       char       *dest;
+
+       if (!enlargePQExpBuffer(str, datalen))
+               return;                                 /* no room; str is broken */
+
+       dest = str->data + str->len;
+       memcpy(dest, data, datalen);
+       /*
+        * A trailing null is maintained even though it is probably useless
+        * for binary data.
+        */
+       dest[datalen] = '\0';
+       str->len += datalen;
+}
+
+#undef malloc
+#undef free
+
diff --git a/src/utils/psqlscan.l b/src/utils/psqlscan.l

new file mode 100644 (file)

index 0000000..4bc2af1
--- /dev/null
+++ b/src/utils/psqlscan.l
@@ -0,0 +1,1620 @@
+%top{
+/*-------------------------------------------------------------------------
+ *
+ * psqlscan.l
+ *       lexical scanner for SQL commands
+ *
+ * This lexer used to be part of psql, and that heritage is reflected in
+ * the file name as well as function and typedef names, though it can now
+ * be used by other frontend programs as well.  It's also possible to extend
+ * this lexer with a compatible add-on lexer to handle program-specific
+ * backslash commands.
+ *
+ * This code is mainly concerned with determining where the end of a SQL
+ * statement is: we are looking for semicolons that are not within quotes,
+ * comments, or parentheses.  The most reliable way to handle this is to
+ * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
+ * below are (except for a few) the same as the backend's, but their actions
+ * are just ECHO whereas the backend's actions generally do other things.
+ *
+ * XXX The rules in this file must be kept in sync with the backend lexer!!!
+ *
+ * XXX Avoid creating backtracking cases --- see the backend lexer for info.
+ *
+ * See psqlscan_int.h for additional commentary.
+ *
+ *
+ * Portions Copyright (c) 2023, PgPool Global Development Group
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *       src/utils/psqlscan.l (from PostgreSQL src/fe_utils/psqlscan.l)
+ *
+ *-------------------------------------------------------------------------
+ */
+}
+
+%{
+
+/* LCOV_EXCL_START */
+
+#include "parser/pg_config_manual.h"
+#include "pool.h"
+#include "utils/psqlscan.h"
+#include "utils/psqlscan_int.h"
+#include "utils/pqexpbuffer.h"
+#include "utils/pgstrcasecmp.h"
+#include "utils/elog.h"
+#include "parser/pool_parser.h"
+
+#define        pg_log_warning(str, var)        elog(WARNING, (str), (var))
+#define        pg_malloc                       palloc
+#define        pg_malloc0                      palloc0
+
+static char *pg_strdup(const char *in);
+
+/*
+ * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
+ * doesn't presently make use of that argument, so just declare it as int.
+ */
+typedef int YYSTYPE;
+
+/*
+ * Set the type of yyextra; we use it as a pointer back to the containing
+ * PsqlScanState.
+ */
+#define YY_EXTRA_TYPE PsqlScanState
+
+
+/* Return values from yylex() */
+#define LEXRES_EOL                     0       /* end of input */
+#define LEXRES_SEMI                    1       /* command-terminating semicolon found */
+#define LEXRES_BACKSLASH       2       /* backslash command start */
+
+
+#define ECHO psqlscan_emit(cur_state, yytext, yyleng)
+
+/*
+ * Work around a bug in flex 2.5.35: it emits a couple of functions that
+ * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
+ * this would cause warnings.  Providing our own declarations should be
+ * harmless even when the bug gets fixed.
+ */
+extern int     psql_yyget_column(yyscan_t yyscanner);
+extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
+
+%}
+
+%option reentrant
+%option bison-bridge
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="psql_yy"
+
+/*
+ * All of the following definitions and rules should exactly match
+ * src/backend/parser/scan.l so far as the flex patterns are concerned.
+ * The rule bodies are just ECHO as opposed to what the backend does,
+ * however.  (But be sure to duplicate code that affects the lexing process,
+ * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
+ * whereas scan.l has a separate one for each exclusive state.
+ */
+
+/*
+ * OK, here is a short description of lex/flex rules behavior.
+ * The longest pattern which matches an input string is always chosen.
+ * For equal-length patterns, the first occurring in the rules list is chosen.
+ * INITIAL is the starting state, to which all non-conditional rules apply.
+ * Exclusive states change parsing rules while the state is active.  When in
+ * an exclusive state, only those rules defined for that state apply.
+ *
+ * We use exclusive states for quoted strings, extended comments,
+ * and to eliminate parsing troubles for numeric strings.
+ * Exclusive states:
+ *  <xb> bit string literal
+ *  <xc> extended C-style comments
+ *  <xd> delimited identifiers (double-quoted identifiers)
+ *  <xh> hexadecimal byte string
+ *  <xq> standard quoted strings
+ *  <xqs> quote stop (detect continued strings)
+ *  <xe> extended quoted strings (support backslash escape sequences)
+ *  <xdolq> $foo$ quoted strings
+ *  <xui> quoted identifier with Unicode escapes
+ *  <xus> quoted string with Unicode escapes
+ *
+ * Note: we intentionally don't mimic the backend's <xeu> state; we have
+ * no need to distinguish it from <xe> state, and no good way to get out
+ * of it in error cases.  The backend just throws yyerror() in those
+ * cases, but that's not an option here.
+ */
+
+%x xb
+%x xc
+%x xd
+%x xh
+%x xq
+%x xqs
+%x xe
+%x xdolq
+%x xui
+%x xus
+
+/*
+ * In order to make the world safe for Windows and Mac clients as well as
+ * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
+ * sequence will be seen as two successive newlines, but that doesn't cause
+ * any problems.  Comments that start with -- and extend to the next
+ * newline are treated as equivalent to a single whitespace character.
+ *
+ * NOTE a fine point: if there is no newline following --, we will absorb
+ * everything to the end of the input as a comment.  This is correct.  Older
+ * versions of Postgres failed to recognize -- as a comment if the input
+ * did not end with a newline.
+ *
+ * XXX perhaps \f (formfeed) should be treated as a newline as well?
+ *
+ * XXX if you change the set of whitespace characters, fix scanner_isspace()
+ * to agree.
+ */
+
+space                  [ \t\n\r\f]
+horiz_space            [ \t\f]
+newline                        [\n\r]
+non_newline            [^\n\r]
+
+comment                        ("--"{non_newline}*)
+
+whitespace             ({space}+|{comment})
+
+/*
+ * SQL requires at least one newline in the whitespace separating
+ * string literals that are to be concatenated.  Silly, but who are we
+ * to argue?  Note that {whitespace_with_newline} should not have * after
+ * it, whereas {whitespace} should generally have a * after it...
+ */
+
+special_whitespace             ({space}+|{comment}{newline})
+horiz_whitespace               ({horiz_space}|{comment})
+whitespace_with_newline        ({horiz_whitespace}*{newline}{special_whitespace}*)
+
+quote                  '
+/* If we see {quote} then {quotecontinue}, the quoted string continues */
+quotecontinue  {whitespace_with_newline}{quote}
+
+/*
+ * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
+ * {quotecontinue}.  It might seem that this could just be {whitespace}*,
+ * but if there's a dash after {whitespace_with_newline}, it must be consumed
+ * to see if there's another dash --- which would start a {comment} and thus
+ * allow continuation of the {quotecontinue} token.
+ */
+quotecontinuefail      {whitespace}*"-"?
+
+/* Bit string
+ * It is tempting to scan the string for only those characters
+ * which are allowed. However, this leads to silently swallowed
+ * characters if illegal characters are included in the string.
+ * For example, if xbinside is [01] then B'ABCD' is interpreted
+ * as a zero-length string, and the ABCD' is lost!
+ * Better to pass the string forward and let the input routines
+ * validate the contents.
+ */
+xbstart                        [bB]{quote}
+xbinside               [^']*
+
+/* Hexadecimal byte string */
+xhstart                        [xX]{quote}
+xhinside               [^']*
+
+/* National character */
+xnstart                        [nN]{quote}
+
+/* Quoted string that allows backslash escapes */
+xestart                        [eE]{quote}
+xeinside               [^\\']+
+xeescape               [\\][^0-7]
+xeoctesc               [\\][0-7]{1,3}
+xehexesc               [\\]x[0-9A-Fa-f]{1,2}
+xeunicode              [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
+xeunicodefail  [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
+
+/* Extended quote
+ * xqdouble implements embedded quote, ''''
+ */
+xqstart                        {quote}
+xqdouble               {quote}{quote}
+xqinside               [^']+
+
+/* $foo$ style quotes ("dollar quoting")
+ * The quoted string starts with $foo$ where "foo" is an optional string
+ * in the form of an identifier, except that it may not contain "$",
+ * and extends to the first occurrence of an identical string.
+ * There is *no* processing of the quoted text.
+ *
+ * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
+ * fails to match its trailing "$".
+ */
+dolq_start             [A-Za-z\200-\377_]
+dolq_cont              [A-Za-z\200-\377_0-9]
+dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
+dolqfailed             \${dolq_start}{dolq_cont}*
+dolqinside             [^$]+
+
+/* Double quote
+ * Allows embedded spaces and other special characters into identifiers.
+ */
+dquote                 \"
+xdstart                        {dquote}
+xdstop                 {dquote}
+xddouble               {dquote}{dquote}
+xdinside               [^"]+
+
+/* Quoted identifier with Unicode escapes */
+xuistart               [uU]&{dquote}
+
+/* Quoted string with Unicode escapes */
+xusstart               [uU]&{quote}
+
+/* error rule to avoid backup */
+xufailed               [uU]&
+
+
+/* C-style comments
+ *
+ * The "extended comment" syntax closely resembles allowable operator syntax.
+ * The tricky part here is to get lex to recognize a string starting with
+ * slash-star as a comment, when interpreting it as an operator would produce
+ * a longer match --- remember lex will prefer a longer match!  Also, if we
+ * have something like plus-slash-star, lex will think this is a 3-character
+ * operator whereas we want to see it as a + operator and a comment start.
+ * The solution is two-fold:
+ * 1. append {op_chars}* to xcstart so that it matches as much text as
+ *    {operator} would. Then the tie-breaker (first matching rule of same
+ *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
+ *    in case it contains a star-slash that should terminate the comment.
+ * 2. In the operator rule, check for slash-star within the operator, and
+ *    if found throw it back with yyless().  This handles the plus-slash-star
+ *    problem.
+ * Dash-dash comments have similar interactions with the operator rule.
+ */
+xcstart                        \/\*{op_chars}*
+xcstop                 \*+\/
+xcinside               [^*/]+
+
+ident_start            [A-Za-z\200-\377_]
+ident_cont             [A-Za-z\200-\377_0-9\$]
+
+identifier             {ident_start}{ident_cont}*
+
+/* Assorted special-case operators and operator-like tokens */
+typecast               "::"
+dot_dot                        \.\.
+colon_equals   ":="
+
+/*
+ * These operator-like tokens (unlike the above ones) also match the {operator}
+ * rule, which means that they might be overridden by a longer match if they
+ * are followed by a comment start or a + or - character. Accordingly, if you
+ * add to this list, you must also add corresponding code to the {operator}
+ * block to return the correct token in such cases. (This is not needed in
+ * psqlscan.l since the token value is ignored there.)
+ */
+equals_greater "=>"
+less_equals            "<="
+greater_equals ">="
+less_greater   "<>"
+not_equals             "!="
+
+/*
+ * "self" is the set of chars that should be returned as single-character
+ * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
+ * which can be one or more characters long (but if a single-char token
+ * appears in the "self" set, it is not to be returned as an Op).  Note
+ * that the sets overlap, but each has some chars that are not in the other.
+ *
+ * If you change either set, adjust the character lists appearing in the
+ * rule for "operator"!
+ */
+self                   [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
+op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
+operator               {op_chars}+
+
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * the parser, and there it gets coerced via doNegate().
+ *
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ *
+ * {realfail} is added to prevent the need for scanner
+ * backup when the {real} rule fails to match completely.
+ */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}(_?{decdigit})*
+hexinteger             0[xX](_?{hexdigit})+
+octinteger             0[oO](_?{octdigit})+
+bininteger             0[bB](_?{bindigit})+
+
+hexfail                        0[xX]_?
+octfail                        0[oO]_?
+binfail                        0[bB]_?
+
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decinteger}
+realfail               ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk        {decinteger}{ident_start}
+hexinteger_junk        {hexinteger}{ident_start}
+octinteger_junk        {octinteger}{ident_start}
+bininteger_junk        {bininteger}{ident_start}
+numeric_junk   {numeric}{ident_start}
+real_junk              {real}{ident_start}
+
+param                  \${decinteger}
+param_junk             \${decinteger}{ident_start}
+
+/* psql-specific: characters allowed in variable names */
+variable_char  [A-Za-z\200-\377_0-9]
+
+other                  .
+
+/*
+ * Dollar quoted strings are totally opaque, and no escaping is done on them.
+ * Other quoted strings must allow some special characters such as single-quote
+ *  and newline.
+ * Embedded single-quotes are implemented both in the SQL standard
+ *  style of two adjacent single quotes "''" and in the Postgres/Java style
+ *  of escaped-quote "\'".
+ * Other embedded escaped characters are matched explicitly and the leading
+ *  backslash is dropped from the string.
+ * Note that xcstart must appear before operator, as explained above!
+ *  Also whitespace (comment) must appear before operator.
+ */
+
+%%
+
+%{
+               /* Declare some local variables inside yylex(), for convenience */
+               PsqlScanState cur_state = yyextra;
+               PQExpBuffer output_buf = cur_state->output_buf;
+
+               /*
+                * Force flex into the state indicated by start_state.  This has a
+                * couple of purposes: it lets some of the functions below set a new
+                * starting state without ugly direct access to flex variables, and it
+                * allows us to transition from one flex lexer to another so that we
+                * can lex different parts of the source string using separate lexers.
+                */
+               BEGIN(cur_state->start_state);
+%}
+
+{whitespace}   {
+                                       /*
+                                        * Note that the whitespace rule includes both true
+                                        * whitespace and single-line ("--" style) comments.
+                                        * We suppress whitespace until we have collected some
+                                        * non-whitespace data.  (This interacts with some
+                                        * decisions in MainLoop(); see there for details.)
+                                        */
+                                       if (output_buf->len > 0)
+                                               ECHO;
+                               }
+
+{xcstart}              {
+                                       cur_state->xcdepth = 0;
+                                       BEGIN(xc);
+                                       /* Put back any characters past slash-star; see above */
+                                       yyless(2);
+                                       ECHO;
+                               }
+
+<xc>{
+{xcstart}              {
+                                       cur_state->xcdepth++;
+                                       /* Put back any characters past slash-star; see above */
+                                       yyless(2);
+                                       ECHO;
+                               }
+
+{xcstop}               {
+                                       if (cur_state->xcdepth <= 0)
+                                               BEGIN(INITIAL);
+                                       else
+                                               cur_state->xcdepth--;
+                                       ECHO;
+                               }
+
+{xcinside}             {
+                                       ECHO;
+                               }
+
+{op_chars}             {
+                                       ECHO;
+                               }
+
+\*+                            {
+                                       ECHO;
+                               }
+} /* <xc> */
+
+{xbstart}              {
+                                       BEGIN(xb);
+                                       ECHO;
+                               }
+<xh>{xhinside} |
+<xb>{xbinside} {
+                                       ECHO;
+                               }
+
+{xhstart}              {
+                                       /* Hexadecimal bit type.
+                                        * At some point we should simply pass the string
+                                        * forward to the parser and label it there.
+                                        * In the meantime, place a leading "x" on the string
+                                        * to mark it for the input routine as a hex string.
+                                        */
+                                       BEGIN(xh);
+                                       ECHO;
+                               }
+
+{xnstart}              {
+                                       yyless(1);      /* eat only 'n' this time */
+                                       ECHO;
+                               }
+
+{xqstart}              {
+                                       if (cur_state->std_strings)
+                                               BEGIN(xq);
+                                       else
+                                               BEGIN(xe);
+                                       ECHO;
+                               }
+{xestart}              {
+                                       BEGIN(xe);
+                                       ECHO;
+                               }
+{xusstart}             {
+                                       BEGIN(xus);
+                                       ECHO;
+                               }
+
+<xb,xh,xq,xe,xus>{quote} {
+                                       /*
+                                        * When we are scanning a quoted string and see an end
+                                        * quote, we must look ahead for a possible continuation.
+                                        * If we don't see one, we know the end quote was in fact
+                                        * the end of the string.  To reduce the lexer table size,
+                                        * we use a single "xqs" state to do the lookahead for all
+                                        * types of strings.
+                                        *
+                                        * The current string state is saved first so that a
+                                        * continuation can resume in it.
+                                        */
+                                       cur_state->state_before_str_stop = YYSTATE;
+                                       BEGIN(xqs);
+                                       ECHO;
+                               }
+<xqs>{quotecontinue} {
+                                       /*
+                                        * Found a quote continuation, so return to the in-quote
+                                        * state and continue scanning the literal.  The matched
+                                        * continuation text itself is echoed like everything else.
+                                        */
+                                       BEGIN(cur_state->state_before_str_stop);
+                                       ECHO;
+                               }
+<xqs>{quotecontinuefail} |
+<xqs>{other}   {
+                                       /*
+                                        * Failed to see a quote continuation.  Throw back
+                                        * everything after the end quote (yyless(0) returns the
+                                        * whole match to the input) and go back to the INITIAL
+                                        * state, whichever string state we were in previously.
+                                        */
+                                       yyless(0);
+                                       BEGIN(INITIAL);
+                                       /* There's nothing to echo ... */
+                               }
+
+       /*
+        * Contents of quoted strings.  None of these rules changes the start
+        * state; each one simply echoes the text it matched.
+        */
+<xq,xe,xus>{xqdouble} {
+                                       ECHO;
+                               }
+<xq,xus>{xqinside}  {
+                                       ECHO;
+                               }
+<xe>{xeinside}  {
+                                       ECHO;
+                               }
+<xe>{xeunicode} {
+                                       ECHO;
+                               }
+<xe>{xeunicodefail}    {
+                                       ECHO;
+                               }
+<xe>{xeescape}  {
+                                       ECHO;
+                               }
+<xe>{xeoctesc}  {
+                                       ECHO;
+                               }
+<xe>{xehexesc}  {
+                                       ECHO;
+                               }
+<xe>.                  {
+                                       /* This is only needed for \ just before EOF */
+                                       ECHO;
+                               }
+
+{dolqdelim}            {
+                                       /*
+                                        * Start of a dollar-quoted string.  Keep a private copy
+                                        * of the opening delimiter; it is compared against later
+                                        * delimiters and freed when the matching one is found.
+                                        */
+                                       cur_state->dolqstart = pg_strdup(yytext);
+                                       BEGIN(xdolq);
+                                       ECHO;
+                               }
+{dolqfailed}   {
+                                       /* throw back all but the initial "$" */
+                                       yyless(1);
+                                       ECHO;
+                               }
+<xdolq>{dolqdelim} {
+                                       /* Only a delimiter identical to the opening one ends the string */
+                                       if (strcmp(yytext, cur_state->dolqstart) == 0)
+                                       {
+                                               pfree(cur_state->dolqstart);
+                                               cur_state->dolqstart = NULL;
+                                               BEGIN(INITIAL);
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * When we fail to match $...$ to dolqstart, transfer
+                                                * the $... part to the output, but put back the final
+                                                * $ for rescanning.  Consider $delim$...$junk$delim$
+                                                */
+                                               yyless(yyleng - 1);
+                                       }
+                                       /* In both cases whatever remains of the match is echoed */
+                                       ECHO;
+                               }
+<xdolq>{dolqinside} {
+                                       ECHO;
+                               }
+<xdolq>{dolqfailed} {
+                                       ECHO;
+                               }
+<xdolq>.               {
+                                       /* This is only needed for $ inside the quoted text */
+                                       ECHO;
+                               }
+
+{xdstart}              {
+                                       /* Opener for the xd state; the text is echoed as matched. */
+                                       BEGIN(xd);
+                                       ECHO;
+                               }
+{xuistart}             {
+                                       /* Opener for the xui state; the text is echoed as matched. */
+                                       BEGIN(xui);
+                                       ECHO;
+                               }
+<xd>{xdstop}   {
+                                       /* End of the xd construct: back to normal scanning. */
+                                       BEGIN(INITIAL);
+                                       ECHO;
+                               }
+<xui>{dquote}  {
+                                       /* End of the xui construct: back to normal scanning. */
+                                       BEGIN(INITIAL);
+                                       ECHO;
+                               }
+<xd,xui>{xddouble}     {
+                                       /* Stays inside the current state; just echoed. */
+                                       ECHO;
+                               }
+<xd,xui>{xdinside}     {
+                                       ECHO;
+                               }
+
+{xufailed}     {
+                                       /* throw back all but the initial u/U */
+                                       yyless(1);
+                                       ECHO;
+                               }
+
+       /*
+        * Fixed multi-character tokens.  Each one is echoed as matched, with
+        * no change of start state and no other bookkeeping.
+        */
+{typecast}             {
+                                       ECHO;
+                               }
+
+{dot_dot}              {
+                                       ECHO;
+                               }
+
+{colon_equals} {
+                                       ECHO;
+                               }
+
+{equals_greater} {
+                                       ECHO;
+                               }
+
+{less_equals}  {
+                                       ECHO;
+                               }
+
+{greater_equals} {
+                                       ECHO;
+                               }
+
+{less_greater} {
+                                       ECHO;
+                               }
+
+{not_equals}   {
+                                       ECHO;
+                               }
+
+       /*
+        * These rules are specific to psql --- they implement parenthesis
+        * counting and detection of command-ending semicolon.  These must
+        * appear before the {self} rule so that they take precedence over it.
+        */
+
+"("                            {
+                                       /* Track nesting so that ";" inside parentheses does not end the command. */
+                                       cur_state->paren_depth++;
+                                       ECHO;
+                               }
+
+")"                            {
+                                       /* Never let the depth go negative on unbalanced input. */
+                                       if (cur_state->paren_depth > 0)
+                                               cur_state->paren_depth--;
+                                       ECHO;
+                               }
+
+";"                            {
+                                       /*
+                                        * The semicolon is always echoed, but it only ends the
+                                        * command when we are outside all parentheses and outside
+                                        * any tracked BEGIN ... END block.
+                                        */
+                                       ECHO;
+                                       if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
+                                       {
+                                               /* Terminate lexing temporarily */
+                                               cur_state->start_state = YY_START;
+                                               /* The next statement starts with a fresh identifier count */
+                                               cur_state->identifier_count = 0;
+                                               return LEXRES_SEMI;
+                                       }
+                               }
+
+       /*
+        * psql-specific rules to handle backslash commands and variable
+        * substitution.  We want these before {self}, also.
+        */
+
+"\\"[;:]               {
+                                       /*
+                                        * Force a semi-colon or colon into the query buffer.
+                                        * Only the character after the backslash is emitted, and
+                                        * lexing continues.  An emitted semicolon also resets the
+                                        * identifier count.
+                                        */
+                                       psqlscan_emit(cur_state, yytext + 1, 1);
+                                       if (yytext[1] == ';')
+                                               cur_state->identifier_count = 0;
+                               }
+
+"\\"                   {
+                                       /*
+                                        * Terminate lexing temporarily.  The backslash itself is
+                                        * not echoed; the current start state is saved for the
+                                        * caller before returning.
+                                        */
+                                       cur_state->start_state = YY_START;
+                                       return LEXRES_BACKSLASH;
+                               }
+
+:{variable_char}+      {
+                                       /* Possible psql variable substitution */
+                                       char       *varname;
+                                       char       *value;
+
+                                       /* The variable name is the match minus its leading colon */
+                                       varname = psqlscan_extract_substring(cur_state,
+                                                                                                                yytext + 1,
+                                                                                                                yyleng - 1);
+                                       /* The lookup callback is optional; without it nothing is substituted */
+                                       if (cur_state->callbacks->get_variable)
+                                               value = cur_state->callbacks->get_variable(varname,
+                                                                                                                                  PQUOTE_PLAIN,
+                                                                                                                                  cur_state->cb_passthrough);
+                                       else
+                                               value = NULL;
+
+                                       if (value)
+                                       {
+                                               /* It is a variable, check for recursion */
+                                               if (psqlscan_var_is_current_source(cur_state, varname))
+                                               {
+                                                       /* Recursive expansion --- don't go there */
+                                                       pg_log_warning("skipping recursive expansion of variable \"%s\"",
+                                                                                                                         varname);
+                                                       /* Instead copy the string as is */
+                                                       ECHO;
+                                               }
+                                               else
+                                               {
+                                                       /* OK, perform substitution */
+                                                       psqlscan_push_new_buffer(cur_state, value, varname);
+                                                       /* yy_scan_string already made buffer active */
+                                               }
+                                               /*
+                                                * value is released here in both branches, so the
+                                                * pushed buffer presumably holds its own copy ---
+                                                * confirm against psqlscan_push_new_buffer.
+                                                */
+                                               pfree(value);
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * if the variable doesn't exist we'll copy the string
+                                                * as is
+                                                */
+                                               ECHO;
+                                       }
+
+                                       pfree(varname);
+                               }
+
+:'{variable_char}+'    {
+                                       /*
+                                        * The whole match, colon and quotes included, is handed to
+                                        * the helper together with the PQUOTE_SQL_LITERAL mode.
+                                        */
+                                       psqlscan_escape_variable(cur_state, yytext, yyleng,
+                                                                                        PQUOTE_SQL_LITERAL);
+                               }
+
+:\"{variable_char}+\"  {
+                                       /* Same as above, but with the PQUOTE_SQL_IDENT mode. */
+                                       psqlscan_escape_variable(cur_state, yytext, yyleng,
+                                                                                        PQUOTE_SQL_IDENT);
+                               }
+
+:\{\?{variable_char}+\}        {
+                                       /* The whole match is handed to the variable-test helper. */
+                                       psqlscan_test_variable(cur_state, yytext, yyleng);
+                               }
+
+       /*
+        * These rules just avoid the need for scanner backup if one of the
+        * three rules above fails to match completely.  In every case only the
+        * colon is kept and echoed; the rest of the match is returned to the
+        * input to be rescanned.
+        */
+
+:'{variable_char}*     {
+                                       /* Throw back everything but the colon */
+                                       yyless(1);
+                                       ECHO;
+                               }
+
+:\"{variable_char}*    {
+                                       /* Throw back everything but the colon */
+                                       yyless(1);
+                                       ECHO;
+                               }
+
+:\{\?{variable_char}*  {
+                                       /* Throw back everything but the colon */
+                                       yyless(1);
+                                       ECHO;
+                               }
+:\{    {
+                                       /* Throw back everything but the colon */
+                                       yyless(1);
+                                       ECHO;
+                               }
+
+       /*
+        * Back to backend-compatible rules.
+        */
+
+{self}                 {
+                                       ECHO;
+                               }
+
+{operator}             {
+                                       /*
+                                        * Check for embedded slash-star or dash-dash; those
+                                        * are comment starts, so operator must stop there.
+                                        * Note that slash-star or dash-dash at the first
+                                        * character will match a prior rule, not this one.
+                                        *
+                                        * nchars is the number of leading characters of the
+                                        * match that will finally be kept and echoed.
+                                        */
+                                       int                     nchars = yyleng;
+                                       char       *slashstar = strstr(yytext, "/*");
+                                       char       *dashdash = strstr(yytext, "--");
+
+                                       /* From here on slashstar is the earliest comment start of either kind, if any */
+                                       if (slashstar && dashdash)
+                                       {
+                                               /* if both appear, take the first one */
+                                               if (slashstar > dashdash)
+                                                       slashstar = dashdash;
+                                       }
+                                       else if (!slashstar)
+                                               slashstar = dashdash;
+                                       if (slashstar)
+                                               nchars = slashstar - yytext;
+
+                                       /*
+                                        * For SQL compatibility, '+' and '-' cannot be the
+                                        * last char of a multi-char operator unless the operator
+                                        * contains chars that are not in SQL operators.
+                                        * The idea is to lex '=-' as two operators, but not
+                                        * to forbid operator names like '?-' that could not be
+                                        * sequences of SQL operators.
+                                        */
+                                       if (nchars > 1 &&
+                                               (yytext[nchars - 1] == '+' ||
+                                                yytext[nchars - 1] == '-'))
+                                       {
+                                               int                     ic;
+
+                                               /* Scan backwards for a character that permits a trailing +/- */
+                                               for (ic = nchars - 2; ic >= 0; ic--)
+                                               {
+                                                       char c = yytext[ic];
+                                                       if (c == '~' || c == '!' || c == '@' ||
+                                                               c == '#' || c == '^' || c == '&' ||
+                                                               c == '|' || c == '`' || c == '?' ||
+                                                               c == '%')
+                                                               break;
+                                               }
+                                               if (ic < 0)
+                                               {
+                                                       /*
+                                                        * didn't find a qualifying character, so remove
+                                                        * all trailing [+-]
+                                                        */
+                                                       do {
+                                                               nchars--;
+                                                       } while (nchars > 1 &&
+                                                                (yytext[nchars - 1] == '+' ||
+                                                                 yytext[nchars - 1] == '-'));
+                                               }
+                                       }
+
+                                       if (nchars < yyleng)
+                                       {
+                                               /* Strip the unwanted chars from the token */
+                                               yyless(nchars);
+                                       }
+                                       ECHO;
+                               }
+
+       /*
+        * Parameter references and numeric literals, together with their
+        * "fail" and "junk" variants.  All are echoed as matched; the only
+        * exception is numericfail, which first gives back its last two
+        * characters.
+        */
+{param}                        {
+                                       ECHO;
+                               }
+{param_junk}   {
+                                       ECHO;
+                               }
+
+{decinteger}   {
+                                       ECHO;
+                               }
+{hexinteger}   {
+                                       ECHO;
+                               }
+{octinteger}   {
+                                       ECHO;
+                               }
+{bininteger}   {
+                                       ECHO;
+                               }
+{hexfail}              {
+                                       ECHO;
+                               }
+{octfail}              {
+                                       ECHO;
+                               }
+{binfail}              {
+                                       ECHO;
+                               }
+{numeric}              {
+                                       ECHO;
+                               }
+{numericfail}  {
+                                       /* throw back the .., and treat as integer */
+                                       yyless(yyleng - 2);
+                                       ECHO;
+                               }
+{real}                 {
+                                       ECHO;
+                               }
+{realfail}             {
+                                       ECHO;
+                               }
+{decinteger_junk}      {
+                                       ECHO;
+                               }
+{hexinteger_junk}      {
+                                       ECHO;
+                               }
+{octinteger_junk}      {
+                                       ECHO;
+                               }
+{bininteger_junk}      {
+                                       ECHO;
+                               }
+{numeric_junk} {
+                                       ECHO;
+                               }
+{real_junk}            {
+                                       ECHO;
+                               }
+
+
+{identifier}   {
+                                       /*
+                                        * We need to track if we are inside a BEGIN .. END block
+                                        * in a function definition, so that semicolons contained
+                                        * therein don't terminate the whole statement.  Short of
+                                        * writing a full parser here, the following heuristic
+                                        * should work.  First, we track whether the beginning of
+                                        * the statement matches CREATE [OR REPLACE]
+                                        * {FUNCTION|PROCEDURE}
+                                        */
+
+                                       /* A count of zero marks the start of a statement: forget the old keywords */
+                                       if (cur_state->identifier_count == 0)
+                                               memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
+
+                                       /*
+                                        * Record the lower-cased first letter of each of these
+                                        * keywords at the position where it occurs.  Positions
+                                        * at or beyond sizeof(identifiers) are not recorded
+                                        * (this assumes identifiers[] has one-byte elements ---
+                                        * confirm against its declaration).
+                                        */
+                                       if (pg_strcasecmp(yytext, "create") == 0 ||
+                                               pg_strcasecmp(yytext, "function") == 0 ||
+                                               pg_strcasecmp(yytext, "procedure") == 0 ||
+                                               pg_strcasecmp(yytext, "or") == 0 ||
+                                               pg_strcasecmp(yytext, "replace") == 0)
+                                       {
+                                               if (cur_state->identifier_count < sizeof(cur_state->identifiers))
+                                                       cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
+                                       }
+
+                                       /* Every identifier counts, whether or not it was recorded */
+                                       cur_state->identifier_count++;
+
+                                       /*
+                                        * BEGIN / CASE / END only adjust begin_depth when the
+                                        * recorded letters read c,f or c,p (CREATE FUNCTION or
+                                        * PROCEDURE) or c,o,r,f or c,o,r,p (CREATE OR REPLACE
+                                        * ...), and we are outside all parentheses.
+                                        */
+                                       if (cur_state->identifiers[0] == 'c' &&
+                                               (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
+                                                (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
+                                                 (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
+                                               cur_state->paren_depth == 0)
+                                       {
+                                               if (pg_strcasecmp(yytext, "begin") == 0)
+                                                       cur_state->begin_depth++;
+                                               else if (pg_strcasecmp(yytext, "case") == 0)
+                                               {
+                                                       /*
+                                                        * CASE also ends with END.  We only need to track
+                                                        * this if we are already inside a BEGIN.
+                                                        */
+                                                       if (cur_state->begin_depth >= 1)
+                                                               cur_state->begin_depth++;
+                                               }
+                                               else if (pg_strcasecmp(yytext, "end") == 0)
+                                               {
+                                                       /* Unmatched END must not drive the depth negative */
+                                                       if (cur_state->begin_depth > 0)
+                                                               cur_state->begin_depth--;
+                                               }
+                                       }
+
+                                       ECHO;
+                               }
+
+{other}                        {
+                                       ECHO;
+                               }
+
+<<EOF>>                        {
+                                       /*
+                                        * With nothing on the buffer stack this is the real end
+                                        * of the input: save the start state for the caller to
+                                        * inspect and stop lexing.
+                                        */
+                                       if (cur_state->buffer_stack == NULL)
+                                       {
+                                               cur_state->start_state = YY_START;
+                                               return LEXRES_EOL;              /* end of input reached */
+                                       }
+
+                                       /*
+                                        * We were expanding a variable, so pop the inclusion
+                                        * stack and keep lexing
+                                        */
+                                       psqlscan_pop_buffer_stack(cur_state);
+                                       psqlscan_select_top_buffer(cur_state);
+                               }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+/*
+ * Create a lexer working state struct.
+ *
+ * callbacks is a struct of function pointers that encapsulate some
+ * behavior we need from the surrounding program.  This struct must
+ * remain valid for the lifespan of the PsqlScanState.
+ *
+ * Returns the newly allocated, zero-initialized and reset state; release
+ * it with psql_scan_destroy().  If the flex scanner cannot be initialized
+ * a message is written to stderr and the process exits.
+ */
+PsqlScanState
+psql_scan_create(const PsqlScanCallbacks *callbacks)
+{
+       PsqlScanState state;
+
+       state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
+
+       state->callbacks = callbacks;
+
+       /*
+        * yylex_init() reports failure through a non-zero return value and
+        * leaves no usable scanner behind.  Going on to yyset_extra() in that
+        * case would dereference an invalid handle, so stop here instead.
+        */
+       if (yylex_init(&state->scanner) != 0)
+       {
+               fprintf(stderr, "yylex_init() failed\n");
+               exit(1);
+       }
+
+       /* Attach the state to the scanner as its extra data */
+       yyset_extra(state, state->scanner);
+
+       psql_scan_reset(state);
+
+       return state;
+}
+
+/*
+ * Destroy a lexer working state struct, releasing all resources.
+ *
+ * The state struct itself is freed as the last step, so the caller must
+ * not use the handle again after this returns.
+ */
+void
+psql_scan_destroy(PsqlScanState state)
+{
+       /* Wind down any scan still in progress before tearing the scanner down */
+       psql_scan_finish(state);
+
+       psql_scan_reset(state);
+
+       /* Release the flex scanner that psql_scan_create() set up with yylex_init() */
+       yylex_destroy(state->scanner);
+
+       pfree(state);
+}
+
+/*
+ * Set the callback passthrough pointer for the lexer.
+ *
+ * This could have been integrated into psql_scan_create, but keeping it
+ * separate allows the application to change the pointer later, which might
+ * be useful.
+ *
+ * The pointer is stored in the state as-is; the variable-substitution rule
+ * hands it to the get_variable callback as its last argument.
+ */
+void
+psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
+{
+       state->cb_passthrough = passthrough;
+}
+
+/*
+ * Begin lexing a new line of input.
+ *
+ * line points at line_len bytes of text that later psql_scan calls will
+ * consume; once scanning is done the caller should invoke psql_scan_finish.
+ * The lexer keeps a pointer to the caller's storage, so the text at *line
+ * has to stay untouched and allocated until psql_scan_finish has run.
+ *
+ * encoding is the libpq identifier of the character encoding in use;
+ * std_strings tells whether standard_conforming_strings is on.
+ */
+void
+psql_scan_setup(PsqlScanState state,
+                               const char *line, int line_len,
+                               int encoding, bool std_strings)
+{
+       /* A previous scan must have been finished before a new one starts */
+       Assert(state->scanbufhandle == NULL);
+       Assert(state->buffer_stack == NULL);
+
+       /* Remember the standard-strings setting */
+       state->std_strings = std_strings;
+
+       /* Remember the encoding, and whether it needs special handling */
+       state->encoding = encoding;
+       state->safe_encoding = pg_valid_server_encoding_id(encoding);
+
+       /* Build the flex input buffer, translated and padded as required */
+       state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
+                                                                                                  &state->scanbuf);
+       state->scanline = line;
+
+       /* Lookaside pointers, used if an unsafe encoding has to be mapped */
+       state->refline = line;
+       state->curline = state->scanbuf;
+}
+
+/*
+ * Do lexical analysis of SQL command text.
+ *
+ * The text previously passed to psql_scan_setup is scanned, and appended
+ * (possibly with transformation) to query_buf.
+ *
+ * The return value indicates the condition that stopped scanning:
+ *
+ * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
+ * transferred to query_buf.)  The command accumulated in query_buf should
+ * be executed, then clear query_buf and call again to scan the remainder
+ * of the line.
+ *
+ * PSCAN_BACKSLASH: found a backslash that starts a special command.
+ * Any previous data on the line has been transferred to query_buf.
+ * The caller will typically next apply a separate flex lexer to scan
+ * the special command.
+ *
+ * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
+ * incomplete SQL command.  *prompt is set to the appropriate prompt type.
+ *
+ * PSCAN_EOL: the end of the line was reached, and there is no lexical
+ * reason to consider the command incomplete.  The caller may or may not
+ * choose to send it.  *prompt is set to the appropriate prompt type if
+ * the caller chooses to collect more input.
+ *
+ * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
+ * be called next, then the cycle may be repeated with a fresh input line.
+ *
+ * In all cases, *prompt is set to an appropriate prompt type code for the
+ * next line-input operation.
+ */
+PsqlScanResult
+psql_scan(PsqlScanState state,
+                 PQExpBuffer query_buf,
+                 promptStatus_t *prompt)
+{
+       PsqlScanResult result;
+       int                     lexresult;
+
+       /* Must be scanning already */
+       Assert(state->scanbufhandle != NULL);
+
+       /* Set current output target */
+       state->output_buf = query_buf;
+
+       /* Set input source */
+       if (state->buffer_stack != NULL)
+               yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
+       else
+               yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+
+       /* And lex. */
+       lexresult = yylex(NULL, state->scanner);
+
+       /*
+        * Check termination state and return appropriate result info.
+        */
+       switch (lexresult)
+       {
+               case LEXRES_EOL:                /* end of input */
+                       switch (state->start_state)
+                       {
+                               case INITIAL:
+                               case xqs:               /* we treat this like INITIAL */
+                                       if (state->paren_depth > 0)
+                                       {
+                                               result = PSCAN_INCOMPLETE;
+                                               *prompt = PROMPT_PAREN;
+                                       }
+                                       else if (state->begin_depth > 0)
+                                       {
+                                               result = PSCAN_INCOMPLETE;
+                                               *prompt = PROMPT_CONTINUE;
+                                       }
+                                       else if (query_buf->len > 0)
+                                       {
+                                               result = PSCAN_EOL;
+                                               *prompt = PROMPT_CONTINUE;
+                                       }
+                                       else
+                                       {
+                                               /* never bother to send an empty buffer */
+                                               result = PSCAN_INCOMPLETE;
+                                               *prompt = PROMPT_READY;
+                                       }
+                                       break;
+                               case xb:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_SINGLEQUOTE;
+                                       break;
+                               case xc:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_COMMENT;
+                                       break;
+                               case xd:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_DOUBLEQUOTE;
+                                       break;
+                               case xh:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_SINGLEQUOTE;
+                                       break;
+                               case xe:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_SINGLEQUOTE;
+                                       break;
+                               case xq:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_SINGLEQUOTE;
+                                       break;
+                               case xdolq:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_DOLLARQUOTE;
+                                       break;
+                               case xui:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_DOUBLEQUOTE;
+                                       break;
+                               case xus:
+                                       result = PSCAN_INCOMPLETE;
+                                       *prompt = PROMPT_SINGLEQUOTE;
+                                       break;
+                               default:
+                                       /* can't get here */
+                                       fprintf(stderr, "invalid YY_START\n");
+                                       exit(1);
+                       }
+                       break;
+               case LEXRES_SEMI:               /* semicolon */
+                       result = PSCAN_SEMICOLON;
+                       *prompt = PROMPT_READY;
+                       break;
+               case LEXRES_BACKSLASH:  /* backslash */
+                       result = PSCAN_BACKSLASH;
+                       *prompt = PROMPT_READY;
+                       break;
+               default:
+                       /* can't get here */
+                       fprintf(stderr, "invalid yylex result\n");
+                       exit(1);
+       }
+
+       return result;
+}
+
+/*
+ * Finish scanning the current string: discard any unread input and free
+ * the buffers that were set up for it.  The PsqlScanState itself is not
+ * released, and the lexer scan state is left as it is; resetting that is
+ * the job of psql_scan_reset(), which is an independent operation.
+ *
+ * Calling this while nothing is being scanned is allowed, which keeps
+ * error recovery paths simple.
+ */
+void
+psql_scan_finish(PsqlScanState state)
+{
+       /* Throw away variable expansions that were still being scanned. */
+       while (state->buffer_stack != NULL)
+       {
+               psqlscan_pop_buffer_stack(state);
+       }
+
+       /* Release the flex handle of the outermost scan buffer, if any. */
+       if (state->scanbufhandle != NULL)
+       {
+               yy_delete_buffer(state->scanbufhandle, state->scanner);
+               state->scanbufhandle = NULL;
+       }
+
+       /* Release the text copy backing the outermost scan buffer, if any. */
+       if (state->scanbuf != NULL)
+       {
+               pfree(state->scanbuf);
+               state->scanbuf = NULL;
+       }
+}
+
+/*
+ * Put the lexer scanning state back to its start conditions.  Use this
+ * when executing \r psql commands, or at any other point where the prior
+ * contents of query_buf are discarded.  It is not needed after executing
+ * and clearing the buffer following a PSCAN_SEMICOLON or PSCAN_EOL result,
+ * since the scan state must already be INITIAL when those are returned.
+ *
+ * Flushing unread input is a separate matter handled by
+ * psql_scan_finish().
+ */
+void
+psql_scan_reset(PsqlScanState state)
+{
+       /* Forget any pending dollar-quote start string. */
+       if (state->dolqstart != NULL)
+       {
+               pfree(state->dolqstart);
+       }
+       state->dolqstart = NULL;
+
+       /* Back to the initial start condition with all counters cleared. */
+       state->start_state = INITIAL;
+       state->xcdepth = 0;                     /* not really necessary */
+       state->paren_depth = 0;
+       state->begin_depth = 0;
+       state->identifier_count = 0;
+}
+
+/*
+ * Reselect this lexer (psqlscan.l) after using another one.
+ *
+ * Only state->start_state is touched: it is set back to INITIAL.
+ *
+ * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
+ * state, because we'd never switch to another lexer in a different state.
+ * However, we don't want to reset e.g. paren_depth, so this can't be
+ * the same as psql_scan_reset().
+ *
+ * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
+ * must be a superset of this.
+ *
+ * Note: it seems likely that other lexers could just assign INITIAL for
+ * themselves, since that probably has the value zero in every flex-generated
+ * lexer.  But let's not assume that.
+ */
+void
+psql_scan_reselect_sql_lexer(PsqlScanState state)
+{
+       state->start_state = INITIAL;
+}
+
+/*
+ * Report whether the lexer is currently in an "inside quotes" state,
+ * i.e. in any start condition other than INITIAL or xqs.
+ *
+ * This is pretty grotty, but it is needed to keep the old behavior in
+ * which mainloop.c drops blank lines that are not inside quotes without
+ * even echoing them.
+ */
+bool
+psql_scan_in_quote(PsqlScanState state)
+{
+       switch (state->start_state)
+       {
+               case INITIAL:
+               case xqs:
+                       /* neither of these counts as being inside quotes */
+                       return false;
+               default:
+                       return true;
+       }
+}
+
+/*
+ * Add the given string to the top of the stack of text to be scanned.
+ *
+ * varname may be NULL; when it is not, a private copy is stored in the
+ * new stack entry.
+ *
+ * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
+ */
+void
+psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
+                                                const char *varname)
+{
+       StackElem  *entry = (StackElem *) pg_malloc(sizeof(StackElem));
+
+       /*
+        * In current usage varname points into the current flex input buffer,
+        * and psqlscan_prepare_buffer() will change the buffer state.  The name
+        * therefore has to be copied before that call is made.
+        */
+       if (varname != NULL)
+               entry->varname = pg_strdup(varname);
+       else
+               entry->varname = NULL;
+
+       entry->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
+                                                                                &entry->bufstring);
+       state->curline = entry->bufstring;
+
+       /*
+        * In a safe encoding the scanned copy can serve as its own reference
+        * text; otherwise an unmodified copy of the input is kept for that.
+        */
+       entry->origstring = state->safe_encoding ? NULL : pg_strdup(newstr);
+       state->refline = entry->origstring ? entry->origstring : entry->bufstring;
+
+       /* Link the new entry in as the top of the stack. */
+       entry->next = state->buffer_stack;
+       state->buffer_stack = entry;
+}
+
+/*
+ * Remove the topmost buffer stack entry and free everything it owns.
+ * The stack must not be empty when this is called.
+ *
+ * NB: the flex input state is unspecified afterwards; the caller has to
+ * switch to an appropriate buffer before lexing continues.
+ * See psqlscan_select_top_buffer().
+ */
+void
+psqlscan_pop_buffer_stack(PsqlScanState state)
+{
+       StackElem  *top = state->buffer_stack;
+
+       /* Unlink the entry first, then tear it down. */
+       state->buffer_stack = top->next;
+
+       yy_delete_buffer(top->buf, state->scanner);
+       pfree(top->bufstring);
+
+       /* These two members are optional and may be NULL. */
+       if (top->origstring != NULL)
+       {
+               pfree(top->origstring);
+       }
+       if (top->varname != NULL)
+       {
+               pfree(top->varname);
+       }
+
+       pfree(top);
+}
+
+/*
+ * Make the topmost remaining buffer the active flex input.  When the
+ * buffer stack is empty, the outer scan buffer is selected instead.
+ * state->curline and state->refline are updated to match.
+ */
+void
+psqlscan_select_top_buffer(PsqlScanState state)
+{
+       StackElem  *top = state->buffer_stack;
+
+       if (top == NULL)
+       {
+               /* Nothing stacked: go back to the outer scan buffer. */
+               yy_switch_to_buffer(state->scanbufhandle, state->scanner);
+               state->curline = state->scanbuf;
+               state->refline = state->scanline;
+               return;
+       }
+
+       yy_switch_to_buffer(top->buf, state->scanner);
+       state->curline = top->bufstring;
+
+       /* Use the saved original text as reference when there is one. */
+       if (top->origstring)
+               state->refline = top->origstring;
+       else
+               state->refline = top->bufstring;
+}
+
+/*
+ * Return true if the named variable is recorded as the source of any
+ * buffer currently on the buffer stack, false otherwise.
+ */
+bool
+psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
+{
+       StackElem  *cur = state->buffer_stack;
+
+       while (cur != NULL)
+       {
+               /* Entries without a recorded variable name never match. */
+               if (cur->varname != NULL && strcmp(cur->varname, varname) == 0)
+               {
+                       return true;
+               }
+               cur = cur->next;
+       }
+
+       return false;
+}
+
+/*
+ * Create a flex input buffer for scanning the given data.  The data is
+ * always copied; the copy is returned through *txtcopy.  When the encoding
+ * is not a safe one, every byte of a multibyte sequence after the first
+ * is replaced by 0xFF in the copy so that it cannot fool the lexer rules.
+ *
+ * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
+ */
+YY_BUFFER_STATE
+psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
+                                               char **txtcopy)
+{
+       /* Flex wants two \0 characters after the actual data */
+       char       *copy = pg_malloc(len + 2);
+
+       *txtcopy = copy;
+
+       if (!state->safe_encoding)
+       {
+               /* Gotta do it the hard way: walk the text character by character */
+               int                     pos = 0;
+
+               while (pos < len)
+               {
+                       /* number of bytes following the first byte of this character */
+                       int                     trailing = PQmblen(txt + pos, state->encoding) - 1;
+
+                       /* first byte should always be okay... */
+                       copy[pos] = txt[pos];
+                       pos++;
+
+                       /* ... the rest are masked, without running past the input */
+                       for (; trailing > 0 && pos < len; trailing--)
+                               copy[pos++] = (char) 0xFF;
+               }
+       }
+       else
+       {
+               memcpy(copy, txt, len);
+       }
+
+       /* Terminate the copy the way flex expects. */
+       copy[len] = YY_END_OF_BUFFER_CHAR;
+       copy[len + 1] = YY_END_OF_BUFFER_CHAR;
+
+       return yy_scan_buffer(copy, len + 2, state->scanner);
+}
+
+/*
+ * psqlscan_emit() --- body for ECHO macro
+ *
+ * Appends len bytes starting at txt to state->output_buf.  In an unsafe
+ * encoding, bytes that read as 0xFF are replaced by the byte at the same
+ * offset in the reference text (state->refline).
+ *
+ * NB: use this for ALL and ONLY the text copied from the flex input data.
+ * Passing something that is not part of the yytext string is a mistake.
+ * Internally generated text can be appended directly to state->output_buf.
+ */
+void
+psqlscan_emit(PsqlScanState state, const char *txt, int len)
+{
+       PQExpBuffer out = state->output_buf;
+       const char *orig;
+       int                     pos;
+
+       if (state->safe_encoding)
+       {
+               /* Easy case: the scanned text can be copied verbatim. */
+               appendBinaryPQExpBuffer(out, txt, len);
+               return;
+       }
+
+       /* Gotta do it the hard way: find txt's counterpart in the reference */
+       orig = state->refline + (txt - state->curline);
+
+       for (pos = 0; pos < len; pos++)
+       {
+               char            ch = txt[pos];
+
+               appendPQExpBufferChar(out, (ch == (char) 0xFF) ? orig[pos] : ch);
+       }
+}
+
+/*
+ * psqlscan_extract_substring --- fetch value of (part of) the current token
+ *
+ * Works like psqlscan_emit(), but instead of appending to
+ * state->output_buf it returns the data as a freshly allocated,
+ * NUL-terminated string of len bytes (allocated with pg_malloc).
+ */
+char *
+psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
+{
+       char       *copy = (char *) pg_malloc(len + 1);
+
+       copy[len] = '\0';
+
+       if (!state->safe_encoding)
+       {
+               /* Gotta do it the hard way: find txt's counterpart in the reference */
+               const char *orig = state->refline + (txt - state->curline);
+               int                     pos;
+
+               for (pos = 0; pos < len; pos++)
+               {
+                       /* bytes reading as 0xFF are taken from the reference text */
+                       if (txt[pos] == (char) 0xFF)
+                               copy[pos] = orig[pos];
+                       else
+                               copy[pos] = txt[pos];
+               }
+       }
+       else
+       {
+               memcpy(copy, txt, len);
+       }
+
+       return copy;
+}
+
+/*
+ * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
+ *
+ * The variable name is taken from the token with its first two and last
+ * one characters stripped.  If the get_variable callback yields a value,
+ * that value is appended to output_buf.  (Since the result is surely
+ * quoted, there is never any reason to rescan it.)  If there is no
+ * callback or it returns NULL, the token is emitted as-is.
+ */
+void
+psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
+                                                PsqlScanQuoteType quote)
+{
+       char       *name = psqlscan_extract_substring(state, txt + 2, len - 3);
+       char       *escaped = NULL;
+
+       /* Variable lookup; without a callback no variable can be found. */
+       if (state->callbacks->get_variable != NULL)
+       {
+               escaped = state->callbacks->get_variable(name, quote,
+                                                                                                state->cb_passthrough);
+       }
+       pfree(name);
+
+       if (escaped == NULL)
+       {
+               /* Emit original token as-is */
+               psqlscan_emit(state, txt, len);
+               return;
+       }
+
+       /* Emit the suitably-escaped value */
+       appendPQExpBufferStr(state->output_buf, escaped);
+       pfree(escaped);
+}
+
+/*
+ * psqlscan_test_variable --- emit TRUE or FALSE depending on whether a
+ * variable is defined
+ *
+ * The variable name is taken from the token with its first three and last
+ * one characters stripped (presumably the :{?VARIABLE} form; confirm
+ * against the lexer rule that calls this).  If the get_variable callback
+ * returns a value, "TRUE" is appended to output_buf; if there is no
+ * callback or it returns NULL, "FALSE" is appended.
+ */
+void
+psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
+{
+       char    *varname;
+       char    *value;
+
+       varname = psqlscan_extract_substring(state, txt + 3, len - 4);
+       if (state->callbacks->get_variable)
+               value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
+                                                                                          state->cb_passthrough);
+       else
+               value = NULL;
+       pfree(varname);
+
+       /*
+        * The result text is generated here rather than copied from the flex
+        * input, so append it directly to output_buf.  psqlscan_emit() is only
+        * for text inside the input buffer: in an unsafe encoding it subtracts
+        * state->curline from the pointer it is given, which is not valid for a
+        * string literal.
+        */
+       if (value != NULL)
+       {
+               appendPQExpBufferStr(state->output_buf, "TRUE");
+               pfree(value);
+       }
+       else
+       {
+               appendPQExpBufferStr(state->output_buf, "FALSE");
+       }
+}
+
+/*
+ * Checked string duplication for the pgpool backend, built on pstrdup().
+ *
+ * Reports an ERROR through ereport() when given a NULL pointer or when
+ * pstrdup() returns NULL; otherwise returns the copy.
+ */
+static char *
+pg_strdup(const char *in)
+{
+       char       *copy;
+
+       /* A NULL input is treated as an internal error. */
+       if (in == NULL)
+               ereport(ERROR,
+                       (errmsg("cannot duplicate null pointer (internal error)")));
+
+       copy = pstrdup(in);
+
+       if (copy == NULL)
+               ereport(ERROR,
+                       (errmsg("out of memory")));
+
+       return copy;
+}
+
+#undef pg_log_warning
+#undef pg_malloc
+#undef pg_malloc0
author	Tatsuo Ishii <ishii@sraoss.co.jp>
	Sat, 25 Mar 2023 07:21:27 +0000 (16:21 +0900)
committer	Tatsuo Ishii <ishii@sraoss.co.jp>
	Tue, 28 Mar 2023 08:57:21 +0000 (17:57 +0900)
src/Makefile.am		patch \| blob \| blame \| history
src/include/utils/pgstrcasecmp.h	[new file with mode: 0644]	patch \| blob
src/include/utils/pqexpbuffer.h	[new file with mode: 0644]	patch \| blob
src/include/utils/psqlscan.h	[new file with mode: 0644]	patch \| blob
src/include/utils/psqlscan_int.h	[new file with mode: 0644]	patch \| blob
src/protocol/pool_proto_modules.c		patch \| blob \| blame \| history
src/utils/pgstrcasecmp.c	[new file with mode: 0644]	patch \| blob
src/utils/pqexpbuffer.c	[new file with mode: 0644]	patch \| blob
src/utils/psqlscan.l	[new file with mode: 0644]	patch \| blob