Fix pg_restore's direct-to-database mode for INSERT-style table data.
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 6 Jan 2012 18:04:15 +0000 (13:04 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 6 Jan 2012 18:04:15 +0000 (13:04 -0500)
In commit 6545a901aaf84cb05212bb6a7674059908f527c3, I removed the mini SQL
lexer that was in pg_backup_db.c, thinking that it had no real purpose
beyond separating COPY data from SQL commands, which purpose had been
obsoleted by long-ago fixes in pg_dump's archive file format.
Unfortunately this was in error: that code was also used to identify
command boundaries in INSERT-style table data, which is run together as a
single string in the archive file for better compressibility.  As a result,
direct-to-database restores from archive files made with --inserts or
--column-inserts fail in our latest releases, as reported by Dick Visser.

To fix, restore the mini SQL lexer, but simplify it by adjusting the
calling logic so that it's only required to cope with INSERT-style table
data, not arbitrary SQL commands.  This allows us to not have to deal with
SQL comments, E'' strings, or dollar-quoted strings, none of which have
ever been emitted by dumpTableData_insert.

Also, fix the lexer to cope with standard-conforming strings, which was the
actual bug that the previous patch was meant to solve.

Back-patch to all supported branches.  The previous patch went back to 8.2,
which unfortunately means that the EOL release of 8.2 contains this bug,
but I don't think we're doing another 8.2 release just because of that.

src/bin/pg_dump/pg_backup_archiver.c
src/bin/pg_dump/pg_backup_archiver.h
src/bin/pg_dump/pg_backup_db.c
src/bin/pg_dump/pg_dump.c

index 62206ecda4d3761aaa3fe9c607a09e79f12ab592..729cc6b561349d01a0d85ee93b3197413ab7e9af 100644 (file)
@@ -617,20 +617,20 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te,
                                        if (te->copyStmt && strlen(te->copyStmt) > 0)
                                        {
                                                ahprintf(AH, "%s", te->copyStmt);
-                                               AH->writingCopyData = true;
+                                               AH->outputKind = OUTPUT_COPYDATA;
                                        }
+                                       else
+                                               AH->outputKind = OUTPUT_OTHERDATA;
 
                                        (*AH->PrintTocDataPtr) (AH, te, ropt);
 
                                        /*
                                         * Terminate COPY if needed.
                                         */
-                                       if (AH->writingCopyData)
-                                       {
-                                               if (RestoringToDB(AH))
-                                                       EndDBCopyMode(AH, te);
-                                               AH->writingCopyData = false;
-                                       }
+                                       if (AH->outputKind == OUTPUT_COPYDATA &&
+                                               RestoringToDB(AH))
+                                               EndDBCopyMode(AH, te);
+                                       AH->outputKind = OUTPUT_SQLCMDS;
 
                                        /* close out the transaction started above */
                                        if (is_parallel && te->created)
@@ -2006,6 +2006,8 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
        AH->mode = mode;
        AH->compression = compression;
 
+       memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
+
        /* Open stdout with no compression for AH output handle */
        AH->gzOut = 0;
        AH->OF = stdout;
@@ -4209,7 +4211,8 @@ CloneArchive(ArchiveHandle *AH)
                die_horribly(AH, modulename, "out of memory\n");
        memcpy(clone, AH, sizeof(ArchiveHandle));
 
-       /* Handle format-independent fields ... none at the moment */
+       /* Handle format-independent fields */
+       memset(&(clone->sqlparse), 0, sizeof(clone->sqlparse));
 
        /* The clone will have its own connection, so disregard connection state */
        clone->connection = NULL;
@@ -4242,7 +4245,9 @@ DeCloneArchive(ArchiveHandle *AH)
        /* Clear format-specific state */
        (AH->DeClonePtr) (AH);
 
-       /* Clear state allocated by CloneArchive ... none at the moment */
+       /* Clear state allocated by CloneArchive */
+       if (AH->sqlparse.curCmd)
+               destroyPQExpBuffer(AH->sqlparse.curCmd);
 
        /* Clear any connection-local state */
        if (AH->currUser)
index 8a3a6f9e2221a7f513c607ee329e7586b7dc60fa..45c9139bc0664ca62a4fcf03b3492275b0077525 100644 (file)
@@ -132,6 +132,20 @@ typedef void (*DeClonePtr) (struct _archiveHandle * AH);
 
 typedef size_t (*CustomOutPtr) (struct _archiveHandle * AH, const void *buf, size_t len);
 
+typedef enum
+{
+       SQL_SCAN = 0,                           /* normal */
+       SQL_IN_SINGLE_QUOTE,            /* '...' literal */
+       SQL_IN_DOUBLE_QUOTE                     /* "..." identifier */
+} sqlparseState;
+
+typedef struct
+{
+       sqlparseState state;            /* see above */
+       bool            backSlash;              /* next char is backslash quoted? */
+       PQExpBuffer curCmd;                     /* incomplete line (NULL if not created) */
+} sqlparseInfo;
+
 typedef enum
 {
        STAGE_NONE = 0,
@@ -140,6 +154,13 @@ typedef enum
        STAGE_FINALIZING
 } ArchiverStage;
 
+typedef enum
+{
+       OUTPUT_SQLCMDS = 0,                     /* emitting general SQL commands */
+       OUTPUT_COPYDATA,                        /* writing COPY data */
+       OUTPUT_OTHERDATA                        /* writing data as INSERT commands */
+} ArchiverOutput;
+
 typedef enum
 {
        REQ_SCHEMA = 1,
@@ -167,6 +188,8 @@ typedef struct _archiveHandle
                                                                 * Added V1.7 */
        ArchiveFormat format;           /* Archive format */
 
+       sqlparseInfo sqlparse;          /* state for parsing INSERT data */
+
        time_t          createDate;             /* Date archive created */
 
        /*
@@ -217,7 +240,7 @@ typedef struct _archiveHandle
        PGconn     *connection;
        int                     connectToDB;    /* Flag to indicate if direct DB connection is
                                                                 * required */
-       bool            writingCopyData;        /* True when we are sending COPY data */
+       ArchiverOutput outputKind;      /* Flag for what we're currently writing */
        bool            pgCopyIn;               /* Currently in libpq 'COPY IN' mode. */
 
        int                     loFd;                   /* BLOB fd */
index 600728d19856aac3b2b46850ec243bfed0c63a37..2d2ddf61f7adf7e3132980020937ca8c01dd8323 100644 (file)
@@ -369,15 +369,93 @@ ExecuteSqlCommand(ArchiveHandle *AH, const char *qry, const char *desc)
 }
 
 
+/*
+ * Process non-COPY table data (that is, INSERT commands).
+ *
+ * The commands have been run together as one long string for compressibility,
+ * and we are receiving them in bufferloads with arbitrary boundaries, so we
+ * have to locate command boundaries and save partial commands across calls.
+ * All state must be kept in AH->sqlparse, not in local variables of this
+ * routine.  We assume that AH->sqlparse was filled with zeroes when created.
+ *
+ * We have to lex the data to the extent of identifying literals and quoted
+ * identifiers, so that we can recognize statement-terminating semicolons.
+ * We assume that INSERT data will not contain SQL comments, E'' literals,
+ * or dollar-quoted strings, so this is much simpler than a full SQL lexer.
+ */
+static void
+ExecuteInsertCommands(ArchiveHandle *AH, const char *buf, size_t bufLen)
+{
+       const char *qry = buf;
+       const char *eos = buf + bufLen;
+
+       /* initialize command buffer if first time through */
+       if (AH->sqlparse.curCmd == NULL)
+               AH->sqlparse.curCmd = createPQExpBuffer();
+
+       for (; qry < eos; qry++)
+       {
+               char    ch = *qry;
+
+               /* For neatness, we skip any newlines between commands */
+               if (!(ch == '\n' && AH->sqlparse.curCmd->len == 0))
+                       appendPQExpBufferChar(AH->sqlparse.curCmd, ch);
+
+               switch (AH->sqlparse.state)
+               {
+                       case SQL_SCAN:          /* Default state == 0, set in _allocAH */
+                               if (ch == ';')
+                               {
+                                       /*
+                                        * We've found the end of a statement. Send it and reset
+                                        * the buffer.
+                                        */
+                                       ExecuteSqlCommand(AH, AH->sqlparse.curCmd->data,
+                                                                         "could not execute query");
+                                       resetPQExpBuffer(AH->sqlparse.curCmd);
+                               }
+                               else if (ch == '\'')
+                               {
+                                       AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
+                                       AH->sqlparse.backSlash = false;
+                               }
+                               else if (ch == '"')
+                               {
+                                       AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
+                               }
+                               break;
+
+                       case SQL_IN_SINGLE_QUOTE:
+                               /* We needn't handle '' specially */
+                               if (ch == '\'' && !AH->sqlparse.backSlash)
+                                       AH->sqlparse.state = SQL_SCAN;
+                               else if (ch == '\\' && !AH->public.std_strings)
+                                       AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
+                               else
+                                       AH->sqlparse.backSlash = false;
+                               break;
+
+                       case SQL_IN_DOUBLE_QUOTE:
+                               /* We needn't handle "" specially */
+                               if (ch == '"')
+                                       AH->sqlparse.state = SQL_SCAN;
+                               break;
+               }
+       }
+}
+
+
 /*
  * Implement ahwrite() for direct-to-DB restore
  */
 int
 ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
 {
-       if (AH->writingCopyData)
+       if (AH->outputKind == OUTPUT_COPYDATA)
        {
                /*
+                * COPY data.
+                *
                 * We drop the data on the floor if libpq has failed to enter COPY
                 * mode; this allows us to behave reasonably when trying to continue
                 * after an error in a COPY command.
@@ -387,9 +465,19 @@ ExecuteSqlCommandBuf(ArchiveHandle *AH, const char *buf, size_t bufLen)
                        die_horribly(AH, modulename, "error returned by PQputCopyData: %s",
                                                 PQerrorMessage(AH->connection));
        }
+       else if (AH->outputKind == OUTPUT_OTHERDATA)
+       {
+               /*
+                * Table data expressed as INSERT commands.
+                */
+               ExecuteInsertCommands(AH, buf, bufLen);
+       }
        else
        {
                /*
+                * General SQL commands; we assume that commands will not be split
+                * across calls.
+                *
                 * In most cases the data passed to us will be a null-terminated
                 * string, but if it's not, we have to add a trailing null.
                 */
index 57f2ed31ad0846441829b4685b3d229f5016280d..06fa3c92a45bbcc35fff2c0c7e9f08d5e4215480 100644 (file)
@@ -1352,6 +1352,14 @@ dumpTableData_copy(Archive *fout, void *dcontext)
        return 1;
 }
 
+/*
+ * Dump table data using INSERT commands.
+ *
+ * Caution: when we restore from an archive file direct to database, the
+ * INSERT commands emitted by this function have to be parsed by
+ * pg_backup_db.c's ExecuteInsertCommands(), which will not handle comments,
+ * E'' strings, or dollar-quoted strings.  So don't emit anything like that.
+ */
 static int
 dumpTableData_insert(Archive *fout, void *dcontext)
 {