Improve performance of dumpSequenceData().
authorNathan Bossart <nathan@postgresql.org>
Wed, 31 Jul 2024 15:12:42 +0000 (10:12 -0500)
committerNathan Bossart <nathan@postgresql.org>
Wed, 31 Jul 2024 15:12:42 +0000 (10:12 -0500)
As one might guess, this function dumps the sequence data.  It is
called once per sequence, and each such call executes a query to
retrieve the relevant data for a single sequence.  This can cause
pg_dump to take significantly longer, especially when there are
many sequences.

This commit improves the performance of this function by gathering
all the sequence data with a single query at the beginning of
pg_dump.  This information is stored in a sorted array that
dumpSequenceData() can bsearch() for what it needs.  This follows a
similar approach as previous commits that introduced sorted arrays
for role information, pg_class information, and sequence metadata.
As with those commits, this patch will cause pg_dump to use more
memory, but that isn't expected to be too egregious.

Note that we use the brand new function pg_sequence_read_tuple() in
the query that gathers all sequence data, so we must continue to
use the preexisting query-per-sequence approach for versions older
than 18.

Reviewed-by: Euler Taveira, Michael Paquier, Tom Lane
Discussion: https://postgr.es/m/20240503025140.GA1227404%40nathanxps13

src/bin/pg_dump/pg_dump.c

index d4e6694cc929eb8fa7c1c63eb2f07c4788139ecc..0d0251627384ba9143b9e09735cd83850228ec49 100644 (file)
@@ -132,6 +132,8 @@ typedef struct
        int64           startv;                 /* start value */
        int64           incby;                  /* increment value */
        int64           cache;                  /* cache size */
+       int64           last_value;             /* last value of sequence */
+       bool            is_called;              /* whether nextval advances before returning */
 } SequenceItem;
 
 typedef enum OidOptions
@@ -17330,16 +17332,30 @@ collectSequences(Archive *fout)
         * Before Postgres 10, sequence metadata is in the sequence itself.  With
         * some extra effort, we might be able to use the sorted table for those
         * versions, but for now it seems unlikely to be worth it.
+        *
+        * Since version 18, we can gather the sequence data in this query with
+        * pg_sequence_read_tuple(), but we only do so for non-schema-only dumps.
         */
        if (fout->remoteVersion < 100000)
                return;
-       else
+       else if (fout->remoteVersion < 180000 ||
+                        (fout->dopt->schemaOnly && !fout->dopt->sequence_data))
                query = "SELECT seqrelid, format_type(seqtypid, NULL), "
                        "seqstart, seqincrement, "
                        "seqmax, seqmin, "
-                       "seqcache, seqcycle "
+                       "seqcache, seqcycle, "
+                       "NULL, 'f' "
                        "FROM pg_catalog.pg_sequence "
                        "ORDER BY seqrelid";
+       else
+               query = "SELECT seqrelid, format_type(seqtypid, NULL), "
+                       "seqstart, seqincrement, "
+                       "seqmax, seqmin, "
+                       "seqcache, seqcycle, "
+                       "last_value, is_called "
+                       "FROM pg_catalog.pg_sequence, "
+                       "pg_sequence_read_tuple(seqrelid) "
+                       "ORDER BY seqrelid;";
 
        res = ExecuteSqlQuery(fout, query, PGRES_TUPLES_OK);
 
@@ -17356,6 +17372,8 @@ collectSequences(Archive *fout)
                sequences[i].minv = strtoi64(PQgetvalue(res, i, 5), NULL, 10);
                sequences[i].cache = strtoi64(PQgetvalue(res, i, 6), NULL, 10);
                sequences[i].cycled = (strcmp(PQgetvalue(res, i, 7), "t") == 0);
+               sequences[i].last_value = strtoi64(PQgetvalue(res, i, 8), NULL, 10);
+               sequences[i].is_called = (strcmp(PQgetvalue(res, i, 9), "t") == 0);
        }
 
        PQclear(res);
@@ -17622,30 +17640,59 @@ static void
 dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
 {
        TableInfo  *tbinfo = tdinfo->tdtable;
-       PGresult   *res;
-       char       *last;
+       int64           last;
        bool            called;
        PQExpBuffer query = createPQExpBuffer();
 
-       appendPQExpBuffer(query,
-                                         "SELECT last_value, is_called FROM %s",
-                                         fmtQualifiedDumpable(tbinfo));
+       /*
+        * For versions >= 18, the sequence information is gathered in the sorted
+        * array before any calls to dumpSequenceData().  See collectSequences()
+        * for more information.
+        *
+        * For older versions, we have to query the sequence relations
+        * individually.
+        */
+       if (fout->remoteVersion < 180000)
+       {
+               PGresult   *res;
 
-       res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+               appendPQExpBuffer(query,
+                                                 "SELECT last_value, is_called FROM %s",
+                                                 fmtQualifiedDumpable(tbinfo));
 
-       if (PQntuples(res) != 1)
-               pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
-                                                 "query to get data of sequence \"%s\" returned %d rows (expected 1)",
-                                                 PQntuples(res)),
-                                tbinfo->dobj.name, PQntuples(res));
+               res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
 
-       last = PQgetvalue(res, 0, 0);
-       called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+               if (PQntuples(res) != 1)
+                       pg_fatal(ngettext("query to get data of sequence \"%s\" returned %d row (expected 1)",
+                                                         "query to get data of sequence \"%s\" returned %d rows (expected 1)",
+                                                         PQntuples(res)),
+                                        tbinfo->dobj.name, PQntuples(res));
+
+               last = strtoi64(PQgetvalue(res, 0, 0), NULL, 10);
+               called = (strcmp(PQgetvalue(res, 0, 1), "t") == 0);
+
+               PQclear(res);
+       }
+       else
+       {
+               SequenceItem key = {0};
+               SequenceItem *entry;
+
+               Assert(sequences);
+               Assert(tbinfo->dobj.catId.oid);
+
+               key.oid = tbinfo->dobj.catId.oid;
+               entry = bsearch(&key, sequences, nsequences,
+                                               sizeof(SequenceItem), SequenceItemCmp);
+
+               last = entry->last_value;
+               called = entry->is_called;
+       }
 
        resetPQExpBuffer(query);
        appendPQExpBufferStr(query, "SELECT pg_catalog.setval(");
        appendStringLiteralAH(query, fmtQualifiedDumpable(tbinfo), fout);
-       appendPQExpBuffer(query, ", %s, %s);\n",
+       appendPQExpBuffer(query, ", " INT64_FORMAT ", %s);\n",
                                          last, (called ? "true" : "false"));
 
        if (tdinfo->dobj.dump & DUMP_COMPONENT_DATA)
@@ -17659,8 +17706,6 @@ dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo)
                                                                  .deps = &(tbinfo->dobj.dumpId),
                                                                  .nDeps = 1));
 
-       PQclear(res);
-
        destroyPQExpBuffer(query);
 }