Simplify and speed up pg_dump's creation of parent-table links.
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 17 Mar 2023 17:43:10 +0000 (13:43 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 17 Mar 2023 17:43:10 +0000 (13:43 -0400)
Instead of trying to optimize this by skipping creation of the
links for tables we don't plan to dump, just create them all in
bulk with a single scan over the pg_inherits data.  The previous
approach was more or less O(N^2) in the number of pg_inherits
entries, not to mention being way too complicated.

Also, don't create useless TableAttachInfo objects.
It's silly to create a TableAttachInfo object that we're not
going to dump, when we know perfectly well at creation time
that it won't be dumped.

Patch by me; thanks to Julien Rouhaud for review.

Discussion: https://postgr.es/m/1376149.1675268279@sss.pgh.pa.us

src/bin/pg_dump/common.c
src/bin/pg_dump/pg_dump.c
src/bin/pg_dump/pg_dump.h

index 44558b60e8f9eef20b94332373a57589e85c15ff..5d988986edc01fafbca2818aa96091c1541f8221 100644 (file)
@@ -83,8 +83,6 @@ static void flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
                                                  InhInfo *inhinfo, int numInherits);
 static void flagInhIndexes(Archive *fout, TableInfo *tblinfo, int numTables);
 static void flagInhAttrs(DumpOptions *dopt, TableInfo *tblinfo, int numTables);
-static void findParentsByOid(TableInfo *self,
-                                                        InhInfo *inhinfo, int numInherits);
 static int     strInArray(const char *pattern, char **arr, int arr_size);
 static IndxInfo *findIndexByOid(Oid oid);
 
@@ -288,45 +286,70 @@ static void
 flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
                          InhInfo *inhinfo, int numInherits)
 {
+       TableInfo  *child = NULL;
+       TableInfo  *parent = NULL;
        int                     i,
                                j;
 
-       for (i = 0; i < numTables; i++)
+       /*
+        * Set up links from child tables to their parents.
+        *
+        * We used to attempt to skip this work for tables that are not to be
+        * dumped; but the optimizable cases are rare in practice, and setting up
+        * these links in bulk is cheaper than the old way.  (Note in particular
+        * that it's very rare for a child to have more than one parent.)
+        */
+       for (i = 0; i < numInherits; i++)
        {
-               bool            find_parents = true;
-               bool            mark_parents = true;
-
-               /* Some kinds never have parents */
-               if (tblinfo[i].relkind == RELKIND_SEQUENCE ||
-                       tblinfo[i].relkind == RELKIND_VIEW ||
-                       tblinfo[i].relkind == RELKIND_MATVIEW)
-                       continue;
-
                /*
-                * Normally, we don't bother computing anything for non-target tables.
-                * However, we must find the parents of non-root partitioned tables in
-                * any case, so that we can trace from leaf partitions up to the root
-                * (in case a leaf is to be dumped but its parents are not).  We need
-                * not mark such parents interesting for getTableAttrs, though.
+                * Skip a hashtable lookup if it's same table as last time.  This is
+                * unlikely for the child, but less so for the parent.  (Maybe we
+                * should ask the backend for a sorted array to make it more likely?
+                * Not clear the sorting effort would be repaid, though.)
                 */
-               if (!tblinfo[i].dobj.dump)
+               if (child == NULL ||
+                       child->dobj.catId.oid != inhinfo[i].inhrelid)
                {
-                       mark_parents = false;
+                       child = findTableByOid(inhinfo[i].inhrelid);
 
-                       if (!(tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE &&
-                                 tblinfo[i].ispartition))
-                               find_parents = false;
+                       /*
+                        * If we find no TableInfo, assume the pg_inherits entry is for a
+                        * partitioned index, which we don't need to track.
+                        */
+                       if (child == NULL)
+                               continue;
                }
+               if (parent == NULL ||
+                       parent->dobj.catId.oid != inhinfo[i].inhparent)
+               {
+                       parent = findTableByOid(inhinfo[i].inhparent);
+                       if (parent == NULL)
+                               pg_fatal("failed sanity check, parent OID %u of table \"%s\" (OID %u) not found",
+                                                inhinfo[i].inhparent,
+                                                child->dobj.name,
+                                                child->dobj.catId.oid);
+               }
+               /* Add this parent to the child's list of parents. */
+               if (child->numParents > 0)
+                       child->parents = pg_realloc_array(child->parents,
+                                                                                         TableInfo *,
+                                                                                         child->numParents + 1);
+               else
+                       child->parents = pg_malloc_array(TableInfo *, 1);
+               child->parents[child->numParents++] = parent;
+       }
 
-               /* If needed, find all the immediate parent tables. */
-               if (find_parents)
-                       findParentsByOid(&tblinfo[i], inhinfo, numInherits);
-
+       /*
+        * Now consider all child tables and mark parents interesting as needed.
+        */
+       for (i = 0; i < numTables; i++)
+       {
                /*
                 * If needed, mark the parents as interesting for getTableAttrs and
-                * getIndexes.
+                * getIndexes.  We only need this for direct parents of dumpable
+                * tables.
                 */
-               if (mark_parents)
+               if (tblinfo[i].dobj.dump)
                {
                        int                     numParents = tblinfo[i].numParents;
                        TableInfo **parents = tblinfo[i].parents;
@@ -336,7 +359,8 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
                }
 
                /* Create TableAttachInfo object if needed */
-               if (tblinfo[i].dobj.dump && tblinfo[i].ispartition)
+               if ((tblinfo[i].dobj.dump & DUMP_COMPONENT_DEFINITION) &&
+                       tblinfo[i].ispartition)
                {
                        TableAttachInfo *attachinfo;
 
@@ -995,52 +1019,6 @@ findOwningExtension(CatalogId catalogId)
 }
 
 
-/*
- * findParentsByOid
- *       find a table's parents in tblinfo[]
- */
-static void
-findParentsByOid(TableInfo *self,
-                                InhInfo *inhinfo, int numInherits)
-{
-       Oid                     oid = self->dobj.catId.oid;
-       int                     i,
-                               j;
-       int                     numParents;
-
-       numParents = 0;
-       for (i = 0; i < numInherits; i++)
-       {
-               if (inhinfo[i].inhrelid == oid)
-                       numParents++;
-       }
-
-       self->numParents = numParents;
-
-       if (numParents > 0)
-       {
-               self->parents = pg_malloc_array(TableInfo *, numParents);
-               j = 0;
-               for (i = 0; i < numInherits; i++)
-               {
-                       if (inhinfo[i].inhrelid == oid)
-                       {
-                               TableInfo  *parent;
-
-                               parent = findTableByOid(inhinfo[i].inhparent);
-                               if (parent == NULL)
-                                       pg_fatal("failed sanity check, parent OID %u of table \"%s\" (OID %u) not found",
-                                                        inhinfo[i].inhparent,
-                                                        self->dobj.name,
-                                                        oid);
-                               self->parents[j++] = parent;
-                       }
-               }
-       }
-       else
-               self->parents = NULL;
-}
-
 /*
  * parseOidArray
  *       parse a string of numbers delimited by spaces into a character array
index c94aedc145bdc19a044122a6b282a4bb45a30a29..d62780a0880e8e99510952659f627b1fcf265117 100644 (file)
@@ -16180,9 +16180,6 @@ dumpTableAttach(Archive *fout, const TableAttachInfo *attachinfo)
        if (dopt->dataOnly)
                return;
 
-       if (!(attachinfo->partitionTbl->dobj.dump & DUMP_COMPONENT_DEFINITION))
-               return;
-
        q = createPQExpBuffer();
 
        if (!fout->is_prepared[PREPQUERY_DUMPTABLEATTACH])
index ffa37265c82dee79dc9ef2ebfc02c3d7c9f7f036..283cd1a602c5158318a5b2bee8c4a0f7c383cde9 100644 (file)
@@ -320,6 +320,9 @@ typedef struct _tableInfo
        bool            ispartition;    /* is table a partition? */
        bool            unsafe_partitions;      /* is it an unsafe partitioned table? */
 
+       int                     numParents;             /* number of (immediate) parent tables */
+       struct _tableInfo **parents;    /* TableInfos of immediate parents */
+
        /*
         * These fields are computed only if we decide the table is interesting
         * (it's either a table to dump, or a direct parent of a dumpable table).
@@ -351,8 +354,6 @@ typedef struct _tableInfo
        /*
         * Stuff computed only for dumpable tables.
         */
-       int                     numParents;             /* number of (immediate) parent tables */
-       struct _tableInfo **parents;    /* TableInfos of immediate parents */
        int                     numIndexes;             /* number of indexes */
        struct _indxInfo *indexes;      /* indexes */
        struct _tableDataInfo *dataObj; /* TableDataInfo, if dumping its data */