Make PlaceHolderInfo lookup O(1).
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Aug 2022 19:35:51 +0000 (15:35 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 17 Aug 2022 19:35:51 +0000 (15:35 -0400)
Up to now we've just searched the placeholder_list when we want to
find the PlaceHolderInfo with a given ID.  While there's no evidence
of that being a problem in the field, an upcoming patch will add
find_placeholder_info() calls in build_joinrel_tlist(), which seems
likely to make it more of an issue: a joinrel emitting lots of
PlaceHolderVars would incur O(N^2) cost, and we might be building
a lot of joinrels in complex queries.  Hence, add an array that
can be indexed directly by phid to make the lookups constant-time.

Discussion: https://postgr.es/m/1405792.1660677844@sss.pgh.pa.us

src/backend/optimizer/plan/analyzejoins.c
src/backend/optimizer/plan/planmain.c
src/backend/optimizer/util/placeholder.c
src/backend/optimizer/util/var.c
src/include/nodes/pathnodes.h

index 337f470d58346f572a5f6a165cafc315178315be..bbeca9a9abe78beb2858b34ada95b059313ac6aa 100644 (file)
@@ -388,8 +388,11 @@ remove_rel_from_query(PlannerInfo *root, int relid, Relids joinrelids)
        Assert(!bms_is_member(relid, phinfo->ph_lateral));
        if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
            bms_is_member(relid, phinfo->ph_eval_at))
+       {
            root->placeholder_list = foreach_delete_current(root->placeholder_list,
                                                            l);
+           root->placeholder_array[phinfo->phid] = NULL;
+       }
        else
        {
            phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
index c92ddd27ed10c592b12c907f1cbc16f42637b4e9..248cde4d9bdb2991e5becd4afb8866425475f5a9 100644 (file)
@@ -75,6 +75,8 @@ query_planner(PlannerInfo *root,
    root->full_join_clauses = NIL;
    root->join_info_list = NIL;
    root->placeholder_list = NIL;
+   root->placeholder_array = NULL;
+   root->placeholder_array_size = 0;
    root->fkey_list = NIL;
    root->initial_rels = NIL;
 
index 3b0f0584f0174682f356876302d89d4149d8e4b3..0184e6d36c13f43e385a868fa69b8cf66b5237b7 100644 (file)
@@ -71,16 +71,19 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv,
 {
    PlaceHolderInfo *phinfo;
    Relids      rels_used;
-   ListCell   *lc;
 
    /* if this ever isn't true, we'd need to be able to look in parent lists */
    Assert(phv->phlevelsup == 0);
 
-   foreach(lc, root->placeholder_list)
+   /* Use placeholder_array to look up existing PlaceHolderInfo quickly */
+   if (phv->phid < root->placeholder_array_size)
+       phinfo = root->placeholder_array[phv->phid];
+   else
+       phinfo = NULL;
+   if (phinfo != NULL)
    {
-       phinfo = (PlaceHolderInfo *) lfirst(lc);
-       if (phinfo->phid == phv->phid)
-           return phinfo;
+       Assert(phinfo->phid == phv->phid);
+       return phinfo;
    }
 
    /* Not found, so create it */
@@ -115,8 +118,38 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar *phv,
    phinfo->ph_width = get_typavgwidth(exprType((Node *) phv->phexpr),
                                       exprTypmod((Node *) phv->phexpr));
 
+   /*
+    * Add to both placeholder_list and placeholder_array.  Note: because we
+    * store pointers to the PlaceHolderInfos in two data structures, it'd be
+    * unsafe to pass the whole placeholder_list structure through
+    * expression_tree_mutator or the like --- or at least, you'd have to
+    * rebuild the placeholder_array afterwards.
+    */
    root->placeholder_list = lappend(root->placeholder_list, phinfo);
 
+   if (phinfo->phid >= root->placeholder_array_size)
+   {
+       /* Must allocate or enlarge placeholder_array */
+       int         new_size;
+
+       new_size = root->placeholder_array_size ? root->placeholder_array_size * 2 : 8;
+       while (phinfo->phid >= new_size)
+           new_size *= 2;
+       if (root->placeholder_array)
+       {
+           root->placeholder_array = (PlaceHolderInfo **)
+               repalloc(root->placeholder_array,
+                        sizeof(PlaceHolderInfo *) * new_size);
+           MemSet(root->placeholder_array + root->placeholder_array_size, 0,
+                  sizeof(PlaceHolderInfo *) * (new_size - root->placeholder_array_size));
+       }
+       else
+           root->placeholder_array = (PlaceHolderInfo **)
+               palloc0(new_size * sizeof(PlaceHolderInfo *));
+       root->placeholder_array_size = new_size;
+   }
+   root->placeholder_array[phinfo->phid] = phinfo;
+
    /*
     * The PHV's contained expression may contain other, lower-level PHVs.  We
     * now know we need to get those into the PlaceHolderInfo list, too, so we
index ebc6ce84b0bdcffe5b303aaef3e4b5f88362b65a..7db86c39efcf97aa18f16702b58a583fb5713dd8 100644 (file)
@@ -210,15 +210,8 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
 
            if (phv->phlevelsup == 0)
            {
-               ListCell   *lc;
-
-               foreach(lc, context->root->placeholder_list)
-               {
-                   phinfo = (PlaceHolderInfo *) lfirst(lc);
-                   if (phinfo->phid == phv->phid)
-                       break;
-                   phinfo = NULL;
-               }
+               if (phv->phid < context->root->placeholder_array_size)
+                   phinfo = context->root->placeholder_array[phv->phid];
            }
            if (phinfo == NULL)
            {
index 3b065139e68d61952eadf936319ec6609c753949..b310d3c6fd336c34bb8fccd340e5e55f9679089d 100644 (file)
@@ -357,6 +357,11 @@ struct PlannerInfo
    /* list of PlaceHolderInfos */
    List       *placeholder_list;
 
+   /* array of PlaceHolderInfos indexed by phid */
+   struct PlaceHolderInfo **placeholder_array pg_node_attr(read_write_ignore, array_size(placeholder_array_size));
+   /* allocated size of array */
+   int         placeholder_array_size pg_node_attr(read_write_ignore);
+
    /* list of ForeignKeyOptInfos */
    List       *fkey_list;