pgpa_join.c
authorRobert Haas <rhaas@postgresql.org>
Thu, 24 Apr 2025 18:37:56 +0000 (14:37 -0400)
committerRobert Haas <rhaas@postgresql.org>
Mon, 19 May 2025 15:02:50 +0000 (11:02 -0400)
contrib/pg_plan_advice/meson.build
contrib/pg_plan_advice/pgpa_join.c [new file with mode: 0644]
contrib/pg_plan_advice/pgpa_join.h

index 2f05b7ff5425a0afaecf485b581dc6200972f549..8caca010acda94374262ed475080bb2cc03efa34 100644 (file)
@@ -1,8 +1,7 @@
 # Copyright (c) 2022-2024, PostgreSQL Global Development Group
 
 pg_plan_advice_sources = files(
-  'create_advice.c',
-  'pg_plan_advice.c',
+  'pgpa_join.c',
 )
 
 if host_system == 'windows'
diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c
new file mode 100644 (file)
index 0000000..7ed543b
--- /dev/null
@@ -0,0 +1,222 @@
+#include "postgres.h"
+
+#include "pgpa_join.h"
+
+static pgpa_join_strategy pgpa_decompose_join(Plan *plan,
+                                                                                         Plan **realinner,
+                                                                                         Plan **realouter);
+static void pgpa_fill_join_member(PlannedStmt *pstmt, pgpa_join_member * m);
+static pgpa_clumped_join *pgpa_make_clumped_join_from_elided(ElidedNode *n);
+static unsigned pgpa_get_single_rti(Plan *plan);
+
+/*
+ * Should a given plan node be represented by a pgpa_unrolled_join, a
+ * pgpa_clumped_join, or neither?
+ */
+pgpa_join_class
+pgpa_get_join_class(Plan *plan)
+{
+       Bitmapset  *relids = NULL;
+
+       /* Standard join types can be unrolled. */
+       if (IsA(plan, NestLoop) || IsA(plan, MergeJoin) || IsA(plan, HashJoin))
+               return PGPA_UNROLLED_JOIN;
+
+       /*
+        * If a Result node, ForeignScan node, Append node, or MergeAppend node
+        * has multiple relids, then it's a clumped join; otherwise, it's not a
+        * join at all.
+        */
+       if (IsA(plan, Result))
+               relids = ((Result *) plan)->relids;
+       else if (IsA(plan, ForeignScan))
+               relids = ((ForeignScan *) plan)->fs_relids;
+       else if (IsA(plan, Append))
+               relids = ((Append *) plan)->apprelids;
+       else if (IsA(plan, MergeAppend))
+               relids = ((MergeAppend *) plan)->apprelids;
+       if (bms_membership(relids) == BMS_MULTIPLE)
+               return PGPA_CLUMPED_JOIN;
+
+       /* Doesn't seem to be a join. */
+       return PGPA_NON_JOIN;
+}
+
+pgpa_unrolled_join *
+pgpa_make_unrolled_join(PlannedStmt *pstmt, Plan *plan)
+{
+       unsigned        nallocated = 4;
+       unsigned        nused = 0;
+       unsigned        i;
+       Plan      **subplans;
+       pgpa_join_strategy *strategy;
+       pgpa_unrolled_join *ujoin;
+
+       subplans = palloc_array(Plan *, nallocated);
+       strategy = palloc_array(pgpa_join_strategy, nallocated);
+
+       /*
+        * Repeatedly descend to the outer child of the current join until we find
+        * something that is not an unrollable join.
+        */
+       while (pgpa_get_join_class(plan) == PGPA_UNROLLED_JOIN)
+       {
+               if (nallocated == nused)
+               {
+                       nallocated *= 2;
+                       subplans = repalloc_array(subplans, Plan *, nallocated);
+                       strategy = repalloc_array(strategy, pgpa_join_strategy,
+                                                                         nallocated);
+               }
+
+               strategy[nused] =
+                       pgpa_decompose_join(plan, &plan, &subplans[nused]);
+               ++nused;
+       }
+
+       /*
+        * This function should only be called with an unrollable join, so the
+        * loop above should have iterated at least once.
+        */
+       Assert(nused > 0);
+
+       /* Allocate result structures. */
+       ujoin = palloc0_object(pgpa_unrolled_join);
+       ujoin->outer.plan = plan;
+       ujoin->strategy = palloc0_array(pgpa_join_strategy, nused);
+       ujoin->inner = palloc0_array(pgpa_join_member, nused);
+
+       /*
+        * We want the joins from the deepest part of the plan tree to appear
+        * first in the result object, and the loop above produces exactly the
+        * reverse, so we need to flip both arrays when constructing the final
+        * result.
+        */
+       for (i = 0; i < nused; ++i)
+       {
+               ujoin->strategy[i] = strategy[nused - i - 1];
+               ujoin->inner[i].plan = subplans[nused - i - 1];
+       }
+
+       /*
+        * Basic structure is now in place, but so far each pgpa_join_member only
+        * has a Plan pointer. Let's go back through them and complete the
+        * remaining details.
+        */
+       pgpa_fill_join_member(pstmt, &ujoin->outer);
+       for (i = 0; i < nused; ++i)
+               pgpa_fill_join_member(pstmt, &ujoin->inner[i]);
+
+       return ujoin;
+}
+
+static pgpa_join_strategy
+pgpa_decompose_join(Plan *plan, Plan **realinner, Plan **realouter)
+{
+       Plan       *outerplan = plan->lefttree;
+       Plan       *innerplan = plan->righttree;
+       pgpa_join_strategy strategy;
+
+       switch (nodeTag(plan))
+       {
+               case T_MergeJoin:
+                       if (IsA(innerplan, Material))
+                       {
+                               innerplan = innerplan->lefttree;
+                               strategy = JSTRAT_MERGEJOIN_MATERIALIZE;
+                       }
+                       else
+                               strategy = JSTRAT_MERGEJOIN_PLAIN;
+                       break;
+               case T_NestLoop:
+                       if (IsA(innerplan, Material))
+                       {
+                               innerplan = innerplan->lefttree;
+                               strategy = JSTRAT_NESTLOOP_MATERIALIZE;
+                       }
+                       else if (IsA(innerplan, Memoize))
+                       {
+                               innerplan = innerplan->lefttree;
+                               strategy = JSTRAT_NESTLOOP_MEMOIZE;
+                       }
+                       else
+                               strategy = JSTRAT_NESTLOOP_PLAIN;
+                       break;
+               case T_HashJoin:
+                       strategy = JSTRAT_HASHJOIN;
+                       break;
+               default:
+                       elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
+       }
+
+       *realinner = innerplan;
+       *realouter = outerplan;
+       return strategy;
+}
+
+static void
+pgpa_fill_join_member(PlannedStmt *pstmt, pgpa_join_member * m)
+{
+       /*
+        * It's possible that more than one node was elided here; if so, we care
+        * about the uppermost one, which will be last in the list.
+        */
+       foreach_node(ElidedNode, n, pstmt->elidedNodes)
+       {
+               if (n->plan_node_id == m->plan->plan_node_id)
+                       m->elided_node = n;
+       }
+
+       /*
+        * If we found an elided node, use that to complete the state for this
+        * join member.
+        */
+       if (m->elided_node != NULL)
+       {
+               switch (bms_membership(m->elided_node->relids))
+               {
+                       case BMS_MULTIPLE:
+                               m->clump_join =
+                                       pgpa_make_clumped_join_from_elided(m->elided_node);
+                               break;
+                       case BMS_SINGLETON:
+                               m->rti = bms_singleton_member(m->elided_node->relids);
+                               break;
+                       case BMS_EMPTY_SET:
+                               elog(ERROR, "no RTIs for elided node");
+                               break;
+               }
+               return;
+       }
+
+       /*
+        * Otherwise, classify the Plan node and proceed accordingly.
+        */
+       switch (pgpa_get_join_class(m->plan))
+       {
+               case PGPA_NON_JOIN:
+                       m->rti = pgpa_get_single_rti(m->plan);
+                       break;
+               case PGPA_CLUMPED_JOIN:
+                       m->clump_join = pgpa_make_clumped_join(m->plan);
+                       break;
+               case PGPA_UNROLLED_JOIN:
+                       m->unrolled_join = pgpa_make_unrolled_join(pstmt, m->plan);
+                       break;
+       }
+}
+
+static pgpa_clumped_join *
+pgpa_make_clumped_join_from_elided(ElidedNode *n)
+{
+       /* XXX FIXME */
+       return NULL;
+}
+
+/* XXX might not belong in this file */
+static unsigned
+pgpa_get_single_rti(Plan *plan)
+{
+       /* XXX */
+       return 0;
+}
index 7af620f87e58fa24c83ffa718fefc21a27258ee1..b8d71684024fb6dffe9d3c44ab6ea26382cfd8d0 100644 (file)
 #ifndef PGPA_JOIN_H
 #define PGPA_JOIN_H
 
+#include "nodes/plannodes.h"
+
+typedef struct pgpa_unrolled_join pgpa_unrolled_join;
+
 /*
  * Certain types of plan nodes can join any number of input relations in
  * a single step; we call these "clumped joins".
@@ -53,6 +57,8 @@ typedef struct
  * more precisely here: merge joins have the option of using materialization
  * on the inner side, and nested loops can use either materialization or
  * memoization.
+ *
+ * XXX. I think we need to do something with Unique here also.
  */
 typedef enum
 {
@@ -65,85 +71,75 @@ typedef enum
 } pgpa_join_strategy;
 
 /*
- * Non-clumped joins are unrolled; that is, we convert outer-deep join trees
- * to a flat structure. ((A JOIN B) JOIN C) JOIN D gets converted to
- * outer_subplan = A, inner_subplans = <B C D>, provided that none of the
- * joins involved are clumped. When joins aren't outer-deep, substructure is
- * required, e.g. (A JOIN B) JOIN (C JOIN D) is represented as
- * outer_subplan = A, inner_subplans = <B X>, where X is a clumped or
- * unrolled join covering C-D.
+ * Within a pgpa_unrolled_join, we may need to refer to either to scans or to
+ * other joins, which may be either unrolled or clumped.
+ *
+ * The plan node we store here will be the inner or outer child of the join
+ * node, as appropriate, except that we look through subnodes that we regard as
+ * part of the join method itself. For instance, for a Nested Loop that
+ * materializes the inner input, we'll store the child of the Materialize node,
+ * not the Materialize node itself.
+ *
+ * XXX. What happens with Gather and Gather Merge here?
+ *
+ * If setrefs processing elided one or more nodes from the plan tree, then
+ * we'll store details about the topmost of those in elided_node.
+ *
+ * If this is a scan, rti will be the RTI taken from elided_node, or if that
+ * is NULL, from plan. Otherwise, this is a join, and either clump_join or
+ * unrolled_join will be set as appropriate; otherwise, they will be NULL.
  */
 typedef struct
 {
-       /*
-        * Typically, the outermost subplan will be a scan of some relation, in
-        * which case outer_rti will be the RTI extracted from outer_subplan, and
-        * outer_clump_join will be NULL; but it's possible that it could be
-        * clumped join, in which case outer_subplan will be NULL and outer_rti
-        * will be zero.
-        */
-       Plan       *outer_subplan;
-       Index           outer_rti;
-       pgpa_clump_join **outer_clump_join;
+       Plan   *plan;
+       ElidedNode *elided_node;
+       Index   rti;
+       pgpa_clumped_join *clump_join;
+       pgpa_unrolled_join *unrolled_join;
+} pgpa_join_member;
 
-       /*
-        * nallocated is the allocated length of the strategy, inner_subplans,
-        * inner_rti, inner_unrolled_join, and inner_clump_join arrays; and nused
-        * is the number of elements in use within each of those arrays.
-        */
-       unsigned        nallocated;
-       unsigned        nused;
+/*
+ * Non-clumped joins are unrolled; that is, we convert outer-deep join trees
+ * to a flat structure. ((A JOIN B) JOIN C) JOIN D gets converted to
+ * outer = A, inner = <B C D>, provided that none of the joins involved are
+ * clumped. When joins aren't outer-deep, substructure is required, e.g.
+ * (A JOIN B) JOIN (C JOIN D) is represented as outer = A, inner = <B X>,
+ * where X is a clumped or unrolled join covering C-D.
+ */
+struct pgpa_unrolled_join
+{
+       /* Outermost member; must not itself be an unrolled join. */
+       pgpa_join_member outer;
 
-       /*
-        * Foreach 0 <= n < nused, strategy[n] is the join strategy used to the
-        * next table or group of tables, as further detailed by the fields below.
-        * For instance, given (A MERGE_JOIN_PLAIN B) HASH_JOIN C, we would set
-        * strategy[0] = JSTRAT_MERGEJOIN_PLAIN and strategy[1] =
-        * JSTRATE_HASHJOIN.
-        */
-       pgpa_join_strategy **strategy;
+       /* Number of inner members. Length of the strategy and inner arrays. */
+       unsigned        ninner;
 
-       /*
-        * For each 0 <= n < nused, exactly one of inner_subplans[n],
-        * inner_unrolled_join[n], and inner_clump_join[n] should be non-NULL;
-        * inner_rti[n] should be non-zero if and only if inner_subplans[n] is
-        * non-NULL.
-        *
-        * Note that when inner_unrolled_joins[n] is non-NULL, we've got what the
-        * optimizer calls a "bushy" plan, where several tables are joined to each
-        * other before being joined to the remainder of the join tree all at
-        * once.
-        */
-       Plan      **inner_subplans;
-       Index      *inner_rti;
-       pgpa_unrolled_join **inner_unrolled_join;
-       pgpa_clump_join **inner_clump_join;
-} pgpa_unrolled_join;
+       /* Array of strategies, one per non-outermost member. */
+       pgpa_join_strategy *strategy;
 
-/* Derive a clumped join from a Plan. */
-extern pgpa_clumped_join *pgpa_make_clumped_join(Plan *plan);
+       /* Array of members, excluding the outermost. Deepest first. */
+       pgpa_join_member *inner;
+};
 
 /*
- * pgpa_make_unrolled_join objects are constructed in steps.
- *
- * Call pgpa_make_unrolled_join() just once. Then, call one of the
- * pgpa_set_outer_* functions just once. Then, call one of the
- * pgpa_append_inner_* functions once for every join level represented
- * by this object.
+ * Classification of Plan nodes based on what structure we should construct.
  */
-extern pgpa_unrolled_join *pgpa_make_unrolled_join(void);
-extern void pgpa_set_outer_scan(pgpa_unrolled_join *join, Plan *outer_subplan);
-extern void pgpa_set_outer_elided_scan(pgpa_unrolled_join *join,
-                                                                          ElidedNode *elided_node);
-extern void pgpa_set_outer_clumped_join(pgpa_unrolled_join *join,
-                                                                               pgpa_clumped_join *subjoin);
-extern void pgpa_append_inner_scan(pgpa_unrolled_join *join,
-                                                                  Plan *inner_subplan);
-extern void pgpa_append_inner_elided_scan(pgpa_unrolled_join *join,
-                                                                                 ElidedNode *elided_node);
-extern void pgpa_append_inner_unrolled_join(pgpa_unrolled_join *join,
-                                                                                       pgpa_unrolled_join *subjoin);
-extern void pgpa_append_inner_clumped_join(pgpa_unrolled_join *join,
-                                                                                  pgpa_clumped_join *subjoin);
+typedef enum
+{
+       PGPA_NON_JOIN,
+       PGPA_CLUMPED_JOIN,
+       PGPA_UNROLLED_JOIN
+} pgpa_join_class;
+
+extern pgpa_join_class pgpa_get_join_class(Plan *plan);
+extern pgpa_clumped_join *pgpa_make_clumped_join(Plan *plan);
+extern pgpa_unrolled_join *pgpa_make_unrolled_join(PlannedStmt *pstmt,
+                                                                                                  Plan *plan);
+
+typedef bool (*pgpa_unrolled_join_walker_callback) (pgpa_join_member *member,
+                                                                                                       void *context);
+extern bool pgpa_unrolled_join_walker(pgpa_unrolled_join *join,
+                                                                         pgpa_unrolled_join_walker_callback cb,
+                                                                         void *context);
 
 #endif