summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/Makefile1
-rw-r--r--contrib/meson.build1
-rw-r--r--contrib/pg_plan_advice/Makefile30
-rw-r--r--contrib/pg_plan_advice/meson.build35
-rw-r--r--contrib/pg_plan_advice/pg_plan_advice--1.0.sql40
-rw-r--r--contrib/pg_plan_advice/pg_plan_advice.c342
-rw-r--r--contrib/pg_plan_advice/pg_plan_advice.control5
-rw-r--r--contrib/pg_plan_advice/pg_plan_advice.h35
-rw-r--r--contrib/pg_plan_advice/pgpa_collector.c630
-rw-r--r--contrib/pg_plan_advice/pgpa_collector.h3
-rw-r--r--contrib/pg_plan_advice/pgpa_identifier.c385
-rw-r--r--contrib/pg_plan_advice/pgpa_identifier.h29
-rw-r--r--contrib/pg_plan_advice/pgpa_join.c601
-rw-r--r--contrib/pg_plan_advice/pgpa_join.h118
-rw-r--r--contrib/pg_plan_advice/pgpa_output.c480
-rw-r--r--contrib/pg_plan_advice/pgpa_output.h21
-rw-r--r--contrib/pg_plan_advice/pgpa_scan.c193
-rw-r--r--contrib/pg_plan_advice/pgpa_scan.h76
-rw-r--r--contrib/pg_plan_advice/pgpa_walker.c408
-rw-r--r--contrib/pg_plan_advice/pgpa_walker.h86
-rw-r--r--src/tools/pgindent/typedefs.list17
21 files changed, 3536 insertions, 0 deletions
diff --git a/contrib/Makefile b/contrib/Makefile
index 2f0a88d3f7..dd04c20acd 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -34,6 +34,7 @@ SUBDIRS = \
pg_freespacemap \
pg_logicalinspect \
pg_overexplain \
+ pg_plan_advice \
pg_prewarm \
pg_stat_statements \
pg_surgery \
diff --git a/contrib/meson.build b/contrib/meson.build
index ed30ee7d63..cb718dbdac 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -48,6 +48,7 @@ subdir('pgcrypto')
subdir('pg_freespacemap')
subdir('pg_logicalinspect')
subdir('pg_overexplain')
+subdir('pg_plan_advice')
subdir('pg_prewarm')
subdir('pgrowlocks')
subdir('pg_stat_statements')
diff --git a/contrib/pg_plan_advice/Makefile b/contrib/pg_plan_advice/Makefile
new file mode 100644
index 0000000000..6f8127e551
--- /dev/null
+++ b/contrib/pg_plan_advice/Makefile
@@ -0,0 +1,30 @@
+# contrib/pg_plan_advice/Makefile
+
+MODULE_big = pg_plan_advice
+OBJS = \
+ $(WIN32RES) \
+ pg_plan_advice.o \
+ pgpa_collector.o \
+ pgpa_identifier.o \
+ pgpa_join.o \
+ pgpa_output.o \
+ pgpa_scan.o \
+ pgpa_walker.o
+
+EXTENSION = pg_plan_advice
+DATA = pg_plan_advice--1.0.sql
+PGFILEDESC = "pg_plan_advice - help the planner get the right plan"
+
+REGRESS = pg_plan_advice
+TAP_TESTS = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_plan_advice
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_plan_advice/meson.build b/contrib/pg_plan_advice/meson.build
new file mode 100644
index 0000000000..d9e3ec148c
--- /dev/null
+++ b/contrib/pg_plan_advice/meson.build
@@ -0,0 +1,35 @@
+# Copyright (c) 2022-2024, PostgreSQL Global Development Group
+
+pg_plan_advice_sources = files(
+ 'pg_plan_advice.c',
+ 'pgpa_collector.c',
+ 'pgpa_identifier.c',
+ 'pgpa_join.c',
+ 'pgpa_output.c',
+ 'pgpa_scan.c',
+ 'pgpa_walker.c',
+)
+
+if host_system == 'windows'
+ pg_plan_advice_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+ '--NAME', 'pg_plan_advice',
+ '--FILEDESC', 'pg_plan_advice - help the planner get the right plan',])
+endif
+
+pg_plan_advice = shared_module('pg_plan_advice',
+ pg_plan_advice_sources,
+ kwargs: contrib_mod_args,
+)
+contrib_targets += pg_plan_advice
+
+install_data(
+ 'pg_plan_advice--1.0.sql',
+ 'pg_plan_advice.control',
+ kwargs: contrib_data_args,
+)
+
+tests += {
+ 'name': 'pg_plan_advice',
+ 'sd': meson.current_source_dir(),
+ 'bd': meson.current_build_dir(),
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice--1.0.sql b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql
new file mode 100644
index 0000000000..729950bcce
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice--1.0.sql
@@ -0,0 +1,40 @@
+/* contrib/pg_plan_advice/pg_plan_advice--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_plan_advice" to load this file. \quit
+
+CREATE FUNCTION pg_clear_collected_local_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_clear_collected_shared_advice()
+RETURNS void
+AS 'MODULE_PATHNAME', 'pg_clear_collected_shared_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_local_advice(
+ OUT id bigint,
+ OUT userid oid,
+ OUT dbid oid,
+ OUT queryid bigint,
+ OUT collection_time timestamptz,
+ OUT query text,
+ OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_local_advice'
+LANGUAGE C STRICT;
+
+CREATE FUNCTION pg_get_collected_shared_advice(
+ OUT id bigint,
+ OUT userid oid,
+ OUT dbid oid,
+ OUT queryid bigint,
+ OUT collection_time timestamptz,
+ OUT query text,
+ OUT advice text
+)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_get_collected_shared_advice'
+LANGUAGE C STRICT;
diff --git a/contrib/pg_plan_advice/pg_plan_advice.c b/contrib/pg_plan_advice/pg_plan_advice.c
new file mode 100644
index 0000000000..0c54cd8f54
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.c
@@ -0,0 +1,342 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.c
+ * Main entrypoints for generating and applying planner advice
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pg_plan_advice.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+#include "pgpa_identifier.h"
+#include "pgpa_output.h"
+#include "pgpa_walker.h"
+
+#include "commands/defrem.h"
+#include "commands/explain.h"
+#include "commands/explain_format.h"
+#include "commands/explain_state.h"
+#include "funcapi.h"
+#include "storage/dsm_registry.h"
+#include "utils/guc.h"
+
+PG_MODULE_MAGIC;
+
+static pgpa_shared_state *pgpa_state = NULL;
+static dsa_area *pgpa_dsa_area = NULL;
+
+/* GUC variables */
+int pg_plan_advice_local_collection_limit = 0;
+int pg_plan_advice_shared_collection_limit = 0;
+
+/* Saved hook values */
+static ExecutorStart_hook_type prev_ExecutorStart = NULL;
+static explain_per_plan_hook_type prev_explain_per_plan_hook = NULL;
+
+/* Other file-level globals */
+static int es_extension_id;
+static MemoryContext pgpa_memory_context = NULL;
+
+static void pg_plan_advice_ExecutorStart(QueryDesc *queryDesc, int eflags);
+static void pg_plan_advice_explain_option_handler(ExplainState *es,
+ DefElem *opt,
+ ParseState *pstate);
+static void pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+ IntoClause *into,
+ ExplainState *es,
+ const char *queryString,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv);
+static char *pg_plan_advice_generate(PlannedStmt *pstmt);
+
+/*
+ * Initialize this module.
+ */
+void
+_PG_init(void)
+{
+ DefineCustomIntVariable("pg_plan_advice.local_collection_limit",
+ "# of advice entries to retain in per-backend memory",
+ NULL,
+ &pg_plan_advice_local_collection_limit,
+ 0,
+ 0, INT_MAX,
+ PGC_USERSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ DefineCustomIntVariable("pg_plan_advice.shared_collection_limit",
+ "# of advice entries to retain in shared memory",
+ NULL,
+ &pg_plan_advice_shared_collection_limit,
+ 0,
+ 0, INT_MAX,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
+ /* Get an ID that we can use to cache data in an ExplainState. */
+ es_extension_id = GetExplainExtensionId("pg_plan_advice");
+
+ /* Register the new EXPLAIN options implemented by this module. */
+ RegisterExtensionExplainOption("plan_advice",
+ pg_plan_advice_explain_option_handler);
+
+ /* Install hooks */
+ prev_ExecutorStart = ExecutorStart_hook;
+ ExecutorStart_hook = pg_plan_advice_ExecutorStart;
+ prev_explain_per_plan_hook = explain_per_plan_hook;
+ explain_per_plan_hook = pg_plan_advice_explain_per_plan_hook;
+}
+
+/*
+ * Initialize shared state when first created.
+ */
+static void
+pgpa_init_shared_state(void *ptr)
+{
+ pgpa_shared_state *state = (pgpa_shared_state *) ptr;
+
+ LWLockInitialize(&state->lock, LWLockNewTrancheId());
+ state->dsa_tranche = LWLockNewTrancheId();
+ state->area = DSA_HANDLE_INVALID;
+ state->shared_collector = InvalidDsaPointer;
+}
+
+/*
+ * Return a pointer to a memory context where long-lived data managed by this
+ * module can be stored.
+ */
+MemoryContext
+pg_plan_advice_get_mcxt(void)
+{
+ if (pgpa_memory_context == NULL)
+ pgpa_memory_context = AllocSetContextCreate(TopMemoryContext,
+ "pg_plan_advice",
+ ALLOCSET_DEFAULT_SIZES);
+
+ return pgpa_memory_context;
+}
+
+/*
+ * Get a pointer to our shared state.
+ *
+ * If no shared state exists, create and initialize it. If it does exist but
+ * this backend has not yet accessed it, attach to it. Otherwise, just return
+ * our cached pointer.
+ *
+ * Along the way, make sure the relevant LWLock tranches are registered.
+ */
+pgpa_shared_state *
+pg_plan_advice_attach(void)
+{
+ pgpa_shared_state *state;
+ bool found;
+
+ if (pgpa_state == NULL)
+ {
+ state = GetNamedDSMSegment("pg_plan_advice", sizeof(pgpa_shared_state),
+ pgpa_init_shared_state, &found);
+ LWLockRegisterTranche(state->lock.tranche, "pg_plan_advice_lock");
+ LWLockRegisterTranche(state->dsa_tranche, "pg_plan_advice_dsa");
+ pgpa_state = state;
+ }
+
+ return pgpa_state;
+}
+
+/*
+ * Return a pointer to pg_plan_advice's DSA area, creating it if needed.
+ */
+dsa_area *
+pg_plan_advice_dsa_area(void)
+{
+ if (pgpa_dsa_area == NULL)
+ {
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_handle area_handle;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+
+ LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+ area_handle = state->area;
+ if (area_handle == DSA_HANDLE_INVALID)
+ {
+ pgpa_dsa_area = dsa_create(state->dsa_tranche);
+ dsa_pin(pgpa_dsa_area);
+ state->area = dsa_get_handle(pgpa_dsa_area);
+ LWLockRelease(&state->lock);
+ }
+ else
+ {
+ LWLockRelease(&state->lock);
+ pgpa_dsa_area = dsa_attach(area_handle);
+ }
+
+ dsa_pin_mapping(pgpa_dsa_area);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ return pgpa_dsa_area;
+}
+
+static void
+pg_plan_advice_ExecutorStart(QueryDesc *queryDesc, int eflags)
+{
+ PlannedStmt *pstmt = queryDesc->plannedstmt;
+
+ if (pg_plan_advice_local_collection_limit > 0
+ || pg_plan_advice_shared_collection_limit > 0)
+ {
+ char *advice;
+
+ advice = pg_plan_advice_generate(pstmt);
+
+ /*
+ * If the advice string is non-empty, pass it to the collectors.
+ *
+ * A query such as SELECT 2+2 or SELECT * FROM generate_series(1,10)
+ * will not produce any advice, since there are no query planning
+ * decisions that can be influenced. It wouldn't exactly be wrong to
+ * record the query together with the empty advice string, but there
+ * doesn't seem to be much value in it, so skip it to save space.
+ *
+ * If this proves confusing to users, we might need to revist the
+ * behavior here.
+ */
+ if (advice[0] != '\0')
+ pgpa_collect_advice(pstmt->queryId, queryDesc->sourceText,
+ advice);
+ }
+
+ if (prev_ExecutorStart)
+ prev_ExecutorStart(queryDesc, eflags);
+ else
+ standard_ExecutorStart(queryDesc, eflags);
+}
+
+/*
+ * Handler for EXPLAIN (PLAN_ADVICE).
+ */
+static void
+pg_plan_advice_explain_option_handler(ExplainState *es, DefElem *opt,
+ ParseState *pstate)
+{
+ bool *plan_advice;
+
+ plan_advice = GetExplainExtensionState(es, es_extension_id);
+
+ if (plan_advice == NULL)
+ {
+ plan_advice = palloc0_object(bool);
+ SetExplainExtensionState(es, es_extension_id, plan_advice);
+ }
+
+ *plan_advice = defGetBoolean(opt);
+}
+
+/*
+ * If the PLAN_ADVICE option was specified -- and not set to FALSE -- generate
+ * advice from the provided plan and add it to the EXPLAIN output.
+ */
+static void
+pg_plan_advice_explain_per_plan_hook(PlannedStmt *plannedstmt,
+ IntoClause *into,
+ ExplainState *es,
+ const char *queryString,
+ ParamListInfo params,
+ QueryEnvironment *queryEnv)
+{
+ bool *plan_advice;
+
+ if (prev_explain_per_plan_hook)
+ prev_explain_per_plan_hook(plannedstmt, into, es, queryString,
+ params, queryEnv);
+
+ plan_advice = GetExplainExtensionState(es, es_extension_id);
+ if (plan_advice != NULL && *plan_advice)
+ {
+ char *advice = pg_plan_advice_generate(plannedstmt);
+
+ /*
+ * The advice string likely spans multiple lines; the last line will
+ * not end in a newline, but the others will. In text format, it looks
+ * nicest to indent each line of the advice separately, beginning on
+ * the line below the "Plan Advice" label. For non-text formats, it's
+ * best not to add any special handling.
+ */
+ if (es->format != EXPLAIN_FORMAT_TEXT)
+ ExplainPropertyText("Plan Advice", advice, es);
+ else if (*advice != '\0')
+ {
+ char *s;
+
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "Plan Advice:\n");
+
+ es->indent++;
+
+ while ((s = strchr(advice, '\n')) != NULL)
+ {
+ ExplainIndentText(es);
+ appendBinaryStringInfo(es->str, advice, (s - advice) + 1);
+ advice = s + 1;
+ }
+
+ if (*advice != '\0')
+ {
+ ExplainIndentText(es);
+ appendStringInfo(es->str, "%s\n", advice);
+ }
+
+ es->indent--;
+ }
+ }
+}
+
+/*
+ * Generate advice from a query plan and send it to the relevant collectors.
+ */
+static char *
+pg_plan_advice_generate(PlannedStmt *pstmt)
+{
+ pgpa_plan_walker_context walker;
+ StringInfoData buf;
+ ListCell *lc;
+ const char **rt_identifiers;
+
+ /* Construct unique identifiers for non-JOIN RTEs. */
+ rt_identifiers = pgpa_create_identifiers_for_planned_stmt(pstmt);
+
+ /* Initialization. */
+ memset(&walker, 0, sizeof(pgpa_plan_walker_context));
+ walker.pstmt = pstmt;
+
+ /* Walk the main plan tree. */
+ pgpa_plan_walker(&walker, pstmt->planTree, 0, NULL, NIL);
+
+ /* Main plan tree walk won't reach subplans, so walk those. */
+ foreach(lc, pstmt->subplans)
+ {
+ Plan *plan = lfirst(lc);
+
+ if (plan != NULL)
+ pgpa_plan_walker(&walker, plan, 0, NULL, NIL);
+ }
+
+ /* Put advice into string form. */
+ initStringInfo(&buf);
+ pgpa_output_advice(&buf, &walker, rt_identifiers);
+ return buf.data;
+}
diff --git a/contrib/pg_plan_advice/pg_plan_advice.control b/contrib/pg_plan_advice/pg_plan_advice.control
new file mode 100644
index 0000000000..aa6fdc9e7b
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.control
@@ -0,0 +1,5 @@
+# pg_plan_advice extension
+comment = 'help the planner get the right plan'
+default_version = '1.0'
+module_pathname = '$libdir/pg_plan_advice'
+relocatable = true
diff --git a/contrib/pg_plan_advice/pg_plan_advice.h b/contrib/pg_plan_advice/pg_plan_advice.h
new file mode 100644
index 0000000000..385d3f3052
--- /dev/null
+++ b/contrib/pg_plan_advice/pg_plan_advice.h
@@ -0,0 +1,35 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_plan_advice.h
+ * header file for pg_plan_advice contrib module
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pg_plan_advice.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PG_PLAN_ADVICE_H
+#define PG_PLAN_ADVICE_H
+
+#include "storage/lwlock.h"
+#include "utils/dsa.h"
+
+typedef struct pgpa_shared_state
+{
+ LWLock lock;
+ int dsa_tranche;
+ dsa_handle area;
+ dsa_pointer shared_collector;
+} pgpa_shared_state;
+
+/* GUC variables */
+extern int pg_plan_advice_local_collection_limit;
+extern int pg_plan_advice_shared_collection_limit;
+
+/* Function prototypes */
+extern MemoryContext pg_plan_advice_get_mcxt(void);
+extern pgpa_shared_state *pg_plan_advice_attach(void);
+extern dsa_area *pg_plan_advice_dsa_area(void);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_collector.c b/contrib/pg_plan_advice/pgpa_collector.c
new file mode 100644
index 0000000000..1bcefe1fdf
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_collector.c
@@ -0,0 +1,630 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_collector.c
+ * collect advice into backend-local or shared memory
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_collector.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pg_plan_advice.h"
+#include "pgpa_collector.h"
+
+#include "datatype/timestamp.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "utils/builtins.h"
+#include "utils/timestamp.h"
+
+PG_FUNCTION_INFO_V1(pg_clear_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_clear_collected_shared_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_local_advice);
+PG_FUNCTION_INFO_V1(pg_get_collected_shared_advice);
+
+#define ADVICE_CHUNK_SIZE 1024
+#define ADVICE_CHUNK_ARRAY_SIZE 64
+
+#define PG_GET_ADVICE_COLUMNS 7
+
+/*
+ * Advice extracted from one query plan, together with the query string
+ * and various other identifying details.
+ */
+typedef struct pgpa_collected_advice
+{
+ Oid userid; /* user OID */
+ Oid dbid; /* database OID */
+ uint64 queryid; /* query identifier */
+ TimestampTz timestamp; /* query timestamp */
+ int advice_offset; /* start of advice in textual data */
+ char textual_data[FLEXIBLE_ARRAY_MEMBER];
+} pgpa_collected_advice;
+
+/*
+ * A bunch of pointers to pgpa_collected_advice objects, stored in
+ * backend-local memory.
+ */
+typedef struct pgpa_local_advice_chunk
+{
+ pgpa_collected_advice *entries[ADVICE_CHUNK_SIZE];
+} pgpa_local_advice_chunk;
+
+/*
+ * Information about all of the pgpa_collected_advice objects that we're
+ * storing in local memory.
+ *
+ * We assign consecutive IDs, starting from 0, to each pgpa_collected_advice
+ * object that we store. The actual storage is an array of chunks, which
+ * helps keep memcpy() overhead low when we start discarding older data.
+ */
+typedef struct pgpa_local_advice
+{
+ uint64 next_id;
+ uint64 oldest_id;
+ uint64 base_id;
+ int chunk_array_allocated_size;
+ pgpa_local_advice_chunk **chunks;
+} pgpa_local_advice;
+
+/*
+ * Just like pgpa_local_advice_chunk, but stored in a dynamic shared area,
+ * so we must use dsa_pointer instead of native pointers.
+ */
+typedef struct pgpa_shared_advice_chunk
+{
+ dsa_pointer entries[ADVICE_CHUNK_SIZE];
+} pgpa_shared_advice_chunk;
+
+/*
+ * Just like pgpa_local_advice, but stored in a dynamic shared area, so
+ * we must use dsa_pointer instead of native pointers.
+ */
+typedef struct pgpa_shared_advice
+{
+ uint64 next_id;
+ uint64 oldest_id;
+ uint64 base_id;
+ int chunk_array_allocated_size;
+ dsa_pointer chunks;
+} pgpa_shared_advice;
+
+/* Pointers to local and shared collectors */
+static pgpa_local_advice *local_collector = NULL;
+static pgpa_shared_advice *shared_collector = NULL;
+
+/* Static functions */
+static pgpa_collected_advice *pgpa_make_collected_advice(Oid userid,
+ Oid dbid,
+ uint64 queryId,
+ TimestampTz timestamp,
+ const char *query_string,
+ const char *advice_string,
+ dsa_area *area,
+ dsa_pointer *result);
+static void pgpa_store_local_advice(pgpa_collected_advice *ca);
+static void pgpa_trim_local_advice(int limit);
+static void pgpa_store_shared_advice(dsa_pointer ca_pointer);
+static void pgpa_trim_shared_advice(dsa_area *area, int limit);
+
+/* Helper function to extract the query string from pgpa_collected_advice */
+static inline const char *
+query_string(pgpa_collected_advice *ca)
+{
+ return ca->textual_data;
+}
+
+/* Helper function to extract the advice string from pgpa_collected_advice */
+static inline const char *
+advice_string(pgpa_collected_advice *ca)
+{
+ return ca->textual_data + ca->advice_offset;
+}
+
+/*
+ * Store collected query advice into the local or shared advice collector,
+ * as appropriate.
+ */
+void
+pgpa_collect_advice(uint64 queryId, const char *query_string,
+ const char *advice_string)
+{
+ Oid userid = GetUserId();
+ Oid dbid = MyDatabaseId;
+ TimestampTz now = GetCurrentTimestamp();
+
+ if (pg_plan_advice_local_collection_limit > 0)
+ {
+ pgpa_collected_advice *ca;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(pg_plan_advice_get_mcxt());
+ ca = pgpa_make_collected_advice(userid, dbid, queryId, now,
+ query_string, advice_string,
+ NULL, NULL);
+ pgpa_store_local_advice(ca);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ if (pg_plan_advice_shared_collection_limit > 0)
+ {
+ dsa_area *area = pg_plan_advice_dsa_area();
+ dsa_pointer ca_pointer;
+
+ pgpa_make_collected_advice(userid, dbid, queryId, now,
+ query_string, advice_string, area,
+ &ca_pointer);
+ pgpa_store_shared_advice(ca_pointer);
+ }
+}
+
+/*
+ * Allocate and fill a new pgpa_collected_advice object.
+ *
+ * If area != NULL, it is used to allocate the new object, and the resulting
+ * dsa_pointer is returned via *result.
+ *
+ * If area == NULL, the new object is allocated in the current memory context,
+ * and result is not examined or modified.
+ */
+static pgpa_collected_advice *
+pgpa_make_collected_advice(Oid userid, Oid dbid, uint64 queryId,
+ TimestampTz timestamp,
+ const char *query_string,
+ const char *advice_string,
+ dsa_area *area, dsa_pointer *result)
+{
+ size_t query_string_length = strlen(query_string) + 1;
+ size_t advice_string_length = strlen(advice_string) + 1;
+ size_t total_length;
+ pgpa_collected_advice *ca;
+
+ total_length = offsetof(pgpa_collected_advice, textual_data)
+ + query_string_length + advice_string_length;
+
+ if (area == NULL)
+ ca = palloc(total_length);
+ else
+ {
+ *result = dsa_allocate(area, total_length);
+ ca = dsa_get_address(area, *result);
+ }
+
+ ca->userid = GetUserId();
+ ca->dbid = MyDatabaseId;
+ ca->queryid = queryId;
+ ca->timestamp = timestamp;
+ ca->advice_offset = query_string_length;
+
+ memcpy(ca->textual_data, query_string, query_string_length);
+ memcpy(&ca->textual_data[ca->advice_offset],
+ advice_string, advice_string_length);
+
+ return ca;
+}
+
+/*
+ * Add a pg_collected_advice object to our backend-local advice collection.
+ *
+ * Caller is responsible for switching to the appropriate memory context;
+ * the provided object should have been allocated in that same context.
+ */
+static void
+pgpa_store_local_advice(pgpa_collected_advice *ca)
+{
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_local_advice *la = local_collector;
+
+ /* If the local advice collector isn't initialized yet, do that now. */
+ if (la == NULL)
+ {
+ la = palloc0(sizeof(pgpa_local_advice));
+ la->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+ la->chunks = palloc0_array(pgpa_local_advice_chunk *,
+ la->chunk_array_allocated_size);
+ local_collector = la;
+ }
+
+ /* Compute chunk and offset at which to store this advice. */
+ chunk_number = (la->next_id - la->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (la->next_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+ /* Extend chunk array, if needed. */
+ if (chunk_number > la->chunk_array_allocated_size)
+ {
+ int new_size;
+
+ new_size = la->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+ la->chunks = repalloc0_array(la->chunks,
+ pgpa_local_advice_chunk *,
+ la->chunk_array_allocated_size,
+ new_size);
+ la->chunk_array_allocated_size = new_size;
+ }
+
+ /* Allocate new chunk, if needed. */
+ if (la->chunks[chunk_number] == NULL)
+ la->chunks[chunk_number] = palloc0_object(pgpa_local_advice_chunk);
+
+ /* Save pointer and bump next-id counter. */
+ Assert(la->chunks[chunk_number]->entries[chunk_offset] == NULL);
+ la->chunks[chunk_number]->entries[chunk_offset] = ca;
+ ++la->next_id;
+
+ /* If we've exceeded the storage limit, discard old data. */
+ pgpa_trim_local_advice(pg_plan_advice_local_collection_limit);
+}
+
+/*
+ * Add a pg_collected_advice object to the shared advice collection.
+ *
+ * 'ca_pointer' should have been allocated from the pg_plan_advice DSA area
+ * and should point to an object of type pgpa_collected_advice.
+ */
+static void
+pgpa_store_shared_advice(dsa_pointer ca_pointer)
+{
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_area *area = pg_plan_advice_dsa_area();
+ pgpa_shared_advice *sa = shared_collector;
+ dsa_pointer *chunk_array;
+ pgpa_shared_advice_chunk *chunk;
+
+ /* Lock the shared state. */
+ LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+ /*
+ * If we're not attached to the shared advice collector yet, fix that now.
+ * If we're the first ones to attach, we may need to create the object.
+ */
+ if (sa == NULL)
+ {
+ if (state->shared_collector == InvalidDsaPointer)
+ state->shared_collector =
+ dsa_allocate0(area, sizeof(pgpa_shared_advice));
+ shared_collector = sa = dsa_get_address(area, state->shared_collector);
+ }
+
+ /*
+ * It's possible that some other backend may have succeeded in creating
+ * the main collector object but failed to allocate an initial chunk
+ * array, so we must be prepared to allocate the chunk array here whether
+ * or not we created the collector object.
+ */
+ if (shared_collector->chunk_array_allocated_size == 0)
+ {
+ sa->chunks =
+ dsa_allocate0(area,
+ sizeof(dsa_pointer) * ADVICE_CHUNK_ARRAY_SIZE);
+ sa->chunk_array_allocated_size = ADVICE_CHUNK_ARRAY_SIZE;
+ }
+
+ /* Compute chunk and offset at which to store this advice. */
+ chunk_number = (sa->next_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (sa->next_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+ /* Get the address of the chunk array and, if needed, extend it. */
+ if (chunk_number > sa->chunk_array_allocated_size)
+ {
+ int new_size;
+ dsa_pointer new_chunks;
+
+ /*
+ * DSA can't enlarge an existing allocation, so we must make a new
+ * allocation and copy data over.
+ */
+ new_size = sa->chunk_array_allocated_size + ADVICE_CHUNK_ARRAY_SIZE;
+ new_chunks = dsa_allocate0(area, sizeof(dsa_pointer) * new_size);
+ chunk_array = dsa_get_address(area, new_chunks);
+ memcpy(chunk_array, dsa_get_address(area, sa->chunks),
+ sizeof(dsa_pointer) * sa->chunk_array_allocated_size);
+ dsa_free(area, sa->chunks);
+ sa->chunks = new_chunks;
+ sa->chunk_array_allocated_size = new_size;
+ }
+ else
+ chunk_array = dsa_get_address(area, sa->chunks);
+
+ /* Get the address of the desired chunk, allocating it if needed. */
+ if (chunk_array[chunk_number] == InvalidDsaPointer)
+ chunk_array[chunk_number] =
+ dsa_allocate0(area, sizeof(pgpa_shared_advice_chunk));
+ chunk = dsa_get_address(area, chunk_array[chunk_number]);
+
+ /* Save pointer and bump next-id counter. */
+ Assert(chunk->entries[chunk_offset] == InvalidDsaPointer);
+ chunk->entries[chunk_offset] = ca_pointer;
+ ++sa->next_id;
+
+ /* If we've exceeded the storage limit, discard old data. */
+ pgpa_trim_shared_advice(area, pg_plan_advice_shared_collection_limit);
+
+ /* Release lock on shared state. */
+ LWLockRelease(&state->lock);
+}
+
+/*
+ * Discard collected advice stored in backend-local memory in excess of the
+ * specified limit.
+ */
+static void
+pgpa_trim_local_advice(int limit)
+{
+ pgpa_local_advice *la = local_collector;
+ uint64 current_count;
+ uint64 trim_count;
+ uint64 total_chunk_count;
+ uint64 trim_chunk_count;
+ uint64 remaining_chunk_count;
+
+ /* If we haven't yet reached the limit, there's nothing to do. */
+ current_count = la->next_id - la->oldest_id;
+ if (current_count <= limit)
+ return;
+
+ /* Free enough entries to get us back down to the limit. */
+ trim_count = current_count - limit;
+ while (trim_count > 0)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+
+ chunk_number = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (la->oldest_id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+ Assert(la->chunks[chunk_number]->entries[chunk_offset] != NULL);
+ pfree(la->chunks[chunk_number]->entries[chunk_offset]);
+ la->chunks[chunk_number]->entries[chunk_offset] = NULL;
+ ++la->oldest_id;
+ --trim_count;
+ }
+
+ /* Free any chunks that are now entirely unused. */
+ trim_chunk_count = (la->oldest_id - la->base_id) / ADVICE_CHUNK_SIZE;
+ for (uint64 n = 0; n < trim_chunk_count; ++n)
+ pfree(la->chunks[n]);
+
+ /* Slide remaining chunk pointers back toward the base of the array. */
+ total_chunk_count = (la->next_id - la->base_id +
+ ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+ remaining_chunk_count = total_chunk_count - trim_chunk_count;
+ if (remaining_chunk_count > 0)
+ memmove(&la->chunks[0], &la->chunks[trim_chunk_count],
+ sizeof(pgpa_local_advice_chunk *) * remaining_chunk_count);
+
+ /* Adjust base ID value accordingly. */
+ la->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * Discard collected advice stored in shared memory in excess of the
+ * specified limit.
+ */
+static void
+pgpa_trim_shared_advice(dsa_area *area, int limit)
+{
+ pgpa_shared_advice *sa = shared_collector;
+ uint64 current_count;
+ uint64 trim_count;
+ uint64 total_chunk_count;
+ uint64 trim_chunk_count;
+ uint64 remaining_chunk_count;
+ dsa_pointer *chunk_array;
+
+ /* If we haven't yet reached the limit, there's nothing to do. */
+ current_count = sa->next_id - sa->oldest_id;
+ if (current_count <= limit)
+ return;
+
+ /* Get a pointer to the chunk array. */
+ chunk_array = dsa_get_address(area, sa->chunks);
+
+ /* Free enough entries to get us back down to the limit. */
+ trim_count = current_count - limit;
+ while (trim_count > 0)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_shared_advice_chunk *chunk;
+
+ chunk_number = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (sa->oldest_id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+ chunk = dsa_get_address(area, chunk_array[chunk_number]);
+ Assert(chunk->entries[chunk_offset] != InvalidDsaPointer);
+ dsa_free(area, chunk->entries[chunk_offset]);
+ chunk->entries[chunk_offset] = InvalidDsaPointer;
+ ++sa->oldest_id;
+ --trim_count;
+ }
+
+ /* Free any chunks that are now entirely unused. */
+ trim_chunk_count = (sa->oldest_id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ for (uint64 n = 0; n < trim_chunk_count; ++n)
+ dsa_free(area, chunk_array[n]);
+
+ /* Slide remaining chunk pointers back toward the base of the array. */
+ total_chunk_count = (sa->next_id - sa->base_id +
+ ADVICE_CHUNK_SIZE - 1) / ADVICE_CHUNK_SIZE;
+ remaining_chunk_count = total_chunk_count - trim_chunk_count;
+ if (remaining_chunk_count > 0)
+ memmove(&chunk_array[0], &chunk_array[trim_chunk_count],
+ sizeof(dsa_pointer) * remaining_chunk_count);
+
+ /* Adjust base ID value accordingly. */
+ sa->base_id += trim_chunk_count * ADVICE_CHUNK_SIZE;
+}
+
+/*
+ * SQL-callable function to discard advice collected in backend-local memory
+ */
+Datum
+pg_clear_collected_local_advice(PG_FUNCTION_ARGS)
+{
+ pgpa_trim_local_advice(0);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable function to discard advice collected in backend-local memory
+ */
+Datum
+pg_clear_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_area *area = pg_plan_advice_dsa_area();
+
+ LWLockAcquire(&state->lock, LW_EXCLUSIVE);
+
+ /*
+ * If we're not attached to the shared advice collector yet, fix that now;
+ * but if the collector doesn't even exist, we can return without doing
+ * anything else.
+ */
+ if (shared_collector == NULL)
+ {
+ if (state->shared_collector == InvalidDsaPointer)
+ {
+ LWLockRelease(&state->lock);
+ return (Datum) 0;
+ }
+ shared_collector = dsa_get_address(area, state->shared_collector);
+ }
+
+ /* Do the real work */
+ pgpa_trim_shared_advice(area, 0);
+
+ LWLockRelease(&state->lock);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SQL-callable SRF to return advice collected in backend-local memory
+ */
+Datum
+pg_get_collected_local_advice(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ pgpa_local_advice *la = local_collector;
+
+ /*
+ * XXX. Is this the correct thing from a security point of view?
+ */
+ if (InSecurityRestrictedOperation())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("cannot call \"%s\" within security-restricted operation",
+ "pg_get_collected_local_advice")));
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ if (la == NULL)
+ return (Datum) 0;
+
+ /* Loop over all entries. */
+ for (uint64 id = la->oldest_id; id < la->next_id; ++id)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_collected_advice *ca;
+ Datum values[PG_GET_ADVICE_COLUMNS];
+ bool nulls[PG_GET_ADVICE_COLUMNS] = {0};
+
+ chunk_number = (id - la->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (id - la->base_id) % ADVICE_CHUNK_SIZE;
+
+ ca = la->chunks[chunk_number]->entries[chunk_offset];
+
+ values[0] = UInt64GetDatum(id);
+ values[1] = ObjectIdGetDatum(ca->userid);
+ values[2] = ObjectIdGetDatum(ca->dbid);
+ values[3] = UInt64GetDatum(ca->queryid);
+ values[4] = TimestampGetDatum(ca->timestamp);
+ values[5] = CStringGetTextDatum(query_string(ca));
+ values[6] = CStringGetTextDatum(advice_string(ca));
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ return (Datum) 0;
+}
+
+/*
+ * SQL-callable SRF to return advice collected in shared memory
+ */
+Datum
+pg_get_collected_shared_advice(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ pgpa_shared_state *state = pg_plan_advice_attach();
+ dsa_area *area = pg_plan_advice_dsa_area();
+ dsa_pointer *chunk_array;
+ pgpa_shared_advice *sa = shared_collector;
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* Lock the shared state. */
+ LWLockAcquire(&state->lock, LW_SHARED);
+
+ /*
+ * If we're not attached to the shared advice collector yet, fix that now;
+ * but if the collector doesn't even exist, we can return without doing
+ * anything else.
+ */
+ if (sa == NULL)
+ {
+ if (state->shared_collector == InvalidDsaPointer)
+ {
+ LWLockRelease(&state->lock);
+ return (Datum) 0;
+ }
+ shared_collector = sa = dsa_get_address(area, state->shared_collector);
+ }
+
+ /* Get a pointer to the chunk array. */
+ chunk_array = dsa_get_address(area, sa->chunks);
+
+ /* Loop over all entries. */
+ for (uint64 id = sa->oldest_id; id < sa->next_id; ++id)
+ {
+ uint64 chunk_number;
+ uint64 chunk_offset;
+ pgpa_shared_advice_chunk *chunk;
+ pgpa_collected_advice *ca;
+ Datum values[PG_GET_ADVICE_COLUMNS];
+ bool nulls[PG_GET_ADVICE_COLUMNS] = {0};
+
+ chunk_number = (id - sa->base_id) / ADVICE_CHUNK_SIZE;
+ chunk_offset = (id - sa->base_id) % ADVICE_CHUNK_SIZE;
+
+ chunk = dsa_get_address(area, chunk_array[chunk_number]);
+ ca = dsa_get_address(area, chunk->entries[chunk_offset]);
+
+ values[0] = UInt64GetDatum(id);
+ values[1] = ObjectIdGetDatum(ca->userid);
+ values[2] = ObjectIdGetDatum(ca->dbid);
+ values[3] = UInt64GetDatum(ca->queryid);
+ values[4] = TimestampGetDatum(ca->timestamp);
+ values[5] = CStringGetTextDatum(query_string(ca));
+ values[6] = CStringGetTextDatum(advice_string(ca));
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ /* Release lock on shared state. */
+ LWLockRelease(&state->lock);
+
+ return (Datum) 0;
+}
diff --git a/contrib/pg_plan_advice/pgpa_collector.h b/contrib/pg_plan_advice/pgpa_collector.h
new file mode 100644
index 0000000000..6f8efa27f0
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_collector.h
@@ -0,0 +1,3 @@
+
+extern void pgpa_collect_advice(uint64 queryId, const char *query_string,
+ const char *advice_string);
diff --git a/contrib/pg_plan_advice/pgpa_identifier.c b/contrib/pg_plan_advice/pgpa_identifier.c
new file mode 100644
index 0000000000..56ec832b23
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_identifier.c
@@ -0,0 +1,385 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.c
+ * create appropriate identifiers for range table entries
+ *
+ * The goal of this module is to be able to produce identifiers for range
+ * table entries that are unique, understandable to human beings, and
+ * able to be reconstructed during future planning cycles. As an
+ * exception, we do not care about, or want to produce, identifiers for
+ * RTE_JOIN entries. This is because (1) we would end up with a ton of
+ * RTEs with unhelpful names like unnamed_join_17; (2) not all joins have
+ * RTEs; and (3) we intend to refer to joins by their constituent members
+ * rather than by reference to the join RTE.
+ *
+ * In general, we construct identifiers of the following form:
+ *
+ * alias_name#occurrence_number/child_table_name@subquery_name
+ *
+ * However, occurrence_number is omitted when it is the first occurrence
+ * within the same subquery, child_table_name is omitted for relations that
+ * are not child tables, and subquery_name is omitted for the topmost
+ * query level. Whenever an item is omitted, the preceding punctuation mark
+ * is also omitted. Identifier-style escaping is applied to alias_name and
+ * subquery_name. child_table_name is always a schema-qualified name, and
+ * identifier-style escaping is applied to the schema and to the relation
+ * names separately.
+ *
+ * The upshot of all of these rules is that in simple cases, the relation
+ * identifier is textually identical to the alias name, making life easier
+ * for users. However, even in complex cases, every relation identifier
+ * for a given query will be unique (or at least we hope so: if not, this
+ * code is buggy and the identifier format might need to be rethought).
+ *
+ * A key goal of this system is that we want to be able to reconstruct the
+ * same identifiers during a future planning cycle for the same query, so
+ * that if a certain behavior is specified for a certain identifier, we can
+ * properly identify the RTI for which that behavior is mandated. In order
+ * for this to work, subquery names must be unique and known before the
+ * subquery is planned, and the remainder of the identifier must not depend
+ * on any part of the query outside of the current subquery level. In
+ * particular, occurrence_number must be calculated relative to the range
+ * table for the relevant subquery, not the final flattened range table.
+ *
+ * Copyright (c) 2016-2024, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_identifier.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_identifier.h"
+
+#include "parser/parsetree.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+static Index *pgpa_create_top_rti_map(Index rtable_length, List *rtable,
+ List *appinfos);
+static int pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+ SubPlanRTInfo *rtinfo, Index rti);
+
+/*
+ * Create a range table identifier from scratch.
+ *
+ * This function leaves the caller to do all the heavy lifting, so it's
+ * generally better to use one of the functions below instead.
+ *
+ * See the file header comments for more details on the format of an
+ * identifier.
+ */
+const char *
+pgpa_create_identifier(const char *alias_name, int occurrence,
+ const char *partnsp, const char *partrel,
+ const char *plan_name)
+{
+ const char *result;
+
+ result = quote_identifier(alias_name);
+
+ if (occurrence != 1)
+ {
+ Assert(occurrence > 1);
+ result = psprintf("%s#%d", result, occurrence);
+ }
+
+ if (partrel != NULL)
+ {
+ Assert(partnsp != NULL);
+ result = psprintf("%s/%s.%s", result,
+ quote_identifier(partnsp),
+ quote_identifier(partrel));
+ }
+
+ if (plan_name != NULL)
+ result = psprintf("%s@%s", result, quote_identifier(plan_name));
+
+ return result;
+}
+
+/*
+ * Create a range table identifier for a given RTI.
+ */
+const char *
+pgpa_create_identifier_by_rti(PlannerInfo *root, Index rti, char *plan_name)
+{
+ Index top_rti = rti;
+ int occurrence = 1;
+ RangeTblEntry *rte;
+ RangeTblEntry *top_rte;
+ char *partnsp = NULL;
+ char *partrel = NULL;
+
+ /*
+ * If this is a child RTE, find the topmost parent that is still of type
+ * RTE_RELATION. We do this because we identify children of partitioned
+ * tables by the name of the child table, but subqueries can also have
+ * child rels and we don't care about those here.
+ */
+ for (;;)
+ {
+ AppendRelInfo *appinfo = root->append_rel_array[top_rti];
+ RangeTblEntry *parent_rte;
+
+ if (appinfo == NULL)
+ break;
+
+ parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
+ if (parent_rte->rtekind != RTE_RELATION)
+ break;
+
+ top_rti = appinfo->parent_relid;
+ }
+
+ /* Get the range table entries for the RTI and top RTI. */
+ rte = planner_rt_fetch(rti, root);
+ top_rte = planner_rt_fetch(top_rti, root);
+ Assert(rte->rtekind != RTE_JOIN);
+ Assert(top_rte->rtekind != RTE_JOIN);
+
+ /* Work out the correct occurrence number. */
+ for (Index prior_rti = 1; prior_rti < rti; ++prior_rti)
+ {
+ RangeTblEntry *prior_rte;
+ AppendRelInfo *appinfo = root->append_rel_array[prior_rti];
+
+ /*
+ * If this is a child rel of a parent that is a relation, skip it.
+ *
+ * Such range table entries are disambiguated by mentioning the schema
+ * and name of the table, not by counting them as separate occurrences
+ * of the same table.
+ */
+ if (appinfo != NULL)
+ {
+ RangeTblEntry *parent_rte;
+
+ parent_rte = planner_rt_fetch(appinfo->parent_relid, root);
+ if (parent_rte->rtekind == RTE_RELATION)
+ continue;
+ }
+
+ /* Skip joins. */
+ prior_rte = planner_rt_fetch(prior_rti, root);
+ if (prior_rte->rtekind == RTE_JOIN)
+ continue;
+
+ /* Skip if the alias name differs. */
+ if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+ continue;
+
+ /* Looks like a true duplicate. */
+ ++occurrence;
+ }
+
+ /* If this is a child table, get the schema and relation names. */
+ if (rti != top_rti)
+ {
+ partnsp = get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+ partrel = get_rel_name(rte->relid);
+ }
+
+ /* We have everything we need to create the identifier. */
+ return pgpa_create_identifier(top_rte->eref->aliasname,
+ occurrence, partnsp, partrel,
+ plan_name);
+}
+
+/*
+ * Create an array of range table identifiers for all the non-NULL,
+ * non-RTE_JOIN entries in the PlannedStmt's range table.
+ */
+const char **
+pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt)
+{
+ Index rtable_length = list_length(pstmt->rtable);
+ const char **result = palloc0_array(const char *, rtable_length);
+ Index *top_rti_map;
+ int rtinfoindex = 0;
+ SubPlanRTInfo *rtinfo = NULL;
+ SubPlanRTInfo *nextrtinfo = NULL;
+
+ /*
+ * Account for relations addded by inheritance expansion of partitioned
+ * tables.
+ */
+ top_rti_map = pgpa_create_top_rti_map(rtable_length, pstmt->rtable,
+ pstmt->appendRelations);
+
+ /*
+ * When we begin iterating, we're processing the portion of the range
+ * table that originated from the top-level PlannerInfo, so subrtinfo is
+ * NULL. Later, subrtinfo will be the SubPlanRTInfo for the subquery whose
+ * portion of the range table we are processing. nextrtinfo is always the
+ * SubPlanRTInfo that follows the current one, if any, so when we're
+ * processing the top-level query's portion of the range table, the next
+ * SubPlanRTInfo is the very first one.
+ */
+ if (pstmt->subrtinfos != NULL)
+ nextrtinfo = linitial(pstmt->subrtinfos);
+
+ /* Main loop over the range table. */
+ for (Index rti = 1; rti <= rtable_length; rti++)
+ {
+ const char *plan_name;
+ Index top_rti;
+ RangeTblEntry *rte;
+ RangeTblEntry *top_rte;
+ char *partnsp = NULL;
+ char *partrel = NULL;
+ int occurrence;
+
+ /*
+ * Advance to the next SubPlanRTInfo, if it's time to do that.
+ *
+ * This loop probably shouldn't ever iterate more than once, because
+ * that would imply that a subquery was planned but added nothing to
+ * the range table; but let's be defensive and assume it can happen.
+ */
+ while (nextrtinfo != NULL && rti > nextrtinfo->rtoffset)
+ {
+ rtinfo = nextrtinfo;
+ if (++rtinfoindex >= list_length(pstmt->subrtinfos))
+ nextrtinfo = NULL;
+ else
+ nextrtinfo = list_nth(pstmt->subrtinfos, rtinfoindex);
+ }
+
+ /* Fetch the range table entry, if any. */
+ rte = rt_fetch(rti, pstmt->rtable);
+
+ /*
+ * We can't and don't need to identify null entries, and we don't want
+ * to identify join entries.
+ */
+ if (rte == NULL || rte->rtekind == RTE_JOIN)
+ continue;
+
+ /*
+ * If this is not a relation added by partitioned table expansion,
+ * then the top RTI/RTE are just the same as this RTI/RTE. Otherwise,
+ * we need the information for the top RTI/RTE, and must also fetch
+ * the partition schema and name.
+ */
+ top_rti = top_rti_map[rti - 1];
+ if (rti == top_rti)
+ top_rte = rte;
+ else
+ {
+ top_rte = rt_fetch(top_rti, pstmt->rtable);
+ partnsp =
+ get_namespace_name_or_temp(get_rel_namespace(rte->relid));
+ partrel = get_rel_name(rte->relid);
+ }
+
+ /* Compute the correct occurrence number. */
+ occurrence = pgpa_occurrence_number(pstmt->rtable, top_rti_map,
+ rtinfo, top_rti);
+
+ /* Get the name of the current plan (NULL for toplevel query). */
+ plan_name = rtinfo == NULL ? NULL : rtinfo->plan_name;
+
+ /* OK, we're ready to create the identifier. */
+ result[rti - 1] = pgpa_create_identifier(top_rte->eref->aliasname,
+ occurrence,
+ partnsp, partrel,
+ plan_name);
+ }
+
+ return result;
+}
+
+/*
+ * Build a mapping from each RTI to the RTI whose alias_name will be used to
+ * construct the range table identifier.
+ *
+ * For child relations, this is the topmost parent that is still of type
+ * RTE_RELATION. For other relations, it's just the original RTI.
+ *
+ * Since we're eventually going to need this information for every RTI in
+ * the range table, it's best to compute all the answers in a single pass over
+ * the AppendRelInfo list. Otherwise, we might end up searching through that
+ * list repeatedly for entries of interest.
+ *
+ * Note that the returned array is uses zero-based indexing, while RTIs use
+ * 1-based indexing, so subtract 1 from the RTI before looking it up in the
+ * array.
+ */
+static Index *
+pgpa_create_top_rti_map(Index rtable_length, List *rtable, List *appinfos)
+{
+ Index *top_rti_map = palloc0_array(Index, rtable_length);
+
+ /* Initially, make every RTI point to itself. */
+ for (Index rti = 1; rti <= rtable_length; ++rti)
+ top_rti_map[rti - 1] = rti;
+
+ /* Update the map for each AppendRelInfo object. */
+ foreach_node(AppendRelInfo, appinfo, appinfos)
+ {
+ Index parent_rti = appinfo->parent_relid;
+ RangeTblEntry *parent_rte = rt_fetch(parent_rti, rtable);
+
+ /* If the parent is not RTE_RELATION, ignore this entry. */
+ if (parent_rte->rtekind != RTE_RELATION)
+ continue;
+
+ /*
+ * Map the child to wherever we mapped the parent. Parents always
+ * precede their children in the AppendRelInfo list, so this should
+ * work out.
+ */
+ top_rti_map[appinfo->child_relid - 1] = top_rti_map[parent_rti - 1];
+ }
+
+ return top_rti_map;
+}
+
+/*
+ * Find the occurence number of a certain relation within a certain subquery.
+ *
+ * The same alias name can occur multiple times within a subquery, but we want
+ * to disambiguate by giving different occurrences different integer indexes.
+ * However, child tables are disambiguated by including the table name rather
+ * than by incrementing the occurrence number; and joins are not named and so
+ * shouldn't increment the occurence number either.
+ */
+static int
+pgpa_occurrence_number(List *rtable, Index *top_rti_map,
+ SubPlanRTInfo *rtinfo, Index rti)
+{
+ Index rtoffset = (rtinfo == NULL) ? 0 : rtinfo->rtoffset;
+ int occurrence = 1;
+ RangeTblEntry *rte = rt_fetch(rti, rtable);
+
+ for (Index prior_rti = rtoffset + 1; prior_rti < rti; ++prior_rti)
+ {
+ RangeTblEntry *prior_rte;
+
+ /*
+ * If this is a child rel of a parent that is a relation, skip it.
+ *
+ * Such range table entries are disambiguated by mentioning the schema
+ * and name of the table, not by counting them as separate occurrences
+ * of the same table.
+ */
+ if (top_rti_map[prior_rti - 1] != prior_rti)
+ break;
+
+ /* Skip joins. */
+ prior_rte = rt_fetch(prior_rti, rtable);
+ if (prior_rte->rtekind == RTE_JOIN)
+ continue;
+
+ /* Skip if the alias name differs. */
+ if (strcmp(prior_rte->eref->aliasname, rte->eref->aliasname) != 0)
+ continue;
+
+ /* Looks like a true duplicate. */
+ ++occurrence;
+ }
+
+ return occurrence;
+}
diff --git a/contrib/pg_plan_advice/pgpa_identifier.h b/contrib/pg_plan_advice/pgpa_identifier.h
new file mode 100644
index 0000000000..7684ba0a6a
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_identifier.h
@@ -0,0 +1,29 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_identifier.h
+ * create appropriate identifiers for range table entries
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_identifier.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef PGPA_IDENTIFIER_H
+#define PGPA_IDENTIFIER_H
+
+#include "nodes/pathnodes.h"
+#include "nodes/plannodes.h"
+
+extern const char *pgpa_create_identifier(const char *alias_name,
+ int occurrence,
+ const char *partnsp,
+ const char *partrel,
+ const char *plan_name);
+extern const char *pgpa_create_identifier_by_rti(PlannerInfo *root,
+ Index rti,
+ char *plan_name);
+extern const char **pgpa_create_identifiers_for_planned_stmt(PlannedStmt *pstmt);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_join.c b/contrib/pg_plan_advice/pgpa_join.c
new file mode 100644
index 0000000000..1984bd1352
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_join.c
@@ -0,0 +1,601 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.c
+ * analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_join.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/pathnodes.h"
+#include "nodes/print.h"
+#include "parser/parsetree.h"
+
+/*
+ * Temporary object used when unrolling a join tree.
+ */
+struct pgpa_join_unroller
+{
+ unsigned nallocated;
+ unsigned nused;
+ Plan *outer_subplan;
+ ElidedNode *outer_elided_node;
+ pgpa_join_strategy *strategy;
+ Plan **inner_subplans;
+ ElidedNode **inner_elided_nodes;
+ pgpa_join_unroller **inner_unrollers;
+};
+
+static pgpa_join_strategy pgpa_decompose_join(pgpa_plan_walker_context *walker,
+ Plan *plan,
+ Plan **realouter,
+ Plan **realinner,
+ ElidedNode **elidedrealouter,
+ ElidedNode **elidedrealinner);
+static ElidedNode *pgpa_descend_node(PlannedStmt *pstmt, Plan **plan);
+static ElidedNode *pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan);
+static bool pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+ ElidedNode **elided_node);
+
+static Bitmapset *pgpa_add_member_relids(Bitmapset *relids,
+ pgpa_join_member *member);
+
+static bool is_result_node_with_child(Plan *plan);
+static bool is_sorting_plan(Plan *plan);
+
+/*
+ * Create an initially-empty object for unrolling joins.
+ *
+ * See comments for pgpa_unrolled_join and pgpa_clumped_join in pgpa_join.h
+ * for a discussion of clumped and unrolled joins. This function creates a
+ * helper object that can later be used to create a pgpa_unrolled_join, after
+ * first calling pgpa_unroll_join one or more times.
+ */
+pgpa_join_unroller *
+pgpa_create_join_unroller(void)
+{
+ pgpa_join_unroller *join_unroller;
+
+ join_unroller = palloc0_object(pgpa_join_unroller);
+ join_unroller->nallocated = 4;
+ join_unroller->strategy =
+ palloc_array(pgpa_join_strategy, join_unroller->nallocated);
+ join_unroller->inner_subplans =
+ palloc_array(Plan *, join_unroller->nallocated);
+ join_unroller->inner_elided_nodes =
+ palloc_array(ElidedNode *, join_unroller->nallocated);
+ join_unroller->inner_unrollers =
+ palloc_array(pgpa_join_unroller *, join_unroller->nallocated);
+
+ return join_unroller;
+}
+
+/*
+ * Unroll one level of an unrollable join tree.
+ *
+ * Our basic goal here is to unroll join trees as they occur in the Plan
+ * tree into a simpler and more regular structure that we can more easily
+ * use for further processing. Unrolling is outer-deep, so if the plan tree
+ * has Join1(Join2(A,B),Join3(C,D)), the same join unroller object should be
+ * used for Join1 and Join2, but a different one will be needed for Join3,
+ * since that involves a join within the *inner* side of another join.
+ *
+ * pgpa_plan_walker creates a "top level" join unroller object when it
+ * encounters a join in a portion of the plan tree in which no join unroller
+ * is already active. From there, this function is responsible for determing
+ * to what portion of the plan tree that join unroller applies, and for
+ * creating any subordinate join unroller objects that are needed as a result
+ * of non-outer-deep join trees. We do this by returning the join unroller
+ * objects that should be used for further traversal of the outer and inner
+ * subtrees of the current plan node via *outer_join_unroller and
+ * *inner_join_unroller, respectively.
+ */
+void
+pgpa_unroll_join(pgpa_plan_walker_context *walker, Plan *plan,
+ pgpa_join_unroller *join_unroller,
+ pgpa_join_unroller **outer_join_unroller,
+ pgpa_join_unroller **inner_join_unroller)
+{
+ pgpa_join_strategy strategy;
+ Plan *realinner,
+ *realouter;
+ ElidedNode *elidedinner,
+ *elidedouter;
+ int n;
+
+ Assert(join_unroller != NULL);
+
+ /*
+ * We need to pass the join_unroller object down through certain types of
+ * plan nodes -- anything that's considered part of the join strategy, and
+ * any other nodes that can occur in a join tree despite not being scans
+ * or joins.
+ *
+ * This includes:
+ *
+ * (1) Materialize, Memoize, and Hash nodes, which are part of the join
+ * strategy,
+ *
+ * (2) Gather and Gather Merge nodes, which can occur at any point in the
+ * join tree where the planner decided to initiate parallelism,
+ *
+ * (3) Sort and IncrementalSort nodes, which can occur beneath MergeJoin
+ * or GatherMerge,
+ *
+ * (4) Agg and Unique nodes, which can occur when we decide to make the
+ * nullable side of a semijoin unique and then join the result, and
+ *
+ * (5) Result nodes with children, which can be added either to project to
+ * enforce a one-time filter (but Result nodes without children are
+ * degenerate scans or joins).
+ */
+ if (IsA(plan, Material) || IsA(plan, Memoize) || IsA(plan, Hash)
+ || IsA(plan, Gather) || IsA(plan, GatherMerge)
+ || is_sorting_plan(plan) || IsA(plan, Agg) || IsA(plan, Unique)
+ || is_result_node_with_child(plan))
+ {
+ *outer_join_unroller = join_unroller;
+ return;
+ }
+
+ /*
+ * Since we've already handled nodes that require pass-through treatment,
+ * this should be an unrollable join.
+ */
+ strategy = pgpa_decompose_join(walker, plan,
+ &realouter, &realinner,
+ &elidedouter, &elidedinner);
+
+ /* If our workspace is full, expand it. */
+ if (join_unroller->nused >= join_unroller->nallocated)
+ {
+ join_unroller->nallocated *= 2;
+ join_unroller->strategy =
+ repalloc_array(join_unroller->strategy,
+ pgpa_join_strategy,
+ join_unroller->nallocated);
+ join_unroller->inner_subplans =
+ repalloc_array(join_unroller->inner_subplans,
+ Plan *,
+ join_unroller->nallocated);
+ join_unroller->inner_elided_nodes =
+ repalloc_array(join_unroller->inner_elided_nodes,
+ ElidedNode *,
+ join_unroller->nallocated);
+ join_unroller->inner_unrollers =
+ repalloc_array(join_unroller->inner_unrollers,
+ pgpa_join_unroller *,
+ join_unroller->nallocated);
+ }
+
+ /*
+ * Since we're flattening outer-deep join trees, it follows that if the
+ * outer side is still an unrollable join, it should be unrolled into this
+ * same object. Otherwise, we've reached the limit of what we can unroll
+ * into this object and must remember the outer side as the final outer
+ * subplan.
+ */
+ if (elidedouter == NULL && pgpa_is_join(realouter))
+ *outer_join_unroller = join_unroller;
+ else
+ {
+ join_unroller->outer_subplan = realouter;
+ join_unroller->outer_elided_node = elidedouter;
+ }
+
+ /*
+ * Store the inner subplan. If it's an unrollable join, it needs to be
+ * flattened in turn, but into a new unroller object, not this one.
+ */
+ n = join_unroller->nused++;
+ join_unroller->strategy[n] = strategy;
+ join_unroller->inner_subplans[n] = realinner;
+ join_unroller->inner_elided_nodes[n] = elidedinner;
+ if (elidedinner == NULL && pgpa_is_join(realinner))
+ *inner_join_unroller = pgpa_create_join_unroller();
+ else
+ *inner_join_unroller = NULL;
+ join_unroller->inner_unrollers[n] = *inner_join_unroller;
+}
+
+/*
+ * Use the data we've accumulated in a pgpa_join_unroller object to construct
+ * a pgpa_unrolled_join.
+ */
+pgpa_unrolled_join *
+pgpa_build_unrolled_join(PlannedStmt *pstmt,
+ pgpa_join_unroller *join_unroller)
+{
+ pgpa_unrolled_join *ujoin;
+ int i;
+
+ /*
+ * We shouldn't have gone even so far as to create a join unroller unless
+ * we found at least one unrollable join.
+ */
+ Assert(join_unroller->nused > 0);
+
+ /* Allocate result structures. */
+ ujoin = palloc0_object(pgpa_unrolled_join);
+ ujoin->ninner = join_unroller->nused;
+ ujoin->strategy = palloc0_array(pgpa_join_strategy, join_unroller->nused);
+ ujoin->inner = palloc0_array(pgpa_join_member, join_unroller->nused);
+
+ /* Handle the outermost join. */
+ ujoin->outer.plan = join_unroller->outer_subplan;
+ ujoin->outer.elided_node = join_unroller->outer_elided_node;
+ ujoin->outer.scan = pgpa_build_scan(pstmt, ujoin->outer.plan,
+ ujoin->outer.elided_node, true);
+
+ /*
+ * We want the joins from the deepest part of the plan tree to appear
+ * first in the result object, but the join unroller adds them in exactly
+ * the reverse of that order, so we need to flip the order of the arrays
+ * when constructing the final result.
+ */
+ for (i = 0; i < join_unroller->nused; ++i)
+ {
+ int k = join_unroller->nused - i - 1;
+
+ /* Copy strategy, Plan, and ElidedNode. */
+ ujoin->strategy[i] = join_unroller->strategy[k];
+ ujoin->inner[i].plan = join_unroller->inner_subplans[k];
+ ujoin->inner[i].elided_node = join_unroller->inner_elided_nodes[k];
+
+ /*
+ * Fill in remaining details, using either the nested join unroller,
+ * or by deriving them from the plan and elided nodes.
+ */
+ if (join_unroller->inner_unrollers[k] != NULL)
+ ujoin->inner[i].unrolled_join =
+ pgpa_build_unrolled_join(pstmt,
+ join_unroller->inner_unrollers[k]);
+ else
+ ujoin->inner[i].scan = pgpa_build_scan(pstmt, ujoin->inner[i].plan,
+ ujoin->inner[i].elided_node,
+ true);
+ }
+
+ return ujoin;
+}
+
+/*
+ * Free memory allocated for pgpa_join_unroller.
+ */
+void
+pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller)
+{
+ pfree(join_unroller->strategy);
+ pfree(join_unroller->inner_subplans);
+ pfree(join_unroller->inner_elided_nodes);
+ pfree(join_unroller->inner_unrollers);
+ pfree(join_unroller);
+}
+
+/*
+ * Identify the join strategy used by a join and the "real" inner and outer
+ * plans.
+ *
+ * For example, a Hash Join always has a Hash node on the inner side, but
+ * for all intents and purposes the real inner input is the Hash node's child,
+ * not the Hash node itself.
+ *
+ * Likewise, a Merge Join may have Sort note on the inner or outer side; if
+ * it does, the real input to the join is the Sort node's child, not the
+ * Sort node itself.
+ *
+ * In addition, with a Merge Join or a Nested Loop, the join planning code
+ * may add additional nodes such as Materialize or Memoize. We regard these
+ * as an aspect of the join strategy. As in the previous cases, the true input
+ * to the join is the underlying node.
+ *
+ * However, if any involved child node previously had a now-elided node stacked
+ * on top, then we can't "look through" that node -- indeed, what's going to be
+ * relevant for our purposes is the ElidedNode on top of that plan node, rather
+ * than the plan node itself.
+ *
+ * If there are multiple elided nodes, we want that one that would have been
+ * uppermost in the plan tree prior to setrefs processing; we expect to find
+ * that one last in the list of elided nodes.
+ *
+ * On return *realouter and *realinner will have been set to the real inner
+ * and real outer plans that we identified, and *elidedrealouter and
+ * *elidedrealinner to the last of any correspoding elided nodes.
+ */
+static pgpa_join_strategy
+pgpa_decompose_join(pgpa_plan_walker_context *walker, Plan *plan,
+ Plan **realouter, Plan **realinner,
+ ElidedNode **elidedrealouter, ElidedNode **elidedrealinner)
+{
+ PlannedStmt *pstmt = walker->pstmt;
+ JoinType jointype = ((Join *) plan)->jointype;
+ Plan *outerplan = plan->lefttree;
+ Plan *innerplan = plan->righttree;
+ ElidedNode *elidedouter;
+ ElidedNode *elidedinner;
+ pgpa_join_strategy strategy;
+ bool uniqueouter;
+ bool uniqueinner;
+
+ elidedouter = pgpa_last_elided_node(pstmt, outerplan);
+ elidedinner = pgpa_last_elided_node(pstmt, innerplan);
+
+ switch (nodeTag(plan))
+ {
+ case T_MergeJoin:
+
+ /*
+ * The planner may have chosen to place a Material node on the
+ * inner side of the MergeJoin; if this is present, we record it
+ * as part of the join strategy.
+ */
+ if (elidedinner == NULL && IsA(innerplan, Material))
+ {
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_MERGE_JOIN_MATERIALIZE;
+ }
+ else
+ strategy = JSTRAT_MERGE_JOIN_PLAIN;
+
+ /*
+ * For a MergeJoin, either the outer or the inner subplan, or
+ * both, may have needed to be sorted; we must disregard any Sort
+ * or IncrementalSort node to find the real inner or outer
+ * subplan.
+ */
+ if (elidedouter == NULL && is_sorting_plan(outerplan))
+ elidedouter = pgpa_descend_node(pstmt, &outerplan);
+ if (elidedinner == NULL && is_sorting_plan(innerplan))
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ break;
+
+ case T_NestLoop:
+
+ /*
+ * The planner may have chosen to place a Material or Memoize node
+ * on the inner side of the NestLoop; if this is present, we
+ * record it as part of the join strategy.
+ */
+ if (elidedinner == NULL && IsA(innerplan, Material))
+ {
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_NESTED_LOOP_MATERIALIZE;
+ }
+ else if (elidedinner == NULL && IsA(innerplan, Memoize))
+ {
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_NESTED_LOOP_MEMOIZE;
+ }
+ else
+ strategy = JSTRAT_NESTED_LOOP_PLAIN;
+ break;
+
+ case T_HashJoin:
+
+ /*
+ * The inner subplan of a HashJoin is always a Hash node; the real
+ * inner subplan is the Hash node's child.
+ */
+ Assert(IsA(innerplan, Hash));
+ Assert(elidedinner == NULL);
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+ strategy = JSTRAT_HASH_JOIN;
+ break;
+
+ default:
+ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(plan));
+ }
+
+ /*
+ * The planner may have decided to implement a semijoin by first making
+ * the nullable side of the plan unique, and then performing a normal join
+ * against the result. Therefore, we might need to descend through a
+ * unique node on either side of the plan.
+ */
+ uniqueouter = pgpa_descend_any_unique(pstmt, &outerplan, &elidedouter);
+ uniqueinner = pgpa_descend_any_unique(pstmt, &innerplan, &elidedinner);
+
+ /*
+ * The planner may have decided to parallelize part of the join tree, so
+ * we could find a Gather or Gather Merge node here. Note that, if
+ * present, this will appear below both (1) nodes we considered as part of
+ * the join strategy and (2) nodes inserted for uniqueness, because we
+ * currently only consider gathering a partial path on top of the final
+ * path for some RelOptInfo.
+ */
+ if (elidedouter == NULL)
+ elidedouter = pgpa_descend_any_gather(pstmt, &outerplan);
+ if (elidedinner == NULL)
+ elidedinner = pgpa_descend_any_gather(pstmt, &innerplan);
+
+ /*
+ * It's possible that Result node has been inserted either to project a
+ * target list or to implement a one-time filter. If so, we can descend
+ * throught it. Note that a result node without a child would be a
+ * degenerate scan or join, and not something we could descend through.
+ *
+ * XXX. I suspect it's possible for this to happen above the Gather or
+ * Gather Merge node, too, but apparently we have no test case for that
+ * scenario.
+ */
+ if (elidedouter == NULL && is_result_node_with_child(outerplan))
+ elidedouter = pgpa_descend_node(pstmt, &outerplan);
+ if (elidedinner == NULL && is_result_node_with_child(innerplan))
+ elidedinner = pgpa_descend_node(pstmt, &innerplan);
+
+ /*
+ * If this is a semijoin that was converted to an inner join by making one
+ * side or the other unique, make a note that the inner or outer subplan,
+ * as appropriate, should be treated as a query plan feature when the main
+ * tree traversal reaches it.
+ *
+ * Conversely, if the planner could have made one side of the join unique
+ * and thereby converted it to an inner join, and chose not to do so, that
+ * is also worth noting. (XXX: I'm not sure that we need to emit
+ * non-unique advice in every case where these join-type tests pass.)
+ *
+ * NB: This code could appear slightly higher up in in this function, but
+ * none of the nodes through which we just descended should be have
+ * associated RTIs.
+ *
+ * NB: This seems like a somewhat hacky way of passing information up to
+ * the main tree walk, but I don't currently have a better idea.
+ */
+ if (uniqueouter)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, outerplan);
+ else if (jointype == JOIN_RIGHT_SEMI)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, outerplan);
+ if (uniqueinner)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_UNIQUE, innerplan);
+ else if (jointype == JOIN_SEMI)
+ pgpa_add_future_feature(walker, PGPAQF_SEMIJOIN_NON_UNIQUE, innerplan);
+
+ /* Set output parameters. */
+ *realouter = outerplan;
+ *realinner = innerplan;
+ *elidedrealouter = elidedouter;
+ *elidedrealinner = elidedinner;
+ return strategy;
+}
+
+/*
+ * Descend through a Plan node in a join tree that the caller has determined
+ * to be irrelevant.
+ *
+ * Updates *plan, and returns the last of any elided nodes pertaining to the
+ * new plan node.
+ */
+static ElidedNode *
+pgpa_descend_node(PlannedStmt *pstmt, Plan **plan)
+{
+ *plan = (*plan)->lefttree;
+ return pgpa_last_elided_node(pstmt, *plan);
+}
+
+/*
+ * Descend through a Gather or Gather Merge node, if present, and any Sort
+ * or IncrementalSort node occurring under a Gather Merge.
+ *
+ * Caller should have verified that there is no ElidedNode pertaining to
+ * the initial value of *plan.
+ *
+ * Updates *plan, and returns the last of any elided nodes pertaining to the
+ * new plan node.
+ */
+static ElidedNode *
+pgpa_descend_any_gather(PlannedStmt *pstmt, Plan **plan)
+{
+ if (IsA(*plan, Gather))
+ return pgpa_descend_node(pstmt, plan);
+
+ if (IsA(*plan, GatherMerge))
+ {
+ ElidedNode *elided = pgpa_descend_node(pstmt, plan);
+
+ if (elided == NULL && is_sorting_plan(*plan))
+ elided = pgpa_descend_node(pstmt, plan);
+
+ return elided;
+ }
+
+ return NULL;
+}
+
+/*
+ * If *plan is an Agg or Unique node, we want to descend through it, unless
+ * it has a corresponding elided node. If its immediate child is a Sort or
+ * IncrementalSort, we also want to descend through that, unless it has a
+ * corresponding elided node.
+ *
+ * On entry, *elided_node must be the last of any elided nodes corresponding
+ * to *plan; on exit, this will still be true, but *plan may have been updated.
+ *
+ * The reason we don't want to descend through elided nodes is that a single
+ * join tree can't cross through any sort of elided node: subqueries are
+ * planned separately, and planning inside an Append or MergeAppend is
+ * separate from planning outside of it.
+ *
+ * The return value is true if we descend through at least one node, and
+ * otherwise false.
+ */
+static bool
+pgpa_descend_any_unique(PlannedStmt *pstmt, Plan **plan,
+ ElidedNode **elided_node)
+{
+ if (*elided_node != NULL)
+ return false;
+
+ if (IsA(*plan, Agg) || IsA(*plan, Unique))
+ {
+ *elided_node = pgpa_descend_node(pstmt, plan);
+
+ if (*elided_node == NULL && is_sorting_plan(*plan))
+ *elided_node = pgpa_descend_node(pstmt, plan);
+
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Create a Bitmapset of all RTIs that we associated with an unrolled join.
+ *
+ * As usual, join RTIs are excluded.
+ */
+Bitmapset *
+pgpa_unrolled_join_all_relids(pgpa_unrolled_join *join)
+{
+ Bitmapset *relids = NULL;
+
+ relids = pgpa_add_member_relids(relids, &join->outer);
+
+ for (int k = 0; k < join->ninner; ++k)
+ relids = pgpa_add_member_relids(relids, &join->inner[k]);
+
+ return relids;
+}
+
+/*
+ * Add all RTIs that we associate with a join member to the provided set,
+ * returning the result.
+ */
+static Bitmapset *
+pgpa_add_member_relids(Bitmapset *relids, pgpa_join_member *member)
+{
+ if (member->unrolled_join != NULL)
+ return bms_union(relids,
+ pgpa_unrolled_join_all_relids(member->unrolled_join));
+ else
+ {
+ Assert(member->scan != NULL);
+ return bms_union(relids, member->scan->relids);
+ }
+}
+
+/*
+ * Is this a Result node that has a child?
+ */
+static bool
+is_result_node_with_child(Plan *plan)
+{
+ return IsA(plan, Result) && plan->lefttree != NULL;
+}
+
+/*
+ * Is this a Plan node whose purpose is put the data in a certain order?
+ */
+static bool
+is_sorting_plan(Plan *plan)
+{
+ return IsA(plan, Sort) || IsA(plan, IncrementalSort);
+}
diff --git a/contrib/pg_plan_advice/pgpa_join.h b/contrib/pg_plan_advice/pgpa_join.h
new file mode 100644
index 0000000000..7c861fb446
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_join.h
@@ -0,0 +1,118 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_join.h
+ * analysis of joins in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_join.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_JOIN_H
+#define PGPA_JOIN_H
+
+#include "nodes/plannodes.h"
+
+struct pgpa_plan_walker_context;
+struct pgpa_clumped_join;
+typedef struct pgpa_join_unroller pgpa_join_unroller;
+typedef struct pgpa_unrolled_join pgpa_unrolled_join;
+
+/*
+ * Although there are three main join strategies, we try to classify things
+ * more precisely here: merge joins have the option of using materialization
+ * on the inner side, and nested loops can use either materialization or
+ * memoization.
+ */
+typedef enum
+{
+ JSTRAT_MERGE_JOIN_PLAIN = 0,
+ JSTRAT_MERGE_JOIN_MATERIALIZE,
+ JSTRAT_NESTED_LOOP_PLAIN,
+ JSTRAT_NESTED_LOOP_MATERIALIZE,
+ JSTRAT_NESTED_LOOP_MEMOIZE,
+ JSTRAT_HASH_JOIN
+ /* update NUM_PGPA_JOIN_STRATEGY if you add anything here */
+} pgpa_join_strategy;
+
+#define NUM_PGPA_JOIN_STRATEGY ((int) JSTRAT_HASH_JOIN + 1)
+
+/*
+ * In an outer-deep join tree, every member of an unrolled join will be a scan,
+ * but join trees with other shapes can contain unrolled joins.
+ *
+ * The plan node we store here will be the inner or outer child of the join
+ * node, as appropriate, except that we look through subnodes that we regard as
+ * part of the join method itself. For instance, for a Nested Loop that
+ * materializes the inner input, we'll store the child of the Materialize node,
+ * not the Materialize node itself.
+ *
+ * If setrefs processing elided one or more nodes from the plan tree, then
+ * we'll store details about the topmost of those in elided_node; otherwise,
+ * it will be NULL.
+ *
+ * Exactly one of scan and unrolled_join will be non-NULL.
+ */
+typedef struct
+{
+ Plan *plan;
+ ElidedNode *elided_node;
+ struct pgpa_scan *scan;
+ pgpa_unrolled_join *unrolled_join;
+} pgpa_join_member;
+
+/*
+ * Non-clumped joins are unrolled; that is, we convert outer-deep join trees
+ * to a flat structure. ((A JOIN B) JOIN C) JOIN D gets converted to
+ * outer = A, inner = <B C D>, provided that none of the joins involved are
+ * clumped. When joins aren't outer-deep, substructure is required, e.g.
+ * (A JOIN B) JOIN (C JOIN D) is represented as outer = A, inner = <B X>,
+ * where X is a clumped or unrolled join covering C-D.
+ */
+struct pgpa_unrolled_join
+{
+ /* Outermost member; must not itself be an unrolled join. */
+ pgpa_join_member outer;
+
+ /* Number of inner members. Length of the strategy and inner arrays. */
+ unsigned ninner;
+
+ /* Array of strategies, one per non-outermost member. */
+ pgpa_join_strategy *strategy;
+
+ /* Array of members, excluding the outermost. Deepest first. */
+ pgpa_join_member *inner;
+};
+
+/*
+ * Classification of Plan nodes based on what structure we should construct.
+ */
+typedef enum
+{
+ PGPA_NON_JOIN,
+ PGPA_CLUMPED_JOIN,
+ PGPA_UNROLLED_JOIN
+} pgpa_join_class;
+
+/*
+ * Does this plan node inherit from Join?
+ */
+static inline bool
+pgpa_is_join(Plan *plan)
+{
+ return IsA(plan, NestLoop) || IsA(plan, MergeJoin) || IsA(plan, HashJoin);
+}
+
+extern pgpa_join_unroller *pgpa_create_join_unroller(void);
+extern void pgpa_unroll_join(struct pgpa_plan_walker_context *walker,
+ Plan *plan,
+ pgpa_join_unroller *join_unroller,
+ pgpa_join_unroller **outer_join_unroller,
+ pgpa_join_unroller **inner_join_unroller);
+extern pgpa_unrolled_join *pgpa_build_unrolled_join(PlannedStmt *pstmt,
+ pgpa_join_unroller *join_unroller);
+extern void pgpa_destroy_join_unroller(pgpa_join_unroller *join_unroller);
+extern Bitmapset *pgpa_unrolled_join_all_relids(pgpa_unrolled_join *join);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_output.c b/contrib/pg_plan_advice/pgpa_output.c
new file mode 100644
index 0000000000..1b660791ba
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_output.c
@@ -0,0 +1,480 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.c
+ * produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_output.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "pgpa_output.h"
+#include "pgpa_scan.h"
+
+typedef struct pgpa_output_context
+{
+ const char **rt_identifiers;
+ StringInfo buf;
+ List *unrolled_joins[NUM_PGPA_JOIN_STRATEGY];
+ List *scans[NUM_PGPA_SCAN_STRATEGY];
+ List *query_features[NUM_PGPA_QF_TYPES];
+ int wrap_column;
+} pgpa_output_context;
+
+static void pgpa_output_unrolled_join(pgpa_output_context *context,
+ pgpa_unrolled_join *join);
+static void pgpa_output_join_member(pgpa_output_context *context,
+ pgpa_join_member *member);
+static void pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+ Bitmapset *relids);
+
+static char *pgpa_cstring_join_strategy(pgpa_join_strategy strategy);
+static char *pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy);
+
+static void pgpa_maybe_linebreak(StringInfo buf, int wrap_column);
+
+/*
+ * Append query advice to the provided buffer.
+ *
+ * Before calling this function, 'walker' must be used to iterate over the
+ * main plan tree and all subplans from the PlannedStmt.
+ *
+ * 'rt_identifiers' is a table of unique identifiers, one for each RTI.
+ * See pgpa_create_identifiers_for_planned_stmt().
+ *
+ * Results will be appended to 'buf'.
+ */
+void
+pgpa_output_advice(StringInfo buf, pgpa_plan_walker_context *walker,
+ const char **rt_identifiers)
+{
+ ListCell *lc;
+ pgpa_output_context context;
+
+ /*
+ * If the user chooses to use EXPLAIN (PLAN_ADVICE) in an 80-column window
+ * from a psql client with default settings, psql will add one space to the
+ * left of the output and EXPLAIN will add two more to the left of the
+ * advice. Thus, lines of more than 77 characters will wrap. We set the
+ * wrap limit to 76 here so that the output won't reach all the way to the
+ * very last column of the terminal.
+ *
+ * Of course, this is fairly arbitrary set of assumptions, and one could
+ * well make an argument for a different wrap limit, or for a configurable
+ * one.
+ */
+ memset(&context, 0, sizeof(pgpa_output_context));
+ context.rt_identifiers = rt_identifiers;
+ context.buf = buf;
+ context.wrap_column = 76;
+
+ /*
+ * Put all the top-level scans for each strategy into a single list.
+ */
+ foreach(lc, walker->scans)
+ {
+ pgpa_scan *scan = lfirst(lc);
+
+ context.scans[scan->strategy] =
+ lappend(context.scans[scan->strategy], scan->relids);
+ }
+
+ /*
+ * Each piece of JOIN_ORDER() advice fully describes the join order for a
+ * a single unrolled join. Merging is not permitted, because that would
+ * change the meaning, e.g. SEQ_SCAN(a b c d) means simply that sequential
+ * scans should be used for all of those relations, and is thus equivalent
+ * to SEQ_SCAN(a b) SEQ_SCAN(c d), but JOIN_ORDER(a b c d) means that "a"
+ * is the driving table which is then joined to "b" then "c" then "d",
+ * which is totally different from JOIN_ORDER(a b) and JOIN_ORDER(c d).
+ *
+ * As a side effect, the calls to pgpa_output_unrolled_join() will add to
+ * the context.unrolled_joins[] lists and context.scans[] lists, which
+ * will be important for emitting join and scan strategy advice further
+ * down.
+ */
+ foreach(lc, walker->unrolled_joins)
+ {
+ pgpa_unrolled_join *ujoin = lfirst(lc);
+
+ if (buf->len > 0)
+ appendStringInfoChar(buf, '\n');
+ appendStringInfo(context.buf, "JOIN_ORDER(");
+ pgpa_output_unrolled_join(&context, ujoin);
+ appendStringInfoChar(context.buf, ')');
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ }
+
+ /*
+ * Emit just one piece of advice for each unrolled-join strategy that is
+ * used at least once in the query.
+ */
+ for (int s = 0; s < NUM_PGPA_JOIN_STRATEGY; ++s)
+ {
+ char *strategy = pgpa_cstring_join_strategy(s);
+ bool first = true;
+
+ if (context.unrolled_joins[s] == NIL)
+ continue;
+
+ if (buf->len > 0)
+ appendStringInfoChar(buf, '\n');
+ appendStringInfo(buf, "%s(", strategy);
+
+ foreach(lc, context.unrolled_joins[s])
+ {
+ Bitmapset *relids = lfirst(lc);
+
+ if (first)
+ first = false;
+ else
+ {
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ appendStringInfoChar(context.buf, ' ');
+ }
+
+ if (bms_membership(relids) == BMS_SINGLETON)
+ pgpa_output_relations(&context, buf, relids);
+ else
+ {
+ appendStringInfoChar(buf, '(');
+ pgpa_output_relations(&context, buf, relids);
+ appendStringInfoChar(buf, ')');
+ }
+ }
+ appendStringInfoChar(buf, ')');
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ }
+
+ /*
+ * Emit just one piece of advice for each scan strategy that is used at
+ * least once in the query.
+ *
+ * XXX. We shouldn't emit anything for ordinary scans, and we should
+ * emit more detailed information for index, index-only, and
+ * bitmap-heap scans.
+ */
+ for (int c = 0; c < NUM_PGPA_SCAN_STRATEGY; ++c)
+ {
+ char *cstrategy = pgpa_cstring_scan_strategy(c);
+ bool first = true;
+
+ if (context.scans[c] == NIL)
+ continue;
+
+ if (buf->len > 0)
+ appendStringInfoChar(buf, '\n');
+ appendStringInfo(buf, "%s(", cstrategy);
+
+ foreach(lc, context.scans[c])
+ {
+ Bitmapset *relids = lfirst(lc);
+
+ if (first)
+ first = false;
+ else
+ {
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ appendStringInfoChar(context.buf, ' ');
+ }
+
+ if (bms_membership(relids) == BMS_SINGLETON)
+ pgpa_output_relations(&context, buf, relids);
+ else
+ {
+ appendStringInfoChar(buf, '(');
+ pgpa_output_relations(&context, buf, relids);
+ appendStringInfoChar(buf, ')');
+ }
+ }
+ appendStringInfoChar(buf, ')');
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ }
+
+ /* Sort query features into one list per query feature type. */
+ foreach(lc, walker->query_features)
+ {
+ pgpa_query_feature *qf = lfirst(lc);
+
+ context.query_features[qf->type] =
+ lappend(context.query_features[qf->type], qf);
+ }
+
+ /*
+ * Emit just one piece of advice for each type of query feature that is
+ * used at least once in the query.
+ */
+ for (int t = 0; t < NUM_PGPA_QF_TYPES; ++t)
+ {
+ bool first = true;
+
+ if (context.query_features[t] == NIL)
+ continue;
+
+ if (buf->len > 0)
+ appendStringInfoChar(buf, '\n');
+
+ switch (t)
+ {
+ case PGPAQF_GATHER:
+ appendStringInfo(buf, "GATHER(");
+ break;
+ case PGPAQF_GATHER_MERGE:
+ appendStringInfo(buf, "GATHER_MERGE(");
+ break;
+ case PGPAQF_SEMIJOIN_NON_UNIQUE:
+ appendStringInfo(buf, "SEMIJOIN_NON_UNIQUE(");
+ break;
+ case PGPAQF_SEMIJOIN_UNIQUE:
+ appendStringInfo(buf, "SEMIJOIN_UNIQUE(");
+ break;
+ }
+
+ foreach(lc, context.query_features[t])
+ {
+ pgpa_query_feature *qf = lfirst(lc);
+
+ if (first)
+ first = false;
+ else
+ {
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ appendStringInfoChar(context.buf, ' ');
+ }
+
+ if (bms_membership(qf->relids) == BMS_SINGLETON)
+ pgpa_output_relations(&context, buf, qf->relids);
+ else
+ {
+ appendStringInfoChar(buf, '(');
+ pgpa_output_relations(&context, buf, qf->relids);
+ appendStringInfoChar(buf, ')');
+ }
+ }
+ appendStringInfoChar(buf, ')');
+ pgpa_maybe_linebreak(context.buf, context.wrap_column);
+ }
+}
+
+/*
+ * Output the members of an unrolled join, first the outermost member, and
+ * then the inner members one by one, as part of JOIN_ORDER() advice.
+ *
+ * As we visit members, we add to the context->unrolled_joins[] and
+ * context->scans[] lists. The former updates are done by this function
+ * directly, and the latter via pgpa_output_join_member.
+ */
+static void
+pgpa_output_unrolled_join(pgpa_output_context *context,
+ pgpa_unrolled_join *join)
+{
+ pgpa_output_join_member(context, &join->outer);
+
+ for (int k = 0; k < join->ninner; ++k)
+ {
+ pgpa_join_member *member = &join->inner[k];
+ Bitmapset *relids;
+
+ pgpa_maybe_linebreak(context->buf, context->wrap_column);
+ appendStringInfoChar(context->buf, ' ');
+ pgpa_output_join_member(context, member);
+
+ if (member->unrolled_join != NULL)
+ relids = pgpa_unrolled_join_all_relids(member->unrolled_join);
+ else
+ {
+ Assert(member->scan != NULL);
+ relids = member->scan->relids;
+ }
+ context->unrolled_joins[join->strategy[k]] =
+ lappend(context->unrolled_joins[join->strategy[k]], relids);
+ }
+}
+
+/*
+ * Output a single member of an unrolled join as part of JOIN_ORDER() advice.
+ *
+ * If that member happens to be a scan, also add it to the appropriate
+ * context->scans[] list.
+ */
+static void
+pgpa_output_join_member(pgpa_output_context *context,
+ pgpa_join_member *member)
+{
+ if (member->unrolled_join != NULL)
+ {
+ appendStringInfoChar(context->buf, '(');
+ pgpa_output_unrolled_join(context, member->unrolled_join);
+ appendStringInfoChar(context->buf, ')');
+ }
+ else
+ {
+ pgpa_scan *scan = member->scan;
+
+ Assert(scan != NULL);
+ if (bms_membership(scan->relids) == BMS_SINGLETON)
+ pgpa_output_relations(context, context->buf, scan->relids);
+ else
+ {
+ appendStringInfoChar(context->buf, '{');
+ pgpa_output_relations(context, context->buf, scan->relids);
+ appendStringInfoChar(context->buf, '}');
+ }
+ context->scans[scan->strategy] =
+ lappend(context->scans[scan->strategy], scan->relids);
+ }
+}
+
+/*
+ * Output the identifiers for each RTI in the provided set.
+ *
+ * Identifiers are separated by spaces, and a line break is possible after
+ * each one.
+ */
+static void
+pgpa_output_relations(pgpa_output_context *context, StringInfo buf,
+ Bitmapset *relids)
+{
+ int rti = -1;
+ bool first = true;
+
+ while ((rti = bms_next_member(relids, rti)) >= 0)
+ {
+ const char *identifier = context->rt_identifiers[rti - 1];
+
+ if (identifier == NULL)
+ elog(ERROR, "no identifier for RTI %d", rti);
+
+ if (first)
+ {
+ first = false;
+ appendStringInfoString(buf, identifier);
+ }
+ else
+ {
+ pgpa_maybe_linebreak(buf, context->wrap_column);
+ appendStringInfo(buf, " %s", identifier);
+ }
+ }
+}
+
+/*
+ * Get a C string that corresponds to the specified join strategy.
+ */
+static char *
+pgpa_cstring_join_strategy(pgpa_join_strategy strategy)
+{
+ switch (strategy)
+ {
+ case JSTRAT_MERGE_JOIN_PLAIN:
+ return "MERGE_JOIN_PLAIN";
+ case JSTRAT_MERGE_JOIN_MATERIALIZE:
+ return "MERGE_JOIN_MATERIALIZE";
+ case JSTRAT_NESTED_LOOP_PLAIN:
+ return "NESTED_LOOP_PLAIN";
+ case JSTRAT_NESTED_LOOP_MATERIALIZE:
+ return "NESTED_LOOP_MATERIALIZE";
+ case JSTRAT_NESTED_LOOP_MEMOIZE:
+ return "NESTED_LOOP_MEMOIZE";
+ case JSTRAT_HASH_JOIN:
+ return "HASH_JOIN";
+ }
+
+ Assert(false);
+}
+
+/*
+ * Get a C string that corresponds to the specified scan strategy.
+ */
+static char *
+pgpa_cstring_scan_strategy(pgpa_scan_strategy strategy)
+{
+ switch (strategy)
+ {
+ case PGPA_SCAN_ORDINARY:
+ return "ORDINARY_SCAN";
+ case PGPA_SCAN_SEQ:
+ return "SEQ_SCAN";
+ case PGPA_SCAN_BITMAP_HEAP:
+ return "BITMAP_HEAP_SCAN";
+ case PGPA_SCAN_FOREIGN:
+ return "FOREIGN_SCAN";
+ case PGPA_SCAN_CUSTOM:
+ return "CUSTOM_SCAN";
+ case PGPA_SCAN_INDEX:
+ return "INDEX_SCAN";
+ case PGPA_SCAN_INDEX_ONLY:
+ return "INDEX_ONLY_SCAN";
+ case PGPA_SCAN_PARTITIONWISE:
+ return "PARTITIONWISE";
+ }
+
+ Assert(false);
+}
+
+/*
+ * Insert a line break into the StringInfoData, if needed.
+ *
+ * If wrap_column is zero or negative, this does nothing. Otherwise, we
+ * consider inserting a newline. We only insert a newline if the length of
+ * the last line in the buffer exceeds wrap_column, and not if we'd be
+ * inserting a newline at or before the beginning of the current line.
+ *
+ * The position at which the newline is inserted is simply wherever the
+ * buffer ended the last time this function was called. In other words,
+ * the caller is expected to call this function every time we reach a good
+ * place for a line break.
+ */
+static void
+pgpa_maybe_linebreak(StringInfo buf, int wrap_column)
+{
+ char *trailing_nl;
+ int line_start;
+ int save_cursor;
+
+ /* If line wrapping is disabled, exit quickly. */
+ if (wrap_column <= 0)
+ return;
+
+ /*
+ * Set line_start to the byte offset within buf->data of the first
+ * character of the current line, where the current line means the last
+ * one in the buffer. Note that line_start could be the offset of the
+ * trailing '\0' if the last character in the buffer is a line break.
+ */
+ trailing_nl = strrchr(buf->data, '\n');
+ if (trailing_nl == NULL)
+ line_start = 0;
+ else
+ line_start = (trailing_nl - buf->data) + 1;
+
+ /*
+ * Remember that the current end of the buffer is a potential location to
+ * insert a line break on a future call to this function.
+ */
+ save_cursor = buf->cursor;
+ buf->cursor = buf->len;
+
+ /* If we haven't passed the wrap column, we don't need a newline. */
+ if (buf->len - line_start <= wrap_column)
+ return;
+
+ /*
+ * It only makes sense to insert a newline at a position later than the
+ * beginning of the current line.
+ */
+ if (buf->cursor <= line_start)
+ return;
+
+ /* Insert a newline at the previous cursor location. */
+ enlargeStringInfo(buf, 1);
+ memmove(&buf->data[save_cursor] + 1, &buf->data[save_cursor],
+ buf->len - save_cursor);
+ ++buf->cursor;
+ buf->data[++buf->len] = '\0';
+ buf->data[save_cursor] = '\n';
+}
diff --git a/contrib/pg_plan_advice/pgpa_output.h b/contrib/pg_plan_advice/pgpa_output.h
new file mode 100644
index 0000000000..f7d74d3794
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_output.h
@@ -0,0 +1,21 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_output.h
+ * produce textual output from the results of a plan tree walk
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_output.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_OUTPUT_H
+#define PGPA_OUTPUT_H
+
+#include "pgpa_walker.h"
+
+extern void pgpa_output_advice(StringInfo buf,
+ pgpa_plan_walker_context *walker,
+ const char **rt_identifiers);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_scan.c b/contrib/pg_plan_advice/pgpa_scan.c
new file mode 100644
index 0000000000..5d0c6f5043
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scan.c
@@ -0,0 +1,193 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.c
+ * analysis of scans in Plan trees
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/parsenodes.h"
+#include "parser/parsetree.h"
+
+/*
+ * Build a pgpa_scan object for a Plan node.
+ *
+ * If there is at least one ElidedNode for this plan node, pass the uppermost
+ * one as elided_node, else pass NULL.
+ *
+ * Set the 'force' flag if we're inside of a join problem and not otherwise;
+ * see comments below for why this matters.
+ *
+ * The return value can be NULL only if force is false.
+ */
+pgpa_scan *
+pgpa_build_scan(PlannedStmt *pstmt, Plan *plan, ElidedNode *elided_node,
+ bool force)
+{
+ pgpa_scan *scan;
+ pgpa_scan_strategy strategy = PGPA_SCAN_ORDINARY;
+ Bitmapset *relids = NULL;
+ int rti = -1;
+ bool filter_join_rtis = false;
+
+ if (elided_node != NULL)
+ {
+ NodeTag elided_type = elided_node->elided_type;
+
+ /*
+ * If setrefs processing elided an Append or MergeAppend node that
+ * had only one surviving child, then this is a partitionwise
+ * "scan" -- which may really be a partitionwise join, but there's
+ * no need to distinguish.
+ *
+ * If it's a trivial SubqueryScan that was elided, then this is an
+ * "ordinary" scan i.e. one for which we need to generate advice
+ * because the planner has not made any meaningful choice.
+ */
+ relids = elided_node->relids;
+ if (elided_type == T_Append || elided_type == T_MergeAppend)
+ strategy = PGPA_SCAN_PARTITIONWISE;
+ else
+ strategy = PGPA_SCAN_ORDINARY;
+ filter_join_rtis = true;
+ }
+ else if ((rti = pgpa_scanrelid(plan)) != 0)
+ {
+ relids = bms_make_singleton(rti);
+
+ switch (nodeTag(plan))
+ {
+ case T_SeqScan:
+ strategy = PGPA_SCAN_SEQ;
+ break;
+ case T_BitmapHeapScan:
+ strategy = PGPA_SCAN_BITMAP_HEAP;
+ break;
+ case T_ForeignScan:
+ strategy = PGPA_SCAN_FOREIGN;
+ break;
+ case T_CustomScan:
+ strategy = PGPA_SCAN_CUSTOM;
+ break;
+ case T_IndexScan:
+ strategy = PGPA_SCAN_INDEX;
+ break;
+ case T_IndexOnlyScan:
+ strategy = PGPA_SCAN_INDEX_ONLY;
+ break;
+ default:
+ strategy = PGPA_SCAN_ORDINARY;
+ break;
+ }
+ }
+ else if ((relids = pgpa_relids(plan)) != NULL)
+ {
+ switch (nodeTag(plan))
+ {
+ case T_ForeignScan:
+ strategy = PGPA_SCAN_FOREIGN;
+ break;
+ case T_Append:
+ case T_MergeAppend:
+ {
+ int first_rti;
+ RangeTblEntry *first_rte;
+
+ first_rti = bms_next_member(relids, -1);
+ first_rte = rt_fetch(first_rti, pstmt->rtable);
+
+ /*
+ * Append and MergeAppend nodes can be the result either
+ * of set operations or of inheritance expansion. In the
+ * latter case, this plan node is telling us that the relid
+ * set found in the plan node was handled partitionwise,
+ * but in the former, it isn't. Even if the set operations
+ * planner has multiple ways to execute some set operation,
+ * we lack the ability to describe what it decided here,
+ * and hence just label this as an ordinary scan.
+ *
+ * XXX. This breaks with a single Append node that involves
+ * both set operations and inheritance expansion. Consider:
+ *
+ * create table foo (a int) partition by range (a);
+ * create table foo0 partition of foo for values
+ * from (0) to (1000);
+ * create table foo1 partition of foo for values
+ * from (1000) to (2000);
+ * explain select * from foo union all select 1;
+ */
+ if (first_rte->rtekind == RTE_RELATION)
+ strategy = PGPA_SCAN_PARTITIONWISE;
+ else
+ strategy = PGPA_SCAN_ORDINARY;
+ }
+ break;
+ default:
+ strategy = PGPA_SCAN_ORDINARY;
+ break;
+ }
+ filter_join_rtis = true;
+ }
+
+ /*
+ * If this plan node has no associated RTIs, it's not a scan. When the
+ * 'force' flag is set, that's unexpected, so throw an error, else return
+ * quietly.
+ */
+ if (relids == NULL)
+ {
+ if (force)
+ elog(ERROR, "plan node has no RTIs: %d", (int) nodeTag(plan));
+ return NULL;
+ }
+
+ /*
+ * Ordinary scans appearing outside of a join problem need not be tracked,
+ * since nothing interesting can be said either about the choice of join
+ * method or about the join order.
+ */
+ if (!force && strategy == PGPA_SCAN_ORDINARY)
+ return NULL;
+
+ /* Now create the result object. */
+ scan = palloc(sizeof(pgpa_scan));
+ scan->plan = plan;
+ scan->strategy = strategy;
+
+ /*
+ * In some cases, it's possible for RTIs of type RTE_JOIN to be included,
+ * and when that is the case, we want to filter them out. We always refer
+ * to groups of relations in terms of the relation RTIs, not the join RTIs,
+ * so the join RTIs are just noise.
+ *
+ * When the code earlier in this function knows that no join RTIs can have
+ * infiltrated the relids set, it can set filter_join_rtis = false to save
+ * a few CPU cycles here.
+ *
+ * One important case where join RTIs can definitely occur is when we
+ * encounter a Result node where a whole join result has been proven empty.
+ */
+ if (!filter_join_rtis)
+ scan->relids = relids;
+ else
+ {
+ scan->relids = NULL;
+ while ((rti = bms_next_member(relids, rti)) >= 0)
+ {
+ RangeTblEntry *rte = rt_fetch(rti, pstmt->rtable);
+
+ if (rte->rtekind != RTE_JOIN)
+ scan->relids = bms_add_member(scan->relids, rti);
+ }
+ }
+
+ return scan;
+}
diff --git a/contrib/pg_plan_advice/pgpa_scan.h b/contrib/pg_plan_advice/pgpa_scan.h
new file mode 100644
index 0000000000..c2a7c645b7
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_scan.h
@@ -0,0 +1,76 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_scan.h
+ * analysis of scans in Plan trees
+ *
+ * Note that our definition of "scan" is extremely broad. It includes
+ * (1) single plan nodes that scan multiple RTIs, such as a degenerate
+ * Result node that replaces what would otherwise have been a join, and
+ * (2) Append and MergeAppend nodes implementing a partitionwise scan
+ * or a partitionwise join.
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_scan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_SCAN_H
+#define PGPA_SCAN_H
+
+#include "nodes/plannodes.h"
+
+/*
+ * Scan strategies.
+ *
+ * PGPA_SCAN_ORDINARY is any scan strategy that isn't interesting to us
+ * because there is no meaningful planner decision involved. For example,
+ * the only way to scan a subquery is a SubqueryScan, and the only way to
+ * scan a VALUES construct is a ValuesScan. We need not care exactly which
+ * type of planner node was used in such cases, because the same thing will
+ * happen when replanning.
+ *
+ * PGPA_SCAN_ORDINARY also includes Result nodes that correspond to scans
+ * or even joins that are proved empty. We don't know whether or not the scan
+ * or join will still be provably empty at replanning time, but if it is,
+ * then no scan-type advice is needed, and if it's not, we can't recommend
+ * a scan type based on the current plan.
+ *
+ * PGPA_SCAN_PARTITIONWISE also lumps together scans and joins: this can
+ * be either a partitionwise scan of a partitioned table or a partitionwise
+ * join between several partitioned tables. Note that all decisions about
+ * whether or not to use partitionwise join are meaningful: no matter what
+ * we decided this time, we could do more or fewer things partitionwise the
+ * next time.
+ *
+ * Other scan strategies map one-to-one to plan nodes.
+ */
+typedef enum
+{
+ PGPA_SCAN_ORDINARY = 0,
+ PGPA_SCAN_SEQ,
+ PGPA_SCAN_BITMAP_HEAP,
+ PGPA_SCAN_FOREIGN,
+ PGPA_SCAN_CUSTOM,
+ PGPA_SCAN_INDEX,
+ PGPA_SCAN_INDEX_ONLY,
+ PGPA_SCAN_PARTITIONWISE
+ /* update NUM_PGPA_SCAN_STRATEGY if you add anything here */
+} pgpa_scan_strategy;
+
+#define NUM_PGPA_SCAN_STRATEGY ((int) PGPA_SCAN_PARTITIONWISE + 1)
+
+/*
+ * All of the details we need regarding a scan.
+ */
+typedef struct pgpa_scan
+{
+ Plan *plan;
+ pgpa_scan_strategy strategy;
+ Bitmapset *relids;
+} pgpa_scan;
+
+extern pgpa_scan *pgpa_build_scan(PlannedStmt *pstmt, Plan *plan,
+ ElidedNode *elided_node, bool force);
+
+#endif
diff --git a/contrib/pg_plan_advice/pgpa_walker.c b/contrib/pg_plan_advice/pgpa_walker.c
new file mode 100644
index 0000000000..9eb739220a
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_walker.c
@@ -0,0 +1,408 @@
+#include "postgres.h"
+
+#include "pgpa_join.h"
+#include "pgpa_scan.h"
+#include "pgpa_walker.h"
+
+#include "nodes/plannodes.h"
+
+static pgpa_query_feature *pgpa_add_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type,
+ Plan *plan);
+
+static void pgpa_qf_add_rti(List *active_query_features, Index rti);
+static void pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids);
+static void pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan);
+
+/*
+ * Iterate over the entire plan tree.
+ *
+ * XXX. More comments.
+ */
+void
+pgpa_plan_walker(pgpa_plan_walker_context *walker, Plan *plan,
+ int join_unroll_level,
+ pgpa_join_unroller *join_unroller,
+ List *active_query_features)
+{
+ pgpa_join_unroller *outer_join_unroller = NULL;
+ pgpa_join_unroller *inner_join_unroller = NULL;
+ bool join_unroller_toplevel = false;
+ bool pushdown_query_features = false;
+ ListCell *lc;
+ List *extraplans = NIL;
+ List *elided_nodes = NIL;
+ bool is_query_feature = false;
+
+ /*
+ * If this is a Gather or Gather Merge node, directly add it to the list
+ * of currently-active query features.
+ *
+ * Otherwise, check the future_query_features list to see whether this was
+ * previously identified as a plan node that needs to be treated as a
+ * query feature.
+ */
+ if (IsA(plan, Gather))
+ {
+ active_query_features =
+ lappend(active_query_features,
+ pgpa_add_feature(walker, PGPAQF_GATHER, plan));
+ is_query_feature = true;
+ }
+ else if (IsA(plan, GatherMerge))
+ {
+ active_query_features =
+ lappend(active_query_features,
+ pgpa_add_feature(walker, PGPAQF_GATHER_MERGE, plan));
+ is_query_feature = true;
+ }
+ else
+ {
+ foreach_ptr(pgpa_query_feature, qf, walker->future_query_features)
+ {
+ if (qf->plan == plan)
+ {
+ is_query_feature = true;
+ active_query_features = lappend(active_query_features, qf);
+ walker->future_query_features =
+ list_delete_ptr(walker->future_query_features, plan);
+ break;
+ }
+ }
+ }
+
+ /*
+ * Find all elided nodes for this Plan node.
+ */
+ foreach_node(ElidedNode, n, walker->pstmt->elidedNodes)
+ {
+ if (n->plan_node_id == plan->plan_node_id)
+ elided_nodes = lappend(elided_nodes, n);
+ }
+
+ /* If we found any elided_nodes, handle them. */
+ if (elided_nodes != NIL)
+ {
+ int num_elided_nodes = list_length(elided_nodes);
+ ElidedNode *last_elided_node;
+
+ /* XXX */
+ last_elided_node = list_nth(elided_nodes, num_elided_nodes - 1);
+ pgpa_qf_add_rtis(active_query_features, last_elided_node->relids);
+ active_query_features = NIL;
+
+ /*
+ * If there are multiple relids for the elided node, a clumped join
+ * should be built for it exactly once. When there's a join_unroller,
+ * the join unrolling process will have already built any necessary
+ * clumped join for the final elided node, so throw it out.
+ */
+ if (join_unroller != NULL)
+ elided_nodes = list_truncate(elided_nodes, num_elided_nodes - 1);
+
+ /*
+ * Every element of elided_nodes is an ElidedNode for which any
+ * necessary pgpa_scan has not yet been created. Do that here, and
+ * attach the resulting objects directly to the walker object, since
+ * we have nowhere else to put a reference to it.
+ *
+ * XXX. Isn't this a bald-faced lie?
+ */
+ foreach_node(ElidedNode, n, elided_nodes)
+ {
+ pgpa_scan *scan;
+
+ scan = pgpa_build_scan(walker->pstmt, plan, n, false);
+ if (scan != NULL)
+ walker->scans = lappend(walker->scans, scan);
+ }
+
+ /*
+ * Because we only want to unroll joins that the planner would have
+ * considered as part of the same join problem, nodes elided during
+ * setrefs processing act as boundaries.
+ *
+ * In more detail, if an Append or MergeAppend was elided, then j a
+ * partitionwise join was chosen and only a single child survived; if
+ * a SubqueryScan was elided, the subquery was separately without
+ * flattening it into the parent. In either case, the join order and
+ * join methods beneath the elided node should be described separately
+ * from the join order and methods above the elided node.
+ */
+ join_unroller = NULL;
+ }
+
+ /*
+ * If this join needs to unrolled but there's no join unroller already
+ * available, create one.
+ */
+ if (join_unroller == NULL && pgpa_is_join(plan))
+ {
+ join_unroller = pgpa_create_join_unroller();
+ join_unroller_toplevel = true;
+ ++join_unroll_level;
+ }
+
+ /*
+ * We only need to handle scans here if we're not underneath a join
+ * unroller. When we are, the join unroller will create the pgpa_scan and
+ * and attach it to the unrolled join. Otherwise, do it here.
+ */
+ if (join_unroll_level == 0)
+ {
+ pgpa_scan *scan;
+
+ scan = pgpa_build_scan(walker->pstmt, plan, NULL, false);
+ if (scan != NULL)
+ walker->scans = lappend(walker->scans, scan);
+ }
+
+ /*
+ * If this join is to be unrolled, pgpa_unroll_join() will return the join
+ * unroller object that should be passed down when we recurse into the
+ * outer and inner sides of the plan.
+ */
+ if (join_unroller != NULL)
+ pgpa_unroll_join(walker, plan, join_unroller,
+ &outer_join_unroller, &inner_join_unroller);
+
+ /* Add RTIs from the plan node to all active query features. */
+ pgpa_qf_add_plan_rtis(active_query_features, plan);
+
+ /* Recurse into the outer and inner subtrees. */
+ if (plan->lefttree != NULL)
+ pgpa_plan_walker(walker, plan->lefttree, join_unroll_level,
+ outer_join_unroller, active_query_features);
+ if (plan->righttree != NULL)
+ pgpa_plan_walker(walker, plan->righttree, join_unroll_level,
+ inner_join_unroller, active_query_features);
+
+ /*
+ * If we created a join unroller up above, then it's also our join to use
+ * it to build the final pgpa_unrolled_join, and to destroy the object.
+ */
+ if (join_unroller_toplevel)
+ {
+ pgpa_unrolled_join *ujoin;
+
+ ujoin = pgpa_build_unrolled_join(walker->pstmt, join_unroller);
+ walker->unrolled_joins = lappend(walker->unrolled_joins, ujoin);
+ pgpa_destroy_join_unroller(join_unroller);
+ }
+
+ /*
+ * Some plan types can have additional children. Nodes like Append that
+ * can have any number of children store them in a List; a SubqueryScan
+ * just has a field for a single additional Plan.
+ */
+ switch (nodeTag(plan))
+ {
+ case T_Append:
+ {
+ Append *aplan = (Append *) plan;
+
+ extraplans = aplan->appendplans;
+ pushdown_query_features = bms_is_empty(aplan->apprelids);
+ }
+ break;
+ case T_MergeAppend:
+ {
+ MergeAppend *maplan = (MergeAppend *) plan;
+
+ extraplans = maplan->mergeplans;
+ pushdown_query_features = bms_is_empty(maplan->apprelids);
+ }
+ break;
+ case T_BitmapAnd:
+ extraplans = ((BitmapAnd *) plan)->bitmapplans;
+ break;
+ case T_BitmapOr:
+ extraplans = ((BitmapOr *) plan)->bitmapplans;
+ break;
+ case T_SubqueryScan:
+
+ /*
+ * We don't pass down active_query_features across here, because
+ * those are specific to a subquery level.
+ */
+ pgpa_plan_walker(walker, ((SubqueryScan *) plan)->subplan,
+ 0, NULL, NIL);
+ break;
+ case T_CustomScan:
+ extraplans = ((CustomScan *) plan)->custom_plans;
+ break;
+ default:
+ break;
+ }
+
+ /* If we found a list of extra children, iterate over it. */
+ foreach(lc, extraplans)
+ {
+ Plan *subplan = lfirst(lc);
+
+ pgpa_plan_walker(walker, subplan, 0, NULL,
+ pushdown_query_features ? active_query_features : NIL);
+ }
+
+ /*
+ * If the current node is a query feature, then active_query_features has
+ * been destructively modified as compared to the value passed down from
+ * the caller, and we need to put things back as they were.
+ *
+ * Exceptions: If the caller passed NIL, or if we reset the list to NIL
+ * because of the presence of an elided SubqueryScan, then we created a
+ * new list here instead of destructively modifying the caller's list.
+ * That case requires no special handling, because list_truncate() will
+ * simply exit quickly.
+ */
+ if (is_query_feature)
+ {
+ int num_aqf = list_length(active_query_features);
+
+ (void) list_truncate(active_query_features, num_aqf - 1);
+ }
+}
+
+/*
+ * Arrange for the given plan node to be treated as a query feature when the
+ * tree walk reaches it.
+ *
+ * Make sure to only use this for nodes that the tree walk can't have reached
+ * yet!
+ */
+void
+pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type, Plan *plan)
+{
+ pgpa_query_feature *qf = pgpa_add_feature(walker, type, plan);
+
+ walker->future_query_features =
+ lappend(walker->future_query_features, qf);
+}
+
+/*
+ * Return the last of any elided nodes associated with this plan node ID.
+ *
+ * The last elided node is the one that would have been uppermost in the plan
+ * tree had it not been removed during setrefs processig.
+ */
+ElidedNode *
+pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan)
+{
+ ElidedNode *elided_node = NULL;
+
+ foreach_node(ElidedNode, n, pstmt->elidedNodes)
+ {
+ if (n->plan_node_id == plan->plan_node_id)
+ elided_node = n;
+ }
+
+ return elided_node;
+}
+
+/*
+ * Certain plan nodes can refer to a set of RTIs. Extract and return the set.
+ */
+Bitmapset *
+pgpa_relids(Plan *plan)
+{
+ if (IsA(plan, Result))
+ return ((Result *) plan)->relids;
+ else if (IsA(plan, ForeignScan))
+ return ((ForeignScan *) plan)->fs_relids;
+ else if (IsA(plan, Append))
+ return ((Append *) plan)->apprelids;
+ else if (IsA(plan, MergeAppend))
+ return ((MergeAppend *) plan)->apprelids;
+
+ return NULL;
+}
+
+/*
+ * Extract the scanned RTI from a plan node.
+ *
+ * Returns 0 if there isn't one.
+ */
+Index
+pgpa_scanrelid(Plan *plan)
+{
+ switch (nodeTag(plan))
+ {
+ case T_SeqScan:
+ case T_SampleScan:
+ case T_BitmapHeapScan:
+ case T_TidScan:
+ case T_TidRangeScan:
+ case T_SubqueryScan:
+ case T_FunctionScan:
+ case T_TableFuncScan:
+ case T_ValuesScan:
+ case T_CteScan:
+ case T_NamedTuplestoreScan:
+ case T_WorkTableScan:
+ case T_ForeignScan:
+ case T_CustomScan:
+ case T_IndexScan:
+ case T_IndexOnlyScan:
+ return ((Scan *) plan)->scanrelid;
+ default:
+ return 0;
+ }
+}
+
+/*
+ * Create a pgpa_query_feature and add it to the list of all query features
+ * for this plan.
+ */
+static pgpa_query_feature *
+pgpa_add_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type, Plan *plan)
+{
+ pgpa_query_feature *qf = palloc0_object(pgpa_query_feature);
+
+ qf->type = type;
+ qf->plan = plan;
+
+ walker->query_features = lappend(walker->query_features, qf);
+
+ return qf;
+}
+
+/*
+ * Add a single RTI to each active query feature.
+ */
+static void
+pgpa_qf_add_rti(List *active_query_features, Index rti)
+{
+ foreach_ptr(pgpa_query_feature, qf, active_query_features)
+ {
+ qf->relids = bms_add_member(qf->relids, rti);
+ }
+}
+
+/*
+ * Add a set of RTIs to each active query feature.
+ */
+static void
+pgpa_qf_add_rtis(List *active_query_features, Bitmapset *relids)
+{
+ foreach_ptr(pgpa_query_feature, qf, active_query_features)
+ {
+ qf->relids = bms_add_members(qf->relids, relids);
+ }
+}
+
+/*
+ * Add RTIs directly contained in a plan node to each active query feature.
+ */
+static void
+pgpa_qf_add_plan_rtis(List *active_query_features, Plan *plan)
+{
+ Bitmapset *relids;
+ Index rti;
+
+ if ((relids = pgpa_relids(plan)) != NULL)
+ pgpa_qf_add_rtis(active_query_features, relids);
+ else if ((rti = pgpa_scanrelid(plan)) != 0)
+ pgpa_qf_add_rti(active_query_features, rti);
+}
diff --git a/contrib/pg_plan_advice/pgpa_walker.h b/contrib/pg_plan_advice/pgpa_walker.h
new file mode 100644
index 0000000000..871d31801d
--- /dev/null
+++ b/contrib/pg_plan_advice/pgpa_walker.h
@@ -0,0 +1,86 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgpa_walker.h
+ * Plan tree iteration
+ *
+ * Copyright (c) 2016-2025, PostgreSQL Global Development Group
+ *
+ * contrib/pg_plan_advice/pgpa_walker.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef PGPA_WALKER_H
+#define PGPA_WALKER_H
+
+#include "pgpa_join.h"
+
+/*
+ * We use the term "query feature" to refer to plan nodes that are interesting
+ * in the following way: to generate advice, we'll need to know the set of
+ * same-subquery, non-join RTIs occuring at or below that plan node, without
+ * admixture of parent and child RTIs.
+ *
+ * For example, Gather nodes, desiginated by PGPAQF_GATHER, and Gather Merge
+ * nodes, designated by PGPAQF_GATHER_MERGE, are query features, because we'll
+ * want to admit some kind of advice that describes the portion of the plan
+ * tree that appears beneath those nodes.
+ *
+ * Each semijoin can be implemented either by directly performing a semijoin,
+ * or by making one side unique and then performing a normal join. Either way,
+ * we use a query feature to notice what decision was made, so that we can
+ * describe it by enumerating the RTIs on that side of the join.
+ *
+ * To elaborate on the "no admixture of parent and child RTIs" rule, in all of
+ * these cases, if the entirety of an inheritance hierarchy appears beneath
+ * the query feature, we only want to name the parent table. But it's also
+ * possible to have cases where we must name child tables. This is particularly
+ * likely to happen when partitionwise join is in use, but could happen for
+ * Gather or Gather Merge even without that, if one of those appears below
+ * an Append or MergeAppend node for a single table.
+ */
+typedef enum pgpa_qf_type
+{
+ PGPAQF_GATHER,
+ PGPAQF_GATHER_MERGE,
+ PGPAQF_SEMIJOIN_NON_UNIQUE,
+ /* update NUM_PGPA_QF_TYPES if you add anything here */
+ PGPAQF_SEMIJOIN_UNIQUE
+} pgpa_qf_type;
+
+#define NUM_PGPA_QF_TYPES ((int) PGPAQF_SEMIJOIN_UNIQUE + 1)
+
+/*
+ * For each query feature, we keep track of the feature type and the set of
+ * relids that we found underneath the relevant plan node. See the comments
+ * on pgpa_qf_type, above, for additional details.
+ */
+typedef struct pgpa_query_feature
+{
+ pgpa_qf_type type;
+ Plan *plan;
+ Bitmapset *relids;
+} pgpa_query_feature;
+
+typedef struct pgpa_plan_walker_context
+{
+ PlannedStmt *pstmt;
+ List *unrolled_joins;
+ List *scans;
+ List *query_features;
+ List *future_query_features;
+} pgpa_plan_walker_context;
+
+extern void pgpa_plan_walker(pgpa_plan_walker_context *walker, Plan *plan,
+ int join_unroll_level,
+ pgpa_join_unroller *join_unroller,
+ List *active_query_features);
+
+extern void pgpa_add_future_feature(pgpa_plan_walker_context *walker,
+ pgpa_qf_type type,
+ Plan *plan);
+
+extern ElidedNode *pgpa_last_elided_node(PlannedStmt *pstmt, Plan *plan);
+extern Bitmapset *pgpa_relids(Plan *plan);
+extern Index pgpa_scanrelid(Plan *plan);
+
+#endif
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index df850cf634..ad8416a4d8 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -4345,3 +4345,20 @@ z_streamp
zic_t
SubPlanRTInfo
ElidedNode
+pgpa_collected_advice
+pgpa_join_class
+pgpa_join_member
+pgpa_join_strategy
+pgpa_join_unroller
+pgpa_local_advice
+pgpa_local_advice_chunk
+pgpa_output_context
+pgpa_plan_walker_context
+pgpa_qf_type
+pgpa_query_feature
+pgpa_scan
+pgpa_scan_strategy
+pgpa_shared_advice
+pgpa_shared_advice_chunk
+pgpa_shared_state
+pgpa_unrolled_join