summaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
authorDavid Rowley2021-02-27 09:59:36 +0000
committerDavid Rowley2021-02-27 09:59:36 +0000
commitbb437f995d47405ecd92cf66df71f7f7e40ed460 (patch)
tree0ee50f8a501e1ecc30d5cfd0eeb6ed0bcd41e2b2 /src/backend
parentf4adc41c4f92cc91d507b19e397140c35bb9fd71 (diff)
Add TID Range Scans to support efficient scanning ranges of TIDs
This adds a new executor node named TID Range Scan. The query planner will generate paths for TID Range scans when quals are discovered on base relations which search for ranges on the table's ctid column. These ranges may be open at either end. For example, WHERE ctid >= '(10,0)'; will return all tuples on page 10 and over. To support this, two new optional callback functions have been added to table AM. scan_set_tidrange is used to set the scan range to just the given range of TIDs. scan_getnextslot_tidrange fetches the next tuple in the given range. For AMs were scanning ranges of TIDs would not make sense, these functions can be set to NULL in the TableAmRoutine. The query planner won't generate TID Range Scan Paths in that case. Author: Edmund Horner, David Rowley Reviewed-by: David Rowley, Tomas Vondra, Tom Lane, Andres Freund, Zhihong Yu Discussion: https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/access/heap/heapam.c147
-rw-r--r--src/backend/access/heap/heapam_handler.c3
-rw-r--r--src/backend/commands/explain.c23
-rw-r--r--src/backend/executor/Makefile1
-rw-r--r--src/backend/executor/execAmi.c6
-rw-r--r--src/backend/executor/execCurrent.c1
-rw-r--r--src/backend/executor/execProcnode.c10
-rw-r--r--src/backend/executor/nodeTidrangescan.c413
-rw-r--r--src/backend/nodes/copyfuncs.c24
-rw-r--r--src/backend/nodes/outfuncs.c14
-rw-r--r--src/backend/optimizer/README1
-rw-r--r--src/backend/optimizer/path/costsize.c95
-rw-r--r--src/backend/optimizer/path/tidpath.c119
-rw-r--r--src/backend/optimizer/plan/createplan.c98
-rw-r--r--src/backend/optimizer/plan/setrefs.c16
-rw-r--r--src/backend/optimizer/plan/subselect.c6
-rw-r--r--src/backend/optimizer/util/pathnode.c29
-rw-r--r--src/backend/optimizer/util/plancat.c6
-rw-r--r--src/backend/optimizer/util/relnode.c3
-rw-r--r--src/backend/storage/page/itemptr.c59
20 files changed, 1062 insertions, 12 deletions
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9c1d590dc71..3b435c107d0 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1391,6 +1391,153 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
return true;
}
+void
+heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
+ ItemPointer maxtid)
+{
+ HeapScanDesc scan = (HeapScanDesc) sscan;
+ BlockNumber startBlk;
+ BlockNumber numBlks;
+ ItemPointerData highestItem;
+ ItemPointerData lowestItem;
+
+ /*
+ * For relations without any pages, we can simply leave the TID range
+ * unset. There will be no tuples to scan, therefore no tuples outside
+ * the given TID range.
+ */
+ if (scan->rs_nblocks == 0)
+ return;
+
+ /*
+ * Set up some ItemPointers which point to the first and last possible
+ * tuples in the heap.
+ */
+ ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
+ ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
+
+ /*
+ * If the given maximum TID is below the highest possible TID in the
+ * relation, then restrict the range to that, otherwise we scan to the end
+ * of the relation.
+ */
+ if (ItemPointerCompare(maxtid, &highestItem) < 0)
+ ItemPointerCopy(maxtid, &highestItem);
+
+ /*
+ * If the given minimum TID is above the lowest possible TID in the
+ * relation, then restrict the range to only scan for TIDs above that.
+ */
+ if (ItemPointerCompare(mintid, &lowestItem) > 0)
+ ItemPointerCopy(mintid, &lowestItem);
+
+ /*
+ * Check for an empty range and protect from would be negative results
+ * from the numBlks calculation below.
+ */
+ if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
+ {
+ /* Set an empty range of blocks to scan */
+ heap_setscanlimits(sscan, 0, 0);
+ return;
+ }
+
+ /*
+ * Calculate the first block and the number of blocks we must scan. We
+ * could be more aggressive here and perform some more validation to try
+ * and further narrow the scope of blocks to scan by checking if the
+ * lowerItem has an offset above MaxOffsetNumber. In this case, we could
+ * advance startBlk by one. Likewise, if highestItem has an offset of 0
+ * we could scan one fewer blocks. However, such an optimization does not
+ * seem worth troubling over, currently.
+ */
+ startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
+
+ numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
+ ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
+
+ /* Set the start block and number of blocks to scan */
+ heap_setscanlimits(sscan, startBlk, numBlks);
+
+ /* Finally, set the TID range in sscan */
+ ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
+ ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
+}
+
+bool
+heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
+ TupleTableSlot *slot)
+{
+ HeapScanDesc scan = (HeapScanDesc) sscan;
+ ItemPointer mintid = &sscan->rs_mintid;
+ ItemPointer maxtid = &sscan->rs_maxtid;
+
+ /* Note: no locking manipulations needed */
+ for (;;)
+ {
+ if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
+ heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
+ else
+ heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
+
+ if (scan->rs_ctup.t_data == NULL)
+ {
+ ExecClearTuple(slot);
+ return false;
+ }
+
+ /*
+ * heap_set_tidrange will have used heap_setscanlimits to limit the
+ * range of pages we scan to only ones that can contain the TID range
+ * we're scanning for. Here we must filter out any tuples from these
+ * pages that are outwith that range.
+ */
+ if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
+ {
+ ExecClearTuple(slot);
+
+ /*
+ * When scanning backwards, the TIDs will be in descending order.
+ * Future tuples in this direction will be lower still, so we can
+ * just return false to indicate there will be no more tuples.
+ */
+ if (ScanDirectionIsBackward(direction))
+ return false;
+
+ continue;
+ }
+
+ /*
+ * Likewise for the final page, we must filter out TIDs greater than
+ * maxtid.
+ */
+ if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
+ {
+ ExecClearTuple(slot);
+
+ /*
+ * When scanning forward, the TIDs will be in ascending order.
+ * Future tuples in this direction will be higher still, so we can
+ * just return false to indicate there will be no more tuples.
+ */
+ if (ScanDirectionIsForward(direction))
+ return false;
+ continue;
+ }
+
+ break;
+ }
+
+ /*
+ * if we get here it means we have a new current scan tuple, so point to
+ * the proper return buffer and return the tuple.
+ */
+ pgstat_count_heap_getnext(scan->rs_base.rs_rd);
+
+ ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
+ return true;
+}
+
/*
* heap_fetch - retrieve tuple with given tid
*
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 4a70e20a143..bd5faf0c1fb 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2542,6 +2542,9 @@ static const TableAmRoutine heapam_methods = {
.scan_rescan = heap_rescan,
.scan_getnextslot = heap_getnextslot,
+ .scan_set_tidrange = heap_set_tidrange,
+ .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
+
.parallelscan_estimate = table_block_parallelscan_estimate,
.parallelscan_initialize = table_block_parallelscan_initialize,
.parallelscan_reinitialize = table_block_parallelscan_reinitialize,
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index f80e379973a..afc45429ba4 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1057,6 +1057,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -1223,6 +1224,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_TidScan:
pname = sname = "Tid Scan";
break;
+ case T_TidRangeScan:
+ pname = sname = "Tid Range Scan";
+ break;
case T_SubqueryScan:
pname = sname = "Subquery Scan";
break;
@@ -1417,6 +1421,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
case T_SampleScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -1871,6 +1876,23 @@ ExplainNode(PlanState *planstate, List *ancestors,
planstate, es);
}
break;
+ case T_TidRangeScan:
+ {
+ /*
+ * The tidrangequals list has AND semantics, so be sure to
+ * show it as an AND condition.
+ */
+ List *tidquals = ((TidRangeScan *) plan)->tidrangequals;
+
+ if (list_length(tidquals) > 1)
+ tidquals = list_make1(make_andclause(tidquals));
+ show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es);
+ show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+ if (plan->qual)
+ show_instrumentation_count("Rows Removed by Filter", 1,
+ planstate, es);
+ }
+ break;
case T_ForeignScan:
show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
if (plan->qual)
@@ -3558,6 +3580,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_ForeignScan:
case T_CustomScan:
case T_ModifyTable:
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index f990c6473a3..74ac59faa13 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -67,6 +67,7 @@ OBJS = \
nodeSubplan.o \
nodeSubqueryscan.o \
nodeTableFuncscan.o \
+ nodeTidrangescan.o \
nodeTidscan.o \
nodeUnique.o \
nodeValuesscan.o \
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 23bdb53cd10..4543ac79edf 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -51,6 +51,7 @@
#include "executor/nodeSubplan.h"
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTableFuncscan.h"
+#include "executor/nodeTidrangescan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
@@ -197,6 +198,10 @@ ExecReScan(PlanState *node)
ExecReScanTidScan((TidScanState *) node);
break;
+ case T_TidRangeScanState:
+ ExecReScanTidRangeScan((TidRangeScanState *) node);
+ break;
+
case T_SubqueryScanState:
ExecReScanSubqueryScan((SubqueryScanState *) node);
break;
@@ -562,6 +567,7 @@ ExecSupportsBackwardScan(Plan *node)
case T_SeqScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_FunctionScan:
case T_ValuesScan:
case T_CteScan:
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index 33221a4d6ce..4f430fb1603 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -336,6 +336,7 @@ search_plan_tree(PlanState *node, Oid table_oid,
case T_IndexOnlyScanState:
case T_BitmapHeapScanState:
case T_TidScanState:
+ case T_TidRangeScanState:
case T_ForeignScanState:
case T_CustomScanState:
{
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 414df50a054..29766d8196f 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -109,6 +109,7 @@
#include "executor/nodeSubplan.h"
#include "executor/nodeSubqueryscan.h"
#include "executor/nodeTableFuncscan.h"
+#include "executor/nodeTidrangescan.h"
#include "executor/nodeTidscan.h"
#include "executor/nodeUnique.h"
#include "executor/nodeValuesscan.h"
@@ -238,6 +239,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_TidRangeScan:
+ result = (PlanState *) ExecInitTidRangeScan((TidRangeScan *) node,
+ estate, eflags);
+ break;
+
case T_SubqueryScan:
result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node,
estate, eflags);
@@ -637,6 +643,10 @@ ExecEndNode(PlanState *node)
ExecEndTidScan((TidScanState *) node);
break;
+ case T_TidRangeScanState:
+ ExecEndTidRangeScan((TidRangeScanState *) node);
+ break;
+
case T_SubqueryScanState:
ExecEndSubqueryScan((SubqueryScanState *) node);
break;
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
new file mode 100644
index 00000000000..2b0d205d7dd
--- /dev/null
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -0,0 +1,413 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeTidrangescan.c
+ * Routines to support TID range scans of relations
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeTidrangescan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "catalog/pg_operator.h"
+#include "executor/execdebug.h"
+#include "executor/nodeTidrangescan.h"
+#include "nodes/nodeFuncs.h"
+#include "storage/bufmgr.h"
+#include "utils/rel.h"
+
+
+#define IsCTIDVar(node) \
+ ((node) != NULL && \
+ IsA((node), Var) && \
+ ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
+ ((Var *) (node))->varlevelsup == 0)
+
+typedef enum
+{
+ TIDEXPR_UPPER_BOUND,
+ TIDEXPR_LOWER_BOUND
+} TidExprType;
+
+/* Upper or lower range bound for scan */
+typedef struct TidOpExpr
+{
+ TidExprType exprtype; /* type of op; lower or upper */
+ ExprState *exprstate; /* ExprState for a TID-yielding subexpr */
+ bool inclusive; /* whether op is inclusive */
+} TidOpExpr;
+
+/*
+ * For the given 'expr', build and return an appropriate TidOpExpr taking into
+ * account the expr's operator and operand order.
+ */
+static TidOpExpr *
+MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
+{
+ Node *arg1 = get_leftop((Expr *) expr);
+ Node *arg2 = get_rightop((Expr *) expr);
+ ExprState *exprstate = NULL;
+ bool invert = false;
+ TidOpExpr *tidopexpr;
+
+ if (IsCTIDVar(arg1))
+ exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps);
+ else if (IsCTIDVar(arg2))
+ {
+ exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps);
+ invert = true;
+ }
+ else
+ elog(ERROR, "could not identify CTID variable");
+
+ tidopexpr = (TidOpExpr *) palloc(sizeof(TidOpExpr));
+ tidopexpr->inclusive = false; /* for now */
+
+ switch (expr->opno)
+ {
+ case TIDLessEqOperator:
+ tidopexpr->inclusive = true;
+ /* fall through */
+ case TIDLessOperator:
+ tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND;
+ break;
+ case TIDGreaterEqOperator:
+ tidopexpr->inclusive = true;
+ /* fall through */
+ case TIDGreaterOperator:
+ tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND;
+ break;
+ default:
+ elog(ERROR, "could not identify CTID operator");
+ }
+
+ tidopexpr->exprstate = exprstate;
+
+ return tidopexpr;
+}
+
+/*
+ * Extract the qual subexpressions that yield TIDs to search for,
+ * and compile them into ExprStates if they're ordinary expressions.
+ */
+static void
+TidExprListCreate(TidRangeScanState *tidrangestate)
+{
+ TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan;
+ List *tidexprs = NIL;
+ ListCell *l;
+
+ foreach(l, node->tidrangequals)
+ {
+ OpExpr *opexpr = lfirst(l);
+ TidOpExpr *tidopexpr;
+
+ if (!IsA(opexpr, OpExpr))
+ elog(ERROR, "could not identify CTID expression");
+
+ tidopexpr = MakeTidOpExpr(opexpr, tidrangestate);
+ tidexprs = lappend(tidexprs, tidopexpr);
+ }
+
+ tidrangestate->trss_tidexprs = tidexprs;
+}
+
+/* ----------------------------------------------------------------
+ * TidRangeEval
+ *
+ * Compute and set node's block and offset range to scan by evaluating
+ * the trss_tidexprs. Returns false if we detect the range cannot
+ * contain any tuples. Returns true if it's possible for the range to
+ * contain tuples.
+ * ----------------------------------------------------------------
+ */
+static bool
+TidRangeEval(TidRangeScanState *node)
+{
+ ExprContext *econtext = node->ss.ps.ps_ExprContext;
+ ItemPointerData lowerBound;
+ ItemPointerData upperBound;
+ ListCell *l;
+
+ /*
+ * Set the upper and lower bounds to the absolute limits of the range of
+ * the ItemPointer type. Below we'll try to narrow this range on either
+ * side by looking at the TidOpExprs.
+ */
+ ItemPointerSet(&lowerBound, 0, 0);
+ ItemPointerSet(&upperBound, InvalidBlockNumber, PG_UINT16_MAX);
+
+ foreach(l, node->trss_tidexprs)
+ {
+ TidOpExpr *tidopexpr = (TidOpExpr *) lfirst(l);
+ ItemPointer itemptr;
+ bool isNull;
+
+ /* Evaluate this bound. */
+ itemptr = (ItemPointer)
+ DatumGetPointer(ExecEvalExprSwitchContext(tidopexpr->exprstate,
+ econtext,
+ &isNull));
+
+ /* If the bound is NULL, *nothing* matches the qual. */
+ if (isNull)
+ return false;
+
+ if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
+ {
+ ItemPointerData lb;
+
+ ItemPointerCopy(itemptr, &lb);
+
+ /*
+ * Normalize non-inclusive ranges to become inclusive. The
+ * resulting ItemPointer here may not be a valid item pointer.
+ */
+ if (!tidopexpr->inclusive)
+ ItemPointerInc(&lb);
+
+ /* Check if we can narrow the range using this qual */
+ if (ItemPointerCompare(&lb, &lowerBound) > 0)
+ ItemPointerCopy(&lb, &lowerBound);
+ }
+
+ else if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND)
+ {
+ ItemPointerData ub;
+
+ ItemPointerCopy(itemptr, &ub);
+
+ /*
+ * Normalize non-inclusive ranges to become inclusive. The
+ * resulting ItemPointer here may not be a valid item pointer.
+ */
+ if (!tidopexpr->inclusive)
+ ItemPointerDec(&ub);
+
+ /* Check if we can narrow the range using this qual */
+ if (ItemPointerCompare(&ub, &upperBound) < 0)
+ ItemPointerCopy(&ub, &upperBound);
+ }
+ }
+
+ ItemPointerCopy(&lowerBound, &node->trss_mintid);
+ ItemPointerCopy(&upperBound, &node->trss_maxtid);
+
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * TidRangeNext
+ *
+ * Retrieve a tuple from the TidRangeScan node's currentRelation
+ * using the TIDs in the TidRangeScanState information.
+ *
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+TidRangeNext(TidRangeScanState *node)
+{
+ TableScanDesc scandesc;
+ EState *estate;
+ ScanDirection direction;
+ TupleTableSlot *slot;
+
+ /*
+ * extract necessary information from TID scan node
+ */
+ scandesc = node->ss.ss_currentScanDesc;
+ estate = node->ss.ps.state;
+ slot = node->ss.ss_ScanTupleSlot;
+ direction = estate->es_direction;
+
+ if (!node->trss_inScan)
+ {
+ /* First time through, compute TID range to scan */
+ if (!TidRangeEval(node))
+ return NULL;
+
+ if (scandesc == NULL)
+ {
+ scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
+ estate->es_snapshot,
+ &node->trss_mintid,
+ &node->trss_maxtid);
+ node->ss.ss_currentScanDesc = scandesc;
+ }
+ else
+ {
+ /* rescan with the updated TID range */
+ table_rescan_tidrange(scandesc, &node->trss_mintid,
+ &node->trss_maxtid);
+ }
+
+ node->trss_inScan = true;
+ }
+
+ /* Fetch the next tuple. */
+ if (!table_scan_getnextslot_tidrange(scandesc, direction, slot))
+ {
+ node->trss_inScan = false;
+ ExecClearTuple(slot);
+ }
+
+ return slot;
+}
+
+/*
+ * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
+{
+ return true;
+}
+
+/* ----------------------------------------------------------------
+ * ExecTidRangeScan(node)
+ *
+ * Scans the relation using tids and returns the next qualifying tuple.
+ * We call the ExecScan() routine and pass it the appropriate
+ * access method functions.
+ *
+ * Conditions:
+ * -- the "cursor" maintained by the AMI is positioned at the tuple
+ * returned previously.
+ *
+ * Initial States:
+ * -- the relation indicated is opened for TID range scanning.
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+ExecTidRangeScan(PlanState *pstate)
+{
+ TidRangeScanState *node = castNode(TidRangeScanState, pstate);
+
+ return ExecScan(&node->ss,
+ (ExecScanAccessMtd) TidRangeNext,
+ (ExecScanRecheckMtd) TidRangeRecheck);
+}
+
+/* ----------------------------------------------------------------
+ * ExecReScanTidRangeScan(node)
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanTidRangeScan(TidRangeScanState *node)
+{
+ /* mark scan as not in progress, and tid range list as not computed yet */
+ node->trss_inScan = false;
+
+ /*
+ * We must wait until TidRangeNext before calling table_rescan_tidrange.
+ */
+ ExecScanReScan(&node->ss);
+}
+
+/* ----------------------------------------------------------------
+ * ExecEndTidRangeScan
+ *
+ * Releases any storage allocated through C routines.
+ * Returns nothing.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndTidRangeScan(TidRangeScanState *node)
+{
+ TableScanDesc scan = node->ss.ss_currentScanDesc;
+
+ if (scan != NULL)
+ table_endscan(scan);
+
+ /*
+ * Free the exprcontext
+ */
+ ExecFreeExprContext(&node->ss.ps);
+
+ /*
+ * clear out tuple table slots
+ */
+ if (node->ss.ps.ps_ResultTupleSlot)
+ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
+}
+
+/* ----------------------------------------------------------------
+ * ExecInitTidRangeScan
+ *
+ * Initializes the tid range scan's state information, creates
+ * scan keys, and opens the scan relation.
+ *
+ * Parameters:
+ * node: TidRangeScan node produced by the planner.
+ * estate: the execution state initialized in InitPlan.
+ * ----------------------------------------------------------------
+ */
+TidRangeScanState *
+ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
+{
+ TidRangeScanState *tidrangestate;
+ Relation currentRelation;
+
+ /*
+ * create state structure
+ */
+ tidrangestate = makeNode(TidRangeScanState);
+ tidrangestate->ss.ps.plan = (Plan *) node;
+ tidrangestate->ss.ps.state = estate;
+ tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &tidrangestate->ss.ps);
+
+ /*
+ * mark scan as not in progress, and TID range as not computed yet
+ */
+ tidrangestate->trss_inScan = false;
+
+ /*
+ * open the scan relation
+ */
+ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
+
+ tidrangestate->ss.ss_currentRelation = currentRelation;
+ tidrangestate->ss.ss_currentScanDesc = NULL; /* no table scan here */
+
+ /*
+ * get the scan type from the relation descriptor.
+ */
+ ExecInitScanTupleSlot(estate, &tidrangestate->ss,
+ RelationGetDescr(currentRelation),
+ table_slot_callbacks(currentRelation));
+
+ /*
+ * Initialize result type and projection.
+ */
+ ExecInitResultTypeTL(&tidrangestate->ss.ps);
+ ExecAssignScanProjectionInfo(&tidrangestate->ss);
+
+ /*
+ * initialize child expressions
+ */
+ tidrangestate->ss.ps.qual =
+ ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate);
+
+ TidExprListCreate(tidrangestate);
+
+ /*
+ * all done.
+ */
+ return tidrangestate;
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 65bbc18ecba..aaba1ec2c4a 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -586,6 +586,27 @@ _copyTidScan(const TidScan *from)
}
/*
+ * _copyTidRangeScan
+ */
+static TidRangeScan *
+_copyTidRangeScan(const TidRangeScan *from)
+{
+ TidRangeScan *newnode = makeNode(TidRangeScan);
+
+ /*
+ * copy node superclass fields
+ */
+ CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+ /*
+ * copy remainder of node
+ */
+ COPY_NODE_FIELD(tidrangequals);
+
+ return newnode;
+}
+
+/*
* _copySubqueryScan
*/
static SubqueryScan *
@@ -4938,6 +4959,9 @@ copyObjectImpl(const void *from)
case T_TidScan:
retval = _copyTidScan(from);
break;
+ case T_TidRangeScan:
+ retval = _copyTidRangeScan(from);
+ break;
case T_SubqueryScan:
retval = _copySubqueryScan(from);
break;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index f5dcedf6e89..8fc432bfe17 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -609,6 +609,16 @@ _outTidScan(StringInfo str, const TidScan *node)
}
static void
+_outTidRangeScan(StringInfo str, const TidRangeScan *node)
+{
+ WRITE_NODE_TYPE("TIDRANGESCAN");
+
+ _outScanInfo(str, (const Scan *) node);
+
+ WRITE_NODE_FIELD(tidrangequals);
+}
+
+static void
_outSubqueryScan(StringInfo str, const SubqueryScan *node)
{
WRITE_NODE_TYPE("SUBQUERYSCAN");
@@ -2314,6 +2324,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(subroot);
WRITE_NODE_FIELD(subplan_params);
WRITE_INT_FIELD(rel_parallel_workers);
+ WRITE_UINT_FIELD(amflags);
WRITE_OID_FIELD(serverid);
WRITE_OID_FIELD(userid);
WRITE_BOOL_FIELD(useridiscurrent);
@@ -3810,6 +3821,9 @@ outNode(StringInfo str, const void *obj)
case T_TidScan:
_outTidScan(str, obj);
break;
+ case T_TidRangeScan:
+ _outTidRangeScan(str, obj);
+ break;
case T_SubqueryScan:
_outSubqueryScan(str, obj);
break;
diff --git a/src/backend/optimizer/README b/src/backend/optimizer/README
index efb52858c88..4a6c3481623 100644
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -374,6 +374,7 @@ RelOptInfo - a relation or joined relations
IndexPath - index scan
BitmapHeapPath - top of a bitmapped index scan
TidPath - scan by CTID
+ TidRangePath - scan a contiguous range of CTIDs
SubqueryScanPath - scan a subquery-in-FROM
ForeignPath - scan a foreign table, foreign join or foreign upper-relation
CustomPath - for custom scan providers
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index aab06c7d213..a25b674a192 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1284,6 +1284,101 @@ cost_tidscan(Path *path, PlannerInfo *root,
}
/*
+ * cost_tidrangescan
+ * Determines and sets the costs of scanning a relation using a range of
+ * TIDs for 'path'
+ *
+ * 'baserel' is the relation to be scanned
+ * 'tidrangequals' is the list of TID-checkable range quals
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tidrangescan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, List *tidrangequals,
+ ParamPathInfo *param_info)
+{
+ Selectivity selectivity;
+ double pages;
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ QualCost qpqual_cost;
+ Cost cpu_per_tuple;
+ QualCost tid_qual_cost;
+ double ntuples;
+ double nseqpages;
+ double spc_random_page_cost;
+ double spc_seq_page_cost;
+
+ /* Should only be applied to base relations */
+ Assert(baserel->relid > 0);
+ Assert(baserel->rtekind == RTE_RELATION);
+
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
+
+ /* Count how many tuples and pages we expect to scan */
+ selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid,
+ JOIN_INNER, NULL);
+ pages = ceil(selectivity * baserel->pages);
+
+ if (pages <= 0.0)
+ pages = 1.0;
+
+ /*
+ * The first page in a range requires a random seek, but each subsequent
+ * page is just a normal sequential page read. NOTE: it's desirable for
+ * TID Range Scans to cost more than the equivalent Sequential Scans,
+ * because Seq Scans have some performance advantages such as scan
+ * synchronization and parallelizability, and we'd prefer one of them to
+ * be picked unless a TID Range Scan really is better.
+ */
+ ntuples = selectivity * baserel->tuples;
+ nseqpages = pages - 1.0;
+
+ if (!enable_tidscan)
+ startup_cost += disable_cost;
+
+ /*
+ * The TID qual expressions will be computed once, any other baserestrict
+ * quals once per retrieved tuple.
+ */
+ cost_qual_eval(&tid_qual_cost, tidrangequals, root);
+
+ /* fetch estimated page cost for tablespace containing table */
+ get_tablespace_page_costs(baserel->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /* disk costs; 1 random page and the remainder as seq pages */
+ run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
+
+ /* Add scanning CPU costs */
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ /*
+ * XXX currently we assume TID quals are a subset of qpquals at this
+ * point; they will be removed (if possible) when we create the plan, so
+ * we subtract their cost from the total qpqual cost. (If the TID quals
+ * can't be removed, this is a mistake and we're going to underestimate
+ * the CPU cost a bit.)
+ */
+ startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+ tid_qual_cost.per_tuple;
+ run_cost += cpu_per_tuple * ntuples;
+
+ /* tlist eval costs are paid per output row, not per tuple scanned */
+ startup_cost += path->pathtarget->cost.startup;
+ run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+ path->startup_cost = startup_cost;
+ path->total_cost = startup_cost + run_cost;
+}
+
+/*
* cost_subqueryscan
* Determines and returns the cost of scanning a subquery RTE.
*
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index 0845b460e2c..0725d950c54 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -2,9 +2,9 @@
*
* tidpath.c
* Routines to determine which TID conditions are usable for scanning
- * a given relation, and create TidPaths accordingly.
+ * a given relation, and create TidPaths and TidRangePaths accordingly.
*
- * What we are looking for here is WHERE conditions of the form
+ * For TidPaths, we look for WHERE conditions of the form
* "CTID = pseudoconstant", which can be implemented by just fetching
* the tuple directly via heap_fetch(). We can also handle OR'd conditions
* such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr
@@ -23,6 +23,9 @@
* a function, but in practice it works better to keep the special node
* representation all the way through to execution.
*
+ * Additionally, TidRangePaths may be created for conditions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and
+ * AND-clauses composed of such conditions.
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -63,14 +66,14 @@ IsCTIDVar(Var *var, RelOptInfo *rel)
/*
* Check to see if a RestrictInfo is of the form
- * CTID = pseudoconstant
+ * CTID OP pseudoconstant
* or
- * pseudoconstant = CTID
- * where the CTID Var belongs to relation "rel", and nothing on the
- * other side of the clause does.
+ * pseudoconstant OP CTID
+ * where OP is a binary operation, the CTID Var belongs to relation "rel",
+ * and nothing on the other side of the clause does.
*/
static bool
-IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+IsBinaryTidClause(RestrictInfo *rinfo, RelOptInfo *rel)
{
OpExpr *node;
Node *arg1,
@@ -83,10 +86,9 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
return false;
node = (OpExpr *) rinfo->clause;
- /* Operator must be tideq */
- if (node->opno != TIDEqualOperator)
+ /* OpExpr must have two arguments */
+ if (list_length(node->args) != 2)
return false;
- Assert(list_length(node->args) == 2);
arg1 = linitial(node->args);
arg2 = lsecond(node->args);
@@ -118,6 +120,50 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
/*
* Check to see if a RestrictInfo is of the form
+ * CTID = pseudoconstant
+ * or
+ * pseudoconstant = CTID
+ * where the CTID Var belongs to relation "rel", and nothing on the
+ * other side of the clause does.
+ */
+static bool
+IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ if (!IsBinaryTidClause(rinfo, rel))
+ return false;
+
+ if (((OpExpr *) rinfo->clause)->opno == TIDEqualOperator)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ * CTID OP pseudoconstant
+ * or
+ * pseudoconstant OP CTID
+ * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs
+ * to relation "rel", and nothing on the other side of the clause does.
+ */
+static bool
+IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+ Oid opno;
+
+ if (!IsBinaryTidClause(rinfo, rel))
+ return false;
+ opno = ((OpExpr *) rinfo->clause)->opno;
+
+ if (opno == TIDLessOperator || opno == TIDLessEqOperator ||
+ opno == TIDGreaterOperator || opno == TIDGreaterEqOperator)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
* CTID = ANY (pseudoconstant_array)
* where the CTID Var belongs to relation "rel", and nothing on the
* other side of the clause does.
@@ -222,7 +268,7 @@ TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel)
*
* Returns a List of CTID qual RestrictInfos for the specified rel (with
* implicit OR semantics across the list), or NIL if there are no usable
- * conditions.
+ * equality conditions.
*
* This function is just concerned with handling AND/OR recursion.
*/
@@ -302,6 +348,34 @@ TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel)
}
/*
+ * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos
+ *
+ * Returns a List of CTID range qual RestrictInfos for the specified rel
+ * (with implicit AND semantics across the list), or NIL if there are no
+ * usable range conditions or if the rel's table AM does not support TID range
+ * scans.
+ */
+static List *
+TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel)
+{
+ List *rlst = NIL;
+ ListCell *l;
+
+ if ((rel->amflags & AMFLAG_HAS_TID_RANGE) == 0)
+ return NIL;
+
+ foreach(l, rlist)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (IsTidRangeClause(rinfo, rel))
+ rlst = lappend(rlst, rinfo);
+ }
+
+ return rlst;
+}
+
+/*
* Given a list of join clauses involving our rel, create a parameterized
* TidPath for each one that is a suitable TidEqual clause.
*
@@ -385,6 +459,7 @@ void
create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
{
List *tidquals;
+ List *tidrangequals;
/*
* If any suitable quals exist in the rel's baserestrict list, generate a
@@ -392,7 +467,7 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
*/
tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel);
- if (tidquals)
+ if (tidquals != NIL)
{
/*
* This path uses no join clauses, but it could still have required
@@ -405,6 +480,26 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
}
/*
+ * If there are range quals in the baserestrict list, generate a
+ * TidRangePath.
+ */
+ tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo,
+ rel);
+
+ if (tidrangequals != NIL)
+ {
+ /*
+ * This path uses no join clauses, but it could still have required
+ * parameterization due to LATERAL refs in its tlist.
+ */
+ Relids required_outer = rel->lateral_relids;
+
+ add_path(rel, (Path *) create_tidrangescan_path(root, rel,
+ tidrangequals,
+ required_outer));
+ }
+
+ /*
* Try to generate parameterized TidPaths using equality clauses extracted
* from EquivalenceClasses. (This is important since simple "t1.ctid =
* t2.ctid" clauses will turn into ECs.)
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 6c8305c977e..906cab70532 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -129,6 +129,10 @@ static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
static void bitmap_subplan_mark_shared(Plan *plan);
static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
List *tlist, List *scan_clauses);
+static TidRangeScan *create_tidrangescan_plan(PlannerInfo *root,
+ TidRangePath *best_path,
+ List *tlist,
+ List *scan_clauses);
static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
SubqueryScanPath *best_path,
List *tlist, List *scan_clauses);
@@ -193,6 +197,8 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist,
Index scanrelid);
static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
List *tidquals);
+static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual,
+ Index scanrelid, List *tidrangequals);
static SubqueryScan *make_subqueryscan(List *qptlist,
List *qpqual,
Index scanrelid,
@@ -384,6 +390,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
case T_IndexOnlyScan:
case T_BitmapHeapScan:
case T_TidScan:
+ case T_TidRangeScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_TableFuncScan:
@@ -679,6 +686,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
scan_clauses);
break;
+ case T_TidRangeScan:
+ plan = (Plan *) create_tidrangescan_plan(root,
+ (TidRangePath *) best_path,
+ tlist,
+ scan_clauses);
+ break;
+
case T_SubqueryScan:
plan = (Plan *) create_subqueryscan_plan(root,
(SubqueryScanPath *) best_path,
@@ -3437,6 +3451,71 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
}
/*
+ * create_tidrangescan_plan
+ * Returns a tidrangescan plan for the base relation scanned by 'best_path'
+ * with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ */
+static TidRangeScan *
+create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
+ List *tlist, List *scan_clauses)
+{
+ TidRangeScan *scan_plan;
+ Index scan_relid = best_path->path.parent->relid;
+ List *tidrangequals = best_path->tidrangequals;
+
+ /* it should be a base rel... */
+ Assert(scan_relid > 0);
+ Assert(best_path->path.parent->rtekind == RTE_RELATION);
+
+ /*
+ * The qpqual list must contain all restrictions not enforced by the
+ * tidrangequals list. tidrangequals has AND semantics, so we can simply
+ * remove any qual that appears in it.
+ */
+ {
+ List *qpqual = NIL;
+ ListCell *l;
+
+ foreach(l, scan_clauses)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+ if (rinfo->pseudoconstant)
+ continue; /* we may drop pseudoconstants here */
+ if (list_member_ptr(tidrangequals, rinfo))
+ continue; /* simple duplicate */
+ qpqual = lappend(qpqual, rinfo);
+ }
+ scan_clauses = qpqual;
+ }
+
+ /* Sort clauses into best execution order */
+ scan_clauses = order_qual_clauses(root, scan_clauses);
+
+ /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */
+ tidrangequals = extract_actual_clauses(tidrangequals, false);
+ scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+ /* Replace any outer-relation variables with nestloop params */
+ if (best_path->path.param_info)
+ {
+ tidrangequals = (List *)
+ replace_nestloop_params(root, (Node *) tidrangequals);
+ scan_clauses = (List *)
+ replace_nestloop_params(root, (Node *) scan_clauses);
+ }
+
+ scan_plan = make_tidrangescan(tlist,
+ scan_clauses,
+ scan_relid,
+ tidrangequals);
+
+ copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
+
+ return scan_plan;
+}
+
+/*
* create_subqueryscan_plan
* Returns a subqueryscan plan for the base relation scanned by 'best_path'
* with restriction clauses 'scan_clauses' and targetlist 'tlist'.
@@ -5369,6 +5448,25 @@ make_tidscan(List *qptlist,
return node;
}
+static TidRangeScan *
+make_tidrangescan(List *qptlist,
+ List *qpqual,
+ Index scanrelid,
+ List *tidrangequals)
+{
+ TidRangeScan *node = makeNode(TidRangeScan);
+ Plan *plan = &node->scan.plan;
+
+ plan->targetlist = qptlist;
+ plan->qual = qpqual;
+ plan->lefttree = NULL;
+ plan->righttree = NULL;
+ node->scan.scanrelid = scanrelid;
+ node->tidrangequals = tidrangequals;
+
+ return node;
+}
+
static SubqueryScan *
make_subqueryscan(List *qptlist,
List *qpqual,
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index c3c36be13e1..42f088ad714 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -619,6 +619,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
rtoffset, 1);
}
break;
+ case T_TidRangeScan:
+ {
+ TidRangeScan *splan = (TidRangeScan *) plan;
+
+ splan->scan.scanrelid += rtoffset;
+ splan->scan.plan.targetlist =
+ fix_scan_list(root, splan->scan.plan.targetlist,
+ rtoffset, NUM_EXEC_TLIST(plan));
+ splan->scan.plan.qual =
+ fix_scan_list(root, splan->scan.plan.qual,
+ rtoffset, NUM_EXEC_QUAL(plan));
+ splan->tidrangequals =
+ fix_scan_list(root, splan->tidrangequals,
+ rtoffset, 1);
+ }
+ break;
case T_SubqueryScan:
/* Needs special treatment, see comments below */
return set_subqueryscan_references(root,
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 54ef61bfb35..f3e46e0959e 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -2367,6 +2367,12 @@ finalize_plan(PlannerInfo *root, Plan *plan,
context.paramids = bms_add_members(context.paramids, scan_params);
break;
+ case T_TidRangeScan:
+ finalize_primnode((Node *) ((TidRangeScan *) plan)->tidrangequals,
+ &context);
+ context.paramids = bms_add_members(context.paramids, scan_params);
+ break;
+
case T_SubqueryScan:
{
SubqueryScan *sscan = (SubqueryScan *) plan;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 9be0c4a6af5..69b83071cf2 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1204,6 +1204,35 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
}
/*
+ * create_tidrangescan_path
+ * Creates a path corresponding to a scan by a range of TIDs, returning
+ * the pathnode.
+ */
+TidRangePath *
+create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
+ List *tidrangequals, Relids required_outer)
+{
+ TidRangePath *pathnode = makeNode(TidRangePath);
+
+ pathnode->path.pathtype = T_TidRangeScan;
+ pathnode->path.parent = rel;
+ pathnode->path.pathtarget = rel->reltarget;
+ pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+ required_outer);
+ pathnode->path.parallel_aware = false;
+ pathnode->path.parallel_safe = rel->consider_parallel;
+ pathnode->path.parallel_workers = 0;
+ pathnode->path.pathkeys = NIL; /* always unordered */
+
+ pathnode->tidrangequals = tidrangequals;
+
+ cost_tidrangescan(&pathnode->path, root, rel, tidrangequals,
+ pathnode->path.param_info);
+
+ return pathnode;
+}
+
+/*
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 177e6e336ab..c5947fa4185 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -467,6 +467,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
/* Collect info about relation's foreign keys, if relevant */
get_relation_foreign_keys(root, rel, relation, inhparent);
+ /* Collect info about functions implemented by the rel's table AM. */
+ if (relation->rd_tableam &&
+ relation->rd_tableam->scan_set_tidrange != NULL &&
+ relation->rd_tableam->scan_getnextslot_tidrange != NULL)
+ rel->amflags |= AMFLAG_HAS_TID_RANGE;
+
/*
* Collect info about relation's partitioning scheme, if any. Only
* inheritance parents may be partitioned.
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 731ff708b90..345c877aeb3 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -234,6 +234,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->subroot = NULL;
rel->subplan_params = NIL;
rel->rel_parallel_workers = -1; /* set up in get_relation_info */
+ rel->amflags = 0;
rel->serverid = InvalidOid;
rel->userid = rte->checkAsUser;
rel->useridiscurrent = false;
@@ -646,6 +647,7 @@ build_join_rel(PlannerInfo *root,
joinrel->subroot = NULL;
joinrel->subplan_params = NIL;
joinrel->rel_parallel_workers = -1;
+ joinrel->amflags = 0;
joinrel->serverid = InvalidOid;
joinrel->userid = InvalidOid;
joinrel->useridiscurrent = false;
@@ -826,6 +828,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->eclass_indexes = NULL;
joinrel->subroot = NULL;
joinrel->subplan_params = NIL;
+ joinrel->amflags = 0;
joinrel->serverid = InvalidOid;
joinrel->userid = InvalidOid;
joinrel->useridiscurrent = false;
diff --git a/src/backend/storage/page/itemptr.c b/src/backend/storage/page/itemptr.c
index 55759c383b6..f40d6c22a02 100644
--- a/src/backend/storage/page/itemptr.c
+++ b/src/backend/storage/page/itemptr.c
@@ -71,3 +71,62 @@ ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
else
return 0;
}
+
+/*
+ * ItemPointerInc
+ * Increment 'pointer' by 1 only paying attention to the ItemPointer's
+ * type's range limits and not MaxOffsetNumber and FirstOffsetNumber.
+ * This may result in 'pointer' becoming !OffsetNumberIsValid.
+ *
+ * If the pointer is already the maximum possible values permitted by the
+ * range of the ItemPointer's types, then do nothing.
+ */
+void
+ItemPointerInc(ItemPointer pointer)
+{
+ BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer);
+ OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer);
+
+ if (off == PG_UINT16_MAX)
+ {
+ if (blk != InvalidBlockNumber)
+ {
+ off = 0;
+ blk++;
+ }
+ }
+ else
+ off++;
+
+ ItemPointerSet(pointer, blk, off);
+}
+
+/*
+ * ItemPointerDec
+ * Decrement 'pointer' by 1 only paying attention to the ItemPointer's
+ * type's range limits and not MaxOffsetNumber and FirstOffsetNumber.
+ * This may result in 'pointer' becoming !OffsetNumberIsValid.
+ *
+ * If the pointer is already the minimum possible values permitted by the
+ * range of the ItemPointer's types, then do nothing. This does rely on
+ * FirstOffsetNumber being 1 rather than 0.
+ */
+void
+ItemPointerDec(ItemPointer pointer)
+{
+ BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer);
+ OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer);
+
+ if (off == 0)
+ {
+ if (blk != 0)
+ {
+ off = PG_UINT16_MAX;
+ blk--;
+ }
+ }
+ else
+ off--;
+
+ ItemPointerSet(pointer, blk, off);
+}