Add TID Range Scans to support efficient scanning of ranges of TIDs
author David Rowley <drowley@postgresql.org>
Sat, 27 Feb 2021 09:59:36 +0000 (22:59 +1300)
committer David Rowley <drowley@postgresql.org>
Sat, 27 Feb 2021 09:59:36 +0000 (22:59 +1300)
This adds a new executor node named TID Range Scan.  The query planner
will generate paths for TID Range scans when quals are discovered on base
relations which search for ranges on the table's ctid column.  These
ranges may be open at either end. For example, WHERE ctid >= '(10,0)';
will return all tuples on page 10 and over.

To support this, two new optional callback functions have been added to
table AM.  scan_set_tidrange is used to set the scan range to just the
given range of TIDs.  scan_getnextslot_tidrange fetches the next tuple
in the given range.

For AMs where scanning ranges of TIDs would not make sense, these functions
can be set to NULL in the TableAmRoutine.  The query planner won't
generate TID Range Scan Paths in that case.

Author: Edmund Horner, David Rowley
Reviewed-by: David Rowley, Tomas Vondra, Tom Lane, Andres Freund, Zhihong Yu
Discussion: https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com

36 files changed:
src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_handler.c
src/backend/commands/explain.c
src/backend/executor/Makefile
src/backend/executor/execAmi.c
src/backend/executor/execCurrent.c
src/backend/executor/execProcnode.c
src/backend/executor/nodeTidrangescan.c [new file with mode: 0644]
src/backend/nodes/copyfuncs.c
src/backend/nodes/outfuncs.c
src/backend/optimizer/README
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/tidpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/setrefs.c
src/backend/optimizer/plan/subselect.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/plancat.c
src/backend/optimizer/util/relnode.c
src/backend/storage/page/itemptr.c
src/include/access/heapam.h
src/include/access/relscan.h
src/include/access/tableam.h
src/include/catalog/pg_operator.dat
src/include/executor/nodeTidrangescan.h [new file with mode: 0644]
src/include/nodes/execnodes.h
src/include/nodes/nodes.h
src/include/nodes/pathnodes.h
src/include/nodes/plannodes.h
src/include/optimizer/cost.h
src/include/optimizer/pathnode.h
src/include/storage/itemptr.h
src/test/regress/expected/tidrangescan.out [new file with mode: 0644]
src/test/regress/parallel_schedule
src/test/regress/serial_schedule
src/test/regress/sql/tidrangescan.sql [new file with mode: 0644]

index 9c1d590dc714841066335d072cb9e4831b0411e9..3b435c107d0c6c213c345eb2a3f7c4a9642fe7fe 100644 (file)
@@ -1391,6 +1391,153 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
    return true;
 }
 
+void
+heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
+                 ItemPointer maxtid)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   BlockNumber startBlk;
+   BlockNumber numBlks;
+   ItemPointerData highestItem;
+   ItemPointerData lowestItem;
+
+   /*
+    * For relations without any pages, we can simply leave the TID range
+    * unset.  There will be no tuples to scan, therefore no tuples outside
+    * the given TID range.
+    */
+   if (scan->rs_nblocks == 0)
+       return;
+
+   /*
+    * Set up some ItemPointers which point to the first and last possible
+    * tuples in the heap.
+    */
+   ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
+   ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
+
+   /*
+    * If the given maximum TID is below the highest possible TID in the
+    * relation, then restrict the range to that, otherwise we scan to the end
+    * of the relation.
+    */
+   if (ItemPointerCompare(maxtid, &highestItem) < 0)
+       ItemPointerCopy(maxtid, &highestItem);
+
+   /*
+    * If the given minimum TID is above the lowest possible TID in the
+    * relation, then restrict the range to only scan for TIDs above that.
+    */
+   if (ItemPointerCompare(mintid, &lowestItem) > 0)
+       ItemPointerCopy(mintid, &lowestItem);
+
+   /*
+    * Check for an empty range and protect against would-be negative
+    * results from the numBlks calculation below.
+    */
+   if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
+   {
+       /* Set an empty range of blocks to scan */
+       heap_setscanlimits(sscan, 0, 0);
+       return;
+   }
+
+   /*
+    * Calculate the first block and the number of blocks we must scan. We
+    * could be more aggressive here and perform some more validation to try
+    * and further narrow the scope of blocks to scan by checking if the
+    * lowestItem has an offset above MaxOffsetNumber.  In this case, we could
+    * advance startBlk by one.  Likewise, if highestItem has an offset of 0
+    * we could scan one fewer block.  However, such an optimization does not
+    * seem worth troubling over, currently.
+    */
+   startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
+
+   numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
+       ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
+
+   /* Set the start block and number of blocks to scan */
+   heap_setscanlimits(sscan, startBlk, numBlks);
+
+   /* Finally, set the TID range in sscan */
+   ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
+   ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
+}
+
+bool
+heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
+                         TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   ItemPointer mintid = &sscan->rs_mintid;
+   ItemPointer maxtid = &sscan->rs_maxtid;
+
+   /* Note: no locking manipulations needed */
+   for (;;)
+   {
+       if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
+           heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
+       else
+           heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
+
+       if (scan->rs_ctup.t_data == NULL)
+       {
+           ExecClearTuple(slot);
+           return false;
+       }
+
+       /*
+        * heap_set_tidrange will have used heap_setscanlimits to limit the
+        * range of pages we scan to only ones that can contain the TID range
+        * we're scanning for.  Here we must filter out any tuples from these
+        * pages that are outwith that range.
+        */
+       if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
+       {
+           ExecClearTuple(slot);
+
+           /*
+            * When scanning backwards, the TIDs will be in descending order.
+            * Future tuples in this direction will be lower still, so we can
+            * just return false to indicate there will be no more tuples.
+            */
+           if (ScanDirectionIsBackward(direction))
+               return false;
+
+           continue;
+       }
+
+       /*
+        * Likewise for the final page, we must filter out TIDs greater than
+        * maxtid.
+        */
+       if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
+       {
+           ExecClearTuple(slot);
+
+           /*
+            * When scanning forward, the TIDs will be in ascending order.
+            * Future tuples in this direction will be higher still, so we can
+            * just return false to indicate there will be no more tuples.
+            */
+           if (ScanDirectionIsForward(direction))
+               return false;
+           continue;
+       }
+
+       break;
+   }
+
+   /*
+    * if we get here it means we have a new current scan tuple, so point to
+    * the proper return buffer and return the tuple.
+    */
+   pgstat_count_heap_getnext(scan->rs_base.rs_rd);
+
+   ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
+   return true;
+}
+
 /*
  * heap_fetch      - retrieve tuple with given tid
  *
index 4a70e20a14308147bc6f776500c1392078d69286..bd5faf0c1fb493715b28a957501d916d929298c2 100644 (file)
@@ -2542,6 +2542,9 @@ static const TableAmRoutine heapam_methods = {
    .scan_rescan = heap_rescan,
    .scan_getnextslot = heap_getnextslot,
 
+   .scan_set_tidrange = heap_set_tidrange,
+   .scan_getnextslot_tidrange = heap_getnextslot_tidrange,
+
    .parallelscan_estimate = table_block_parallelscan_estimate,
    .parallelscan_initialize = table_block_parallelscan_initialize,
    .parallelscan_reinitialize = table_block_parallelscan_reinitialize,
index f80e379973aabd7af6064af5cff7b6a561749027..afc45429ba4560e987dc909ca2eccdddd1a34d83 100644 (file)
@@ -1057,6 +1057,7 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
        case T_IndexOnlyScan:
        case T_BitmapHeapScan:
        case T_TidScan:
+       case T_TidRangeScan:
        case T_SubqueryScan:
        case T_FunctionScan:
        case T_TableFuncScan:
@@ -1223,6 +1224,9 @@ ExplainNode(PlanState *planstate, List *ancestors,
        case T_TidScan:
            pname = sname = "Tid Scan";
            break;
+       case T_TidRangeScan:
+           pname = sname = "Tid Range Scan";
+           break;
        case T_SubqueryScan:
            pname = sname = "Subquery Scan";
            break;
@@ -1417,6 +1421,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
        case T_SampleScan:
        case T_BitmapHeapScan:
        case T_TidScan:
+       case T_TidRangeScan:
        case T_SubqueryScan:
        case T_FunctionScan:
        case T_TableFuncScan:
@@ -1871,6 +1876,23 @@ ExplainNode(PlanState *planstate, List *ancestors,
                                               planstate, es);
            }
            break;
+       case T_TidRangeScan:
+           {
+               /*
+                * The tidrangequals list has AND semantics, so be sure to
+                * show it as an AND condition.
+                */
+               List       *tidquals = ((TidRangeScan *) plan)->tidrangequals;
+
+               if (list_length(tidquals) > 1)
+                   tidquals = list_make1(make_andclause(tidquals));
+               show_scan_qual(tidquals, "TID Cond", planstate, ancestors, es);
+               show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
+               if (plan->qual)
+                   show_instrumentation_count("Rows Removed by Filter", 1,
+                                              planstate, es);
+           }
+           break;
        case T_ForeignScan:
            show_scan_qual(plan->qual, "Filter", planstate, ancestors, es);
            if (plan->qual)
@@ -3558,6 +3580,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es)
        case T_IndexOnlyScan:
        case T_BitmapHeapScan:
        case T_TidScan:
+       case T_TidRangeScan:
        case T_ForeignScan:
        case T_CustomScan:
        case T_ModifyTable:
index f990c6473a37599aa17df6b0f8c2313ee8cb8daa..74ac59faa138b33cfd6c541dd35a9dd860bd7659 100644 (file)
@@ -67,6 +67,7 @@ OBJS = \
    nodeSubplan.o \
    nodeSubqueryscan.o \
    nodeTableFuncscan.o \
+   nodeTidrangescan.o \
    nodeTidscan.o \
    nodeUnique.o \
    nodeValuesscan.o \
index 23bdb53cd1037e4b476b9286f7dd3c915be830b2..4543ac79edfb40c346542ac6f86365c24458cc2d 100644 (file)
@@ -51,6 +51,7 @@
 #include "executor/nodeSubplan.h"
 #include "executor/nodeSubqueryscan.h"
 #include "executor/nodeTableFuncscan.h"
+#include "executor/nodeTidrangescan.h"
 #include "executor/nodeTidscan.h"
 #include "executor/nodeUnique.h"
 #include "executor/nodeValuesscan.h"
@@ -197,6 +198,10 @@ ExecReScan(PlanState *node)
            ExecReScanTidScan((TidScanState *) node);
            break;
 
+       case T_TidRangeScanState:
+           ExecReScanTidRangeScan((TidRangeScanState *) node);
+           break;
+
        case T_SubqueryScanState:
            ExecReScanSubqueryScan((SubqueryScanState *) node);
            break;
@@ -562,6 +567,7 @@ ExecSupportsBackwardScan(Plan *node)
 
        case T_SeqScan:
        case T_TidScan:
+       case T_TidRangeScan:
        case T_FunctionScan:
        case T_ValuesScan:
        case T_CteScan:
index 33221a4d6ce1afad421bc741a15ed5d2b623aaaa..4f430fb16034a6b30954de93e7c1859868f33166 100644 (file)
@@ -336,6 +336,7 @@ search_plan_tree(PlanState *node, Oid table_oid,
        case T_IndexOnlyScanState:
        case T_BitmapHeapScanState:
        case T_TidScanState:
+       case T_TidRangeScanState:
        case T_ForeignScanState:
        case T_CustomScanState:
            {
index 414df50a0545e567726280f7e64041c8064616f8..29766d8196f9695a76ed4e9b1c3e468440d48506 100644 (file)
 #include "executor/nodeSubplan.h"
 #include "executor/nodeSubqueryscan.h"
 #include "executor/nodeTableFuncscan.h"
+#include "executor/nodeTidrangescan.h"
 #include "executor/nodeTidscan.h"
 #include "executor/nodeUnique.h"
 #include "executor/nodeValuesscan.h"
@@ -238,6 +239,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
                                                   estate, eflags);
            break;
 
+       case T_TidRangeScan:
+           result = (PlanState *) ExecInitTidRangeScan((TidRangeScan *) node,
+                                                       estate, eflags);
+           break;
+
        case T_SubqueryScan:
            result = (PlanState *) ExecInitSubqueryScan((SubqueryScan *) node,
                                                        estate, eflags);
@@ -637,6 +643,10 @@ ExecEndNode(PlanState *node)
            ExecEndTidScan((TidScanState *) node);
            break;
 
+       case T_TidRangeScanState:
+           ExecEndTidRangeScan((TidRangeScanState *) node);
+           break;
+
        case T_SubqueryScanState:
            ExecEndSubqueryScan((SubqueryScanState *) node);
            break;
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
new file mode 100644 (file)
index 0000000..2b0d205
--- /dev/null
@@ -0,0 +1,413 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeTidrangescan.c
+ *   Routines to support TID range scans of relations
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *   src/backend/executor/nodeTidrangescan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "catalog/pg_operator.h"
+#include "executor/execdebug.h"
+#include "executor/nodeTidrangescan.h"
+#include "nodes/nodeFuncs.h"
+#include "storage/bufmgr.h"
+#include "utils/rel.h"
+
+
+#define IsCTIDVar(node)  \
+   ((node) != NULL && \
+    IsA((node), Var) && \
+    ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
+    ((Var *) (node))->varlevelsup == 0)
+
+typedef enum
+{
+   TIDEXPR_UPPER_BOUND,
+   TIDEXPR_LOWER_BOUND
+} TidExprType;
+
+/* Upper or lower range bound for scan */
+typedef struct TidOpExpr
+{
+   TidExprType exprtype;       /* type of op; lower or upper */
+   ExprState  *exprstate;      /* ExprState for a TID-yielding subexpr */
+   bool        inclusive;      /* whether op is inclusive */
+} TidOpExpr;
+
+/*
+ * For the given 'expr', build and return an appropriate TidOpExpr taking into
+ * account the expr's operator and operand order.
+ */
+static TidOpExpr *
+MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
+{
+   Node       *arg1 = get_leftop((Expr *) expr);
+   Node       *arg2 = get_rightop((Expr *) expr);
+   ExprState  *exprstate = NULL;
+   bool        invert = false;
+   TidOpExpr  *tidopexpr;
+
+   if (IsCTIDVar(arg1))
+       exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps);
+   else if (IsCTIDVar(arg2))
+   {
+       exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps);
+       invert = true;
+   }
+   else
+       elog(ERROR, "could not identify CTID variable");
+
+   tidopexpr = (TidOpExpr *) palloc(sizeof(TidOpExpr));
+   tidopexpr->inclusive = false;   /* for now */
+
+   switch (expr->opno)
+   {
+       case TIDLessEqOperator:
+           tidopexpr->inclusive = true;
+           /* fall through */
+       case TIDLessOperator:
+           tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : TIDEXPR_UPPER_BOUND;
+           break;
+       case TIDGreaterEqOperator:
+           tidopexpr->inclusive = true;
+           /* fall through */
+       case TIDGreaterOperator:
+           tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : TIDEXPR_LOWER_BOUND;
+           break;
+       default:
+           elog(ERROR, "could not identify CTID operator");
+   }
+
+   tidopexpr->exprstate = exprstate;
+
+   return tidopexpr;
+}
+
+/*
+ * Extract the qual subexpressions that yield TIDs to search for,
+ * and compile them into ExprStates if they're ordinary expressions.
+ */
+static void
+TidExprListCreate(TidRangeScanState *tidrangestate)
+{
+   TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan;
+   List       *tidexprs = NIL;
+   ListCell   *l;
+
+   foreach(l, node->tidrangequals)
+   {
+       OpExpr     *opexpr = lfirst(l);
+       TidOpExpr  *tidopexpr;
+
+       if (!IsA(opexpr, OpExpr))
+           elog(ERROR, "could not identify CTID expression");
+
+       tidopexpr = MakeTidOpExpr(opexpr, tidrangestate);
+       tidexprs = lappend(tidexprs, tidopexpr);
+   }
+
+   tidrangestate->trss_tidexprs = tidexprs;
+}
+
+/* ----------------------------------------------------------------
+ *     TidRangeEval
+ *
+ *     Compute and set node's block and offset range to scan by evaluating
+ *     the trss_tidexprs.  Returns false if we detect the range cannot
+ *     contain any tuples.  Returns true if it's possible for the range to
+ *     contain tuples.
+ * ----------------------------------------------------------------
+ */
+static bool
+TidRangeEval(TidRangeScanState *node)
+{
+   ExprContext *econtext = node->ss.ps.ps_ExprContext;
+   ItemPointerData lowerBound;
+   ItemPointerData upperBound;
+   ListCell   *l;
+
+   /*
+    * Set the upper and lower bounds to the absolute limits of the range of
+    * the ItemPointer type.  Below we'll try to narrow this range on either
+    * side by looking at the TidOpExprs.
+    */
+   ItemPointerSet(&lowerBound, 0, 0);
+   ItemPointerSet(&upperBound, InvalidBlockNumber, PG_UINT16_MAX);
+
+   foreach(l, node->trss_tidexprs)
+   {
+       TidOpExpr  *tidopexpr = (TidOpExpr *) lfirst(l);
+       ItemPointer itemptr;
+       bool        isNull;
+
+       /* Evaluate this bound. */
+       itemptr = (ItemPointer)
+           DatumGetPointer(ExecEvalExprSwitchContext(tidopexpr->exprstate,
+                                                     econtext,
+                                                     &isNull));
+
+       /* If the bound is NULL, *nothing* matches the qual. */
+       if (isNull)
+           return false;
+
+       if (tidopexpr->exprtype == TIDEXPR_LOWER_BOUND)
+       {
+           ItemPointerData lb;
+
+           ItemPointerCopy(itemptr, &lb);
+
+           /*
+            * Normalize non-inclusive ranges to become inclusive.  The
+            * resulting ItemPointer here may not be a valid item pointer.
+            */
+           if (!tidopexpr->inclusive)
+               ItemPointerInc(&lb);
+
+           /* Check if we can narrow the range using this qual */
+           if (ItemPointerCompare(&lb, &lowerBound) > 0)
+               ItemPointerCopy(&lb, &lowerBound);
+       }
+
+       else if (tidopexpr->exprtype == TIDEXPR_UPPER_BOUND)
+       {
+           ItemPointerData ub;
+
+           ItemPointerCopy(itemptr, &ub);
+
+           /*
+            * Normalize non-inclusive ranges to become inclusive.  The
+            * resulting ItemPointer here may not be a valid item pointer.
+            */
+           if (!tidopexpr->inclusive)
+               ItemPointerDec(&ub);
+
+           /* Check if we can narrow the range using this qual */
+           if (ItemPointerCompare(&ub, &upperBound) < 0)
+               ItemPointerCopy(&ub, &upperBound);
+       }
+   }
+
+   ItemPointerCopy(&lowerBound, &node->trss_mintid);
+   ItemPointerCopy(&upperBound, &node->trss_maxtid);
+
+   return true;
+}
+
+/* ----------------------------------------------------------------
+ *     TidRangeNext
+ *
+ *     Retrieve a tuple from the TidRangeScan node's currentRelation
+ *     using the TIDs in the TidRangeScanState information.
+ *
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+TidRangeNext(TidRangeScanState *node)
+{
+   TableScanDesc scandesc;
+   EState     *estate;
+   ScanDirection direction;
+   TupleTableSlot *slot;
+
+   /*
+    * extract necessary information from TID range scan node
+    */
+   scandesc = node->ss.ss_currentScanDesc;
+   estate = node->ss.ps.state;
+   slot = node->ss.ss_ScanTupleSlot;
+   direction = estate->es_direction;
+
+   if (!node->trss_inScan)
+   {
+       /* First time through, compute TID range to scan */
+       if (!TidRangeEval(node))
+           return NULL;
+
+       if (scandesc == NULL)
+       {
+           scandesc = table_beginscan_tidrange(node->ss.ss_currentRelation,
+                                               estate->es_snapshot,
+                                               &node->trss_mintid,
+                                               &node->trss_maxtid);
+           node->ss.ss_currentScanDesc = scandesc;
+       }
+       else
+       {
+           /* rescan with the updated TID range */
+           table_rescan_tidrange(scandesc, &node->trss_mintid,
+                                 &node->trss_maxtid);
+       }
+
+       node->trss_inScan = true;
+   }
+
+   /* Fetch the next tuple. */
+   if (!table_scan_getnextslot_tidrange(scandesc, direction, slot))
+   {
+       node->trss_inScan = false;
+       ExecClearTuple(slot);
+   }
+
+   return slot;
+}
+
+/*
+ * TidRangeRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ */
+static bool
+TidRangeRecheck(TidRangeScanState *node, TupleTableSlot *slot)
+{
+   return true;
+}
+
+/* ----------------------------------------------------------------
+ *     ExecTidRangeScan(node)
+ *
+ *     Scans the relation using tids and returns the next qualifying tuple.
+ *     We call the ExecScan() routine and pass it the appropriate
+ *     access method functions.
+ *
+ *     Conditions:
+ *       -- the "cursor" maintained by the AMI is positioned at the tuple
+ *          returned previously.
+ *
+ *     Initial States:
+ *       -- the relation indicated is opened for TID range scanning.
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+ExecTidRangeScan(PlanState *pstate)
+{
+   TidRangeScanState *node = castNode(TidRangeScanState, pstate);
+
+   return ExecScan(&node->ss,
+                   (ExecScanAccessMtd) TidRangeNext,
+                   (ExecScanRecheckMtd) TidRangeRecheck);
+}
+
+/* ----------------------------------------------------------------
+ *     ExecReScanTidRangeScan(node)
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanTidRangeScan(TidRangeScanState *node)
+{
+   /* mark scan as not in progress, and TID range as not computed yet */
+   node->trss_inScan = false;
+
+   /*
+    * We must wait until TidRangeNext before calling table_rescan_tidrange.
+    */
+   ExecScanReScan(&node->ss);
+}
+
+/* ----------------------------------------------------------------
+ *     ExecEndTidRangeScan
+ *
+ *     Releases any storage allocated through C routines.
+ *     Returns nothing.
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndTidRangeScan(TidRangeScanState *node)
+{
+   TableScanDesc scan = node->ss.ss_currentScanDesc;
+
+   if (scan != NULL)
+       table_endscan(scan);
+
+   /*
+    * Free the exprcontext
+    */
+   ExecFreeExprContext(&node->ss.ps);
+
+   /*
+    * clear out tuple table slots
+    */
+   if (node->ss.ps.ps_ResultTupleSlot)
+       ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+   ExecClearTuple(node->ss.ss_ScanTupleSlot);
+}
+
+/* ----------------------------------------------------------------
+ *     ExecInitTidRangeScan
+ *
+ *     Initializes the tid range scan's state information, creates
+ *     scan keys, and opens the scan relation.
+ *
+ *     Parameters:
+ *       node: TidRangeScan node produced by the planner.
+ *       estate: the execution state initialized in InitPlan.
+ * ----------------------------------------------------------------
+ */
+TidRangeScanState *
+ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags)
+{
+   TidRangeScanState *tidrangestate;
+   Relation    currentRelation;
+
+   /*
+    * create state structure
+    */
+   tidrangestate = makeNode(TidRangeScanState);
+   tidrangestate->ss.ps.plan = (Plan *) node;
+   tidrangestate->ss.ps.state = estate;
+   tidrangestate->ss.ps.ExecProcNode = ExecTidRangeScan;
+
+   /*
+    * Miscellaneous initialization
+    *
+    * create expression context for node
+    */
+   ExecAssignExprContext(estate, &tidrangestate->ss.ps);
+
+   /*
+    * mark scan as not in progress, and TID range as not computed yet
+    */
+   tidrangestate->trss_inScan = false;
+
+   /*
+    * open the scan relation
+    */
+   currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
+
+   tidrangestate->ss.ss_currentRelation = currentRelation;
+   tidrangestate->ss.ss_currentScanDesc = NULL;    /* no table scan here */
+
+   /*
+    * get the scan type from the relation descriptor.
+    */
+   ExecInitScanTupleSlot(estate, &tidrangestate->ss,
+                         RelationGetDescr(currentRelation),
+                         table_slot_callbacks(currentRelation));
+
+   /*
+    * Initialize result type and projection.
+    */
+   ExecInitResultTypeTL(&tidrangestate->ss.ps);
+   ExecAssignScanProjectionInfo(&tidrangestate->ss);
+
+   /*
+    * initialize child expressions
+    */
+   tidrangestate->ss.ps.qual =
+       ExecInitQual(node->scan.plan.qual, (PlanState *) tidrangestate);
+
+   TidExprListCreate(tidrangestate);
+
+   /*
+    * all done.
+    */
+   return tidrangestate;
+}
index 65bbc18ecbadbcc0cb495edcac5257ffaca75d03..aaba1ec2c4a9947aa5547882d4af8c68bbde848a 100644 (file)
@@ -585,6 +585,27 @@ _copyTidScan(const TidScan *from)
    return newnode;
 }
 
+/*
+ * _copyTidRangeScan
+ */
+static TidRangeScan *
+_copyTidRangeScan(const TidRangeScan *from)
+{
+   TidRangeScan *newnode = makeNode(TidRangeScan);
+
+   /*
+    * copy node superclass fields
+    */
+   CopyScanFields((const Scan *) from, (Scan *) newnode);
+
+   /*
+    * copy remainder of node
+    */
+   COPY_NODE_FIELD(tidrangequals);
+
+   return newnode;
+}
+
 /*
  * _copySubqueryScan
  */
@@ -4938,6 +4959,9 @@ copyObjectImpl(const void *from)
        case T_TidScan:
            retval = _copyTidScan(from);
            break;
+       case T_TidRangeScan:
+           retval = _copyTidRangeScan(from);
+           break;
        case T_SubqueryScan:
            retval = _copySubqueryScan(from);
            break;
index f5dcedf6e89ec471ae52db4aea5775871cadf043..8fc432bfe17b8ed31f9a8007bf83c77fd4ba83c0 100644 (file)
@@ -608,6 +608,16 @@ _outTidScan(StringInfo str, const TidScan *node)
    WRITE_NODE_FIELD(tidquals);
 }
 
+static void
+_outTidRangeScan(StringInfo str, const TidRangeScan *node)
+{
+   WRITE_NODE_TYPE("TIDRANGESCAN");
+
+   _outScanInfo(str, (const Scan *) node);
+
+   WRITE_NODE_FIELD(tidrangequals);
+}
+
 static void
 _outSubqueryScan(StringInfo str, const SubqueryScan *node)
 {
@@ -2314,6 +2324,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
    WRITE_NODE_FIELD(subroot);
    WRITE_NODE_FIELD(subplan_params);
    WRITE_INT_FIELD(rel_parallel_workers);
+   WRITE_UINT_FIELD(amflags);
    WRITE_OID_FIELD(serverid);
    WRITE_OID_FIELD(userid);
    WRITE_BOOL_FIELD(useridiscurrent);
@@ -3810,6 +3821,9 @@ outNode(StringInfo str, const void *obj)
            case T_TidScan:
                _outTidScan(str, obj);
                break;
+           case T_TidRangeScan:
+               _outTidRangeScan(str, obj);
+               break;
            case T_SubqueryScan:
                _outSubqueryScan(str, obj);
                break;
index efb52858c88b1d90d8229458612d5ee9ee3419e4..4a6c3481623bd04a0298dbb534609fb4673c4553 100644 (file)
@@ -374,6 +374,7 @@ RelOptInfo      - a relation or joined relations
   IndexPath     - index scan
   BitmapHeapPath - top of a bitmapped index scan
   TidPath       - scan by CTID
+  TidRangePath  - scan a contiguous range of CTIDs
   SubqueryScanPath - scan a subquery-in-FROM
   ForeignPath   - scan a foreign table, foreign join or foreign upper-relation
   CustomPath    - for custom scan providers
index aab06c7d213ea347bb8c20aa1003a508061fc107..a25b674a1927f02d4919acb3ab05856b7832590a 100644 (file)
@@ -1283,6 +1283,101 @@ cost_tidscan(Path *path, PlannerInfo *root,
    path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * cost_tidrangescan
+ *   Determines and sets the costs of scanning a relation using a range of
+ *   TIDs for 'path'
+ *
+ * 'baserel' is the relation to be scanned
+ * 'tidrangequals' is the list of TID-checkable range quals
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ */
+void
+cost_tidrangescan(Path *path, PlannerInfo *root,
+                 RelOptInfo *baserel, List *tidrangequals,
+                 ParamPathInfo *param_info)
+{
+   Selectivity selectivity;
+   double      pages;
+   Cost        startup_cost = 0;
+   Cost        run_cost = 0;
+   QualCost    qpqual_cost;
+   Cost        cpu_per_tuple;
+   QualCost    tid_qual_cost;
+   double      ntuples;
+   double      nseqpages;
+   double      spc_random_page_cost;
+   double      spc_seq_page_cost;
+
+   /* Should only be applied to base relations */
+   Assert(baserel->relid > 0);
+   Assert(baserel->rtekind == RTE_RELATION);
+
+   /* Mark the path with the correct row estimate */
+   if (param_info)
+       path->rows = param_info->ppi_rows;
+   else
+       path->rows = baserel->rows;
+
+   /* Count how many tuples and pages we expect to scan */
+   selectivity = clauselist_selectivity(root, tidrangequals, baserel->relid,
+                                        JOIN_INNER, NULL);
+   pages = ceil(selectivity * baserel->pages);
+
+   if (pages <= 0.0)
+       pages = 1.0;
+
+   /*
+    * The first page in a range requires a random seek, but each subsequent
+    * page is just a normal sequential page read. NOTE: it's desirable for
+    * TID Range Scans to cost more than the equivalent Sequential Scans,
+    * because Seq Scans have some performance advantages such as scan
+    * synchronization and parallelizability, and we'd prefer one of them to
+    * be picked unless a TID Range Scan really is better.
+    */
+   ntuples = selectivity * baserel->tuples;
+   nseqpages = pages - 1.0;
+
+   if (!enable_tidscan)
+       startup_cost += disable_cost;
+
+   /*
+    * The TID qual expressions will be computed once, any other baserestrict
+    * quals once per retrieved tuple.
+    */
+   cost_qual_eval(&tid_qual_cost, tidrangequals, root);
+
+   /* fetch estimated page cost for tablespace containing table */
+   get_tablespace_page_costs(baserel->reltablespace,
+                             &spc_random_page_cost,
+                             &spc_seq_page_cost);
+
+   /* disk costs; 1 random page and the remainder as seq pages */
+   run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages;
+
+   /* Add scanning CPU costs */
+   get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+   /*
+    * XXX currently we assume TID quals are a subset of qpquals at this
+    * point; they will be removed (if possible) when we create the plan, so
+    * we subtract their cost from the total qpqual cost.  (If the TID quals
+    * can't be removed, this is a mistake and we're going to underestimate
+    * the CPU cost a bit.)
+    */
+   startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple;
+   cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple -
+       tid_qual_cost.per_tuple;
+   run_cost += cpu_per_tuple * ntuples;
+
+   /* tlist eval costs are paid per output row, not per tuple scanned */
+   startup_cost += path->pathtarget->cost.startup;
+   run_cost += path->pathtarget->cost.per_tuple * path->rows;
+
+   path->startup_cost = startup_cost;
+   path->total_cost = startup_cost + run_cost;
+}
+
 /*
  * cost_subqueryscan
  *   Determines and returns the cost of scanning a subquery RTE.
index 0845b460e2c9a783e8f74c170a822967ec8c549e..0725d950c5405cd9a28a9eb2249510989e7cca58 100644 (file)
@@ -2,9 +2,9 @@
  *
  * tidpath.c
  *   Routines to determine which TID conditions are usable for scanning
- *   a given relation, and create TidPaths accordingly.
+ *   a given relation, and create TidPaths and TidRangePaths accordingly.
  *
- * What we are looking for here is WHERE conditions of the form
+ * For TidPaths, we look for WHERE conditions of the form
  * "CTID = pseudoconstant", which can be implemented by just fetching
  * the tuple directly via heap_fetch().  We can also handle OR'd conditions
  * such as (CTID = const1) OR (CTID = const2), as well as ScalarArrayOpExpr
@@ -23,6 +23,9 @@
  * a function, but in practice it works better to keep the special node
  * representation all the way through to execution.
  *
+ * Additionally, TidRangePaths may be created for conditions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=, and
+ * AND-clauses composed of such conditions.
  *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -63,14 +66,14 @@ IsCTIDVar(Var *var, RelOptInfo *rel)
 
 /*
  * Check to see if a RestrictInfo is of the form
- *     CTID = pseudoconstant
+ *     CTID OP pseudoconstant
  * or
- *     pseudoconstant = CTID
- * where the CTID Var belongs to relation "rel", and nothing on the
- * other side of the clause does.
+ *     pseudoconstant OP CTID
+ * where OP is a binary operation, the CTID Var belongs to relation "rel",
+ * and nothing on the other side of the clause does.
  */
 static bool
-IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+IsBinaryTidClause(RestrictInfo *rinfo, RelOptInfo *rel)
 {
    OpExpr     *node;
    Node       *arg1,
@@ -83,10 +86,9 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
        return false;
    node = (OpExpr *) rinfo->clause;
 
-   /* Operator must be tideq */
-   if (node->opno != TIDEqualOperator)
+   /* OpExpr must have two arguments */
+   if (list_length(node->args) != 2)
        return false;
-   Assert(list_length(node->args) == 2);
    arg1 = linitial(node->args);
    arg2 = lsecond(node->args);
 
@@ -116,6 +118,50 @@ IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
    return true;                /* success */
 }
 
+/*
+ * Check to see if a RestrictInfo is of the form
+ *     CTID = pseudoconstant
+ * or
+ *     pseudoconstant = CTID
+ * where the CTID Var belongs to relation "rel", and nothing on the
+ * other side of the clause does.
+ */
+static bool
+IsTidEqualClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+   if (!IsBinaryTidClause(rinfo, rel))
+       return false;
+
+   if (((OpExpr *) rinfo->clause)->opno == TIDEqualOperator)
+       return true;
+
+   return false;
+}
+
+/*
+ * Check to see if a RestrictInfo is of the form
+ *     CTID OP pseudoconstant
+ * or
+ *     pseudoconstant OP CTID
+ * where OP is a range operator such as <, <=, >, or >=, the CTID Var belongs
+ * to relation "rel", and nothing on the other side of the clause does.
+ */
+static bool
+IsTidRangeClause(RestrictInfo *rinfo, RelOptInfo *rel)
+{
+   Oid         opno;
+
+   if (!IsBinaryTidClause(rinfo, rel))
+       return false;
+   opno = ((OpExpr *) rinfo->clause)->opno;
+
+   if (opno == TIDLessOperator || opno == TIDLessEqOperator ||
+       opno == TIDGreaterOperator || opno == TIDGreaterEqOperator)
+       return true;
+
+   return false;
+}
+
 /*
  * Check to see if a RestrictInfo is of the form
  *     CTID = ANY (pseudoconstant_array)
@@ -222,7 +268,7 @@ TidQualFromRestrictInfo(PlannerInfo *root, RestrictInfo *rinfo, RelOptInfo *rel)
  *
  * Returns a List of CTID qual RestrictInfos for the specified rel (with
  * implicit OR semantics across the list), or NIL if there are no usable
- * conditions.
+ * equality conditions.
  *
  * This function is just concerned with handling AND/OR recursion.
  */
@@ -301,6 +347,34 @@ TidQualFromRestrictInfoList(PlannerInfo *root, List *rlist, RelOptInfo *rel)
    return rlst;
 }
 
+/*
+ * Extract a set of CTID range conditions from implicit-AND List of RestrictInfos
+ *
+ * Returns a List of CTID range qual RestrictInfos for the specified rel
+ * (with implicit AND semantics across the list), or NIL if there are no
+ * usable range conditions or if the rel's table AM does not support TID range
+ * scans.
+ */
+static List *
+TidRangeQualFromRestrictInfoList(List *rlist, RelOptInfo *rel)
+{
+   List       *rlst = NIL;
+   ListCell   *l;
+
+   if ((rel->amflags & AMFLAG_HAS_TID_RANGE) == 0)
+       return NIL;
+
+   foreach(l, rlist)
+   {
+       RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+       if (IsTidRangeClause(rinfo, rel))
+           rlst = lappend(rlst, rinfo);
+   }
+
+   return rlst;
+}
+
 /*
  * Given a list of join clauses involving our rel, create a parameterized
  * TidPath for each one that is a suitable TidEqual clause.
@@ -385,6 +459,7 @@ void
 create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
 {
    List       *tidquals;
+   List       *tidrangequals;
 
    /*
     * If any suitable quals exist in the rel's baserestrict list, generate a
@@ -392,7 +467,7 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
     */
    tidquals = TidQualFromRestrictInfoList(root, rel->baserestrictinfo, rel);
 
-   if (tidquals)
+   if (tidquals != NIL)
    {
        /*
         * This path uses no join clauses, but it could still have required
@@ -404,6 +479,26 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel)
                                                   required_outer));
    }
 
+   /*
+    * If there are range quals in the baserestrict list, generate a
+    * TidRangePath.
+    */
+   tidrangequals = TidRangeQualFromRestrictInfoList(rel->baserestrictinfo,
+                                                    rel);
+
+   if (tidrangequals != NIL)
+   {
+       /*
+        * This path uses no join clauses, but it could still have required
+        * parameterization due to LATERAL refs in its tlist.
+        */
+       Relids      required_outer = rel->lateral_relids;
+
+       add_path(rel, (Path *) create_tidrangescan_path(root, rel,
+                                                       tidrangequals,
+                                                       required_outer));
+   }
+
    /*
     * Try to generate parameterized TidPaths using equality clauses extracted
     * from EquivalenceClasses.  (This is important since simple "t1.ctid =
index 6c8305c977e64753bce5c6ed91125d671439e240..906cab7053229011fba6fe7f2bcd7ae15279c73b 100644 (file)
@@ -129,6 +129,10 @@ static Plan *create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual,
 static void bitmap_subplan_mark_shared(Plan *plan);
 static TidScan *create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
                                    List *tlist, List *scan_clauses);
+static TidRangeScan *create_tidrangescan_plan(PlannerInfo *root,
+                                             TidRangePath *best_path,
+                                             List *tlist,
+                                             List *scan_clauses);
 static SubqueryScan *create_subqueryscan_plan(PlannerInfo *root,
                                              SubqueryScanPath *best_path,
                                              List *tlist, List *scan_clauses);
@@ -193,6 +197,8 @@ static BitmapHeapScan *make_bitmap_heapscan(List *qptlist,
                                            Index scanrelid);
 static TidScan *make_tidscan(List *qptlist, List *qpqual, Index scanrelid,
                             List *tidquals);
+static TidRangeScan *make_tidrangescan(List *qptlist, List *qpqual,
+                                      Index scanrelid, List *tidrangequals);
 static SubqueryScan *make_subqueryscan(List *qptlist,
                                       List *qpqual,
                                       Index scanrelid,
@@ -384,6 +390,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path, int flags)
        case T_IndexOnlyScan:
        case T_BitmapHeapScan:
        case T_TidScan:
+       case T_TidRangeScan:
        case T_SubqueryScan:
        case T_FunctionScan:
        case T_TableFuncScan:
@@ -679,6 +686,13 @@ create_scan_plan(PlannerInfo *root, Path *best_path, int flags)
                                                scan_clauses);
            break;
 
+       case T_TidRangeScan:
+           plan = (Plan *) create_tidrangescan_plan(root,
+                                                    (TidRangePath *) best_path,
+                                                    tlist,
+                                                    scan_clauses);
+           break;
+
        case T_SubqueryScan:
            plan = (Plan *) create_subqueryscan_plan(root,
                                                     (SubqueryScanPath *) best_path,
@@ -3436,6 +3450,71 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
    return scan_plan;
 }
 
+/*
+ * create_tidrangescan_plan
+ *  Returns a tidrangescan plan for the base relation scanned by 'best_path'
+ *  with restriction clauses 'scan_clauses' and targetlist 'tlist'.
+ */
+static TidRangeScan *
+create_tidrangescan_plan(PlannerInfo *root, TidRangePath *best_path,
+                        List *tlist, List *scan_clauses)
+{
+   TidRangeScan *scan_plan;
+   Index       scan_relid = best_path->path.parent->relid;
+   List       *tidrangequals = best_path->tidrangequals;
+
+   /* it should be a base rel... */
+   Assert(scan_relid > 0);
+   Assert(best_path->path.parent->rtekind == RTE_RELATION);
+
+   /*
+    * The qpqual list must contain all restrictions not enforced by the
+    * tidrangequals list.  tidrangequals has AND semantics, so we can simply
+    * remove any qual that appears in it.
+    */
+   {
+       List       *qpqual = NIL;
+       ListCell   *l;
+
+       foreach(l, scan_clauses)
+       {
+           RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
+
+           if (rinfo->pseudoconstant)
+               continue;       /* we may drop pseudoconstants here */
+           if (list_member_ptr(tidrangequals, rinfo))
+               continue;       /* simple duplicate */
+           qpqual = lappend(qpqual, rinfo);
+       }
+       scan_clauses = qpqual;
+   }
+
+   /* Sort clauses into best execution order */
+   scan_clauses = order_qual_clauses(root, scan_clauses);
+
+   /* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */
+   tidrangequals = extract_actual_clauses(tidrangequals, false);
+   scan_clauses = extract_actual_clauses(scan_clauses, false);
+
+   /* Replace any outer-relation variables with nestloop params */
+   if (best_path->path.param_info)
+   {
+       tidrangequals = (List *)
+           replace_nestloop_params(root, (Node *) tidrangequals);
+       scan_clauses = (List *)
+           replace_nestloop_params(root, (Node *) scan_clauses);
+   }
+
+   scan_plan = make_tidrangescan(tlist,
+                                 scan_clauses,
+                                 scan_relid,
+                                 tidrangequals);
+
+   copy_generic_path_info(&scan_plan->scan.plan, &best_path->path);
+
+   return scan_plan;
+}
+
 /*
  * create_subqueryscan_plan
  *  Returns a subqueryscan plan for the base relation scanned by 'best_path'
@@ -5369,6 +5448,25 @@ make_tidscan(List *qptlist,
    return node;
 }
 
+static TidRangeScan *
+make_tidrangescan(List *qptlist,
+                 List *qpqual,
+                 Index scanrelid,
+                 List *tidrangequals)
+{
+   TidRangeScan *node = makeNode(TidRangeScan);
+   Plan       *plan = &node->scan.plan;
+
+   plan->targetlist = qptlist;
+   plan->qual = qpqual;
+   plan->lefttree = NULL;
+   plan->righttree = NULL;
+   node->scan.scanrelid = scanrelid;
+   node->tidrangequals = tidrangequals;
+
+   return node;
+}
+
 static SubqueryScan *
 make_subqueryscan(List *qptlist,
                  List *qpqual,
index c3c36be13e10e5cecc8db4dbc20505e907e829b0..42f088ad7148cc3aa2c443a127cef6f59c81a8be 100644 (file)
@@ -619,6 +619,22 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
                                  rtoffset, 1);
            }
            break;
+       case T_TidRangeScan:
+           {
+               TidRangeScan *splan = (TidRangeScan *) plan;
+
+               splan->scan.scanrelid += rtoffset;
+               splan->scan.plan.targetlist =
+                   fix_scan_list(root, splan->scan.plan.targetlist,
+                                 rtoffset, NUM_EXEC_TLIST(plan));
+               splan->scan.plan.qual =
+                   fix_scan_list(root, splan->scan.plan.qual,
+                                 rtoffset, NUM_EXEC_QUAL(plan));
+               splan->tidrangequals =
+                   fix_scan_list(root, splan->tidrangequals,
+                                 rtoffset, 1);
+           }
+           break;
        case T_SubqueryScan:
            /* Needs special treatment, see comments below */
            return set_subqueryscan_references(root,
index 54ef61bfb350d31ba16595197b1e604871d10b2e..f3e46e0959ef5ca8bd33356a34100a41d41168e4 100644 (file)
@@ -2367,6 +2367,12 @@ finalize_plan(PlannerInfo *root, Plan *plan,
            context.paramids = bms_add_members(context.paramids, scan_params);
            break;
 
+       case T_TidRangeScan:
+           finalize_primnode((Node *) ((TidRangeScan *) plan)->tidrangequals,
+                             &context);
+           context.paramids = bms_add_members(context.paramids, scan_params);
+           break;
+
        case T_SubqueryScan:
            {
                SubqueryScan *sscan = (SubqueryScan *) plan;
index 9be0c4a6af59292ef79e33f0ebe1d368f45ef134..69b83071cf2190e020fcf1f5051aeb1e82b84497 100644 (file)
@@ -1203,6 +1203,35 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals,
    return pathnode;
 }
 
+/*
+ * create_tidrangescan_path
+ *   Creates a path corresponding to a scan by a range of TIDs, returning
+ *   the pathnode.
+ */
+TidRangePath *
+create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel,
+                        List *tidrangequals, Relids required_outer)
+{
+   TidRangePath *pathnode = makeNode(TidRangePath);
+
+   pathnode->path.pathtype = T_TidRangeScan;
+   pathnode->path.parent = rel;
+   pathnode->path.pathtarget = rel->reltarget;
+   pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
+                                                         required_outer);
+   pathnode->path.parallel_aware = false;
+   pathnode->path.parallel_safe = rel->consider_parallel;
+   pathnode->path.parallel_workers = 0;
+   pathnode->path.pathkeys = NIL;  /* always unordered */
+
+   pathnode->tidrangequals = tidrangequals;
+
+   cost_tidrangescan(&pathnode->path, root, rel, tidrangequals,
+                     pathnode->path.param_info);
+
+   return pathnode;
+}
+
 /*
  * create_append_path
  *   Creates a path corresponding to an Append plan, returning the
index 177e6e336ab9d77a7e099a9b91434402beb87a90..c5947fa41851bed1fe97ac996f7e03c8acc13d33 100644 (file)
@@ -467,6 +467,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
    /* Collect info about relation's foreign keys, if relevant */
    get_relation_foreign_keys(root, rel, relation, inhparent);
 
+   /* Collect info about functions implemented by the rel's table AM. */
+   if (relation->rd_tableam &&
+       relation->rd_tableam->scan_set_tidrange != NULL &&
+       relation->rd_tableam->scan_getnextslot_tidrange != NULL)
+       rel->amflags |= AMFLAG_HAS_TID_RANGE;
+
    /*
     * Collect info about relation's partitioning scheme, if any. Only
     * inheritance parents may be partitioned.
index 731ff708b905e1516a5337f5f167287d917fdff6..345c877aeb391c2180db73afde3c8a7b2be16c63 100644 (file)
@@ -234,6 +234,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
    rel->subroot = NULL;
    rel->subplan_params = NIL;
    rel->rel_parallel_workers = -1; /* set up in get_relation_info */
+   rel->amflags = 0;
    rel->serverid = InvalidOid;
    rel->userid = rte->checkAsUser;
    rel->useridiscurrent = false;
@@ -646,6 +647,7 @@ build_join_rel(PlannerInfo *root,
    joinrel->subroot = NULL;
    joinrel->subplan_params = NIL;
    joinrel->rel_parallel_workers = -1;
+   joinrel->amflags = 0;
    joinrel->serverid = InvalidOid;
    joinrel->userid = InvalidOid;
    joinrel->useridiscurrent = false;
@@ -826,6 +828,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
    joinrel->eclass_indexes = NULL;
    joinrel->subroot = NULL;
    joinrel->subplan_params = NIL;
+   joinrel->amflags = 0;
    joinrel->serverid = InvalidOid;
    joinrel->userid = InvalidOid;
    joinrel->useridiscurrent = false;
index 55759c383b6160fbbcfcf5954b79b4d1185c6103..f40d6c22a024371dcc393dc5c6477b4f38642bd2 100644 (file)
@@ -71,3 +71,62 @@ ItemPointerCompare(ItemPointer arg1, ItemPointer arg2)
    else
        return 0;
 }
+
+/*
+ * ItemPointerInc
+ *     Increment 'pointer' by 1 only paying attention to the ItemPointer's
+ *     type's range limits and not MaxOffsetNumber and FirstOffsetNumber.
+ *     This may result in 'pointer' becoming !OffsetNumberIsValid.
+ *
+ * If the pointer is already the maximum possible values permitted by the
+ * range of the ItemPointer's types, then do nothing.
+ */
+void
+ItemPointerInc(ItemPointer pointer)
+{
+   BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer);
+   OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer);
+
+   if (off == PG_UINT16_MAX)
+   {
+       if (blk != InvalidBlockNumber)
+       {
+           off = 0;
+           blk++;
+       }
+   }
+   else
+       off++;
+
+   ItemPointerSet(pointer, blk, off);
+}
+
+/*
+ * ItemPointerDec
+ *     Decrement 'pointer' by 1 only paying attention to the ItemPointer's
+ *     type's range limits and not MaxOffsetNumber and FirstOffsetNumber.
+ *     This may result in 'pointer' becoming !OffsetNumberIsValid.
+ *
+ * If the pointer is already the minimum possible values permitted by the
+ * range of the ItemPointer's types, then do nothing.  This does rely on
+ * FirstOffsetNumber being 1 rather than 0.
+ */
+void
+ItemPointerDec(ItemPointer pointer)
+{
+   BlockNumber blk = ItemPointerGetBlockNumberNoCheck(pointer);
+   OffsetNumber off = ItemPointerGetOffsetNumberNoCheck(pointer);
+
+   if (off == 0)
+   {
+       if (blk != 0)
+       {
+           off = PG_UINT16_MAX;
+           blk--;
+       }
+   }
+   else
+       off--;
+
+   ItemPointerSet(pointer, blk, off);
+}
index 60e5cd3109bd6d5cb549e833063ffb0bbbf15d12..bc0936bc2dea2a6070b51c54d32304d8642acf4a 100644 (file)
@@ -121,7 +121,11 @@ extern void heap_endscan(TableScanDesc scan);
 extern HeapTuple heap_getnext(TableScanDesc scan, ScanDirection direction);
 extern bool heap_getnextslot(TableScanDesc sscan,
                             ScanDirection direction, struct TupleTableSlot *slot);
-
+extern void heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
+                             ItemPointer maxtid);
+extern bool heap_getnextslot_tidrange(TableScanDesc sscan,
+                                     ScanDirection direction,
+                                     TupleTableSlot *slot);
 extern bool heap_fetch(Relation relation, Snapshot snapshot,
                       HeapTuple tuple, Buffer *userbuf);
 extern bool heap_hot_search_buffer(ItemPointer tid, Relation relation,
index 005f3fdd2b82772db4e28739c251434037226189..0ef6d8edf7f40a73a3b191748438eec798cdda1d 100644 (file)
@@ -36,6 +36,10 @@ typedef struct TableScanDescData
    int         rs_nkeys;       /* number of scan keys */
    struct ScanKeyData *rs_key; /* array of scan key descriptors */
 
+   /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
+   ItemPointerData rs_mintid;
+   ItemPointerData rs_maxtid;
+
    /*
     * Information about type and behaviour of the scan, a bitmask of members
     * of the ScanOptions enum (see tableam.h).
index 33bffb6815b46d83aaad27c142eae92f890ed098..414b6b4d5786218ff26bcd036077f47a75a9585a 100644 (file)
@@ -49,18 +49,19 @@ typedef enum ScanOptions
    SO_TYPE_BITMAPSCAN = 1 << 1,
    SO_TYPE_SAMPLESCAN = 1 << 2,
    SO_TYPE_TIDSCAN = 1 << 3,
-   SO_TYPE_ANALYZE = 1 << 4,
+   SO_TYPE_TIDRANGESCAN = 1 << 4,
+   SO_TYPE_ANALYZE = 1 << 5,
 
    /* several of SO_ALLOW_* may be specified */
    /* allow or disallow use of access strategy */
-   SO_ALLOW_STRAT = 1 << 5,
+   SO_ALLOW_STRAT = 1 << 6,
    /* report location to syncscan logic? */
-   SO_ALLOW_SYNC = 1 << 6,
+   SO_ALLOW_SYNC = 1 << 7,
    /* verify visibility page-at-a-time? */
-   SO_ALLOW_PAGEMODE = 1 << 7,
+   SO_ALLOW_PAGEMODE = 1 << 8,
 
    /* unregister snapshot at scan end? */
-   SO_TEMP_SNAPSHOT = 1 << 8
+   SO_TEMP_SNAPSHOT = 1 << 9
 } ScanOptions;
 
 /*
@@ -325,6 +326,34 @@ typedef struct TableAmRoutine
                                     ScanDirection direction,
                                     TupleTableSlot *slot);
 
+   /*-----------
+    * Optional functions to provide scanning for ranges of ItemPointers.
+    * Implementations must either provide both of these functions, or neither
+    * of them.
+    *
+    * Implementations of scan_set_tidrange must themselves handle
+    * ItemPointers of any value, i.e., they must handle each of the following:
+    *
+    * 1) mintid or maxtid is beyond the end of the table; and
+    * 2) mintid is above maxtid; and
+    * 3) item offset for mintid or maxtid is beyond the maximum offset
+    * allowed by the AM.
+    *
+    * Implementations can assume that scan_set_tidrange is always called
+    * before scan_getnextslot_tidrange or after scan_rescan and before any
+    * further calls to scan_getnextslot_tidrange.
+    */
+   void        (*scan_set_tidrange) (TableScanDesc scan,
+                                     ItemPointer mintid,
+                                     ItemPointer maxtid);
+
+   /*
+    * Return next tuple from `scan` that's in the range of TIDs defined by
+    * scan_set_tidrange.
+    */
+   bool        (*scan_getnextslot_tidrange) (TableScanDesc scan,
+                                             ScanDirection direction,
+                                             TupleTableSlot *slot);
 
    /* ------------------------------------------------------------------------
     * Parallel table scan related functions.
@@ -1015,6 +1044,64 @@ table_scan_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableS
    return sscan->rs_rd->rd_tableam->scan_getnextslot(sscan, direction, slot);
 }
 
+/* ----------------------------------------------------------------------------
+ * TID Range scanning related functions.
+ * ----------------------------------------------------------------------------
+ */
+
+/*
+ * table_beginscan_tidrange is the entry point for setting up a TableScanDesc
+ * for a TID range scan.
+ */
+static inline TableScanDesc
+table_beginscan_tidrange(Relation rel, Snapshot snapshot,
+                        ItemPointer mintid,
+                        ItemPointer maxtid)
+{
+   TableScanDesc sscan;
+   uint32      flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE;
+
+   sscan = rel->rd_tableam->scan_begin(rel, snapshot, 0, NULL, NULL, flags);
+
+   /* Set the range of TIDs to scan */
+   sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
+
+   return sscan;
+}
+
+/*
+ * table_rescan_tidrange resets the scan position and sets the minimum and
+ * maximum TID range to scan for a TableScanDesc created by
+ * table_beginscan_tidrange.
+ */
+static inline void
+table_rescan_tidrange(TableScanDesc sscan, ItemPointer mintid,
+                     ItemPointer maxtid)
+{
+   /* Ensure table_beginscan_tidrange() was used. */
+   Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
+
+   sscan->rs_rd->rd_tableam->scan_rescan(sscan, NULL, false, false, false, false);
+   sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
+}
+
+/*
+ * Fetch the next tuple from `sscan` for a TID range scan created by
+ * table_beginscan_tidrange().  Stores the tuple in `slot` and returns true,
+ * or returns false if no more tuples exist in the range.
+ */
+static inline bool
+table_scan_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
+                               TupleTableSlot *slot)
+{
+   /* Ensure table_beginscan_tidrange() was used. */
+   Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
+
+   return sscan->rs_rd->rd_tableam->scan_getnextslot_tidrange(sscan,
+                                                              direction,
+                                                              slot);
+}
+
 
 /* ----------------------------------------------------------------------------
  * Parallel table scan related functions.
index 0d4eac8f96375e5b51ae08de0e92dc039d3321c0..85395a81eec04cd29f97c1141a8e8b6740af5178 100644 (file)
   oprname => '<', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
   oprcom => '>(tid,tid)', oprnegate => '>=(tid,tid)', oprcode => 'tidlt',
   oprrest => 'scalarltsel', oprjoin => 'scalarltjoinsel' },
-{ oid => '2800', descr => 'greater than',
+{ oid => '2800', oid_symbol => 'TIDGreaterOperator', descr => 'greater than',
   oprname => '>', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
   oprcom => '<(tid,tid)', oprnegate => '<=(tid,tid)', oprcode => 'tidgt',
   oprrest => 'scalargtsel', oprjoin => 'scalargtjoinsel' },
-{ oid => '2801', descr => 'less than or equal',
+{ oid => '2801', oid_symbol => 'TIDLessEqOperator', descr => 'less than or equal',
   oprname => '<=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
   oprcom => '>=(tid,tid)', oprnegate => '>(tid,tid)', oprcode => 'tidle',
   oprrest => 'scalarlesel', oprjoin => 'scalarlejoinsel' },
-{ oid => '2802', descr => 'greater than or equal',
+{ oid => '2802', oid_symbol => 'TIDGreaterEqOperator', descr => 'greater than or equal',
   oprname => '>=', oprleft => 'tid', oprright => 'tid', oprresult => 'bool',
   oprcom => '<=(tid,tid)', oprnegate => '<(tid,tid)', oprcode => 'tidge',
   oprrest => 'scalargesel', oprjoin => 'scalargejoinsel' },
diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h
new file mode 100644 (file)
index 0000000..a57a47e
--- /dev/null
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeTidrangescan.h
+ *
+ *
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/executor/nodeTidrangescan.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef NODETIDRANGESCAN_H
+#define NODETIDRANGESCAN_H
+
+#include "nodes/execnodes.h"
+
+extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node,
+                                              EState *estate, int eflags);
+extern void ExecEndTidRangeScan(TidRangeScanState *node);
+extern void ExecReScanTidRangeScan(TidRangeScanState *node);
+
+#endif                         /* NODETIDRANGESCAN_H */
index 943931f65d06a2254a441c00da39c13e1565fddf..e31ad6204e64b9f576efb131ed2ea170f6024740 100644 (file)
@@ -1624,6 +1624,24 @@ typedef struct TidScanState
    HeapTupleData tss_htup;
 } TidScanState;
 
+/* ----------------
+ *  TidRangeScanState information
+ *
+ *     trss_tidexprs       list of TidOpExpr structs (see nodeTidrangescan.c)
+ *     trss_mintid         the lowest TID in the scan range
+ *     trss_maxtid         the highest TID in the scan range
+ *     trss_inScan         is a scan currently in progress?
+ * ----------------
+ */
+typedef struct TidRangeScanState
+{
+   ScanState   ss;             /* its first field is NodeTag */
+   List       *trss_tidexprs;
+   ItemPointerData trss_mintid;
+   ItemPointerData trss_maxtid;
+   bool        trss_inScan;
+} TidRangeScanState;
+
 /* ----------------
  *  SubqueryScanState information
  *
index 40ae489c235c256d61ab457fbc7661602077bf40..e22df890ef4cf522199d339cc725568494064dd8 100644 (file)
@@ -59,6 +59,7 @@ typedef enum NodeTag
    T_BitmapIndexScan,
    T_BitmapHeapScan,
    T_TidScan,
+   T_TidRangeScan,
    T_SubqueryScan,
    T_FunctionScan,
    T_ValuesScan,
@@ -116,6 +117,7 @@ typedef enum NodeTag
    T_BitmapIndexScanState,
    T_BitmapHeapScanState,
    T_TidScanState,
+   T_TidRangeScanState,
    T_SubqueryScanState,
    T_FunctionScanState,
    T_TableFuncScanState,
@@ -229,6 +231,7 @@ typedef enum NodeTag
    T_BitmapAndPath,
    T_BitmapOrPath,
    T_TidPath,
+   T_TidRangePath,
    T_SubqueryScanPath,
    T_ForeignPath,
    T_CustomPath,
index 0ec93e648c4c6875a9f8dfe9e8419fdec4a78bd6..b8a6e0fc9f4e9caef8bc773678a48e7fbf50100a 100644 (file)
@@ -621,6 +621,10 @@ typedef struct PartitionSchemeData *PartitionScheme;
  * to simplify matching join clauses to those lists.
  *----------
  */
+
+/* Bitmask of flags supported by table AMs */
+#define AMFLAG_HAS_TID_RANGE (1 << 0)
+
 typedef enum RelOptKind
 {
    RELOPT_BASEREL,
@@ -710,6 +714,8 @@ typedef struct RelOptInfo
    PlannerInfo *subroot;       /* if subquery */
    List       *subplan_params; /* if subquery */
    int         rel_parallel_workers;   /* wanted number of parallel workers */
+   uint32      amflags;        /* Bitmask of optional features supported by
+                                * the table AM */
 
    /* Information about foreign tables and foreign joins */
    Oid         serverid;       /* identifies server for the table or join */
@@ -1323,6 +1329,18 @@ typedef struct TidPath
    List       *tidquals;       /* qual(s) involving CTID = something */
 } TidPath;
 
+/*
+ * TidRangePath represents a scan by a contiguous range of TIDs
+ *
+ * tidrangequals is an implicitly AND'ed list of qual expressions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
+ */
+typedef struct TidRangePath
+{
+   Path        path;
+   List       *tidrangequals;
+} TidRangePath;
+
 /*
  * SubqueryScanPath represents a scan of an unflattened subquery-in-FROM
  *
index 43160439f058074c928a5050d77a6c5e054b6c73..6e62104d0b77b4f75278b33b8bebeaed7ae12001 100644 (file)
@@ -485,6 +485,19 @@ typedef struct TidScan
    List       *tidquals;       /* qual(s) involving CTID = something */
 } TidScan;
 
+/* ----------------
+ *     tid range scan node
+ *
+ * tidrangequals is an implicitly AND'ed list of qual expressions of the form
+ * "CTID relop pseudoconstant", where relop is one of >,>=,<,<=.
+ * ----------------
+ */
+typedef struct TidRangeScan
+{
+   Scan        scan;
+   List       *tidrangequals;  /* qual(s) involving CTID op something */
+} TidRangeScan;
+
 /* ----------------
  *     subquery scan node
  *
index ed2e4af4be73c1e1486b0b35b8753471006a0da6..1be93be09836c7d05c369b782ebd380e3b7e8c27 100644 (file)
@@ -83,6 +83,9 @@ extern void cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root);
 extern void cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec);
 extern void cost_tidscan(Path *path, PlannerInfo *root,
                         RelOptInfo *baserel, List *tidquals, ParamPathInfo *param_info);
+extern void cost_tidrangescan(Path *path, PlannerInfo *root,
+                             RelOptInfo *baserel, List *tidrangequals,
+                             ParamPathInfo *param_info);
 extern void cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
                              RelOptInfo *baserel, ParamPathInfo *param_info);
 extern void cost_functionscan(Path *path, PlannerInfo *root,
index 8dfc36a4e1546d20b983c8d762097710c44bd5a4..54f4b782fc74eb02e849d4a37050f23a02ca23de 100644 (file)
@@ -63,6 +63,10 @@ extern BitmapOrPath *create_bitmap_or_path(PlannerInfo *root,
                                           List *bitmapquals);
 extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel,
                                    List *tidquals, Relids required_outer);
+extern TidRangePath *create_tidrangescan_path(PlannerInfo *root,
+                                             RelOptInfo *rel,
+                                             List *tidrangequals,
+                                             Relids required_outer);
 extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel,
                                      List *subpaths, List *partial_subpaths,
                                      List *pathkeys, Relids required_outer,
index 0e6990140b801980498d4520a236142d20b6a550..cd4b8fbacb2840129a679fbb9b1dbeb285a658d8 100644 (file)
@@ -202,5 +202,7 @@ typedef ItemPointerData *ItemPointer;
 
 extern bool ItemPointerEquals(ItemPointer pointer1, ItemPointer pointer2);
 extern int32 ItemPointerCompare(ItemPointer arg1, ItemPointer arg2);
+extern void ItemPointerInc(ItemPointer pointer);
+extern void ItemPointerDec(ItemPointer pointer);
 
 #endif                         /* ITEMPTR_H */
diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out
new file mode 100644 (file)
index 0000000..721f3b9
--- /dev/null
@@ -0,0 +1,300 @@
+-- tests for tidrangescans
+SET enable_seqscan TO off;
+CREATE TABLE tidrangescan(id integer, data text);
+-- empty table
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+ ctid 
+------
+(0 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid > '(9,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+ ctid 
+------
+(0 rows)
+
+-- insert enough tuples to fill at least two pages
+INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i);
+-- remove all tuples after the 10th tuple on each page.  Trying to ensure
+-- we get the same layout with all CPU architectures and smaller than standard
+-- page sizes.
+DELETE FROM tidrangescan
+WHERE substring(ctid::text FROM ',(\d+)\)')::integer > 10 OR substring(ctid::text FROM '\((\d+),')::integer > 2;
+VACUUM tidrangescan;
+-- range scans with upper bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+  ctid  
+--------
+ (0,1)
+ (0,2)
+ (0,3)
+ (0,4)
+ (0,5)
+ (0,6)
+ (0,7)
+ (0,8)
+ (0,9)
+ (0,10)
+(10 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+             QUERY PLAN             
+------------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid <= '(1,5)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+  ctid  
+--------
+ (0,1)
+ (0,2)
+ (0,3)
+ (0,4)
+ (0,5)
+ (0,6)
+ (0,7)
+ (0,8)
+ (0,9)
+ (0,10)
+ (1,1)
+ (1,2)
+ (1,3)
+ (1,4)
+ (1,5)
+(15 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid < '(0,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+ ctid 
+------
+(0 rows)
+
+-- range scans with lower bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid > '(2,8)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+  ctid  
+--------
+ (2,9)
+ (2,10)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: ('(2,8)'::tid < ctid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+  ctid  
+--------
+ (2,9)
+ (2,10)
+(2 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+             QUERY PLAN             
+------------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid >= '(2,8)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+  ctid  
+--------
+ (2,8)
+ (2,9)
+ (2,10)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+              QUERY PLAN              
+--------------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid >= '(100,0)'::tid)
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+ ctid 
+------
+(0 rows)
+
+-- range scans with both bounds
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: ((ctid > '(1,4)'::tid) AND ('(1,7)'::tid >= ctid))
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+ ctid  
+-------
+ (1,5)
+ (1,6)
+ (1,7)
+(3 rows)
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+                           QUERY PLAN                           
+----------------------------------------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (('(1,7)'::tid >= ctid) AND (ctid > '(1,4)'::tid))
+(2 rows)
+
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+ ctid  
+-------
+ (1,5)
+ (1,6)
+ (1,7)
+(3 rows)
+
+-- extreme offsets
+SELECT ctid FROM tidrangescan WHERE ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1;
+ ctid 
+------
+(0 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1;
+ ctid 
+------
+(0 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(4294967295,65535)';
+ ctid 
+------
+(0 rows)
+
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+ ctid 
+------
+(0 rows)
+
+-- NULLs in the range cannot return tuples
+SELECT ctid FROM tidrangescan WHERE ctid >= (SELECT NULL::tid);
+ ctid 
+------
+(0 rows)
+
+-- rescans
+EXPLAIN (COSTS OFF)
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+                  QUERY PLAN                   
+-----------------------------------------------
+ Nested Loop
+   ->  Tid Range Scan on tidrangescan t
+         TID Cond: (ctid < '(1,0)'::tid)
+   ->  Aggregate
+         ->  Tid Range Scan on tidrangescan t2
+               TID Cond: (ctid <= t.ctid)
+(6 rows)
+
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+  ctid  | c  
+--------+----
+ (0,1)  |  1
+ (0,2)  |  2
+ (0,3)  |  3
+ (0,4)  |  4
+ (0,5)  |  5
+ (0,6)  |  6
+ (0,7)  |  7
+ (0,8)  |  8
+ (0,9)  |  9
+ (0,10) | 10
+(10 rows)
+
+-- cursors
+-- Ensure we get a TID Range scan without a Materialize node.
+EXPLAIN (COSTS OFF)
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+            QUERY PLAN             
+-----------------------------------
+ Tid Range Scan on tidrangescan
+   TID Cond: (ctid < '(1,0)'::tid)
+(2 rows)
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+FETCH NEXT c;
+ ctid  
+-------
+ (0,1)
+(1 row)
+
+FETCH NEXT c;
+ ctid  
+-------
+ (0,2)
+(1 row)
+
+FETCH PRIOR c;
+ ctid  
+-------
+ (0,1)
+(1 row)
+
+FETCH FIRST c;
+ ctid  
+-------
+ (0,1)
+(1 row)
+
+FETCH LAST c;
+  ctid  
+--------
+ (0,10)
+(1 row)
+
+COMMIT;
+DROP TABLE tidrangescan;
+RESET enable_seqscan;
index 12bb67e4911882cd7d91df54ce81abe255cc2219..c77b0d7342f5e911bf4c1f580f5a6b81caa020ee 100644 (file)
@@ -80,7 +80,7 @@ test: brin gin gist spgist privileges init_privs security_label collate matview
 # ----------
 # Another group of parallel tests
 # ----------
-test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan collate.icu.utf8 incremental_sort
+test: create_table_like alter_generic alter_operator misc async dbsize misc_functions sysviews tsrf tid tidscan tidrangescan collate.icu.utf8 incremental_sort
 
 # rules cannot run concurrently with any test that creates
 # a view or rule in the public schema
index 59b416fd80cb4a80d769ce711e152f8118edbd91..0264a97324c219c9c1d2f2e0577e8521bf180f9d 100644 (file)
@@ -138,6 +138,7 @@ test: sysviews
 test: tsrf
 test: tid
 test: tidscan
+test: tidrangescan
 test: collate.icu.utf8
 test: rules
 test: psql
diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql
new file mode 100644 (file)
index 0000000..ac09ebb
--- /dev/null
@@ -0,0 +1,101 @@
+-- tests for tidrangescans
+
+SET enable_seqscan TO off;
+CREATE TABLE tidrangescan(id integer, data text);
+
+-- empty table
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(1, 0)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+SELECT ctid FROM tidrangescan WHERE ctid > '(9, 0)';
+
+-- insert enough tuples to fill at least two pages
+INSERT INTO tidrangescan SELECT i,repeat('x', 100) FROM generate_series(1,200) AS s(i);
+
+-- remove all tuples after the 10th tuple on each page.  Trying to ensure
+-- we get the same layout with all CPU architectures and smaller than standard
+-- page sizes.
+DELETE FROM tidrangescan
+WHERE substring(ctid::text FROM ',(\d+)\)')::integer > 10 OR substring(ctid::text FROM '\((\d+),')::integer > 2;
+VACUUM tidrangescan;
+
+-- range scans with upper bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+SELECT ctid FROM tidrangescan WHERE ctid <= '(1,5)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+
+-- range scans with lower bound
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+SELECT ctid FROM tidrangescan WHERE ctid > '(2,8)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+SELECT ctid FROM tidrangescan WHERE '(2,8)' < ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+SELECT ctid FROM tidrangescan WHERE ctid >= '(2,8)';
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+SELECT ctid FROM tidrangescan WHERE ctid >= '(100,0)';
+
+-- range scans with both bounds
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+SELECT ctid FROM tidrangescan WHERE ctid > '(1,4)' AND '(1,7)' >= ctid;
+
+EXPLAIN (COSTS OFF)
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+SELECT ctid FROM tidrangescan WHERE '(1,7)' >= ctid AND ctid > '(1,4)';
+
+-- extreme offsets
+SELECT ctid FROM tidrangescan WHERE ctid > '(0,65535)' AND ctid < '(1,0)' LIMIT 1;
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)' LIMIT 1;
+
+SELECT ctid FROM tidrangescan WHERE ctid > '(4294967295,65535)';
+SELECT ctid FROM tidrangescan WHERE ctid < '(0,0)';
+
+-- NULLs in the range cannot return tuples
+SELECT ctid FROM tidrangescan WHERE ctid >= (SELECT NULL::tid);
+
+-- rescans
+EXPLAIN (COSTS OFF)
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+
+SELECT t.ctid,t2.c FROM tidrangescan t,
+LATERAL (SELECT count(*) c FROM tidrangescan t2 WHERE t2.ctid <= t.ctid) t2
+WHERE t.ctid < '(1,0)';
+
+-- cursors
+
+-- Ensure we get a TID Range scan without a Materialize node.
+EXPLAIN (COSTS OFF)
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+
+BEGIN;
+DECLARE c SCROLL CURSOR FOR SELECT ctid FROM tidrangescan WHERE ctid < '(1,0)';
+FETCH NEXT c;
+FETCH NEXT c;
+FETCH PRIOR c;
+FETCH FIRST c;
+FETCH LAST c;
+COMMIT;
+
+DROP TABLE tidrangescan;
+
+RESET enable_seqscan;