summaryrefslogtreecommitdiff
path: root/src/include
diff options
context:
space:
mode:
authorMelanie Plageman2024-10-25 14:11:58 +0000
committerMelanie Plageman2024-10-25 14:11:58 +0000
commitde380a62b5dae610b3504b5036e5d5b1150cc4a4 (patch)
treee1e874e8a346f1e7107c5b935f8e12c7b741ab58 /src/include
parent7bd7aa4d30676de006636bb2c9c079c363d9d56c (diff)
Make table_scan_bitmap_next_block() async-friendly
Move all responsibility for indicating a block is exhausted into table_scan_bitmap_next_tuple() and advance the main iterator in heap-specific code. This flow control makes more sense and is a step toward using the read stream API for bitmap heap scans. Previously, table_scan_bitmap_next_block() returned false to indicate table_scan_bitmap_next_tuple() should not be called for the tuples on the page. This happened both when 1) there were no visible tuples on the page and 2) when the block returned by the iterator was past the end of the table. BitmapHeapNext() (generic bitmap table scan code) handled the case when the bitmap was exhausted. It makes more sense for table_scan_bitmap_next_tuple() to return false when there are no visible tuples on the page and table_scan_bitmap_next_block() to return false when the bitmap is exhausted or there are no more blocks in the table. As part of this new design, TBMIterateResults are no longer used as a flow control mechanism in BitmapHeapNext(), so we removed table_scan_bitmap_next_tuple's TBMIterateResult parameter. Note that the prefetch iterator is still saved in the BitmapHeapScanState node and advanced in generic bitmap table scan code. This is because 1) it was not necessary to change the prefetch iterator location to change the flow control in BitmapHeapNext() 2) modifying prefetch iterator management requires several more steps better split over multiple commits and 3) the prefetch iterator will be removed once the read stream API is used. Author: Melanie Plageman Reviewed-by: Tomas Vondra, Andres Freund, Heikki Linnakangas, Mark Dilger Discussion: https://postgr.es/m/063e4eb4-32d9-439e-a0b1-75565a9835a8%40iki.fi
Diffstat (limited to 'src/include')
-rw-r--r--src/include/access/relscan.h28
-rw-r--r--src/include/access/tableam.h64
-rw-r--r--src/include/nodes/execnodes.h12
3 files changed, 67 insertions, 37 deletions
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 114a85dc47c..e1884acf493 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -25,6 +25,9 @@
struct ParallelTableScanDescData;
+struct TBMIterator;
+struct TBMSharedIterator;
+
/*
* Generic descriptor for table scans. This is the base-class for table scans,
* which needs to be embedded in the scans of individual AMs.
@@ -37,9 +40,28 @@ typedef struct TableScanDescData
int rs_nkeys; /* number of scan keys */
struct ScanKeyData *rs_key; /* array of scan key descriptors */
- /* Range of ItemPointers for table_scan_getnextslot_tidrange() to scan. */
- ItemPointerData rs_mintid;
- ItemPointerData rs_maxtid;
+ /*
+ * Scan type-specific members
+ */
+ union
+ {
+ /* Iterators for Bitmap Table Scans */
+ struct
+ {
+ struct TBMIterator *rs_iterator;
+ struct TBMSharedIterator *rs_shared_iterator;
+ } bitmap;
+
+ /*
+ * Range of ItemPointers for table_scan_getnextslot_tidrange() to
+ * scan.
+ */
+ struct
+ {
+ ItemPointerData rs_mintid;
+ ItemPointerData rs_maxtid;
+ } tidrange;
+ } st;
/*
* Information about type and behaviour of the scan, a bitmask of members
diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h
index be09d180d45..adb478a93ca 100644
--- a/src/include/access/tableam.h
+++ b/src/include/access/tableam.h
@@ -36,7 +36,6 @@ extern PGDLLIMPORT bool synchronize_seqscans;
struct BulkInsertStateData;
struct IndexInfo;
struct SampleScanState;
-struct TBMIterateResult;
struct VacuumParams;
struct ValidateIndexState;
@@ -780,26 +779,29 @@ typedef struct TableAmRoutine
*/
/*
- * Prepare to fetch / check / return tuples from `tbmres->blockno` as part
- * of a bitmap table scan. `scan` was started via table_beginscan_bm().
- * Return false if there are no tuples to be found on the page, true
- * otherwise.
+ * Prepare to fetch / check / return tuples from `blockno` as part of a
+ * bitmap table scan. `scan` was started via table_beginscan_bm(). Return
+ * false if the bitmap is exhausted and true otherwise.
*
* This will typically read and pin the target block, and do the necessary
* work to allow scan_bitmap_next_tuple() to return tuples (e.g. it might
- * make sense to perform tuple visibility checks at this time). For some
- * AMs it will make more sense to do all the work referencing `tbmres`
- * contents here, for others it might be better to defer more work to
- * scan_bitmap_next_tuple.
- *
- * If `tbmres->blockno` is -1, this is a lossy scan and all visible tuples
- * on the page have to be returned, otherwise the tuples at offsets in
- * `tbmres->offsets` need to be returned.
+ * make sense to perform tuple visibility checks at this time).
*
* `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
* incremented by the table AM to indicate whether or not the block's
* representation in the bitmap is lossy.
*
+ * `recheck` is set by the table AM to indicate whether or not the tuples
+ * from this block should be rechecked. Tuples from lossy pages will
+ * always need to be rechecked, but some non-lossy pages' tuples may also
+ * require recheck.
+ *
+ * `blockno` is the current block and is set by the table AM. The table AM
+ * is responsible for advancing the main iterator, but the bitmap table
+ * scan code still advances the prefetch iterator. `blockno` is used by
+ * bitmap table scan code to validate that the prefetch block stays ahead
+ * of the current block.
+ *
* XXX: Currently this may only be implemented if the AM uses md.c as its
* storage manager, and uses ItemPointer->ip_blkid in a manner that maps
* blockids directly to the underlying storage. nodeBitmapHeapscan.c
@@ -815,7 +817,8 @@ typedef struct TableAmRoutine
* scan_bitmap_next_tuple need to exist, or neither.
*/
bool (*scan_bitmap_next_block) (TableScanDesc scan,
- struct TBMIterateResult *tbmres,
+ BlockNumber *blockno,
+ bool *recheck,
uint64 *lossy_pages,
uint64 *exact_pages);
@@ -823,15 +826,10 @@ typedef struct TableAmRoutine
* Fetch the next tuple of a bitmap table scan into `slot` and return true
* if a visible tuple was found, false otherwise.
*
- * For some AMs it will make more sense to do all the work referencing
- * `tbmres` contents in scan_bitmap_next_block, for others it might be
- * better to defer more work to this callback.
- *
* Optional callback, but either both scan_bitmap_next_block and
* scan_bitmap_next_tuple need to exist, or neither.
*/
bool (*scan_bitmap_next_tuple) (TableScanDesc scan,
- struct TBMIterateResult *tbmres,
TupleTableSlot *slot);
/*
@@ -959,12 +957,17 @@ static inline TableScanDesc
table_beginscan_bm(Relation rel, Snapshot snapshot,
int nkeys, struct ScanKeyData *key, bool need_tuple)
{
+ TableScanDesc result;
uint32 flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
if (need_tuple)
flags |= SO_NEED_TUPLES;
- return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
+ result = rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
+ NULL, flags);
+ result->st.bitmap.rs_shared_iterator = NULL;
+ result->st.bitmap.rs_iterator = NULL;
+ return result;
}
/*
@@ -1955,21 +1958,28 @@ table_relation_estimate_size(Relation rel, int32 *attr_widths,
*/
/*
- * Prepare to fetch / check / return tuples from `tbmres->blockno` as part of
- * a bitmap table scan. `scan` needs to have been started via
- * table_beginscan_bm(). Returns false if there are no tuples to be found on
- * the page, true otherwise.
+ * Prepare to fetch / check / return tuples as part of a bitmap table scan.
+ * `scan` needs to have been started via table_beginscan_bm(). Returns false
+ * if there are no more blocks in the bitmap, true otherwise.
*
* `lossy_pages` and `exact_pages` are EXPLAIN counters that can be
* incremented by the table AM to indicate whether or not the block's
* representation in the bitmap is lossy.
*
+ * `recheck` is set by the table AM to indicate whether or not the tuples
+ * from this block should be rechecked.
+ *
+ * `blockno` is the current block and is set by the table AM and is used by
+ * bitmap table scan code to validate that the prefetch block stays ahead of
+ * the current block.
+ *
* Note, this is an optionally implemented function, therefore should only be
* used after verifying the presence (at plan time or such).
*/
static inline bool
table_scan_bitmap_next_block(TableScanDesc scan,
- struct TBMIterateResult *tbmres,
+ BlockNumber *blockno,
+ bool *recheck,
uint64 *lossy_pages,
uint64 *exact_pages)
{
@@ -1982,7 +1992,7 @@ table_scan_bitmap_next_block(TableScanDesc scan,
elog(ERROR, "unexpected table_scan_bitmap_next_block call during logical decoding");
return scan->rs_rd->rd_tableam->scan_bitmap_next_block(scan,
- tbmres,
+ blockno, recheck,
lossy_pages,
exact_pages);
}
@@ -1997,7 +2007,6 @@ table_scan_bitmap_next_block(TableScanDesc scan,
*/
static inline bool
table_scan_bitmap_next_tuple(TableScanDesc scan,
- struct TBMIterateResult *tbmres,
TupleTableSlot *slot)
{
/*
@@ -2009,7 +2018,6 @@ table_scan_bitmap_next_tuple(TableScanDesc scan,
elog(ERROR, "unexpected table_scan_bitmap_next_tuple call during logical decoding");
return scan->rs_rd->rd_tableam->scan_bitmap_next_tuple(scan,
- tbmres,
slot);
}
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index e4698a28c4f..b67d5186a2d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -1833,8 +1833,6 @@ typedef struct SharedBitmapHeapInstrumentation
*
* bitmapqualorig execution state for bitmapqualorig expressions
* tbm bitmap obtained from child index scan(s)
- * tbmiterator iterator for scanning current pages
- * tbmres current-page data
* pvmbuffer buffer for visibility-map lookups of prefetched pages
* stats execution statistics
* prefetch_iterator iterator for prefetching ahead of current page
@@ -1842,10 +1840,12 @@ typedef struct SharedBitmapHeapInstrumentation
* prefetch_target current target prefetch distance
* prefetch_maximum maximum value for prefetch_target
* initialized is node is ready to iterate
- * shared_tbmiterator shared iterator
* shared_prefetch_iterator shared iterator for prefetching
* pstate shared state for parallel bitmap scan
* sinstrument statistics for parallel workers
+ * recheck do current page's tuples need recheck
+ * blockno used to validate pf and current block stay in sync
+ * prefetch_blockno used to validate pf stays ahead of current block
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1853,8 +1853,6 @@ typedef struct BitmapHeapScanState
ScanState ss; /* its first field is NodeTag */
ExprState *bitmapqualorig;
TIDBitmap *tbm;
- TBMIterator *tbmiterator;
- TBMIterateResult *tbmres;
Buffer pvmbuffer;
BitmapHeapScanInstrumentation stats;
TBMIterator *prefetch_iterator;
@@ -1862,10 +1860,12 @@ typedef struct BitmapHeapScanState
int prefetch_target;
int prefetch_maximum;
bool initialized;
- TBMSharedIterator *shared_tbmiterator;
TBMSharedIterator *shared_prefetch_iterator;
ParallelBitmapHeapState *pstate;
SharedBitmapHeapInstrumentation *sinstrument;
+ bool recheck;
+ BlockNumber blockno;
+ BlockNumber prefetch_blockno;
} BitmapHeapScanState;
/* ----------------