Separate TBM[Shared|Private]Iterator and TBMIterateResult
authorMelanie Plageman <melanieplageman@gmail.com>
Sat, 15 Mar 2025 14:10:51 +0000 (10:10 -0400)
committerMelanie Plageman <melanieplageman@gmail.com>
Sat, 15 Mar 2025 14:11:19 +0000 (10:11 -0400)
Remove the TBMIterateResult member from the TBMPrivateIterator and
TBMSharedIterator and make tbm_[shared|private_]iterate() take a
TBMIterateResult as a parameter.

This allows tidbitmap API users to manage multiple TBMIterateResults per
scan. This is required for bitmap heap scan to use the read stream API,
with which there may be multiple I/Os in flight at once, each one with a
TBMIterateResult.

Reviewed-by: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/d4bb26c9-fe07-439e-ac53-c0e244387e01%40vondra.me

src/backend/access/gin/ginget.c
src/backend/access/gin/ginscan.c
src/backend/access/heap/heapam_handler.c
src/backend/executor/nodeBitmapHeapscan.c
src/backend/nodes/tidbitmap.c
src/include/access/gin_private.h
src/include/nodes/tidbitmap.h

index 4a56f19390dcd4cd8610020042904d0f3f4c6a20..f29ccd3c2d1ff31e2678a644a5277d09fb983f49 100644 (file)
@@ -332,8 +332,8 @@ restartScanEntry:
    entry->list = NULL;
    entry->nlist = 0;
    entry->matchBitmap = NULL;
-   entry->matchResult = NULL;
    entry->matchNtuples = -1;
+   entry->matchResult.blockno = InvalidBlockNumber;
    entry->reduceResult = false;
    entry->predictNumberResult = 0;
 
@@ -827,20 +827,19 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
        {
            /*
             * If we've exhausted all items on this block, move to next block
-            * in the bitmap.
+            * in the bitmap. tbm_private_iterate() sets matchResult.blockno
+            * to InvalidBlockNumber when the bitmap is exhausted.
             */
-           while (entry->matchResult == NULL ||
-                  (!entry->matchResult->lossy &&
+           while ((!BlockNumberIsValid(entry->matchResult.blockno)) ||
+                  (!entry->matchResult.lossy &&
                    entry->offset >= entry->matchNtuples) ||
-                  entry->matchResult->blockno < advancePastBlk ||
+                  entry->matchResult.blockno < advancePastBlk ||
                   (ItemPointerIsLossyPage(&advancePast) &&
-                   entry->matchResult->blockno == advancePastBlk))
+                   entry->matchResult.blockno == advancePastBlk))
            {
-               entry->matchResult =
-                   tbm_private_iterate(entry->matchIterator);
-
-               if (entry->matchResult == NULL)
+               if (!tbm_private_iterate(entry->matchIterator, &entry->matchResult))
                {
+                   Assert(!BlockNumberIsValid(entry->matchResult.blockno));
                    ItemPointerSetInvalid(&entry->curItem);
                    tbm_end_private_iterate(entry->matchIterator);
                    entry->matchIterator = NULL;
@@ -849,14 +848,14 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
                }
 
                /* Exact pages need their tuple offsets extracted. */
-               if (!entry->matchResult->lossy)
-                   entry->matchNtuples = tbm_extract_page_tuple(entry->matchResult,
+               if (!entry->matchResult.lossy)
+                   entry->matchNtuples = tbm_extract_page_tuple(&entry->matchResult,
                                                                 entry->matchOffsets,
                                                                 TBM_MAX_TUPLES_PER_PAGE);
 
                /*
                 * Reset counter to the beginning of entry->matchResult. Note:
-                * entry->offset is still greater than entry->matchNtuples if
+                * entry->offset is still greater than matchResult.ntuples if
                 * matchResult is lossy.  So, on next call we will get next
                 * result from TIDBitmap.
                 */
@@ -869,10 +868,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
             * We're now on the first page after advancePast which has any
             * items on it. If it's a lossy result, return that.
             */
-           if (entry->matchResult->lossy)
+           if (entry->matchResult.lossy)
            {
                ItemPointerSetLossyPage(&entry->curItem,
-                                       entry->matchResult->blockno);
+                                       entry->matchResult.blockno);
 
                /*
                 * We might as well fall out of the loop; we could not
@@ -889,7 +888,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
            Assert(entry->matchNtuples > -1);
 
            /* Skip over any offsets <= advancePast, and return that. */
-           if (entry->matchResult->blockno == advancePastBlk)
+           if (entry->matchResult.blockno == advancePastBlk)
            {
                Assert(entry->matchNtuples > 0);
 
@@ -910,7 +909,7 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
            }
 
            ItemPointerSet(&entry->curItem,
-                          entry->matchResult->blockno,
+                          entry->matchResult.blockno,
                           entry->matchOffsets[entry->offset]);
            entry->offset++;
 
index f6cdd098a028a19cdafcdb190c23071256f1dace..c2d1771bd77b5ddb846e6389be18d6f9ab1c6314 100644 (file)
@@ -111,7 +111,7 @@ ginFillScanEntry(GinScanOpaque so, OffsetNumber attnum,
    ItemPointerSetMin(&scanEntry->curItem);
    scanEntry->matchBitmap = NULL;
    scanEntry->matchIterator = NULL;
-   scanEntry->matchResult = NULL;
+   scanEntry->matchResult.blockno = InvalidBlockNumber;
    scanEntry->matchNtuples = -1;
    scanEntry->list = NULL;
    scanEntry->nlist = 0;
index eecff8ffd67d2f53ce6a694b1eea39ad26f049f4..25d26409e2cd619a8292505fb257ca83ea351322 100644 (file)
@@ -2126,7 +2126,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
    Buffer      buffer;
    Snapshot    snapshot;
    int         ntup;
-   TBMIterateResult *tbmres;
+   TBMIterateResult tbmres;
    OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
    int         noffsets = -1;
 
@@ -2142,14 +2142,12 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
    {
        CHECK_FOR_INTERRUPTS();
 
-       tbmres = tbm_iterate(&scan->st.rs_tbmiterator);
-
-       if (tbmres == NULL)
+       if (!tbm_iterate(&scan->st.rs_tbmiterator, &tbmres))
            return false;
 
        /* Exact pages need their tuple offsets extracted. */
-       if (!tbmres->lossy)
-           noffsets = tbm_extract_page_tuple(tbmres, offsets,
+       if (!tbmres.lossy)
+           noffsets = tbm_extract_page_tuple(&tbmres, offsets,
                                              TBM_MAX_TUPLES_PER_PAGE);
 
        /*
@@ -2161,11 +2159,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
         * reachable by the index.
         */
    } while (!IsolationIsSerializable() &&
-            tbmres->blockno >= hscan->rs_nblocks);
+            tbmres.blockno >= hscan->rs_nblocks);
 
    /* Got a valid block */
-   *blockno = tbmres->blockno;
-   *recheck = tbmres->recheck;
+   *blockno = tbmres.blockno;
+   *recheck = tbmres.recheck;
 
    /*
     * We can skip fetching the heap page if we don't need any fields from the
@@ -2173,11 +2171,11 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
     * page are visible to our transaction.
     */
    if (!(scan->rs_flags & SO_NEED_TUPLES) &&
-       !tbmres->recheck &&
-       VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &bscan->rs_vmbuffer))
+       !tbmres.recheck &&
+       VM_ALL_VISIBLE(scan->rs_rd, tbmres.blockno, &bscan->rs_vmbuffer))
    {
        /* can't be lossy in the skip_fetch case */
-       Assert(!tbmres->lossy);
+       Assert(!tbmres.lossy);
        Assert(bscan->rs_empty_tuples_pending >= 0);
        Assert(noffsets > -1);
 
@@ -2186,7 +2184,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
        return true;
    }
 
-   block = tbmres->blockno;
+   block = tbmres.blockno;
 
    /*
     * Acquire pin on the target heap page, trading in any pin we held before.
@@ -2215,7 +2213,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
    /*
     * We need two separate strategies for lossy and non-lossy cases.
     */
-   if (!tbmres->lossy)
+   if (!tbmres.lossy)
    {
        /*
         * Bitmap is non-lossy, so we just look through the offsets listed in
@@ -2279,7 +2277,7 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
    Assert(ntup <= MaxHeapTuplesPerPage);
    hscan->rs_ntuples = ntup;
 
-   if (tbmres->lossy)
+   if (tbmres.lossy)
        (*lossy_pages)++;
    else
        (*exact_pages)++;
index be0d24d901b5e32fc22d36b9a816ecc2e1373d37..3b4ea0f61446e8f9f09bfaab153a89a4ce7d264e 100644 (file)
@@ -317,7 +317,7 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
 {
 #ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;
-   TBMIterateResult *tbmpre;
+   TBMIterateResult tbmpre;
 
    if (pstate == NULL)
    {
@@ -330,9 +330,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
        }
        else if (!tbm_exhausted(prefetch_iterator))
        {
-           tbmpre = tbm_iterate(prefetch_iterator);
-           node->prefetch_blockno = tbmpre ? tbmpre->blockno :
-               InvalidBlockNumber;
+           tbm_iterate(prefetch_iterator, &tbmpre);
+           node->prefetch_blockno = tbmpre.blockno;
        }
        return;
    }
@@ -371,9 +370,8 @@ BitmapAdjustPrefetchIterator(BitmapHeapScanState *node)
             */
            if (!tbm_exhausted(prefetch_iterator))
            {
-               tbmpre = tbm_iterate(prefetch_iterator);
-               node->prefetch_blockno = tbmpre ? tbmpre->blockno :
-                   InvalidBlockNumber;
+               tbm_iterate(prefetch_iterator, &tbmpre);
+               node->prefetch_blockno = tbmpre.blockno;
            }
        }
    }
@@ -441,17 +439,18 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
        {
            while (node->prefetch_pages < node->prefetch_target)
            {
-               TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+               TBMIterateResult tbmpre;
                bool        skip_fetch;
 
-               if (tbmpre == NULL)
+               if (!tbm_iterate(prefetch_iterator, &tbmpre))
                {
                    /* No more pages to prefetch */
+                   Assert(!BlockNumberIsValid(tbmpre.blockno));
                    tbm_end_iterate(prefetch_iterator);
                    break;
                }
                node->prefetch_pages++;
-               node->prefetch_blockno = tbmpre->blockno;
+               node->prefetch_blockno = tbmpre.blockno;
 
                /*
                 * If we expect not to have to actually read this heap page,
@@ -460,13 +459,13 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                 * prefetch_pages?)
                 */
                skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-                             !tbmpre->recheck &&
+                             !tbmpre.recheck &&
                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                            tbmpre->blockno,
+                                            tbmpre.blockno,
                                             &node->pvmbuffer));
 
                if (!skip_fetch)
-                   PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+                   PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
            }
        }
 
@@ -481,7 +480,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
        {
            while (1)
            {
-               TBMIterateResult *tbmpre;
+               TBMIterateResult tbmpre;
                bool        do_prefetch = false;
                bool        skip_fetch;
 
@@ -500,25 +499,25 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                if (!do_prefetch)
                    return;
 
-               tbmpre = tbm_iterate(prefetch_iterator);
-               if (tbmpre == NULL)
+               if (!tbm_iterate(prefetch_iterator, &tbmpre))
                {
+                   Assert(!BlockNumberIsValid(tbmpre.blockno));
                    /* No more pages to prefetch */
                    tbm_end_iterate(prefetch_iterator);
                    break;
                }
 
-               node->prefetch_blockno = tbmpre->blockno;
+               node->prefetch_blockno = tbmpre.blockno;
 
                /* As above, skip prefetch if we expect not to need page */
                skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
-                             !tbmpre->recheck &&
+                             !tbmpre.recheck &&
                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                            tbmpre->blockno,
+                                            tbmpre.blockno,
                                             &node->pvmbuffer));
 
                if (!skip_fetch)
-                   PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+                   PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre.blockno);
            }
        }
    }
index 3d835024caa23dd86d8d792bac1c5840ad3ad096..41031aa8f2fa85dd34989ee4299a996b4eb8cb8a 100644 (file)
@@ -172,7 +172,6 @@ struct TBMPrivateIterator
    int         spageptr;       /* next spages index */
    int         schunkptr;      /* next schunks index */
    int         schunkbit;      /* next bit to check in current schunk */
-   TBMIterateResult output;
 };
 
 /*
@@ -213,7 +212,6 @@ struct TBMSharedIterator
    PTEntryArray *ptbase;       /* pagetable element array */
    PTIterationArray *ptpages;  /* sorted exact page index list */
    PTIterationArray *ptchunks; /* sorted lossy page index list */
-   TBMIterateResult output;
 };
 
 /* Local function prototypes */
@@ -957,21 +955,28 @@ tbm_advance_schunkbit(PagetableEntry *chunk, int *schunkbitp)
 /*
  * tbm_private_iterate - scan through next page of a TIDBitmap
  *
- * Returns a TBMIterateResult representing one page, or NULL if there are
- * no more pages to scan.  Pages are guaranteed to be delivered in numerical
- * order.  If lossy is true, then the bitmap is "lossy" and failed to
- * remember the exact tuples to look at on this page --- the caller must
- * examine all tuples on the page and check if they meet the intended
- * condition.  result->ntuples is set to -1 when the bitmap is lossy.
- * If result->recheck is true, only the indicated tuples need
- * be examined, but the condition must be rechecked anyway.  (For ease of
- * testing, recheck is always set true when lossy is true.)
+ * Caller must pass in a TBMIterateResult to be filled.
+ *
+ * Pages are guaranteed to be delivered in numerical order.
+ *
+ * Returns false when there are no more pages to scan and true otherwise. When
+ * there are no more pages to scan, tbmres->blockno is set to
+ * InvalidBlockNumber.
+ *
+ * If lossy is true, then the bitmap is "lossy" and failed to remember
+ * the exact tuples to look at on this page --- the caller must examine all
+ * tuples on the page and check if they meet the intended condition. If lossy
+ * is false, the caller must later extract the tuple offsets from the page
+ * pointed to by internal_page with tbm_extract_page_tuple.
+ *
+ * If tbmres->recheck is true, only the indicated tuples need be examined, but
+ * the condition must be rechecked anyway.  (For ease of testing, recheck is
+ * always set true when lossy is true.)
  */
-TBMIterateResult *
-tbm_private_iterate(TBMPrivateIterator *iterator)
+bool
+tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres)
 {
    TIDBitmap  *tbm = iterator->tbm;
-   TBMIterateResult *output = &(iterator->output);
 
    Assert(tbm->iterating == TBM_ITERATING_PRIVATE);
 
@@ -1009,12 +1014,12 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
            chunk_blockno < tbm->spages[iterator->spageptr]->blockno)
        {
            /* Return a lossy page indicator from the chunk */
-           output->blockno = chunk_blockno;
-           output->lossy = true;
-           output->recheck = true;
-           output->internal_page = NULL;
+           tbmres->blockno = chunk_blockno;
+           tbmres->lossy = true;
+           tbmres->recheck = true;
+           tbmres->internal_page = NULL;
            iterator->schunkbit++;
-           return output;
+           return true;
        }
    }
 
@@ -1028,16 +1033,17 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
        else
            page = tbm->spages[iterator->spageptr];
 
-       output->internal_page = page;
-       output->blockno = page->blockno;
-       output->lossy = false;
-       output->recheck = page->recheck;
+       tbmres->internal_page = page;
+       tbmres->blockno = page->blockno;
+       tbmres->lossy = false;
+       tbmres->recheck = page->recheck;
        iterator->spageptr++;
-       return output;
+       return true;
    }
 
    /* Nothing more in the bitmap */
-   return NULL;
+   tbmres->blockno = InvalidBlockNumber;
+   return false;
 }
 
 /*
@@ -1047,10 +1053,9 @@ tbm_private_iterate(TBMPrivateIterator *iterator)
  * across multiple processes.  We need to acquire the iterator LWLock,
  * before accessing the shared members.
  */
-TBMIterateResult *
-tbm_shared_iterate(TBMSharedIterator *iterator)
+bool
+tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres)
 {
-   TBMIterateResult *output = &iterator->output;
    TBMSharedIteratorState *istate = iterator->state;
    PagetableEntry *ptbase = NULL;
    int        *idxpages = NULL;
@@ -1101,14 +1106,14 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
            chunk_blockno < ptbase[idxpages[istate->spageptr]].blockno)
        {
            /* Return a lossy page indicator from the chunk */
-           output->blockno = chunk_blockno;
-           output->lossy = true;
-           output->recheck = true;
-           output->internal_page = NULL;
+           tbmres->blockno = chunk_blockno;
+           tbmres->lossy = true;
+           tbmres->recheck = true;
+           tbmres->internal_page = NULL;
            istate->schunkbit++;
 
            LWLockRelease(&istate->lock);
-           return output;
+           return true;
        }
    }
 
@@ -1116,21 +1121,22 @@ tbm_shared_iterate(TBMSharedIterator *iterator)
    {
        PagetableEntry *page = &ptbase[idxpages[istate->spageptr]];
 
-       output->internal_page = page;
-       output->blockno = page->blockno;
-       output->lossy = false;
-       output->recheck = page->recheck;
+       tbmres->internal_page = page;
+       tbmres->blockno = page->blockno;
+       tbmres->lossy = false;
+       tbmres->recheck = page->recheck;
        istate->spageptr++;
 
        LWLockRelease(&istate->lock);
 
-       return output;
+       return true;
    }
 
    LWLockRelease(&istate->lock);
 
    /* Nothing more in the bitmap */
-   return NULL;
+   tbmres->blockno = InvalidBlockNumber;
+   return false;
 }
 
 /*
@@ -1604,15 +1610,17 @@ tbm_end_iterate(TBMIterator *iterator)
 }
 
 /*
- * Get the next TBMIterateResult from the shared or private bitmap iterator.
+ * Populate the next TBMIterateResult using the shared or private bitmap
+ * iterator. Returns false when there is nothing more to scan.
  */
-TBMIterateResult *
-tbm_iterate(TBMIterator *iterator)
+bool
+tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres)
 {
    Assert(iterator);
+   Assert(tbmres);
 
    if (iterator->shared)
-       return tbm_shared_iterate(iterator->i.shared_iterator);
+       return tbm_shared_iterate(iterator->i.shared_iterator, tbmres);
    else
-       return tbm_private_iterate(iterator->i.private_iterator);
+       return tbm_private_iterate(iterator->i.private_iterator, tbmres);
 }
index 95d8805b66f2c9d4e3da850aeab3274ae3fcf13b..aee1f70c22eee717c8a831d839f20cb16b52b3fa 100644 (file)
@@ -354,7 +354,12 @@ typedef struct GinScanEntryData
    /* for a partial-match or full-scan query, we accumulate all TIDs here */
    TIDBitmap  *matchBitmap;
    TBMPrivateIterator *matchIterator;
-   TBMIterateResult *matchResult;
+
+   /*
+    * If blockno is InvalidBlockNumber, all of the other fields in the
+    * matchResult are meaningless.
+    */
+   TBMIterateResult matchResult;
    OffsetNumber matchOffsets[TBM_MAX_TUPLES_PER_PAGE];
    int         matchNtuples;
 
index e185635c10be446a3e7511aa09e81e6a99c93b98..99f795ceab526925972a9a3ca4416db9111ca77e 100644 (file)
@@ -101,8 +101,8 @@ extern bool tbm_is_empty(const TIDBitmap *tbm);
 
 extern TBMPrivateIterator *tbm_begin_private_iterate(TIDBitmap *tbm);
 extern dsa_pointer tbm_prepare_shared_iterate(TIDBitmap *tbm);
-extern TBMIterateResult *tbm_private_iterate(TBMPrivateIterator *iterator);
-extern TBMIterateResult *tbm_shared_iterate(TBMSharedIterator *iterator);
+extern bool tbm_private_iterate(TBMPrivateIterator *iterator, TBMIterateResult *tbmres);
+extern bool tbm_shared_iterate(TBMSharedIterator *iterator, TBMIterateResult *tbmres);
 extern void tbm_end_private_iterate(TBMPrivateIterator *iterator);
 extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
 extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
@@ -113,7 +113,7 @@ extern TBMIterator tbm_begin_iterate(TIDBitmap *tbm,
                                     dsa_area *dsa, dsa_pointer dsp);
 extern void tbm_end_iterate(TBMIterator *iterator);
 
-extern TBMIterateResult *tbm_iterate(TBMIterator *iterator);
+extern bool tbm_iterate(TBMIterator *iterator, TBMIterateResult *tbmres);
 
 static inline bool
 tbm_exhausted(TBMIterator *iterator)