* 1. Otherwise every access would need to subtract 1.
*/
bool marked[MaxHeapTuplesPerPage + 1];
-
- /*
- * Tuple visibility is only computed once for each tuple, for correctness
- * and efficiency reasons; see comment in heap_page_prune() for details.
- * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
- * indicate no visibility has been computed, e.g. for LP_DEAD items.
- *
- * Same indexing as ->marked.
- */
- int8 htsv[MaxHeapTuplesPerPage + 1];
} PruneState;
/* Local functions */
Buffer buffer);
static int heap_prune_chain(Buffer buffer,
OffsetNumber rootoffnum,
+ int8 *htsv,
PruneState *prstate);
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
static void heap_prune_record_redirect(PruneState *prstate,
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
+ /*
+ * presult->htsv is not initialized here because all ntuple spots in the
+ * array will be set either to a valid HTSV_Result value or -1.
+ */
presult->ndeleted = 0;
presult->nnewlpdead = 0;
/* Nothing to do if slot doesn't contain a tuple */
if (!ItemIdIsNormal(itemid))
{
- prstate.htsv[offnum] = -1;
+ presult->htsv[offnum] = -1;
continue;
}
if (off_loc)
*off_loc = offnum;
- prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
- buffer);
+ presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+ buffer);
}
/* Scan the page */
continue;
/* Process this item or chain of items */
- presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
+ presult->ndeleted += heap_prune_chain(buffer, offnum,
+ presult->htsv, &prstate);
}
/* Clear the offset information once we have processed the given page. */
/*
* Prune specified line pointer or a HOT chain originating at line pointer.
*
+ * Tuple visibility information is provided in htsv.
+ *
* If the item is an index-referenced tuple (i.e. not a heap-only tuple),
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
* chain. We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
* Returns the number of tuples (to be) deleted from the page.
*/
static int
-heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
+heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
+ int8 *htsv, PruneState *prstate)
{
int ndeleted = 0;
Page dp = (Page) BufferGetPage(buffer);
*/
if (ItemIdIsNormal(rootlp))
{
- Assert(prstate->htsv[rootoffnum] != -1);
+ Assert(htsv[rootoffnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
if (HeapTupleHeaderIsHeapOnly(htup))
* either here or while following a chain below. Whichever path
* gets there first will mark the tuple unused.
*/
- if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+ if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
!HeapTupleHeaderIsHotUpdated(htup))
{
heap_prune_record_unused(prstate, rootoffnum);
break;
Assert(ItemIdIsNormal(lp));
- Assert(prstate->htsv[offnum] != -1);
htup = (HeapTupleHeader) PageGetItem(dp, lp);
/*
*/
tupdead = recent_dead = false;
- switch ((HTSV_Result) prstate->htsv[offnum])
+ switch (htsv_get_valid_status(htsv[offnum]))
{
case HEAPTUPLE_DEAD:
tupdead = true;
* of complexity just so we could deal with tuples that were DEAD to VACUUM,
* but nevertheless were left with storage after pruning.
*
- * The approach we take now is to restart pruning when the race condition is
- * detected. This allows heap_page_prune() to prune the tuples inserted by
- * the now-aborted transaction. This is a little crude, but it guarantees
- * that any items that make it into the dead_items array are simple LP_DEAD
- * line pointers, and that every remaining item with tuple storage is
- * considered as a candidate for freezing.
+ * As of Postgres 17, we circumvent this problem altogether by reusing the
+ * result of heap_page_prune()'s visibility check. Without the second call to
+ * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
+ * disagreement. We'll just handle such tuples as if they had become fully dead
+ * right after this operation completes instead of in the middle of it. Note that
+ * any tuple that becomes dead after the call to heap_page_prune() can't need to
+ * be frozen, because it was visible to another session when vacuum started.
*/
static void
lazy_scan_prune(LVRelState *vacrel,
OffsetNumber offnum,
maxoff;
ItemId itemid;
- HeapTupleData tuple;
- HTSV_Result res;
PruneResult presult;
int tuples_frozen,
lpdead_items,
*/
maxoff = PageGetMaxOffsetNumber(page);
-retry:
-
/* Initialize (or reset) page-level state */
pagefrz.freeze_required = false;
pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
+ HeapTupleHeader htup;
bool totally_frozen;
/*
Assert(ItemIdIsNormal(itemid));
- ItemPointerSet(&(tuple.t_self), blkno, offnum);
- tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
- tuple.t_len = ItemIdGetLength(itemid);
- tuple.t_tableOid = RelationGetRelid(rel);
-
- /*
- * DEAD tuples are almost always pruned into LP_DEAD line pointers by
- * heap_page_prune(), but it's possible that the tuple state changed
- * since heap_page_prune() looked. Handle that here by restarting.
- * (See comments at the top of function for a full explanation.)
- */
- res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
- buf);
-
- if (unlikely(res == HEAPTUPLE_DEAD))
- goto retry;
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
/*
* The criteria for counting a tuple as live in this block need to
* (Cases where we bypass index vacuuming will violate this optimistic
* assumption, but the overall impact of that should be negligible.)
*/
- switch (res)
+ switch (htsv_get_valid_status(presult.htsv[offnum]))
{
case HEAPTUPLE_LIVE:
{
TransactionId xmin;
- if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+ if (!HeapTupleHeaderXminCommitted(htup))
{
prunestate->all_visible = false;
break;
* The inserter definitely committed. But is it old enough
* that everyone sees it as committed?
*/
- xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+ xmin = HeapTupleHeaderGetXmin(htup);
if (!TransactionIdPrecedes(xmin,
vacrel->cutoffs.OldestXmin))
{
prunestate->hastup = true; /* page makes rel truncation unsafe */
/* Tuple with storage -- consider need to freeze */
- if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
+ if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
&frozen[tuples_frozen], &totally_frozen))
{
/* Save prepared freeze plan for later */
{
int ndeleted; /* Number of tuples deleted from the page */
int nnewlpdead; /* Number of newly LP_DEAD items */
+
+ /*
+ * Tuple visibility is only computed once for each tuple, for correctness
+ * and efficiency reasons; see comment in heap_page_prune() for details.
+ * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
+ * indicate no visibility has been computed, e.g. for LP_DEAD items.
+ *
+ * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+ * 1. Otherwise every access would need to subtract 1.
+ */
+ int8 htsv[MaxHeapTuplesPerPage + 1];
} PruneResult;
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneResult.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+ Assert(status >= HEAPTUPLE_DEAD &&
+ status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+ return (HTSV_Result) status;
+}
+
/* ----------------
* function prototypes for heap access method
*