Remove retry loop in heap_page_prune().
author Robert Haas <rhaas@postgresql.org>
Mon, 2 Oct 2023 15:40:07 +0000 (11:40 -0400)
committer Robert Haas <rhaas@postgresql.org>
Mon, 2 Oct 2023 15:40:07 +0000 (11:40 -0400)
The retry loop was needed because heap_page_prune() called
HeapTupleSatisfiesVacuum() and then lazy_scan_prune() did the same
thing again, and the two calls could get different answers due to
concurrent clog updates.  This patch instead makes heap_page_prune()
return the HeapTupleSatisfiesVacuum() results it computed to the
caller, which allows lazy_scan_prune() to avoid recomputing those
values in the first place.  That's nice both because it eliminates
the need for a retry loop and because it's cheaper.
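
To illustrate the idea, a minimal sketch of the new flow follows, with
types and signatures reduced to the essentials; MAX_TUPLES,
compute_visibility(), has_tuple, and the function names are
placeholders for this sketch, not the actual PostgreSQL code:

    #include <stdbool.h>
    #include <stdint.h>

    #define MAX_TUPLES 291              /* stand-in for MaxHeapTuplesPerPage */

    typedef enum { HEAPTUPLE_DEAD, HEAPTUPLE_LIVE } HTSV_Result;

    /* stand-in for HeapTupleSatisfiesVacuum() */
    extern HTSV_Result compute_visibility(int offnum);

    typedef struct PruneResult
    {
        int     ndeleted;
        int     nnewlpdead;
        int8_t  htsv[MAX_TUPLES + 1];   /* -1 means "not computed" */
    } PruneResult;

    /* Pruning computes visibility exactly once per offset and records it. */
    void
    prune_page(PruneResult *presult, int maxoff, const bool *has_tuple)
    {
        for (int off = 1; off <= maxoff; off++)
            presult->htsv[off] = has_tuple[off] ? compute_visibility(off) : -1;
    }

    /*
     * The caller reuses the recorded answers: with no second visibility
     * check there is nothing to disagree with, hence no retry loop.
     */
    void
    scan_prune(const PruneResult *presult, int maxoff)
    {
        for (int off = 1; off <= maxoff; off++)
        {
            if (presult->htsv[off] == -1)
                continue;               /* no tuple storage at this slot */
            /* act on (HTSV_Result) presult->htsv[off] */
        }
    }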

Melanie Plageman, reviewed by David Geier, Andres Freund, and me.

Discussion: https://postgr.es/m/CAAKRu_br124qsGJieuYA0nGjywEukhK1dKBfRdby_4yY3E9SXA%40mail.gmail.com

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

index d5892a2db46b3e66d7d8d0e646e3327b9e0f885c..c5f1abd95a9a087901d49507a5dd26d294944b34 100644 (file)
@@ -53,16 +53,6 @@ typedef struct
         * 1. Otherwise every access would need to subtract 1.
         */
        bool            marked[MaxHeapTuplesPerPage + 1];
-
-       /*
-        * Tuple visibility is only computed once for each tuple, for correctness
-        * and efficiency reasons; see comment in heap_page_prune() for details.
-        * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
-        * indicate no visibility has been computed, e.g. for LP_DEAD items.
-        *
-        * Same indexing as ->marked.
-        */
-       int8            htsv[MaxHeapTuplesPerPage + 1];
 } PruneState;
 
 /* Local functions */
@@ -71,6 +61,7 @@ static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
                                                                                           Buffer buffer);
 static int     heap_prune_chain(Buffer buffer,
                                                         OffsetNumber rootoffnum,
+                                                        int8 *htsv,
                                                         PruneState *prstate);
 static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
@@ -240,6 +231,10 @@ heap_page_prune(Relation relation, Buffer buffer,
        prstate.nredirected = prstate.ndead = prstate.nunused = 0;
        memset(prstate.marked, 0, sizeof(prstate.marked));
 
+       /*
+        * presult->htsv is not initialized here because all ntuple entries in
+        * the array will be set either to a valid HTSV_Result value or to -1.
+        */
        presult->ndeleted = 0;
        presult->nnewlpdead = 0;
 
@@ -276,7 +271,7 @@ heap_page_prune(Relation relation, Buffer buffer,
                /* Nothing to do if slot doesn't contain a tuple */
                if (!ItemIdIsNormal(itemid))
                {
-                       prstate.htsv[offnum] = -1;
+                       presult->htsv[offnum] = -1;
                        continue;
                }
 
@@ -292,8 +287,8 @@ heap_page_prune(Relation relation, Buffer buffer,
                if (off_loc)
                        *off_loc = offnum;
 
-               prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-                                                                                                                  buffer);
+               presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+                                                                                                                       buffer);
        }
 
        /* Scan the page */
@@ -317,7 +312,8 @@ heap_page_prune(Relation relation, Buffer buffer,
                        continue;
 
                /* Process this item or chain of items */
-               presult->ndeleted += heap_prune_chain(buffer, offnum, &prstate);
+               presult->ndeleted += heap_prune_chain(buffer, offnum,
+                                                                                         presult->htsv, &prstate);
        }
 
        /* Clear the offset information once we have processed the given page. */
@@ -446,6 +442,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 /*
  * Prune specified line pointer or a HOT chain originating at line pointer.
  *
+ * Tuple visibility information is provided in htsv.
+ *
  * If the item is an index-referenced tuple (i.e. not a heap-only tuple),
  * the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
  * chain.  We also prune any RECENTLY_DEAD tuples preceding a DEAD tuple.
@@ -473,7 +471,8 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
  * Returns the number of tuples (to be) deleted from the page.
  */
 static int
-heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
+heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
+                                int8 *htsv, PruneState *prstate)
 {
        int                     ndeleted = 0;
        Page            dp = (Page) BufferGetPage(buffer);
@@ -494,7 +493,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
         */
        if (ItemIdIsNormal(rootlp))
        {
-               Assert(prstate->htsv[rootoffnum] != -1);
+               Assert(htsv[rootoffnum] != -1);
                htup = (HeapTupleHeader) PageGetItem(dp, rootlp);
 
                if (HeapTupleHeaderIsHeapOnly(htup))
@@ -517,7 +516,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
                         * either here or while following a chain below.  Whichever path
                         * gets there first will mark the tuple unused.
                         */
-                       if (prstate->htsv[rootoffnum] == HEAPTUPLE_DEAD &&
+                       if (htsv[rootoffnum] == HEAPTUPLE_DEAD &&
                                !HeapTupleHeaderIsHotUpdated(htup))
                        {
                                heap_prune_record_unused(prstate, rootoffnum);
@@ -585,7 +584,6 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
                        break;
 
                Assert(ItemIdIsNormal(lp));
-               Assert(prstate->htsv[offnum] != -1);
                htup = (HeapTupleHeader) PageGetItem(dp, lp);
 
                /*
@@ -605,7 +603,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum, PruneState *prstate)
                 */
                tupdead = recent_dead = false;
 
-               switch ((HTSV_Result) prstate->htsv[offnum])
+               switch (htsv_get_valid_status(htsv[offnum]))
                {
                        case HEAPTUPLE_DEAD:
                                tupdead = true;
index fa77ef7f4ad88059255169fc4f25e29955b89dbc..42e49bc159b680dbdb637571dbda212525b549a1 100644 (file)
@@ -1524,12 +1524,13 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
  * of complexity just so we could deal with tuples that were DEAD to VACUUM,
  * but nevertheless were left with storage after pruning.
  *
- * The approach we take now is to restart pruning when the race condition is
- * detected.  This allows heap_page_prune() to prune the tuples inserted by
- * the now-aborted transaction.  This is a little crude, but it guarantees
- * that any items that make it into the dead_items array are simple LP_DEAD
- * line pointers, and that every remaining item with tuple storage is
- * considered as a candidate for freezing.
+ * As of Postgres 17, we circumvent this problem altogether by reusing the
+ * result of heap_page_prune()'s visibility check.  Without a second call to
+ * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and so there can be
+ * no disagreement.  We simply treat such tuples as if they had become fully
+ * dead right after this operation completes, rather than in the middle of it.
+ * Note that a tuple that becomes dead after the heap_page_prune() call never
+ * needs freezing, because it was visible to another session when vacuum started.
  */
 static void
 lazy_scan_prune(LVRelState *vacrel,
@@ -1542,8 +1543,6 @@ lazy_scan_prune(LVRelState *vacrel,
        OffsetNumber offnum,
                                maxoff;
        ItemId          itemid;
-       HeapTupleData tuple;
-       HTSV_Result res;
        PruneResult presult;
        int                     tuples_frozen,
                                lpdead_items,
@@ -1563,8 +1562,6 @@ lazy_scan_prune(LVRelState *vacrel,
         */
        maxoff = PageGetMaxOffsetNumber(page);
 
-retry:
-
        /* Initialize (or reset) page-level state */
        pagefrz.freeze_required = false;
        pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
@@ -1600,6 +1597,7 @@ retry:
                 offnum <= maxoff;
                 offnum = OffsetNumberNext(offnum))
        {
+               HeapTupleHeader htup;
                bool            totally_frozen;
 
                /*
@@ -1642,22 +1640,7 @@ retry:
 
                Assert(ItemIdIsNormal(itemid));
 
-               ItemPointerSet(&(tuple.t_self), blkno, offnum);
-               tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-               tuple.t_len = ItemIdGetLength(itemid);
-               tuple.t_tableOid = RelationGetRelid(rel);
-
-               /*
-                * DEAD tuples are almost always pruned into LP_DEAD line pointers by
-                * heap_page_prune(), but it's possible that the tuple state changed
-                * since heap_page_prune() looked.  Handle that here by restarting.
-                * (See comments at the top of function for a full explanation.)
-                */
-               res = HeapTupleSatisfiesVacuum(&tuple, vacrel->cutoffs.OldestXmin,
-                                                                          buf);
-
-               if (unlikely(res == HEAPTUPLE_DEAD))
-                       goto retry;
+               htup = (HeapTupleHeader) PageGetItem(page, itemid);
 
                /*
                 * The criteria for counting a tuple as live in this block need to
@@ -1678,7 +1661,7 @@ retry:
                 * (Cases where we bypass index vacuuming will violate this optimistic
                 * assumption, but the overall impact of that should be negligible.)
                 */
-               switch (res)
+               switch (htsv_get_valid_status(presult.htsv[offnum]))
                {
                        case HEAPTUPLE_LIVE:
 
@@ -1700,7 +1683,7 @@ retry:
                                {
                                        TransactionId xmin;
 
-                                       if (!HeapTupleHeaderXminCommitted(tuple.t_data))
+                                       if (!HeapTupleHeaderXminCommitted(htup))
                                        {
                                                prunestate->all_visible = false;
                                                break;
@@ -1710,7 +1693,7 @@ retry:
                                         * The inserter definitely committed. But is it old enough
                                         * that everyone sees it as committed?
                                         */
-                                       xmin = HeapTupleHeaderGetXmin(tuple.t_data);
+                                       xmin = HeapTupleHeaderGetXmin(htup);
                                        if (!TransactionIdPrecedes(xmin,
                                                                                           vacrel->cutoffs.OldestXmin))
                                        {
@@ -1764,7 +1747,7 @@ retry:
                prunestate->hastup = true;      /* page makes rel truncation unsafe */
 
                /* Tuple with storage -- consider need to freeze */
-               if (heap_prepare_freeze_tuple(tuple.t_data, &vacrel->cutoffs, &pagefrz,
+               if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
                                                                          &frozen[tuples_frozen], &totally_frozen))
                {
                        /* Save prepared freeze plan for later */
index 2d3f149e4f0babf3301219cabbcda73f6f6005f3..62fac1d5d29b0d626d454ae5f3e218952713aab7 100644 (file)
@@ -198,8 +198,33 @@ typedef struct PruneResult
 {
        int                     ndeleted;               /* Number of tuples deleted from the page */
        int                     nnewlpdead;             /* Number of newly LP_DEAD items */
+
+       /*
+        * Tuple visibility is only computed once for each tuple, for correctness
+        * and efficiency reasons; see comment in heap_page_prune() for details.
+        * This is of type int8[], instead of HTSV_Result[], so we can use -1 to
+        * indicate no visibility has been computed, e.g. for LP_DEAD items.
+        *
+        * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+        * 1. Otherwise every access would need to subtract 1.
+        */
+       int8            htsv[MaxHeapTuplesPerPage + 1];
 } PruneResult;
 
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneResult.htsv for details. This helper function is meant to
+ * guard against examining visibility status array members which have not yet
+ * been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+       Assert(status >= HEAPTUPLE_DEAD &&
+                  status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+       return (HTSV_Result) status;
+}
+
 /* ----------------
  *             function prototypes for heap access method
  *
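
As a usage note, a condensed sketch of how a caller such as
lazy_scan_prune() consumes the new field follows; declarations and the
per-status handling are elided, and the skipping of non-normal items is
collapsed into a single check, so this is an approximation of the real
loop, not a copy of it:

    for (offnum = FirstOffsetNumber;
         offnum <= maxoff;
         offnum = OffsetNumberNext(offnum))
    {
        ItemId      itemid = PageGetItemId(page, offnum);

        /*
         * LP_UNUSED, LP_REDIRECT and LP_DEAD slots never had a status
         * computed; presult.htsv[offnum] is still -1 for them, so it
         * must not be consulted.
         */
        if (!ItemIdIsNormal(itemid))
            continue;

        /*
         * The Assert inside htsv_get_valid_status() guards against
         * reading a status that heap_page_prune() never filled in.
         */
        switch (htsv_get_valid_status(presult.htsv[offnum]))
        {
            case HEAPTUPLE_LIVE:
                /* per-status handling as in the vacuumlazy.c hunk above */
                break;
            default:
                /* remaining HTSV_Result cases handled likewise */
                break;
        }
    }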