Optimize vacuuming of relations with no indexes.
author Robert Haas <rhaas@postgresql.org>
Thu, 18 Jan 2024 15:03:42 +0000 (10:03 -0500)
committer Robert Haas <rhaas@postgresql.org>
Thu, 18 Jan 2024 15:03:42 +0000 (10:03 -0500)
If there are no indexes on a relation, items can be marked LP_UNUSED
instead of LP_DEAD when pruning. This significantly reduces WAL
volume, since we no longer need to emit one WAL record for pruning
and a second to change the LP_DEAD line pointers thus created to
LP_UNUSED.

Melanie Plageman, reviewed by Andres Freund, Peter Geoghegan, and me

Discussion: https://postgr.es/m/CAAKRu_bgvb_k0gKOXWzNKWHt560R0smrGe3E8zewKPs8fiMKkw%40mail.gmail.com
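
[Annotation, not part of the commit: a rough standalone C sketch of the saving described above. With indexes, reclaiming a dead tuple costs a prune record plus a later heap-vacuum record; without indexes, pruning can go straight to LP_UNUSED in one record. LpState, reclaim_tuple, and the record counting are hypothetical stand-ins for the LP_* flags and the corresponding WAL records.]

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-in for a heap line pointer's state
     * (LP_NORMAL, LP_DEAD, LP_UNUSED in PostgreSQL). */
    typedef enum { LP_NORMAL_S, LP_DEAD_S, LP_UNUSED_S } LpState;

    /*
     * Model reclaiming one dead tuple. Each state change stands in for one
     * WAL record: the prune record and, on the two-record path, the later
     * heap-vacuum record that flips LP_DEAD to LP_UNUSED.
     */
    static int
    reclaim_tuple(LpState *lp, bool mark_unused_now)
    {
        int wal_records = 0;

        /* Pruning: without the optimization the item becomes LP_DEAD. */
        *lp = mark_unused_now ? LP_UNUSED_S : LP_DEAD_S;
        wal_records++;

        /*
         * Second pass (heap vacuuming), needed only because the item was
         * left LP_DEAD pending index cleanup -- cleanup that an index-less
         * table never requires.
         */
        if (*lp == LP_DEAD_S)
        {
            *lp = LP_UNUSED_S;
            wal_records++;
        }
        return wal_records;
    }

    int
    main(void)
    {
        LpState lp = LP_NORMAL_S;

        printf("with indexes:    %d WAL records\n", reclaim_tuple(&lp, false));
        lp = LP_NORMAL_S;
        printf("without indexes: %d WAL record\n", reclaim_tuple(&lp, true));
        return 0;
    }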

src/backend/access/heap/pruneheap.c
src/backend/access/heap/vacuumlazy.c
src/include/access/heapam.h

diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 3e0a1a260e6b975ecb4e9a3f1fa6528a6bf4d6df..59176335676394facd04fbcface155a429402c32 100644
@@ -35,6 +35,8 @@ typedef struct
 
        /* tuple visibility test, initialized for the relation */
        GlobalVisState *vistest;
+       /* whether or not dead items can be set LP_UNUSED during pruning */
+       bool            mark_unused_now;
 
        TransactionId new_prune_xid;    /* new prune hint value for page */
        TransactionId snapshotConflictHorizon;  /* latest xid removed */
@@ -67,6 +69,7 @@ static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
 static void heap_prune_record_redirect(PruneState *prstate,
                                                                           OffsetNumber offnum, OffsetNumber rdoffnum);
 static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum);
+static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum);
 static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum);
 static void page_verify_redirects(Page page);
 
@@ -148,7 +151,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
                {
                        PruneResult presult;
 
-                       heap_page_prune(relation, buffer, vistest, &presult, NULL);
+                       /*
+                        * For now, pass mark_unused_now as false regardless of whether or
+                        * not the relation has indexes, since we cannot safely determine
+                        * that during on-access pruning with the current implementation.
+                        */
+                       heap_page_prune(relation, buffer, vistest, false,
+                                                       &presult, NULL);
 
                        /*
                         * Report the number of tuples reclaimed to pgstats.  This is
@@ -193,6 +202,9 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  * (see heap_prune_satisfies_vacuum and
  * HeapTupleSatisfiesVacuum).
  *
+ * mark_unused_now indicates whether or not dead items can be set LP_UNUSED during
+ * pruning.
+ *
  * off_loc is the offset location required by the caller to use in error
  * callback.
  *
@@ -203,6 +215,7 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 void
 heap_page_prune(Relation relation, Buffer buffer,
                                GlobalVisState *vistest,
+                               bool mark_unused_now,
                                PruneResult *presult,
                                OffsetNumber *off_loc)
 {
@@ -227,6 +240,7 @@ heap_page_prune(Relation relation, Buffer buffer,
        prstate.new_prune_xid = InvalidTransactionId;
        prstate.rel = relation;
        prstate.vistest = vistest;
+       prstate.mark_unused_now = mark_unused_now;
        prstate.snapshotConflictHorizon = InvalidTransactionId;
        prstate.nredirected = prstate.ndead = prstate.nunused = 0;
        memset(prstate.marked, 0, sizeof(prstate.marked));
@@ -306,9 +320,9 @@ heap_page_prune(Relation relation, Buffer buffer,
                if (off_loc)
                        *off_loc = offnum;
 
-               /* Nothing to do if slot is empty or already dead */
+               /* Nothing to do if slot is empty */
                itemid = PageGetItemId(page, offnum);
-               if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid))
+               if (!ItemIdIsUsed(itemid))
                        continue;
 
                /* Process this item or chain of items */
@@ -581,7 +595,17 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
                 * function.)
                 */
                if (ItemIdIsDead(lp))
+               {
+                       /*
+                        * If the caller set mark_unused_now true, we can set dead line
+                        * pointers LP_UNUSED now. We don't increment ndeleted here since
+                        * the LP was already marked dead.
+                        */
+                       if (unlikely(prstate->mark_unused_now))
+                               heap_prune_record_unused(prstate, offnum);
+
                        break;
+               }
 
                Assert(ItemIdIsNormal(lp));
                htup = (HeapTupleHeader) PageGetItem(dp, lp);
@@ -715,7 +739,7 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
                 * redirect the root to the correct chain member.
                 */
                if (i >= nchain)
-                       heap_prune_record_dead(prstate, rootoffnum);
+                       heap_prune_record_dead_or_unused(prstate, rootoffnum);
                else
                        heap_prune_record_redirect(prstate, rootoffnum, chainitems[i]);
        }
@@ -726,9 +750,9 @@ heap_prune_chain(Buffer buffer, OffsetNumber rootoffnum,
                 * item.  This can happen if the loop in heap_page_prune caused us to
                 * visit the dead successor of a redirect item before visiting the
                 * redirect item.  We can clean up by setting the redirect item to
-                * DEAD state.
+                * DEAD state or LP_UNUSED if the caller indicated.
                 */
-               heap_prune_record_dead(prstate, rootoffnum);
+               heap_prune_record_dead_or_unused(prstate, rootoffnum);
        }
 
        return ndeleted;
@@ -774,6 +798,27 @@ heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum)
        prstate->marked[offnum] = true;
 }
 
+/*
+ * Depending on whether the caller set mark_unused_now to true, record that a
+ * line pointer should be marked LP_DEAD or LP_UNUSED. Line pointers can be
+ * marked LP_UNUSED in other cases as well, but no line pointer will be marked
+ * LP_DEAD when mark_unused_now is true.
+ */
+static void
+heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum)
+{
+       /*
+        * If the caller set mark_unused_now to true, we can remove dead tuples
+        * during pruning instead of marking their line pointers dead. Set this
+        * tuple's line pointer LP_UNUSED. We hint that this option is less
+        * likely.
+        */
+       if (unlikely(prstate->mark_unused_now))
+               heap_prune_record_unused(prstate, offnum);
+       else
+               heap_prune_record_dead(prstate, offnum);
+}
+
 /* Record line pointer to be marked unused */
 static void
 heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum)
@@ -903,13 +948,24 @@ heap_page_prune_execute(Buffer buffer,
 #ifdef USE_ASSERT_CHECKING
 
                /*
-                * Only heap-only tuples can become LP_UNUSED during pruning.  They
-                * don't need to be left in place as LP_DEAD items until VACUUM gets
-                * around to doing index vacuuming.
+                * When heap_page_prune() was called, mark_unused_now may have been
+                * passed as true, which allows would-be LP_DEAD items to be made
+                * LP_UNUSED instead. This is only possible if the relation has no
+                * indexes. If there are any dead items, then mark_unused_now was not
+                * true and every item being marked LP_UNUSED must refer to a
+                * heap-only tuple.
                 */
-               Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
-               htup = (HeapTupleHeader) PageGetItem(page, lp);
-               Assert(HeapTupleHeaderIsHeapOnly(htup));
+               if (ndead > 0)
+               {
+                       Assert(ItemIdHasStorage(lp) && ItemIdIsNormal(lp));
+                       htup = (HeapTupleHeader) PageGetItem(page, lp);
+                       Assert(HeapTupleHeaderIsHeapOnly(htup));
+               }
+               else
+               {
+                       Assert(ItemIdIsUsed(lp));
+               }
+
 #endif
 
                ItemIdSetUnused(lp);
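
[Annotation, not part of the commit: an aside on the branch hint used above. heap_prune_record_dead_or_unused() wraps its test in unlikely(), betting that most relations being pruned do have indexes. Below is a minimal standalone sketch of the same dispatch pattern, assuming a GCC/Clang __builtin_expect; record_dead()/record_unused() are hypothetical stubs, and PostgreSQL defines its own equivalent unlikely() macro in c.h.]

    #include <stdbool.h>
    #include <stdio.h>

    /* Branch-prediction hint; PostgreSQL's c.h defines unlikely()
     * this way when the compiler provides __builtin_expect. */
    #define unlikely(x) __builtin_expect((x) != 0, 0)

    /* Hypothetical stubs for the record functions in pruneheap.c. */
    static void record_dead(int offnum)   { printf("offnum %d -> LP_DEAD\n", offnum); }
    static void record_unused(int offnum) { printf("offnum %d -> LP_UNUSED\n", offnum); }

    /*
     * Mirrors the dispatch in heap_prune_record_dead_or_unused(): the
     * no-index case is hinted as the cold path, since most tables have at
     * least one index and therefore take the LP_DEAD branch.
     */
    static void
    record_dead_or_unused(bool mark_unused_now, int offnum)
    {
        if (unlikely(mark_unused_now))
            record_unused(offnum);
        else
            record_dead(offnum);
    }

    int
    main(void)
    {
        record_dead_or_unused(false, 1);    /* table with indexes */
        record_dead_or_unused(true, 2);     /* index-less table */
        return 0;
    }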
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index e7a942e183539590ccad0d746ec7a102ba8ab878..2f530ad62c3fe8b5c39588682532ccf33be11ae2 100644
@@ -1036,69 +1036,6 @@ lazy_scan_heap(LVRelState *vacrel)
 
                Assert(!prunestate.all_visible || !prunestate.has_lpdead_items);
 
-               if (vacrel->nindexes == 0)
-               {
-                       /*
-                        * Consider the need to do page-at-a-time heap vacuuming when
-                        * using the one-pass strategy now.
-                        *
-                        * The one-pass strategy will never call lazy_vacuum().  The steps
-                        * performed here can be thought of as the one-pass equivalent of
-                        * a call to lazy_vacuum().
-                        */
-                       if (prunestate.has_lpdead_items)
-                       {
-                               Size            freespace;
-
-                               lazy_vacuum_heap_page(vacrel, blkno, buf, 0, vmbuffer);
-
-                               /* Forget the LP_DEAD items that we just vacuumed */
-                               dead_items->num_items = 0;
-
-                               /*
-                                * Now perform FSM processing for blkno, and move on to next
-                                * page.
-                                *
-                                * Our call to lazy_vacuum_heap_page() will have considered if
-                                * it's possible to set all_visible/all_frozen independently
-                                * of lazy_scan_prune().  Note that prunestate was invalidated
-                                * by lazy_vacuum_heap_page() call.
-                                */
-                               freespace = PageGetHeapFreeSpace(page);
-
-                               UnlockReleaseBuffer(buf);
-                               RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
-
-                               /*
-                                * Periodically perform FSM vacuuming to make newly-freed
-                                * space visible on upper FSM pages. FreeSpaceMapVacuumRange()
-                                * vacuums the portion of the freespace map covering heap
-                                * pages from start to end - 1. Include the block we just
-                                * vacuumed by passing it blkno + 1. Overflow isn't an issue
-                                * because MaxBlockNumber + 1 is InvalidBlockNumber which
-                                * causes FreeSpaceMapVacuumRange() to vacuum freespace map
-                                * pages covering the remainder of the relation.
-                                */
-                               if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
-                               {
-                                       FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
-                                                                                       blkno + 1);
-                                       next_fsm_block_to_vacuum = blkno + 1;
-                               }
-
-                               continue;
-                       }
-
-                       /*
-                        * There was no call to lazy_vacuum_heap_page() because pruning
-                        * didn't encounter/create any LP_DEAD items that needed to be
-                        * vacuumed.  Prune state has not been invalidated, so proceed
-                        * with prunestate-driven visibility map and FSM steps (just like
-                        * the two-pass strategy).
-                        */
-                       Assert(dead_items->num_items == 0);
-               }
-
                /*
                 * Handle setting visibility map bit based on information from the VM
                 * (as of last lazy_scan_skip() call), and from prunestate
@@ -1209,38 +1146,45 @@ lazy_scan_heap(LVRelState *vacrel)
 
                /*
                 * Final steps for block: drop cleanup lock, record free space in the
-                * FSM
+                * FSM.
+                *
+                * If we will likely do index vacuuming, wait until
+                * lazy_vacuum_heap_rel() to save free space. This doesn't just save
+                * us some cycles; it also allows us to record any additional free
+                * space that lazy_vacuum_heap_page() will make available in cases
+                * where it's possible to truncate the page's line pointer array.
+                *
+                * Note: It's not in fact 100% certain that we really will call
+                * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip index
+                * vacuuming (and so must skip heap vacuuming).  This is deemed okay
+                * because it only happens in emergencies, or when there is very
+                * little free space anyway. (Besides, we start recording free space
+                * in the FSM once index vacuuming has been abandoned.)
                 */
-               if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
-               {
-                       /*
-                        * Wait until lazy_vacuum_heap_rel() to save free space.  This
-                        * doesn't just save us some cycles; it also allows us to record
-                        * any additional free space that lazy_vacuum_heap_page() will
-                        * make available in cases where it's possible to truncate the
-                        * page's line pointer array.
-                        *
-                        * Note: It's not in fact 100% certain that we really will call
-                        * lazy_vacuum_heap_rel() -- lazy_vacuum() might yet opt to skip
-                        * index vacuuming (and so must skip heap vacuuming).  This is
-                        * deemed okay because it only happens in emergencies, or when
-                        * there is very little free space anyway. (Besides, we start
-                        * recording free space in the FSM once index vacuuming has been
-                        * abandoned.)
-                        *
-                        * Note: The one-pass (no indexes) case is only supposed to make
-                        * it this far when there were no LP_DEAD items during pruning.
-                        */
-                       Assert(vacrel->nindexes > 0);
-                       UnlockReleaseBuffer(buf);
-               }
-               else
+               if (vacrel->nindexes == 0
+                       || !vacrel->do_index_vacuuming
+                       || !prunestate.has_lpdead_items)
                {
                        Size            freespace = PageGetHeapFreeSpace(page);
 
                        UnlockReleaseBuffer(buf);
                        RecordPageWithFreeSpace(vacrel->rel, blkno, freespace);
+
+                       /*
+                        * Periodically perform FSM vacuuming to make newly-freed space
+                        * visible on upper FSM pages. When the table has indexes, this is
+                        * instead done after index and heap vacuuming.
+                        */
+                       if (vacrel->nindexes == 0 && prunestate.has_lpdead_items &&
+                               blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
+                       {
+                               FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
+                                                                               blkno);
+                               next_fsm_block_to_vacuum = blkno;
+                       }
                }
+               else
+                       UnlockReleaseBuffer(buf);
        }
 
        vacrel->blkno = InvalidBlockNumber;
@@ -1596,8 +1540,13 @@ lazy_scan_prune(LVRelState *vacrel,
         * in presult.ndeleted. It should not be confused with lpdead_items;
         * lpdead_items's final value can be thought of as the number of tuples
         * that were deleted from indexes.
+        *
+        * If the relation has no indexes, we can immediately mark would-be dead
+        * items LP_UNUSED. Accordingly, mark_unused_now should be true when the
+        * relation has no indexes and false otherwise.
         */
-       heap_page_prune(rel, buf, vacrel->vistest, &presult, &vacrel->offnum);
+       heap_page_prune(rel, buf, vacrel->vistest, vacrel->nindexes == 0,
+                                       &presult, &vacrel->offnum);
 
        /*
         * Now scan the page to collect LP_DEAD items and check for tuples
@@ -2520,7 +2469,7 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
        bool            all_frozen;
        LVSavedErrInfo saved_err_info;
 
-       Assert(vacrel->nindexes == 0 || vacrel->do_index_vacuuming);
+       Assert(vacrel->do_index_vacuuming);
 
        pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
 
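
[Annotation, not part of the commit: the consolidated final-steps block above keeps the one-pass strategy's periodic FSM flushing. Below is a standalone sketch of that cadence, with an illustrative VACUUM_FSM_EVERY_PAGES (the real constant in vacuumlazy.c works out to roughly 8GB of heap pages) and a stub standing in for FreeSpaceMapVacuumRange().]

    #include <stdio.h>

    /* Illustrative value; vacuumlazy.c derives the real constant from 8GB
     * divided by the block size. */
    #define VACUUM_FSM_EVERY_PAGES 4

    /* Stub for FreeSpaceMapVacuumRange(rel, start, end): propagates freed
     * space for blocks in [start, end) up to higher FSM pages. */
    static void
    fsm_vacuum_range(unsigned start, unsigned end)
    {
        printf("FSM vacuum over blocks [%u, %u)\n", start, end);
    }

    int
    main(void)
    {
        unsigned next_fsm_block_to_vacuum = 0;

        for (unsigned blkno = 0; blkno < 11; blkno++)
        {
            /* ...block pruned in place, per-page free space recorded... */
            if (blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
            {
                fsm_vacuum_range(next_fsm_block_to_vacuum, blkno);
                next_fsm_block_to_vacuum = blkno;
            }
        }
        return 0;
    }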
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 932ec0d6f2bab985109e302bd22fa6c0bf0b28b7..4b133f685934b7ba13baccffb38be6e83eac0b4a 100644
@@ -320,6 +320,7 @@ struct GlobalVisState;
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
 extern void heap_page_prune(Relation relation, Buffer buffer,
                                                        struct GlobalVisState *vistest,
+                                                       bool mark_unused_now,
                                                        PruneResult *presult,
                                                        OffsetNumber *off_loc);
 extern void heap_page_prune_execute(Buffer buffer,