VACUUM VERBOSE: Count "newly deleted" index pages.
author: Peter Geoghegan <pg@bowt.ie>
Thu, 25 Feb 2021 22:32:18 +0000 (14:32 -0800)
committer: Peter Geoghegan <pg@bowt.ie>
Thu, 25 Feb 2021 22:32:18 +0000 (14:32 -0800)
Teach VACUUM VERBOSE to report on pages deleted by the _current_ VACUUM
operation -- these are newly deleted pages.  VACUUM VERBOSE continues to
report on the total number of deleted pages in the entire index (no
change there).  The former is a subset of the latter.

The distinction between each category of deleted index page only arises
with index AMs where page deletion is supported and is decoupled from
page recycling for performance reasons.

This is follow-up work to commit e5d8a999, which made nbtree store
64-bit XIDs (not 32-bit XIDs) in pages at the point at which they're
deleted.  Note that the btm_last_cleanup_num_delpages metapage field
added by that commit usually gets set to pages_newly_deleted.  The
exceptions (the scenarios in which they're not equal) all seem to be
tricky cases for the implementation (of page deletion and recycling) in
general.

Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAH2-WznpdHvujGUwYZ8sihX%3Dd5u-tRYhi-F4wnV2uN2zHpMUXw%40mail.gmail.com

src/backend/access/gin/ginvacuum.c
src/backend/access/gist/gistvacuum.c
src/backend/access/heap/vacuumlazy.c
src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtree.c
src/backend/access/spgist/spgvacuum.c
src/include/access/genam.h
src/include/access/nbtree.h

index a0453b36cde5b6d89b0443852cd9195304725ffc..a276eb020b5dd35b94ebad69489215729ed0327c 100644 (file)
@@ -231,6 +231,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
 
    END_CRIT_SECTION();
 
+   gvs->result->pages_newly_deleted++;
    gvs->result->pages_deleted++;
 }
 
index ddecb8ab18ef435b63f56dfd63c3f0d700853e8d..0663193531a73e3f34e2161bf198801ec9ef69c4 100644 (file)
@@ -133,9 +133,21 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
    MemoryContext oldctx;
 
    /*
-    * Reset counts that will be incremented during the scan; needed in case
-    * of multiple scans during a single VACUUM command.
+    * Reset fields that track information about the entire index now.  This
+    * avoids double-counting in the case where a single VACUUM command
+    * requires multiple scans of the index.
+    *
+    * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
+    * since they track information about the VACUUM command, and so must last
+    * across each call to gistvacuumscan().
+    *
+    * (Note that pages_free is treated as state about the whole index, not
+    * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
+    * calls are idempotent, and get repeated for the same deleted pages in
+    * some scenarios.  The point for us is to track the number of recyclable
+    * pages in the index at the end of the VACUUM command.)
     */
+   stats->num_pages = 0;
    stats->estimated_count = false;
    stats->num_index_tuples = 0;
    stats->pages_deleted = 0;
@@ -281,8 +293,8 @@ restart:
    {
        /* Okay to recycle this page */
        RecordFreeIndexPage(rel, blkno);
-       vstate->stats->pages_free++;
        vstate->stats->pages_deleted++;
+       vstate->stats->pages_free++;
    }
    else if (GistPageIsDeleted(page))
    {
@@ -636,6 +648,7 @@ gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
    /* mark the page as deleted */
    MarkBufferDirty(leafBuffer);
    GistPageSetDeleted(leafPage, txid);
+   stats->pages_newly_deleted++;
    stats->pages_deleted++;
 
    /* remove the downlink from the parent */
index 0bb78162f546d0a93a259dfc694d9d78df06e1b2..d8f847b0e6673c435168ca4304f15f23aaac9162 100644 (file)
@@ -2521,9 +2521,11 @@ lazy_cleanup_index(Relation indrel,
                        (*stats)->num_index_tuples,
                        (*stats)->num_pages),
                 errdetail("%.0f index row versions were removed.\n"
-                          "%u index pages have been deleted, %u are currently reusable.\n"
+                          "%u index pages were newly deleted.\n"
+                          "%u index pages are currently deleted, of which %u are currently reusable.\n"
                           "%s.",
                           (*stats)->tuples_removed,
+                          (*stats)->pages_newly_deleted,
                           (*stats)->pages_deleted, (*stats)->pages_free,
                           pg_rusage_show(&ru0))));
    }
index a43805a7b09e179708d3dcc46b5580ebaa1f9585..629a23628ef6cb341d2de80caca5d26695c5085d 100644 (file)
@@ -50,7 +50,7 @@ static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
 static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
                                     BlockNumber scanblkno,
                                     bool *rightsib_empty,
-                                    uint32 *ndeleted);
+                                    BTVacState *vstate);
 static bool _bt_lock_subtree_parent(Relation rel, BlockNumber child,
                                    BTStack stack,
                                    Buffer *subtreeparent,
@@ -1760,20 +1760,22 @@ _bt_rightsib_halfdeadflag(Relation rel, BlockNumber leafrightsib)
  * should never pass a buffer containing an existing deleted page here.  The
  * lock and pin on caller's buffer will be dropped before we return.
  *
- * Returns the number of pages successfully deleted (zero if page cannot
- * be deleted now; could be more than one if parent or right sibling pages
- * were deleted too).  Note that this does not include pages that we delete
- * that the btvacuumscan scan has yet to reach; they'll get counted later
- * instead.
+ * Maintains bulk delete stats for caller, which are taken from vstate.  We
+ * need to cooperate closely with caller here so that whole VACUUM operation
+ * reliably avoids any double counting of subsidiary-to-leafbuf pages that we
+ * delete in passing.  If such pages happen to be from a block number that is
+ * ahead of the current scanblkno position, then caller is expected to count
+ * them directly later on.  It's simpler for us to understand caller's
+ * requirements than it would be for caller to understand when or how a
+ * deleted page became deleted after the fact.
  *
  * NOTE: this leaks memory.  Rather than trying to clean up everything
  * carefully, it's better to run it in a temp context that can be reset
  * frequently.
  */
-uint32
-_bt_pagedel(Relation rel, Buffer leafbuf)
+void
+_bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate)
 {
-   uint32      ndeleted = 0;
    BlockNumber rightsib;
    bool        rightsib_empty;
    Page        page;
@@ -1781,7 +1783,8 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
 
    /*
     * Save original leafbuf block number from caller.  Only deleted blocks
-    * that are <= scanblkno get counted in ndeleted return value.
+    * that are <= scanblkno are added to bulk delete stat's pages_deleted
+    * count.
     */
    BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
 
@@ -1843,7 +1846,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
                                         RelationGetRelationName(rel))));
 
            _bt_relbuf(rel, leafbuf);
-           return ndeleted;
+           return;
        }
 
        /*
@@ -1873,7 +1876,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
            Assert(!P_ISHALFDEAD(opaque));
 
            _bt_relbuf(rel, leafbuf);
-           return ndeleted;
+           return;
        }
 
        /*
@@ -1922,8 +1925,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
                if (_bt_leftsib_splitflag(rel, leftsib, leafblkno))
                {
                    ReleaseBuffer(leafbuf);
-                   Assert(ndeleted == 0);
-                   return ndeleted;
+                   return;
                }
 
                /* we need an insertion scan key for the search, so build one */
@@ -1964,7 +1966,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
            if (!_bt_mark_page_halfdead(rel, leafbuf, stack))
            {
                _bt_relbuf(rel, leafbuf);
-               return ndeleted;
+               return;
            }
        }
 
@@ -1979,7 +1981,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
        {
            /* Check for interrupts in _bt_unlink_halfdead_page */
            if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
-                                         &rightsib_empty, &ndeleted))
+                                         &rightsib_empty, vstate))
            {
                /*
                 * _bt_unlink_halfdead_page should never fail, since we
@@ -1990,7 +1992,7 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
                 * lock and pin on leafbuf for us.
                 */
                Assert(false);
-               return ndeleted;
+               return;
            }
        }
 
@@ -2026,8 +2028,6 @@ _bt_pagedel(Relation rel, Buffer leafbuf)
 
        leafbuf = _bt_getbuf(rel, rightsib, BT_WRITE);
    }
-
-   return ndeleted;
 }
 
 /*
@@ -2262,9 +2262,10 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
  */
 static bool
 _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
-                        bool *rightsib_empty, uint32 *ndeleted)
+                        bool *rightsib_empty, BTVacState *vstate)
 {
    BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
+   IndexBulkDeleteResult *stats = vstate->stats;
    BlockNumber leafleftsib;
    BlockNumber leafrightsib;
    BlockNumber target;
@@ -2674,12 +2675,17 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
        _bt_relbuf(rel, buf);
 
    /*
-    * If btvacuumscan won't revisit this page in a future btvacuumpage call
-    * and count it as deleted then, we count it as deleted by current
-    * btvacuumpage call
+    * Maintain pages_newly_deleted, which is simply the number of pages
+    * deleted by the ongoing VACUUM operation.
+    *
+    * Maintain pages_deleted in a way that takes into account how
+    * btvacuumpage() will count deleted pages that have yet to become
+    * scanblkno -- only count page when it's not going to get that treatment
+    * later on.
     */
+   stats->pages_newly_deleted++;
    if (target <= scanblkno)
-       (*ndeleted)++;
+       stats->pages_deleted++;
 
    return true;
 }
index 3b2e0aa5cb794c9a1dbd2b8341cc8f04c9e5b3aa..504f5bef17a4362154a73580dd4fa2e6cc20ee77 100644 (file)
 #include "utils/memutils.h"
 
 
-/* Working state needed by btvacuumpage */
-typedef struct
-{
-   IndexVacuumInfo *info;
-   IndexBulkDeleteResult *stats;
-   IndexBulkDeleteCallback callback;
-   void       *callback_state;
-   BTCycleId   cycleid;
-   MemoryContext pagedelcontext;
-} BTVacState;
-
 /*
  * BTPARALLEL_NOT_INITIALIZED indicates that the scan has not started.
  *
@@ -1016,9 +1005,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
     * avoids double-counting in the case where a single VACUUM command
     * requires multiple scans of the index.
     *
-    * Avoid resetting the tuples_removed field here, since it tracks
-    * information about the VACUUM command, and so must last across each call
-    * to btvacuumscan().
+    * Avoid resetting the tuples_removed and pages_newly_deleted fields here,
+    * since they track information about the VACUUM command, and so must last
+    * across each call to btvacuumscan().
     *
     * (Note that pages_free is treated as state about the whole index, not
     * the current VACUUM.  This is appropriate because RecordFreeIndexPage()
@@ -1237,11 +1226,13 @@ backtrack:
    }
    else if (P_ISHALFDEAD(opaque))
    {
+       /* Half-dead leaf page (from interrupted VACUUM) -- finish deleting */
+       attempt_pagedel = true;
+
        /*
-        * Half-dead leaf page.  Try to delete now.  Might update
-        * pages_deleted below.
+        * _bt_pagedel() will increment both pages_newly_deleted and
+        * pages_deleted stats in all cases (barring corruption)
         */
-       attempt_pagedel = true;
    }
    else if (P_ISLEAF(opaque))
    {
@@ -1451,12 +1442,12 @@ backtrack:
        oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
 
        /*
-        * We trust the _bt_pagedel return value because it does not include
-        * any page that a future call here from btvacuumscan is expected to
-        * count.  There will be no double-counting.
+        * _bt_pagedel maintains the bulk delete stats on our behalf;
+        * pages_newly_deleted and pages_deleted are likely to be incremented
+        * during call
         */
        Assert(blkno == scanblkno);
-       stats->pages_deleted += _bt_pagedel(rel, buf);
+       _bt_pagedel(rel, buf, vstate);
 
        MemoryContextSwitchTo(oldcontext);
        /* pagedel released buffer, so we shouldn't */
index 0d02a02222e9eb5565e03f02d86eea91389b635d..a9ffca5183bd2b30fd54cbc99e7fcd68e70ceb6f 100644 (file)
@@ -891,6 +891,7 @@ spgvacuumscan(spgBulkDeleteState *bds)
 
    /* Report final stats */
    bds->stats->num_pages = num_pages;
+   bds->stats->pages_newly_deleted = bds->stats->pages_deleted;
    bds->stats->pages_free = bds->stats->pages_deleted;
 }
 
index ffa1a4c80dbe190ca452fd9241af67920479c746..4515401869fb415bd11d3ee1bdcf129eee6b14cf 100644 (file)
@@ -63,8 +63,11 @@ typedef struct IndexVacuumInfo
  * of which this is just the first field; this provides a way for ambulkdelete
  * to communicate additional private data to amvacuumcleanup.
  *
- * Note: pages_deleted and pages_free refer to free space within the index
- * file.  Some index AMs may compute num_index_tuples by reference to
+ * Note: pages_newly_deleted is the number of pages in the index that were
+ * deleted by the current vacuum operation.  pages_deleted and pages_free
+ * refer to free space within the index file.
+ *
+ * Note: Some index AMs may compute num_index_tuples by reference to
  * num_heap_tuples, in which case they should copy the estimated_count field
  * from IndexVacuumInfo.
  */
@@ -74,7 +77,8 @@ typedef struct IndexBulkDeleteResult
    bool        estimated_count;    /* num_index_tuples is an estimate */
    double      num_index_tuples;   /* tuples remaining */
    double      tuples_removed; /* # removed during vacuum operation */
-   BlockNumber pages_deleted;  /* # unused pages in index */
+   BlockNumber pages_newly_deleted;    /* # pages marked deleted by us  */
+   BlockNumber pages_deleted;  /* # pages marked deleted (could be by us) */
    BlockNumber pages_free;     /* # pages available for reuse */
 } IndexBulkDeleteResult;
 
index 9ac90d7439836aeae45eab93b7c0657e40b926cf..b56b7b7868eb4b6badfe0fe5eed24443e66dac3e 100644 (file)
@@ -312,6 +312,20 @@ BTPageIsRecyclable(Page page)
    return false;
 }
 
+/*
+ * BTVacState is private nbtree.c state used during VACUUM.  It is exported
+ * for use by page deletion related code in nbtpage.c.
+ */
+typedef struct BTVacState
+{
+   IndexVacuumInfo *info;
+   IndexBulkDeleteResult *stats;
+   IndexBulkDeleteCallback callback;
+   void       *callback_state;
+   BTCycleId   cycleid;
+   MemoryContext pagedelcontext;
+} BTVacState;
+
 /*
  * Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
  * page.  The high key is not a tuple that is used to visit the heap.  It is
@@ -1181,7 +1195,7 @@ extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
 extern void _bt_delitems_delete_check(Relation rel, Buffer buf,
                                      Relation heapRel,
                                      TM_IndexDeleteOp *delstate);
-extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf);
+extern void _bt_pagedel(Relation rel, Buffer leafbuf, BTVacState *vstate);
 
 /*
  * prototypes for functions in nbtsearch.c