Fix undercounting in VACUUM VERBOSE output.
author: Peter Geoghegan <pg@bowt.ie>
Fri, 1 May 2020 16:51:08 +0000 (09:51 -0700)
committer: Peter Geoghegan <pg@bowt.ie>
Fri, 1 May 2020 16:51:08 +0000 (09:51 -0700)
The logic for determining how many nbtree pages in an index are deleted
pages sometimes undercounted pages.  Pages that were deleted by the
current VACUUM operation (as opposed to some previous VACUUM operation
whose deleted pages have yet to be reused) were sometimes overlooked.
The final count is exposed to users through VACUUM VERBOSE's "%u index
pages have been deleted" output.

btvacuumpage() avoided double-counting when _bt_pagedel() deleted more
than one page by assuming that only one page was deleted, and that the
additional deleted pages would get picked up during a future call to
btvacuumpage() by the same VACUUM operation.  _bt_pagedel() can
legitimately delete pages that the btvacuumscan() scan will not visit
again, though, so that assumption was slightly faulty.

Fix the accounting by teaching _bt_pagedel() about its caller's
requirements.  It now only reports on pages that it knows btvacuumscan()
won't visit again (including the current btvacuumpage() page), so
everything works out in the end.

This bug has been around forever.  Only backpatch to v11, though, to
keep _bt_pagedel() in sync on the branches that have today's bugfix
commit b0229f26da.  Note that this commit changes the signature of
_bt_pagedel(), just like commit b0229f26da.

Author: Peter Geoghegan
Reviewed-By: Masahiko Sawada
Discussion: https://postgr.es/m/CAH2-WzkrXBcMQWAYUJMFTTvzx_r4q=pYSjDe07JnUXhe+OZnJA@mail.gmail.com
Backpatch: 11-

src/backend/access/nbtree/nbtpage.c
src/backend/access/nbtree/nbtree.c
src/include/access/nbtree.h

index f30c965428ff812457d13081fddd30a922625578..1e1d8a7e2f800d68f72a5a93751aea08eb0c7cad 100644 (file)
@@ -37,8 +37,10 @@ static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf);
 static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
                                   BTStack stack);
 static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
+                                    BlockNumber scanblkno,
                                     bool *rightsib_empty,
-                                    TransactionId *oldestBtpoXact);
+                                    TransactionId *oldestBtpoXact,
+                                    uint32 *ndeleted);
 static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
                                   BTStack stack, Buffer *topparent, OffsetNumber *topoff,
                                   BlockNumber *target, BlockNumber *rightsib);
@@ -1301,7 +1303,9 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
  *
  * Returns the number of pages successfully deleted (zero if page cannot
  * be deleted now; could be more than one if parent or right sibling pages
- * were deleted too).
+ * were deleted too).  Note that this does not include pages that we delete
+ * that the btvacuumscan scan has yet to reach; they'll get counted later
+ * instead.
  *
  * Maintains *oldestBtpoXact for any pages that get deleted.  Caller is
  * responsible for maintaining *oldestBtpoXact in the case of pages that were
@@ -1311,15 +1315,21 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack,
  * carefully, it's better to run it in a temp context that can be reset
  * frequently.
  */
-int
+uint32
 _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
 {
-   int         ndeleted = 0;
+   uint32      ndeleted = 0;
    BlockNumber rightsib;
    bool        rightsib_empty;
    Page        page;
    BTPageOpaque opaque;
 
+   /*
+    * Save original leafbuf block number from caller.  Only deleted blocks
+    * that are <= scanblkno get counted in ndeleted return value.
+    */
+   BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
+
    /*
     * "stack" is a search stack leading (approximately) to the target page.
     * It is initially NULL, but when iterating, we keep it to avoid
@@ -1370,8 +1380,9 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
            if (P_ISDELETED(opaque))
                ereport(LOG,
                        (errcode(ERRCODE_INDEX_CORRUPTED),
-                        errmsg_internal("found deleted block %u while following right link in index \"%s\"",
+                        errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
                                         BufferGetBlockNumber(leafbuf),
+                                        scanblkno,
                                         RelationGetRelationName(rel))));
 
            _bt_relbuf(rel, leafbuf);
@@ -1521,13 +1532,13 @@ _bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
        while (P_ISHALFDEAD(opaque))
        {
            /* Check for interrupts in _bt_unlink_halfdead_page */
-           if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
-                                         oldestBtpoXact))
+           if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
+                                         &rightsib_empty, oldestBtpoXact,
+                                         &ndeleted))
            {
                /* _bt_unlink_halfdead_page failed, released buffer */
                return ndeleted;
            }
-           ndeleted++;
        }
 
        Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
@@ -1779,8 +1790,9 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
  * to avoid having to reacquire a lock we already released).
  */
 static bool
-_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
-                        TransactionId *oldestBtpoXact)
+_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
+                        bool *rightsib_empty, TransactionId *oldestBtpoXact,
+                        uint32 *ndeleted)
 {
    BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
    BlockNumber leafleftsib;
@@ -2166,6 +2178,14 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
        TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
        *oldestBtpoXact = opaque->btpo.xact;
 
+   /*
+    * If btvacuumscan won't revisit this page in a future btvacuumpage call
+    * and count it as deleted then, we count it as deleted by the current
+    * btvacuumpage call.
+    */
+   if (target <= scanblkno)
+       (*ndeleted)++;
+
    /*
     * Release the target, if it was not the leaf block.  The leaf is always
     * kept locked.
index 8da25a71d807882feee98fcf96e28b8fc952fba4..9b53e1218138a27096015700d237f7765a354e13 100644 (file)
@@ -1349,17 +1349,17 @@ restart:
    if (delete_now)
    {
        MemoryContext oldcontext;
-       int         ndel;
 
        /* Run pagedel in a temp context to avoid memory leakage */
        MemoryContextReset(vstate->pagedelcontext);
        oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
 
-       ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
-
-       /* count only this page, else may double-count parent */
-       if (ndel)
-           stats->pages_deleted++;
+       /*
+        * We trust the _bt_pagedel return value because it does not include
+        * any page that a future call here from btvacuumscan is expected to
+        * count.  There will be no double-counting.
+        */
+       stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
 
        MemoryContextSwitchTo(oldcontext);
        /* pagedel released buffer, so we shouldn't */
index 6640581fd6d625b5a4e8971d73a62d8d46cc690d..e8b7a5fde1950663dca1b0aad12f64ef19a86273 100644 (file)
@@ -764,8 +764,8 @@ extern void _bt_delitems_delete(Relation rel, Buffer buf,
 extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
                                OffsetNumber *itemnos, int nitems,
                                BlockNumber lastBlockVacuumed);
-extern int _bt_pagedel(Relation rel, Buffer leafbuf,
-                       TransactionId *oldestBtpoXact);
+extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
+                         TransactionId *oldestBtpoXact);
 
 /*
  * prototypes for functions in nbtsearch.c