static bool _bt_mark_page_halfdead(Relation rel, Buffer leafbuf,
BTStack stack);
static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf,
+ BlockNumber scanblkno,
bool *rightsib_empty,
- TransactionId *oldestBtpoXact);
+ TransactionId *oldestBtpoXact,
+ uint32 *ndeleted);
static bool _bt_lock_branch_parent(Relation rel, BlockNumber child,
BTStack stack, Buffer *topparent, OffsetNumber *topoff,
BlockNumber *target, BlockNumber *rightsib);
*
* Returns the number of pages successfully deleted (zero if page cannot
* be deleted now; could be more than one if parent or right sibling pages
- * were deleted too).
+ * were deleted too). Note that this does not include pages that we delete
+ * that btvacuumscan has yet to reach; they'll get counted later
+ * instead.
*
* Maintains *oldestBtpoXact for any pages that get deleted. Caller is
* responsible for maintaining *oldestBtpoXact in the case of pages that were
* carefully, it's better to run it in a temp context that can be reset
* frequently.
*/
-int
+uint32
_bt_pagedel(Relation rel, Buffer leafbuf, TransactionId *oldestBtpoXact)
{
- int ndeleted = 0;
+ uint32 ndeleted = 0;
BlockNumber rightsib;
bool rightsib_empty;
Page page;
BTPageOpaque opaque;
+ /*
+ * Save original leafbuf block number from caller. Only deleted blocks
+ * that are <= scanblkno get counted in ndeleted return value.
+ */
+ BlockNumber scanblkno = BufferGetBlockNumber(leafbuf);
+
/*
* "stack" is a search stack leading (approximately) to the target page.
* It is initially NULL, but when iterating, we keep it to avoid
if (P_ISDELETED(opaque))
ereport(LOG,
(errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg_internal("found deleted block %u while following right link in index \"%s\"",
+ errmsg_internal("found deleted block %u while following right link from block %u in index \"%s\"",
BufferGetBlockNumber(leafbuf),
+ scanblkno,
RelationGetRelationName(rel))));
_bt_relbuf(rel, leafbuf);
while (P_ISHALFDEAD(opaque))
{
/* Check for interrupts in _bt_unlink_halfdead_page */
- if (!_bt_unlink_halfdead_page(rel, leafbuf, &rightsib_empty,
- oldestBtpoXact))
+ if (!_bt_unlink_halfdead_page(rel, leafbuf, scanblkno,
+ &rightsib_empty, oldestBtpoXact,
+ &ndeleted))
{
/* _bt_unlink_halfdead_page failed, released buffer */
return ndeleted;
}
- ndeleted++;
}
Assert(P_ISLEAF(opaque) && P_ISDELETED(opaque));
* to avoid having to reacquire a lock we already released).
*/
static bool
-_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty,
- TransactionId *oldestBtpoXact)
+_bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
+ bool *rightsib_empty, TransactionId *oldestBtpoXact,
+ uint32 *ndeleted)
{
BlockNumber leafblkno = BufferGetBlockNumber(leafbuf);
BlockNumber leafleftsib;
TransactionIdPrecedes(opaque->btpo.xact, *oldestBtpoXact))
*oldestBtpoXact = opaque->btpo.xact;
+ /*
+ * If btvacuumscan won't revisit this page in a future btvacuumpage call
+ * (and count it as deleted at that point), count it as deleted in the
+ * current btvacuumpage call.
+ */
+ if (target <= scanblkno)
+ (*ndeleted)++;
+
/*
* Release the target, if it was not the leaf block. The leaf is always
* kept locked.
if (delete_now)
{
MemoryContext oldcontext;
- int ndel;
/* Run pagedel in a temp context to avoid memory leakage */
MemoryContextReset(vstate->pagedelcontext);
oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);
- ndel = _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
-
- /* count only this page, else may double-count parent */
- if (ndel)
- stats->pages_deleted++;
+ /*
+ * We trust the _bt_pagedel return value because it does not include
+ * any page that a future call here from btvacuumscan is expected to
+ * count. There will be no double-counting.
+ */
+ stats->pages_deleted += _bt_pagedel(rel, buf, &vstate->oldestBtpoXact);
MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */