Fix "single value strategy" index deletion issue.

author Peter Geoghegan <pg@bowt.ie>

Wed, 22 Sep 2021 01:57:32 +0000 (18:57 -0700)

committer Peter Geoghegan <pg@bowt.ie>

Wed, 22 Sep 2021 01:57:32 +0000 (18:57 -0700)
author Peter Geoghegan <pg@bowt.ie>
Wed, 22 Sep 2021 01:57:32 +0000 (18:57 -0700)
committer Peter Geoghegan <pg@bowt.ie>
Wed, 22 Sep 2021 01:57:32 +0000 (18:57 -0700)
diff --git a/src/backend/access/nbtree/nbtdedup.c b/src/backend/access/nbtree/nbtdedup.c

index 271994b08df13af40118ae701d826feedb69621e..6401fce57b91ef38fc4e09b175c831078f26d754 100644 (file)
--- a/src/backend/access/nbtree/nbtdedup.c
+++ b/src/backend/access/nbtree/nbtdedup.c
@@ -34,14 +34,17 @@ static bool _bt_posting_valid(IndexTuple posting);
   *
   * The general approach taken here is to perform as much deduplication as
   * possible to free as much space as possible.  Note, however, that "single
- * value" strategy is sometimes used for !checkingunique callers, in which
- * case deduplication will leave a few tuples untouched at the end of the
- * page.  The general idea is to prepare the page for an anticipated page
- * split that uses nbtsplitloc.c's "single value" strategy to determine a
- * split point.  (There is no reason to deduplicate items that will end up on
- * the right half of the page after the anticipated page split; better to
- * handle those if and when the anticipated right half page gets its own
- * deduplication pass, following further inserts of duplicates.)
+ * value" strategy is used for !bottomupdedup callers when the page is full of
+ * tuples of a single value.  Deduplication passes that apply the strategy
+ * will leave behind a few untouched tuples at the end of the page, preparing
+ * the page for an anticipated page split that uses nbtsplitloc.c's own single
+ * value strategy.  Our high level goal is to delay merging the untouched
+ * tuples until after the page splits.
+ *
+ * When a call to _bt_bottomupdel_pass() just took place (and failed), our
+ * high level goal is to prevent a page split entirely by buying more time.
+ * We still hope that a page split can be avoided altogether.  That's why
+ * single value strategy is not even considered for bottomupdedup callers.
   *
   * The page will have to be split if we cannot successfully free at least
   * newitemsz (we also need space for newitem's line pointer, which isn't
@@ -52,7 +55,7 @@ static bool _bt_posting_valid(IndexTuple posting);
   */
  void
  _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem,
-              Size newitemsz, bool checkingunique)
+              Size newitemsz, bool bottomupdedup)
  {
     OffsetNumber offnum,
                 minoff,
@@ -97,8 +100,11 @@ _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel, IndexTuple newitem,
     minoff = P_FIRSTDATAKEY(opaque);
     maxoff = PageGetMaxOffsetNumber(page);
  
-   /* Determine if "single value" strategy should be used */
-   if (!checkingunique)
+   /*
+    * Consider applying "single value" strategy, though only if the page
+    * seems likely to be split in the near future
+    */
+   if (!bottomupdedup)
         singlevalstrat = _bt_do_singleval(rel, page, state, minoff, newitem);
  
     /*
@@ -764,14 +770,6 @@ _bt_bottomupdel_finish_pending(Page page, BTDedupState state,
   * the first pass) won't spend many cycles on the large posting list tuples
   * left by previous passes.  Each pass will find a large contiguous group of
   * smaller duplicate tuples to merge together at the end of the page.
- *
- * Note: We deliberately don't bother checking if the high key is a distinct
- * value (prior to the TID tiebreaker column) before proceeding, unlike
- * nbtsplitloc.c.  Its single value strategy only gets applied on the
- * rightmost page of duplicates of the same value (other leaf pages full of
- * duplicates will get a simple 50:50 page split instead of splitting towards
- * the end of the page).  There is little point in making the same distinction
- * here.
   */
  static bool
  _bt_do_singleval(Relation rel, Page page, BTDedupState state,
diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c

index 6ac205c98ee8e8147e75f32b13ed07a44d38049f..7355e1dba136339ec0ce62255db185e820eb4dad 100644 (file)
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -2748,7 +2748,7 @@ _bt_delete_or_dedup_one_page(Relation rel, Relation heapRel,
     /* Perform deduplication pass (when enabled and index-is-allequalimage) */
     if (BTGetDeduplicateItems(rel) && itup_key->allequalimage)
         _bt_dedup_pass(rel, buffer, heapRel, insertstate->itup,
-                      insertstate->itemsz, checkingunique);
+                      insertstate->itemsz, (indexUnchanged || uniquedup));
  }
  
  /*
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h

index 42c66fac57c4953fce41a773ea2bfdc1f359f064..30a216e4c0d0c733f419a2d34600ce591c1fb42a 100644 (file)
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1155,7 +1155,7 @@ extern void _bt_parallel_advance_array_keys(IndexScanDesc scan);
   */
  extern void _bt_dedup_pass(Relation rel, Buffer buf, Relation heapRel,
                            IndexTuple newitem, Size newitemsz,
-                          bool checkingunique);
+                          bool bottomupdedup);
  extern bool _bt_bottomupdel_pass(Relation rel, Buffer buf, Relation heapRel,
                                  Size newitemsz);
  extern void _bt_dedup_start_pending(BTDedupState state, IndexTuple base,
author	Peter Geoghegan <pg@bowt.ie>
	Wed, 22 Sep 2021 01:57:32 +0000 (18:57 -0700)
committer	Peter Geoghegan <pg@bowt.ie>
	Wed, 22 Sep 2021 01:57:32 +0000 (18:57 -0700)
src/backend/access/nbtree/nbtdedup.c		patch \| blob \| blame \| history
src/backend/access/nbtree/nbtinsert.c		patch \| blob \| blame \| history
src/include/access/nbtree.h		patch \| blob \| blame \| history