More hacking.
authorRobert Haas <rhaas@postgresql.org>
Wed, 19 Feb 2014 22:18:19 +0000 (17:18 -0500)
committerRobert Haas <rhaas@postgresql.org>
Wed, 19 Feb 2014 22:18:19 +0000 (17:18 -0500)
src/backend/utils/mmgr/freepage.c

index 63a46e05faabe2d0b4d6c06cd43daf8a252264d9..cb6c8690f5d473c215eb243aece421104aa80f70 100644 (file)
@@ -88,9 +88,12 @@ static void FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm,
 static bool FreePageManagerGetInternal(FreePageManager *fpm, Size npages,
                                                   Size *first_page);
 static void FreePageBtreeRecycle(FreePageManager *fpm, Size pageno);
-static void FreePageBtreePageRemove(FreePageBtree *btp, Size index);
+static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
+                                       Size index);
 static bool FreePageManagerPutInternal(FreePageManager *fpm, Size first_page,
                                                   Size npages, bool soft);
+static void FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp,
+                                       Size index);
 static void FreePageBtreeSearch(FreePageManager *fpm, Size first_page,
                                        FreePageBtreeSearchResult *result);
 static Size FreePageBtreeSearchInternal(FreePageBtree *btp, Size first_page);
@@ -197,10 +200,10 @@ FreePageManagerPut(FreePageManager *fpm, Size first_page, Size npages)
 /*
  * The first_page value stored at index zero in any non-root page must match
  * the first_page value stored in its parent at the index which points to that
- * page.  So when the value stored at index zero in a leaf page changes, we've
+ * page.  So when the value stored at index zero in a btree page changes, we've
  * got to walk up the tree adjusting ancestor keys until we reach an ancestor
  * where that key isn't index zero.  This function should be called after
- * updating the first key on the leaf page; it will propagate the change
+ * updating the first key on the target page; it will propagate the change
  * upward as far as needed.
  *
  * We assume here that the first key on the page has not changed enough to
@@ -218,11 +221,22 @@ FreePageBtreeAdjustAncestorKeys(FreePageManager *fpm, FreePageBtree *btp)
        FreePageBtree *parent;
        FreePageBtree *child;
 
-       Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
-       Assert(btp->hdr.nused > 0 && btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
-       first_page = btp->u.leaf_key[0].first_page;
+       /* This might be either a leaf or an internal page. */
+       Assert(btp->hdr.nused > 0);
+       if (btp->hdr.magic == FREE_PAGE_LEAF_MAGIC)
+       {
+               Assert(btp->hdr.nused <= FPM_ITEMS_PER_LEAF_PAGE);
+               first_page = btp->u.leaf_key[0].first_page;
+       }
+       else
+       {
+               Assert(btp->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+               Assert(btp->hdr.nused <= FPM_ITEMS_PER_INTERNAL_PAGE);
+               first_page = btp->u.internal_key[0].first_page;
+       }
        child = btp;
 
+       /* Loop until we find an ancestor that does not require adjustment. */
        for (;;)
        {
                Size    s;
@@ -403,15 +417,89 @@ FreePageBtreeRecycle(FreePageManager *fpm, Size pageno)
  * Remove an item from the btree at the given position on the given page.
  */
 static void
-FreePageBtreeRemoveLeaf(FreePageBtree *btp, Size index)
+FreePageBtreeRemove(FreePageManager *fpm, FreePageBtree *btp, Size index)
 {
+       char *base = fpm_segment_base(fpm);
+       FreePageBtree *parent;
+       FreePageBtree *child;
+       Size    first_page;
+
        Assert(btp->hdr.magic == FREE_PAGE_LEAF_MAGIC);
        Assert(index < btp->hdr.nused);
 
-       --btp->hdr.nused;
-       if (index < btp->hdr.nused)
-               memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
-                               sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
+       /* 
+        * If there's more than one key remaining on the page, then things are
+        * pretty simple.  We need to physically remove the key from the page;
+        * and if it's the first key on the page, then we need to adjust its
+        * ancestor keys.  Then we're done.
+        */
+       if (btp->hdr.nused > 1)
+       {
+               --btp->hdr.nused;
+               if (index < btp->hdr.nused)
+                       memmove(&btp->u.leaf_key[index], &btp->u.leaf_key[index + 1],
+                                       sizeof(FreePageBtreeLeafKey) * (btp->hdr.nused - index));
+               if (index == 0)
+                       FreePageBtreeAdjustAncestorKeys(fpm, btp);
+
+               /*
+                * XXX. We could try to consolidate the page with its left or right
+                * sibling, or some more complicated key redistribution scheme, to
+                * avoid ending up with lots of mostly-empty btree pages.
+                */
+
+               return;
+       }
+
+       /*
+        * We're removing the last key on the leaf page; this will require
+        * removing the downlink from the parent, which may in turn cause the
+        * parent to become empty.  We work our way up the tree until we reach
+        * a point where removing the key doesn't leave the tree empty, or until
+        * we reach the root.
+        */
+       first_page = btp->u.leaf_key[index].first_page;
+       child = btp;
+       for (;;)
+       {
+               /* Find parent page. */
+               parent = relptr_access(base, child->hdr.parent);
+
+               /* Recycle child page. */
+               FreePageBtreeRecycle(fpm, fpm_pointer_to_page(base, child));
+
+               /* Stop if we don't need to remove this page, or it's the root. */
+               if (parent == NULL || parent->hdr.nused > 1)
+                       break;
+
+               /* Prepare to loop around again. */
+               child = parent;
+       }
+
+       /* Handle the case where we've just recycled the root. */
+       if (parent == NULL)
+       {
+               relptr_store(base, fpm->btree_root, (FreePageBtree *) NULL);
+               fpm->btree_depth = 0;
+               Assert(fpm->singleton_first_page == 0);
+               Assert(fpm->singleton_npages == 0);
+               return;
+       }
+
+       /*
+        * We've reached an internal page containing more than one key. Remove
+        * the downlink, and adjust ancestor keys as needed.
+        */
+       Assert(parent->hdr.nused > 1);
+       Assert(parent->hdr.magic == FREE_PAGE_INTERNAL_MAGIC);
+       index = FreePageBtreeSearchInternal(parent, first_page);
+       Assert(parent->u.internal_key[index].first_page == first_page);
+       --parent->hdr.nused;
+       if (index < parent->hdr.nused)
+               memmove(&btp->u.internal_key[index], &btp->u.internal_key[index + 1],
+                               sizeof(FreePageBtreeInternalKey) * (btp->hdr.nused - index));
+       if (index == 0)
+               FreePageBtreeAdjustAncestorKeys(fpm, parent);
 }
 
 /*
@@ -751,6 +839,10 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
                 * sure to update result.split_pages.
                 */
 
+               /* If this is a soft insert, it's time to give up. */
+               if (soft)
+                       return false;
+
                /* Check whether we need to allocate more btree pages to split. */
                if (result.split_pages > fpm->btree_recycle_count)
                {
@@ -819,4 +911,6 @@ FreePageManagerPutInternal(FreePageManager *fpm, Size first_page, Size npages,
        /* If new first key on page, ancestors might need adjustment. */
        if (index == 0)
                FreePageBtreeAdjustAncestorKeys(fpm, result.page_next);
+
+       return true;
 }