Optimize the case where a btree indexscan has current and mark positions
author Tom Lane <tgl@sss.pgh.pa.us>
Thu, 24 Aug 2006 01:18:34 +0000 (01:18 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Thu, 24 Aug 2006 01:18:34 +0000 (01:18 +0000)
on the same index page; we can avoid data copying as well as buffer refcount
manipulations in this common case.  Makes for a small but noticeable
improvement in mergejoin speed.

Heikki Linnakangas

src/backend/access/nbtree/nbtree.c
src/backend/access/nbtree/nbtsearch.c
src/include/access/nbtree.h

index 3329321c0ffd14727301296d441ace0dd1ad4bce..c58974cca139fabec66eb6fcf521d77de697cb1b 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.149 2006/05/10 23:18:39 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.150 2006/08/24 01:18:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -392,6 +392,7 @@ btrescan(PG_FUNCTION_ARGS)
                ReleaseBuffer(so->markPos.buf);
                so->markPos.buf = InvalidBuffer;
        }
+       so->markItemIndex = -1;
 
        /*
         * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
@@ -430,6 +431,7 @@ btendscan(PG_FUNCTION_ARGS)
                ReleaseBuffer(so->markPos.buf);
                so->markPos.buf = InvalidBuffer;
        }
+       so->markItemIndex = -1;
 
        if (so->killedItems != NULL)
                pfree(so->killedItems);
@@ -456,14 +458,16 @@ btmarkpos(PG_FUNCTION_ARGS)
                so->markPos.buf = InvalidBuffer;
        }
 
-       /* bump pin on current buffer for assignment to mark buffer */
+       /*
+        * Just record the current itemIndex.  If we later step to next page
+        * before releasing the marked position, _bt_steppage makes a full copy
+        * of the currPos struct in markPos.  If (as often happens) the mark is
+        * moved before we leave the page, we don't have to do that work.
+        */
        if (BTScanPosIsValid(so->currPos))
-       {
-               IncrBufferRefCount(so->currPos.buf);
-               memcpy(&so->markPos, &so->currPos,
-                          offsetof(BTScanPosData, items[1]) +
-                          so->currPos.lastItem * sizeof(BTScanPosItem));
-       }
+               so->markItemIndex = so->currPos.itemIndex;
+       else
+               so->markItemIndex = -1;
 
        PG_RETURN_VOID();
 }
@@ -477,24 +481,35 @@ btrestrpos(PG_FUNCTION_ARGS)
        IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
        BTScanOpaque so = (BTScanOpaque) scan->opaque;
 
-       /* we aren't holding any read locks, but gotta drop the pin */
-       if (BTScanPosIsValid(so->currPos))
+       if (so->markItemIndex >= 0)
        {
-               /* Before leaving current page, deal with any killed items */
-               if (so->numKilled > 0 &&
-                       so->currPos.buf != so->markPos.buf)
-                       _bt_killitems(scan, false);
-               ReleaseBuffer(so->currPos.buf);
-               so->currPos.buf = InvalidBuffer;
-       }
-
-       /* bump pin on marked buffer */
-       if (BTScanPosIsValid(so->markPos))
+               /*
+                * The mark position is on the same page we are currently on.
+                * Just restore the itemIndex.
+                */
+               so->currPos.itemIndex = so->markItemIndex;
+       } 
+       else
        {
-               IncrBufferRefCount(so->markPos.buf);
-               memcpy(&so->currPos, &so->markPos,
-                          offsetof(BTScanPosData, items[1]) +
-                          so->markPos.lastItem * sizeof(BTScanPosItem));
+               /* we aren't holding any read locks, but gotta drop the pin */
+               if (BTScanPosIsValid(so->currPos))
+               {
+                       /* Before leaving current page, deal with any killed items */
+                       if (so->numKilled > 0 &&
+                               so->currPos.buf != so->markPos.buf)
+                               _bt_killitems(scan, false);
+                       ReleaseBuffer(so->currPos.buf);
+                       so->currPos.buf = InvalidBuffer;
+               }
+
+               if (BTScanPosIsValid(so->markPos))
+               {
+                       /* bump pin on mark buffer for assignment to current buffer */
+                       IncrBufferRefCount(so->markPos.buf);
+                       memcpy(&so->currPos, &so->markPos,
+                                  offsetof(BTScanPosData, items[1]) +
+                                  so->markPos.lastItem * sizeof(BTScanPosItem));
+               }
        }
 
        PG_RETURN_VOID();
index 2c1dfc3eb474ffef4ab801f146532c9e2aa500b7..07bc076e49b3026cf30cc0f38c3b7eadbb905549 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.105 2006/05/07 01:21:30 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.106 2006/08/24 01:18:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -815,6 +815,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
                so->currPos.moreRight = false;
        }
        so->numKilled = 0;                      /* just paranoia */
+       so->markItemIndex = -1;         /* ditto */
 
        /* position to the precise item on the page */
        offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);
@@ -1053,6 +1054,21 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
        if (so->numKilled > 0)
                _bt_killitems(scan, true);
 
+       /*
+        * Before we modify currPos, make a copy of the page data if there
+        * was a mark position that needs it.
+        */
+       if (so->markItemIndex >= 0)
+       {
+               /* bump pin on current buffer for assignment to mark buffer */
+               IncrBufferRefCount(so->currPos.buf);
+               memcpy(&so->markPos, &so->currPos,
+                          offsetof(BTScanPosData, items[1]) +
+                          so->currPos.lastItem * sizeof(BTScanPosItem));
+               so->markPos.itemIndex = so->markItemIndex;
+               so->markItemIndex = -1;
+       }
+
        rel = scan->indexRelation;
 
        if (ScanDirectionIsForward(dir))
@@ -1408,6 +1424,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
                so->currPos.moreRight = false;
        }
        so->numKilled = 0;                      /* just paranoia */
+       so->markItemIndex = -1;         /* ditto */
 
        /*
         * Now load data from the first page of the scan.
index e460bbb0d50cf2bf39a50c14b3b512b0510734b9..33d295a71b24b034e26d3117f567e61e9704780d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.103 2006/08/07 16:57:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.104 2006/08/24 01:18:34 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -438,6 +438,15 @@ typedef struct BTScanOpaqueData
        int                *killedItems;        /* currPos.items indexes of killed items */
        int                     numKilled;              /* number of currently stored items */
 
+       /*
+        * If the marked position is on the same page as current position,
+        * we don't use markPos, but just keep the marked itemIndex in
+        * markItemIndex (all the rest of currPos is valid for the mark position).
+        * Hence, to determine if there is a mark, first look at markItemIndex,
+        * then at markPos.
+        */
+       int                     markItemIndex;  /* itemIndex, or -1 if not valid */
+
        /* keep these last in struct for efficiency */
        BTScanPosData currPos;          /* current position data */
        BTScanPosData markPos;          /* marked position, if any */