summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHeikki Linnakangas2014-01-29 19:22:08 +0000
committerHeikki Linnakangas2014-01-29 19:24:38 +0000
commit626a120656a75bf4fe64b1d0d83c23cb38d3771a (patch)
tree5df49f8a6e195dde0fd21e5e7fb29fef5d666bfb
parent8440897b38be38903ecc2041002bba08e08308ad (diff)
Further optimize GIN multi-key searches.
When skipping over some items in a posting tree, re-find the new location by descending the tree from root, rather than walking the right links. This can save a lot of I/O. Heavily modified from Alexander Korotkov's fast scan patch.
-rw-r--r--src/backend/access/gin/gindatapage.c9
-rw-r--r--src/backend/access/gin/ginget.c115
-rw-r--r--src/include/access/gin_private.h3
3 files changed, 98 insertions, 29 deletions
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index 9a0b8ab1f21..c6230f3bc5a 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -1639,16 +1639,15 @@ ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
* Starts a new scan on a posting tree.
*/
GinBtreeStack *
-ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno)
+ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno)
{
- GinBtreeData btree;
GinBtreeStack *stack;
- ginPrepareDataScan(&btree, index, rootBlkno);
+ ginPrepareDataScan(btree, index, rootBlkno);
- btree.fullScan = TRUE;
+ btree->fullScan = TRUE;
- stack = ginFindLeafPage(&btree, TRUE);
+ stack = ginFindLeafPage(btree, TRUE);
return stack;
}
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 49e47c6859c..a45d72212e9 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -99,12 +99,13 @@ static void
scanPostingTree(Relation index, GinScanEntry scanEntry,
BlockNumber rootPostingTree)
{
+ GinBtreeData btree;
GinBtreeStack *stack;
Buffer buffer;
Page page;
/* Descend to the leftmost leaf page */
- stack = ginScanBeginPostingTree(index, rootPostingTree);
+ stack = ginScanBeginPostingTree(&btree, index, rootPostingTree);
buffer = stack->buffer;
IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
@@ -412,7 +413,8 @@ restartScanEntry:
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
needUnlock = FALSE;
- stack = ginScanBeginPostingTree(ginstate->index, rootPostingTree);
+ stack = ginScanBeginPostingTree(&entry->btree, ginstate->index,
+ rootPostingTree);
entry->buffer = stack->buffer;
/*
@@ -506,8 +508,60 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
{
Page page;
int i;
+ bool stepright;
+
+ if (!BufferIsValid(entry->buffer))
+ {
+ entry->isFinished = true;
+ return;
+ }
+
+ /*
+ * We have two strategies for finding the correct page: step right from
+ * the current page, or descend the tree again from the root. If
+ * advancePast equals the current item, the next matching item should be
+ * on the next page, so we step right. Otherwise, descend from root.
+ */
+ if (ginCompareItemPointers(&entry->curItem, &advancePast) == 0)
+ {
+ stepright = true;
+ LockBuffer(entry->buffer, GIN_SHARE);
+ }
+ else
+ {
+ GinBtreeStack *stack;
+
+ ReleaseBuffer(entry->buffer);
+
+ /*
+ * Set the search key, and find the correct leaf page.
+ */
+ if (ItemPointerIsLossyPage(&advancePast))
+ {
+ ItemPointerSet(&entry->btree.itemptr,
+ GinItemPointerGetBlockNumber(&advancePast) + 1,
+ FirstOffsetNumber);
+ }
+ else
+ {
+ entry->btree.itemptr = advancePast;
+ entry->btree.itemptr.ip_posid++;
+ }
+ entry->btree.fullScan = false;
+ stack = ginFindLeafPage(&entry->btree, true);
+
+ /* we don't need the stack, just the buffer. */
+ entry->buffer = stack->buffer;
+ IncrBufferRefCount(entry->buffer);
+ freeGinBtreeStack(stack);
+ stepright = false;
+ }
+
+ elog(DEBUG2, "entryLoadMoreItems, %u/%u, skip: %d",
+ GinItemPointerGetBlockNumber(&advancePast),
+ GinItemPointerGetOffsetNumber(&advancePast),
+ !stepright);
- LockBuffer(entry->buffer, GIN_SHARE);
page = BufferGetPage(entry->buffer);
for (;;)
{
@@ -519,31 +573,35 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
entry->nlist = 0;
}
- /*
- * We've processed all the entries on this page. If it was the last
- * page in the tree, we're done.
- */
- if (GinPageRightMost(page))
+ if (stepright)
{
- UnlockReleaseBuffer(entry->buffer);
- entry->buffer = InvalidBuffer;
- entry->isFinished = TRUE;
- return;
+ /*
+ * We've processed all the entries on this page. If it was the last
+ * page in the tree, we're done.
+ */
+ if (GinPageRightMost(page))
+ {
+ UnlockReleaseBuffer(entry->buffer);
+ entry->buffer = InvalidBuffer;
+ entry->isFinished = TRUE;
+ return;
+ }
+
+ /*
+ * Step to next page, following the right link. then find the first
+ * ItemPointer greater than advancePast.
+ */
+ entry->buffer = ginStepRight(entry->buffer,
+ ginstate->index,
+ GIN_SHARE);
+ page = BufferGetPage(entry->buffer);
}
+ stepright = true;
if (GinPageGetOpaque(page)->flags & GIN_DELETED)
continue; /* page was deleted by concurrent vacuum */
/*
- * Step to next page, following the right link. then find the first
- * ItemPointer greater than advancePast.
- */
- entry->buffer = ginStepRight(entry->buffer,
- ginstate->index,
- GIN_SHARE);
- page = BufferGetPage(entry->buffer);
-
- /*
* The first item > advancePast might not be on this page, but
* somewhere to the right, if the page was split, or a non-match from
* another key in the query allowed us to skip some items from this
@@ -566,8 +624,16 @@ entryLoadMoreItems(GinState *ginstate, GinScanEntry entry, ItemPointerData advan
{
if (ginCompareItemPointers(&advancePast, &entry->list[i]) < 0)
{
- LockBuffer(entry->buffer, GIN_UNLOCK);
entry->offset = i;
+
+ if (GinPageRightMost(page))
+ {
+ /* after processing the copied items, we're done. */
+ UnlockReleaseBuffer(entry->buffer);
+ entry->buffer = InvalidBuffer;
+ }
+ else
+ LockBuffer(entry->buffer, GIN_UNLOCK);
return;
}
}
@@ -677,7 +743,10 @@ entryGetItem(GinState *ginstate, GinScanEntry entry,
}
else if (!BufferIsValid(entry->buffer))
{
- /* A posting list from an entry tuple */
+ /*
+ * A posting list from an entry tuple, or the last page of a posting
+ * tree.
+ */
do
{
if (entry->offset >= entry->nlist)
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index ea9ae31acc0..bb0ab317cbc 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -702,7 +702,7 @@ extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
ItemPointerData *items, uint32 nitem,
GinStatsData *buildStats);
-extern GinBtreeStack *ginScanBeginPostingTree(Relation index, BlockNumber rootBlkno);
+extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno);
extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
@@ -802,6 +802,7 @@ typedef struct GinScanEntryData
bool isFinished;
bool reduceResult;
uint32 predictNumberResult;
+ GinBtreeData btree;
} GinScanEntryData;
typedef struct GinScanOpaqueData