* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.83 2003/11/29 19:51:40 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.84 2003/12/21 01:23:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
* _bt_search() -- Search the tree for a particular scankey,
* or more precisely for the first leaf page it could be on.
*
+ * When nextkey is false (the usual case), we are looking for the first
+ * item >= scankey. When nextkey is true, we are looking for the first
+ * item strictly greater than scankey.
+ *
* Return value is a stack of parent-page pointers. *bufP is set to the
* address of the leaf-page buffer, which is read-locked and pinned.
* No locks are held on the parent pages, however!
* will result in *bufP being set to InvalidBuffer.
*/
BTStack
-_bt_search(Relation rel, int keysz, ScanKey scankey,
+_bt_search(Relation rel, int keysz, ScanKey scankey, bool nextkey,
Buffer *bufP, int access)
{
BTStack stack_in = NULL;
* its pointer in the parent (or metapage). If it has, we may
* need to move right to its new sibling. Do that.
*/
- *bufP = _bt_moveright(rel, *bufP, keysz, scankey, BT_READ);
+ *bufP = _bt_moveright(rel, *bufP, keysz, scankey, nextkey, BT_READ);
/* if this is a leaf page, we're done */
page = BufferGetPage(*bufP);
* Find the appropriate item on the internal page, and get the
* child page that it points to.
*/
- offnum = _bt_binsrch(rel, *bufP, keysz, scankey);
+ offnum = _bt_binsrch(rel, *bufP, keysz, scankey, nextkey);
itemid = PageGetItemId(page, offnum);
btitem = (BTItem) PageGetItem(page, itemid);
itup = &(btitem->bti_itup);
/*
* _bt_moveright() -- move right in the btree if necessary.
*
- * When we follow a pointer to reach a page, it is possible that
- * the page has changed in the meanwhile. If this happens, we're
- * guaranteed that the page has "split right" -- that is, that any
- * data that appeared on the page originally is either on the page
- * or strictly to the right of it.
+ * When we follow a pointer to reach a page, it is possible that
+ * the page has changed in the meanwhile. If this happens, we're
+ * guaranteed that the page has "split right" -- that is, that any
+ * data that appeared on the page originally is either on the page
+ * or strictly to the right of it.
+ *
+ * When nextkey is false (the usual case), we are looking for the first
+ * item >= scankey. When nextkey is true, we are looking for the first
+ * item strictly greater than scankey.
*
- * This routine decides whether or not we need to move right in the
- * tree by examining the high key entry on the page. If that entry
- * is strictly less than one we expect to be on the page, then our
- * picture of the page is incorrect and we need to move right.
+ * This routine decides whether or not we need to move right in the
+ * tree by examining the high key entry on the page. If that entry
+ * is strictly less than the scankey, or <= the scankey in the nextkey=true
+ * case, then we followed the wrong link and we need to move right.
*
- * On entry, we have the buffer pinned and a lock of the proper type.
- * If we move right, we release the buffer and lock and acquire the
- * same on the right sibling. Return value is the buffer we stop at.
+ * On entry, we have the buffer pinned and a lock of the type specified by
+ * 'access'. If we move right, we release the buffer and lock and acquire
+ * the same on the right sibling. Return value is the buffer we stop at.
*/
Buffer
_bt_moveright(Relation rel,
Buffer buf,
int keysz,
ScanKey scankey,
+ bool nextkey,
int access)
{
Page page;
BTPageOpaque opaque;
+ int32 cmpval;
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/*
- * If the scan key that brought us to this page is > the high key
- * stored on the page, then the page has split and we need to move
- * right. (If the scan key is equal to the high key, we might or
- * might not need to move right; have to scan the page first anyway.)
- * It could even have split more than once, so scan as far as needed.
+ * When nextkey = false (normal case): if the scan key that brought us to
+ * this page is > the high key stored on the page, then the page has split
+ * and we need to move right. (If the scan key is equal to the high key,
+ * we might or might not need to move right; have to scan the page first
+ * anyway.)
+ *
+ * When nextkey = true: move right if the scan key is >= page's high key.
+ *
+ * The page could even have split more than once, so scan as far as needed.
*
* We also have to move right if we followed a link that brought us to a
* dead page.
*/
+ cmpval = nextkey ? 0 : 1;
+
while (!P_RIGHTMOST(opaque) &&
(P_IGNORE(opaque) ||
- _bt_compare(rel, keysz, scankey, page, P_HIKEY) > 0))
+ _bt_compare(rel, keysz, scankey, page, P_HIKEY) >= cmpval))
{
/* step right one page */
BlockNumber rblkno = opaque->btpo_next;
/*
* _bt_binsrch() -- Do a binary search for a key on a particular page.
*
+ * When nextkey is false (the usual case), we are looking for the first
+ * item >= scankey. When nextkey is true, we are looking for the first
+ * item strictly greater than scankey.
+ *
* The scankey we get has the compare function stored in the procedure
* entry of each data struct. We invoke this regproc to do the
* comparison for every key in the scankey.
*
* On a leaf page, _bt_binsrch() returns the OffsetNumber of the first
- * key >= given scankey. (NOTE: in particular, this means it is possible
- * to return a value 1 greater than the number of keys on the page,
- * if the scankey is > all keys on the page.)
+ * key >= given scankey, or > scankey if nextkey is true. (NOTE: in
+ * particular, this means it is possible to return a value 1 greater than the
+ * number of keys on the page, if the scankey is > all keys on the page,
+ * or >= all keys on the page when nextkey is true.)
*
* On an internal (non-leaf) page, _bt_binsrch() returns the OffsetNumber
- * of the last key < given scankey. (Since _bt_compare treats the first
- * data key of such a page as minus infinity, there will be at least one
- * key < scankey, so the result always points at one of the keys on the
- * page.) This key indicates the right place to descend to be sure we
- * find all leaf keys >= given scankey.
+ * of the last key < given scankey, or last key <= given scankey if nextkey
+ * is true. (Since _bt_compare treats the first data key of such a page as
+ * minus infinity, there will be at least one key < scankey, so the result
+ * always points at one of the keys on the page.) This key indicates the
+ * right place to descend to be sure we find all leaf keys >= given scankey
+ * (or leaf keys > given scankey when nextkey is true).
*
* This procedure is not responsible for walking right, it just examines
* the given page. _bt_binsrch() has no lock or refcount side effects
_bt_binsrch(Relation rel,
Buffer buf,
int keysz,
- ScanKey scankey)
+ ScanKey scankey,
+ bool nextkey)
{
TupleDesc itupdesc;
Page page;
BTPageOpaque opaque;
OffsetNumber low,
high;
- int32 result;
+ int32 result,
+ cmpval;
itupdesc = RelationGetDescr(rel);
page = BufferGetPage(buf);
return low;
/*
- * Binary search to find the first key on the page >= scan key. Loop
- * invariant: all slots before 'low' are < scan key, all slots at or
- * after 'high' are >= scan key. We can fall out when high == low.
+ * Binary search to find the first key on the page >= scan key, or
+ * first key > scankey when nextkey is true.
+ *
+ * For nextkey=false (cmpval=1), the loop invariant is: all slots
+ * before 'low' are < scan key, all slots at or after 'high'
+ * are >= scan key.
+ *
+ * For nextkey=true (cmpval=0), the loop invariant is: all slots
+ * before 'low' are <= scan key, all slots at or after 'high'
+ * are > scan key.
+ *
+ * We can fall out when high == low.
*/
high++; /* establish the loop invariant for high */
+ cmpval = nextkey ? 0 : 1; /* select comparison value */
+
while (high > low)
{
OffsetNumber mid = low + ((high - low) / 2);
result = _bt_compare(rel, keysz, scankey, page, mid);
- if (result > 0)
+ if (result >= cmpval)
low = mid + 1;
else
high = mid;
* At this point we have high == low, but be careful: they could point
* past the last slot on the page.
*
- * On a leaf page, we always return the first key >= scan key (which
- * could be the last slot + 1).
+ * On a leaf page, we always return the first key >= scan key (or the
+ * first key > scan key when nextkey is true), which could be the last
+ * slot + 1.
*/
if (P_ISLEAF(opaque))
return low;
/*
- * On a non-leaf page, return the last key < scan key. There must be
- * one if _bt_compare() is playing by the rules.
+ * On a non-leaf page, return the last key < scan key (or the last key
+ * <= scan key when nextkey is true). There must be one if _bt_compare()
+ * is playing by the rules.
*/
Assert(low > P_FIRSTDATAKEY(opaque));
StrategyNumber strat;
bool res;
int32 result;
+ bool nextkey;
bool continuescan;
ScanKey scankeys = NULL;
ScanKey *startKeys = NULL;
current = &(scan->currentItemData);
+ /*
+ * We want to locate either the first item >= boundary point, or
+ * first item > boundary point, depending on the initial-positioning
+ * strategy we just chose.
+ */
+ switch (strat_total)
+ {
+ case BTLessStrategyNumber:
+ nextkey = false;
+ break;
+
+ case BTLessEqualStrategyNumber:
+ nextkey = true;
+ break;
+
+ case BTEqualStrategyNumber:
+ /*
+ * If a backward scan was specified, need to start with last
+ * equal item not first one.
+ */
+ if (ScanDirectionIsBackward(dir))
+ nextkey = true;
+ else
+ nextkey = false;
+ break;
+
+ case BTGreaterEqualStrategyNumber:
+ nextkey = false;
+ break;
+
+ case BTGreaterStrategyNumber:
+ nextkey = true;
+ break;
+
+ default:
+ /* can't get here, but keep compiler quiet */
+ elog(ERROR, "unrecognized strat_total: %d", (int) strat_total);
+ return false;
+ }
+
/*
* Use the manufactured scan key to descend the tree and position
* ourselves on the target leaf page.
*/
- stack = _bt_search(rel, keysCount, scankeys, &buf, BT_READ);
+ stack = _bt_search(rel, keysCount, scankeys, nextkey, &buf, BT_READ);
/* don't need to keep the stack around... */
_bt_freestack(stack);
page = BufferGetPage(buf);
/* position to the precise item on the page */
- offnum = _bt_binsrch(rel, buf, keysCount, scankeys);
+ offnum = _bt_binsrch(rel, buf, keysCount, scankeys, nextkey);
ItemPointerSet(current, blkno, offnum);
/*
- * At this point we are positioned at the first item >= scan key, or
- * possibly at the end of a page on which all the existing items are
+ * It's now time to examine the initial-positioning strategy to find the
+ * exact place to start the scan.
+ *
+ * If nextkey = false, we are positioned at the first item >= scan key,
+ * or possibly at the end of a page on which all the existing items are
* less than the scan key and we know that everything on later pages
* is greater than or equal to scan key.
*
- * We could step forward in the latter case, but that'd be a waste of
- * time if we want to scan backwards. So, it's now time to examine
- * the initial-positioning strategy to find the exact place to start
- * the scan.
+ * If nextkey = true, we are positioned at the first item > scan key,
+ * or possibly at the end of a page on which all the existing items are
+ * less than or equal to the scan key and we know that everything on
+ * later pages is greater than scan key.
+ *
+ * The actually desired starting point is either this item or an adjacent
+ * one, or in the end-of-page case it's the last item on this page or
+ * the first item on the next. We apply _bt_step if needed to get to
+ * the right place.
*
* Note: if _bt_step fails (meaning we fell off the end of the index in
- * one direction or the other), we either return false (no matches) or
- * call _bt_endpoint() to set up a scan starting at that index
- * endpoint, as appropriate for the desired scan type.
+ * one direction or the other), then there are no matches so we just
+ * return false.
*
* it's yet other place to add some code later for is(not)null ...
*/
-
switch (strat_total)
{
case BTLessStrategyNumber:
/*
- * Back up one to arrive at last item < scankey
+ * We are on first item >= scankey.
+ *
+ * Back up one to arrive at last item < scankey. (Note: this
+ * positioning strategy is only used for a backward scan, so
+ * that is always the correct starting position.)
*/
if (!_bt_step(scan, &buf, BackwardScanDirection))
{
case BTLessEqualStrategyNumber:
/*
- * We need to find the last item <= scankey, so step forward
- * till we find one > scankey, then step back one.
+ * We are on first item > scankey.
+ *
+ * Back up one to arrive at last item <= scankey. (Note: this
+ * positioning strategy is only used for a backward scan, so
+ * that is always the correct starting position.)
*/
- if (offnum > PageGetMaxOffsetNumber(page))
- {
- if (!_bt_step(scan, &buf, ForwardScanDirection))
- {
- pfree(scankeys);
- return _bt_endpoint(scan, dir);
- }
- }
- for (;;)
- {
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- result = _bt_compare(rel, keysCount, scankeys, page, offnum);
- if (result < 0)
- break;
- if (!_bt_step(scan, &buf, ForwardScanDirection))
- {
- pfree(scankeys);
- return _bt_endpoint(scan, dir);
- }
- }
if (!_bt_step(scan, &buf, BackwardScanDirection))
{
pfree(scankeys);
break;
case BTEqualStrategyNumber:
-
/*
- * Make sure we are on the first equal item; might have to
- * step forward if currently at end of page.
+ * If a backward scan was specified, need to start with last
+ * equal item not first one.
*/
- if (offnum > PageGetMaxOffsetNumber(page))
+ if (ScanDirectionIsBackward(dir))
{
- if (!_bt_step(scan, &buf, ForwardScanDirection))
+ /*
+ * We are on first item > scankey.
+ *
+ * Back up one to arrive at last item <= scankey, then
+ * check to see if it is equal to scankey.
+ */
+ if (!_bt_step(scan, &buf, BackwardScanDirection))
{
pfree(scankeys);
return false;
}
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
}
- result = _bt_compare(rel, keysCount, scankeys, page, offnum);
- if (result != 0)
- goto nomatches; /* no equal items! */
-
- /*
- * If a backward scan was specified, need to start with last
- * equal item not first one.
- */
- if (ScanDirectionIsBackward(dir))
+ else
{
- do
+ /*
+ * We are on first item >= scankey.
+ *
+ * Make sure we are on a real item; might have to
+ * step forward if currently at end of page. Then check
+ * to see if it is equal to scankey.
+ */
+ if (offnum > PageGetMaxOffsetNumber(page))
{
if (!_bt_step(scan, &buf, ForwardScanDirection))
{
pfree(scankeys);
- return _bt_endpoint(scan, dir);
+ return false;
}
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- result = _bt_compare(rel, keysCount, scankeys, page, offnum);
- } while (result == 0);
- if (!_bt_step(scan, &buf, BackwardScanDirection))
- elog(ERROR, "equal items disappeared?");
+ }
}
+
+ /* If we are not now on an equal item, then there ain't any. */
+ offnum = ItemPointerGetOffsetNumber(current);
+ page = BufferGetPage(buf);
+ result = _bt_compare(rel, keysCount, scankeys, page, offnum);
+ if (result != 0)
+ goto nomatches; /* no equal items! */
break;
case BTGreaterEqualStrategyNumber:
case BTGreaterStrategyNumber:
/*
- * We want the first item > scankey, so make sure we are on an
- * item and then step over any equal items.
+ * We want the first item > scankey, which is where we are...
+ * unless we're not anywhere at all...
*/
if (offnum > PageGetMaxOffsetNumber(page))
{
pfree(scankeys);
return false;
}
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- }
- result = _bt_compare(rel, keysCount, scankeys, page, offnum);
- while (result == 0)
- {
- if (!_bt_step(scan, &buf, ForwardScanDirection))
- {
- pfree(scankeys);
- return false;
- }
- offnum = ItemPointerGetOffsetNumber(current);
- page = BufferGetPage(buf);
- result = _bt_compare(rel, keysCount, scankeys, page, offnum);
}
break;
}