📄 nbtsearch.c
字号:
* we must save the page's right-link while scanning it; this tells us * where to step right to after we're done with these items. There is no * corresponding need for the left-link, since splits always go right. */ so->currPos.nextPage = opaque->btpo_next; if (ScanDirectionIsForward(dir)) { /* load items[] in ascending order */ itemIndex = 0; offnum = Max(offnum, minoff); while (offnum <= maxoff) { if (_bt_checkkeys(scan, page, offnum, dir, &continuescan)) { /* tuple passes all scan key conditions, so remember it */ /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */ so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self; so->currPos.items[itemIndex].indexOffset = offnum; itemIndex++; } if (!continuescan) { /* there can't be any more matches, so stop */ so->currPos.moreRight = false; break; } offnum = OffsetNumberNext(offnum); } Assert(itemIndex <= MaxIndexTuplesPerPage); so->currPos.firstItem = 0; so->currPos.lastItem = itemIndex - 1; so->currPos.itemIndex = 0; } else { /* load items[] in descending order */ itemIndex = MaxIndexTuplesPerPage; offnum = Min(offnum, maxoff); while (offnum >= minoff) { if (_bt_checkkeys(scan, page, offnum, dir, &continuescan)) { /* tuple passes all scan key conditions, so remember it */ /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */ itemIndex--; so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self; so->currPos.items[itemIndex].indexOffset = offnum; } if (!continuescan) { /* there can't be any more matches, so stop */ so->currPos.moreLeft = false; break; } offnum = OffsetNumberPrev(offnum); } Assert(itemIndex >= 0); so->currPos.firstItem = itemIndex; so->currPos.lastItem = MaxIndexTuplesPerPage - 1; so->currPos.itemIndex = MaxIndexTuplesPerPage - 1; } return (so->currPos.firstItem <= so->currPos.lastItem);}/* * _bt_steppage() -- Step to next page containing valid data for scan * * On entry, so->currPos.buf must be pinned and read-locked. We'll drop * the lock and pin before moving to next page. * * On success exit, we hold pin and read-lock on the next interesting page, * and so->currPos is updated to contain data from that page. * * If there are no more matching records in the given direction, we drop all * locks and pins, set so->currPos.buf to InvalidBuffer, and return FALSE. */static bool_bt_steppage(IndexScanDesc scan, ScanDirection dir){ BTScanOpaque so = (BTScanOpaque) scan->opaque; Relation rel; Page page; BTPageOpaque opaque; /* we must have the buffer pinned and locked */ Assert(BufferIsValid(so->currPos.buf)); /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan, true); /* * Before we modify currPos, make a copy of the page data if there was a * mark position that needs it. */ if (so->markItemIndex >= 0) { /* bump pin on current buffer for assignment to mark buffer */ IncrBufferRefCount(so->currPos.buf); memcpy(&so->markPos, &so->currPos, offsetof(BTScanPosData, items[1]) + so->currPos.lastItem * sizeof(BTScanPosItem)); so->markPos.itemIndex = so->markItemIndex; so->markItemIndex = -1; } rel = scan->indexRelation; if (ScanDirectionIsForward(dir)) { /* Walk right to the next page with data */ /* We must rely on the previously saved nextPage link! */ BlockNumber blkno = so->currPos.nextPage; /* Remember we left a page with data */ so->currPos.moreLeft = true; for (;;) { /* if we're at end of scan, release the buffer and return */ if (blkno == P_NONE || !so->currPos.moreRight) { _bt_relbuf(rel, so->currPos.buf); so->currPos.buf = InvalidBuffer; return false; } /* step right one page */ so->currPos.buf = _bt_relandgetbuf(rel, so->currPos.buf, blkno, BT_READ); /* check for deleted page */ page = BufferGetPage(so->currPos.buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_IGNORE(opaque)) { /* see if there are any matches on this page */ /* note that this will clear moreRight if we can stop */ if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque))) break; } /* nope, keep going */ blkno = opaque->btpo_next; } } else { /* Remember we left a page with data */ so->currPos.moreRight = true; /* * Walk left to the next page with data. This is much more complex * than the walk-right case because of the possibility that the page * to our left splits while we are in flight to it, plus the * possibility that the page we were on gets deleted after we leave * it. See nbtree/README for details. */ for (;;) { /* Done if we know there are no matching keys to the left */ if (!so->currPos.moreLeft) { _bt_relbuf(rel, so->currPos.buf); so->currPos.buf = InvalidBuffer; return false; } /* Step to next physical page */ so->currPos.buf = _bt_walk_left(rel, so->currPos.buf); /* if we're physically at end of index, return failure */ if (so->currPos.buf == InvalidBuffer) return false; /* * Okay, we managed to move left to a non-deleted page. Done if * it's not half-dead and contains matching tuples. Else loop back * and do it all again. */ page = BufferGetPage(so->currPos.buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_IGNORE(opaque)) { /* see if there are any matches on this page */ /* note that this will clear moreLeft if we can stop */ if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page))) break; } } } return true;}/* * _bt_walk_left() -- step left one page, if possible * * The given buffer must be pinned and read-locked. This will be dropped * before stepping left. On return, we have pin and read lock on the * returned page, instead. * * Returns InvalidBuffer if there is no page to the left (no lock is held * in that case). * * When working on a non-leaf level, it is possible for the returned page * to be half-dead; the caller should check that condition and step left * again if it's important. */static Buffer_bt_walk_left(Relation rel, Buffer buf){ Page page; BTPageOpaque opaque; page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); for (;;) { BlockNumber obknum; BlockNumber lblkno; BlockNumber blkno; int tries; /* if we're at end of tree, release buf and return failure */ if (P_LEFTMOST(opaque)) { _bt_relbuf(rel, buf); break; } /* remember original page we are stepping left from */ obknum = BufferGetBlockNumber(buf); /* step left */ blkno = lblkno = opaque->btpo_prev; buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* * If this isn't the page we want, walk right till we find what we * want --- but go no more than four hops (an arbitrary limit). If we * don't find the correct page by then, the most likely bet is that * the original page got deleted and isn't in the sibling chain at all * anymore, not that its left sibling got split more than four times. * * Note that it is correct to test P_ISDELETED not P_IGNORE here, * because half-dead pages are still in the sibling chain. Caller * must reject half-dead pages if wanted. */ tries = 0; for (;;) { if (!P_ISDELETED(opaque) && opaque->btpo_next == obknum) { /* Found desired page, return it */ return buf; } if (P_RIGHTMOST(opaque) || ++tries > 4) break; blkno = opaque->btpo_next; buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); } /* Return to the original page to see what's up */ buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (P_ISDELETED(opaque)) { /* * It was deleted. Move right to first nondeleted page (there * must be one); that is the page that has acquired the deleted * one's keyspace, so stepping left from it will take us where we * want to be. */ for (;;) { if (P_RIGHTMOST(opaque)) elog(ERROR, "fell off the end of index \"%s\"", RelationGetRelationName(rel)); blkno = opaque->btpo_next; buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_ISDELETED(opaque)) break; } /* * Now return to top of loop, resetting obknum to point to this * nondeleted page, and try again. */ } else { /* * It wasn't deleted; the explanation had better be that the page * to the left got split or deleted. Without this check, we'd go * into an infinite loop if there's anything wrong. */ if (opaque->btpo_prev == lblkno) elog(ERROR, "could not find left sibling of block %u in index \"%s\"", obknum, RelationGetRelationName(rel)); /* Okay to try again with new lblkno value */ } } return InvalidBuffer;}/* * _bt_get_endpoint() -- Find the first or last page on a given tree level * * If the index is empty, we will return InvalidBuffer; any other failure * condition causes ereport(). We will not return a dead page. * * The returned buffer is pinned and read-locked. */Buffer_bt_get_endpoint(Relation rel, uint32 level, bool rightmost){ Buffer buf; Page page; BTPageOpaque opaque; OffsetNumber offnum; BlockNumber blkno; IndexTuple itup; /* * If we are looking for a leaf page, okay to descend from fast root; * otherwise better descend from true root. (There is no point in being * smarter about intermediate levels.) */ if (level == 0) buf = _bt_getroot(rel, BT_READ); else buf = _bt_gettrueroot(rel); if (!BufferIsValid(buf)) { /* empty index... */ return InvalidBuffer; } page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); for (;;) { /* * If we landed on a deleted page, step right to find a live page * (there must be one). Also, if we want the rightmost page, step * right if needed to get to it (this could happen if the page split * since we obtained a pointer to it). */ while (P_IGNORE(opaque) || (rightmost && !P_RIGHTMOST(opaque))) { blkno = opaque->btpo_next; if (blkno == P_NONE) elog(ERROR, "fell off the end of index \"%s\"", RelationGetRelationName(rel)); buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); } /* Done? */ if (opaque->btpo.level == level) break; if (opaque->btpo.level < level) elog(ERROR, "btree level %u not found in index \"%s\"", level, RelationGetRelationName(rel)); /* Descend to leftmost or rightmost child page */ if (rightmost) offnum = PageGetMaxOffsetNumber(page); else offnum = P_FIRSTDATAKEY(opaque); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); blkno = ItemPointerGetBlockNumber(&(itup->t_tid)); buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); } return buf;}/* * _bt_endpoint() -- Find the first or last page in the index, and scan * from there to the first key satisfying all the quals. * * This is used by _bt_first() to set up a scan when we've determined * that the scan must start at the beginning or end of the index (for * a forward or backward scan respectively). Exit conditions are the * same as for _bt_first(). */static bool_bt_endpoint(IndexScanDesc scan, ScanDirection dir){ Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Buffer buf; Page page; BTPageOpaque opaque; OffsetNumber start; /* * Scan down to the leftmost or rightmost leaf page. This is a simplified * version of _bt_search(). We don't maintain a stack since we know we * won't need it. */ buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir)); if (!BufferIsValid(buf)) { /* empty index... */ so->currPos.buf = InvalidBuffer; return false; } page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); Assert(P_ISLEAF(opaque)); if (ScanDirectionIsForward(dir)) { /* There could be dead pages to the left, so not this: */ /* Assert(P_LEFTMOST(opaque)); */ start = P_FIRSTDATAKEY(opaque); } else if (ScanDirectionIsBackward(dir)) { Assert(P_RIGHTMOST(opaque)); start = PageGetMaxOffsetNumber(page); } else { elog(ERROR, "invalid scan direction: %d", (int) dir); start = 0; /* keep compiler quiet */ } /* remember which buffer we have pinned */ so->currPos.buf = buf; /* initialize moreLeft/moreRight appropriately for scan direction */ if (ScanDirectionIsForward(dir)) { so->currPos.moreLeft = false; so->currPos.moreRight = true; } else { so->currPos.moreLeft = true; so->currPos.moreRight = false; } so->numKilled = 0; /* just paranoia */ so->markItemIndex = -1; /* ditto */ /* * Now load data from the first page of the scan. */ if (!_bt_readpage(scan, dir, start)) { /* * There's no actually-matching data on this page. Try to advance to * the next page. Return false if there's no matching data at all. */ if (!_bt_steppage(scan, dir)) return false; } /* Drop the lock, but not pin, on the current page */ LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); /* OK, itemIndex says what to return */ scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid; return true;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -