📄 nbtinsert.c
字号:
*/ if (is_root) { Buffer rootbuf; Assert(stack == NULL); Assert(is_only); /* create a new root node and update the metapage */ rootbuf = _bt_newroot(rel, buf, rbuf); /* release the split buffers */ _bt_relbuf(rel, rootbuf); _bt_relbuf(rel, rbuf); _bt_relbuf(rel, buf); } else { BlockNumber bknum = BufferGetBlockNumber(buf); BlockNumber rbknum = BufferGetBlockNumber(rbuf); Page page = BufferGetPage(buf); IndexTuple new_item; BTStackData fakestack; IndexTuple ritem; Buffer pbuf; if (stack == NULL) { BTPageOpaque lpageop; if (!InRecovery) elog(DEBUG2, "concurrent ROOT page split"); lpageop = (BTPageOpaque) PageGetSpecialPointer(page); /* Find the leftmost page at the next level up */ pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false); /* Set up a phony stack entry pointing there */ stack = &fakestack; stack->bts_blkno = BufferGetBlockNumber(pbuf); stack->bts_offset = InvalidOffsetNumber; /* bts_btentry will be initialized below */ stack->bts_parent = NULL; _bt_relbuf(rel, pbuf); } /* get high key from left page == lowest key on new right page */ ritem = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY)); /* form an index tuple that points at the new right page */ new_item = CopyIndexTuple(ritem); ItemPointerSet(&(new_item->t_tid), rbknum, P_HIKEY); /* * Find the parent buffer and get the parent page. * * Oops - if we were moved right then we need to change stack item! We * want to find parent pointing to where we are, right ? - vadim * 05/27/97 */ ItemPointerSet(&(stack->bts_btentry.t_tid), bknum, P_HIKEY); pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); /* Now we can unlock the children */ _bt_relbuf(rel, rbuf); _bt_relbuf(rel, buf); /* Check for error only after writing children */ if (pbuf == InvalidBuffer) elog(ERROR, "failed to re-find parent key in index \"%s\" for split pages %u/%u", RelationGetRelationName(rel), bknum, rbknum); /* Recursively update the parent */ _bt_insertonpg(rel, pbuf, stack->bts_parent, new_item, stack->bts_offset + 1, is_only); /* be tidy */ pfree(new_item); }}/* * _bt_getstackbuf() -- Walk back up the tree one step, and find the item * we last looked at in the parent. * * This is possible because we save the downlink from the parent item, * which is enough to uniquely identify it. Insertions into the parent * level could cause the item to move right; deletions could cause it * to move left, but not left of the page we previously found it in. * * Adjusts bts_blkno & bts_offset if changed. * * Returns InvalidBuffer if item not found (should not happen). */Buffer_bt_getstackbuf(Relation rel, BTStack stack, int access){ BlockNumber blkno; OffsetNumber start; blkno = stack->bts_blkno; start = stack->bts_offset; for (;;) { Buffer buf; Page page; BTPageOpaque opaque; buf = _bt_getbuf(rel, blkno, access); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_IGNORE(opaque)) { OffsetNumber offnum, minoff, maxoff; ItemId itemid; IndexTuple item; minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); /* * start = InvalidOffsetNumber means "search the whole page". We * need this test anyway due to possibility that page has a high * key now when it didn't before. */ if (start < minoff) start = minoff; /* * Need this check too, to guard against possibility that page * split since we visited it originally. */ if (start > maxoff) start = OffsetNumberNext(maxoff); /* * These loops will check every item on the page --- but in an * order that's attuned to the probability of where it actually * is. Scan to the right first, then to the left. */ for (offnum = start; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { itemid = PageGetItemId(page, offnum); item = (IndexTuple) PageGetItem(page, itemid); if (BTEntrySame(item, &stack->bts_btentry)) { /* Return accurate pointer to where link is now */ stack->bts_blkno = blkno; stack->bts_offset = offnum; return buf; } } for (offnum = OffsetNumberPrev(start); offnum >= minoff; offnum = OffsetNumberPrev(offnum)) { itemid = PageGetItemId(page, offnum); item = (IndexTuple) PageGetItem(page, itemid); if (BTEntrySame(item, &stack->bts_btentry)) { /* Return accurate pointer to where link is now */ stack->bts_blkno = blkno; stack->bts_offset = offnum; return buf; } } } /* * The item we're looking for moved right at least one page. */ if (P_RIGHTMOST(opaque)) { _bt_relbuf(rel, buf); return InvalidBuffer; } blkno = opaque->btpo_next; start = InvalidOffsetNumber; _bt_relbuf(rel, buf); }}/* * _bt_newroot() -- Create a new root page for the index. * * We've just split the old root page and need to create a new one. * In order to do this, we add a new root page to the file, then lock * the metadata page and update it. This is guaranteed to be deadlock- * free, because all readers release their locks on the metadata page * before trying to lock the root, and all writers lock the root before * trying to lock the metadata page. We have a write lock on the old * root page, so we have not introduced any cycles into the waits-for * graph. * * On entry, lbuf (the old root) and rbuf (its new peer) are write- * locked. On exit, a new root page exists with entries for the * two new children, metapage is updated and unlocked/unpinned. * The new root buffer is returned to caller which has to unlock/unpin * lbuf, rbuf & rootbuf. */static Buffer_bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf){ Buffer rootbuf; Page lpage, rootpage; BlockNumber lbkno, rbkno; BlockNumber rootblknum; BTPageOpaque rootopaque; ItemId itemid; IndexTuple item; Size itemsz; IndexTuple new_item; Buffer metabuf; Page metapg; BTMetaPageData *metad; lbkno = BufferGetBlockNumber(lbuf); rbkno = BufferGetBlockNumber(rbuf); lpage = BufferGetPage(lbuf); /* get a new root page */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootpage = BufferGetPage(rootbuf); rootblknum = BufferGetBlockNumber(rootbuf); /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); /* NO EREPORT(ERROR) from here till newroot op is logged */ START_CRIT_SECTION(); /* set btree special data */ rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; rootopaque->btpo_flags = BTP_ROOT; rootopaque->btpo.level = ((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo.level + 1; rootopaque->btpo_cycleid = 0; /* update metapage data */ metad->btm_root = rootblknum; metad->btm_level = rootopaque->btpo.level; metad->btm_fastroot = rootblknum; metad->btm_fastlevel = rootopaque->btpo.level; /* * Create downlink item for left page (old root). Since this will be the * first item in a non-leaf page, it implicitly has minus-infinity key * value, so we need not store any actual key in it. */ itemsz = sizeof(IndexTupleData); new_item = (IndexTuple) palloc(itemsz); new_item->t_info = itemsz; ItemPointerSet(&(new_item->t_tid), lbkno, P_HIKEY); /* * Insert the left page pointer into the new root page. The root page is * the rightmost page on its level so there is no "high key" in it; the * two items will go into positions P_HIKEY and P_FIRSTKEY. * * Note: we *must* insert the two items in item-number order, for the * benefit of _bt_restore_page(). */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add leftkey to new root page" " while splitting block %u of index \"%s\"", BufferGetBlockNumber(lbuf), RelationGetRelationName(rel)); pfree(new_item); /* * Create downlink item for right page. The key for it is obtained from * the "high key" position in the left page. */ itemid = PageGetItemId(lpage, P_HIKEY); itemsz = ItemIdGetLength(itemid); item = (IndexTuple) PageGetItem(lpage, itemid); new_item = CopyIndexTuple(item); ItemPointerSet(&(new_item->t_tid), rbkno, P_HIKEY); /* * insert the right page pointer into the new root page. */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add rightkey to new root page" " while splitting block %u of index \"%s\"", BufferGetBlockNumber(lbuf), RelationGetRelationName(rel)); pfree(new_item); MarkBufferDirty(rootbuf); MarkBufferDirty(metabuf); /* XLOG stuff */ if (!rel->rd_istemp) { xl_btree_newroot xlrec; XLogRecPtr recptr; XLogRecData rdata[2]; xlrec.node = rel->rd_node; xlrec.rootblk = rootblknum; xlrec.level = metad->btm_level; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeNewroot; rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* * Direct access to page is not good but faster - we should implement * some new func in page API. */ rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper; rdata[1].len = ((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->pd_upper; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata); PageSetLSN(rootpage, recptr); PageSetTLI(rootpage, ThisTimeLineID); PageSetLSN(metapg, recptr); PageSetTLI(metapg, ThisTimeLineID); } END_CRIT_SECTION(); /* send out relcache inval for metapage change */ if (!InRecovery) CacheInvalidateRelcache(rel); /* done with metapage */ _bt_relbuf(rel, metabuf); return rootbuf;}/* * _bt_pgaddtup() -- add a tuple to a particular page in the index. * * This routine adds the tuple to the page as requested. It does * not affect pin/lock status, but you'd better have a write lock * and pin on the target buffer! Don't forget to write and release * the buffer afterwards, either. * * The main difference between this routine and a bare PageAddItem call * is that this code knows that the leftmost index tuple on a non-leaf * btree page doesn't need to have a key. Therefore, it strips such * tuples down to just the tuple header. CAUTION: this works ONLY if * we insert the tuples in order, so that the given itup_off does * represent the final position of the tuple! */static void_bt_pgaddtup(Relation rel, Page page, Size itemsize, IndexTuple itup, OffsetNumber itup_off, const char *where){ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); IndexTupleData trunctuple; if (!P_ISLEAF(opaque) && itup_off == P_FIRSTDATAKEY(opaque)) { trunctuple = *itup; trunctuple.t_info = sizeof(IndexTupleData); itup = &trunctuple; itemsize = sizeof(IndexTupleData); } if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false) == InvalidOffsetNumber) elog(PANIC, "failed to add item to the %s in index \"%s\"", where, RelationGetRelationName(rel));}/* * _bt_isequal - used in _bt_doinsert in check for duplicates. * * This is very similar to _bt_compare, except for NULL handling. * Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too. */static bool_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey){ IndexTuple itup; int i; /* Better be comparing to a leaf item */ Assert(P_ISLEAF((BTPageOpaque) PageGetSpecialPointer(page))); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); for (i = 1; i <= keysz; i++) { AttrNumber attno; Datum datum; bool isNull; int32 result; attno = scankey->sk_attno; Assert(attno == i); datum = index_getattr(itup, attno, itupdesc, &isNull); /* NULLs are never equal to anything */ if (isNull || (scankey->sk_flags & SK_ISNULL)) return false; result = DatumGetInt32(FunctionCall2(&scankey->sk_func, datum, scankey->sk_argument)); if (result != 0) return false; scankey++; } /* if we get here, the keys are equal */ return true;}/* * _bt_vacuum_one_page - vacuum just one index page. * * Try to remove LP_DEAD items from the given page. The passed buffer * must be exclusive-locked, but unlike a real VACUUM, we don't need a * super-exclusive "cleanup" lock (see nbtree/README). */static void_bt_vacuum_one_page(Relation rel, Buffer buffer){ OffsetNumber deletable[MaxOffsetNumber]; int ndeletable = 0; OffsetNumber offnum, minoff, maxoff; Page page = BufferGetPage(buffer); BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* * Scan over all items to see which ones need to be deleted according to * LP_DEAD flags. */ minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); for (offnum = minoff; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { ItemId itemId = PageGetItemId(page, offnum); if (ItemIdIsDead(itemId)) deletable[ndeletable++] = offnum; } if (ndeletable > 0) _bt_delitems(rel, buffer, deletable, ndeletable); /* * Note: if we didn't find any LP_DEAD items, then the page's * BTP_HAS_GARBAGE hint bit is falsely set. We do not bother expending a * separate write to clear it, however. We will clear it when we split * the page. */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -