📄 nbtinsert.c
字号:
lpageop = (BTPageOpaque) PageGetSpecialPointer(page); /* Find the leftmost page at the next level up */ pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false); /* Set up a phony stack entry pointing there */ stack = &fakestack; stack->bts_blkno = BufferGetBlockNumber(pbuf); stack->bts_offset = InvalidOffsetNumber; /* bts_btitem will be initialized below */ stack->bts_parent = NULL; _bt_relbuf(rel, pbuf); } /* get high key from left page == lowest key on new right page */ ritem = (BTItem) PageGetItem(page, PageGetItemId(page, P_HIKEY)); /* form an index tuple that points at the new right page */ new_item = _bt_formitem(&(ritem->bti_itup)); ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY); /* * Find the parent buffer and get the parent page. * * Oops - if we were moved right then we need to change stack item! We * want to find parent pointing to where we are, right ? - vadim * 05/27/97 */ ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid), bknum, P_HIKEY); pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); /* Now we can write and unlock the children */ _bt_wrtbuf(rel, rbuf); _bt_wrtbuf(rel, buf); /* Check for error only after writing children */ if (pbuf == InvalidBuffer) elog(ERROR, "failed to re-find parent key in \"%s\"", RelationGetRelationName(rel)); /* Recursively update the parent */ _bt_insertonpg(rel, pbuf, stack->bts_parent, 0, NULL, new_item, stack->bts_offset, is_only); /* be tidy */ pfree(new_item); }}/* * _bt_getstackbuf() -- Walk back up the tree one step, and find the item * we last looked at in the parent. * * This is possible because we save the downlink from the parent item, * which is enough to uniquely identify it. Insertions into the parent * level could cause the item to move right; deletions could cause it * to move left, but not left of the page we previously found it in. * * Adjusts bts_blkno & bts_offset if changed. * * Returns InvalidBuffer if item not found (should not happen). */Buffer_bt_getstackbuf(Relation rel, BTStack stack, int access){ BlockNumber blkno; OffsetNumber start; blkno = stack->bts_blkno; start = stack->bts_offset; for (;;) { Buffer buf; Page page; BTPageOpaque opaque; buf = _bt_getbuf(rel, blkno, access); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (!P_IGNORE(opaque)) { OffsetNumber offnum, minoff, maxoff; ItemId itemid; BTItem item; minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); /* * start = InvalidOffsetNumber means "search the whole page". We * need this test anyway due to possibility that page has a high * key now when it didn't before. */ if (start < minoff) start = minoff; /* * Need this check too, to guard against possibility that page * split since we visited it originally. */ if (start > maxoff) start = OffsetNumberNext(maxoff); /* * These loops will check every item on the page --- but in an * order that's attuned to the probability of where it actually * is. Scan to the right first, then to the left. */ for (offnum = start; offnum <= maxoff; offnum = OffsetNumberNext(offnum)) { itemid = PageGetItemId(page, offnum); item = (BTItem) PageGetItem(page, itemid); if (BTItemSame(item, &stack->bts_btitem)) { /* Return accurate pointer to where link is now */ stack->bts_blkno = blkno; stack->bts_offset = offnum; return buf; } } for (offnum = OffsetNumberPrev(start); offnum >= minoff; offnum = OffsetNumberPrev(offnum)) { itemid = PageGetItemId(page, offnum); item = (BTItem) PageGetItem(page, itemid); if (BTItemSame(item, &stack->bts_btitem)) { /* Return accurate pointer to where link is now */ stack->bts_blkno = blkno; stack->bts_offset = offnum; return buf; } } } /* * The item we're looking for moved right at least one page. */ if (P_RIGHTMOST(opaque)) { _bt_relbuf(rel, buf); return InvalidBuffer; } blkno = opaque->btpo_next; start = InvalidOffsetNumber; _bt_relbuf(rel, buf); }}/* * _bt_newroot() -- Create a new root page for the index. * * We've just split the old root page and need to create a new one. * In order to do this, we add a new root page to the file, then lock * the metadata page and update it. This is guaranteed to be deadlock- * free, because all readers release their locks on the metadata page * before trying to lock the root, and all writers lock the root before * trying to lock the metadata page. We have a write lock on the old * root page, so we have not introduced any cycles into the waits-for * graph. * * On entry, lbuf (the old root) and rbuf (its new peer) are write- * locked. On exit, a new root page exists with entries for the * two new children, metapage is updated and unlocked/unpinned. * The new root buffer is returned to caller which has to unlock/unpin * lbuf, rbuf & rootbuf. */static Buffer_bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf){ Buffer rootbuf; Page lpage, rpage, rootpage; BlockNumber lbkno, rbkno; BlockNumber rootblknum; BTPageOpaque rootopaque; ItemId itemid; BTItem item; Size itemsz; BTItem new_item; Buffer metabuf; Page metapg; BTMetaPageData *metad; lbkno = BufferGetBlockNumber(lbuf); rbkno = BufferGetBlockNumber(rbuf); lpage = BufferGetPage(lbuf); rpage = BufferGetPage(rbuf); /* get a new root page */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootpage = BufferGetPage(rootbuf); rootblknum = BufferGetBlockNumber(rootbuf); /* acquire lock on the metapage */ metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE); metapg = BufferGetPage(metabuf); metad = BTPageGetMeta(metapg); /* NO EREPORT(ERROR) from here till newroot op is logged */ START_CRIT_SECTION(); /* set btree special data */ rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage); rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE; rootopaque->btpo_flags = BTP_ROOT; rootopaque->btpo.level = ((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo.level + 1; /* update metapage data */ metad->btm_root = rootblknum; metad->btm_level = rootopaque->btpo.level; metad->btm_fastroot = rootblknum; metad->btm_fastlevel = rootopaque->btpo.level; /* * Create downlink item for left page (old root). Since this will be the * first item in a non-leaf page, it implicitly has minus-infinity key * value, so we need not store any actual key in it. */ itemsz = sizeof(BTItemData); new_item = (BTItem) palloc(itemsz); new_item->bti_itup.t_info = itemsz; ItemPointerSet(&(new_item->bti_itup.t_tid), lbkno, P_HIKEY); /* * Insert the left page pointer into the new root page. The root page is * the rightmost page on its level so there is no "high key" in it; the * two items will go into positions P_HIKEY and P_FIRSTKEY. */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY, LP_USED) == InvalidOffsetNumber) elog(PANIC, "failed to add leftkey to new root page"); pfree(new_item); /* * Create downlink item for right page. The key for it is obtained from * the "high key" position in the left page. */ itemid = PageGetItemId(lpage, P_HIKEY); itemsz = ItemIdGetLength(itemid); item = (BTItem) PageGetItem(lpage, itemid); new_item = _bt_formitem(&(item->bti_itup)); ItemPointerSet(&(new_item->bti_itup.t_tid), rbkno, P_HIKEY); /* * insert the right page pointer into the new root page. */ if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY, LP_USED) == InvalidOffsetNumber) elog(PANIC, "failed to add rightkey to new root page"); pfree(new_item); /* XLOG stuff */ if (!rel->rd_istemp) { xl_btree_newroot xlrec; XLogRecPtr recptr; XLogRecData rdata[2]; xlrec.node = rel->rd_node; xlrec.rootblk = rootblknum; xlrec.level = metad->btm_level; rdata[0].data = (char *) &xlrec; rdata[0].len = SizeOfBtreeNewroot; rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); /* * Direct access to page is not good but faster - we should implement * some new func in page API. */ rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper; rdata[1].len = ((PageHeader) rootpage)->pd_special - ((PageHeader) rootpage)->pd_upper; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata); PageSetLSN(rootpage, recptr); PageSetTLI(rootpage, ThisTimeLineID); PageSetLSN(metapg, recptr); PageSetTLI(metapg, ThisTimeLineID); PageSetLSN(lpage, recptr); PageSetTLI(lpage, ThisTimeLineID); PageSetLSN(rpage, recptr); PageSetTLI(rpage, ThisTimeLineID); } END_CRIT_SECTION(); /* write and let go of metapage buffer */ _bt_wrtbuf(rel, metabuf); return (rootbuf);}/* * _bt_pgaddtup() -- add a tuple to a particular page in the index. * * This routine adds the tuple to the page as requested. It does * not affect pin/lock status, but you'd better have a write lock * and pin on the target buffer! Don't forget to write and release * the buffer afterwards, either. * * The main difference between this routine and a bare PageAddItem call * is that this code knows that the leftmost data item on a non-leaf * btree page doesn't need to have a key. Therefore, it strips such * items down to just the item header. CAUTION: this works ONLY if * we insert the items in order, so that the given itup_off does * represent the final position of the item! */static void_bt_pgaddtup(Relation rel, Page page, Size itemsize, BTItem btitem, OffsetNumber itup_off, const char *where){ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); BTItemData truncitem; if (!P_ISLEAF(opaque) && itup_off == P_FIRSTDATAKEY(opaque)) { memcpy(&truncitem, btitem, sizeof(BTItemData)); truncitem.bti_itup.t_info = sizeof(BTItemData); btitem = &truncitem; itemsize = sizeof(BTItemData); } if (PageAddItem(page, (Item) btitem, itemsize, itup_off, LP_USED) == InvalidOffsetNumber) elog(PANIC, "failed to add item to the %s for \"%s\"", where, RelationGetRelationName(rel));}/* * _bt_isequal - used in _bt_doinsert in check for duplicates. * * This is very similar to _bt_compare, except for NULL handling. * Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too. */static bool_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum, int keysz, ScanKey scankey){ BTItem btitem; IndexTuple itup; int i; /* Better be comparing to a leaf item */ Assert(P_ISLEAF((BTPageOpaque) PageGetSpecialPointer(page))); btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum)); itup = &(btitem->bti_itup); for (i = 1; i <= keysz; i++) { AttrNumber attno; Datum datum; bool isNull; int32 result; attno = scankey->sk_attno; Assert(attno == i); datum = index_getattr(itup, attno, itupdesc, &isNull); /* NULLs are never equal to anything */ if (isNull || (scankey->sk_flags & SK_ISNULL)) return false; result = DatumGetInt32(FunctionCall2(&scankey->sk_func, datum, scankey->sk_argument)); if (result != 0) return false; scankey++; } /* if we get here, the keys are equal */ return true;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -