📄 nbtinsert.c
字号:
} else if (_bt_skeycmp(rel, keysz, scankey, rpage, PageGetItemId(rpage, P_HIKEY), BTGreaterStrategyNumber)) elog(FATAL, "btree: hikey is out of order"); else if (rpageop->btpo_flags & BTP_CHAIN) /* * If hikey > scankey then it's last page in chain and * BTP_CHAIN must be OFF */ elog(FATAL, "btree: lost last page in the chain of duplicates"); } else/* rightmost page */ { Assert(!(rpageop->btpo_flags & BTP_CHAIN)); } _bt_relbuf(rel, buf, BT_WRITE); return (_bt_insertonpg(rel, rbuf, stack, keysz, scankey, btitem, afteritem)); } /* * If after splitting un-chained page we'll got chain of pages * with duplicates then we want to know 1. on which of two pages * new btitem will go (current _bt_findsplitloc is quite bad); 2. * what parent (if there's one) thinking about it (remember about * deletions) */ else if (!(lpageop->btpo_flags & BTP_CHAIN)) { OffsetNumber start = (P_RIGHTMOST(lpageop)) ? P_HIKEY : P_FIRSTKEY; Size llimit; maxoff = PageGetMaxOffsetNumber(page); llimit = PageGetPageSize(page) - sizeof(PageHeaderData) - MAXALIGN(sizeof(BTPageOpaqueData)) +sizeof(ItemIdData); llimit /= 2; firstright = _bt_findsplitloc(rel, page, start, maxoff, llimit); if (_bt_itemcmp(rel, keysz, (BTItem) PageGetItem(page, PageGetItemId(page, start)), (BTItem) PageGetItem(page, PageGetItemId(page, firstright)), BTEqualStrategyNumber)) { if (_bt_skeycmp(rel, keysz, scankey, page, PageGetItemId(page, firstright), BTLessStrategyNumber)) /* * force moving current items to the new page: new * item will go on the current page. */ firstright = start; else /* * new btitem >= firstright, start item == firstright * - new chain of duplicates: if this non-leftmost * leaf page and parent item < start item then force * moving all items to the new page - current page * will be "empty" after it. */ { if (!P_LEFTMOST(lpageop) && (lpageop->btpo_flags & BTP_LEAF)) { ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), bknum, P_HIKEY); pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); if (_bt_itemcmp(rel, keysz, stack->bts_btitem, (BTItem) PageGetItem(page, PageGetItemId(page, start)), BTLessStrategyNumber)) { firstright = start; shifted = true; } _bt_relbuf(rel, pbuf, BT_WRITE); } } } /* else - no new chain if start item < * firstright one */ } /* split the buffer into left and right halves */ rbuf = _bt_split(rel, buf, firstright); /* which new page (left half or right half) gets the tuple? */ if (_bt_goesonpg(rel, buf, keysz, scankey, afteritem)) { /* left page */ itup_off = _bt_pgaddtup(rel, buf, keysz, scankey, itemsz, btitem, afteritem); itup_blkno = BufferGetBlockNumber(buf); } else { /* right page */ itup_off = _bt_pgaddtup(rel, rbuf, keysz, scankey, itemsz, btitem, afteritem); itup_blkno = BufferGetBlockNumber(rbuf); } maxoff = PageGetMaxOffsetNumber(page); if (shifted) { if (maxoff > P_FIRSTKEY) elog(FATAL, "btree: shifted page is not empty"); lowLeftItem = (BTItem) NULL; } else { if (maxoff < P_FIRSTKEY) elog(FATAL, "btree: un-shifted page is empty"); lowLeftItem = (BTItem) PageGetItem(page, PageGetItemId(page, P_FIRSTKEY)); if (_bt_itemcmp(rel, keysz, lowLeftItem, (BTItem) PageGetItem(page, PageGetItemId(page, P_HIKEY)), BTEqualStrategyNumber)) lpageop->btpo_flags |= BTP_CHAIN; } /* * By here, * * + our target page has been split; + the original tuple has been * inserted; + we have write locks on both the old (left half) * and new (right half) buffers, after the split; and + we have * the key we want to insert into the parent. * * Do the parent insertion. We need to hold onto the locks for the * child pages until we locate the parent, but we can release them * before doing the actual insertion (see Lehman and Yao for the * reasoning). */l_spl: ; if (stack == (BTStack) NULL) { if (!is_root) /* if this page was not root page */ { elog(DEBUG, "btree: concurrent ROOT page split"); stack = (BTStack) palloc(sizeof(BTStackData)); stack->bts_blkno = lpageop->btpo_parent; stack->bts_offset = InvalidOffsetNumber; stack->bts_btitem = (BTItem) palloc(sizeof(BTItemData)); /* bts_btitem will be initialized below */ stack->bts_parent = NULL; goto l_spl; } /* create a new root node and release the split buffers */ _bt_newroot(rel, buf, rbuf); } else { ScanKey newskey; InsertIndexResult newres; BTItem new_item; OffsetNumber upditem_offset = P_HIKEY; bool do_update = false; bool update_in_place = true; bool parent_chained; /* form a index tuple that points at the new right page */ rbknum = BufferGetBlockNumber(rbuf); rpage = BufferGetPage(rbuf); rpageop = (BTPageOpaque) PageGetSpecialPointer(rpage); /* * By convention, the first entry (1) on every non-rightmost * page is the high key for that page. In order to get the * lowest key on the new right page, we actually look at its * second (2) entry. */ if (!P_RIGHTMOST(rpageop)) { ritem = (BTItem) PageGetItem(rpage, PageGetItemId(rpage, P_FIRSTKEY)); if (_bt_itemcmp(rel, keysz, ritem, (BTItem) PageGetItem(rpage, PageGetItemId(rpage, P_HIKEY)), BTEqualStrategyNumber)) rpageop->btpo_flags |= BTP_CHAIN; } else ritem = (BTItem) PageGetItem(rpage, PageGetItemId(rpage, P_HIKEY)); /* get a unique btitem for this key */ new_item = _bt_formitem(&(ritem->bti_itup)); ItemPointerSet(&(new_item->bti_itup.t_tid), rbknum, P_HIKEY); /* * Find the parent buffer and get the parent page. * * Oops - if we were moved right then we need to change stack * item! We want to find parent pointing to where we are, * right ? - vadim 05/27/97 */ ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), bknum, P_HIKEY); pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); ppage = BufferGetPage(pbuf); ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); parent_chained = ((ppageop->btpo_flags & BTP_CHAIN)) ? true : false; if (parent_chained && !left_chained) elog(FATAL, "nbtree: unexpected chained parent of unchained page"); /* * If the key of new_item is < than the key of the item in the * parent page pointing to the left page (stack->bts_btitem), * we have to update the latter key; otherwise the keys on the * parent page wouldn't be monotonically increasing after we * inserted the new pointer to the right page (new_item). This * only happens if our left page is the leftmost page and a * new minimum key had been inserted before, which is not * reflected in the parent page but didn't matter so far. If * there are duplicate keys and this new minimum key spills * over to our new right page, we get an inconsistency if we * don't update the left key in the parent page. * * Also, new duplicates handling code require us to update parent * item if some smaller items left on the left page (which is * possible in splitting leftmost page) and current parent * item == new_item. - vadim 05/27/97 */ if (_bt_itemcmp(rel, keysz, stack->bts_btitem, new_item, BTGreaterStrategyNumber) || (!shifted && _bt_itemcmp(rel, keysz, stack->bts_btitem, new_item, BTEqualStrategyNumber) && _bt_itemcmp(rel, keysz, lowLeftItem, new_item, BTLessStrategyNumber))) { do_update = true; /* * figure out which key is leftmost (if the parent page is * rightmost, too, it must be the root) */ if (P_RIGHTMOST(ppageop)) upditem_offset = P_HIKEY; else upditem_offset = P_FIRSTKEY; if (!P_LEFTMOST(lpageop) || stack->bts_offset != upditem_offset) elog(FATAL, "btree: items are out of order (leftmost %d, stack %u, update %u)", P_LEFTMOST(lpageop), stack->bts_offset, upditem_offset); } if (do_update) { if (shifted) elog(FATAL, "btree: attempt to update parent for shifted page"); /* * Try to update in place. If out parent page is chained * then we must forse insertion. */ if (!parent_chained && MAXALIGN(IndexTupleDSize(lowLeftItem->bti_itup)) == MAXALIGN(IndexTupleDSize(stack->bts_btitem->bti_itup))) { _bt_updateitem(rel, keysz, pbuf, stack->bts_btitem, lowLeftItem); _bt_wrtbuf(rel, buf); _bt_wrtbuf(rel, rbuf); } else { update_in_place = false; PageIndexTupleDelete(ppage, upditem_offset); /* * don't write anything out yet--we still have the * write lock, and now we call another _bt_insertonpg * to insert the correct key. First, make a new item, * using the tuple data from lowLeftItem. Point it to * the left child. Update it on the stack at the same * time. */ pfree(stack->bts_btitem); stack->bts_btitem = _bt_formitem(&(lowLeftItem->bti_itup)); ItemPointerSet(&(stack->bts_btitem->bti_itup.t_tid), bknum, P_HIKEY); /* * Unlock the children before doing this */ _bt_wrtbuf(rel, buf); _bt_wrtbuf(rel, rbuf); /* * A regular _bt_binsrch should find the right place * to put the new entry, since it should be lower than * any other key on the page. Therefore set afteritem * to NULL. */ newskey = _bt_mkscankey(rel, &(stack->bts_btitem->bti_itup)); newres = _bt_insertonpg(rel, pbuf, stack->bts_parent, keysz, newskey, stack->bts_btitem, NULL); pfree(newres); pfree(newskey); /* * we have now lost our lock on the parent buffer, and * need to get it back. */ pbuf = _bt_getstackbuf(rel, stack, BT_WRITE); } } else { _bt_wrtbuf(rel, buf); _bt_wrtbuf(rel, rbuf); } newskey = _bt_mkscankey(rel, &(new_item->bti_itup)); afteritem = stack->bts_btitem; if (parent_chained && !update_in_place) { ppage = BufferGetPage(pbuf); ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); if (ppageop->btpo_flags & BTP_CHAIN) elog(FATAL, "btree: unexpected BTP_CHAIN flag in parent after update"); if (P_RIGHTMOST(ppageop)) elog(FATAL, "btree: chained parent is RIGHTMOST after update"); maxoff = PageGetMaxOffsetNumber(ppage); if (maxoff != P_FIRSTKEY) elog(FATAL, "btree: FIRSTKEY was unexpected in parent after update"); if (_bt_skeycmp(rel, keysz, newskey, ppage, PageGetItemId(ppage, P_FIRSTKEY), BTLessEqualStrategyNumber)) elog(FATAL, "btree: parent FIRSTKEY is >= duplicate key after update"); if (!_bt_skeycmp(rel, keysz, newskey, ppage, PageGetItemId(ppage, P_HIKEY), BTEqualStrategyNumber)) elog(FATAL, "btree: parent HIGHKEY is not equal duplicate key after update"); afteritem = (BTItem) NULL; } else if (left_chained && !update_in_place) { ppage = BufferGetPage(pbuf); ppageop = (BTPageOpaque) PageGetSpecialPointer(ppage); if (!P_RIGHTMOST(ppageop) && _bt_skeycmp(rel, keysz, newskey, ppage, PageGetItemId(ppage, P_HIKEY), BTGreaterStrategyNumber)) afteritem = (BTItem) NULL; } if (afteritem == (BTItem) NULL) { rbuf = _bt_getbuf(rel, ppageop->btpo_next, BT_WRITE); _bt_relbuf(rel, pbuf, BT_WRITE); pbuf = rbuf; } newres = _bt_insertonpg(rel, pbuf, stack->bts_parent, keysz, newskey, new_item, afteritem); /* be tidy */ pfree(newres); pfree(newskey); pfree(new_item); } } else { itup_off = _bt_pgaddtup(rel, buf, keysz, scankey, itemsz, btitem, afteritem); itup_blkno = BufferGetBlockNumber(buf); _bt_relbuf(rel, buf, BT_WRITE); } /* by here, the new tuple is inserted */ res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); return res;}/* * _bt_split() -- split a page in the btree. * * On entry, buf is the page to split, and is write-locked and pinned. * Returns the new right sibling of buf, pinned and write-locked. The * pin and lock on buf are maintained. */static Buffer_bt_split(Relation rel, Buffer buf, OffsetNumber firstright){ Buffer rbuf; Page origpage; Page leftpage, rightpage; BTPageOpaque ropaque, lopaque, oopaque; Buffer sbuf; Page spage; BTPageOpaque sopaque; Size itemsz; ItemId itemid; BTItem item; OffsetNumber leftoff,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -