
📄 nbtinsert.c

📁 PostgreSQL 8.3.4 source code, an open-source database
💻 C
 */
	if (is_root)
	{
		Buffer		rootbuf;

		Assert(stack == NULL);
		Assert(is_only);
		/* create a new root node and update the metapage */
		rootbuf = _bt_newroot(rel, buf, rbuf);
		/* release the split buffers */
		_bt_relbuf(rel, rootbuf);
		_bt_relbuf(rel, rbuf);
		_bt_relbuf(rel, buf);
	}
	else
	{
		BlockNumber bknum = BufferGetBlockNumber(buf);
		BlockNumber rbknum = BufferGetBlockNumber(rbuf);
		Page		page = BufferGetPage(buf);
		IndexTuple	new_item;
		BTStackData fakestack;
		IndexTuple	ritem;
		Buffer		pbuf;

		if (stack == NULL)
		{
			BTPageOpaque lpageop;

			if (!InRecovery)
				elog(DEBUG2, "concurrent ROOT page split");
			lpageop = (BTPageOpaque) PageGetSpecialPointer(page);

			/* Find the leftmost page at the next level up */
			pbuf = _bt_get_endpoint(rel, lpageop->btpo.level + 1, false);

			/* Set up a phony stack entry pointing there */
			stack = &fakestack;
			stack->bts_blkno = BufferGetBlockNumber(pbuf);
			stack->bts_offset = InvalidOffsetNumber;
			/* bts_btentry will be initialized below */
			stack->bts_parent = NULL;
			_bt_relbuf(rel, pbuf);
		}

		/* get high key from left page == lowest key on new right page */
		ritem = (IndexTuple) PageGetItem(page,
										 PageGetItemId(page, P_HIKEY));

		/* form an index tuple that points at the new right page */
		new_item = CopyIndexTuple(ritem);
		ItemPointerSet(&(new_item->t_tid), rbknum, P_HIKEY);

		/*
		 * Find the parent buffer and get the parent page.
		 *
		 * Oops - if we were moved right then we need to change stack item! We
		 * want to find parent pointing to where we are, right ?	- vadim
		 * 05/27/97
		 */
		ItemPointerSet(&(stack->bts_btentry.t_tid), bknum, P_HIKEY);

		pbuf = _bt_getstackbuf(rel, stack, BT_WRITE);

		/* Now we can unlock the children */
		_bt_relbuf(rel, rbuf);
		_bt_relbuf(rel, buf);

		/* Check for error only after writing children */
		if (pbuf == InvalidBuffer)
			elog(ERROR, "failed to re-find parent key in index \"%s\" for split pages %u/%u",
				 RelationGetRelationName(rel), bknum, rbknum);

		/* Recursively update the parent */
		_bt_insertonpg(rel, pbuf, stack->bts_parent,
					   new_item, stack->bts_offset + 1,
					   is_only);

		/* be tidy */
		pfree(new_item);
	}
}

/*
 *	_bt_getstackbuf() -- Walk back up the tree one step, and find the item
 *						 we last looked at in the parent.
 *
 *		This is possible because we save the downlink from the parent item,
 *		which is enough to uniquely identify it.  Insertions into the parent
 *		level could cause the item to move right; deletions could cause it
 *		to move left, but not left of the page we previously found it in.
 *
 *		Adjusts bts_blkno & bts_offset if changed.
 *
 *		Returns InvalidBuffer if item not found (should not happen).
 */
Buffer
_bt_getstackbuf(Relation rel, BTStack stack, int access)
{
	BlockNumber blkno;
	OffsetNumber start;

	blkno = stack->bts_blkno;
	start = stack->bts_offset;

	for (;;)
	{
		Buffer		buf;
		Page		page;
		BTPageOpaque opaque;

		buf = _bt_getbuf(rel, blkno, access);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);

		if (!P_IGNORE(opaque))
		{
			OffsetNumber offnum,
						minoff,
						maxoff;
			ItemId		itemid;
			IndexTuple	item;

			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(page);

			/*
			 * start = InvalidOffsetNumber means "search the whole page". We
			 * need this test anyway due to possibility that page has a high
			 * key now when it didn't before.
			 */
			if (start < minoff)
				start = minoff;

			/*
			 * Need this check too, to guard against possibility that page
			 * split since we visited it originally.
			 */
			if (start > maxoff)
				start = OffsetNumberNext(maxoff);

			/*
			 * These loops will check every item on the page --- but in an
			 * order that's attuned to the probability of where it actually
			 * is.  Scan to the right first, then to the left.
			 */
			for (offnum = start;
				 offnum <= maxoff;
				 offnum = OffsetNumberNext(offnum))
			{
				itemid = PageGetItemId(page, offnum);
				item = (IndexTuple) PageGetItem(page, itemid);
				if (BTEntrySame(item, &stack->bts_btentry))
				{
					/* Return accurate pointer to where link is now */
					stack->bts_blkno = blkno;
					stack->bts_offset = offnum;
					return buf;
				}
			}

			for (offnum = OffsetNumberPrev(start);
				 offnum >= minoff;
				 offnum = OffsetNumberPrev(offnum))
			{
				itemid = PageGetItemId(page, offnum);
				item = (IndexTuple) PageGetItem(page, itemid);
				if (BTEntrySame(item, &stack->bts_btentry))
				{
					/* Return accurate pointer to where link is now */
					stack->bts_blkno = blkno;
					stack->bts_offset = offnum;
					return buf;
				}
			}
		}

		/*
		 * The item we're looking for moved right at least one page.
		 */
		if (P_RIGHTMOST(opaque))
		{
			_bt_relbuf(rel, buf);
			return InvalidBuffer;
		}

		blkno = opaque->btpo_next;
		start = InvalidOffsetNumber;
		_bt_relbuf(rel, buf);
	}
}

/*
 *	_bt_newroot() -- Create a new root page for the index.
 *
 *		We've just split the old root page and need to create a new one.
 *		In order to do this, we add a new root page to the file, then lock
 *		the metadata page and update it.  This is guaranteed to be deadlock-
 *		free, because all readers release their locks on the metadata page
 *		before trying to lock the root, and all writers lock the root before
 *		trying to lock the metadata page.  We have a write lock on the old
 *		root page, so we have not introduced any cycles into the waits-for
 *		graph.
 *
 *		On entry, lbuf (the old root) and rbuf (its new peer) are write-
 *		locked. On exit, a new root page exists with entries for the
 *		two new children, metapage is updated and unlocked/unpinned.
 *		The new root buffer is returned to caller which has to unlock/unpin
 *		lbuf, rbuf & rootbuf.
 */
static Buffer
_bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
{
	Buffer		rootbuf;
	Page		lpage,
				rootpage;
	BlockNumber lbkno,
				rbkno;
	BlockNumber rootblknum;
	BTPageOpaque rootopaque;
	ItemId		itemid;
	IndexTuple	item;
	Size		itemsz;
	IndexTuple	new_item;
	Buffer		metabuf;
	Page		metapg;
	BTMetaPageData *metad;

	lbkno = BufferGetBlockNumber(lbuf);
	rbkno = BufferGetBlockNumber(rbuf);
	lpage = BufferGetPage(lbuf);

	/* get a new root page */
	rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
	rootpage = BufferGetPage(rootbuf);
	rootblknum = BufferGetBlockNumber(rootbuf);

	/* acquire lock on the metapage */
	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);
	metapg = BufferGetPage(metabuf);
	metad = BTPageGetMeta(metapg);

	/* NO EREPORT(ERROR) from here till newroot op is logged */
	START_CRIT_SECTION();

	/* set btree special data */
	rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);
	rootopaque->btpo_prev = rootopaque->btpo_next = P_NONE;
	rootopaque->btpo_flags = BTP_ROOT;
	rootopaque->btpo.level =
		((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo.level + 1;
	rootopaque->btpo_cycleid = 0;

	/* update metapage data */
	metad->btm_root = rootblknum;
	metad->btm_level = rootopaque->btpo.level;
	metad->btm_fastroot = rootblknum;
	metad->btm_fastlevel = rootopaque->btpo.level;

	/*
	 * Create downlink item for left page (old root).  Since this will be the
	 * first item in a non-leaf page, it implicitly has minus-infinity key
	 * value, so we need not store any actual key in it.
	 */
	itemsz = sizeof(IndexTupleData);
	new_item = (IndexTuple) palloc(itemsz);
	new_item->t_info = itemsz;
	ItemPointerSet(&(new_item->t_tid), lbkno, P_HIKEY);

	/*
	 * Insert the left page pointer into the new root page.  The root page is
	 * the rightmost page on its level so there is no "high key" in it; the
	 * two items will go into positions P_HIKEY and P_FIRSTKEY.
	 *
	 * Note: we *must* insert the two items in item-number order, for the
	 * benefit of _bt_restore_page().
	 */
	if (PageAddItem(rootpage, (Item) new_item, itemsz, P_HIKEY,
					false, false) == InvalidOffsetNumber)
		elog(PANIC, "failed to add leftkey to new root page"
			 " while splitting block %u of index \"%s\"",
			 BufferGetBlockNumber(lbuf), RelationGetRelationName(rel));
	pfree(new_item);

	/*
	 * Create downlink item for right page.  The key for it is obtained from
	 * the "high key" position in the left page.
	 */
	itemid = PageGetItemId(lpage, P_HIKEY);
	itemsz = ItemIdGetLength(itemid);
	item = (IndexTuple) PageGetItem(lpage, itemid);
	new_item = CopyIndexTuple(item);
	ItemPointerSet(&(new_item->t_tid), rbkno, P_HIKEY);

	/*
	 * insert the right page pointer into the new root page.
	 */
	if (PageAddItem(rootpage, (Item) new_item, itemsz, P_FIRSTKEY,
					false, false) == InvalidOffsetNumber)
		elog(PANIC, "failed to add rightkey to new root page"
			 " while splitting block %u of index \"%s\"",
			 BufferGetBlockNumber(lbuf), RelationGetRelationName(rel));
	pfree(new_item);

	MarkBufferDirty(rootbuf);
	MarkBufferDirty(metabuf);

	/* XLOG stuff */
	if (!rel->rd_istemp)
	{
		xl_btree_newroot xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];

		xlrec.node = rel->rd_node;
		xlrec.rootblk = rootblknum;
		xlrec.level = metad->btm_level;

		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeNewroot;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		/*
		 * Direct access to page is not good but faster - we should implement
		 * some new func in page API.
		 */
		rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
		rdata[1].len = ((PageHeader) rootpage)->pd_special -
			((PageHeader) rootpage)->pd_upper;
		rdata[1].buffer = InvalidBuffer;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);

		PageSetLSN(rootpage, recptr);
		PageSetTLI(rootpage, ThisTimeLineID);
		PageSetLSN(metapg, recptr);
		PageSetTLI(metapg, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	/* send out relcache inval for metapage change */
	if (!InRecovery)
		CacheInvalidateRelcache(rel);

	/* done with metapage */
	_bt_relbuf(rel, metabuf);

	return rootbuf;
}

/*
 *	_bt_pgaddtup() -- add a tuple to a particular page in the index.
 *
 *		This routine adds the tuple to the page as requested.  It does
 *		not affect pin/lock status, but you'd better have a write lock
 *		and pin on the target buffer!  Don't forget to write and release
 *		the buffer afterwards, either.
 *
 *		The main difference between this routine and a bare PageAddItem call
 *		is that this code knows that the leftmost index tuple on a non-leaf
 *		btree page doesn't need to have a key.  Therefore, it strips such
 *		tuples down to just the tuple header.  CAUTION: this works ONLY if
 *		we insert the tuples in order, so that the given itup_off does
 *		represent the final position of the tuple!
 */
static void
_bt_pgaddtup(Relation rel,
			 Page page,
			 Size itemsize,
			 IndexTuple itup,
			 OffsetNumber itup_off,
			 const char *where)
{
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	IndexTupleData trunctuple;

	if (!P_ISLEAF(opaque) && itup_off == P_FIRSTDATAKEY(opaque))
	{
		trunctuple = *itup;
		trunctuple.t_info = sizeof(IndexTupleData);
		itup = &trunctuple;
		itemsize = sizeof(IndexTupleData);
	}

	if (PageAddItem(page, (Item) itup, itemsize, itup_off,
					false, false) == InvalidOffsetNumber)
		elog(PANIC, "failed to add item to the %s in index \"%s\"",
			 where, RelationGetRelationName(rel));
}

/*
 * _bt_isequal - used in _bt_doinsert in check for duplicates.
 *
 * This is very similar to _bt_compare, except for NULL handling.
 * Rule is simple: NOT_NULL not equal NULL, NULL not equal NULL too.
 */
static bool
_bt_isequal(TupleDesc itupdesc, Page page, OffsetNumber offnum,
			int keysz, ScanKey scankey)
{
	IndexTuple	itup;
	int			i;

	/* Better be comparing to a leaf item */
	Assert(P_ISLEAF((BTPageOpaque) PageGetSpecialPointer(page)));

	itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));

	for (i = 1; i <= keysz; i++)
	{
		AttrNumber	attno;
		Datum		datum;
		bool		isNull;
		int32		result;

		attno = scankey->sk_attno;
		Assert(attno == i);
		datum = index_getattr(itup, attno, itupdesc, &isNull);

		/* NULLs are never equal to anything */
		if (isNull || (scankey->sk_flags & SK_ISNULL))
			return false;

		result = DatumGetInt32(FunctionCall2(&scankey->sk_func,
											 datum,
											 scankey->sk_argument));

		if (result != 0)
			return false;

		scankey++;
	}

	/* if we get here, the keys are equal */
	return true;
}

/*
 * _bt_vacuum_one_page - vacuum just one index page.
 *
 * Try to remove LP_DEAD items from the given page.  The passed buffer
 * must be exclusive-locked, but unlike a real VACUUM, we don't need a
 * super-exclusive "cleanup" lock (see nbtree/README).
 */
static void
_bt_vacuum_one_page(Relation rel, Buffer buffer)
{
	OffsetNumber deletable[MaxOffsetNumber];
	int			ndeletable = 0;
	OffsetNumber offnum,
				minoff,
				maxoff;
	Page		page = BufferGetPage(buffer);
	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/*
	 * Scan over all items to see which ones need to be deleted according to
	 * LP_DEAD flags.
	 */
	minoff = P_FIRSTDATAKEY(opaque);
	maxoff = PageGetMaxOffsetNumber(page);
	for (offnum = minoff;
		 offnum <= maxoff;
		 offnum = OffsetNumberNext(offnum))
	{
		ItemId		itemId = PageGetItemId(page, offnum);

		if (ItemIdIsDead(itemId))
			deletable[ndeletable++] = offnum;
	}

	if (ndeletable > 0)
		_bt_delitems(rel, buffer, deletable, ndeletable);

	/*
	 * Note: if we didn't find any LP_DEAD items, then the page's
	 * BTP_HAS_GARBAGE hint bit is falsely set.  We do not bother expending a
	 * separate write to clear it, however.  We will clear it when we split
	 * the page.
	 */
}
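
The comment block in _bt_getstackbuf above explains the probe order: scan to the right of the remembered offset first, then to the left, because insertions into the parent level can only push the remembered downlink rightward from where it was last seen, while deletions move it only slightly left. The standalone sketch below illustrates that search order on a plain integer array; find_from_hint and its test data are hypothetical illustrations, not part of nbtinsert.c.

#include <stdio.h>

/*
 * Illustration only (not PostgreSQL code): probe an array for `target`,
 * starting at a remembered position `start`, scanning right first and
 * then left -- the same order _bt_getstackbuf uses on a btree page.
 * Returns the index of the match, or -1 if not found.
 */
static int
find_from_hint(const int *items, int nitems, int start, int target)
{
	int			i;

	if (start < 0)
		start = 0;				/* no usable hint: search everything */
	if (start > nitems)
		start = nitems;			/* hint can point past the end */

	/* scan to the right of the hint first ... */
	for (i = start; i < nitems; i++)
		if (items[i] == target)
			return i;
	/* ... then to the left */
	for (i = start - 1; i >= 0; i--)
		if (items[i] == target)
			return i;
	return -1;					/* caller would move on to the right sibling */
}

int
main(void)
{
	int			page[] = {10, 20, 30, 40, 50};

	/* downlink was last seen at offset 3, but has since moved left */
	printf("%d\n", find_from_hint(page, 5, 3, 20));		/* prints 1 */
	return 0;
}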

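The rule documented for _bt_isequal ("NOT_NULL not equal NULL, NULL not equal NULL too") is what lets a unique btree index hold any number of NULL entries: a NULL key never counts as a duplicate of anything, including another NULL. A minimal standalone sketch of that rule, assuming a hypothetical keys_equal helper with an explicit null flag per key rather than PostgreSQL's ScanKey machinery:

#include <stdbool.h>
#include <stdio.h>

/* Illustration only: one key column with an explicit null flag. */
typedef struct
{
	int			value;
	bool		isnull;
} KeyCol;

/*
 * Compare two single-column keys the way _bt_isequal treats NULLs:
 * if either side is NULL, the keys are never considered equal, so the
 * unique-index duplicate check never fires on NULL entries.
 */
static bool
keys_equal(KeyCol a, KeyCol b)
{
	if (a.isnull || b.isnull)
		return false;			/* NULLs are never equal to anything */
	return a.value == b.value;
}

int
main(void)
{
	KeyCol		null_key = {0, true};
	KeyCol		one = {1, false};

	printf("%d\n", keys_equal(one, one));			/* 1: a real duplicate */
	printf("%d\n", keys_equal(one, null_key));		/* 0 */
	printf("%d\n", keys_equal(null_key, null_key));	/* 0: NULL != NULL */
	return 0;
}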