📄 nbtpage.c

📁 postgresql8.3.4源码,开源数据库
💻 C
📖 第 1 页 / 共 3 页
字号:
				 errhint("Please REINDEX it.")));}/* *	_bt_getbuf() -- Get a buffer by block number for read or write. * *		blkno == P_NEW means to get an unallocated index page.	The page *		will be initialized before returning it. * *		When this routine returns, the appropriate lock is set on the *		requested buffer and its reference count has been incremented *		(ie, the buffer is "locked and pinned").  Also, we apply *		_bt_checkpage to sanity-check the page (except in P_NEW case). */Buffer_bt_getbuf(Relation rel, BlockNumber blkno, int access){	Buffer		buf;	if (blkno != P_NEW)	{		/* Read an existing block of the relation */		buf = ReadBuffer(rel, blkno);		LockBuffer(buf, access);		_bt_checkpage(rel, buf);	}	else	{		bool		needLock;		Page		page;		Assert(access == BT_WRITE);		/*		 * First see if the FSM knows of any free pages.		 *		 * We can't trust the FSM's report unreservedly; we have to check that		 * the page is still free.	(For example, an already-free page could		 * have been re-used between the time the last VACUUM scanned it and		 * the time the VACUUM made its FSM updates.)		 *		 * In fact, it's worse than that: we can't even assume that it's safe		 * to take a lock on the reported page.  If somebody else has a lock		 * on it, or even worse our own caller does, we could deadlock.  (The		 * own-caller scenario is actually not improbable. Consider an index		 * on a serial or timestamp column.  Nearly all splits will be at the		 * rightmost page, so it's entirely likely that _bt_split will call us		 * while holding a lock on the page most recently acquired from FSM. A		 * VACUUM running concurrently with the previous split could well have		 * placed that page back in FSM.)		 *		 * To get around that, we ask for only a conditional lock on the		 * reported page.  If we fail, then someone else is using the page,		 * and we may reasonably assume it's not free.  (If we happen to be		 * wrong, the worst consequence is the page will be lost to use till		 * the next VACUUM, which is no big problem.)		 */		for (;;)		{			blkno = GetFreeIndexPage(&rel->rd_node);			if (blkno == InvalidBlockNumber)				break;			buf = ReadBuffer(rel, blkno);			if (ConditionalLockBuffer(buf))			{				page = BufferGetPage(buf);				if (_bt_page_recyclable(page))				{					/* Okay to use page.  Re-initialize and return it */					_bt_pageinit(page, BufferGetPageSize(buf));					return buf;				}				elog(DEBUG2, "FSM returned nonrecyclable page");				_bt_relbuf(rel, buf);			}			else			{				elog(DEBUG2, "FSM returned nonlockable page");				/* couldn't get lock, so just drop pin */				ReleaseBuffer(buf);			}		}		/*		 * Extend the relation by one page.		 *		 * We have to use a lock to ensure no one else is extending the rel at		 * the same time, else we will both try to initialize the same new		 * page.  We can skip locking for new or temp relations, however,		 * since no one else could be accessing them.		 */		needLock = !RELATION_IS_LOCAL(rel);		if (needLock)			LockRelationForExtension(rel, ExclusiveLock);		buf = ReadBuffer(rel, P_NEW);		/* Acquire buffer lock on new page */		LockBuffer(buf, BT_WRITE);		/*		 * Release the file-extension lock; it's now OK for someone else to		 * extend the relation some more.  Note that we cannot release this		 * lock before we have buffer lock on the new page, or we risk a race		 * condition against btvacuumscan --- see comments therein.		 */		if (needLock)			UnlockRelationForExtension(rel, ExclusiveLock);		/* Initialize the new page before returning it */		page = BufferGetPage(buf);		Assert(PageIsNew((PageHeader) page));		_bt_pageinit(page, BufferGetPageSize(buf));	}	/* ref count and lock type are correct */	return buf;}/* *	_bt_relandgetbuf() -- release a locked buffer and get another one. * * This is equivalent to _bt_relbuf followed by _bt_getbuf, with the * exception that blkno may not be P_NEW.  Also, if obuf is InvalidBuffer * then it reduces to just _bt_getbuf; allowing this case simplifies some * callers. The motivation for using this is to avoid two entries to the * bufmgr when one will do. */Buffer_bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access){	Buffer		buf;	Assert(blkno != P_NEW);	if (BufferIsValid(obuf))		LockBuffer(obuf, BUFFER_LOCK_UNLOCK);	buf = ReleaseAndReadBuffer(obuf, rel, blkno);	LockBuffer(buf, access);	_bt_checkpage(rel, buf);	return buf;}/* *	_bt_relbuf() -- release a locked buffer. * * Lock and pin (refcount) are both dropped. */void_bt_relbuf(Relation rel, Buffer buf){	UnlockReleaseBuffer(buf);}/* *	_bt_pageinit() -- Initialize a new page. * * On return, the page header is initialized; data space is empty; * special space is zeroed out. */void_bt_pageinit(Page page, Size size){	PageInit(page, size, sizeof(BTPageOpaqueData));}/* *	_bt_page_recyclable() -- Is an existing page recyclable? * * This exists to make sure _bt_getbuf and btvacuumscan have the same * policy about whether a page is safe to re-use. */bool_bt_page_recyclable(Page page){	BTPageOpaque opaque;	/*	 * It's possible to find an all-zeroes page in an index --- for example, a	 * backend might successfully extend the relation one page and then crash	 * before it is able to make a WAL entry for adding the page. If we find a	 * zeroed page then reclaim it.	 */	if (PageIsNew(page))		return true;	/*	 * Otherwise, recycle if deleted and too old to have any processes	 * interested in it.	 */	opaque = (BTPageOpaque) PageGetSpecialPointer(page);	if (P_ISDELETED(opaque) &&		TransactionIdPrecedesOrEquals(opaque->btpo.xact, RecentXmin))		return true;	return false;}/* * Delete item(s) from a btree page. * * This must only be used for deleting leaf items.	Deleting an item on a * non-leaf page has to be done as part of an atomic action that includes * deleting the page it points to. * * This routine assumes that the caller has pinned and locked the buffer. * Also, the given itemnos *must* appear in increasing order in the array. */void_bt_delitems(Relation rel, Buffer buf,			 OffsetNumber *itemnos, int nitems){	Page		page = BufferGetPage(buf);	BTPageOpaque opaque;	/* No ereport(ERROR) until changes are logged */	START_CRIT_SECTION();	/* Fix the page */	PageIndexMultiDelete(page, itemnos, nitems);	/*	 * We can clear the vacuum cycle ID since this page has certainly been	 * processed by the current vacuum scan.	 */	opaque = (BTPageOpaque) PageGetSpecialPointer(page);	opaque->btpo_cycleid = 0;	/*	 * Mark the page as not containing any LP_DEAD items.  This is not	 * certainly true (there might be some that have recently been marked, but	 * weren't included in our target-item list), but it will almost always be	 * true and it doesn't seem worth an additional page scan to check it.	 * Remember that BTP_HAS_GARBAGE is only a hint anyway.	 */	opaque->btpo_flags &= ~BTP_HAS_GARBAGE;	MarkBufferDirty(buf);	/* XLOG stuff */	if (!rel->rd_istemp)	{		xl_btree_delete xlrec;		XLogRecPtr	recptr;		XLogRecData rdata[2];		xlrec.node = rel->rd_node;		xlrec.block = BufferGetBlockNumber(buf);		rdata[0].data = (char *) &xlrec;		rdata[0].len = SizeOfBtreeDelete;		rdata[0].buffer = InvalidBuffer;		rdata[0].next = &(rdata[1]);		/*		 * The target-offsets array is not in the buffer, but pretend that it		 * is.	When XLogInsert stores the whole buffer, the offsets array		 * need not be stored too.		 */		if (nitems > 0)		{			rdata[1].data = (char *) itemnos;			rdata[1].len = nitems * sizeof(OffsetNumber);		}		else		{			rdata[1].data = NULL;			rdata[1].len = 0;		}		rdata[1].buffer = buf;		rdata[1].buffer_std = true;		rdata[1].next = NULL;		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);		PageSetLSN(page, recptr);		PageSetTLI(page, ThisTimeLineID);	}	END_CRIT_SECTION();}/* * Subroutine to pre-check whether a page deletion is safe, that is, its * parent page would be left in a valid or deletable state. * * "target" is the page we wish to delete, and "stack" is a search stack * leading to it (approximately).  Note that we will update the stack * entry(s) to reflect current downlink positions --- this is harmless and * indeed saves later search effort in _bt_pagedel. * * Note: it's OK to release page locks after checking, because a safe * deletion can't become unsafe due to concurrent activity.  A non-rightmost * page cannot become rightmost unless there's a concurrent page deletion, * but only VACUUM does page deletion and we only allow one VACUUM on an index * at a time.  An only child could acquire a sibling (of the same parent) only * by being split ... but that would make it a non-rightmost child so the * deletion is still safe. */static bool_bt_parent_deletion_safe(Relation rel, BlockNumber target, BTStack stack){	BlockNumber parent;	OffsetNumber poffset,				maxoff;	Buffer		pbuf;	Page		page;	BTPageOpaque opaque;	/*	 * In recovery mode, assume the deletion being replayed is valid.  We	 * can't always check it because we won't have a full search stack, and we	 * should complain if there's a problem, anyway.	 */	if (InRecovery)		return true;	/* Locate the parent's downlink (updating the stack entry if needed) */	ItemPointerSet(&(stack->bts_btentry.t_tid), target, P_HIKEY);	pbuf = _bt_getstackbuf(rel, stack, BT_READ);	if (pbuf == InvalidBuffer)		elog(ERROR, "failed to re-find parent key in index \"%s\" for deletion target page %u",			 RelationGetRelationName(rel), target);	parent = stack->bts_blkno;	poffset = stack->bts_offset;	page = BufferGetPage(pbuf);	opaque = (BTPageOpaque) PageGetSpecialPointer(page);	maxoff = PageGetMaxOffsetNumber(page);	/*	 * If the target is the rightmost child of its parent, then we can't	 * delete, unless it's also the only child.	 */	if (poffset >= maxoff)	{		/* It's rightmost child... */		if (poffset == P_FIRSTDATAKEY(opaque))		{			/*			 * It's only child, so safe if parent would itself be removable.			 * We have to check the parent itself, and then recurse to test			 * the conditions at the parent's parent.			 */			if (P_RIGHTMOST(opaque) || P_ISROOT(opaque))			{				_bt_relbuf(rel, pbuf);				return false;			}			_bt_relbuf(rel, pbuf);			return _bt_parent_deletion_safe(rel, parent, stack->bts_parent);		}		else		{			/* Unsafe to delete */			_bt_relbuf(rel, pbuf);			return false;		}	}	else	{		/* Not rightmost child, so safe to delete */		_bt_relbuf(rel, pbuf);		return true;	}}/* * _bt_pagedel() -- Delete a page from the b-tree, if legal to do so. * * This action unlinks the page from the b-tree structure, removing all * pointers leading to it --- but not touching its own left and right links. * The page cannot be physically reclaimed right away, since other processes * may currently be trying to follow links leading to the page; they have to * be allowed to use its right-link to recover.  See nbtree/README. * * On entry, the target buffer must be pinned and locked (either read or write * lock is OK).  This lock and pin will be dropped before exiting. * * The "stack" argument can be a search stack leading (approximately) to the * target page, or NULL --- outside callers typically pass NULL since they * have not done such a search, but internal recursion cases pass the stack * to avoid duplicated search effort. * * Returns the number of pages successfully deleted (zero if page cannot * be deleted now; could be more than one if parent pages were deleted too). * * NOTE: this leaks memory.  Rather than trying to clean up everything * carefully, it's better to run it in a temp context that can be reset * frequently. */int_bt_pagedel(Relation rel, Buffer buf, BTStack stack, bool vacuum_full){	int			result;	BlockNumber target,				leftsib,				rightsib,				parent;	OffsetNumber poffset,				maxoff;	uint32		targetlevel,				ilevel;	ItemId		itemid;	IndexTuple	targetkey,				itup;	ScanKey		itup_scankey;	Buffer		lbuf,				rbuf,				pbuf;	bool		parent_half_dead;	bool		parent_one_child;	bool		rightsib_empty;	Buffer		metabuf = InvalidBuffer;	Page		metapg = NULL;	BTMetaPageData *metad = NULL;	Page		page;	BTPageOpaque opaque;	/*	 * We can never delete rightmost pages nor root pages.	While at it, check	 * that page is not already deleted and is empty.	 */	page = BufferGetPage(buf);	opaque = (BTPageOpaque) PageGetSpecialPointer(page);	if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||		P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))	{		/* Should never fail to delete a half-dead page */		Assert(!P_ISHALFDEAD(opaque));		_bt_relbuf(rel, buf);		return 0;	}	/*	 * Save info about page, including a copy of its high key (it must have	 * one, being non-rightmost).	 */	target = BufferGetBlockNumber(buf);	targetlevel = opaque->btpo.level;	leftsib = opaque->btpo_prev;	itemid = PageGetItemId(page, P_HIKEY);
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -