nbtpage.c

来自「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」· C语言代码 · 共 1,138 行 · 第 1/3 页
1,138 行
	rootbuf = metabuf;

	for (;;)
	{
		rootbuf = _bt_relandgetbuf(rel, rootbuf, rootblkno, BT_READ);
		rootpage = BufferGetPage(rootbuf);
		rootopaque = (BTPageOpaque) PageGetSpecialPointer(rootpage);

		if (!P_IGNORE(rootopaque))
			break;

		/* it's dead, Jim.  step right one page */
		if (P_RIGHTMOST(rootopaque))
			elog(ERROR, "no live root page found in \"%s\"",
				 RelationGetRelationName(rel));
		rootblkno = rootopaque->btpo_next;
	}

	/* Note: can't check btpo.level on deleted pages */
	if (rootopaque->btpo.level != rootlevel)
		elog(ERROR, "root page %u of \"%s\" has level %u, expected %u",
			 rootblkno, RelationGetRelationName(rel),
			 rootopaque->btpo.level, rootlevel);

	return rootbuf;
}

/*
 *	_bt_getbuf() -- Get a buffer by block number for read or write.
 *
 *		rel is the index relation, blkno the block to fetch, and access the
 *		lock mode handed to LockBuffer().
 *
 *		blkno == P_NEW means to get an unallocated index page.  In that case
 *		access must be BT_WRITE (this is asserted), and the page is
 *		(re)initialized with _bt_pageinit() before it is returned.
 *
 *		When this routine returns, the appropriate lock is set on the
 *		requested buffer and its reference count has been incremented
 *		(ie, the buffer is "locked and pinned").
 */
Buffer
_bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
	Buffer		buf;

	if (blkno != P_NEW)
	{
		/* Read an existing block of the relation */
		buf = ReadBuffer(rel, blkno);
		LockBuffer(buf, access);
	}
	else
	{
		bool		needLock;
		Page		page;

		Assert(access == BT_WRITE);

		/*
		 * First see if the FSM knows of any free pages.
		 *
		 * We can't trust the FSM's report unreservedly; we have to check that
		 * the page is still free.  (For example, an already-free page could
		 * have been re-used between the time the last VACUUM scanned it and
		 * the time the VACUUM made its FSM updates.)
		 *
		 * In fact, it's worse than that: we can't even assume that it's safe
		 * to take a lock on the reported page.  If somebody else has a lock
		 * on it, or even worse our own caller does, we could deadlock.  (The
		 * own-caller scenario is actually not improbable. Consider an index
		 * on a serial or timestamp column.  Nearly all splits will be at the
		 * rightmost page, so it's entirely likely that _bt_split will call us
		 * while holding a lock on the page most recently acquired from FSM. A
		 * VACUUM running concurrently with the previous split could well have
		 * placed that page back in FSM.)
		 *
		 * To get around that, we ask for only a conditional lock on the
		 * reported page.  If we fail, then someone else is using the page,
		 * and we may reasonably assume it's not free.  (If we happen to be
		 * wrong, the worst consequence is the page will be lost to use till
		 * the next VACUUM, which is no big problem.)
		 */
		for (;;)
		{
			/* Loop ends once the FSM has no further candidate to offer */
			blkno = GetFreeIndexPage(&rel->rd_node);
			if (blkno == InvalidBlockNumber)
				break;
			buf = ReadBuffer(rel, blkno);
			if (ConditionalLockBuffer(buf))
			{
				page = BufferGetPage(buf);
				if (_bt_page_recyclable(page))
				{
					/* Okay to use page.  Re-initialize and return it */
					_bt_pageinit(page, BufferGetPageSize(buf));
					return buf;
				}
				elog(DEBUG2, "FSM returned nonrecyclable page");
				/* we hold both lock and pin here, so drop both */
				_bt_relbuf(rel, buf);
			}
			else
			{
				elog(DEBUG2, "FSM returned nonlockable page");
				/* couldn't get lock, so just drop pin */
				ReleaseBuffer(buf);
			}
		}

		/*
		 * Extend the relation by one page.
		 *
		 * We have to use a lock to ensure no one else is extending the rel at
		 * the same time, else we will both try to initialize the same new
		 * page.  We can skip locking for new or temp relations, however,
		 * since no one else could be accessing them.
		 */
		needLock = !RELATION_IS_LOCAL(rel);

		if (needLock)
			LockRelationForExtension(rel, ExclusiveLock);

		buf = ReadBuffer(rel, P_NEW);

		/* Acquire buffer lock on new page */
		LockBuffer(buf, BT_WRITE);

		/*
		 * Release the file-extension lock; it's now OK for someone else to
		 * extend the relation some more.  Note that we cannot release this
		 * lock before we have buffer lock on the new page, or we risk a race
		 * condition against btvacuumcleanup --- see comments therein.
		 */
		if (needLock)
			UnlockRelationForExtension(rel, ExclusiveLock);

		/* Initialize the new page before returning it */
		page = BufferGetPage(buf);
		Assert(PageIsNew((PageHeader) page));
		_bt_pageinit(page, BufferGetPageSize(buf));
	}

	/* ref count and lock type are correct */
	return buf;
}

/*
 *	_bt_relandgetbuf() -- release a locked buffer and get another one.
 *
 * This is equivalent to _bt_relbuf followed by _bt_getbuf, with the
 * exception that blkno may not be P_NEW (this is asserted).  Also, if obuf
 * is InvalidBuffer then it reduces to just _bt_getbuf; allowing this case
 * simplifies some callers.
 *
 * The motivation for using this is to avoid two entries to the
 * bufmgr when one will do.
 *
 * Returns the buffer for blkno, locked with the requested access mode.
 */
Buffer
_bt_relandgetbuf(Relation rel, Buffer obuf, BlockNumber blkno, int access)
{
	Buffer		buf;

	Assert(blkno != P_NEW);
	/* Only a valid old buffer has a lock that needs dropping */
	if (BufferIsValid(obuf))
		LockBuffer(obuf, BUFFER_LOCK_UNLOCK);
	buf = ReleaseAndReadBuffer(obuf, rel, blkno);
	LockBuffer(buf, access);
	return buf;
}

/*
 *	_bt_relbuf() -- release a locked buffer.
 *
 * Lock and pin (refcount) are both dropped.  Note that either read or
 * write lock can be dropped this way, but if we modified the buffer,
 * this is NOT the right way to release a write lock.
 *
 * The rel argument is not referenced by the function body.
 */
void
_bt_relbuf(Relation rel, Buffer buf)
{
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);
}

/*
 *	_bt_wrtbuf() -- write a btree page to disk.
 *
 *		This routine releases the lock held on the buffer and our refcount
 *		for it.  It is an error to call _bt_wrtbuf() without a write lock
 *		and a pin on the buffer.
 *
 * NOTE: actually, the buffer manager just marks the shared buffer page
 * dirty here; the real I/O happens later.  This is okay since we are not
 * relying on write ordering anyway.  The WAL mechanism is responsible for
 * guaranteeing correctness after a crash.
 *
 * The rel argument is not referenced by the function body.
 */
void
_bt_wrtbuf(Relation rel, Buffer buf)
{
	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	WriteBuffer(buf);
}

/*
 *	_bt_wrtnorelbuf() -- write a btree page to disk, but do not release
 *						 our reference or lock.
 *
 *		It is an error to call _bt_wrtnorelbuf() without a write lock
 *		and a pin on the buffer.
 *
 * See the NOTE on _bt_wrtbuf() about what "write" means here.
 *
 * The rel argument is not referenced by the function body.
 */
void
_bt_wrtnorelbuf(Relation rel, Buffer buf)
{
	WriteNoReleaseBuffer(buf);
}

/*
 *	_bt_pageinit() -- Initialize a new page.
 *
 * On return, the page header is initialized; data space is empty;
 * special space is zeroed out.  The special space is sized to hold one
 * BTPageOpaqueData.
 */
void
_bt_pageinit(Page page, Size size)
{
	PageInit(page, size, sizeof(BTPageOpaqueData));
}

/*
 *	_bt_page_recyclable() -- Is an existing page recyclable?
 *
 * This exists to make sure _bt_getbuf and btvacuumcleanup have the same
 * policy about whether a page is safe to re-use.
 *
 * Returns true if the page is new (all zeroes), or if it is marked deleted
 * and its btpo.xact precedes or equals RecentXmin; otherwise false.
 */
bool
_bt_page_recyclable(Page page)
{
	BTPageOpaque opaque;

	/*
	 * It's possible to find an all-zeroes page in an index --- for example, a
	 * backend might successfully extend the relation one page and then crash
	 * before it is able to make a WAL entry for adding the page. If we find a
	 * zeroed page then reclaim it.
	 */
	if (PageIsNew(page))
		return true;

	/*
	 * Otherwise, recycle if deleted and too old to have any processes
	 * interested in it.
	 */
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	if (P_ISDELETED(opaque) &&
		TransactionIdPrecedesOrEquals(opaque->btpo.xact, RecentXmin))
		return true;
	return false;
}

/*
 * Delete item(s) from a btree page.
 *
 * This must only be used for deleting leaf items.  Deleting an item on a
 * non-leaf page has to be done as part of an atomic action that includes
 * deleting the page it points to.
 *
 * This routine assumes that the caller has pinned and locked the buffer,
 * and will write the buffer afterwards.  Also, the given itemnos *must*
 * appear in increasing order in the array.
 *
 * itemnos holds nitems offset numbers to remove.  nitems may be zero, in
 * which case the WAL record carries an empty offsets payload.  No WAL
 * record is written at all when rel->rd_istemp is set.
 */
void
_bt_delitems(Relation rel, Buffer buf,
			 OffsetNumber *itemnos, int nitems)
{
	Page		page = BufferGetPage(buf);

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/* Fix the page */
	PageIndexMultiDelete(page, itemnos, nitems);

	/* XLOG stuff */
	if (!rel->rd_istemp)
	{
		xl_btree_delete xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];

		xlrec.node = rel->rd_node;
		xlrec.block = BufferGetBlockNumber(buf);

		/* First chain entry: the fixed-size record header, tied to no buffer */
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeDelete;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = &(rdata[1]);

		/*
		 * The target-offsets array is not in the buffer, but pretend that it
		 * is.  When XLogInsert stores the whole buffer, the offsets array
		 * need not be stored too.
		 */
		if (nitems > 0)
		{
			rdata[1].data = (char *) itemnos;
			rdata[1].len = nitems * sizeof(OffsetNumber);
		}
		else
		{
			rdata[1].data = NULL;
			rdata[1].len = 0;
		}
		rdata[1].buffer = buf;
		rdata[1].buffer_std = true;
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);

		/* Stamp the page with the position of the record just inserted */
		PageSetLSN(page, recptr);
		PageSetTLI(page, ThisTimeLineID);
	}

	END_CRIT_SECTION();
}

/*
 * _bt_pagedel() -- Delete a page from the b-tree.
 *
 * This action unlinks the page from the b-tree structure, removing all
 * pointers leading to it --- but not touching its own left and right links.
 * The page cannot be physically reclaimed right away, since other processes
 * may currently be trying to follow links leading to the page; they have to
 * be allowed to use its right-link to recover.  See nbtree/README.
 *
 * On entry, the target buffer must be pinned and read-locked.  This lock and
 * pin will be dropped before exiting.
 *
 * Returns the number of pages successfully deleted (zero on failure; could
 * be more than one if parent blocks were deleted).
 *
 * NOTE: this leaks memory.  Rather than trying to clean up everything
 * carefully, it's better to run it in a temp context that can be reset
 * frequently.
 */
int
_bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
{
	BlockNumber target,
				leftsib,
				rightsib,
				parent;
	OffsetNumber poffset,
				maxoff;
	uint32		targetlevel,
				ilevel;
	ItemId		itemid;
	BTItem		targetkey,
				btitem;
	ScanKey		itup_scankey;
	BTStack		stack;
	Buffer		lbuf,
				rbuf,
				pbuf;
	bool		parent_half_dead;
	bool		parent_one_child;
	bool		rightsib_empty;
	Buffer		metabuf = InvalidBuffer;
	Page		metapg = NULL;
	BTMetaPageData *metad = NULL;
	Page		page;
	BTPageOpaque opaque;

	/*
	 * We can never delete rightmost pages nor root pages.  While at it, check
	 * that page is not already deleted and is empty.
	 */
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) ||
		P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page))
	{
		_bt_relbuf(rel, buf);
		return 0;
	}
nbtpage.c - 源码说明

本页面展示了「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」中的 nbtpage.c 源码文件，该文件采用 C 语言编写，共 1,138 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?