nbtpage.c

来自「PostgreSQL7.4.6 for Linux」· C语言代码 · 共 1,163 行 · 第 1/3 页
1,163 行
 *		When this routine returns, the appropriate lock is set on the *		requested buffer and its reference count has been incremented *		(ie, the buffer is "locked and pinned"). */Buffer_bt_getbuf(Relation rel, BlockNumber blkno, int access){	Buffer		buf;	if (blkno != P_NEW)	{		/* Read an existing block of the relation */		buf = ReadBuffer(rel, blkno);		LockBuffer(buf, access);	}	else	{		bool		needLock;		Page		page;		Assert(access == BT_WRITE);		/*		 * First see if the FSM knows of any free pages.		 *		 * We can't trust the FSM's report unreservedly; we have to check		 * that the page is still free.  (For example, an already-free		 * page could have been re-used between the time the last VACUUM		 * scanned it and the time the VACUUM made its FSM updates.)		 *		 * In fact, it's worse than that: we can't even assume that it's		 * safe to take a lock on the reported page.  If somebody else		 * has a lock on it, or even worse our own caller does, we could		 * deadlock.  (The own-caller scenario is actually not improbable.		 * Consider an index on a serial or timestamp column.  Nearly all		 * splits will be at the rightmost page, so it's entirely likely		 * that _bt_split will call us while holding a lock on the page most		 * recently acquired from FSM.  A VACUUM running concurrently with		 * the previous split could well have placed that page back in FSM.)		 *		 * To get around that, we ask for only a conditional lock on the		 * reported page.  If we fail, then someone else is using the page,		 * and we may reasonably assume it's not free.  (If we happen to be		 * wrong, the worst consequence is the page will be lost to use till		 * the next VACUUM, which is no big problem.)		 */		for (;;)		{			blkno = GetFreeIndexPage(&rel->rd_node);			if (blkno == InvalidBlockNumber)				break;			buf = ReadBuffer(rel, blkno);			if (ConditionalLockBuffer(buf))			{				page = BufferGetPage(buf);				if (_bt_page_recyclable(page))				{					/* Okay to use page.  
Re-initialize and return it */					_bt_pageinit(page, BufferGetPageSize(buf));					return buf;				}				elog(DEBUG2, "FSM returned nonrecyclable page");				_bt_relbuf(rel, buf);			}			else			{				elog(DEBUG2, "FSM returned nonlockable page");				/* couldn't get lock, so just drop pin */				ReleaseBuffer(buf);			}		}		/*		 * Extend the relation by one page.		 *		 * We have to use a lock to ensure no one else is extending the rel		 * at the same time, else we will both try to initialize the same		 * new page.  We can skip locking for new or temp relations,		 * however, since no one else could be accessing them.		 */		needLock = !(rel->rd_isnew || rel->rd_istemp);		if (needLock)			LockPage(rel, 0, ExclusiveLock);		buf = ReadBuffer(rel, P_NEW);		/*		 * Release the file-extension lock; it's now OK for someone else		 * to extend the relation some more.		 */		if (needLock)			UnlockPage(rel, 0, ExclusiveLock);		/* Acquire appropriate buffer lock on new page */		LockBuffer(buf, access);		/* Initialize the new page before returning it */		page = BufferGetPage(buf);		_bt_pageinit(page, BufferGetPageSize(buf));	}	/* ref count and lock type are correct */	return buf;}/* *	_bt_relbuf() -- release a locked buffer. * * Lock and pin (refcount) are both dropped.  Note that either read or * write lock can be dropped this way, but if we modified the buffer, * this is NOT the right way to release a write lock. */void_bt_relbuf(Relation rel, Buffer buf){	LockBuffer(buf, BUFFER_LOCK_UNLOCK);	ReleaseBuffer(buf);}/* *	_bt_wrtbuf() -- write a btree page to disk. * *		This routine releases the lock held on the buffer and our refcount *		for it.  It is an error to call _bt_wrtbuf() without a write lock *		and a pin on the buffer. * * NOTE: actually, the buffer manager just marks the shared buffer page * dirty here; the real I/O happens later.	This is okay since we are not * relying on write ordering anyway.  The WAL mechanism is responsible for * guaranteeing correctness after a crash. 
*/void_bt_wrtbuf(Relation rel, Buffer buf){	LockBuffer(buf, BUFFER_LOCK_UNLOCK);	WriteBuffer(buf);}/* *	_bt_wrtnorelbuf() -- write a btree page to disk, but do not release *						 our reference or lock. * *		It is an error to call _bt_wrtnorelbuf() without a write lock *		and a pin on the buffer. * * See above NOTE. */void_bt_wrtnorelbuf(Relation rel, Buffer buf){	WriteNoReleaseBuffer(buf);}/* *	_bt_pageinit() -- Initialize a new page. * * On return, the page header is initialized; data space is empty; * special space is zeroed out. */void_bt_pageinit(Page page, Size size){	PageInit(page, size, sizeof(BTPageOpaqueData));}/* *	_bt_page_recyclable() -- Is an existing page recyclable? * * This exists to make sure _bt_getbuf and btvacuumcleanup have the same * policy about whether a page is safe to re-use. */bool_bt_page_recyclable(Page page){	BTPageOpaque opaque;	/*	 * It's possible to find an all-zeroes page in an index --- for	 * example, a backend might successfully extend the relation one page	 * and then crash before it is able to make a WAL entry for adding the	 * page. If we find a zeroed page then reclaim it.	 */	if (PageIsNew(page))		return true;	/*	 * Otherwise, recycle if deleted and too old to have any processes	 * interested in it.	 */	opaque = (BTPageOpaque) PageGetSpecialPointer(page);	if (P_ISDELETED(opaque) &&		TransactionIdPrecedesOrEquals(opaque->btpo.xact, RecentXmin))		return true;	return false;}/* *	_bt_metaproot() -- Change the root page of the btree. * *		Lehman and Yao require that the root page move around in order to *		guarantee deadlock-free short-term, fine-granularity locking.  When *		we split the root page, we record the new parent in the metadata page *		for the relation.  This routine does the work. * *		No direct preconditions, but if you don't have the write lock on *		at least the old root page when you call this, you're making a big *		mistake.  
On exit, metapage data is correct and we no longer have *		a pin or lock on the metapage. * * Actually this is not used for splitting on-the-fly anymore.	It's only used * in nbtsort.c at the completion of btree building, where we know we have * sole access to the index anyway. */void_bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level){	Buffer		metabuf;	Page		metap;	BTPageOpaque metaopaque;	BTMetaPageData *metad;	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_WRITE);	metap = BufferGetPage(metabuf);	metaopaque = (BTPageOpaque) PageGetSpecialPointer(metap);	Assert(metaopaque->btpo_flags & BTP_META);	/* NO ELOG(ERROR) from here till newmeta op is logged */	START_CRIT_SECTION();	metad = BTPageGetMeta(metap);	Assert(metad->btm_magic == BTREE_MAGIC || metad->btm_magic == 0);	metad->btm_magic = BTREE_MAGIC;		/* it's valid now for sure */	metad->btm_root = rootbknum;	metad->btm_level = level;	metad->btm_fastroot = rootbknum;	metad->btm_fastlevel = level;	/* XLOG stuff */	if (!rel->rd_istemp)	{		xl_btree_newmeta xlrec;		XLogRecPtr	recptr;		XLogRecData rdata[1];		xlrec.node = rel->rd_node;		xlrec.meta.root = metad->btm_root;		xlrec.meta.level = metad->btm_level;		xlrec.meta.fastroot = metad->btm_fastroot;		xlrec.meta.fastlevel = metad->btm_fastlevel;		rdata[0].buffer = InvalidBuffer;		rdata[0].data = (char *) &xlrec;		rdata[0].len = SizeOfBtreeNewmeta;		rdata[0].next = NULL;		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWMETA, rdata);		PageSetLSN(metap, recptr);		PageSetSUI(metap, ThisStartUpID);	}	END_CRIT_SECTION();	_bt_wrtbuf(rel, metabuf);}/* * Delete item(s) from a btree page. * * This must only be used for deleting leaf items.	Deleting an item on a * non-leaf page has to be done as part of an atomic action that includes * deleting the page it points to. * * This routine assumes that the caller has pinned and locked the buffer, * and will write the buffer afterwards.  Also, the given itemnos *must* * appear in increasing order in the array. 
*/
void
_bt_delitems(Relation rel, Buffer buf,
			 OffsetNumber *itemnos, int nitems)
{
	Page		page = BufferGetPage(buf);
	int			i;

	/* No ereport(ERROR) until changes are logged */
	START_CRIT_SECTION();

	/*
	 * Delete the items in reverse order so we don't have to think about
	 * adjusting item numbers for previous deletions.
	 */
	for (i = nitems - 1; i >= 0; i--)
		PageIndexTupleDelete(page, itemnos[i]);

	/* XLOG stuff (skipped when rel->rd_istemp is set) */
	if (!rel->rd_istemp)
	{
		xl_btree_delete xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[2];

		xlrec.node = rel->rd_node;
		xlrec.block = BufferGetBlockNumber(buf);

		/* First chunk: the fixed-size record header, not tied to a buffer */
		rdata[0].buffer = InvalidBuffer;
		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeDelete;
		rdata[0].next = &(rdata[1]);

		/*
		 * The target-offsets array is not in the buffer, but pretend that
		 * it is.  When XLogInsert stores the whole buffer, the offsets
		 * array need not be stored too.
		 */
		rdata[1].buffer = buf;
		if (nitems > 0)
		{
			rdata[1].data = (char *) itemnos;
			rdata[1].len = nitems * sizeof(OffsetNumber);
		}
		else
		{
			/* Nothing was deleted: attach an empty payload */
			rdata[1].data = NULL;
			rdata[1].len = 0;
		}
		rdata[1].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);

		/* Stamp the page with the position of the record just inserted */
		PageSetLSN(page, recptr);
		PageSetSUI(page, ThisStartUpID);
	}

	END_CRIT_SECTION();
}

/*
 * _bt_pagedel() -- Delete a page from the b-tree.
 *
 * This action unlinks the page from the b-tree structure, removing all
 * pointers leading to it --- but not touching its own left and right links.
 * The page cannot be physically reclaimed right away, since other processes
 * may currently be trying to follow links leading to the page; they have to
 * be allowed to use its right-link to recover.  See nbtree/README.
 *
 * On entry, the target buffer must be pinned and read-locked.  This lock and
 * pin will be dropped before exiting.
 *
 * Returns the number of pages successfully deleted (zero on failure; could
 * be more than one if parent blocks were deleted).
 *
 * NOTE: this leaks memory.
Rather than trying to clean up everything * carefully, it's better to run it in a temp context that can be reset * frequently. */int_bt_pagedel(Relation rel, Buffer buf, bool vacuum_full){	BlockNumber target,				leftsib,				rightsib,				parent;	OffsetNumber poffset,				maxoff;	uint32		targetlevel,				ilevel;	ItemId		itemid;	BTItem		targetkey,				btitem;	ScanKey		itup_scankey;	BTStack		stack;	Buffer		lbuf,				rbuf,				pbuf;	bool		parent_half_dead;	bool		parent_one_child;	bool		rightsib_empty;	Buffer		metabuf = InvalidBuffer;	Page		metapg = NULL;	BTMetaPageData *metad = NULL;	Page		page;	BTPageOpaque opaque;	/*	 * We can never delete rightmost pages nor root pages.	While at it,	 * check that page is not already deleted and is empty.	 */
nbtpage.c - 源码说明

本页面展示了「PostgreSQL7.4.6 for Linux」中的 nbtpage.c 源码文件，采用 C语言编程语言编写，共 1,163 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?