
📄 nbtree.c

📁 PostgreSQL 8.3.4 source code, open-source database
💻 C
		 * restore the itemIndex.
		 */
		so->currPos.itemIndex = so->markItemIndex;
	}
	else
	{
		/* we aren't holding any read locks, but gotta drop the pin */
		if (BTScanPosIsValid(so->currPos))
		{
			/* Before leaving current page, deal with any killed items */
			if (so->numKilled > 0 &&
				so->currPos.buf != so->markPos.buf)
				_bt_killitems(scan, false);
			ReleaseBuffer(so->currPos.buf);
			so->currPos.buf = InvalidBuffer;
		}

		if (BTScanPosIsValid(so->markPos))
		{
			/* bump pin on mark buffer for assignment to current buffer */
			IncrBufferRefCount(so->markPos.buf);
			memcpy(&so->currPos, &so->markPos,
				   offsetof(BTScanPosData, items[1]) +
				   so->markPos.lastItem * sizeof(BTScanPosItem));
		}
	}

	PG_RETURN_VOID();
}

/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
btbulkdelete(PG_FUNCTION_ARGS)
{
	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
	IndexBulkDeleteResult *volatile stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(2);
	void	   *callback_state = (void *) PG_GETARG_POINTER(3);
	Relation	rel = info->index;
	BTCycleId	cycleid;

	/* allocate stats if first time through, else re-use existing struct */
	if (stats == NULL)
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));

	/* Establish the vacuum cycle ID to use for this scan */
	/* The ENSURE stuff ensures we clean up shared memory on failure */
	PG_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
	{
		cycleid = _bt_start_vacuum(rel);

		btvacuumscan(info, stats, callback, callback_state, cycleid);
	}
	PG_END_ENSURE_ERROR_CLEANUP(_bt_end_vacuum_callback, PointerGetDatum(rel));
	_bt_end_vacuum(rel);

	PG_RETURN_POINTER(stats);
}

/*
 * Post-VACUUM cleanup.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
btvacuumcleanup(PG_FUNCTION_ARGS)
{
	IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);

	/*
	 * If btbulkdelete was called, we need not do anything, just return the
	 * stats from the latest btbulkdelete call.  If it wasn't called, we must
	 * still do a pass over the index, to recycle any newly-recyclable pages
	 * and to obtain index statistics.
	 *
	 * Since we aren't going to actually delete any leaf items, there's no
	 * need to go through all the vacuum-cycle-ID pushups.
	 */
	if (stats == NULL)
	{
		stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
		btvacuumscan(info, stats, NULL, NULL, 0);
	}

	/*
	 * During a non-FULL vacuum it's quite possible for us to be fooled by
	 * concurrent page splits into double-counting some index tuples, so
	 * disbelieve any total that exceeds the underlying heap's count. (We
	 * can't check this during btbulkdelete.)
	 */
	if (!info->vacuum_full)
	{
		if (stats->num_index_tuples > info->num_heap_tuples)
			stats->num_index_tuples = info->num_heap_tuples;
	}

	PG_RETURN_POINTER(stats);
}

/*
 * btvacuumscan --- scan the index for VACUUMing purposes
 *
 * This combines the functions of looking for leaf tuples that are deletable
 * according to the vacuum callback, looking for empty pages that can be
 * deleted, and looking for old deleted pages that can be recycled.  Both
 * btbulkdelete and btvacuumcleanup invoke this (the latter only if no
 * btbulkdelete call occurred).
 *
 * The caller is responsible for initially allocating/zeroing a stats struct
 * and for obtaining a vacuum cycle ID if necessary.
 */
static void
btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			 IndexBulkDeleteCallback callback, void *callback_state,
			 BTCycleId cycleid)
{
	Relation	rel = info->index;
	BTVacState	vstate;
	BlockNumber num_pages;
	BlockNumber blkno;
	bool		needLock;

	/*
	 * Reset counts that will be incremented during the scan; needed in case
	 * of multiple scans during a single VACUUM command
	 */
	stats->num_index_tuples = 0;
	stats->pages_deleted = 0;

	/* Set up info to pass down to btvacuumpage */
	vstate.info = info;
	vstate.stats = stats;
	vstate.callback = callback;
	vstate.callback_state = callback_state;
	vstate.cycleid = cycleid;
	vstate.freePages = NULL;	/* temporarily */
	vstate.nFreePages = 0;
	vstate.maxFreePages = 0;
	vstate.totFreePages = 0;

	/* Create a temporary memory context to run _bt_pagedel in */
	vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
												  "_bt_pagedel",
												  ALLOCSET_DEFAULT_MINSIZE,
												  ALLOCSET_DEFAULT_INITSIZE,
												  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * The outer loop iterates over all index pages except the metapage, in
	 * physical order (we hope the kernel will cooperate in providing
	 * read-ahead for speed).  It is critical that we visit all leaf pages,
	 * including ones added after we start the scan, else we might fail to
	 * delete some deletable tuples.  Hence, we must repeatedly check the
	 * relation length.  We must acquire the relation-extension lock while
	 * doing so to avoid a race condition: if someone else is extending the
	 * relation, there is a window where bufmgr/smgr have created a new
	 * all-zero page but it hasn't yet been write-locked by _bt_getbuf(). If
	 * we manage to scan such a page here, we'll improperly assume it can be
	 * recycled.  Taking the lock synchronizes things enough to prevent a
	 * problem: either num_pages won't include the new page, or _bt_getbuf
	 * already has write lock on the buffer and it will be fully initialized
	 * before we can examine it.  (See also vacuumlazy.c, which has the same
	 * issue.)	Also, we need not worry if a page is added immediately after
	 * we look; the page splitting code already has write-lock on the left
	 * page before it adds a right page, so we must already have processed any
	 * tuples due to be moved into such a page.
	 *
	 * We can skip locking for new or temp relations, however, since no one
	 * else could be accessing them.
	 */
	needLock = !RELATION_IS_LOCAL(rel);

	blkno = BTREE_METAPAGE + 1;
	for (;;)
	{
		/* Get the current relation length */
		if (needLock)
			LockRelationForExtension(rel, ExclusiveLock);
		num_pages = RelationGetNumberOfBlocks(rel);
		if (needLock)
			UnlockRelationForExtension(rel, ExclusiveLock);

		/* Allocate freePages after we read num_pages the first time */
		if (vstate.freePages == NULL)
		{
			/* No point in remembering more than MaxFSMPages pages */
			vstate.maxFreePages = MaxFSMPages;
			if ((BlockNumber) vstate.maxFreePages > num_pages)
				vstate.maxFreePages = (int) num_pages;
			vstate.freePages = (BlockNumber *)
				palloc(vstate.maxFreePages * sizeof(BlockNumber));
		}

		/* Quit if we've scanned the whole relation */
		if (blkno >= num_pages)
			break;
		/* Iterate over pages, then loop back to recheck length */
		for (; blkno < num_pages; blkno++)
		{
			btvacuumpage(&vstate, blkno, blkno);
		}
	}

	/*
	 * During VACUUM FULL, we truncate off any recyclable pages at the end of
	 * the index.  In a normal vacuum it'd be unsafe to do this except by
	 * acquiring exclusive lock on the index and then rechecking all the
	 * pages; doesn't seem worth it.
	 */
	if (info->vacuum_full && vstate.nFreePages > 0)
	{
		BlockNumber new_pages = num_pages;

		while (vstate.nFreePages > 0 &&
			   vstate.freePages[vstate.nFreePages - 1] == new_pages - 1)
		{
			new_pages--;
			stats->pages_deleted--;
			vstate.nFreePages--;
			vstate.totFreePages = vstate.nFreePages;	/* can't be more */
		}
		if (new_pages != num_pages)
		{
			/*
			 * Okay to truncate.
			 */
			RelationTruncate(rel, new_pages);

			/* update statistics */
			stats->pages_removed += num_pages - new_pages;

			num_pages = new_pages;
		}
	}

	/*
	 * Update the shared Free Space Map with the info we now have about free
	 * pages in the index, discarding any old info the map may have. We do not
	 * need to sort the page numbers; they're in order already.
	 */
	RecordIndexFreeSpace(&rel->rd_node, vstate.totFreePages,
						 vstate.nFreePages, vstate.freePages);

	pfree(vstate.freePages);

	MemoryContextDelete(vstate.pagedelcontext);

	/* update statistics */
	stats->num_pages = num_pages;
	stats->pages_free = vstate.totFreePages;
}

/*
 * btvacuumpage --- VACUUM one page
 *
 * This processes a single page for btvacuumscan().  In some cases we
 * must go back and re-examine previously-scanned pages; this routine
 * recurses when necessary to handle that case.
 *
 * blkno is the page to process.  orig_blkno is the highest block number
 * reached by the outer btvacuumscan loop (the same as blkno, unless we
 * are recursing to re-examine a previous page).
 */
static void
btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
{
	IndexVacuumInfo *info = vstate->info;
	IndexBulkDeleteResult *stats = vstate->stats;
	IndexBulkDeleteCallback callback = vstate->callback;
	void	   *callback_state = vstate->callback_state;
	Relation	rel = info->index;
	bool		delete_now;
	BlockNumber recurse_to;
	Buffer		buf;
	Page		page;
	BTPageOpaque opaque;

restart:
	delete_now = false;
	recurse_to = P_NONE;

	/* call vacuum_delay_point while not holding any buffer lock */
	vacuum_delay_point();

	/*
	 * We can't use _bt_getbuf() here because it always applies
	 * _bt_checkpage(), which will barf on an all-zero page. We want to
	 * recycle all-zero pages, not fail.  Also, we want to use a nondefault
	 * buffer access strategy.
	 */
	buf = ReadBufferWithStrategy(rel, blkno, info->strategy);
	LockBuffer(buf, BT_READ);
	page = BufferGetPage(buf);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
	if (!PageIsNew(page))
		_bt_checkpage(rel, buf);

	/*
	 * If we are recursing, the only case we want to do anything with is a
	 * live leaf page having the current vacuum cycle ID.  Any other state
	 * implies we already saw the page (eg, deleted it as being empty). In
	 * particular, we don't want to risk adding it to freePages twice.
	 */
	if (blkno != orig_blkno)
	{
		if (_bt_page_recyclable(page) ||
			P_IGNORE(opaque) ||
			!P_ISLEAF(opaque) ||
			opaque->btpo_cycleid != vstate->cycleid)
		{
			_bt_relbuf(rel, buf);
			return;
		}
	}

	/* Page is valid, see what to do with it */
	if (_bt_page_recyclable(page))
	{
		/* Okay to recycle this page */
		if (vstate->nFreePages < vstate->maxFreePages)
			vstate->freePages[vstate->nFreePages++] = blkno;
		vstate->totFreePages++;
		stats->pages_deleted++;
	}
	else if (P_ISDELETED(opaque))
	{
		/* Already deleted, but can't recycle yet */
		stats->pages_deleted++;
	}
	else if (P_ISHALFDEAD(opaque))
	{
		/* Half-dead, try to delete */
		delete_now = true;
	}
	else if (P_ISLEAF(opaque))
	{
		OffsetNumber deletable[MaxOffsetNumber];
		int			ndeletable;
		OffsetNumber offnum,
					minoff,
					maxoff;

		/*
		 * Trade in the initial read lock for a super-exclusive write lock on
		 * this page.  We must get such a lock on every leaf page over the
		 * course of the vacuum scan, whether or not it actually contains any
		 * deletable tuples --- see nbtree/README.
		 */
		LockBuffer(buf, BUFFER_LOCK_UNLOCK);
		LockBufferForCleanup(buf);

		/*
		 * Check whether we need to recurse back to earlier pages.	What we
		 * are concerned about is a page split that happened since we started
		 * the vacuum scan.  If the split moved some tuples to a lower page
		 * then we might have missed 'em.  If so, set up for tail recursion.
		 * (Must do this before possibly clearing btpo_cycleid below!)
		 */
		if (vstate->cycleid != 0 &&
			opaque->btpo_cycleid == vstate->cycleid &&
			!(opaque->btpo_flags & BTP_SPLIT_END) &&
			!P_RIGHTMOST(opaque) &&
			opaque->btpo_next < orig_blkno)
			recurse_to = opaque->btpo_next;

		/*
		 * Scan over all items to see which ones need deleted according to the
		 * callback function.
		 */
		ndeletable = 0;
		minoff = P_FIRSTDATAKEY(opaque);
		maxoff = PageGetMaxOffsetNumber(page);
		if (callback)
		{
			for (offnum = minoff;
				 offnum <= maxoff;
				 offnum = OffsetNumberNext(offnum))
			{
				IndexTuple	itup;
				ItemPointer htup;

				itup = (IndexTuple) PageGetItem(page,
												PageGetItemId(page, offnum));
				htup = &(itup->t_tid);
				if (callback(htup, callback_state))
					deletable[ndeletable++] = offnum;
			}
		}

		/*
		 * Apply any needed deletes.  We issue just one _bt_delitems() call
		 * per page, so as to minimize WAL traffic.
		 */
		if (ndeletable > 0)
		{
			_bt_delitems(rel, buf, deletable, ndeletable);
			stats->tuples_removed += ndeletable;
			/* must recompute maxoff */
			maxoff = PageGetMaxOffsetNumber(page);
		}
		else
		{
			/*
			 * If the page has been split during this vacuum cycle, it seems
			 * worth expending a write to clear btpo_cycleid even if we don't
			 * have any deletions to do.  (If we do, _bt_delitems takes care
			 * of this.)  This ensures we won't process the page again.
			 *
			 * We treat this like a hint-bit update because there's no need to
			 * WAL-log it.
			 */
			if (vstate->cycleid != 0 &&
				opaque->btpo_cycleid == vstate->cycleid)
			{
				opaque->btpo_cycleid = 0;
				SetBufferCommitInfoNeedsSave(buf);
			}
		}

		/*
		 * If it's now empty, try to delete; else count the live tuples. We
		 * don't delete when recursing, though, to avoid putting entries into
		 * freePages out-of-order (doesn't seem worth any extra code to handle
		 * the case).
		 */
		if (minoff > maxoff)
			delete_now = (blkno == orig_blkno);
		else
			stats->num_index_tuples += maxoff - minoff + 1;
	}

	if (delete_now)
	{
		MemoryContext oldcontext;
		int			ndel;

		/* Run pagedel in a temp context to avoid memory leakage */
		MemoryContextReset(vstate->pagedelcontext);
		oldcontext = MemoryContextSwitchTo(vstate->pagedelcontext);

		ndel = _bt_pagedel(rel, buf, NULL, info->vacuum_full);

		/* count only this page, else may double-count parent */
		if (ndel)
			stats->pages_deleted++;

		/*
		 * During VACUUM FULL it's okay to recycle deleted pages immediately,
		 * since there can be no other transactions scanning the index.  Note
		 * that we will only recycle the current page and not any parent pages
		 * that _bt_pagedel might have recursed to; this seems reasonable in
		 * the name of simplicity.	(Trying to do otherwise would mean we'd
		 * have to sort the list of recyclable pages we're building.)
		 */
		if (ndel && info->vacuum_full)
		{
			if (vstate->nFreePages < vstate->maxFreePages)
				vstate->freePages[vstate->nFreePages++] = blkno;
			vstate->totFreePages++;
		}

		MemoryContextSwitchTo(oldcontext);
		/* pagedel released buffer, so we shouldn't */
	}
	else
		_bt_relbuf(rel, buf);

	/*
	 * This is really tail recursion, but if the compiler is too stupid to
	 * optimize it as such, we'd eat an uncomfortably large amount of stack
	 * space per recursion level (due to the deletable[] array). A failure is
	 * improbable since the number of levels isn't likely to be large ... but
	 * just in case, let's hand-optimize into a loop.
	 */
	if (recurse_to != P_NONE)
	{
		blkno = recurse_to;
		goto restart;
	}
}
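A note for readers of this listing: btbulkdelete learns which heap tuples are dead through its IndexBulkDeleteCallback argument, which btvacuumpage invokes once per leaf index tuple, passing the tuple's heap TID (itup->t_tid). Inside the server that callback is supplied by VACUUM's heap-scanning code and btbulkdelete is reached through the index access method interface via the fmgr, which is why it unpacks its arguments with PG_GETARG_POINTER. The sketch below is not part of nbtree.c; it is a minimal, hypothetical illustration of a callback with the shape this code expects, assuming the 8.3-era declaration of IndexBulkDeleteCallback (a function taking an ItemPointer and a void *state, returning bool). The DeadTidList type and dead_tid_callback name are invented for the example.

#include "postgres.h"
#include "storage/itemptr.h"

/* Hypothetical state passed to btbulkdelete as callback_state. */
typedef struct DeadTidList
{
	ItemPointerData *tids;		/* heap TIDs known to be dead */
	int			ntids;
} DeadTidList;

/*
 * Hypothetical IndexBulkDeleteCallback: returning true tells btvacuumpage
 * to add this item's offset to deletable[], so it gets removed by the
 * single _bt_delitems() call issued for the page.
 */
static bool
dead_tid_callback(ItemPointer itemptr, void *state)
{
	DeadTidList *list = (DeadTidList *) state;
	int			i;

	/* linear search for clarity; real VACUUM code does better than this */
	for (i = 0; i < list->ntids; i++)
	{
		if (ItemPointerEquals(itemptr, &list->tids[i]))
			return true;
	}
	return false;
}

With a callback of this shape, btvacuumscan walks every block from BTREE_METAPAGE + 1 onward, and btvacuumpage batches all offsets for which the callback returned true into one _bt_delitems() call per page, the WAL-traffic optimization noted in the comment above.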
