nbtree.c
/*-------------------------------------------------------------------------
 *
 * nbtree.c
 *	  Implementation of Lehman and Yao's btree management algorithm for
 *	  Postgres.
 *
 * NOTES
 *	  This file contains only the public interface routines.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.156.2.1 2008/04/16 23:59:51 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"


/* Working state for btbuild and its callback */
typedef struct
{
	bool		isUnique;
	bool		haveDead;
	Relation	heapRel;
	BTSpool    *spool;

	/*
	 * spool2 is needed only when the index is a unique index. Dead tuples
	 * are put into spool2 instead of spool in order to avoid the uniqueness
	 * check.
	 */
	BTSpool    *spool2;
	double		indtuples;
} BTBuildState;

/* Working state needed by btvacuumpage */
typedef struct
{
	IndexVacuumInfo *info;
	IndexBulkDeleteResult *stats;
	IndexBulkDeleteCallback callback;
	void	   *callback_state;
	BTCycleId	cycleid;
	BlockNumber *freePages;
	int			nFreePages;		/* number of entries in freePages[] */
	int			maxFreePages;	/* allocated size of freePages[] */
	BlockNumber totFreePages;	/* true total # of free pages */
	MemoryContext pagedelcontext;
} BTVacState;


static void btbuildCallback(Relation index,
				HeapTuple htup,
				Datum *values,
				bool *isnull,
				bool tupleIsAlive,
				void *state);
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			 IndexBulkDeleteCallback callback, void *callback_state,
			 BTCycleId cycleid);
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
			 BlockNumber orig_blkno);


/*
 *	btbuild() -- build a new btree index.
 */
Datum
btbuild(PG_FUNCTION_ARGS)
{
	Relation	heap = (Relation) PG_GETARG_POINTER(0);
	Relation	index = (Relation) PG_GETARG_POINTER(1);
	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
	IndexBuildResult *result;
	double		reltuples;
	BTBuildState buildstate;

	buildstate.isUnique = indexInfo->ii_Unique;
	buildstate.haveDead = false;
	buildstate.heapRel = heap;
	buildstate.spool = NULL;
	buildstate.spool2 = NULL;
	buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
		ResetUsage();
#endif   /* BTREE_BUILD_STATS */

	/*
	 * We expect to be called exactly once for any index relation. If that's
	 * not the case, big trouble's what we have.
	 */
	if (RelationGetNumberOfBlocks(index) != 0)
		elog(ERROR, "index \"%s\" already contains data",
			 RelationGetRelationName(index));

	buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);

	/*
	 * If building a unique index, put dead tuples in a second spool to keep
	 * them out of the uniqueness check.
	 */
	if (indexInfo->ii_Unique)
		buildstate.spool2 = _bt_spoolinit(index, false, true);

	/* do the heap scan */
	reltuples = IndexBuildHeapScan(heap, index, indexInfo,
								   btbuildCallback, (void *) &buildstate);

	/* okay, all heap tuples are indexed */
	if (buildstate.spool2 && !buildstate.haveDead)
	{
		/* spool2 turns out to be unnecessary */
		_bt_spooldestroy(buildstate.spool2);
		buildstate.spool2 = NULL;
	}

	/*
	 * Finish the build by (1) completing the sort of the spool file, (2)
	 * inserting the sorted tuples into btree pages and (3) building the
	 * upper levels.
	 */
	_bt_leafbuild(buildstate.spool, buildstate.spool2);
	_bt_spooldestroy(buildstate.spool);
	if (buildstate.spool2)
		_bt_spooldestroy(buildstate.spool2);

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD STATS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	/*
	 * If we are reindexing a pre-existing index, it is critical to send out
	 * a relcache invalidation SI message to ensure all backends re-read the
	 * index metapage.  We expect that the caller will ensure that happens
	 * (typically as a side effect of updating index stats, but it must
	 * happen even if the stats don't change!)
	 */

	/*
	 * Return statistics
	 */
	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

	result->heap_tuples = reltuples;
	result->index_tuples = buildstate.indtuples;

	PG_RETURN_POINTER(result);
}
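/*
 * A minimal sketch of how btbuild() is reached, assuming the standard fmgr
 * index-AM calling convention: index_build() resolves the access method's
 * ambuild regproc and invokes it with the heap, index, and IndexInfo
 * pointers.  "procedure" and the relation variable names below are
 * hypothetical, introduced only for this sketch.
 */
#ifdef NOT_USED
{
	IndexBuildResult *result;

	/* procedure is assumed to be the pg_am.ambuild regproc Oid for btree */
	result = (IndexBuildResult *)
		DatumGetPointer(OidFunctionCall3(procedure,
										 PointerGetDatum(heapRelation),
										 PointerGetDatum(indexRelation),
										 PointerGetDatum(indexInfo)));
}
#endif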
/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
btbuildCallback(Relation index,
				HeapTuple htup,
				Datum *values,
				bool *isnull,
				bool tupleIsAlive,
				void *state)
{
	BTBuildState *buildstate = (BTBuildState *) state;
	IndexTuple	itup;

	/* form an index tuple and point it at the heap tuple */
	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
	itup->t_tid = htup->t_self;

	/*
	 * insert the index tuple into the appropriate spool file for subsequent
	 * processing
	 */
	if (tupleIsAlive || buildstate->spool2 == NULL)
		_bt_spool(itup, buildstate->spool);
	else
	{
		/* dead tuples are put into spool2 */
		buildstate->haveDead = true;
		_bt_spool(itup, buildstate->spool2);
	}

	buildstate->indtuples += 1;

	pfree(itup);
}

/*
 *	btinsert() -- insert an index tuple into a btree.
 *
 *		Descend the tree recursively, find the appropriate location for our
 *		new tuple, and put it there.
 */
Datum
btinsert(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
	bool	   *isnull = (bool *) PG_GETARG_POINTER(2);
	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
	bool		checkUnique = PG_GETARG_BOOL(5);
	IndexTuple	itup;

	/* generate an index tuple */
	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
	itup->t_tid = *ht_ctid;

	_bt_doinsert(rel, itup, checkUnique, heapRel);

	pfree(itup);

	PG_RETURN_BOOL(true);
}

/*
 *	btgettuple() -- Get the next tuple in the scan.
 */
Datum
btgettuple(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	bool		res;

	/*
	 * If we've already initialized this scan, we can just advance it in the
	 * appropriate direction. If we haven't done so yet, we call a routine to
	 * get the first item in the scan.
	 */
	if (BTScanPosIsValid(so->currPos))
	{
		/*
		 * Check to see if we should kill the previously-fetched tuple.
		 */
		if (scan->kill_prior_tuple)
		{
			/*
			 * Yes, remember it for later. (We'll deal with all such tuples
			 * at once right before leaving the index page.)  The test for
			 * numKilled overrun is not just paranoia: if the caller reverses
			 * direction in the indexscan then the same item might get
			 * entered multiple times. It's not worth trying to optimize
			 * that, so we don't detect it, but instead just forget any
			 * excess entries.
			 */
			if (so->killedItems == NULL)
				so->killedItems = (int *)
					palloc(MaxIndexTuplesPerPage * sizeof(int));
			if (so->numKilled < MaxIndexTuplesPerPage)
				so->killedItems[so->numKilled++] = so->currPos.itemIndex;
		}

		/*
		 * Now continue the scan.
		 */
		res = _bt_next(scan, dir);
	}
	else
		res = _bt_first(scan, dir);

	PG_RETURN_BOOL(res);
}
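/*
 * A minimal sketch of the caller side of btgettuple(), assuming the
 * standard fmgr scan convention: index_getnext() keeps invoking the
 * amgettuple entry point until it returns false.  "procedure" is a
 * hypothetical FmgrInfo for btree's amgettuple, named only for this sketch.
 */
#ifdef NOT_USED
for (;;)
{
	bool		found;

	found = DatumGetBool(FunctionCall2(procedure,
									   PointerGetDatum(scan),
									   Int32GetDatum(ForwardScanDirection)));
	if (!found)
		break;					/* no more matches */

	/* scan->xs_ctup.t_self now holds the TID of the next matching tuple */
}
#endif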
/*
 * btgetmulti() -- get multiple tuples at once
 *
 * In the current implementation there seems no strong reason to stop at
 * index page boundaries; we just press on until we fill the caller's buffer
 * or run out of matches.
 */
Datum
btgetmulti(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
	int32		max_tids = PG_GETARG_INT32(2);
	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	bool		res = true;
	int32		ntids = 0;

	if (max_tids <= 0)			/* behave correctly in boundary case */
		PG_RETURN_BOOL(true);

	/* If we haven't started the scan yet, fetch the first page & tuple. */
	if (!BTScanPosIsValid(so->currPos))
	{
		res = _bt_first(scan, ForwardScanDirection);
		if (!res)
		{
			/* empty scan */
			*returned_tids = ntids;
			PG_RETURN_BOOL(res);
		}
		/* Save tuple ID, and continue scanning */
		tids[ntids] = scan->xs_ctup.t_self;
		ntids++;
	}

	while (ntids < max_tids)
	{
		/*
		 * Advance to next tuple within page.  This is the same as the easy
		 * case in _bt_next().
		 */
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			/* let _bt_next do the heavy lifting */
			res = _bt_next(scan, ForwardScanDirection);
			if (!res)
				break;
		}

		/* Save tuple ID, and continue scanning */
		tids[ntids] = so->currPos.items[so->currPos.itemIndex].heapTid;
		ntids++;
	}

	*returned_tids = ntids;
	PG_RETURN_BOOL(res);
}

/*
 *	btbeginscan() -- start a scan on a btree index
 */
Datum
btbeginscan(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	int			keysz = PG_GETARG_INT32(1);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);
	IndexScanDesc scan;

	/* get the scan */
	scan = RelationGetIndexScan(rel, keysz, scankey);

	PG_RETURN_POINTER(scan);
}

/*
 *	btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	if (so == NULL)				/* if called from btbeginscan */
	{
		so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
		so->currPos.buf = so->markPos.buf = InvalidBuffer;
		if (scan->numberOfKeys > 0)
			so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
		else
			so->keyData = NULL;
		so->killedItems = NULL; /* until needed */
		so->numKilled = 0;
		scan->opaque = so;
	}

	/* we aren't holding any read locks, but gotta drop the pins */
	if (BTScanPosIsValid(so->currPos))
	{
		/* Before leaving current page, deal with any killed items */
		if (so->numKilled > 0)
			_bt_killitems(scan, false);
		ReleaseBuffer(so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	if (BTScanPosIsValid(so->markPos))
	{
		ReleaseBuffer(so->markPos.buf);
		so->markPos.buf = InvalidBuffer;
	}
	so->markItemIndex = -1;

	/*
	 * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
	 *		- vadim 05/05/97
	 */
	if (scankey && scan->numberOfKeys > 0)
		memmove(scan->keyData,
				scankey,
				scan->numberOfKeys * sizeof(ScanKeyData));
	so->numberOfKeys = 0;		/* until _bt_preprocess_keys sets it */

	PG_RETURN_VOID();
}
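/*
 * A minimal sketch of a btgetmulti() consumer, assuming the standard fmgr
 * convention and a caller-supplied TID buffer: keep requesting batches
 * until the scan reports exhaustion.  "procedure" is a hypothetical
 * FmgrInfo for btree's amgetmulti, and the buffer size is arbitrary.
 */
#ifdef NOT_USED
{
	ItemPointerData tids[1024];
	int32		ntids;
	bool		more;

	do
	{
		more = DatumGetBool(FunctionCall4(procedure,
										  PointerGetDatum(scan),
										  PointerGetDatum(tids),
										  Int32GetDatum(1024),
										  PointerGetDatum(&ntids)));
		/* process tids[0 .. ntids-1] before asking for the next batch */
	} while (more);
}
#endif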
/*
 *	btendscan() -- close down a scan
 */
Datum
btendscan(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pins */
	if (BTScanPosIsValid(so->currPos))
	{
		/* Before leaving current page, deal with any killed items */
		if (so->numKilled > 0)
			_bt_killitems(scan, false);
		ReleaseBuffer(so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	if (BTScanPosIsValid(so->markPos))
	{
		ReleaseBuffer(so->markPos.buf);
		so->markPos.buf = InvalidBuffer;
	}
	so->markItemIndex = -1;

	if (so->killedItems != NULL)
		pfree(so->killedItems);
	if (so->keyData != NULL)
		pfree(so->keyData);
	pfree(so);

	PG_RETURN_VOID();
}

/*
 *	btmarkpos() -- save current scan position
 */
Datum
btmarkpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pin */
	if (BTScanPosIsValid(so->markPos))
	{
		ReleaseBuffer(so->markPos.buf);
		so->markPos.buf = InvalidBuffer;
	}

	/*
	 * Just record the current itemIndex.  If we later step to next page
	 * before releasing the marked position, _bt_steppage makes a full copy
	 * of the currPos struct in markPos.  If (as often happens) the mark is
	 * moved before we leave the page, we don't have to do that work.
	 */
	if (BTScanPosIsValid(so->currPos))
		so->markItemIndex = so->currPos.itemIndex;
	else
		so->markItemIndex = -1;

	PG_RETURN_VOID();
}

/*
 *	btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	if (so->markItemIndex >= 0)
	{
		/*
		 * The mark position is on the same page we are currently on. Just