/*-------------------------------------------------------------------------
 *
 * nbtree.c
 *    Implementation of Lehman and Yao's btree management algorithm for
 *    Postgres.
 *
 * NOTES
 *    This file contains only the public interface routines.
 *
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *    $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.106 2003/09/29 23:40:26 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/smgr.h"


/* Working state for btbuild and its callback */
typedef struct
{
    bool        usefast;
    bool        isUnique;
    bool        haveDead;
    Relation    heapRel;
    BTSpool    *spool;

    /*
     * spool2 is needed only when the index is a unique index. Dead
     * tuples are put into spool2 instead of spool in order to avoid
     * the uniqueness check.
     */
    BTSpool    *spool2;
    double      indtuples;
} BTBuildState;


bool        FastBuild = true;   /* use SORT instead of insertion build */

static void _bt_restscan(IndexScanDesc scan);
static void btbuildCallback(Relation index,
                HeapTuple htup,
                Datum *attdata,
                char *nulls,
                bool tupleIsAlive,
                void *state);


/*
 * AtEOXact_nbtree() --- clean up nbtree subsystem at xact abort or commit.
 */
void
AtEOXact_nbtree(void)
{
    /* nothing to do at the moment */
}


/*
 *  btbuild() -- build a new btree index.
 *
 *      We use a global variable to record the fact that we're creating
 *      a new index.  This is used to avoid high-concurrency locking,
 *      since the index won't be visible until this transaction commits
 *      and since building is guaranteed to be single-threaded.
 */
Datum
btbuild(PG_FUNCTION_ARGS)
{
    Relation    heap = (Relation) PG_GETARG_POINTER(0);
    Relation    index = (Relation) PG_GETARG_POINTER(1);
    IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
    double      reltuples;
    BTBuildState buildstate;

    /*
     * bootstrap processing does something strange, so don't use
     * sort/build for initial catalog indices.  at some point i need to
     * look harder at this.  (there is some kind of incremental processing
     * going on there.) -- pma 08/29/95
     */
    buildstate.usefast = (FastBuild && IsNormalProcessingMode());
    buildstate.isUnique = indexInfo->ii_Unique;
    buildstate.haveDead = false;
    buildstate.heapRel = heap;
    buildstate.spool = NULL;
    buildstate.spool2 = NULL;
    buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
    if (log_btree_build_stats)
        ResetUsage();
#endif   /* BTREE_BUILD_STATS */

    /*
     * We expect to be called exactly once for any index relation. If
     * that's not the case, big trouble's what we have.
     */
    if (RelationGetNumberOfBlocks(index) != 0)
        elog(ERROR, "index \"%s\" already contains data",
             RelationGetRelationName(index));

    /* initialize the btree index metadata page */
    /* mark it valid right away only if using slow build */
    _bt_metapinit(index, !buildstate.usefast);

    if (buildstate.usefast)
    {
        buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique);

        /*
         * Unlike spool, uniqueness is not checked for spool2.
         */
        if (indexInfo->ii_Unique)
            buildstate.spool2 = _bt_spoolinit(index, false);
    }

    /* do the heap scan */
    reltuples = IndexBuildHeapScan(heap, index, indexInfo,
                                   btbuildCallback, (void *) &buildstate);

    /* okay, all heap tuples are indexed */
    if (buildstate.spool2 && !buildstate.haveDead)
    {
        /* spool2 turns out to be unnecessary */
        _bt_spooldestroy(buildstate.spool2);
        buildstate.spool2 = NULL;
    }

    /*
     * if we are doing bottom-up btree build, finish the build by (1)
     * completing the sort of the spool file, (2) inserting the sorted
     * tuples into btree pages and (3) building the upper levels.
     */
    if (buildstate.usefast)
    {
        _bt_leafbuild(buildstate.spool, buildstate.spool2);
        _bt_spooldestroy(buildstate.spool);
        if (buildstate.spool2)
            _bt_spooldestroy(buildstate.spool2);
    }

#ifdef BTREE_BUILD_STATS
    if (log_btree_build_stats)
    {
        ShowUsage("BTREE BUILD STATS");
        ResetUsage();
    }
#endif   /* BTREE_BUILD_STATS */

    /*
     * Since we just counted the tuples in the heap, we update its stats
     * in pg_class to guarantee that the planner takes advantage of the
     * index we just created.  But, only update statistics during normal
     * index definitions, not for indices on system catalogs created
     * during bootstrap processing.  We must close the relations before
     * updating statistics to guarantee that the relcache entries are
     * flushed when we increment the command counter in UpdateStats(). But
     * we do not release any locks on the relations; those will be held
     * until end of transaction.
     */
    if (IsNormalProcessingMode())
    {
        Oid         hrelid = RelationGetRelid(heap);
        Oid         irelid = RelationGetRelid(index);

        heap_close(heap, NoLock);
        index_close(index);
        UpdateStats(hrelid, reltuples);
        UpdateStats(irelid, buildstate.indtuples);
    }

    PG_RETURN_VOID();
}
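
/*
 * Note on the fast-build path: when usefast is set, btbuildCallback()
 * below does not touch the tree at all during the heap scan; tuples are
 * spooled, sorted, and written out bottom-up by _bt_leafbuild().  That
 * is why _bt_metapinit() above marks the metapage valid immediately
 * only for the slow (insertion) build.
 */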

/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
btbuildCallback(Relation index,
                HeapTuple htup,
                Datum *attdata,
                char *nulls,
                bool tupleIsAlive,
                void *state)
{
    BTBuildState *buildstate = (BTBuildState *) state;
    IndexTuple  itup;
    BTItem      btitem;
    InsertIndexResult res;

    /* form an index tuple and point it at the heap tuple */
    itup = index_formtuple(RelationGetDescr(index), attdata, nulls);
    itup->t_tid = htup->t_self;

    btitem = _bt_formitem(itup);

    /*
     * if we are doing bottom-up btree build, we insert the index into a
     * spool file for subsequent processing.  otherwise, we insert into
     * the btree.
     */
    if (buildstate->usefast)
    {
        if (tupleIsAlive || buildstate->spool2 == NULL)
            _bt_spool(btitem, buildstate->spool);
        else
        {
            /* dead tuples are put into spool2 */
            buildstate->haveDead = true;
            _bt_spool(btitem, buildstate->spool2);
        }
    }
    else
    {
        res = _bt_doinsert(index, btitem,
                           buildstate->isUnique, buildstate->heapRel);
        if (res)
            pfree(res);
    }

    buildstate->indtuples += 1;

    pfree(btitem);
    pfree(itup);
}

/*
 *  btinsert() -- insert an index tuple into a btree.
 *
 *      Descend the tree recursively, find the appropriate location for our
 *      new tuple, put it there, set its unique OID as appropriate, and
 *      return an InsertIndexResult to the caller.
 */
Datum
btinsert(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    Datum      *datum = (Datum *) PG_GETARG_POINTER(1);
    char       *nulls = (char *) PG_GETARG_POINTER(2);
    ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
    Relation    heapRel = (Relation) PG_GETARG_POINTER(4);
    bool        checkUnique = PG_GETARG_BOOL(5);
    InsertIndexResult res;
    BTItem      btitem;
    IndexTuple  itup;

    /* generate an index tuple */
    itup = index_formtuple(RelationGetDescr(rel), datum, nulls);
    itup->t_tid = *ht_ctid;
    btitem = _bt_formitem(itup);

    res = _bt_doinsert(rel, btitem, checkUnique, heapRel);

    pfree(btitem);
    pfree(itup);

    PG_RETURN_POINTER(res);
}
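
/*
 * Note that btinsert(), like the other am routines in this file, is not
 * called directly: the indexam layer's index_insert() looks up the btree
 * insert procedure registered in pg_am and dispatches to it through the
 * function manager, which is why arguments arrive via PG_FUNCTION_ARGS
 * rather than a normal C signature.
 */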

/*
 *  btgettuple() -- Get the next tuple in the scan.
 */
Datum
btgettuple(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    Page        page;
    OffsetNumber offnum;
    bool        res;

    /*
     * If we've already initialized this scan, we can just advance it in
     * the appropriate direction.  If we haven't done so yet, we call a
     * routine to get the first item in the scan.
     */
    if (ItemPointerIsValid(&(scan->currentItemData)))
    {
        /*
         * Restore scan position using heap TID returned by previous call
         * to btgettuple(). _bt_restscan() re-grabs the read lock on the
         * buffer, too.
         */
        _bt_restscan(scan);

        /*
         * Check to see if we should kill the previously-fetched tuple.
         */
        if (scan->kill_prior_tuple)
        {
            /*
             * Yes, so mark it by setting the LP_DELETE bit in the item
             * flags.
             */
            offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
            page = BufferGetPage(so->btso_curbuf);
            PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;

            /*
             * Since this can be redone later if needed, it's treated the
             * same as a commit-hint-bit status update for heap tuples: we
             * mark the buffer dirty but don't make a WAL log entry.
             */
            SetBufferCommitInfoNeedsSave(so->btso_curbuf);
        }

        /*
         * Now continue the scan.
         */
        res = _bt_next(scan, dir);
    }
    else
        res = _bt_first(scan, dir);

    /*
     * Skip killed tuples if asked to.
     */
    if (scan->ignore_killed_tuples)
    {
        while (res)
        {
            offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
            page = BufferGetPage(so->btso_curbuf);
            if (!ItemIdDeleted(PageGetItemId(page, offnum)))
                break;
            res = _bt_next(scan, dir);
        }
    }

    /*
     * Save heap TID to use it in _bt_restscan.  Then release the read
     * lock on the buffer so that we aren't blocking other backends.
     *
     * NOTE: we do keep the pin on the buffer!  This is essential to ensure
     * that someone else doesn't delete the index entry we are stopped on.
     */
    if (res)
    {
        ((BTScanOpaque) scan->opaque)->curHeapIptr = scan->xs_ctup.t_self;
        LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf,
                   BUFFER_LOCK_UNLOCK);
    }

    PG_RETURN_BOOL(res);
}
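
/*
 * The pin-without-lock state that btgettuple() leaves behind is what
 * _bt_restscan() depends on: keeping the pin prevents the item the scan
 * is stopped on from being deleted out from under us, while releasing
 * the read lock lets other backends use (and even split) the page
 * between calls.
 */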

/*
 *  btbeginscan() -- start a scan on a btree index
 */
Datum
btbeginscan(PG_FUNCTION_ARGS)
{
    Relation    rel = (Relation) PG_GETARG_POINTER(0);
    int         keysz = PG_GETARG_INT32(1);
    ScanKey     scankey = (ScanKey) PG_GETARG_POINTER(2);
    IndexScanDesc scan;

    /* get the scan */
    scan = RelationGetIndexScan(rel, keysz, scankey);

    PG_RETURN_POINTER(scan);
}

/*
 *  btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanKey     scankey = (ScanKey) PG_GETARG_POINTER(1);
    ItemPointer iptr;
    BTScanOpaque so;

    so = (BTScanOpaque) scan->opaque;

    if (so == NULL)             /* if called from btbeginscan */
    {
        so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
        so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer;
        ItemPointerSetInvalid(&(so->curHeapIptr));
        ItemPointerSetInvalid(&(so->mrkHeapIptr));
        if (scan->numberOfKeys > 0)
            so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
        else
            so->keyData = (ScanKey) NULL;
        so->numberOfKeys = scan->numberOfKeys;
        scan->opaque = so;
    }

    /* we aren't holding any read locks, but gotta drop the pins */
    if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
    {
        ReleaseBuffer(so->btso_curbuf);
        so->btso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(&(so->curHeapIptr));
        ItemPointerSetInvalid(iptr);
    }

    if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
    {
        ReleaseBuffer(so->btso_mrkbuf);
        so->btso_mrkbuf = InvalidBuffer;
        ItemPointerSetInvalid(&(so->mrkHeapIptr));
        ItemPointerSetInvalid(iptr);
    }

    /*
     * Reset the scan keys. Note that keys ordering stuff moved to
     * _bt_first.    - vadim 05/05/97
     */
    if (scankey && scan->numberOfKeys > 0)
    {
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
        so->numberOfKeys = scan->numberOfKeys;
        memmove(so->keyData,
                scankey,
                so->numberOfKeys * sizeof(ScanKeyData));
    }

    PG_RETURN_VOID();
}

void
btmovescan(IndexScanDesc scan, Datum v)
{
    ItemPointer iptr;
    BTScanOpaque so;

    so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pin */
    if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
    {
        ReleaseBuffer(so->btso_curbuf);
        so->btso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    so->keyData[0].sk_argument = v;
}

/*
 *  btendscan() -- close down a scan
 */
Datum
btendscan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ItemPointer iptr;
    BTScanOpaque so;

    so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
    {
        if (BufferIsValid(so->btso_curbuf))
            ReleaseBuffer(so->btso_curbuf);
        so->btso_curbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
    {
        if (BufferIsValid(so->btso_mrkbuf))
            ReleaseBuffer(so->btso_mrkbuf);
        so->btso_mrkbuf = InvalidBuffer;
        ItemPointerSetInvalid(iptr);
    }

    if (so->keyData != (ScanKey) NULL)
        pfree(so->keyData);
    pfree(so);

    PG_RETURN_VOID();
}
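
/*
 * Taken together, these routines form the btree side of the index access
 * method scan API.  Roughly, the indexam layer drives them as follows
 * (a sketch, not the exact protocol; see backend/access/index/indexam.c):
 *
 *      scan = index_beginscan(heap, index, snapshot, nkeys, key);
 *                              -- reaches btbeginscan(), then btrescan(),
 *                                 which allocates the BTScanOpaque state
 *      while ((tuple = index_getnext(scan, dir)) != NULL)
 *                              -- each call ends up in btgettuple()
 *          ... process tuple ...
 *      index_endscan(scan);    -- btendscan() drops pins and frees the
 *                                 opaque state
 */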