📄 nbtree.c
Font size:
/*-------------------------------------------------------------------------
 *
 * nbtree.c
 *	  Implementation of Lehman and Yao's btree management algorithm for
 *	  Postgres.
 *
 * NOTES
 *	  This file contains only the public interface routines.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.132.2.2 2006/02/14 17:20:10 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "storage/freespace.h"
#include "storage/smgr.h"
#include "utils/memutils.h"


/* Working state for btbuild and its callback */
typedef struct
{
	bool		usefast;		/* use sort-based (bottom-up) build? */
	bool		isUnique;		/* building a unique index? */
	bool		haveDead;		/* any dead tuples seen so far? */
	Relation	heapRel;		/* heap relation being indexed */
	BTSpool    *spool;			/* spool for live index tuples */

	/*
	 * spool2 is needed only when the index is an unique index. Dead tuples
	 * are put into spool2 instead of spool in order to avoid uniqueness
	 * check.
	 */
	BTSpool    *spool2;
	double		indtuples;		/* running count of tuples indexed */
} BTBuildState;


bool		FastBuild = true;	/* use SORT instead of insertion build */

/* forward declarations for routines defined later in this file */
static void _bt_restscan(IndexScanDesc scan);
static void btbuildCallback(Relation index,
							HeapTuple htup,
							Datum *values,
							bool *isnull,
							bool tupleIsAlive,
							void *state);


/*
 *	btbuild() -- build a new btree index.
 */
Datum
btbuild(PG_FUNCTION_ARGS)
{
	Relation	heap = (Relation) PG_GETARG_POINTER(0);
	Relation	index = (Relation) PG_GETARG_POINTER(1);
	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
	double		reltuples;
	BTBuildState buildstate;

	/*
	 * bootstrap processing does something strange, so don't use sort/build
	 * for initial catalog indices.  at some point i need to look harder at
	 * this.  (there is some kind of incremental processing going on there.)
	 * -- pma 08/29/95
	 */
	buildstate.usefast = (FastBuild && IsNormalProcessingMode());
	buildstate.isUnique = indexInfo->ii_Unique;
	buildstate.haveDead = false;
	buildstate.heapRel = heap;
	buildstate.spool = NULL;
	buildstate.spool2 = NULL;
	buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
		ResetUsage();
#endif   /* BTREE_BUILD_STATS */

	/*
	 * We expect to be called exactly once for any index relation. If that's
	 * not the case, big trouble's what we have.
	 */
	if (RelationGetNumberOfBlocks(index) != 0)
		elog(ERROR, "index \"%s\" already contains data",
			 RelationGetRelationName(index));

	if (buildstate.usefast)
	{
		buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);

		/*
		 * If building a unique index, put dead tuples in a second spool to
		 * keep them out of the uniqueness check.
		 */
		if (indexInfo->ii_Unique)
			buildstate.spool2 = _bt_spoolinit(index, false, true);
	}
	else
	{
		/* if using slow build, initialize the btree index metadata page */
		_bt_metapinit(index);
	}

	/* do the heap scan */
	reltuples = IndexBuildHeapScan(heap, index, indexInfo,
								   btbuildCallback, (void *) &buildstate);

	/* okay, all heap tuples are indexed */
	if (buildstate.spool2 && !buildstate.haveDead)
	{
		/* spool2 turns out to be unnecessary */
		_bt_spooldestroy(buildstate.spool2);
		buildstate.spool2 = NULL;
	}

	/*
	 * if we are doing bottom-up btree build, finish the build by (1)
	 * completing the sort of the spool file, (2) inserting the sorted tuples
	 * into btree pages and (3) building the upper levels.
	 */
	if (buildstate.usefast)
	{
		_bt_leafbuild(buildstate.spool, buildstate.spool2);
		_bt_spooldestroy(buildstate.spool);
		if (buildstate.spool2)
			_bt_spooldestroy(buildstate.spool2);
	}

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD STATS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	/* since we just counted the # of tuples, may as well update stats */
	IndexCloseAndUpdateStats(heap, reltuples, index, buildstate.indtuples);

	PG_RETURN_VOID();
}

/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
btbuildCallback(Relation index,
				HeapTuple htup,
				Datum *values,
				bool *isnull,
				bool tupleIsAlive,
				void *state)
{
	BTBuildState *buildstate = (BTBuildState *) state;
	IndexTuple	itup;
	BTItem		btitem;

	/* form an index tuple and point it at the heap tuple */
	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
	itup->t_tid = htup->t_self;

	btitem = _bt_formitem(itup);

	/*
	 * if we are doing bottom-up btree build, we insert the index into a
	 * spool file for subsequent processing.  otherwise, we insert into the
	 * btree.
	 */
	if (buildstate->usefast)
	{
		if (tupleIsAlive || buildstate->spool2 == NULL)
			_bt_spool(btitem, buildstate->spool);
		else
		{
			/* dead tuples are put into spool2 */
			buildstate->haveDead = true;
			_bt_spool(btitem, buildstate->spool2);
		}
	}
	else
	{
		_bt_doinsert(index, btitem, buildstate->isUnique, buildstate->heapRel);
	}

	buildstate->indtuples += 1;

	/* spool/insert copied the data; release our local copies */
	pfree(btitem);
	pfree(itup);
}

/*
 *	btinsert() -- insert an index tuple into a btree.
 *
 *		Descend the tree recursively, find the appropriate location for our
 *		new tuple, and put it there.
*/
Datum
btinsert(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
	bool	   *isnull = (bool *) PG_GETARG_POINTER(2);
	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
	bool		checkUnique = PG_GETARG_BOOL(5);
	BTItem		btitem;
	IndexTuple	itup;

	/* generate an index tuple */
	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
	itup->t_tid = *ht_ctid;
	btitem = _bt_formitem(itup);

	_bt_doinsert(rel, btitem, checkUnique, heapRel);

	/* _bt_doinsert copied the data into the index page; free our copies */
	pfree(btitem);
	pfree(itup);

	PG_RETURN_BOOL(true);
}

/*
 *	btgettuple() -- Get the next tuple in the scan.
 */
Datum
btgettuple(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Page		page;
	OffsetNumber offnum;
	bool		res;

	/*
	 * If we've already initialized this scan, we can just advance it in the
	 * appropriate direction. If we haven't done so yet, we call a routine to
	 * get the first item in the scan.
	 */
	if (ItemPointerIsValid(&(scan->currentItemData)))
	{
		/*
		 * Restore scan position using heap TID returned by previous call to
		 * btgettuple(). _bt_restscan() re-grabs the read lock on the buffer,
		 * too.
		 */
		_bt_restscan(scan);

		/*
		 * Check to see if we should kill the previously-fetched tuple.
		 */
		if (scan->kill_prior_tuple)
		{
			/*
			 * Yes, so mark it by setting the LP_DELETE bit in the item flags.
			 */
			offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
			page = BufferGetPage(so->btso_curbuf);
			PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;

			/*
			 * Since this can be redone later if needed, it's treated the same
			 * as a commit-hint-bit status update for heap tuples: we mark the
			 * buffer dirty but don't make a WAL log entry.
			 */
			SetBufferCommitInfoNeedsSave(so->btso_curbuf);
		}

		/*
		 * Now continue the scan.
		 */
		res = _bt_next(scan, dir);
	}
	else
		res = _bt_first(scan, dir);

	/*
	 * Skip killed tuples if asked to.
	 */
	if (scan->ignore_killed_tuples)
	{
		while (res)
		{
			offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
			page = BufferGetPage(so->btso_curbuf);
			if (!ItemIdDeleted(PageGetItemId(page, offnum)))
				break;
			res = _bt_next(scan, dir);
		}
	}

	/*
	 * Save heap TID to use it in _bt_restscan. Then release the read lock on
	 * the buffer so that we aren't blocking other backends.
	 *
	 * NOTE: we do keep the pin on the buffer! This is essential to ensure
	 * that someone else doesn't delete the index entry we are stopped on.
	 */
	if (res)
	{
		((BTScanOpaque) scan->opaque)->curHeapIptr = scan->xs_ctup.t_self;
		LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf,
				   BUFFER_LOCK_UNLOCK);
	}

	PG_RETURN_BOOL(res);
}

/*
 * btgetmulti() -- get multiple tuples at once
 *
 * This is a somewhat generic implementation: it avoids the _bt_restscan
 * overhead, but there's no smarts about picking especially good stopping
 * points such as index page boundaries.
 */
Datum
btgetmulti(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
	int32		max_tids = PG_GETARG_INT32(2);
	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	bool		res = true;
	int32		ntids = 0;

	/*
	 * Restore prior state if we were already called at least once.
	 */
	if (ItemPointerIsValid(&(scan->currentItemData)))
		_bt_restscan(scan);

	while (ntids < max_tids)
	{
		/*
		 * Start scan, or advance to next tuple.
		 */
		if (ItemPointerIsValid(&(scan->currentItemData)))
			res = _bt_next(scan, ForwardScanDirection);
		else
			res = _bt_first(scan, ForwardScanDirection);

		/*
		 * Skip killed tuples if asked to.
*/
		if (scan->ignore_killed_tuples)
		{
			while (res)
			{
				Page		page;
				OffsetNumber offnum;

				offnum = ItemPointerGetOffsetNumber(&(scan->currentItemData));
				page = BufferGetPage(so->btso_curbuf);
				if (!ItemIdDeleted(PageGetItemId(page, offnum)))
					break;
				res = _bt_next(scan, ForwardScanDirection);
			}
		}

		if (!res)
			break;
		/* Save tuple ID, and continue scanning */
		tids[ntids] = scan->xs_ctup.t_self;
		ntids++;
	}

	/*
	 * Save heap TID to use it in _bt_restscan. Then release the read lock on
	 * the buffer so that we aren't blocking other backends.
	 */
	if (res)
	{
		((BTScanOpaque) scan->opaque)->curHeapIptr = scan->xs_ctup.t_self;
		LockBuffer(((BTScanOpaque) scan->opaque)->btso_curbuf,
				   BUFFER_LOCK_UNLOCK);
	}

	*returned_tids = ntids;
	PG_RETURN_BOOL(res);
}

/*
 *	btbeginscan() -- start a scan on a btree index
 */
Datum
btbeginscan(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	int			keysz = PG_GETARG_INT32(1);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);
	IndexScanDesc scan;

	/* get the scan */
	scan = RelationGetIndexScan(rel, keysz, scankey);

	PG_RETURN_POINTER(scan);
}

/*
 *	btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
	ItemPointer iptr;
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	if (so == NULL)				/* if called from btbeginscan */
	{
		/* first call for this scan: allocate and zero out our private state */
		so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
		so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer;
		ItemPointerSetInvalid(&(so->curHeapIptr));
		ItemPointerSetInvalid(&(so->mrkHeapIptr));
		if (scan->numberOfKeys > 0)
			so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
		else
			so->keyData = NULL;
		scan->opaque = so;
	}

	/* we aren't holding any read locks, but gotta drop the pins */
	if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
	{
		ReleaseBuffer(so->btso_curbuf);
		so->btso_curbuf = InvalidBuffer;
		ItemPointerSetInvalid(&(so->curHeapIptr));
		ItemPointerSetInvalid(iptr);
	}

	if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
	{
		ReleaseBuffer(so->btso_mrkbuf);
		so->btso_mrkbuf = InvalidBuffer;
		ItemPointerSetInvalid(&(so->mrkHeapIptr));
		ItemPointerSetInvalid(iptr);
	}

	/*
	 * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
	 * - vadim 05/05/97
	 */
	if (scankey && scan->numberOfKeys > 0)
		memmove(scan->keyData,
				scankey,
				scan->numberOfKeys * sizeof(ScanKeyData));
	so->numberOfKeys = 0;		/* until _bt_preprocess_keys sets it */

	PG_RETURN_VOID();
}

/*
 *	btendscan() -- close down a scan
 */
Datum
btendscan(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer iptr;
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pins */
	if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
	{
		if (BufferIsValid(so->btso_curbuf))
			ReleaseBuffer(so->btso_curbuf);
		so->btso_curbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	/* NOTE(review): function body is truncated here in this capture */
	if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
	{
⌨️ Keyboard shortcut reference
Copy code
Ctrl + C
Search code
Ctrl + F
Full-screen mode
F11
Toggle theme
Ctrl + Shift + D
Show shortcuts
?
Increase font size
Ctrl + =
Decrease font size
Ctrl + -