nbtree.c
/*-------------------------------------------------------------------------
 *
 * nbtree.c
 *	  Implementation of Lehman and Yao's btree management algorithm for
 *	  Postgres.
 *
 * NOTES
 *	  This file contains only the public interface routines.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.156.2.1 2008/04/16 23:59:51 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/nbtree.h"
#include "catalog/index.h"
#include "commands/vacuum.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/lmgr.h"
#include "utils/memutils.h"


/* Working state for btbuild and its callback */
typedef struct
{
	bool		isUnique;
	bool		haveDead;
	Relation	heapRel;
	BTSpool    *spool;

	/*
	 * spool2 is needed only when the index is a unique index. Dead tuples
	 * are put into spool2 instead of spool in order to avoid the uniqueness
	 * check.
	 */
	BTSpool    *spool2;
	double		indtuples;
} BTBuildState;

/* Working state needed by btvacuumpage */
typedef struct
{
	IndexVacuumInfo *info;
	IndexBulkDeleteResult *stats;
	IndexBulkDeleteCallback callback;
	void	   *callback_state;
	BTCycleId	cycleid;
	BlockNumber *freePages;
	int			nFreePages;		/* number of entries in freePages[] */
	int			maxFreePages;	/* allocated size of freePages[] */
	BlockNumber totFreePages;	/* true total # of free pages */
	MemoryContext pagedelcontext;
} BTVacState;


static void btbuildCallback(Relation index,
				HeapTuple htup,
				Datum *values,
				bool *isnull,
				bool tupleIsAlive,
				void *state);
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
			 IndexBulkDeleteCallback callback, void *callback_state,
			 BTCycleId cycleid);
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
			 BlockNumber orig_blkno);


/*
 *	btbuild() -- build a new btree index.
 */
Datum
btbuild(PG_FUNCTION_ARGS)
{
	Relation	heap = (Relation) PG_GETARG_POINTER(0);
	Relation	index = (Relation) PG_GETARG_POINTER(1);
	IndexInfo  *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
	IndexBuildResult *result;
	double		reltuples;
	BTBuildState buildstate;

	buildstate.isUnique = indexInfo->ii_Unique;
	buildstate.haveDead = false;
	buildstate.heapRel = heap;
	buildstate.spool = NULL;
	buildstate.spool2 = NULL;
	buildstate.indtuples = 0;

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
		ResetUsage();
#endif   /* BTREE_BUILD_STATS */

	/*
	 * We expect to be called exactly once for any index relation. If that's
	 * not the case, big trouble's what we have.
	 */
	if (RelationGetNumberOfBlocks(index) != 0)
		elog(ERROR, "index \"%s\" already contains data",
			 RelationGetRelationName(index));

	buildstate.spool = _bt_spoolinit(index, indexInfo->ii_Unique, false);

	/*
	 * If building a unique index, put dead tuples in a second spool to keep
	 * them out of the uniqueness check.
	 */
	if (indexInfo->ii_Unique)
		buildstate.spool2 = _bt_spoolinit(index, false, true);

	/* do the heap scan */
	reltuples = IndexBuildHeapScan(heap, index, indexInfo,
								   btbuildCallback, (void *) &buildstate);

	/* okay, all heap tuples are indexed */
	if (buildstate.spool2 && !buildstate.haveDead)
	{
		/* spool2 turns out to be unnecessary */
		_bt_spooldestroy(buildstate.spool2);
		buildstate.spool2 = NULL;
	}

	/*
	 * Finish the build by (1) completing the sort of the spool file, (2)
	 * inserting the sorted tuples into btree pages and (3) building the
	 * upper levels.
	 */
	_bt_leafbuild(buildstate.spool, buildstate.spool2);
	_bt_spooldestroy(buildstate.spool);
	if (buildstate.spool2)
		_bt_spooldestroy(buildstate.spool2);

#ifdef BTREE_BUILD_STATS
	if (log_btree_build_stats)
	{
		ShowUsage("BTREE BUILD STATS");
		ResetUsage();
	}
#endif   /* BTREE_BUILD_STATS */

	/*
	 * If we are reindexing a pre-existing index, it is critical to send out
	 * a relcache invalidation SI message to ensure all backends re-read the
	 * index metapage.  We expect that the caller will ensure that happens
	 * (typically as a side effect of updating index stats, but it must
	 * happen even if the stats don't change!)
	 */

	/*
	 * Return statistics
	 */
	result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));

	result->heap_tuples = reltuples;
	result->index_tuples = buildstate.indtuples;

	PG_RETURN_POINTER(result);
}
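/*
 * A minimal sketch of how btbuild() is reached, assuming the standard fmgr
 * index-AM calling convention: index_build() resolves the access method's
 * ambuild regproc and invokes it with the heap, index, and IndexInfo
 * pointers.  "procedure" and the relation variable names below are
 * hypothetical, introduced only for this sketch.
 */
#ifdef NOT_USED
{
	IndexBuildResult *result;

	/* procedure is assumed to be the pg_am.ambuild regproc Oid for btree */
	result = (IndexBuildResult *)
		DatumGetPointer(OidFunctionCall3(procedure,
										 PointerGetDatum(heapRelation),
										 PointerGetDatum(indexRelation),
										 PointerGetDatum(indexInfo)));
}
#endif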
/*
 * Per-tuple callback from IndexBuildHeapScan
 */
static void
btbuildCallback(Relation index,
				HeapTuple htup,
				Datum *values,
				bool *isnull,
				bool tupleIsAlive,
				void *state)
{
	BTBuildState *buildstate = (BTBuildState *) state;
	IndexTuple	itup;

	/* form an index tuple and point it at the heap tuple */
	itup = index_form_tuple(RelationGetDescr(index), values, isnull);
	itup->t_tid = htup->t_self;

	/*
	 * insert the index tuple into the appropriate spool file for subsequent
	 * processing
	 */
	if (tupleIsAlive || buildstate->spool2 == NULL)
		_bt_spool(itup, buildstate->spool);
	else
	{
		/* dead tuples are put into spool2 */
		buildstate->haveDead = true;
		_bt_spool(itup, buildstate->spool2);
	}

	buildstate->indtuples += 1;

	pfree(itup);
}

/*
 *	btinsert() -- insert an index tuple into a btree.
 *
 *		Descend the tree recursively, find the appropriate location for our
 *		new tuple, and put it there.
 */
Datum
btinsert(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	Datum	   *values = (Datum *) PG_GETARG_POINTER(1);
	bool	   *isnull = (bool *) PG_GETARG_POINTER(2);
	ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
	Relation	heapRel = (Relation) PG_GETARG_POINTER(4);
	bool		checkUnique = PG_GETARG_BOOL(5);
	IndexTuple	itup;

	/* generate an index tuple */
	itup = index_form_tuple(RelationGetDescr(rel), values, isnull);
	itup->t_tid = *ht_ctid;

	_bt_doinsert(rel, itup, checkUnique, heapRel);

	pfree(itup);

	PG_RETURN_BOOL(true);
}

/*
 *	btgettuple() -- Get the next tuple in the scan.
 */
Datum
btgettuple(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	bool		res;

	/*
	 * If we've already initialized this scan, we can just advance it in the
	 * appropriate direction. If we haven't done so yet, we call a routine to
	 * get the first item in the scan.
	 */
	if (BTScanPosIsValid(so->currPos))
	{
		/*
		 * Check to see if we should kill the previously-fetched tuple.
		 */
		if (scan->kill_prior_tuple)
		{
			/*
			 * Yes, remember it for later. (We'll deal with all such tuples
			 * at once right before leaving the index page.)  The test for
			 * numKilled overrun is not just paranoia: if the caller reverses
			 * direction in the indexscan then the same item might get
			 * entered multiple times. It's not worth trying to optimize
			 * that, so we don't detect it, but instead just forget any
			 * excess entries.
			 */
			if (so->killedItems == NULL)
				so->killedItems = (int *)
					palloc(MaxIndexTuplesPerPage * sizeof(int));
			if (so->numKilled < MaxIndexTuplesPerPage)
				so->killedItems[so->numKilled++] = so->currPos.itemIndex;
		}

		/*
		 * Now continue the scan.
		 */
		res = _bt_next(scan, dir);
	}
	else
		res = _bt_first(scan, dir);

	PG_RETURN_BOOL(res);
}
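/*
 * A minimal sketch of the caller side of btgettuple(), assuming the
 * standard fmgr scan convention: index_getnext() keeps invoking the
 * amgettuple entry point until it returns false.  "procedure" is a
 * hypothetical FmgrInfo for btree's amgettuple, named only for this sketch.
 */
#ifdef NOT_USED
for (;;)
{
	bool		found;

	found = DatumGetBool(FunctionCall2(procedure,
									   PointerGetDatum(scan),
									   Int32GetDatum(ForwardScanDirection)));
	if (!found)
		break;					/* no more matches */

	/* scan->xs_ctup.t_self now holds the TID of the next matching tuple */
}
#endif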
/*
 * btgetmulti() -- get multiple tuples at once
 *
 * In the current implementation there seems no strong reason to stop at
 * index page boundaries; we just press on until we fill the caller's buffer
 * or run out of matches.
 */
Datum
btgetmulti(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer tids = (ItemPointer) PG_GETARG_POINTER(1);
	int32		max_tids = PG_GETARG_INT32(2);
	int32	   *returned_tids = (int32 *) PG_GETARG_POINTER(3);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	bool		res = true;
	int32		ntids = 0;

	if (max_tids <= 0)			/* behave correctly in boundary case */
		PG_RETURN_BOOL(true);

	/* If we haven't started the scan yet, fetch the first page & tuple. */
	if (!BTScanPosIsValid(so->currPos))
	{
		res = _bt_first(scan, ForwardScanDirection);
		if (!res)
		{
			/* empty scan */
			*returned_tids = ntids;
			PG_RETURN_BOOL(res);
		}
		/* Save tuple ID, and continue scanning */
		tids[ntids] = scan->xs_ctup.t_self;
		ntids++;
	}

	while (ntids < max_tids)
	{
		/*
		 * Advance to next tuple within page.  This is the same as the easy
		 * case in _bt_next().
		 */
		if (++so->currPos.itemIndex > so->currPos.lastItem)
		{
			/* let _bt_next do the heavy lifting */
			res = _bt_next(scan, ForwardScanDirection);
			if (!res)
				break;
		}

		/* Save tuple ID, and continue scanning */
		tids[ntids] = so->currPos.items[so->currPos.itemIndex].heapTid;
		ntids++;
	}

	*returned_tids = ntids;
	PG_RETURN_BOOL(res);
}

/*
 *	btbeginscan() -- start a scan on a btree index
 */
Datum
btbeginscan(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	int			keysz = PG_GETARG_INT32(1);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(2);
	IndexScanDesc scan;

	/* get the scan */
	scan = RelationGetIndexScan(rel, keysz, scankey);

	PG_RETURN_POINTER(scan);
}

/*
 *	btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ScanKey		scankey = (ScanKey) PG_GETARG_POINTER(1);
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	if (so == NULL)				/* if called from btbeginscan */
	{
		so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
		so->currPos.buf = so->markPos.buf = InvalidBuffer;
		if (scan->numberOfKeys > 0)
			so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
		else
			so->keyData = NULL;
		so->killedItems = NULL; /* until needed */
		so->numKilled = 0;
		scan->opaque = so;
	}

	/* we aren't holding any read locks, but gotta drop the pins */
	if (BTScanPosIsValid(so->currPos))
	{
		/* Before leaving current page, deal with any killed items */
		if (so->numKilled > 0)
			_bt_killitems(scan, false);
		ReleaseBuffer(so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	if (BTScanPosIsValid(so->markPos))
	{
		ReleaseBuffer(so->markPos.buf);
		so->markPos.buf = InvalidBuffer;
	}
	so->markItemIndex = -1;

	/*
	 * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
	 *		- vadim 05/05/97
	 */
	if (scankey && scan->numberOfKeys > 0)
		memmove(scan->keyData,
				scankey,
				scan->numberOfKeys * sizeof(ScanKeyData));
	so->numberOfKeys = 0;		/* until _bt_preprocess_keys sets it */

	PG_RETURN_VOID();
}
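/*
 * A minimal sketch of a btgetmulti() consumer, assuming the standard fmgr
 * convention and a caller-supplied TID buffer: keep requesting batches
 * until the scan reports exhaustion.  "procedure" is a hypothetical
 * FmgrInfo for btree's amgetmulti, and the buffer size is arbitrary.
 */
#ifdef NOT_USED
{
	ItemPointerData tids[1024];
	int32		ntids;
	bool		more;

	do
	{
		more = DatumGetBool(FunctionCall4(procedure,
										  PointerGetDatum(scan),
										  PointerGetDatum(tids),
										  Int32GetDatum(1024),
										  PointerGetDatum(&ntids)));
		/* process tids[0 .. ntids-1] before asking for the next batch */
	} while (more);
}
#endif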
/*
 *	btendscan() -- close down a scan
 */
Datum
btendscan(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pins */
	if (BTScanPosIsValid(so->currPos))
	{
		/* Before leaving current page, deal with any killed items */
		if (so->numKilled > 0)
			_bt_killitems(scan, false);
		ReleaseBuffer(so->currPos.buf);
		so->currPos.buf = InvalidBuffer;
	}

	if (BTScanPosIsValid(so->markPos))
	{
		ReleaseBuffer(so->markPos.buf);
		so->markPos.buf = InvalidBuffer;
	}
	so->markItemIndex = -1;

	if (so->killedItems != NULL)
		pfree(so->killedItems);
	if (so->keyData != NULL)
		pfree(so->keyData);
	pfree(so);

	PG_RETURN_VOID();
}

/*
 *	btmarkpos() -- save current scan position
 */
Datum
btmarkpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pin */
	if (BTScanPosIsValid(so->markPos))
	{
		ReleaseBuffer(so->markPos.buf);
		so->markPos.buf = InvalidBuffer;
	}

	/*
	 * Just record the current itemIndex.  If we later step to next page
	 * before releasing the marked position, _bt_steppage makes a full copy
	 * of the currPos struct in markPos.  If (as often happens) the mark is
	 * moved before we leave the page, we don't have to do that work.
	 */
	if (BTScanPosIsValid(so->currPos))
		so->markItemIndex = so->currPos.itemIndex;
	else
		so->markItemIndex = -1;

	PG_RETURN_VOID();
}

/*
 *	btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	if (so->markItemIndex >= 0)
	{
		/*
		 * The mark position is on the same page we are currently on. Just