📄 vacuumlazy.c
/*-------------------------------------------------------------------------
 *
 * vacuumlazy.c
 *    Concurrent ("lazy") vacuuming.
 *
 *
 * The major space usage for LAZY VACUUM is storage for the array of dead
 * tuple TIDs, with the next biggest need being storage for per-disk-page
 * free space info.  We want to ensure we can vacuum even the very largest
 * relations with finite memory space usage.  To do that, we set upper bounds
 * on the number of tuples and pages we will keep track of at once.
 *
 * We are willing to use at most maintenance_work_mem memory space to keep
 * track of dead tuples.  We initially allocate an array of TIDs of that size.
 * If the array threatens to overflow, we suspend the heap scan phase and
 * perform a pass of index cleanup and page compaction, then resume the heap
 * scan with an empty TID array.
 *
 * We can limit the storage for page free space to MaxFSMPages entries,
 * since that's the most the free space map will be willing to remember
 * anyway.  If the relation has fewer than that many pages with free space,
 * life is easy: just build an array of per-page info.  If it has more,
 * we store the free space info as a heap ordered by amount of free space,
 * so that we can discard the pages with least free space to ensure we never
 * have more than MaxFSMPages entries in all.  The surviving page entries
 * are passed to the free space map at conclusion of the scan.
 *
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.61.2.2 2006/03/04 19:09:23 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "access/genam.h"
#include "access/heapam.h"
#include "access/xlog.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/freespace.h"
#include "storage/smgr.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"


/*
 * Space/time tradeoff parameters: do these need to be user-tunable?
 *
 * To consider truncating the relation, we want there to be at least
 * REL_TRUNCATE_MINIMUM or (relsize / REL_TRUNCATE_FRACTION) (whichever
 * is less) potentially-freeable pages.
 */
#define REL_TRUNCATE_MINIMUM    1000
#define REL_TRUNCATE_FRACTION   16


typedef struct LVRelStats
{
    /* Overall statistics about rel */
    BlockNumber rel_pages;
    double      rel_tuples;
    BlockNumber pages_removed;
    double      tuples_deleted;
    BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
    Size        threshold;      /* minimum interesting free space */
    /* List of TIDs of tuples we intend to delete */
    /* NB: this list is ordered by TID address */
    int         num_dead_tuples;    /* current # of entries */
    int         max_dead_tuples;    /* # slots allocated in array */
    ItemPointer dead_tuples;    /* array of ItemPointerData */
    /* Array or heap of per-page info about free space */
    /* We use a simple array until it fills up, then convert to heap */
    bool        fs_is_heap;     /* are we using heap organization? */
    int         num_free_pages; /* current # of entries */
    int         max_free_pages; /* # slots allocated in array */
    PageFreeSpaceInfo *free_pages;  /* array or heap of blkno/avail */
} LVRelStats;


static int  elevel = -1;

static TransactionId OldestXmin;
static TransactionId FreezeLimit;


/* non-export function prototypes */
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes);
static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
static void lazy_scan_index(Relation indrel, LVRelStats *vacrelstats);
static void lazy_vacuum_index(Relation indrel,
                  double *index_tups_vacuumed,
                  BlockNumber *index_pages_removed,
                  LVRelStats *vacrelstats);
static int  lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
                 int tupindex, LVRelStats *vacrelstats);
static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
static BlockNumber count_nondeletable_pages(Relation onerel,
                         LVRelStats *vacrelstats);
static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
                       ItemPointer itemptr);
static void lazy_record_free_space(LVRelStats *vacrelstats,
                       BlockNumber page, Size avail);
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static bool dummy_tid_reaped(ItemPointer itemptr, void *state);
static void lazy_update_fsm(Relation onerel, LVRelStats *vacrelstats);
static int  vac_cmp_itemptr(const void *left, const void *right);
static int  vac_cmp_page_spaces(const void *left, const void *right);
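
/*
 * lazy_space_alloc() is declared above but its body lies beyond this
 * excerpt.  A minimal sketch of the bounded allocation the file header
 * describes (illustrative only; the real function body may differ in
 * detail):
 *
 *      long    maxtuples;
 *      int     maxpages;
 *
 *      // TID array: at most maintenance_work_mem worth of ItemPointers,
 *      // but never less than one heap page's worth of tuples
 *      maxtuples = (maintenance_work_mem * 1024L) / sizeof(ItemPointerData);
 *      maxtuples = Max(maxtuples, MaxHeapTuplesPerPage);
 *      vacrelstats->num_dead_tuples = 0;
 *      vacrelstats->max_dead_tuples = (int) maxtuples;
 *      vacrelstats->dead_tuples = (ItemPointer)
 *          palloc(maxtuples * sizeof(ItemPointerData));
 *
 *      // free-space list: never track more than MaxFSMPages pages,
 *      // nor more pages than the relation actually has
 *      maxpages = MaxFSMPages;
 *      if (relblocks < (BlockNumber) maxpages)
 *          maxpages = (int) relblocks;
 *      vacrelstats->fs_is_heap = false;
 *      vacrelstats->num_free_pages = 0;
 *      vacrelstats->max_free_pages = maxpages;
 *      vacrelstats->free_pages = (PageFreeSpaceInfo *)
 *          palloc(maxpages * sizeof(PageFreeSpaceInfo));
 */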

/*
 *  lazy_vacuum_rel() -- perform LAZY VACUUM for one heap relation
 *
 *      This routine vacuums a single heap, cleans out its indexes, and
 *      updates its num_pages and num_tuples statistics.
 *
 *      At entry, we have already established a transaction and opened
 *      and locked the relation.
 */
void
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
{
    LVRelStats *vacrelstats;
    Relation   *Irel;
    int         nindexes;
    bool        hasindex;
    BlockNumber possibly_freeable;

    if (vacstmt->verbose)
        elevel = INFO;
    else
        elevel = DEBUG2;

    vacuum_set_xid_limits(vacstmt, onerel->rd_rel->relisshared,
                          &OldestXmin, &FreezeLimit);

    vacrelstats = (LVRelStats *) palloc0(sizeof(LVRelStats));

    /* Set threshold for interesting free space = average request size */
    /* XXX should we scale it up or down?  Adjust vacuum.c too, if so */
    vacrelstats->threshold = GetAvgFSMRequestSize(&onerel->rd_node);

    /* Open all indexes of the relation */
    vac_open_indexes(onerel, ShareUpdateExclusiveLock, &nindexes, &Irel);
    hasindex = (nindexes > 0);

    /* Do the vacuuming */
    lazy_scan_heap(onerel, vacrelstats, Irel, nindexes);

    /* Done with indexes */
    vac_close_indexes(nindexes, Irel, NoLock);

    /*
     * Optionally truncate the relation.
     *
     * Don't even think about it unless we have a shot at releasing a goodly
     * number of pages.  Otherwise, the time taken isn't worth it.
     */
    possibly_freeable = vacrelstats->rel_pages - vacrelstats->nonempty_pages;
    if (possibly_freeable >= REL_TRUNCATE_MINIMUM ||
        possibly_freeable >= vacrelstats->rel_pages / REL_TRUNCATE_FRACTION)
        lazy_truncate_heap(onerel, vacrelstats);

    /* Update shared free space map with final free space info */
    lazy_update_fsm(onerel, vacrelstats);

    /* Update statistics in pg_class */
    vac_update_relstats(RelationGetRelid(onerel),
                        vacrelstats->rel_pages,
                        vacrelstats->rel_tuples,
                        hasindex);

    /* report results to the stats collector, too */
    pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared,
                         vacstmt->analyze, vacrelstats->rel_tuples);
}
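
/*
 * Because the dead_tuples array is kept ordered by TID address (see the
 * struct comment above), each index-cleanup pass can test whether an index
 * entry points at a dead tuple with a binary search.  lazy_tid_reaped() is
 * declared above but defined beyond this excerpt; a sketch of that idea
 * (assumed shape, not verbatim source):
 *
 *      static bool
 *      lazy_tid_reaped(ItemPointer itemptr, void *state)
 *      {
 *          LVRelStats *vacrelstats = (LVRelStats *) state;
 *          ItemPointer res;
 *
 *          res = (ItemPointer) bsearch((void *) itemptr,
 *                                      (void *) vacrelstats->dead_tuples,
 *                                      vacrelstats->num_dead_tuples,
 *                                      sizeof(ItemPointerData),
 *                                      vac_cmp_itemptr);
 *          return (res != NULL);
 *      }
 */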

/*
 *  lazy_scan_heap() -- scan an open heap relation
 *
 *      This routine sets commit status bits, builds lists of dead tuples
 *      and pages with free space, and calculates statistics on the number
 *      of live tuples in the heap.  When done, or when we run low on space
 *      for dead-tuple TIDs, invoke vacuuming of indexes and heap.
 */
static void
lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
               Relation *Irel, int nindexes)
{
    BlockNumber nblocks,
                blkno;
    HeapTupleData tuple;
    char       *relname;
    BlockNumber empty_pages;
    double      num_tuples,
                tups_vacuumed,
                nkeep,
                nunused;
    double     *index_tups_vacuumed;
    BlockNumber *index_pages_removed;
    bool        did_vacuum_index = false;
    int         i;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    relname = RelationGetRelationName(onerel);
    ereport(elevel,
            (errmsg("vacuuming \"%s.%s\"",
                    get_namespace_name(RelationGetNamespace(onerel)),
                    relname)));

    empty_pages = 0;
    num_tuples = tups_vacuumed = nkeep = nunused = 0;

    /*
     * Because index vacuuming is done in multiple passes, we have to keep
     * track of the total number of rows and pages removed from each index.
     * index_tups_vacuumed[i] is the number removed so far from the i'th
     * index.  (For partial indexes this could well be different from
     * tups_vacuumed.)  Likewise for index_pages_removed[i].
     */
    index_tups_vacuumed = (double *) palloc0(nindexes * sizeof(double));
    index_pages_removed = (BlockNumber *) palloc0(nindexes * sizeof(BlockNumber));

    nblocks = RelationGetNumberOfBlocks(onerel);
    vacrelstats->rel_pages = nblocks;
    vacrelstats->nonempty_pages = 0;

    lazy_space_alloc(vacrelstats, nblocks);

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        pgchanged,
                    tupgone,
                    hastup;
        int         prev_dead_count;

        vacuum_delay_point();

        /*
         * If we are close to overrunning the available space for dead-tuple
         * TIDs, pause and do a cycle of vacuuming before we tackle this page.
         */
        if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage &&
            vacrelstats->num_dead_tuples > 0)
        {
            /* Remove index entries */
            for (i = 0; i < nindexes; i++)
                lazy_vacuum_index(Irel[i],
                                  &index_tups_vacuumed[i],
                                  &index_pages_removed[i],
                                  vacrelstats);
            did_vacuum_index = true;
            /* Remove tuples from heap */
            lazy_vacuum_heap(onerel, vacrelstats);
            /* Forget the now-vacuumed tuples, and press on */
            vacrelstats->num_dead_tuples = 0;
        }

        buf = ReadBuffer(onerel, blkno);

        /* In this phase we only need shared access to the buffer */
        LockBuffer(buf, BUFFER_LOCK_SHARE);

        page = BufferGetPage(buf);

        if (PageIsNew(page))
        {
            /*
             * An all-zeroes page could be left over if a backend extends the
             * relation but crashes before initializing the page. Reclaim such
             * pages for use.
             *
             * We have to be careful here because we could be looking at a
             * page that someone has just added to the relation and not yet
             * been able to initialize (see RelationGetBufferForTuple).  To
             * interlock against that, release the buffer read lock (which we
             * must do anyway) and grab the relation extension lock before
             * re-locking in exclusive mode.  If the page is still
             * uninitialized by then, it must be left over from a crashed
             * backend, and we can initialize it.
             *
             * We don't really need the relation lock when this is a new or
             * temp relation, but it's probably not worth the code space to
             * check that, since this surely isn't a critical path.
             *
             * Note: the comparable code in vacuum.c need not worry because
             * it's got exclusive lock on the whole relation.
             */
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            LockRelationForExtension(onerel, ExclusiveLock);
            UnlockRelationForExtension(onerel, ExclusiveLock);
            LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
            if (PageIsNew(page))
            {
                ereport(WARNING,
                (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
                        relname, blkno)));
                PageInit(page, BufferGetPageSize(buf), 0);
                empty_pages++;
                lazy_record_free_space(vacrelstats, blkno,
                                       PageGetFreeSpace(page));
            }
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            WriteBuffer(buf);
            continue;
        }

        if (PageIsEmpty(page))
        {
            empty_pages++;
            lazy_record_free_space(vacrelstats, blkno,
                                   PageGetFreeSpace(page));
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buf);
            continue;
        }
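
        /*
         * The two lazy_record_free_space() calls above feed the bounded
         * free-space list described in the file header.  The function is
         * declared above but defined beyond this excerpt; a hedged sketch
         * of its core decision (append() and replace_heap_root() are
         * hypothetical helpers standing in for inline code, and the
         * one-time heapify on first overflow is omitted):
         *
         *      if (avail < vacrelstats->threshold)
         *          return;                     // not worth remembering
         *      if (vacrelstats->num_free_pages < vacrelstats->max_free_pages)
         *          append(page, avail);        // simple array mode
         *      else if (avail > vacrelstats->free_pages[0].avail)
         *          replace_heap_root(page, avail); // evict least-space page
         */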

        pgchanged = false;
        hastup = false;
        prev_dead_count = vacrelstats->num_dead_tuples;
        maxoff = PageGetMaxOffsetNumber(page);
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid;

            itemid = PageGetItemId(page, offnum);

            if (!ItemIdIsUsed(itemid))
            {
                nunused += 1;
                continue;
            }

            tuple.t_datamcxt = NULL;
            tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
            tuple.t_len = ItemIdGetLength(itemid);
            ItemPointerSet(&(tuple.t_self), blkno, offnum);

            tupgone = false;

            switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:
                    tupgone = true;     /* we can delete the tuple */
                    break;
                case HEAPTUPLE_LIVE:

                    /*
                     * Tuple is good.  Consider whether to replace its xmin
                     * value with FrozenTransactionId.
                     *
                     * NB: Since we hold only a shared buffer lock here, we
                     * are assuming that TransactionId read/write is atomic.
                     * This is not the only place that makes such an
                     * assumption.  It'd be possible to avoid the assumption
                     * by momentarily acquiring exclusive lock, but for the
                     * moment I see no need to.
                     */
                    if (TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple.t_data)) &&
                        TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
                                              FreezeLimit))
                    {
                        HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                        /* infomask should be okay already */
                        Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
                        pgchanged = true;
                    }

                    /*
                     * Other checks...
                     */
                    if (onerel->rd_rel->relhasoids &&
                        !OidIsValid(HeapTupleGetOid(&tuple)))
                        elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
                             relname, blkno, offnum);
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must not remove it
                     * from relation.
                     */
                    nkeep += 1;
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:
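
/*
 * The excerpt breaks off inside the HeapTupleSatisfiesVacuum() switch.  In
 * the full function, tuples judged HEAPTUPLE_DEAD are remembered through
 * lazy_record_dead_tuple(), declared above.  Given the MaxHeapTuplesPerPage
 * headroom check at the top of the scan loop, a minimal sketch of that
 * recorder (illustrative, not verbatim source) could be:
 *
 *      static void
 *      lazy_record_dead_tuple(LVRelStats *vacrelstats, ItemPointer itemptr)
 *      {
 *          // The caller's headroom check guarantees room for at least one
 *          // more page's worth of TIDs, so a plain append suffices; TIDs
 *          // arrive in blkno/offset order, keeping the array sorted for
 *          // the binary search in lazy_tid_reaped().
 *          if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
 *          {
 *              vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr;
 *              vacrelstats->num_dead_tuples++;
 *          }
 *      }
 */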