📄 freespace.c
字号:
/*------------------------------------------------------------------------- * * freespace.c * POSTGRES free space map for quickly finding free space in relations * * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.59 2008/01/01 19:45:51 momjian Exp $ * * * NOTES: * * The only really interesting aspect of this code is the heuristics for * deciding how much information we can afford to keep about each relation, * given that we have a limited amount of workspace in shared memory. * These currently work as follows: * * The number of distinct relations tracked is limited by a configuration * variable (MaxFSMRelations). When this would be exceeded, we discard the * least recently used relation. A doubly-linked list with move-to-front * behavior keeps track of which relation is least recently used. * * For each known relation, we track the average request size given to * GetPageWithFreeSpace() as well as the most recent number of pages reported * to RecordRelationFreeSpace(). The average request size is not directly * used in this module, but we expect VACUUM to use it to filter out * uninteresting amounts of space before calling RecordRelationFreeSpace(). * The sum of the RRFS page counts is thus the total number of "interesting" * pages that we would like to track; this is called DesiredFSMPages. * * The number of pages actually tracked is limited by a configuration variable * (MaxFSMPages). When this is less than DesiredFSMPages, each relation * gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages. * We discard pages with less free space to reach this target. * * Actually, our space allocation is done in "chunks" of CHUNKPAGES pages, * with each relation guaranteed at least one chunk. This reduces thrashing * of the storage allocations when there are small changes in the RRFS page * counts from one VACUUM to the next. (XXX it might also be worthwhile to * impose some kind of moving-average smoothing on the RRFS page counts?) * * So the actual arithmetic is: for each relation compute myRequest as the * number of chunks needed to hold its RRFS page count (not counting the * first, guaranteed chunk); compute sumRequests as the sum of these values * over all relations; then for each relation figure its target allocation * as * 1 + round(spareChunks * myRequest / sumRequests) * where spareChunks = totalChunks - numRels is the number of chunks we have * a choice what to do with. We round off these numbers because truncating * all of them would waste significant space. But because of roundoff, it's * possible for the last few relations to get less space than they should; * the target allocation must be checked against remaining available space. * *------------------------------------------------------------------------- */#include "postgres.h"#include <limits.h>#include <math.h>#include <unistd.h>#include "storage/fd.h"#include "storage/freespace.h"#include "storage/lwlock.h"#include "storage/shmem.h"/*---------- * During database shutdown, we store the contents of FSM into a disk file, * which is re-read during startup. This way we don't have a startup * transient condition where FSM isn't really functioning. * * The file format is: * label "FSM\0" * endian constant 0x01020304 for detecting endianness problems * version# * numRels * -- for each rel, in *reverse* usage order: * relfilenode * isIndex * avgRequest * interestingPages * storedPages * arena data array of storedPages FSMPageData or IndexFSMPageData *---------- *//* Name of FSM cache file (relative to $PGDATA) */#define FSM_CACHE_FILENAME "global/pg_fsm.cache"/* Fixed values in header */#define FSM_CACHE_LABEL "FSM"#define FSM_CACHE_ENDIAN 0x01020304#define FSM_CACHE_VERSION 20030305/* File header layout */typedef struct FsmCacheFileHeader{ char label[4]; uint32 endian; uint32 version; int32 numRels;} FsmCacheFileHeader;/* Per-relation header */typedef struct FsmCacheRelHeader{ RelFileNode key; /* hash key (must be first) */ bool isIndex; /* if true, we store only page numbers */ uint32 avgRequest; /* moving average of space requests */ BlockNumber interestingPages; /* # of pages with useful free space */ int32 storedPages; /* # of pages stored in arena */} FsmCacheRelHeader;int MaxFSMRelations; /* these are set by guc.c */int MaxFSMPages;static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */static HTAB *FreeSpaceMapRelHash; /* points to (what used to be) * FSMHeader->relHash */static void CheckFreeSpaceMapStatistics(int elevel, int numRels, double needed);static FSMRelation *lookup_fsm_rel(RelFileNode *rel);static FSMRelation *create_fsm_rel(RelFileNode *rel);static void delete_fsm_rel(FSMRelation *fsmrel);static int realloc_fsm_rel(FSMRelation *fsmrel, BlockNumber interestingPages, bool isIndex);static void link_fsm_rel_usage(FSMRelation *fsmrel);static void unlink_fsm_rel_usage(FSMRelation *fsmrel);static void link_fsm_rel_storage(FSMRelation *fsmrel);static void unlink_fsm_rel_storage(FSMRelation *fsmrel);static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);static BlockNumber find_index_free_space(FSMRelation *fsmrel);static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail);static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, int *outPageIndex);static void compact_fsm_storage(void);static void push_fsm_rels_after(FSMRelation *afterRel);static void pack_incoming_pages(FSMPageData *newLocation, int newPages, PageFreeSpaceInfo *pageSpaces, int nPages);static void pack_existing_pages(FSMPageData *newLocation, int newPages, FSMPageData *oldLocation, int oldPages);static int fsm_calc_request(FSMRelation *fsmrel);static int fsm_calc_request_unclamped(FSMRelation *fsmrel);static int fsm_calc_target_allocation(int myRequest);static int fsm_current_chunks(FSMRelation *fsmrel);static int fsm_current_allocation(FSMRelation *fsmrel);/* * Exported routines *//* * InitFreeSpaceMap -- Initialize the freespace module. * * This must be called once during shared memory initialization. * It builds the empty free space map table. FreeSpaceLock must also be * initialized at some point, but is not touched here --- we assume there is * no need for locking, since only the calling process can be accessing shared * memory as yet. */voidInitFreeSpaceMap(void){ HASHCTL info; int nchunks; bool found; /* Create table header */ FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header", sizeof(FSMHeader), &found); if (FreeSpaceMap == NULL) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("insufficient shared memory for free space map"))); if (!found) MemSet(FreeSpaceMap, 0, sizeof(FSMHeader)); /* Create hashtable for FSMRelations */ info.keysize = sizeof(RelFileNode); info.entrysize = sizeof(FSMRelation); info.hash = tag_hash; FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash", MaxFSMRelations + 1, MaxFSMRelations + 1, &info, (HASH_ELEM | HASH_FUNCTION)); if (!FreeSpaceMapRelHash) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("insufficient shared memory for free space map"))); if (found) return; /* Allocate page-storage arena */ nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; /* This check ensures spareChunks will be greater than zero */ if (nchunks <= MaxFSMRelations) ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("max_fsm_pages must exceed max_fsm_relations * %d", CHUNKPAGES))); FreeSpaceMap->arena = (char *) ShmemAlloc((Size) nchunks * CHUNKBYTES); if (FreeSpaceMap->arena == NULL) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("insufficient shared memory for free space map"))); FreeSpaceMap->totalChunks = nchunks; FreeSpaceMap->usedChunks = 0; FreeSpaceMap->sumRequests = 0;}/* * Estimate amount of shmem space needed for FSM. */SizeFreeSpaceShmemSize(void){ Size size; int nchunks; /* table header */ size = MAXALIGN(sizeof(FSMHeader)); /* hash table, including the FSMRelation objects */ size = add_size(size, hash_estimate_size(MaxFSMRelations + 1, sizeof(FSMRelation))); /* page-storage arena */ nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; size = add_size(size, mul_size(nchunks, CHUNKBYTES)); return size;}/* * GetPageWithFreeSpace - try to find a page in the given relation with * at least the specified amount of free space. * * If successful, return the block number; if not, return InvalidBlockNumber. * * The caller must be prepared for the possibility that the returned page * will turn out to have too little space available by the time the caller * gets a lock on it. In that case, the caller should report the actual * amount of free space available on that page and then try again (see * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned, * extend the relation. */BlockNumberGetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded){ FSMRelation *fsmrel; BlockNumber freepage; LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. */ fsmrel = create_fsm_rel(rel); /* * Update the moving average of space requests. This code implements an * exponential moving average with an equivalent period of about 63 * requests. Ignore silly requests, however, to ensure that the average * stays sane. */ if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) { int cur_avg = (int) fsmrel->avgRequest; cur_avg += ((int) spaceNeeded - cur_avg) / 32; fsmrel->avgRequest = (Size) cur_avg; } freepage = find_free_space(fsmrel, spaceNeeded); LWLockRelease(FreeSpaceLock); return freepage;}/* * RecordAndGetPageWithFreeSpace - update info about a page and try again. * * We provide this combo form, instead of a separate Record operation, * to save one lock and hash table lookup cycle. */BlockNumberRecordAndGetPageWithFreeSpace(RelFileNode *rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded){ FSMRelation *fsmrel; BlockNumber freepage; /* Sanity check: ensure spaceAvail will fit into OffsetNumber */ AssertArg(oldSpaceAvail < BLCKSZ); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. */ fsmrel = create_fsm_rel(rel); /* Do the Record */ fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail); /* * Update the moving average of space requests, same as in * GetPageWithFreeSpace. */ if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) { int cur_avg = (int) fsmrel->avgRequest; cur_avg += ((int) spaceNeeded - cur_avg) / 32; fsmrel->avgRequest = (Size) cur_avg; } /* Do the Get */ freepage = find_free_space(fsmrel, spaceNeeded); LWLockRelease(FreeSpaceLock); return freepage;}/* * GetAvgFSMRequestSize - get average FSM request size for a relation. * * If the relation is not known to FSM, return a default value. */SizeGetAvgFSMRequestSize(RelFileNode *rel){ Size result; FSMRelation *fsmrel; LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); fsmrel = lookup_fsm_rel(rel); if (fsmrel) result = fsmrel->avgRequest; else result = INITIAL_AVERAGE; LWLockRelease(FreeSpaceLock); return result;}/* * RecordRelationFreeSpace - record available-space info about a relation. * * Any pre-existing info about the relation is assumed obsolete and discarded. * * interestingPages is the total number of pages in the relation that have * at least threshold free space; nPages is the number actually reported in * pageSpaces[] (may be less --- in particular, callers typically clamp their * space usage to MaxFSMPages). * * The given pageSpaces[] array must be sorted in order by blkno. Note that * the FSM is at liberty to discard some or all of the data. */voidRecordRelationFreeSpace(RelFileNode *rel, BlockNumber interestingPages, int nPages, PageFreeSpaceInfo *pageSpaces){ FSMRelation *fsmrel; /* Limit nPages to something sane */ if (nPages < 0) nPages = 0; else if (nPages > MaxFSMPages) nPages = MaxFSMPages; LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * Note we don't record info about a relation unless there's already an * FSM entry for it, implying someone has done GetPageWithFreeSpace for * it. Inactive rels thus will not clutter the map simply by being * vacuumed. */ fsmrel = lookup_fsm_rel(rel); if (fsmrel) { int curAlloc; int curAllocPages; FSMPageData *newLocation; curAlloc = realloc_fsm_rel(fsmrel, interestingPages, false); curAllocPages = curAlloc * CHUNKPAGES; /* * If the data fits in our current allocation, just copy it; otherwise * must compress. */ newLocation = (FSMPageData *) (FreeSpaceMap->arena + fsmrel->firstChunk * CHUNKBYTES); if (nPages <= curAllocPages) { int i; for (i = 0; i < nPages; i++) { BlockNumber page = pageSpaces[i].blkno; Size avail = pageSpaces[i].avail; /* Check caller provides sorted data */ if (i > 0 && page <= pageSpaces[i - 1].blkno) elog(ERROR, "free-space data is not in page order"); FSMPageSetPageNum(newLocation, page); FSMPageSetSpace(newLocation, avail); newLocation++; } fsmrel->storedPages = nPages; } else { pack_incoming_pages(newLocation, curAllocPages, pageSpaces, nPages); fsmrel->storedPages = curAllocPages; } } LWLockRelease(FreeSpaceLock);}/* * GetFreeIndexPage - like GetPageWithFreeSpace, but for indexes */BlockNumberGetFreeIndexPage(RelFileNode *rel){ FSMRelation *fsmrel; BlockNumber freepage; LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. */ fsmrel = create_fsm_rel(rel); freepage = find_index_free_space(fsmrel); LWLockRelease(FreeSpaceLock); return freepage;}/* * RecordIndexFreeSpace - like RecordRelationFreeSpace, but for indexes */voidRecordIndexFreeSpace(RelFileNode *rel, BlockNumber interestingPages, int nPages, BlockNumber *pages){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -