📄 freespace.c
字号:
/*------------------------------------------------------------------------- * * freespace.c * POSTGRES free space map for quickly finding free space in relations * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.50 2005/10/29 00:31:51 petere Exp $ * * * NOTES: * * The only really interesting aspect of this code is the heuristics for * deciding how much information we can afford to keep about each relation, * given that we have a limited amount of workspace in shared memory. * These currently work as follows: * * The number of distinct relations tracked is limited by a configuration * variable (MaxFSMRelations). When this would be exceeded, we discard the * least recently used relation. A doubly-linked list with move-to-front * behavior keeps track of which relation is least recently used. * * For each known relation, we track the average request size given to * GetPageWithFreeSpace() as well as the most recent number of pages given * to RecordRelationFreeSpace(). The average request size is not directly * used in this module, but we expect VACUUM to use it to filter out * uninteresting amounts of space before calling RecordRelationFreeSpace(). * The sum of the RRFS page counts is thus the total number of "interesting" * pages that we would like to track; this is called DesiredFSMPages. * * The number of pages actually tracked is limited by a configuration variable * (MaxFSMPages). When this is less than DesiredFSMPages, each relation * gets to keep a fraction MaxFSMPages/DesiredFSMPages of its free pages. * We discard pages with less free space to reach this target. * * Actually, our space allocation is done in "chunks" of CHUNKPAGES pages, * with each relation guaranteed at least one chunk. This reduces thrashing * of the storage allocations when there are small changes in the RRFS page * counts from one VACUUM to the next. (XXX it might also be worthwhile to * impose some kind of moving-average smoothing on the RRFS page counts?) * * So the actual arithmetic is: for each relation compute myRequest as the * number of chunks needed to hold its RRFS page count (not counting the * first, guaranteed chunk); compute sumRequests as the sum of these values * over all relations; then for each relation figure its target allocation * as * 1 + round(spareChunks * myRequest / sumRequests) * where spareChunks = totalChunks - numRels is the number of chunks we have * a choice what to do with. We round off these numbers because truncating * all of them would waste significant space. But because of roundoff, it's * possible for the last few relations to get less space than they should; * the target allocation must be checked against remaining available space. * *------------------------------------------------------------------------- */#include "postgres.h"#include <errno.h>#include <limits.h>#include <math.h>#include <unistd.h>#include "miscadmin.h"#include "storage/fd.h"#include "storage/freespace.h"#include "storage/itemptr.h"#include "storage/lwlock.h"#include "storage/shmem.h"/* Initial value for average-request moving average */#define INITIAL_AVERAGE ((Size) (BLCKSZ / 32))/* * Number of pages and bytes per allocation chunk. Indexes can squeeze 50% * more pages into the same space because they don't need to remember how much * free space on each page. The nominal number of pages, CHUNKPAGES, is for * regular rels, and INDEXCHUNKPAGES is for indexes. CHUNKPAGES should be * even so that no space is wasted in the index case. */#define CHUNKPAGES 16#define CHUNKBYTES (CHUNKPAGES * sizeof(FSMPageData))#define INDEXCHUNKPAGES ((int) (CHUNKBYTES / sizeof(IndexFSMPageData)))/* * Typedefs and macros for items in the page-storage arena. We use the * existing ItemPointer and BlockId data structures, which are designed * to pack well (they should be 6 and 4 bytes apiece regardless of machine * alignment issues). Unfortunately we can't use the ItemPointer access * macros, because they include Asserts insisting that ip_posid != 0. */typedef ItemPointerData FSMPageData;typedef BlockIdData IndexFSMPageData;#define FSMPageGetPageNum(ptr) \ BlockIdGetBlockNumber(&(ptr)->ip_blkid)#define FSMPageGetSpace(ptr) \ ((Size) (ptr)->ip_posid)#define FSMPageSetPageNum(ptr, pg) \ BlockIdSet(&(ptr)->ip_blkid, pg)#define FSMPageSetSpace(ptr, sz) \ ((ptr)->ip_posid = (OffsetNumber) (sz))#define IndexFSMPageGetPageNum(ptr) \ BlockIdGetBlockNumber(ptr)#define IndexFSMPageSetPageNum(ptr, pg) \ BlockIdSet(ptr, pg)/*---------- * During database shutdown, we store the contents of FSM into a disk file, * which is re-read during startup. This way we don't have a startup * transient condition where FSM isn't really functioning. * * The file format is: * label "FSM\0" * endian constant 0x01020304 for detecting endianness problems * version# * numRels * -- for each rel, in *reverse* usage order: * relfilenode * isIndex * avgRequest * lastPageCount * storedPages * arena data array of storedPages FSMPageData or IndexFSMPageData *---------- *//* Name of FSM cache file (relative to $PGDATA) */#define FSM_CACHE_FILENAME "global/pg_fsm.cache"/* Fixed values in header */#define FSM_CACHE_LABEL "FSM"#define FSM_CACHE_ENDIAN 0x01020304#define FSM_CACHE_VERSION 20030305/* File header layout */typedef struct FsmCacheFileHeader{ char label[4]; uint32 endian; uint32 version; int32 numRels;} FsmCacheFileHeader;/* Per-relation header */typedef struct FsmCacheRelHeader{ RelFileNode key; /* hash key (must be first) */ bool isIndex; /* if true, we store only page numbers */ uint32 avgRequest; /* moving average of space requests */ int32 lastPageCount; /* pages passed to RecordRelationFreeSpace */ int32 storedPages; /* # of pages stored in arena */} FsmCacheRelHeader;/* * Shared free-space-map objects * * The per-relation objects are indexed by a hash table, and are also members * of two linked lists: one ordered by recency of usage (most recent first), * and the other ordered by physical location of the associated storage in * the page-info arena. * * Each relation owns one or more chunks of per-page storage in the "arena". * The chunks for each relation are always consecutive, so that it can treat * its page storage as a simple array. We further insist that its page data * be ordered by block number, so that binary search is possible. * * Note: we handle pointers to these items as pointers, not as SHMEM_OFFSETs. * This assumes that all processes accessing the map will have the shared * memory segment mapped at the same place in their address space. */typedef struct FSMHeader FSMHeader;typedef struct FSMRelation FSMRelation;/* Header for whole map */struct FSMHeader{ FSMRelation *usageList; /* FSMRelations in usage-recency order */ FSMRelation *usageListTail; /* tail of usage-recency list */ FSMRelation *firstRel; /* FSMRelations in arena storage order */ FSMRelation *lastRel; /* tail of storage-order list */ int numRels; /* number of FSMRelations now in use */ double sumRequests; /* sum of requested chunks over all rels */ char *arena; /* arena for page-info storage */ int totalChunks; /* total size of arena, in chunks */ int usedChunks; /* # of chunks assigned */ /* NB: there are totalChunks - usedChunks free chunks at end of arena */};/* * Per-relation struct --- this is an entry in the shared hash table. * The hash key is the RelFileNode value (hence, we look at the physical * relation ID, not the logical ID, which is appropriate). */struct FSMRelation{ RelFileNode key; /* hash key (must be first) */ FSMRelation *nextUsage; /* next rel in usage-recency order */ FSMRelation *priorUsage; /* prior rel in usage-recency order */ FSMRelation *nextPhysical; /* next rel in arena-storage order */ FSMRelation *priorPhysical; /* prior rel in arena-storage order */ bool isIndex; /* if true, we store only page numbers */ Size avgRequest; /* moving average of space requests */ int lastPageCount; /* pages passed to RecordRelationFreeSpace */ int firstChunk; /* chunk # of my first chunk in arena */ int storedPages; /* # of pages stored in arena */ int nextPage; /* index (from 0) to start next search at */};int MaxFSMRelations; /* these are set by guc.c */int MaxFSMPages;static FSMHeader *FreeSpaceMap; /* points to FSMHeader in shared memory */static HTAB *FreeSpaceMapRelHash; /* points to (what used to be) * FSMHeader->relHash */static void CheckFreeSpaceMapStatistics(int elevel, int numRels, double needed);static FSMRelation *lookup_fsm_rel(RelFileNode *rel);static FSMRelation *create_fsm_rel(RelFileNode *rel);static void delete_fsm_rel(FSMRelation *fsmrel);static int realloc_fsm_rel(FSMRelation *fsmrel, int nPages, bool isIndex);static void link_fsm_rel_usage(FSMRelation *fsmrel);static void unlink_fsm_rel_usage(FSMRelation *fsmrel);static void link_fsm_rel_storage(FSMRelation *fsmrel);static void unlink_fsm_rel_storage(FSMRelation *fsmrel);static BlockNumber find_free_space(FSMRelation *fsmrel, Size spaceNeeded);static BlockNumber find_index_free_space(FSMRelation *fsmrel);static void fsm_record_free_space(FSMRelation *fsmrel, BlockNumber page, Size spaceAvail);static bool lookup_fsm_page_entry(FSMRelation *fsmrel, BlockNumber page, int *outPageIndex);static void compact_fsm_storage(void);static void push_fsm_rels_after(FSMRelation *afterRel);static void pack_incoming_pages(FSMPageData *newLocation, int newPages, PageFreeSpaceInfo *pageSpaces, int nPages);static void pack_existing_pages(FSMPageData *newLocation, int newPages, FSMPageData *oldLocation, int oldPages);static int fsm_calc_request(FSMRelation *fsmrel);static int fsm_calc_target_allocation(int myRequest);static int fsm_current_chunks(FSMRelation *fsmrel);static int fsm_current_allocation(FSMRelation *fsmrel);/* * Exported routines *//* * InitFreeSpaceMap -- Initialize the freespace module. * * This must be called once during shared memory initialization. * It builds the empty free space map table. FreeSpaceLock must also be * initialized at some point, but is not touched here --- we assume there is * no need for locking, since only the calling process can be accessing shared * memory as yet. */voidInitFreeSpaceMap(void){ HASHCTL info; int nchunks; bool found; /* Create table header */ FreeSpaceMap = (FSMHeader *) ShmemInitStruct("Free Space Map Header", sizeof(FSMHeader), &found); if (FreeSpaceMap == NULL) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("insufficient shared memory for free space map"))); if (!found) MemSet(FreeSpaceMap, 0, sizeof(FSMHeader)); /* Create hashtable for FSMRelations */ info.keysize = sizeof(RelFileNode); info.entrysize = sizeof(FSMRelation); info.hash = tag_hash; FreeSpaceMapRelHash = ShmemInitHash("Free Space Map Hash", MaxFSMRelations + 1, MaxFSMRelations + 1, &info, (HASH_ELEM | HASH_FUNCTION)); if (!FreeSpaceMapRelHash) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("insufficient shared memory for free space map"))); if (found) return; /* Allocate page-storage arena */ nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; /* This check ensures spareChunks will be greater than zero */ if (nchunks <= MaxFSMRelations) ereport(FATAL, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("max_fsm_pages must exceed max_fsm_relations * %d", CHUNKPAGES))); FreeSpaceMap->arena = (char *) ShmemAlloc((Size) nchunks * CHUNKBYTES); if (FreeSpaceMap->arena == NULL) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), errmsg("insufficient shared memory for free space map"))); FreeSpaceMap->totalChunks = nchunks; FreeSpaceMap->usedChunks = 0; FreeSpaceMap->sumRequests = 0;}/* * Estimate amount of shmem space needed for FSM. */SizeFreeSpaceShmemSize(void){ Size size; int nchunks; /* table header */ size = MAXALIGN(sizeof(FSMHeader)); /* hash table, including the FSMRelation objects */ size = add_size(size, hash_estimate_size(MaxFSMRelations + 1, sizeof(FSMRelation))); /* page-storage arena */ nchunks = (MaxFSMPages - 1) / CHUNKPAGES + 1; size = add_size(size, mul_size(nchunks, CHUNKBYTES)); return size;}/* * GetPageWithFreeSpace - try to find a page in the given relation with * at least the specified amount of free space. * * If successful, return the block number; if not, return InvalidBlockNumber. * * The caller must be prepared for the possibility that the returned page * will turn out to have too little space available by the time the caller * gets a lock on it. In that case, the caller should report the actual * amount of free space available on that page and then try again (see * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned, * extend the relation. */BlockNumberGetPageWithFreeSpace(RelFileNode *rel, Size spaceNeeded){ FSMRelation *fsmrel; BlockNumber freepage; LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. */ fsmrel = create_fsm_rel(rel); /* * Update the moving average of space requests. This code implements an * exponential moving average with an equivalent period of about 63 * requests. Ignore silly requests, however, to ensure that the average * stays sane. */ if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) { int cur_avg = (int) fsmrel->avgRequest; cur_avg += ((int) spaceNeeded - cur_avg) / 32; fsmrel->avgRequest = (Size) cur_avg; } freepage = find_free_space(fsmrel, spaceNeeded); LWLockRelease(FreeSpaceLock); return freepage;}/* * RecordAndGetPageWithFreeSpace - update info about a page and try again. * * We provide this combo form, instead of a separate Record operation, * to save one lock and hash table lookup cycle. */BlockNumberRecordAndGetPageWithFreeSpace(RelFileNode *rel, BlockNumber oldPage, Size oldSpaceAvail, Size spaceNeeded){ FSMRelation *fsmrel; BlockNumber freepage; /* Sanity check: ensure spaceAvail will fit into OffsetNumber */ AssertArg(oldSpaceAvail < BLCKSZ); LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); /* * We always add a rel to the hashtable when it is inquired about. */ fsmrel = create_fsm_rel(rel); /* Do the Record */ fsm_record_free_space(fsmrel, oldPage, oldSpaceAvail); /* * Update the moving average of space requests, same as in * GetPageWithFreeSpace. */ if (spaceNeeded > 0 && spaceNeeded < BLCKSZ) { int cur_avg = (int) fsmrel->avgRequest; cur_avg += ((int) spaceNeeded - cur_avg) / 32; fsmrel->avgRequest = (Size) cur_avg; } /* Do the Get */ freepage = find_free_space(fsmrel, spaceNeeded); LWLockRelease(FreeSpaceLock); return freepage;}/* * GetAvgFSMRequestSize - get average FSM request size for a relation. * * If the relation is not known to FSM, return a default value. */SizeGetAvgFSMRequestSize(RelFileNode *rel){ Size result; FSMRelation *fsmrel; LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE); fsmrel = lookup_fsm_rel(rel); if (fsmrel) result = fsmrel->avgRequest; else result = INITIAL_AVERAGE; LWLockRelease(FreeSpaceLock); return result;}/* * RecordRelationFreeSpace - record available-space info about a relation. * * Any pre-existing info about the relation is assumed obsolete and discarded. * * The given pageSpaces[] array must be sorted in order by blkno. Note that * the FSM is at liberty to discard some or all of the data. */voidRecordRelationFreeSpace(RelFileNode *rel, int nPages, PageFreeSpaceInfo *pageSpaces){ FSMRelation *fsmrel; /* Limit nPages to something sane */ if (nPages < 0) nPages = 0; else if (nPages > MaxFSMPages) nPages = MaxFSMPages;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -