📄 smgr.c
字号:
/*-------------------------------------------------------------------------
 *
 * smgr.c
 *	  public interface routines to storage manager switch.
 *
 *	  All file system operations in POSTGRES dispatch through these
 *	  routines.
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.93.2.3 2006/03/30 22:11:59 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/xact.h"
#include "commands/tablespace.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"


/*
 * This struct of function pointers defines the API between smgr.c and
 * any individual storage manager module.  Note that smgr subfunctions are
 * generally expected to return TRUE on success, FALSE on error.  (For
 * nblocks and truncate we instead say that returning InvalidBlockNumber
 * indicates an error.)
 */
typedef struct f_smgr
{
	bool		(*smgr_init) (void);		/* may be NULL */
	bool		(*smgr_shutdown) (void);	/* may be NULL */
	bool		(*smgr_close) (SMgrRelation reln);
	bool		(*smgr_create) (SMgrRelation reln, bool isRedo);
	bool		(*smgr_unlink) (RelFileNode rnode, bool isRedo);
	bool		(*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
								char *buffer, bool isTemp);
	bool		(*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
							  char *buffer);
	bool		(*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
							   char *buffer, bool isTemp);
	BlockNumber (*smgr_nblocks) (SMgrRelation reln);
	BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
								  bool isTemp);
	bool		(*smgr_immedsync) (SMgrRelation reln);
	bool		(*smgr_commit) (void);		/* may be NULL */
	bool		(*smgr_abort) (void);		/* may be NULL */
	bool		(*smgr_sync) (void);		/* may be NULL */
} f_smgr;


/*
 * The storage manager switch: one f_smgr entry per storage manager.
 * Entries are positional and must follow the member order of f_smgr.
 */
static const f_smgr smgrsw[] = {
	/* magnetic disk */
	{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend,
		mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
		NULL, NULL, mdsync
	}
};

static const int NSmgr = lengthof(smgrsw);


/*
 * Each backend has a hashtable that stores all extant SMgrRelation objects.
 */
static HTAB *SMgrRelationHash = NULL;

/*
 * We keep a list of all relations (represented as RelFileNode values)
 * that have been created or deleted in the current transaction.  When
 * a relation is created, we create the physical file immediately, but
 * remember it so that we can delete the file again if the current
 * transaction is aborted.  Conversely, a deletion request is NOT
 * executed immediately, but is just entered in the list.  When and if
 * the transaction commits, we can delete the physical file.
 *
 * To handle subtransactions, every entry is marked with its transaction
 * nesting level.  At subtransaction commit, we reassign the subtransaction's
 * entries to the parent nesting level.  At subtransaction abort, we can
 * immediately execute the abort-time actions for all entries of the current
 * nesting level.
 *
 * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear
 * unbetimes.  It'd probably be OK to keep it in TopTransactionContext,
 * but I'm being paranoid.
 */
typedef struct PendingRelDelete
{
	RelFileNode relnode;		/* relation that may need to be deleted */
	int			which;			/* which storage manager? */
	bool		isTemp;			/* is it a temporary relation? */
	bool		atCommit;		/* T=delete at commit; F=delete at abort */
	int			nestLevel;		/* xact nesting level of request */
	struct PendingRelDelete *next;		/* linked-list link */
} PendingRelDelete;

static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */


/*
 * Declarations for smgr-related XLOG records
 *
 * Note: we log file creation and truncation here, but logging of deletion
 * actions is handled by xact.c, because it is part of transaction commit.
 */

/* XLOG gives us high 4 bits */
#define XLOG_SMGR_CREATE	0x10
#define XLOG_SMGR_TRUNCATE	0x20

/* Payload of an XLOG_SMGR_CREATE record */
typedef struct xl_smgr_create
{
	RelFileNode rnode;
} xl_smgr_create;

/* Payload of an XLOG_SMGR_TRUNCATE record */
typedef struct xl_smgr_truncate
{
	BlockNumber blkno;
	RelFileNode rnode;
} xl_smgr_truncate;


/* local function prototypes */
static void smgrshutdown(int code, Datum arg);
static void smgr_internal_unlink(RelFileNode rnode, int which,
					 bool isTemp, bool isRedo);


/*
 *	smgrinit(), smgrshutdown() -- Initialize or shut down storage
 *								  managers.
 *
 * Note: smgrinit is called during backend startup (normal or standalone
 * case), *not* during postmaster start.  Therefore, any resources created
 * here or destroyed in smgrshutdown are backend-local.
*/voidsmgrinit(void){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_init) { if (!(*(smgrsw[i].smgr_init)) ()) elog(FATAL, "smgr initialization failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } /* register the shutdown proc */ on_proc_exit(smgrshutdown, 0);}/* * on_proc_exit hook for smgr cleanup during backend shutdown */static voidsmgrshutdown(int code, Datum arg){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_shutdown) { if (!(*(smgrsw[i].smgr_shutdown)) ()) elog(FATAL, "smgr shutdown failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } }}/* * smgropen() -- Return an SMgrRelation object, creating it if need be. * * This does not attempt to actually open the object. */SMgrRelationsmgropen(RelFileNode rnode){ SMgrRelation reln; bool found; if (SMgrRelationHash == NULL) { /* First time through: initialize the hash table */ HASHCTL ctl; MemSet(&ctl, 0, sizeof(ctl)); ctl.keysize = sizeof(RelFileNode); ctl.entrysize = sizeof(SMgrRelationData); ctl.hash = tag_hash; SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_FUNCTION); } /* Look up or create an entry */ reln = (SMgrRelation) hash_search(SMgrRelationHash, (void *) &rnode, HASH_ENTER, &found); /* Initialize it if not present before */ if (!found) { /* hash_search already filled in the lookup key */ reln->smgr_owner = NULL; reln->smgr_which = 0; /* we only have md.c at present */ reln->md_fd = NULL; /* mark it not open */ } return reln;}/* * smgrsetowner() -- Establish a long-lived reference to an SMgrRelation object * * There can be only one owner at a time; this is sufficient since currently * the only such owners exist in the relcache. */voidsmgrsetowner(SMgrRelation *owner, SMgrRelation reln){ /* * First, unhook any old owner. (Normally there shouldn't be any, but it * seems possible that this can happen during swap_relation_files() * depending on the order of processing. 
It's ok to close the old * relcache entry early in that case.) */ if (reln->smgr_owner) *(reln->smgr_owner) = NULL; /* Now establish the ownership relationship. */ reln->smgr_owner = owner; *owner = reln;}/* * smgrclose() -- Close and delete an SMgrRelation object. */voidsmgrclose(SMgrRelation reln){ SMgrRelation *owner; if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not close relation %u/%u/%u: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode))); owner = reln->smgr_owner; if (hash_search(SMgrRelationHash, (void *) &(reln->smgr_rnode), HASH_REMOVE, NULL) == NULL) elog(ERROR, "SMgrRelation hashtable corrupted"); /* * Unhook the owner pointer, if any. We do this last since in the remote * possibility of failure above, the SMgrRelation object will still exist. */ if (owner) *owner = NULL;}/* * smgrcloseall() -- Close all existing SMgrRelation objects. */voidsmgrcloseall(void){ HASH_SEQ_STATUS status; SMgrRelation reln; /* Nothing to do if hashtable not set up */ if (SMgrRelationHash == NULL) return; hash_seq_init(&status, SMgrRelationHash); while ((reln = (SMgrRelation) hash_seq_search(&status)) != NULL) smgrclose(reln);}/* * smgrclosenode() -- Close SMgrRelation object for given RelFileNode, * if one exists. * * This has the same effects as smgrclose(smgropen(rnode)), but it avoids * uselessly creating a hashtable entry only to drop it again when no * such entry exists already. */voidsmgrclosenode(RelFileNode rnode){ SMgrRelation reln; /* Nothing to do if hashtable not set up */ if (SMgrRelationHash == NULL) return; reln = (SMgrRelation) hash_search(SMgrRelationHash, (void *) &rnode, HASH_FIND, NULL); if (reln != NULL) smgrclose(reln);}/* * smgrcreate() -- Create a new relation. * * Given an already-created (but presumably unused) SMgrRelation, * cause the underlying disk file or other storage to be created. 
* * If isRedo is true, it is okay for the underlying file to exist * already because we are in a WAL replay sequence. In this case * we should make no PendingRelDelete entry; the WAL sequence will * tell whether to drop the file. */voidsmgrcreate(SMgrRelation reln, bool isTemp, bool isRedo){ XLogRecPtr lsn; XLogRecData rdata; xl_smgr_create xlrec; PendingRelDelete *pending; /* * We may be using the target table space for the first time in this * database, so create a per-database subdirectory if needed. * * XXX this is a fairly ugly violation of module layering, but this seems * to be the best place to put the check. Maybe TablespaceCreateDbspace * should be here and not in commands/tablespace.c? But that would imply * importing a lot of stuff that smgr.c oughtn't know, either. */ TablespaceCreateDbspace(reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, isRedo); if (!(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo)) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create relation %u/%u/%u: %m", reln->smgr_rnode.spcNode, reln->smgr_rnode.dbNode, reln->smgr_rnode.relNode))); if (isRedo) return; /* * Make a non-transactional XLOG entry showing the file creation. It's * non-transactional because we should replay it whether the transaction * commits or not; if not, the file will be dropped at abort time. 
*/ xlrec.rnode = reln->smgr_rnode; rdata.data = (char *) &xlrec; rdata.len = sizeof(xlrec); rdata.buffer = InvalidBuffer; rdata.next = NULL; lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_CREATE | XLOG_NO_TRAN, &rdata); /* Add the relation to the list of stuff to delete at abort */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->smgr_rnode; pending->which = reln->smgr_which; pending->isTemp = isTemp; pending->atCommit = false; /* delete if abort */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; pendingDeletes = pending;}/* * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit. * * The relation is marked to be removed from the store if we * successfully commit the current transaction. * * This also implies smgrclose() on the SMgrRelation object. */voidsmgrscheduleunlink(SMgrRelation reln, bool isTemp){ PendingRelDelete *pending; /* Add the relation to the list of stuff to delete at commit */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->smgr_rnode; pending->which = reln->smgr_which; pending->isTemp = isTemp; pending->atCommit = true; /* delete if commit */ pending->nestLevel = GetCurrentTransactionNestLevel(); pending->next = pendingDeletes; pendingDeletes = pending; /* * NOTE: if the relation was created in this transaction, it will now be * present in the pending-delete list twice, once with atCommit true and * once with atCommit false. Hence, it will be physically deleted at end * of xact in either case (and the other entry will be ignored by * smgrDoPendingDeletes, so no error will occur). We could instead remove * the existing list entry and delete the physical file immediately, but * for now I'll keep the logic simple. */ /* Now close the file and throw away the hashtable entry */ smgrclose(reln);}/* * smgrdounlink() -- Immediately unlink a relation. 
* * The relation is removed from the store. This should not be used * during transactional operations, since it can't be undone. * * If isRedo is true, it is okay for the underlying file to be gone * already. * * This also implies smgrclose() on the SMgrRelation object. */voidsmgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo){ RelFileNode rnode = reln->smgr_rnode; int which = reln->smgr_which; /* Close the file and throw away the hashtable entry */ smgrclose(reln); smgr_internal_unlink(rnode, which, isTemp, isRedo);}/* * Shared subroutine that actually does the unlink ... */static voidsmgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo){ /* * Get rid of any remaining buffers for the relation. bufmgr will just * drop them without bothering to write the contents. */ DropRelFileNodeBuffers(rnode, isTemp, 0); /* * Tell the free space map to forget this relation. It won't be accessed * any more anyway, but we may as well recycle the map space quickly. */ FreeSpaceMapForgetRel(&rnode); /* * Tell the stats collector to forget it immediately, too. Skip this * in recovery mode, since the stats collector likely isn't running * (and if it is, pgstats.c will get confused because we aren't a real * backend process). */ if (!InRecovery) pgstat_drop_relation(rnode.relNode); /* * And delete the physical files. * * Note: we treat deletion failure as a WARNING, not an error, because * we've already decided to commit or abort the current xact. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -