📄 smgr.c
字号:
/*-------------------------------------------------------------------------
 *
 * smgr.c
 *	  public interface routines to storage manager switch.
 *
 *	  All file system operations in POSTGRES dispatch through these
 *	  routines.
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.65 2003/09/25 06:58:02 petere Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/ipc.h"
#include "storage/smgr.h"
#include "utils/memutils.h"


static void smgrshutdown(void);

/*
 * f_smgr -- one entry of the storage manager switch.
 *
 * Each storage manager supplies one of these structs, holding the function
 * pointers that the smgrXXX() routines in this file dispatch through.
 * Members marked "may be NULL" are tested by their dispatcher before being
 * called; every other member is called unconditionally and so must be
 * provided.
 */
typedef struct f_smgr
{
	int			(*smgr_init) (void);		/* may be NULL */
	int			(*smgr_shutdown) (void);	/* may be NULL */
	int			(*smgr_create) (Relation reln);
	int			(*smgr_unlink) (RelFileNode rnode);
	int			(*smgr_extend) (Relation reln, BlockNumber blocknum,
											char *buffer);
	int			(*smgr_open) (Relation reln);
	int			(*smgr_close) (Relation reln);
	int			(*smgr_read) (Relation reln, BlockNumber blocknum,
										  char *buffer);
	int			(*smgr_write) (Relation reln, BlockNumber blocknum,
										   char *buffer);
	int			(*smgr_blindwrt) (RelFileNode rnode, BlockNumber blkno,
											  char *buffer);
	BlockNumber (*smgr_nblocks) (Relation reln);
	BlockNumber (*smgr_truncate) (Relation reln, BlockNumber nblocks);	/* may be NULL */
	int			(*smgr_commit) (void);		/* may be NULL */
	int			(*smgr_abort) (void);		/* may be NULL */
	int			(*smgr_sync) (void);		/* may be NULL */
} f_smgr;

/*
 * The weird placement of commas in this init block is to keep the compiler
 * happy, regardless of what storage managers we have (or don't have).
*/static f_smgr smgrsw[] = { /* magnetic disk */ {mdinit, NULL, mdcreate, mdunlink, mdextend, mdopen, mdclose, mdread, mdwrite, mdblindwrt, mdnblocks, mdtruncate, mdcommit, mdabort, mdsync },#ifdef STABLE_MEMORY_STORAGE /* main memory */ {mminit, mmshutdown, mmcreate, mmunlink, mmextend, mmopen, mmclose, mmread, mmwrite, mmblindwrt, mmnblocks, NULL, mmcommit, mmabort, NULL},#endif};/* * This array records which storage managers are write-once, and which * support overwrite. A 'true' entry means that the storage manager is * write-once. In the best of all possible worlds, there would be no * write-once storage managers. */#ifdef NOT_USEDstatic bool smgrwo[] = { false, /* magnetic disk */#ifdef STABLE_MEMORY_STORAGE false, /* main memory */#endif};#endifstatic int NSmgr = lengthof(smgrsw);/* * We keep a list of all relations (represented as RelFileNode values) * that have been created or deleted in the current transaction. When * a relation is created, we create the physical file immediately, but * remember it so that we can delete the file again if the current * transaction is aborted. Conversely, a deletion request is NOT * executed immediately, but is just entered in the list. When and if * the transaction commits, we can delete the physical file. * * NOTE: the list is kept in TopMemoryContext to be sure it won't disappear * unbetimes. It'd probably be OK to keep it in TopTransactionContext, * but I'm being paranoid. */typedef struct PendingRelDelete{ RelFileNode relnode; /* relation that may need to be deleted */ int16 which; /* which storage manager? */ bool isTemp; /* is it a temporary relation? */ bool atCommit; /* T=delete at commit; F=delete at abort */ struct PendingRelDelete *next; /* linked-list link */} PendingRelDelete;static PendingRelDelete *pendingDeletes = NULL; /* head of linked list *//* * smgrinit(), smgrshutdown() -- Initialize or shut down all storage * managers. 
* */intsmgrinit(void){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_init) { if ((*(smgrsw[i].smgr_init)) () == SM_FAIL) elog(FATAL, "smgr initialization failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } /* register the shutdown proc */ on_proc_exit(smgrshutdown, 0); return SM_SUCCESS;}static voidsmgrshutdown(void){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_shutdown) { if ((*(smgrsw[i].smgr_shutdown)) () == SM_FAIL) elog(FATAL, "smgr shutdown failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } }}/* * smgrcreate() -- Create a new relation. * * This routine takes a reldesc, creates the relation on the appropriate * device, and returns a file descriptor for it. */intsmgrcreate(int16 which, Relation reln){ int fd; PendingRelDelete *pending; if ((fd = (*(smgrsw[which].smgr_create)) (reln)) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create relation \"%s\": %m", RelationGetRelationName(reln)))); /* Add the relation to the list of stuff to delete at abort */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->rd_node; pending->which = which; pending->isTemp = reln->rd_istemp; pending->atCommit = false; /* delete if abort */ pending->next = pendingDeletes; pendingDeletes = pending; return fd;}/* * smgrunlink() -- Unlink a relation. * * The relation is removed from the store. Actually, we just remember * that we want to do this at transaction commit. 
*/intsmgrunlink(int16 which, Relation reln){ PendingRelDelete *pending; /* Make sure the file is closed */ if (reln->rd_fd >= 0) smgrclose(which, reln); /* Add the relation to the list of stuff to delete at commit */ pending = (PendingRelDelete *) MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); pending->relnode = reln->rd_node; pending->which = which; pending->isTemp = reln->rd_istemp; pending->atCommit = true; /* delete if commit */ pending->next = pendingDeletes; pendingDeletes = pending; /* * NOTE: if the relation was created in this transaction, it will now * be present in the pending-delete list twice, once with atCommit * true and once with atCommit false. Hence, it will be physically * deleted at end of xact in either case (and the other entry will be * ignored by smgrDoPendingDeletes, so no error will occur). We could * instead remove the existing list entry and delete the physical file * immediately, but for now I'll keep the logic simple. */ return SM_SUCCESS;}/* * smgrextend() -- Add a new block to a file. * * The semantics are basically the same as smgrwrite(): write at the * specified position. However, we are expecting to extend the * relation (ie, blocknum is the current EOF), and so in case of * failure we clean up by truncating. * * Returns SM_SUCCESS on success; aborts the current transaction on * failure. */intsmgrextend(int16 which, Relation reln, BlockNumber blocknum, char *buffer){ int status; status = (*(smgrsw[which].smgr_extend)) (reln, blocknum, buffer); if (status == SM_FAIL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not extend relation \"%s\": %m", RelationGetRelationName(reln)), errhint("Check free disk space."))); return status;}/* * smgropen() -- Open a relation using a particular storage manager. * * Returns the fd for the open relation on success. * * On failure, returns -1 if failOK, else aborts the transaction. 
*/intsmgropen(int16 which, Relation reln, bool failOK){ int fd; if (reln->rd_rel->relkind == RELKIND_VIEW) return -1; if (reln->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) return -1; if ((fd = (*(smgrsw[which].smgr_open)) (reln)) < 0) if (!failOK) ereport(ERROR, (errcode_for_file_access(), errmsg("could not open file \"%s\": %m", RelationGetRelationName(reln)))); return fd;}/* * smgrclose() -- Close a relation. * * Returns SM_SUCCESS on success, aborts on failure. */intsmgrclose(int16 which, Relation reln){ if ((*(smgrsw[which].smgr_close)) (reln) == SM_FAIL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not close relation \"%s\": %m", RelationGetRelationName(reln)))); return SM_SUCCESS;}/* * smgrread() -- read a particular block from a relation into the supplied * buffer. * * This routine is called from the buffer manager in order to * instantiate pages in the shared buffer cache. All storage managers * return pages in the format that POSTGRES expects. This routine * dispatches the read. On success, it returns SM_SUCCESS. On failure, * the current transaction is aborted. */intsmgrread(int16 which, Relation reln, BlockNumber blocknum, char *buffer){ int status; status = (*(smgrsw[which].smgr_read)) (reln, blocknum, buffer); if (status == SM_FAIL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not read block %d of relation \"%s\": %m", blocknum, RelationGetRelationName(reln)))); return status;}/* * smgrwrite() -- Write the supplied buffer out. * * This is not a synchronous write -- the block is not necessarily * on disk at return, only dumped out to the kernel. * * The buffer is written out via the appropriate * storage manager. This routine returns SM_SUCCESS or aborts * the current transaction. 
*/intsmgrwrite(int16 which, Relation reln, BlockNumber blocknum, char *buffer){ int status; status = (*(smgrsw[which].smgr_write)) (reln, blocknum, buffer); if (status == SM_FAIL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write block %d of relation \"%s\": %m", blocknum, RelationGetRelationName(reln)))); return status;}/* * smgrblindwrt() -- Write a page out blind. * * In some cases, we may find a page in the buffer cache that we * can't make a reldesc for. This happens, for example, when we * want to reuse a dirty page that was written by a transaction * that has not yet committed, which created a new relation. In * this case, the buffer manager will call smgrblindwrt() with * the name and OID of the database and the relation to which the * buffer belongs. Every storage manager must be able to write * this page out to stable storage in this circumstance. */intsmgrblindwrt(int16 which, RelFileNode rnode, BlockNumber blkno, char *buffer){ int status; status = (*(smgrsw[which].smgr_blindwrt)) (rnode, blkno, buffer); if (status == SM_FAIL) ereport(ERROR, (errcode_for_file_access(), errmsg("could not write block %d of %u/%u blind: %m", blkno, rnode.tblNode, rnode.relNode))); return status;}/* * smgrnblocks() -- Calculate the number of POSTGRES blocks in the * supplied relation. * * Returns the number of blocks on success, aborts the current * transaction on failure. */BlockNumbersmgrnblocks(int16 which, Relation reln){ BlockNumber nblocks; nblocks = (*(smgrsw[which].smgr_nblocks)) (reln); /* * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would * fail --- but that's a good thing, because it would stop us from * extending the rel another block and having a block whose number * actually is InvalidBlockNumber. 
*/ if (nblocks == InvalidBlockNumber) ereport(ERROR, (errcode_for_file_access(), errmsg("could not count blocks of relation \"%s\": %m", RelationGetRelationName(reln)))); return nblocks;}/* * smgrtruncate() -- Truncate supplied relation to a specified number * of blocks * * Returns the number of blocks on success, aborts the current * transaction on failure. */BlockNumbersmgrtruncate(int16 which, Relation reln, BlockNumber nblocks){ BlockNumber newblks; newblks = nblocks; if (smgrsw[which].smgr_truncate) { /* * Tell the free space map to forget anything it may have stored * for the about-to-be-deleted blocks. We want to be sure it * won't return bogus block numbers later on. */ FreeSpaceMapTruncateRel(&reln->rd_node, nblocks); newblks = (*(smgrsw[which].smgr_truncate)) (reln, nblocks); if (newblks == InvalidBlockNumber) ereport(ERROR, (errcode_for_file_access(), errmsg("could not truncate relation \"%s\" to %u blocks: %m", RelationGetRelationName(reln), nblocks))); } return newblks;}/* * smgrDoPendingDeletes() -- take care of relation deletes at end of xact. */intsmgrDoPendingDeletes(bool isCommit){ while (pendingDeletes != NULL) { PendingRelDelete *pending = pendingDeletes; pendingDeletes = pending->next; if (pending->atCommit == isCommit) { /* * Get rid of any leftover buffers for the rel (shouldn't be * any in the commit case, but there can be in the abort * case). */ DropRelFileNodeBuffers(pending->relnode, pending->isTemp); /* * Tell the free space map to forget this relation. It won't * be accessed any more anyway, but we may as well recycle the * map space quickly. */ FreeSpaceMapForgetRel(&pending->relnode); /* * And delete the physical files. * * Note: we treat deletion failure as a WARNING, not an error, * because we've already decided to commit or abort the * current xact. 
*/ if ((*(smgrsw[pending->which].smgr_unlink)) (pending->relnode) == SM_FAIL) ereport(WARNING, (errcode_for_file_access(), errmsg("could not unlink %u/%u: %m", pending->relnode.tblNode, pending->relnode.relNode))); } pfree(pending); } return SM_SUCCESS;}/* * smgrcommit() -- Prepare to commit changes made during the current * transaction. * * This is called before we actually commit. */intsmgrcommit(void){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_commit) { if ((*(smgrsw[i].smgr_commit)) () == SM_FAIL) elog(FATAL, "transaction commit failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } return SM_SUCCESS;}/* * smgrabort() -- Abort changes made during the current transaction. */intsmgrabort(void){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_abort) { if ((*(smgrsw[i].smgr_abort)) () == SM_FAIL) elog(FATAL, "transaction abort failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } return SM_SUCCESS;}/* * Sync files to disk at checkpoint time. */intsmgrsync(void){ int i; for (i = 0; i < NSmgr; i++) { if (smgrsw[i].smgr_sync) { if ((*(smgrsw[i].smgr_sync)) () == SM_FAIL) elog(PANIC, "storage sync failed on %s: %m", DatumGetCString(DirectFunctionCall1(smgrout, Int16GetDatum(i)))); } } return SM_SUCCESS;}#ifdef NOT_USEDboolsmgriswo(int16 smgrno){ if (smgrno < 0 || smgrno >= NSmgr) elog(ERROR, "invalid storage manager id: %d", smgrno); return smgrwo[smgrno];}#endifvoidsmgr_redo(XLogRecPtr lsn, XLogRecord *record){}voidsmgr_undo(XLogRecPtr lsn, XLogRecord *record){}voidsmgr_desc(char *buf, uint8 xl_info, char *rec){}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -