📄 clog.c
字号:
/*-------------------------------------------------------------------------
 *
 * clog.c
 *      PostgreSQL transaction-commit-log manager
 *
 * This module replaces the old "pg_log" access code, which treated pg_log
 * essentially like a relation, in that it went through the regular buffer
 * manager.  The problem with that was that there wasn't any good way to
 * recycle storage space for transactions so old that they'll never be
 * looked up again.  Now we use specialized access code so that the commit
 * log can be broken into relatively small, independent segments.
 *
 * XLOG interactions: this module generates an XLOG record whenever a new
 * CLOG page is initialized to zeroes.  Other writes of CLOG come from
 * recording of transaction commit or abort in xact.c, which generates its
 * own XLOG records for these events and will re-perform the status update
 * on redo; so we need make no additional XLOG entry here.  For synchronous
 * transaction commits, the XLOG is guaranteed flushed through the XLOG commit
 * record before we are called to log a commit, so the WAL rule "write xlog
 * before data" is satisfied automatically.  However, for async commits we
 * must track the latest LSN affecting each CLOG page, so that we can flush
 * XLOG that far and satisfy the WAL rule.  We don't have to worry about this
 * for aborts (whether sync or async), since the post-crash assumption would
 * be that such transactions failed anyway.
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.46 2008/01/01 19:45:46 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/clog.h"
#include "access/slru.h"
#include "access/transam.h"
#include "postmaster/bgwriter.h"

/*
 * Defines for CLOG page sizes.  A page is the same BLCKSZ as is used
 * everywhere else in Postgres.
 *
 * Note: because TransactionIds are 32 bits and wrap around at 0xFFFFFFFF,
 * CLOG page numbering also wraps around at 0xFFFFFFFF/CLOG_XACTS_PER_PAGE,
 * and CLOG segment numbering at 0xFFFFFFFF/CLOG_XACTS_PER_SEGMENT.  We need
 * take no explicit notice of that fact in this module, except when comparing
 * segment and page numbers in TruncateCLOG (see CLOGPagePrecedes).
 */

/* We need two bits per xact, so four xacts fit in a byte */
#define CLOG_BITS_PER_XACT  2
#define CLOG_XACTS_PER_BYTE 4
#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
/* Mask selecting one transaction's status bits once shifted down to bit 0 */
#define CLOG_XACT_BITMASK   ((1 << CLOG_BITS_PER_XACT) - 1)

/*
 * Locate a transaction's status bits:
 *   TransactionIdToPage     - CLOG page number holding the xid
 *   TransactionIdToPgIndex  - xid's ordinal position within that page
 *   TransactionIdToByte     - byte offset within the page
 *   TransactionIdToBIndex   - position within that byte (multiply by
 *                             CLOG_BITS_PER_XACT to get the bit shift)
 */
#define TransactionIdToPage(xid)    ((xid) / (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToPgIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE)
#define TransactionIdToByte(xid)    (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid)  ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)

/* We store the latest async LSN for each group of transactions */
#define CLOG_XACTS_PER_LSN_GROUP    32  /* keep this a power of 2 */
#define CLOG_LSNS_PER_PAGE  (CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)

/* Index into the per-buffer-slot group_lsn array for (slotno, xid) */
#define GetLSNIndex(slotno, xid)    ((slotno) * CLOG_LSNS_PER_PAGE + \
    ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)

/*
 * Link to shared-memory data structures for CLOG control
 */
static SlruCtlData ClogCtlData;

#define ClogCtl (&ClogCtlData)


static int  ZeroCLOGPage(int pageno, bool writeXlog);
static bool CLOGPagePrecedes(int page1, int page2);
static void WriteZeroPageXlogRec(int pageno);
static void WriteTruncateXlogRec(int pageno);


/*
 * Record the final state of a transaction in the commit log.
 *
 * lsn must be the WAL location of the commit record when recording an async
 * commit.  For a synchronous commit it can be InvalidXLogRecPtr, since the
 * caller guarantees the commit record is already flushed in that case.  It
 * should be InvalidXLogRecPtr for abort cases, too.
* * NB: this is a low-level routine and is NOT the preferred entry point * for most uses; TransactionLogUpdate() in transam.c is the intended caller. */voidTransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn){ int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; char *byteptr; char byteval; Assert(status == TRANSACTION_STATUS_COMMITTED || status == TRANSACTION_STATUS_ABORTED || status == TRANSACTION_STATUS_SUB_COMMITTED); LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); /* * If we're doing an async commit (ie, lsn is valid), then we must wait * for any active write on the page slot to complete. Otherwise our * update could reach disk in that write, which will not do since we * mustn't let it reach disk until we've done the appropriate WAL flush. * But when lsn is invalid, it's OK to scribble on a page while it is * write-busy, since we don't care if the update reaches disk sooner than * we think. Hence, pass write_ok = XLogRecPtrIsInvalid(lsn). */ slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid); byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; /* Current state should be 0, subcommitted or target state */ Assert(((*byteptr >> bshift) & CLOG_XACT_BITMASK) == 0 || ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == TRANSACTION_STATUS_SUB_COMMITTED || ((*byteptr >> bshift) & CLOG_XACT_BITMASK) == status); /* note this assumes exclusive access to the clog page */ byteval = *byteptr; byteval &= ~(((1 << CLOG_BITS_PER_XACT) - 1) << bshift); byteval |= (status << bshift); *byteptr = byteval; ClogCtl->shared->page_dirty[slotno] = true; /* * Update the group LSN if the transaction completion LSN is higher. * * Note: lsn will be invalid when supplied during InRecovery processing, * so we don't need to do anything special to avoid LSN updates during * recovery. 
After recovery completes the next clog change will set the * LSN correctly. */ if (!XLogRecPtrIsInvalid(lsn)) { int lsnindex = GetLSNIndex(slotno, xid); if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn)) ClogCtl->shared->group_lsn[lsnindex] = lsn; } LWLockRelease(CLogControlLock);}/* * Interrogate the state of a transaction in the commit log. * * Aside from the actual commit status, this function returns (into *lsn) * an LSN that is late enough to be able to guarantee that if we flush up to * that LSN then we will have flushed the transaction's commit record to disk. * The result is not necessarily the exact LSN of the transaction's commit * record! For example, for long-past transactions (those whose clog pages * already migrated to disk), we'll return InvalidXLogRecPtr. Also, because * we group transactions on the same clog page to conserve storage, we might * return the LSN of a later transaction that falls into the same group. * * NB: this is a low-level routine and is NOT the preferred entry point * for most uses; TransactionLogFetch() in transam.c is the intended caller. 
*/XidStatusTransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn){ int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; int lsnindex; char *byteptr; XidStatus status; /* lock is acquired by SimpleLruReadPage_ReadOnly */ slotno = SimpleLruReadPage_ReadOnly(ClogCtl, pageno, xid); byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; lsnindex = GetLSNIndex(slotno, xid); *lsn = ClogCtl->shared->group_lsn[lsnindex]; LWLockRelease(CLogControlLock); return status;}/* * Initialization of shared memory for CLOG */SizeCLOGShmemSize(void){ return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);}voidCLOGShmemInit(void){ ClogCtl->PagePrecedes = CLOGPagePrecedes; SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE, CLogControlLock, "pg_clog");}/* * This func must be called ONCE on system install. It creates * the initial CLOG segment. (The CLOG directory is assumed to * have been created by the initdb shell script, and CLOGShmemInit * must have been called already.) */voidBootStrapCLOG(void){ int slotno; LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); /* Create and zero the first page of the commit log */ slotno = ZeroCLOGPage(0, false); /* Make sure it's written out */ SimpleLruWritePage(ClogCtl, slotno, NULL); Assert(!ClogCtl->shared->page_dirty[slotno]); LWLockRelease(CLogControlLock);}/* * Initialize (or reinitialize) a page of CLOG to zeroes. * If writeXlog is TRUE, also emit an XLOG record saying we did this. * * The page is not actually written, just set up in shared memory. * The slot number of the new page is returned. * * Control lock must be held at entry, and will be held at exit. 
*/static intZeroCLOGPage(int pageno, bool writeXlog){ int slotno; slotno = SimpleLruZeroPage(ClogCtl, pageno); if (writeXlog) WriteZeroPageXlogRec(pageno); return slotno;}/* * This must be called ONCE during postmaster or standalone-backend startup, * after StartupXLOG has initialized ShmemVariableCache->nextXid.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -