xlog.c

来自「PostgreSQL7.4.6 for Linux」· C语言代码 · 共 2,181 行 · 第 1/5 页
2,181 行
/*-------------------------------------------------------------------------
 *
 * xlog.c
 *		PostgreSQL transaction log manager
 *
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.2 2004/08/11 04:08:00 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <fcntl.h>
#include <signal.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/time.h>

#include "access/clog.h"
#include "access/transam.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xlogutils.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "storage/bufpage.h"
#include "storage/lwlock.h"
#include "storage/pmsignal.h"
#include "storage/proc.h"
#include "storage/sinval.h"
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/guc.h"
#include "utils/relcache.h"
#include "miscadmin.h"


/*
 * This chunk of hackery attempts to determine which file sync methods
 * are available on the current platform, and to choose an appropriate
 * default method.  We assume that fsync() is always available, and that
 * configure determined whether fdatasync() is.
 */
#define SYNC_METHOD_FSYNC		0
#define SYNC_METHOD_FDATASYNC	1
#define SYNC_METHOD_OPEN		2	/* used for both O_SYNC and O_DSYNC */

/* Pick whichever "synchronous open" flag the platform headers provide. */
#if defined(O_SYNC)
#define OPEN_SYNC_FLAG	   O_SYNC
#else
#if defined(O_FSYNC)
#define OPEN_SYNC_FLAG	  O_FSYNC
#endif
#endif

/* O_DSYNC only counts as a separate method if it differs from the flag above. */
#if defined(OPEN_SYNC_FLAG)
#if defined(O_DSYNC) && (O_DSYNC != OPEN_SYNC_FLAG)
#define OPEN_DATASYNC_FLAG	  O_DSYNC
#endif
#endif

/* Default method preference order: open_datasync, then fdatasync, then fsync. */
#if defined(OPEN_DATASYNC_FLAG)
#define DEFAULT_SYNC_METHOD_STR    "open_datasync"
#define DEFAULT_SYNC_METHOD		   SYNC_METHOD_OPEN
#define DEFAULT_SYNC_FLAGBIT	   OPEN_DATASYNC_FLAG
#else
#if defined(HAVE_FDATASYNC)
#define DEFAULT_SYNC_METHOD_STR   "fdatasync"
#define DEFAULT_SYNC_METHOD		  SYNC_METHOD_FDATASYNC
#define DEFAULT_SYNC_FLAGBIT	  0
#else
#define DEFAULT_SYNC_METHOD_STR   "fsync"
#define DEFAULT_SYNC_METHOD		  SYNC_METHOD_FSYNC
#define DEFAULT_SYNC_FLAGBIT	  0
#endif
#endif


/* User-settable parameters */
int			CheckPointSegments = 3;
int			XLOGbuffers = 8;
int			XLOG_DEBUG = 0;
char	   *XLOG_sync_method = NULL;
const char	XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR;
char		XLOG_archive_dir[MAXPGPATH];	/* null string means delete 'em */

/*
 * XLOGfileslop is used in the code as the allowed "fuzz" in the number of
 * preallocated XLOG segments --- we try to have at least XLOGfiles advance
 * segments but no more than XLOGfileslop segments.  This could
 * be made a separate GUC variable, but at present I think it's sufficient
 * to hardwire it as 2*CheckPointSegments+1.  Under normal conditions, a
 * checkpoint will free no more than 2*CheckPointSegments log segments, and
 * we want to recycle all of them; the +1 allows boundary cases to happen
 * without wasting a delete/create-segment cycle.
 */

#define XLOGfileslop	(2*CheckPointSegments + 1)


/* these are derived from XLOG_sync_method by assign_xlog_sync_method */
static int	sync_method = DEFAULT_SYNC_METHOD;
static int	open_sync_bit = DEFAULT_SYNC_FLAGBIT;

/* Open-flag bit to apply to log files; zero whenever enableFsync is off. */
#define XLOG_SYNC_BIT  (enableFsync ? open_sync_bit : 0)

#define MinXLOGbuffers	4


/*
 * ThisStartUpID will be same in all backends --- it identifies current
 * instance of the database system.
 */
StartUpID	ThisStartUpID = 0;

/* Are we doing recovery by reading XLOG? */
bool		InRecovery = false;

/*
 * MyLastRecPtr points to the start of the last XLOG record inserted by the
 * current transaction.  If MyLastRecPtr.xrecoff == 0, then the current
 * xact hasn't yet inserted any transaction-controlled XLOG records.
 *
 * Note that XLOG records inserted outside transaction control are not
 * reflected into MyLastRecPtr.  They do, however, cause MyXactMadeXLogEntry
 * to be set true.  The latter can be used to test whether the current xact
 * made any loggable changes (including out-of-xact changes, such as
 * sequence updates).
 *
 * When we insert/update/delete a tuple in a temporary relation, we do not
 * make any XLOG record, since we don't care about recovering the state of
 * the temp rel after a crash.  However, we will still need to remember
 * whether our transaction committed or aborted in that case.  So, we must
 * set MyXactMadeTempRelUpdate true to indicate that the XID will be of
 * interest later.
 */
XLogRecPtr	MyLastRecPtr = {0, 0};

bool		MyXactMadeXLogEntry = false;

bool		MyXactMadeTempRelUpdate = false;

/*
 * ProcLastRecPtr points to the start of the last XLOG record inserted by the
 * current backend.  It is updated for all inserts, transaction-controlled
 * or not.  ProcLastRecEnd is similar but points to end+1 of last record.
 */
static XLogRecPtr ProcLastRecPtr = {0, 0};

XLogRecPtr	ProcLastRecEnd = {0, 0};

/*
 * RedoRecPtr is this backend's local copy of the REDO record pointer
 * (which is almost but not quite the same as a pointer to the most recent
 * CHECKPOINT record).  We update this from the shared-memory copy,
 * XLogCtl->Insert.RedoRecPtr, whenever we can safely do so (ie, when we
 * hold the Insert lock).  See XLogInsert for details.  We are also allowed
 * to update from XLogCtl->Insert.RedoRecPtr if we hold the info_lck;
 * see GetRedoRecPtr.
 */
static XLogRecPtr RedoRecPtr;

/*----------
 * Shared-memory data structures for XLOG control
 *
 * LogwrtRqst indicates a byte position that we need to write and/or fsync
 * the log up to (all records before that point must be written or fsynced).
 * LogwrtResult indicates the byte positions we have already written/fsynced.
 * These structs are identical but are declared separately to indicate their
 * slightly different functions.
 *
 * We do a lot of pushups to minimize the amount of access to lockable
 * shared memory values.  There are actually three shared-memory copies of
 * LogwrtResult, plus one unshared copy in each backend.  Here's how it works:
 *		XLogCtl->LogwrtResult is protected by info_lck
 *		XLogCtl->Write.LogwrtResult is protected by WALWriteLock
 *		XLogCtl->Insert.LogwrtResult is protected by WALInsertLock
 * One must hold the associated lock to read or write any of these, but
 * of course no lock is needed to read/write the unshared LogwrtResult.
 *
 * XLogCtl->LogwrtResult and XLogCtl->Write.LogwrtResult are both "always
 * right", since both are updated by a write or flush operation before
 * it releases WALWriteLock.  The point of keeping XLogCtl->Write.LogwrtResult
 * is that it can be examined/modified by code that already holds WALWriteLock
 * without needing to grab info_lck as well.
 *
 * XLogCtl->Insert.LogwrtResult may lag behind the reality of the other two,
 * but is updated when convenient.  Again, it exists for the convenience of
 * code that is already holding WALInsertLock but not the other locks.
 *
 * The unshared LogwrtResult may lag behind any or all of these, and again
 * is updated when convenient.
 *
 * The request bookkeeping is simpler: there is a shared XLogCtl->LogwrtRqst
 * (protected by info_lck), but we don't need to cache any copies of it.
 *
 * Note that this all works because the request and result positions can only
 * advance forward, never back up, and so we can easily determine which of two
 * values is "more up to date".
 *
 * info_lck is only held long enough to read/update the protected variables,
 * so it's a plain spinlock.  The other locks are held longer (potentially
 * over I/O operations), so we use LWLocks for them.  These locks are:
 *
 * WALInsertLock: must be held to insert a record into the WAL buffers.
 *
 * WALWriteLock: must be held to write WAL buffers to disk (XLogWrite or
 * XLogFlush).
 *
 * ControlFileLock: must be held to read/update control file or create
 * new log file.
 *
 * CheckpointLock: must be held to do a checkpoint (ensures only one
 * checkpointer at a time; even though the postmaster won't launch
 * parallel checkpoint processes, we need this because manual checkpoints
 * could be launched simultaneously).
 *
 *----------
 */

typedef struct XLogwrtRqst
{
	XLogRecPtr	Write;			/* last byte + 1 to write out */
	XLogRecPtr	Flush;			/* last byte + 1 to flush */
} XLogwrtRqst;

typedef struct XLogwrtResult
{
	XLogRecPtr	Write;			/* last byte + 1 written out */
	XLogRecPtr	Flush;			/* last byte + 1 flushed */
} XLogwrtResult;

/*
 * Shared state data for XLogInsert.
 */
typedef struct XLogCtlInsert
{
	XLogwrtResult LogwrtResult; /* a recent value of LogwrtResult */
	XLogRecPtr	PrevRecord;		/* start of previously-inserted record */
	uint16		curridx;		/* current block index in cache */
	XLogPageHeader currpage;	/* points to header of block in cache */
	char	   *currpos;		/* current insertion point in cache */
	XLogRecPtr	RedoRecPtr;		/* current redo point for insertions */
} XLogCtlInsert;

/*
 * Shared state data for XLogWrite/XLogFlush.
 */
typedef struct XLogCtlWrite
{
	XLogwrtResult LogwrtResult; /* current value of LogwrtResult */
	uint16		curridx;		/* cache index of next block to write */
} XLogCtlWrite;

/*
 * Total shared-memory state for XLOG.
 */
typedef struct XLogCtlData
{
	/* Protected by WALInsertLock: */
	XLogCtlInsert Insert;
	/* Protected by info_lck: */
	XLogwrtRqst LogwrtRqst;
	XLogwrtResult LogwrtResult;
	/* Protected by WALWriteLock: */
	XLogCtlWrite Write;

	/*
	 * These values do not change after startup, although the pointed-to
	 * pages and xlblocks values certainly do.  Permission to read/write
	 * the pages and xlblocks values depends on WALInsertLock and
	 * WALWriteLock.
	 */
	char	   *pages;			/* buffers for unwritten XLOG pages */
	XLogRecPtr *xlblocks;		/* 1st byte ptr-s + BLCKSZ */
	uint32		XLogCacheByte;	/* # bytes in xlog buffers */
	uint32		XLogCacheBlck;	/* highest allocated xlog buffer index */
	StartUpID	ThisStartUpID;

	/* This value is not protected by *any* lock... */
	/* see SetSavedRedoRecPtr/GetSavedRedoRecPtr */
	XLogRecPtr	SavedRedoRecPtr;

	slock_t		info_lck;		/* locks shared LogwrtRqst/LogwrtResult */
} XLogCtlData;

static XLogCtlData *XLogCtl = NULL;

/*
 * We maintain an image of pg_control in shared memory.
 */
static ControlFileData *ControlFile = NULL;

/*
 * Macros for managing XLogInsert state.  In most cases, the calling routine
 * has local copies of XLogCtl->Insert and/or XLogCtl->Insert->curridx,
 * so these are passed as parameters instead of being fetched via XLogCtl.
 */

/* Free space remaining in the current xlog page buffer */
#define INSERT_FREESPACE(Insert)  \
	(BLCKSZ - ((Insert)->currpos - (char *) (Insert)->currpage))

/* Construct XLogRecPtr value for current insertion point */
#define INSERT_RECPTR(recptr,Insert,curridx)  \
	( \
	  (recptr).xlogid = XLogCtl->xlblocks[curridx].xlogid, \
	  (recptr).xrecoff = \
		XLogCtl->xlblocks[curridx].xrecoff - INSERT_FREESPACE(Insert) \
	)


/* Increment an xlogid/segment pair */
#define NextLogSeg(logId, logSeg)	\
	do { \
		if ((logSeg) >= XLogSegsPerFile-1) \
		{ \
			(logId)++; \
			(logSeg) = 0; \
		} \
		else \
			(logSeg)++; \
	} while (0)

/* Decrement an xlogid/segment pair (assume it's not 0,0) */
#define PrevLogSeg(logId, logSeg)	\
	do { \
		if (logSeg) \
			(logSeg)--; \
		else \
		{ \
			(logId)--; \
			(logSeg) = XLogSegsPerFile-1; \
		} \
	} while (0)

/*
 * Compute ID and segment from an XLogRecPtr.
 *
 * For XLByteToSeg, do the computation at face value.  For XLByteToPrevSeg,
 * a boundary byte is taken to be in the previous segment.  This is suitable
 * for deciding which segment to write given a pointer to a record end,
 * for example.  (We can assume xrecoff is not zero, since no valid recptr
 * can have that.)
 */
#define XLByteToSeg(xlrp, logId, logSeg)	\
	( logId = (xlrp).xlogid, \
	  logSeg = (xlrp).xrecoff / XLogSegSize \
	)
#define XLByteToPrevSeg(xlrp, logId, logSeg)	\
	( logId = (xlrp).xlogid, \
	  logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
	)

/*
 * Is an XLogRecPtr within a particular XLOG segment?
 *
 * For XLByteInSeg, do the computation at face value.  For XLByteInPrevSeg,
 * a boundary byte is taken to be in the previous segment.
 */
#define XLByteInSeg(xlrp, logId, logSeg)	\
	((xlrp).xlogid == (logId) && \
	 (xlrp).xrecoff / XLogSegSize == (logSeg))

#define XLByteInPrevSeg(xlrp, logId, logSeg)	\
	((xlrp).xlogid == (logId) && \
	 ((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))


/*
 * Format a segment file path into "path" (at most MAXPGPATH bytes):
 * XLogDir, a slash, then log and seg each printed as 8 uppercase hex digits.
 */
#define XLogFileName(path, log, seg)	\
			snprintf(path, MAXPGPATH, "%s/%08X%08X",	\
					 XLogDir, log, seg)

/*
 * Step a buffer index backward/forward by one, wrapping around at
 * XLogCtl->XLogCacheBlck (the highest allocated xlog buffer index).
 */
#define PrevBufIdx(idx)		\
		(((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))

#define NextBufIdx(idx)		\
		(((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

/*
 * True when xrecoff's position within its BLCKSZ-sized page is at or past
 * SizeOfXLogPHD and at least SizeOfXLogRecord bytes remain before the page
 * end.
 */
#define XRecOffIsValid(xrecoff) \
		((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \
		(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)

/*
 * _INTL_MAXLOGRECSZ: max space needed for a record including header and
 * any backup-block data.
 */
#define _INTL_MAXLOGRECSZ	(SizeOfXLogRecord + MAXLOGRECSZ + \
							 XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))


/* File path names */
static char XLogDir[MAXPGPATH];
static char ControlFilePath[MAXPGPATH];

/*
 * Private, possibly out-of-date copy of shared LogwrtResult.
 * See discussion above.
 */
static XLogwrtResult LogwrtResult = {{0, 0}, {0, 0}};

/*
 * openLogFile is -1 or a kernel FD for an open log file segment.
 * When it's open, openLogOff is the current seek offset in the file.
 * openLogId/openLogSeg identify the segment.  These variables are only
 * used to write the XLOG, and so will normally refer to the active segment.
 */
static int	openLogFile = -1;
static uint32 openLogId = 0;
static uint32 openLogSeg = 0;
static uint32 openLogOff = 0;

/*
 * These variables are used similarly to the ones above, but for reading
 * the XLOG.  Note, however, that readOff generally represents the offset
 * of the page just read, not the seek position of the FD itself, which
 * will be just past that page.
 */
static int	readFile = -1;
static uint32 readId = 0;
static uint32 readSeg = 0;
static uint32 readOff = 0;

/* Buffer for currently read page (BLCKSZ bytes) */
static char *readBuf = NULL;

/* State information for XLOG reading */
static XLogRecPtr ReadRecPtr;
static XLogRecPtr EndRecPtr;
static XLogRecord *nextRecord = NULL;
static StartUpID lastReadSUI;
xlog.c - 源码说明

本页面展示了「PostgreSQL7.4.6 for Linux」中的 xlog.c 源码文件，采用 C语言编程语言编写，共 2,181 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?