📄 xlog.c

📁 postgresql8.3.4源码,开源数据库
💻 C
📖 第 1 页 / 共 5 页
字号:
上一页 1 2 3 45
	 * Since fsync is usually a horribly expensive operation, we try to	 * piggyback as much data as we can on each fsync: if we see any more data	 * entered into the xlog buffer, we'll write and fsync that too, so that	 * the final value of LogwrtResult.Flush is as large as possible. This	 * gives us some chance of avoiding another fsync immediately after.	 */	/* initialize to given target; may increase below */	WriteRqstPtr = record;	/* read LogwrtResult and update local state */	{		/* use volatile pointer to prevent code rearrangement */		volatile XLogCtlData *xlogctl = XLogCtl;		SpinLockAcquire(&xlogctl->info_lck);		if (XLByteLT(WriteRqstPtr, xlogctl->LogwrtRqst.Write))			WriteRqstPtr = xlogctl->LogwrtRqst.Write;		LogwrtResult = xlogctl->LogwrtResult;		SpinLockRelease(&xlogctl->info_lck);	}	/* done already? */	if (!XLByteLE(record, LogwrtResult.Flush))	{		/* now wait for the write lock */		LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);		LogwrtResult = XLogCtl->Write.LogwrtResult;		if (!XLByteLE(record, LogwrtResult.Flush))		{			/* try to write/flush later additions to XLOG as well */			if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE))			{				XLogCtlInsert *Insert = &XLogCtl->Insert;				uint32		freespace = INSERT_FREESPACE(Insert);				if (freespace < SizeOfXLogRecord)		/* buffer is full */					WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];				else				{					WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];					WriteRqstPtr.xrecoff -= freespace;				}				LWLockRelease(WALInsertLock);				WriteRqst.Write = WriteRqstPtr;				WriteRqst.Flush = WriteRqstPtr;			}			else			{				WriteRqst.Write = WriteRqstPtr;				WriteRqst.Flush = record;			}			XLogWrite(WriteRqst, false, false);		}		LWLockRelease(WALWriteLock);	}	END_CRIT_SECTION();	/*	 * If we still haven't flushed to the request point then we have a	 * problem; most likely, the requested flush point is past end of XLOG.	 * This has been seen to occur when a disk page has a corrupted LSN.	 *	 * Formerly we treated this as a PANIC condition, but that hurts the	 * system's robustness rather than helping it: we do not want to take down	 * the whole system due to corruption on one data page.  In particular, if	 * the bad page is encountered again during recovery then we would be	 * unable to restart the database at all!  (This scenario has actually	 * happened in the field several times with 7.1 releases. Note that we	 * cannot get here while InRedo is true, but if the bad page is brought in	 * and marked dirty during recovery then CreateCheckPoint will try to	 * flush it at the end of recovery.)	 *	 * The current approach is to ERROR under normal conditions, but only	 * WARNING during recovery, so that the system can be brought up even if	 * there's a corrupt LSN.  Note that for calls from xact.c, the ERROR will	 * be promoted to PANIC since xact.c calls this routine inside a critical	 * section.  However, calls from bufmgr.c are not within critical sections	 * and so we will not force a restart for a bad LSN on a data page.	 */	if (XLByteLT(LogwrtResult.Flush, record))		elog(InRecovery ? WARNING : ERROR,		"xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",			 record.xlogid, record.xrecoff,			 LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);}/* * Flush xlog, but without specifying exactly where to flush to. * * We normally flush only completed blocks; but if there is nothing to do on * that basis, we check for unflushed async commits in the current incomplete * block, and flush through the latest one of those.  Thus, if async commits * are not being used, we will flush complete blocks only.	We can guarantee * that async commits reach disk after at most three cycles; normally only * one or two.	(We allow XLogWrite to write "flexibly", meaning it can stop * at the end of the buffer ring; this makes a difference only with very high * load or long wal_writer_delay, but imposes one extra cycle for the worst * case for async commits.) * * This routine is invoked periodically by the background walwriter process. */voidXLogBackgroundFlush(void){	XLogRecPtr	WriteRqstPtr;	bool		flexible = true;	/* read LogwrtResult and update local state */	{		/* use volatile pointer to prevent code rearrangement */		volatile XLogCtlData *xlogctl = XLogCtl;		SpinLockAcquire(&xlogctl->info_lck);		LogwrtResult = xlogctl->LogwrtResult;		WriteRqstPtr = xlogctl->LogwrtRqst.Write;		SpinLockRelease(&xlogctl->info_lck);	}	/* back off to last completed page boundary */	WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;	/* if we have already flushed that far, consider async commit records */	if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))	{		/* use volatile pointer to prevent code rearrangement */		volatile XLogCtlData *xlogctl = XLogCtl;		SpinLockAcquire(&xlogctl->info_lck);		WriteRqstPtr = xlogctl->asyncCommitLSN;		SpinLockRelease(&xlogctl->info_lck);		flexible = false;		/* ensure it all gets written */	}	/* Done if already known flushed */	if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))		return;#ifdef WAL_DEBUG	if (XLOG_DEBUG)		elog(LOG, "xlog bg flush request %X/%X; write %X/%X; flush %X/%X",			 WriteRqstPtr.xlogid, WriteRqstPtr.xrecoff,			 LogwrtResult.Write.xlogid, LogwrtResult.Write.xrecoff,			 LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);#endif	START_CRIT_SECTION();	/* now wait for the write lock */	LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);	LogwrtResult = XLogCtl->Write.LogwrtResult;	if (!XLByteLE(WriteRqstPtr, LogwrtResult.Flush))	{		XLogwrtRqst WriteRqst;		WriteRqst.Write = WriteRqstPtr;		WriteRqst.Flush = WriteRqstPtr;		XLogWrite(WriteRqst, flexible, false);	}	LWLockRelease(WALWriteLock);	END_CRIT_SECTION();}/* * Flush any previous asynchronously-committed transactions' commit records. * * NOTE: it is unwise to assume that this provides any strong guarantees. * In particular, because of the inexact LSN bookkeeping used by clog.c, * we cannot assume that hint bits will be settable for these transactions. */voidXLogAsyncCommitFlush(void){	XLogRecPtr	WriteRqstPtr;	/* use volatile pointer to prevent code rearrangement */	volatile XLogCtlData *xlogctl = XLogCtl;	SpinLockAcquire(&xlogctl->info_lck);	WriteRqstPtr = xlogctl->asyncCommitLSN;	SpinLockRelease(&xlogctl->info_lck);	XLogFlush(WriteRqstPtr);}/* * Test whether XLOG data has been flushed up to (at least) the given position. * * Returns true if a flush is still needed.  (It may be that someone else * is already in process of flushing that far, however.) */boolXLogNeedsFlush(XLogRecPtr record){	/* Quick exit if already known flushed */	if (XLByteLE(record, LogwrtResult.Flush))		return false;	/* read LogwrtResult and update local state */	{		/* use volatile pointer to prevent code rearrangement */		volatile XLogCtlData *xlogctl = XLogCtl;		SpinLockAcquire(&xlogctl->info_lck);		LogwrtResult = xlogctl->LogwrtResult;		SpinLockRelease(&xlogctl->info_lck);	}	/* check again */	if (XLByteLE(record, LogwrtResult.Flush))		return false;	return true;}/* * Create a new XLOG file segment, or open a pre-existing one. * * log, seg: identify segment to be created/opened. * * *use_existent: if TRUE, OK to use a pre-existing file (else, any * pre-existing file will be deleted).	On return, TRUE if a pre-existing * file was used. * * use_lock: if TRUE, acquire ControlFileLock while moving file into * place.  This should be TRUE except during bootstrap log creation.  The * caller must *not* hold the lock at call. * * Returns FD of opened file. * * Note: errors here are ERROR not PANIC because we might or might not be * inside a critical section (eg, during checkpoint there is no reason to * take down the system on failure).  They will promote to PANIC if we are * in a critical section. */static intXLogFileInit(uint32 log, uint32 seg,			 bool *use_existent, bool use_lock){	char		path[MAXPGPATH];	char		tmppath[MAXPGPATH];	char	   *zbuffer;	uint32		installed_log;	uint32		installed_seg;	int			max_advance;	int			fd;	int			nbytes;	XLogFilePath(path, ThisTimeLineID, log, seg);	/*	 * Try to use existent file (checkpoint maker may have created it already)	 */	if (*use_existent)	{		fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,						   S_IRUSR | S_IWUSR);		if (fd < 0)		{			if (errno != ENOENT)				ereport(ERROR,						(errcode_for_file_access(),						 errmsg("could not open file \"%s\" (log file %u, segment %u): %m",								path, log, seg)));		}		else			return fd;	}	/*	 * Initialize an empty (all zeroes) segment.  NOTE: it is possible that	 * another process is doing the same thing.  If so, we will end up	 * pre-creating an extra log segment.  That seems OK, and better than	 * holding the lock throughout this lengthy process.	 */	elog(DEBUG2, "creating and filling new WAL file");	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());	unlink(tmppath);	/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,					   S_IRUSR | S_IWUSR);	if (fd < 0)		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not create file \"%s\": %m", tmppath)));	/*	 * Zero-fill the file.	We have to do this the hard way to ensure that all	 * the file space has really been allocated --- on platforms that allow	 * "holes" in files, just seeking to the end doesn't allocate intermediate	 * space.  This way, we know that we have all the space and (after the	 * fsync below) that all the indirect blocks are down on disk.	Therefore,	 * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the	 * log file.	 *	 * Note: palloc zbuffer, instead of just using a local char array, to	 * ensure it is reasonably well-aligned; this may save a few cycles	 * transferring data to the kernel.	 */	zbuffer = (char *) palloc0(XLOG_BLCKSZ);	for (nbytes = 0; nbytes < XLogSegSize; nbytes += XLOG_BLCKSZ)	{		errno = 0;		if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)		{			int			save_errno = errno;			/*			 * If we fail to make the file, delete it to release disk space			 */			unlink(tmppath);			/* if write didn't set errno, assume problem is no disk space */			errno = save_errno ? save_errno : ENOSPC;			ereport(ERROR,					(errcode_for_file_access(),					 errmsg("could not write to file \"%s\": %m", tmppath)));		}	}	pfree(zbuffer);	if (pg_fsync(fd) != 0)		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not fsync file \"%s\": %m", tmppath)));	if (close(fd))		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not close file \"%s\": %m", tmppath)));	/*	 * Now move the segment into place with its final name.	 *	 * If caller didn't want to use a pre-existing file, get rid of any	 * pre-existing file.  Otherwise, cope with possibility that someone else	 * has created the file while we were filling ours: if so, use ours to	 * pre-create a future log segment.	 */	installed_log = log;	installed_seg = seg;	max_advance = XLOGfileslop;	if (!InstallXLogFileSegment(&installed_log, &installed_seg, tmppath,								*use_existent, &max_advance,								use_lock))	{		/* No need for any more future segments... */		unlink(tmppath);	}	elog(DEBUG2, "done creating and filling new WAL file");	/* Set flag to tell caller there was no existent file */	*use_existent = false;	/* Now open original target segment (might not be file I just made) */	fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,					   S_IRUSR | S_IWUSR);	if (fd < 0)		ereport(ERROR,				(errcode_for_file_access(),		   errmsg("could not open file \"%s\" (log file %u, segment %u): %m",				  path, log, seg)));	return fd;}/* * Create a new XLOG file segment by copying a pre-existing one. * * log, seg: identify segment to be created. * * srcTLI, srclog, srcseg: identify segment to be copied (could be from *		a different timeline) * * Currently this is only used during recovery, and so there are no locking * considerations.	But we should be just as tense as XLogFileInit to avoid * emplacing a bogus file. */static voidXLogFileCopy(uint32 log, uint32 seg,			 TimeLineID srcTLI, uint32 srclog, uint32 srcseg){	char		path[MAXPGPATH];	char		tmppath[MAXPGPATH];	char		buffer[XLOG_BLCKSZ];	int			srcfd;	int			fd;	int			nbytes;	/*	 * Open the source file	 */	XLogFilePath(path, srcTLI, srclog, srcseg);	srcfd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);	if (srcfd < 0)		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not open file \"%s\": %m", path)));	/*	 * Copy into a temp file name.	 */	snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());	unlink(tmppath);	/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */	fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,					   S_IRUSR | S_IWUSR);	if (fd < 0)		ereport(ERROR,				(errcode_for_file_access(),
上一页 1 2 3 45
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -