bufmgr.c

来自「PostgreSQL7.4.6 for Linux」· C语言代码 · 共 2,201 行 · 第 1/4 页
2,201 行
/*-------------------------------------------------------------------------
 *
 * bufmgr.c
 *	  buffer manager interface routines
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.141.2.1 2003/12/01 16:53:30 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 *
 * BufferAlloc() -- lookup a buffer in the buffer table.  If
 *		it isn't there add it, but do not read data into memory.
 *		This is used when we are about to reinitialize the
 *		buffer so don't care what the current disk contents are.
 *		BufferAlloc() also pins the new buffer in memory.
 *
 * ReadBuffer() -- like BufferAlloc() but reads the data
 *		on a buffer cache miss.
 *
 * ReleaseBuffer() -- unpin the buffer
 *
 * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
 *		but don't unpin.  The disk IO is delayed until buffer
 *		replacement.
 *
 * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
 *
 * BufferSync() -- flush all dirty buffers in the buffer pool.
 *
 * InitBufferPool() -- Init the buffer module.
 *
 * See other files:
 *		freelist.c -- chooses victim for buffer replacement
 *		buf_table.c -- manages the buffer lookup table
 */
#include "postgres.h"

#include <sys/file.h>
#include <math.h>
#include <signal.h>

#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
#include "pgstat.h"


/*
 * BufferGetLSN
 *		Reads an XLogRecPtr from the very start of the buffer's data area
 *		(the address produced by MAKE_PTR((bufHdr)->data)).
 */
#define BufferGetLSN(bufHdr)	\
	(*((XLogRecPtr*) MAKE_PTR((bufHdr)->data)))


/* GUC variable */
bool		zero_damaged_pages = false;


static void WaitIO(BufferDesc *buf);
static void StartBufferIO(BufferDesc *buf, bool forInput);
static void TerminateBufferIO(BufferDesc *buf);
static void ContinueBufferIO(BufferDesc *buf, bool forInput);
static void buffer_write_error_callback(void *arg);

/*
 * Macro : BUFFER_IS_BROKEN
 *		True when the buffer carries BM_IO_ERROR but is not BM_DIRTY.
 *		Note that a write error doesn't mean the buffer is broken: a buffer
 *		that failed to be written is still dirty, so it is excluded here.
 *
 * The argument is parenthesized so that any pointer expression can be
 * passed safely, not just a plain identifier.
 */
#define BUFFER_IS_BROKEN(buf) \
	(((buf)->flags & BM_IO_ERROR) && !((buf)->flags & BM_DIRTY))

static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
				   bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
			bool *foundPtr);
static int	BufferReplace(BufferDesc *bufHdr);

#ifdef NOT_USED
void		PrintBufferDescs(void);
#endif

static void write_buffer(Buffer buffer, bool unpin);

/*
 * ReadBuffer -- returns a buffer containing the requested
 *		block of the requested relation.  If the blknum
 *		requested is P_NEW, extend the relation file and
 *		allocate a new block.  (Caller is responsible for
 *		ensuring that only one backend tries to extend a
 *		relation at the same time!)
 *
 * Returns: the buffer number for the buffer containing
 *		the block read, or InvalidBuffer on an error.  If successful,
 *		the returned buffer has been pinned.
 *
 * Assume when this function is called, that reln has been
 *		opened already.
 *
 * Note: a side effect of a P_NEW call is to update reln->rd_nblocks.
*/#undef ReadBuffer				/* conflicts with macro when BUFMGR_DEBUG								 * defined *//* * ReadBuffer */BufferReadBuffer(Relation reln, BlockNumber blockNum){	return ReadBufferInternal(reln, blockNum, false);}/* * ReadBufferInternal -- internal version of ReadBuffer with more options * * bufferLockHeld: if true, caller already acquired the bufmgr lock. * (This is assumed never to be true if dealing with a local buffer!) */static BufferReadBufferInternal(Relation reln, BlockNumber blockNum,				   bool bufferLockHeld){	BufferDesc *bufHdr;	int			status;	bool		found;	bool		isExtend;	bool		isLocalBuf;	isExtend = (blockNum == P_NEW);	isLocalBuf = reln->rd_istemp;	if (isLocalBuf)	{		ReadLocalBufferCount++;		pgstat_count_buffer_read(&reln->pgstat_info, reln);		/* Substitute proper block number if caller asked for P_NEW */		if (isExtend)		{			blockNum = reln->rd_nblocks;			reln->rd_nblocks++;		}		bufHdr = LocalBufferAlloc(reln, blockNum, &found);		if (found)		{			LocalBufferHitCount++;			pgstat_count_buffer_hit(&reln->pgstat_info, reln);		}	}	else	{		ReadBufferCount++;		pgstat_count_buffer_read(&reln->pgstat_info, reln);		/* Substitute proper block number if caller asked for P_NEW */		if (isExtend)		{			/* must be sure we have accurate file length! */			blockNum = reln->rd_nblocks = smgrnblocks(DEFAULT_SMGR, reln);			reln->rd_nblocks++;		}		/*		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested		 * block is not currently in memory.		 */		if (!bufferLockHeld)			LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);		bufHdr = BufferAlloc(reln, blockNum, &found);		if (found)		{			BufferHitCount++;			pgstat_count_buffer_hit(&reln->pgstat_info, reln);		}	}	/* At this point we do NOT hold the bufmgr lock. */	if (!bufHdr)		return InvalidBuffer;	/* if it's already in the buffer pool, we're done */	if (found)	{		/* That is, we're done if we expected to be able to find it ... 
*/		if (!isExtend)			return BufferDescriptorGetBuffer(bufHdr);		/*		 * If we found a buffer when we were expecting to extend the		 * relation, the implication is that a buffer was already created		 * for the next page position, but then smgrextend failed to write		 * the page. We'd better try the smgrextend again.  But since		 * BufferAlloc won't have done StartBufferIO, we must do that		 * first.		 */		if (!isLocalBuf)		{			LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);			StartBufferIO(bufHdr, false);			LWLockRelease(BufMgrLock);		}	}	/*	 * if we have gotten to this point, the reln pointer must be ok and	 * the relation file must be open.	 */	if (isExtend)	{		/* new buffers are zero-filled */		MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);		status = smgrextend(DEFAULT_SMGR, reln, blockNum,							(char *) MAKE_PTR(bufHdr->data));	}	else	{		status = smgrread(DEFAULT_SMGR, reln, blockNum,						  (char *) MAKE_PTR(bufHdr->data));		/* check for garbage data */		if (status == SM_SUCCESS &&			!PageHeaderIsValid((PageHeader) MAKE_PTR(bufHdr->data)))		{			/*			 * During WAL recovery, the first access to any data page should			 * overwrite the whole page from the WAL; so a clobbered page			 * header is not reason to fail.  Hence, when InRecovery we may			 * always act as though zero_damaged_pages is ON.			 */			if (zero_damaged_pages || InRecovery)			{				ereport(WARNING,						(errcode(ERRCODE_DATA_CORRUPTED),						 errmsg("invalid page header in block %u of relation \"%s\"; zeroing out page",							  blockNum, RelationGetRelationName(reln))));				MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);			}			else				ereport(ERROR,						(errcode(ERRCODE_DATA_CORRUPTED),					  errmsg("invalid page header in block %u of relation \"%s\"",							 blockNum, RelationGetRelationName(reln))));		}	}	if (isLocalBuf)	{		/* No shared buffer state to update... 
*/		if (status == SM_FAIL)		{			bufHdr->flags |= BM_IO_ERROR;			return InvalidBuffer;		}		return BufferDescriptorGetBuffer(bufHdr);	}	/* lock buffer manager again to update IO IN PROGRESS */	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);	if (status == SM_FAIL)	{		/* IO Failed.  cleanup the data structures and go home */		if (!BufTableDelete(bufHdr))		{			LWLockRelease(BufMgrLock);			elog(FATAL, "buffer table broken after I/O error");		}		/* remember that BufferAlloc() pinned the buffer */		UnpinBuffer(bufHdr);		/*		 * Have to reset the flag so that anyone waiting for the buffer		 * can tell that the contents are invalid.		 */		bufHdr->flags |= BM_IO_ERROR;		bufHdr->flags &= ~BM_IO_IN_PROGRESS;	}	else	{		/* IO Succeeded.  clear the flags, finish buffer update */		bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);	}	/* If anyone was waiting for IO to complete, wake them up now */	TerminateBufferIO(bufHdr);	LWLockRelease(BufMgrLock);	if (status == SM_FAIL)		return InvalidBuffer;	return BufferDescriptorGetBuffer(bufHdr);}/* * BufferAlloc -- Get a buffer from the buffer pool but don't *		read it.  If successful, the returned buffer is pinned. * * Returns: descriptor for buffer * * BufMgrLock must be held at entry.  When this routine returns, * the BufMgrLock is guaranteed NOT to be held. */static BufferDesc *BufferAlloc(Relation reln,			BlockNumber blockNum,			bool *foundPtr){	BufferDesc *buf,			   *buf2;	BufferTag	newTag;			/* identity of requested block */	bool		inProgress;		/* buffer undergoing IO */	/* create a new tag so we can lookup the buffer */	/* assume that the relation is already open */	INIT_BUFFERTAG(&newTag, reln, blockNum);	/* see if the block is in the buffer pool already */	buf = BufTableLookup(&newTag);	if (buf != NULL)	{		/*		 * Found it.  Now, (a) pin the buffer so no one steals it from the		 * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting		 * the buffer into the buffer pool.		 
*/		PinBuffer(buf);		inProgress = (buf->flags & BM_IO_IN_PROGRESS);		*foundPtr = TRUE;		if (inProgress)			/* confirm end of IO */		{			WaitIO(buf);			inProgress = (buf->flags & BM_IO_IN_PROGRESS);		}		if (BUFFER_IS_BROKEN(buf))		{			/*			 * I couldn't understand the following old comment. If there's			 * no IO for the buffer and the buffer is BROKEN, it should be			 * read again. So start a new buffer IO here.			 *			 * wierd race condition:			 *			 * We were waiting for someone else to read the buffer. While we			 * were waiting, the reader boof'd in some way, so the			 * contents of the buffer are still invalid.  By saying that			 * we didn't find it, we can make the caller reinitialize the			 * buffer.	If two processes are waiting for this block, both			 * will read the block.  The second one to finish may			 * overwrite any updates made by the first.  (Assume higher			 * level synchronization prevents this from happening).			 *			 * This is never going to happen, don't worry about it.			 */			*foundPtr = FALSE;		}#ifdef BMTRACE		_bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);#endif   /* BMTRACE */		if (!(*foundPtr))			StartBufferIO(buf, true);		LWLockRelease(BufMgrLock);		return buf;	}	*foundPtr = FALSE;	/*	 * Didn't find it in the buffer pool.  We'll have to initialize a new	 * buffer.	First, grab one from the free list.  If it's dirty, flush	 * it to disk. Remember to unlock BufMgrLock while doing the IOs.	 */	inProgress = FALSE;	for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;)	{		buf = GetFreeBuffer();		/* GetFreeBuffer will abort if it can't find a free buffer */		Assert(buf);		/*		 * There should be exactly one pin on the buffer after it is		 * allocated -- ours.  If it had a pin it wouldn't have been on		 * the free list.  No one else could have pinned it between		 * GetFreeBuffer and here because we have the BufMgrLock.		 
*/		Assert(buf->refcount == 0);		buf->refcount = 1;		PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;		if (buf->flags & BM_DIRTY || buf->cntxDirty)		{			bool		smok;			/*			 * skip write error buffers			 */			if ((buf->flags & BM_IO_ERROR) != 0)			{				UnpinBuffer(buf);				buf = (BufferDesc *) NULL;				continue;			}			/*			 * Set BM_IO_IN_PROGRESS to keep anyone from doing anything			 * with the contents of the buffer while we write it out. We			 * don't really care if they try to read it, but if they can			 * complete a BufferAlloc on it they can then scribble into			 * it, and we'd really like to avoid that while we are			 * flushing the buffer.  Setting this flag should block them			 * in WaitIO until we're done.			 */			inProgress = TRUE;			/*			 * All code paths that acquire this lock pin the buffer first;			 * since no one had it pinned (it just came off the free			 * list), no one else can have this lock.			 */			StartBufferIO(buf, false);			/*			 * Write the buffer out, being careful to release BufMgrLock			 * before starting the I/O.			 */			smok = BufferReplace(buf);			if (smok == FALSE)			{				ereport(WARNING,						(errcode(ERRCODE_IO_ERROR),						 errmsg("could not write block %u of %u/%u",								buf->tag.blockNum,								buf->tag.rnode.tblNode,								buf->tag.rnode.relNode)));				inProgress = FALSE;				buf->flags |= BM_IO_ERROR;				buf->flags &= ~BM_IO_IN_PROGRESS;				TerminateBufferIO(buf);				UnpinBuffer(buf);				buf = (BufferDesc *) NULL;			}			else			{				/*				 * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't				 * be set by anyone.		
- vadim 01/17/97				 */				if (buf->flags & BM_JUST_DIRTIED)				{					elog(PANIC, "content of block %u of %u/%u changed while flushing",						 buf->tag.blockNum,						 buf->tag.rnode.tblNode, buf->tag.rnode.relNode);				}				else					buf->flags &= ~BM_DIRTY;				buf->cntxDirty = false;			}			/*			 * Somebody could have pinned the buffer while we were doing			 * the I/O and had given up the BufMgrLock (though they would			 * be waiting for us to clear the BM_IO_IN_PROGRESS flag).			 * That's why this is a loop -- if so, we need to clear the			 * I/O flags, remove our pin and start all over again.			 *			 * People may be making buffers free at any time, so there's no			 * reason to think that we have an immediate disaster on our			 * hands.			 */			if (buf && buf->refcount > 1)			{				inProgress = FALSE;				buf->flags &= ~BM_IO_IN_PROGRESS;				TerminateBufferIO(buf);				UnpinBuffer(buf);				buf = (BufferDesc *) NULL;			}			/*			 * Somebody could have allocated another buffer for the same			 * block we are about to read in. (While we flush out the			 * dirty buffer, we don't hold the lock and someone could have			 * allocated another buffer for the same block. The problem is			 * we haven't gotten around to insert the new tag into the			 * buffer table. So we need to check here.		-ay 3/95			 */			buf2 = BufTableLookup(&newTag);			if (buf2 != NULL)			{				/*				 * Found it. Someone has already done what we're about to				 * do. We'll just handle this as if it were found in the				 * buffer pool in the first place.				 
*/				if (buf != NULL)				{					buf->flags &= ~BM_IO_IN_PROGRESS;					TerminateBufferIO(buf);					/* give up old buffer since we don't need it any more */					UnpinBuffer(buf);				}				PinBuffer(buf2);				inProgress = (buf2->flags & BM_IO_IN_PROGRESS);				*foundPtr = TRUE;				if (inProgress)				{					WaitIO(buf2);					inProgress = (buf2->flags & BM_IO_IN_PROGRESS);				}				if (BUFFER_IS_BROKEN(buf2))					*foundPtr = FALSE;				if (!(*foundPtr))					StartBufferIO(buf2, true);				LWLockRelease(BufMgrLock);				return buf2;			}		}	}	/*	 * At this point we should have the sole pin on a non-dirty buffer and	 * we may or may not already have the BM_IO_IN_PROGRESS flag set.	 */	/*	 * Change the name of the buffer in the lookup table:	 *	 * Need to update the lookup table before the read starts. If someone	 * comes along looking for the buffer while we are reading it in, we	 * don't want them to allocate a new buffer.  For the same reason, we	 * didn't want to erase the buf table entry for the buffer we were	 * writing back until now, either.	 */
bufmgr.c - 源码说明

本页面展示了「PostgreSQL7.4.6 for Linux」中的 bufmgr.c 源码文件，采用 C语言编程语言编写，共 2,201 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?