bufmgr.c

来自「postgresql8.3.4源码,开源数据库」· C语言代码 · 共 2,235 行 · 第 1/5 页
2,235 行
/*-------------------------------------------------------------------------
 *
 * bufmgr.c
 *	  buffer manager interface routines
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.228 2008/01/01 19:45:51 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * Principal entry points:
 *
 * ReadBuffer() -- find or create a buffer holding the requested page,
 *		and pin it so that no one can destroy it while this process
 *		is using it.
 *
 * ReleaseBuffer() -- unpin a buffer
 *
 * MarkBufferDirty() -- mark a pinned buffer's contents as "dirty".
 *		The disk write is delayed until buffer replacement or checkpoint.
 *
 * See also these files:
 *		freelist.c -- chooses victim for buffer replacement
 *		buf_table.c -- manages the buffer lookup table
 */
#include "postgres.h"

#include <sys/file.h>
#include <unistd.h>

#include "miscadmin.h"
#include "postmaster/bgwriter.h"
#include "storage/buf_internals.h"
#include "storage/bufpage.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/resowner.h"
#include "pgstat.h"


/*
 * Note: these two macros only work on shared buffers, not local ones!
 *
 * BufHdrGetBlock yields the address BufferBlocks + buf_id * BLCKSZ for the
 * given header; BufferGetLSN reads an XLogRecPtr from the very start of
 * that block.
 */
#define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
#define BufferGetLSN(bufHdr)	(*((XLogRecPtr*) BufHdrGetBlock(bufHdr)))

/*
 * Note: this macro only works on local buffers, not shared ones!
 *
 * The header's buf_id is mapped to the LocalBufferBlockPointers index
 * -(buf_id + 2).
 */
#define LocalBufHdrGetBlock(bufHdr) \
	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

/* Bits in SyncOneBuffer's return value */
#define BUF_WRITTEN				0x01
#define BUF_REUSABLE			0x02


/* GUC variables */
bool		zero_damaged_pages = false;
int			bgwriter_lru_maxpages = 100;
double		bgwriter_lru_multiplier = 2.0;


long		NDirectFileRead;	/* some I/O's are direct file access. bypass
								 * bufmgr */
long		NDirectFileWrite;	/* e.g., I/O in psort and hashjoin. */


/* local state for StartBufferIO and related functions */
static volatile BufferDesc *InProgressBuf = NULL;
static bool IsForInput;

/* local state for LockBufferForCleanup */
static volatile BufferDesc *PinCountWaitBuf = NULL;


/* Prototypes for the file-local routines of this module */
static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
				  bool zeroPage,
				  BufferAccessStrategy strategy);
static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
static void BufferSync(int flags);
static int	SyncOneBuffer(int buf_id, bool skip_recently_used);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
				  int set_flag_bits);
static void buffer_write_error_callback(void *arg);
static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
			BufferAccessStrategy strategy,
			bool *foundPtr);
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
static void AtProcExit_Buffers(int code, Datum arg);


/*
 * ReadBuffer -- returns a buffer containing the requested
 *		block of the requested relation.  If the blknum
 *		requested is P_NEW, extend the relation file and
 *		allocate a new block.  (Caller is responsible for
 *		ensuring that only one backend tries to extend a
 *		relation at the same time!)
 *
 * Returns: the buffer number for the buffer containing
 *		the block read.  The returned buffer has been pinned.
 *		Does not return on error --- elog's instead.
 *
 * Assume when this function is called, that reln has been
 *		opened already.
*/BufferReadBuffer(Relation reln, BlockNumber blockNum){	return ReadBuffer_common(reln, blockNum, false, NULL);}/* * ReadBufferWithStrategy -- same as ReadBuffer, except caller can specify *		a nondefault buffer access strategy.  See buffer/README for details. */BufferReadBufferWithStrategy(Relation reln, BlockNumber blockNum,					   BufferAccessStrategy strategy){	return ReadBuffer_common(reln, blockNum, false, strategy);}/* * ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer *		cache already, it's filled with zeros instead of reading it from *		disk.  Useful when the caller intends to fill the page from scratch, *		since this saves I/O and avoids unnecessary failure if the *		page-on-disk has corrupt page headers. * *		Caution: do not use this to read a page that is beyond the relation's *		current physical EOF; that is likely to cause problems in md.c when *		the page is modified and written out.  P_NEW is OK, though. */BufferReadOrZeroBuffer(Relation reln, BlockNumber blockNum){	return ReadBuffer_common(reln, blockNum, true, NULL);}/* * ReadBuffer_common -- common logic for ReadBuffer variants */static BufferReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage,				  BufferAccessStrategy strategy){	volatile BufferDesc *bufHdr;	Block		bufBlock;	bool		found;	bool		isExtend;	bool		isLocalBuf;	/* Make sure we will have room to remember the buffer pin */	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);	isExtend = (blockNum == P_NEW);	isLocalBuf = reln->rd_istemp;	/* Open it at the smgr level if not already done */	RelationOpenSmgr(reln);	/* Substitute proper block number if caller asked for P_NEW */	if (isExtend)		blockNum = smgrnblocks(reln->rd_smgr);	pgstat_count_buffer_read(reln);	if (isLocalBuf)	{		ReadLocalBufferCount++;		bufHdr = LocalBufferAlloc(reln, blockNum, &found);		if (found)			LocalBufferHitCount++;	}	else	{		ReadBufferCount++;		/*		 * lookup the buffer.  
IO_IN_PROGRESS is set if the requested block is		 * not currently in memory.		 */		bufHdr = BufferAlloc(reln, blockNum, strategy, &found);		if (found)			BufferHitCount++;	}	/* At this point we do NOT hold any locks. */	/* if it was already in the buffer pool, we're done */	if (found)	{		if (!isExtend)		{			/* Just need to update stats before we exit */			pgstat_count_buffer_hit(reln);			if (VacuumCostActive)				VacuumCostBalance += VacuumCostPageHit;			return BufferDescriptorGetBuffer(bufHdr);		}		/*		 * We get here only in the corner case where we are trying to extend		 * the relation but we found a pre-existing buffer marked BM_VALID.		 * This can happen because mdread doesn't complain about reads beyond		 * EOF (when zero_damaged_pages is ON) and so a previous attempt to		 * read a block beyond EOF could have left a "valid" zero-filled		 * buffer.	Unfortunately, we have also seen this case occurring		 * because of buggy Linux kernels that sometimes return an		 * lseek(SEEK_END) result that doesn't account for a recent write. In		 * that situation, the pre-existing buffer would contain valid data		 * that we don't want to overwrite.  Since the legitimate case should		 * always have left a zero-filled buffer, complain if not PageIsNew.		 */		bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);		if (!PageIsNew((PageHeader) bufBlock))			ereport(ERROR,					(errmsg("unexpected data beyond EOF in block %u of relation \"%s\"",							blockNum, RelationGetRelationName(reln)),					 errhint("This has been seen to occur with buggy kernels; consider updating your system.")));		/*		 * We *must* do smgrextend before succeeding, else the page will not		 * be reserved by the kernel, and the next P_NEW call will decide to		 * return the same page.  Clear the BM_VALID bit, do the StartBufferIO		 * call that BufferAlloc didn't, and proceed.		 
*/		if (isLocalBuf)		{			/* Only need to adjust flags */			Assert(bufHdr->flags & BM_VALID);			bufHdr->flags &= ~BM_VALID;		}		else		{			/*			 * Loop to handle the very small possibility that someone re-sets			 * BM_VALID between our clearing it and StartBufferIO inspecting			 * it.			 */			do			{				LockBufHdr(bufHdr);				Assert(bufHdr->flags & BM_VALID);				bufHdr->flags &= ~BM_VALID;				UnlockBufHdr(bufHdr);			} while (!StartBufferIO(bufHdr, true));		}	}	/*	 * if we have gotten to this point, we have allocated a buffer for the	 * page but its contents are not yet valid.  IO_IN_PROGRESS is set for it,	 * if it's a shared buffer.	 *	 * Note: if smgrextend fails, we will end up with a buffer that is	 * allocated but not marked BM_VALID.  P_NEW will still select the same	 * block number (because the relation didn't get any longer on disk) and	 * so future attempts to extend the relation will find the same buffer (if	 * it's not been recycled) but come right back here to try smgrextend	 * again.	 */	Assert(!(bufHdr->flags & BM_VALID));		/* spinlock not needed */	bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);	if (isExtend)	{		/* new buffers are zero-filled */		MemSet((char *) bufBlock, 0, BLCKSZ);		smgrextend(reln->rd_smgr, blockNum, (char *) bufBlock,				   reln->rd_istemp);	}	else	{		/*		 * Read in the page, unless the caller intends to overwrite it and		 * just wants us to allocate a buffer.		 
*/		if (zeroPage)			MemSet((char *) bufBlock, 0, BLCKSZ);		else			smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);		/* check for garbage data */		if (!PageHeaderIsValid((PageHeader) bufBlock))		{			if (zero_damaged_pages)			{				ereport(WARNING,						(errcode(ERRCODE_DATA_CORRUPTED),						 errmsg("invalid page header in block %u of relation \"%s\"; zeroing out page",								blockNum, RelationGetRelationName(reln))));				MemSet((char *) bufBlock, 0, BLCKSZ);			}			else				ereport(ERROR,						(errcode(ERRCODE_DATA_CORRUPTED),				 errmsg("invalid page header in block %u of relation \"%s\"",						blockNum, RelationGetRelationName(reln))));		}	}	if (isLocalBuf)	{		/* Only need to adjust flags */		bufHdr->flags |= BM_VALID;	}	else	{		/* Set BM_VALID, terminate IO, and wake up any waiters */		TerminateBufferIO(bufHdr, false, BM_VALID);	}	if (VacuumCostActive)		VacuumCostBalance += VacuumCostPageMiss;	return BufferDescriptorGetBuffer(bufHdr);}/* * BufferAlloc -- subroutine for ReadBuffer.  Handles lookup of a shared *		buffer.  If no buffer exists already, selects a replacement *		victim and evicts the old page, but does NOT read in new page. * * "strategy" can be a buffer replacement strategy object, or NULL for * the default strategy.  The selected buffer's usage_count is advanced when * using the default strategy, but otherwise possibly not (see PinBuffer). * * The returned buffer is pinned and is already marked as holding the * desired page.  If it already did have the desired page, *foundPtr is * set TRUE.  Otherwise, *foundPtr is set FALSE and the buffer is marked * as IO_IN_PROGRESS; ReadBuffer will now need to do I/O to fill it. * * *foundPtr is actually redundant with the buffer's BM_VALID flag, but * we keep it for simplicity in ReadBuffer. * * No locks are held either at entry or exit. 
*/static volatile BufferDesc *BufferAlloc(Relation reln,			BlockNumber blockNum,			BufferAccessStrategy strategy,			bool *foundPtr){	BufferTag	newTag;			/* identity of requested block */	uint32		newHash;		/* hash value for newTag */	LWLockId	newPartitionLock;		/* buffer partition lock for it */	BufferTag	oldTag;			/* previous identity of selected buffer */	uint32		oldHash;		/* hash value for oldTag */	LWLockId	oldPartitionLock;		/* buffer partition lock for it */	BufFlags	oldFlags;	int			buf_id;	volatile BufferDesc *buf;	bool		valid;	/* create a tag so we can lookup the buffer */	INIT_BUFFERTAG(newTag, reln, blockNum);	/* determine its hash code and partition lock ID */	newHash = BufTableHashCode(&newTag);	newPartitionLock = BufMappingPartitionLock(newHash);	/* see if the block is in the buffer pool already */	LWLockAcquire(newPartitionLock, LW_SHARED);	buf_id = BufTableLookup(&newTag, newHash);	if (buf_id >= 0)	{		/*		 * Found it.  Now, pin the buffer so no one can steal it from the		 * buffer pool, and check to see if the correct data has been loaded		 * into the buffer.		 */		buf = &BufferDescriptors[buf_id];		valid = PinBuffer(buf, strategy);		/* Can release the mapping lock as soon as we've pinned it */		LWLockRelease(newPartitionLock);		*foundPtr = TRUE;		if (!valid)		{			/*			 * We can only get here if (a) someone else is still reading in			 * the page, or (b) a previous read attempt failed.  We have to			 * wait for any active read attempt to finish, and then set up our			 * own read attempt if the page is still not BM_VALID.			 * StartBufferIO does it all.			 */			if (StartBufferIO(buf, true))			{				/*				 * If we get here, previous attempts to read the buffer must				 * have failed ... but we shall bravely try again.				 */				*foundPtr = FALSE;			}		}		return buf;	}	/*	 * Didn't find it in the buffer pool.  We'll have to initialize a new	 * buffer.	Remember to unlock the mapping lock while doing the work.	 
*/	LWLockRelease(newPartitionLock);	/* Loop here in case we have to try another victim buffer */	for (;;)	{		bool		lock_held;		/*		 * Select a victim buffer.	The buffer is returned with its header		 * spinlock still held!  Also (in most cases) the BufFreelistLock is		 * still held, since it would be bad to hold the spinlock while		 * possibly waking up other processes.		 */		buf = StrategyGetBuffer(strategy, &lock_held);		Assert(buf->refcount == 0);		/* Must copy buffer flags while we still hold the spinlock */		oldFlags = buf->flags;		/* Pin the buffer and then release the buffer spinlock */		PinBuffer_Locked(buf);		/* Now it's safe to release the freelist lock */		if (lock_held)			LWLockRelease(BufFreelistLock);		/*		 * If the buffer was dirty, try to write it out.  There is a race		 * condition here, in that someone might dirty it after we released it
bufmgr.c - 源码说明

本页面展示了「postgresql8.3.4源码,开源数据库」中的 bufmgr.c 源码文件，该文件采用 C 语言编写，共 2,235 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包，用于本地学习和开发。
虫虫下载站收录了大量与postgresql相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?