📄 bufmgr.c
/*-------------------------------------------------------------------------
 *
 * bufmgr.c
 *	  buffer manager interface routines
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.198.2.3 2006/01/06 00:04:26 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * ReadBuffer() -- find or create a buffer holding the requested page,
 *		and pin it so that no one can destroy it while this process
 *		is using it.
 *
 * ReleaseBuffer() -- unpin the buffer
 *
 * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty"
 *		but don't unpin.  The disk IO is delayed until buffer
 *		replacement.
 *
 * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
 *
 * BufferSync() -- flush all dirty buffers in the buffer pool.
 *
 * BgBufferSync() -- flush some dirty buffers in the buffer pool.
 *
 * InitBufferPool() -- Init the buffer module.
 *
 * See other files:
 *		freelist.c -- chooses victim for buffer replacement
 *		buf_table.c -- manages the buffer lookup table
 */
#include "postgres.h"

#include <sys/file.h>
#include <unistd.h>

#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
#include "utils/relcache.h"
#include "utils/resowner.h"
#include "pgstat.h"


/* Note: these two macros only work on shared buffers, not local ones! */
#define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
#define BufferGetLSN(bufHdr)	(*((XLogRecPtr*) BufHdrGetBlock(bufHdr)))

/* Note: this macro only works on local buffers, not shared ones! */
#define LocalBufHdrGetBlock(bufHdr) \
	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]

/* GUC variables */
bool		zero_damaged_pages = false;
double		bgwriter_lru_percent = 1.0;
double		bgwriter_all_percent = 0.333;
int			bgwriter_lru_maxpages = 5;
int			bgwriter_all_maxpages = 5;

long		NDirectFileRead;	/* some I/O's are direct file access. bypass
								 * bufmgr */
long		NDirectFileWrite;	/* e.g., I/O in psort and hashjoin. */

/* local state for StartBufferIO and related functions */
static volatile BufferDesc *InProgressBuf = NULL;
static bool IsForInput;

/* local state for LockBufferForCleanup */
static volatile BufferDesc *PinCountWaitBuf = NULL;

static bool PinBuffer(volatile BufferDesc *buf);
static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner,
			bool normalAccess);
static bool SyncOneBuffer(int buf_id, bool skip_pinned);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
				  int set_flag_bits);
static void buffer_write_error_callback(void *arg);
static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
			bool *foundPtr);
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
static void AtProcExit_Buffers(int code, Datum arg);
static void write_buffer(Buffer buffer, bool unpin);
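
/*
 * Illustrative sketch, not part of bufmgr.c: how the block-address macros
 * above are meant to be dispatched.  Shared descriptors carry buf_id values
 * 0..NBuffers-1, indexing BufferBlocks directly; local descriptors carry
 * negative buf_id values, and -((buf_id) + 2) recovers the index into
 * LocalBufferBlockPointers, so the two macros must never be mixed.  The
 * guard symbol and function name here are hypothetical.
 */
#ifdef BUFMGR_ILLUSTRATION_ONLY		/* hypothetical guard; never defined */
static Block
example_get_block(volatile BufferDesc *bufHdr, bool isLocalBuf)
{
	/* pick the decoding that matches where the descriptor lives */
	return isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
}
#endif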
/*
 * ReadBuffer -- returns a buffer containing the requested
 *		block of the requested relation.  If the blknum
 *		requested is P_NEW, extend the relation file and
 *		allocate a new block.  (Caller is responsible for
 *		ensuring that only one backend tries to extend a
 *		relation at the same time!)
 *
 * Returns: the buffer number for the buffer containing
 *		the block read.  The returned buffer has been pinned.
 *		Does not return on error --- elog's instead.
 *
 * Assume when this function is called, that reln has been
 *		opened already.
 */
Buffer
ReadBuffer(Relation reln, BlockNumber blockNum)
{
	volatile BufferDesc *bufHdr;
	Block		bufBlock;
	bool		found;
	bool		isExtend;
	bool		isLocalBuf;

	/* Make sure we will have room to remember the buffer pin */
	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

	isExtend = (blockNum == P_NEW);
	isLocalBuf = reln->rd_istemp;

	/* Open it at the smgr level if not already done */
	RelationOpenSmgr(reln);

	/* Substitute proper block number if caller asked for P_NEW */
	if (isExtend)
		blockNum = smgrnblocks(reln->rd_smgr);

	pgstat_count_buffer_read(&reln->pgstat_info, reln);

	if (isLocalBuf)
	{
		ReadLocalBufferCount++;
		bufHdr = LocalBufferAlloc(reln, blockNum, &found);
		if (found)
			LocalBufferHitCount++;
	}
	else
	{
		ReadBufferCount++;

		/*
		 * lookup the buffer.  IO_IN_PROGRESS is set if the requested block
		 * is not currently in memory.
		 */
		bufHdr = BufferAlloc(reln, blockNum, &found);
		if (found)
			BufferHitCount++;
	}

	/* At this point we do NOT hold any locks. */

	/* if it was already in the buffer pool, we're done */
	if (found)
	{
		if (!isExtend)
		{
			/* Just need to update stats before we exit */
			pgstat_count_buffer_hit(&reln->pgstat_info, reln);

			if (VacuumCostActive)
				VacuumCostBalance += VacuumCostPageHit;

			return BufferDescriptorGetBuffer(bufHdr);
		}

		/*
		 * We get here only in the corner case where we are trying to extend
		 * the relation but we found a pre-existing buffer marked BM_VALID.
		 * (This can happen because mdread doesn't complain about reads
		 * beyond EOF --- which is arguably bogus, but changing it seems
		 * tricky.)  We *must* do smgrextend before succeeding, else the
		 * page will not be reserved by the kernel, and the next P_NEW call
		 * will decide to return the same page.  Clear the BM_VALID bit,
		 * do the StartBufferIO call that BufferAlloc didn't, and proceed.
		 */
		if (isLocalBuf)
		{
			/* Only need to adjust flags */
			Assert(bufHdr->flags & BM_VALID);
			bufHdr->flags &= ~BM_VALID;
		}
		else
		{
			/*
			 * Loop to handle the very small possibility that someone
			 * re-sets BM_VALID between our clearing it and StartBufferIO
			 * inspecting it.
			 */
			do
			{
				LockBufHdr(bufHdr);
				Assert(bufHdr->flags & BM_VALID);
				bufHdr->flags &= ~BM_VALID;
				UnlockBufHdr(bufHdr);
			} while (!StartBufferIO(bufHdr, true));
		}
	}

	/*
	 * if we have gotten to this point, we have allocated a buffer for the
	 * page but its contents are not yet valid.  IO_IN_PROGRESS is set for
	 * it, if it's a shared buffer.
	 *
	 * Note: if smgrextend fails, we will end up with a buffer that is
	 * allocated but not marked BM_VALID.  P_NEW will still select the same
	 * block number (because the relation didn't get any longer on disk) and
	 * so future attempts to extend the relation will find the same buffer
	 * (if it's not been recycled) but come right back here to try smgrextend
	 * again.
	 */
	Assert(!(bufHdr->flags & BM_VALID));		/* spinlock not needed */

	bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);

	if (isExtend)
	{
		/* new buffers are zero-filled */
		MemSet((char *) bufBlock, 0, BLCKSZ);
		smgrextend(reln->rd_smgr, blockNum, (char *) bufBlock,
				   reln->rd_istemp);
	}
	else
	{
		smgrread(reln->rd_smgr, blockNum, (char *) bufBlock);

		/* check for garbage data */
		if (!PageHeaderIsValid((PageHeader) bufBlock))
		{
			/*
			 * During WAL recovery, the first access to any data page should
			 * overwrite the whole page from the WAL; so a clobbered page
			 * header is not reason to fail.  Hence, when InRecovery we may
			 * always act as though zero_damaged_pages is ON.
			 */
			if (zero_damaged_pages || InRecovery)
			{
				ereport(WARNING,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg("invalid page header in block %u of relation \"%s\"; zeroing out page",
								blockNum, RelationGetRelationName(reln))));
				MemSet((char *) bufBlock, 0, BLCKSZ);
			}
			else
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg("invalid page header in block %u of relation \"%s\"",
								blockNum, RelationGetRelationName(reln))));
		}
	}

	if (isLocalBuf)
	{
		/* Only need to adjust flags */
		bufHdr->flags |= BM_VALID;
	}
	else
	{
		/* Set BM_VALID, terminate IO, and wake up any waiters */
		TerminateBufferIO(bufHdr, false, BM_VALID);
	}

	if (VacuumCostActive)
		VacuumCostBalance += VacuumCostPageMiss;

	return BufferDescriptorGetBuffer(bufHdr);
}
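
/*
 * Illustrative sketch, not part of bufmgr.c: the typical caller-side read
 * pattern implied by ReadBuffer()'s contract above.  The buffer comes back
 * pinned but not content-locked, so a reader takes a share lock before
 * examining the page, then unlocks and unpins.  The guard symbol, function
 * name, and arguments are hypothetical.
 */
#ifdef BUFMGR_ILLUSTRATION_ONLY		/* hypothetical guard; never defined */
static void
example_read_page(Relation rel, BlockNumber blkno)
{
	Buffer		buf;
	Page		page;

	buf = ReadBuffer(rel, blkno);	/* pin; may do I/O on a cache miss */

	/* take a share lock before looking at the page contents */
	LockBuffer(buf, BUFFER_LOCK_SHARE);
	page = BufferGetPage(buf);

	/* ... examine the page here ... */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);				/* drop the pin */
}
#endif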
/*
 * BufferAlloc -- subroutine for ReadBuffer.  Handles lookup of a shared
 *		buffer.  If no buffer exists already, selects a replacement
 *		victim and evicts the old page, but does NOT read in new page.
 *
 * The returned buffer is pinned and is already marked as holding the
 * desired page.  If it already did have the desired page, *foundPtr is
 * set TRUE.  Otherwise, *foundPtr is set FALSE and the buffer is marked
 * as IO_IN_PROGRESS; ReadBuffer will now need to do I/O to fill it.
 *
 * *foundPtr is actually redundant with the buffer's BM_VALID flag, but
 * we keep it for simplicity in ReadBuffer.
 *
 * No locks are held either at entry or exit.
 */
static volatile BufferDesc *
BufferAlloc(Relation reln,
			BlockNumber blockNum,
			bool *foundPtr)
{
	BufferTag	newTag;			/* identity of requested block */
	BufferTag	oldTag;
	BufFlags	oldFlags;
	int			buf_id;
	volatile BufferDesc *buf;
	bool		valid;

	/* create a tag so we can lookup the buffer */
	INIT_BUFFERTAG(newTag, reln, blockNum);

	/* see if the block is in the buffer pool already */
	LWLockAcquire(BufMappingLock, LW_SHARED);
	buf_id = BufTableLookup(&newTag);
	if (buf_id >= 0)
	{
		/*
		 * Found it.  Now, pin the buffer so no one can steal it from the
		 * buffer pool, and check to see if the correct data has been loaded
		 * into the buffer.
		 */
		buf = &BufferDescriptors[buf_id];

		valid = PinBuffer(buf);

		/* Can release the mapping lock as soon as we've pinned it */
		LWLockRelease(BufMappingLock);

		*foundPtr = TRUE;

		if (!valid)
		{
			/*
			 * We can only get here if (a) someone else is still reading in
			 * the page, or (b) a previous read attempt failed.  We have to
			 * wait for any active read attempt to finish, and then set up
			 * our own read attempt if the page is still not BM_VALID.
			 * StartBufferIO does it all.
			 */
			if (StartBufferIO(buf, true))
			{
				/*
				 * If we get here, previous attempts to read the buffer must
				 * have failed ... but we shall bravely try again.
				 */
				*foundPtr = FALSE;
			}
		}

		return buf;
	}

	/*
	 * Didn't find it in the buffer pool.  We'll have to initialize a new
	 * buffer.  Remember to unlock BufMappingLock while doing the work.
	 */
	LWLockRelease(BufMappingLock);

	/* Loop here in case we have to try another victim buffer */
	for (;;)
	{
		/*
		 * Select a victim buffer.  The buffer is returned with its header
		 * spinlock still held!  Also the BufFreelistLock is still held,
		 * since it would be bad to hold the spinlock while possibly waking
		 * up other processes.
		 */
		buf = StrategyGetBuffer();

		Assert(buf->refcount == 0);

		/* Must copy buffer flags while we still hold the spinlock */
		oldFlags = buf->flags;

		/* Pin the buffer and then release the buffer spinlock */
		PinBuffer_Locked(buf);

		/* Now it's safe to release the freelist lock */
		LWLockRelease(BufFreelistLock);
		/*
		 * If the buffer was dirty, try to write it out.  There is a race
		 * condition here, in that someone might dirty it after we released
		 * it above, or even while we are writing it out (since our
		 * share-lock won't prevent hint-bit updates).  We will recheck the
		 * dirty bit after re-locking the buffer header.
		 */
		if (oldFlags & BM_DIRTY)
		{
			/*
			 * We need a share-lock on the buffer contents to write it out
			 * (else we might write invalid data, eg because someone else is
			 * compacting the page contents while we write).  We must use a
			 * conditional lock acquisition here to avoid deadlock.  Even
			 * though the buffer was not pinned (and therefore surely not
			 * locked) when StrategyGetBuffer returned it, someone else could
			 * have pinned and exclusive-locked it by the time we get here.
			 * If we try to get the lock unconditionally, we'd block waiting
			 * for them; if they later block waiting for us, deadlock ensues.
			 * (This has been observed to happen when two backends are both
			 * trying to split btree index pages, and the second one just
			 * happens to be trying to split the page the first one got from
			 * StrategyGetBuffer.)
			 */
			if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
			{
				FlushBuffer(buf, NULL);
				LWLockRelease(buf->content_lock);
			}
			else
			{
				/*
				 * Someone else has pinned the buffer, so give it up and loop
				 * back to get another one.
				 */
				UnpinBuffer(buf, true, false /* evidently recently used */ );
				continue;
			}
		}

		/*
		 * Acquire exclusive mapping lock in preparation for changing the
		 * buffer's association.
		 */
		LWLockAcquire(BufMappingLock, LW_EXCLUSIVE);

		/*
		 * Try to make a hashtable entry for the buffer under its new tag.
		 * This could fail because while we were writing someone else
		 * allocated another buffer for the same block we want to read in.
		 * Note that we have not yet removed the hashtable entry for the old
		 * tag.
		 */
		buf_id = BufTableInsert(&newTag, buf->buf_id);

		if (buf_id >= 0)
		{
			/*
			 * Got a collision.  Someone has already done what we were about
			 * to do.  We'll just handle this as if it were found in the
			 * buffer pool in the first place.  First, give up the buffer we
			 * were planning to use.  Don't allow it to be thrown in the free
			 * list (we don't want to hold both global locks at once).
			 */
			UnpinBuffer(buf, true, false);

			/* remaining code should match code at top of routine */

			buf = &BufferDescriptors[buf_id];

			valid = PinBuffer(buf);

			/* Can release the mapping lock as soon as we've pinned it */
			LWLockRelease(BufMappingLock);

			*foundPtr = TRUE;

			if (!valid)
			{
				/*
				 * We can only get here if (a) someone else is still reading
				 * in the page, or (b) a previous read attempt failed.  We
				 * have to wait for any active read attempt to finish, and
				 * then set up our own read attempt if the page is still not
				 * BM_VALID.  StartBufferIO does it all.
				 */
				if (StartBufferIO(buf, true))
				{
					/*
					 * If we get here, previous attempts to read the buffer
					 * must have failed ... but we shall bravely try again.
					 */
					*foundPtr = FALSE;
				}
			}

			return buf;
		}

		/*
		 * Need to lock the buffer header too in order to change its tag.
		 */
		LockBufHdr_NoHoldoff(buf);

		/*
		 * Somebody could have pinned or re-dirtied the buffer while we were
		 * doing the I/O and making the new hashtable entry.  If so, we can't
		 * recycle this buffer; we must undo everything we've done and start
		 * over with a new victim buffer.
		 */
		if (buf->refcount == 1 && !(buf->flags & BM_DIRTY))
			break;

		UnlockBufHdr_NoHoldoff(buf);
		BufTableDelete(&newTag);
		LWLockRelease(BufMappingLock);
		UnpinBuffer(buf, true, false /* evidently recently used */ );
	}
	/*
	 * Okay, it's finally safe to rename the buffer.
	 *
	 * Clearing BM_VALID here is necessary, clearing the dirtybits is just
	 * paranoia.  We also clear the usage_count since any recency of use of
	 * the old content is no longer relevant.
	 */
	oldTag = buf->tag;
	oldFlags = buf->flags;
	buf->tag = newTag;
	buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
	buf->flags |= BM_TAG_VALID;
	buf->usage_count = 0;

	UnlockBufHdr_NoHoldoff(buf);

	if (oldFlags & BM_TAG_VALID)
		BufTableDelete(&oldTag);

	LWLockRelease(BufMappingLock);

	/*
	 * Buffer contents are currently invalid.  Try to get the io_in_progress
	 * lock.  If StartBufferIO returns false, then someone else managed to
	 * read it before we did, so there's nothing left for BufferAlloc() to
	 * do.
	 */
	if (StartBufferIO(buf, true))
		*foundPtr = FALSE;
	else
		*foundPtr = TRUE;

	return buf;
}
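
/*
 * Illustrative sketch, not part of bufmgr.c: the caller-side pattern for
 * dirtying a page through this interface, following the WriteBuffer()
 * contract stated at the top of the file (mark the buffer dirty and unpin
 * it; the actual disk write is deferred to buffer replacement or a sync).
 * The guard symbol, function name, and arguments are hypothetical.
 */
#ifdef BUFMGR_ILLUSTRATION_ONLY		/* hypothetical guard; never defined */
static void
example_touch_page(Relation rel, BlockNumber blkno)
{
	Buffer		buf;

	buf = ReadBuffer(rel, blkno);	/* returned pinned */

	/* an exclusive content lock is needed to modify the page */
	LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

	/* ... modify the page (and write WAL for the change!) ... */

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	WriteBuffer(buf);				/* mark dirty and drop the pin */
}
#endif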