📄 bufmgr.c

📁 PostgreSQL7.4.6 for Linux
💻 C
📖 第 1 页 / 共 4 页
字号:
	/*	 * relation->rd_nblocks should be accurate already if the relation is	 * new or temp, because no one else should be modifying it.  Otherwise	 * we need to ask the smgr for the current physical file length.	 *	 * Don't call smgr on a view, either.	 */	if (relation->rd_rel->relkind == RELKIND_VIEW)		relation->rd_nblocks = 0;	else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)		relation->rd_nblocks = 0;	else if (!relation->rd_isnew && !relation->rd_istemp)		relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);	return relation->rd_nblocks;}/* * RelationUpdateNumberOfBlocks *		Forcibly update relation->rd_nblocks. * * If the relcache drops an entry for a temp relation, it must call this * routine after recreating the relcache entry, so that rd_nblocks is * re-sync'd with reality.  See RelationGetNumberOfBlocks. */voidRelationUpdateNumberOfBlocks(Relation relation){	if (relation->rd_rel->relkind == RELKIND_VIEW)		relation->rd_nblocks = 0;	else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)		relation->rd_nblocks = 0;	else		relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);}/* --------------------------------------------------------------------- *		DropRelationBuffers * *		This function removes all the buffered pages for a relation *		from the buffer pool.  Dirty pages are simply dropped, without *		bothering to write them out first.	This is NOT rollback-able, *		and so should be used only with extreme caution! * *		We assume that the caller holds an exclusive lock on the relation, *		which should assure that no new buffers will be acquired for the rel *		meanwhile. 
* -------------------------------------------------------------------- */voidDropRelationBuffers(Relation rel){	DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp);}/* --------------------------------------------------------------------- *		DropRelFileNodeBuffers * *		This is the same as DropRelationBuffers, except that the target *		relation is specified by RelFileNode and temp status. * *		This is NOT rollback-able.	One legitimate use is to clear the *		buffer cache of buffers for a relation that is being deleted *		during transaction abort. * -------------------------------------------------------------------- */voidDropRelFileNodeBuffers(RelFileNode rnode, bool istemp){	int			i;	BufferDesc *bufHdr;	if (istemp)	{		for (i = 0; i < NLocBuffer; i++)		{			bufHdr = &LocalBufferDescriptors[i];			if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))			{				bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);				bufHdr->cntxDirty = false;				LocalRefCount[i] = 0;				bufHdr->tag.rnode.relNode = InvalidOid;			}		}		return;	}	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);	for (i = 1; i <= NBuffers; i++)	{		bufHdr = &BufferDescriptors[i - 1];recheck:		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))		{			/*			 * If there is I/O in progress, better wait till it's done;			 * don't want to delete the relation out from under someone			 * who's just trying to flush the buffer!			 */			if (bufHdr->flags & BM_IO_IN_PROGRESS)			{				WaitIO(bufHdr);				/*				 * By now, the buffer very possibly belongs to some other				 * rel, so check again before proceeding.				 */				goto recheck;			}			/* Now we can do what we came for */			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);			bufHdr->cntxDirty = false;			/*			 * Release any refcount we may have.  If someone else has a			 * pin on the buffer, we got trouble.			 
*/			if (!(bufHdr->flags & BM_FREE))			{				/* the sole pin should be ours */				if (bufHdr->refcount != 1 || PrivateRefCount[i - 1] == 0)					elog(FATAL, "block %u of %u/%u is still referenced (private %ld, global %d)",						 bufHdr->tag.blockNum,						 bufHdr->tag.rnode.tblNode,						 bufHdr->tag.rnode.relNode,						 PrivateRefCount[i - 1], bufHdr->refcount);				/* Make sure it will be released */				PrivateRefCount[i - 1] = 1;				UnpinBuffer(bufHdr);			}			/*			 * And mark the buffer as no longer occupied by this rel.			 */			BufTableDelete(bufHdr);		}	}	LWLockRelease(BufMgrLock);}/* --------------------------------------------------------------------- *		DropBuffers * *		This function removes all the buffers in the buffer cache for a *		particular database.  Dirty pages are simply dropped, without *		bothering to write them out first.	This is used when we destroy a *		database, to avoid trying to flush data to disk when the directory *		tree no longer exists.	Implementation is pretty similar to *		DropRelationBuffers() which is for destroying just one relation. * -------------------------------------------------------------------- */voidDropBuffers(Oid dbid){	int			i;	BufferDesc *bufHdr;	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);	for (i = 1; i <= NBuffers; i++)	{		bufHdr = &BufferDescriptors[i - 1];recheck:		/*		 * We know that currently database OID is tblNode but this		 * probably will be changed in future and this func will be used		 * to drop tablespace buffers.		 */		if (bufHdr->tag.rnode.tblNode == dbid)		{			/*			 * If there is I/O in progress, better wait till it's done;			 * don't want to delete the database out from under someone			 * who's just trying to flush the buffer!			 */			if (bufHdr->flags & BM_IO_IN_PROGRESS)			{				WaitIO(bufHdr);				/*				 * By now, the buffer very possibly belongs to some other				 * DB, so check again before proceeding.				 
*/				goto recheck;			}			/* Now we can do what we came for */			bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);			bufHdr->cntxDirty = false;			/*			 * The thing should be free, if caller has checked that no			 * backends are running in that database.			 */			Assert(bufHdr->flags & BM_FREE);			/*			 * And mark the buffer as no longer occupied by this page.			 */			BufTableDelete(bufHdr);		}	}	LWLockRelease(BufMgrLock);}/* ----------------------------------------------------------------- *		PrintBufferDescs * *		this function prints all the buffer descriptors, for debugging *		use only. * ----------------------------------------------------------------- */#ifdef NOT_USEDvoidPrintBufferDescs(void){	int			i;	BufferDesc *buf = BufferDescriptors;	if (IsUnderPostmaster)	{		LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);		for (i = 0; i < NBuffers; ++i, ++buf)		{			elog(LOG, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \blockNum=%u, flags=0x%x, refcount=%d %ld)",				 i, buf->freeNext, buf->freePrev,				 buf->tag.rnode.tblNode, buf->tag.rnode.relNode,				 buf->tag.blockNum, buf->flags,				 buf->refcount, PrivateRefCount[i]);		}		LWLockRelease(BufMgrLock);	}	else	{		/* interactive backend */		for (i = 0; i < NBuffers; ++i, ++buf)		{			printf("[%-2d] (%u/%u, %u) flags=0x%x, refcnt=%d %ld)\n",				   i, buf->tag.rnode.tblNode, buf->tag.rnode.relNode,				   buf->tag.blockNum,				   buf->flags, buf->refcount, PrivateRefCount[i]);		}	}}#endif#ifdef NOT_USEDvoidPrintPinnedBufs(void){	int			i;	BufferDesc *buf = BufferDescriptors;	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);	for (i = 0; i < NBuffers; ++i, ++buf)	{		if (PrivateRefCount[i] > 0)			elog(WARNING, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \blockNum=%u, flags=0x%x, refcount=%d %ld)",				 i, buf->freeNext, buf->freePrev,				 buf->tag.rnode.tblNode, buf->tag.rnode.relNode,				 buf->tag.blockNum, buf->flags,				 buf->refcount, PrivateRefCount[i]);	}	LWLockRelease(BufMgrLock);}#endif/* 
---------------------------------------------------------------------
 *		FlushRelationBuffers
 *
 *		This function writes all dirty pages of a relation out to disk.
 *		Furthermore, pages that have blocknumber >= firstDelBlock are
 *		actually removed from the buffer pool.  An error code is returned
 *		if we fail to dump a dirty buffer or if we find one of
 *		the target pages is pinned into the cache.
 *
 *		This is called by DROP TABLE to clear buffers for the relation
 *		from the buffer pool.  Note that we must write dirty buffers,
 *		rather than just dropping the changes, because our transaction
 *		might abort later on; we want to roll back safely in that case.
 *
 *		This is also called by VACUUM before truncating the relation to the
 *		given number of blocks.  It might seem unnecessary for VACUUM to
 *		write dirty pages before firstDelBlock, since VACUUM should already
 *		have committed its changes.  However, it is possible for there still
 *		to be dirty pages: if some page had unwritten on-row tuple status
 *		updates from a prior transaction, and VACUUM had no additional
 *		changes to make to that page, then VACUUM won't have written it.
 *		This is harmless in most cases but will break pg_upgrade, which
 *		relies on VACUUM to ensure that *all* tuples have correct on-row
 *		status.  So, we check and flush all dirty pages of the rel
 *		regardless of block number.
 *
 *		In all cases, the caller should be holding AccessExclusiveLock on
 *		the target relation to ensure that no other backend is busy reading
 *		more blocks of the relation (or might do so before we commit).
 *
 *		Formerly, we considered it an error condition if we found dirty
 *		buffers here.  However, since BufferSync no longer forces out all
 *		dirty buffers at every xact commit, it's possible for dirty buffers
 *		to still be present in the cache due to failure of an earlier
 *		transaction.  So, must flush dirty buffers without complaint.
* *		Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED * *		XXX currently it sequentially searches the buffer pool, should be *		changed to more clever ways of searching. * -------------------------------------------------------------------- */intFlushRelationBuffers(Relation rel, BlockNumber firstDelBlock){	int			i;	BufferDesc *bufHdr;	XLogRecPtr	recptr;	int			status;	ErrorContextCallback errcontext;	/* Setup error traceback support for ereport() */	errcontext.callback = buffer_write_error_callback;	errcontext.arg = NULL;	errcontext.previous = error_context_stack;	error_context_stack = &errcontext;	if (rel->rd_istemp)	{		for (i = 0; i < NLocBuffer; i++)		{			bufHdr = &LocalBufferDescriptors[i];			errcontext.arg = bufHdr;			if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))			{				if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)				{					status = smgrwrite(DEFAULT_SMGR, rel,									   bufHdr->tag.blockNum,									   (char *) MAKE_PTR(bufHdr->data));					if (status == SM_FAIL)					{						error_context_stack = errcontext.previous;						elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is dirty, could not flush it",							 RelationGetRelationName(rel), firstDelBlock,							 bufHdr->tag.blockNum);						return (-1);					}					bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);					bufHdr->cntxDirty = false;				}				if (LocalRefCount[i] > 0)				{					error_context_stack = errcontext.previous;					elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is referenced (%ld)",						 RelationGetRelationName(rel), firstDelBlock,						 bufHdr->tag.blockNum, LocalRefCount[i]);					return (-2);				}				if (bufHdr->tag.blockNum >= firstDelBlock)					bufHdr->tag.rnode.relNode = InvalidOid;			}		}		/* Pop the error context stack */		error_context_stack = errcontext.previous;		return 0;	}	LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);	for (i = 0; i < NBuffers; i++)	{		bufHdr = &BufferDescriptors[i];		errcontext.arg = bufHdr;		if 
(RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))		{			if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)			{				PinBuffer(bufHdr);				if (bufHdr->flags & BM_IO_IN_PROGRESS)					WaitIO(bufHdr);				LWLockRelease(BufMgrLock);				/*				 * Force XLOG flush for buffer' LSN				 */				recptr = BufferGetLSN(bufHdr);				XLogFlush(recptr);				/*				 * Now it's safe to write buffer to disk				 */				LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);				if (bufHdr->flags & BM_IO_IN_PROGRESS)					WaitIO(bufHdr);				if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)				{					bufHdr->flags &= ~BM_JUST_DIRTIED;					StartBufferIO(bufHdr, false);		/* output IO start */					LWLockRelease(BufMgrLock);					status = smgrwrite(DEFAULT_SMGR, rel,									   bufHdr->tag.blockNum,									   (char *) MAKE_PTR(bufHdr->data));					if (status == SM_FAIL)		/* disk failure ?! */						ereport(PANIC,								(errcode(ERRCODE_IO_ERROR),							  errmsg("could not write block %u of %u/%u",									 bufHdr->tag.blockNum,									 bufHdr->tag.rnode.tblNode,									 bufHdr->tag.rnode.relNode)));					BufferFlushCount++;					LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);					bufHdr->flags &= ~BM_IO_IN_PROGRESS;					TerminateBufferIO(bufHdr);					Assert(!(bufHdr->flags & BM_JUST_DIRTIED));					bufHdr->flags &= ~BM_DIRTY;					/*					 * Note that it's safe to change cntxDirty here					 * because of we protect it from upper writers by					 * AccessExclusiveLock and from other bufmgr routines					 * by BM_IO_IN_PROGRESS					 */					bufHdr->cntxDirty = false;				}				UnpinBuffer(bufHdr);			}			if (!(bufHdr->flags & BM_FREE))			{				LWLockRelease(BufMgrLock);				error_context_stack = errcontext.previous;				elog(WARNING, "FlushRelationBuffers(\"%s\", %u): block %u is referenced (private %ld, global %d)",					 RelationGetRelationName(rel), firstDelBlock,					 bufHdr->tag.blockNum,					 PrivateRefCount[i], bufHdr->refcount);				return -2;			}			if (bufHdr->tag.blockNum >= firstDelBlock)				BufTableDelete(bufHdr);		}	}	
LWLockRelease(BufMgrLock);	/* Pop the error context stack */	error_context_stack = errcontext.previous;	return 0;}#undef ReleaseBuffer/* * ReleaseBuffer -- remove the pin on a buffer without *		marking it dirty. */intReleaseBuffer(Buffer buffer){	BufferDesc *bufHdr;	if (BufferIsLocal(buffer))	{		Assert(LocalRefCount[-buffer - 1] > 0);		LocalRefCount[-buffer - 1]--;		return STATUS_OK;	}	if (BAD_BUFFER_ID(buffer))		return STATUS_ERROR;	bufHdr = &BufferDescriptors[buffer - 1];	Assert(PrivateRefCount[buffer - 1] > 0);	if (PrivateRefCount[buffer - 1] > 1)		PrivateRefCount[buffer - 1]--;	else	{		LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);		UnpinBuffer(bufHdr);		LWLockRelease(BufMgrLock);	}	return STATUS_OK;}#ifdef NOT_USEDvoidIncrBufferRefCount_Debug(char *file, int line, Buffer buffer){	IncrBufferRefCount(buffer);	if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))	{		BufferDesc *buf = &BufferDescriptors[buffer - 1];		fprintf(stderr, "PIN(Incr) %d rel = %u/%u, blockNum = %u, \refcount = %ld, file: %s, line: %d\n",				buffer,				buf->tag.rnode.tblNode, buf->tag.rnode.relNode,				buf->tag.blockNum,				PrivateRefCount[buffer - 1], file, line);	}}#endif#ifdef NOT_USEDvoidReleaseBuffer_Debug(char *file, int line, Buffer buffer){	ReleaseBuffer(buffer);	if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer))	{		BufferDesc *buf = &BufferDescriptors[buffer - 1];		fprintf(stderr, "UNPIN(Rel) %d rel = %u/%u, blockNum = %u, \refcount = %ld, file: %s, line: %d\n",				buffer,				buf->tag.rnode.tblNode, buf->tag.rnode.relNode,				buf->tag.blockNum,				PrivateRefCount[buffer - 1], file, line);	}}#endif#ifdef NOT_USEDBufferReleaseAndReadBuffer_Debug(char *file,						   int line,						   Buffer buffer,						   Relation relation,						   BlockNumber blockNum){	bool		bufferValid;	Buffer		b;	bufferValid = BufferIsValid(buffer);	b = ReleaseAndReadBuffer(buffer, relation, blockNum);	if (ShowPinTrace && bufferValid && BufferIsLocal(buffer)		&& is_userbuffer(buffer))	{
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -