bufmgr.c
     * above, or even while we are writing it out (since our share-lock
     * won't prevent hint-bit updates).  We will recheck the dirty bit
     * after re-locking the buffer header.
     */
    if (oldFlags & BM_DIRTY)
    {
        /*
         * We need a share-lock on the buffer contents to write it out
         * (else we might write invalid data, eg because someone else is
         * compacting the page contents while we write).  We must use a
         * conditional lock acquisition here to avoid deadlock.  Even
         * though the buffer was not pinned (and therefore surely not
         * locked) when StrategyGetBuffer returned it, someone else could
         * have pinned and exclusive-locked it by the time we get here.  If
         * we try to get the lock unconditionally, we'd block waiting for
         * them; if they later block waiting for us, deadlock ensues.
         * (This has been observed to happen when two backends are both
         * trying to split btree index pages, and the second one just
         * happens to be trying to split the page the first one got from
         * StrategyGetBuffer.)
         */
        if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
        {
            /*
             * If using a nondefault strategy, and writing the buffer
             * would require a WAL flush, let the strategy decide whether
             * to go ahead and write/reuse the buffer or to choose another
             * victim.  We need lock to inspect the page LSN, so this
             * can't be done inside StrategyGetBuffer.
             */
            if (strategy != NULL &&
                XLogNeedsFlush(BufferGetLSN(buf)) &&
                StrategyRejectBuffer(strategy, buf))
            {
                /* Drop lock/pin and loop around for another buffer */
                LWLockRelease(buf->content_lock);
                UnpinBuffer(buf, true);
                continue;
            }

            /* OK, do the I/O */
            FlushBuffer(buf, NULL);
            LWLockRelease(buf->content_lock);
        }
        else
        {
            /*
             * Someone else has locked the buffer, so give it up and loop
             * back to get another one.
             */
            UnpinBuffer(buf, true);
            continue;
        }
    }

    /*
     * To change the association of a valid buffer, we'll need to have
     * exclusive lock on both the old and new mapping partitions.
     */
    if (oldFlags & BM_TAG_VALID)
    {
        /*
         * Need to compute the old tag's hashcode and partition lock ID.
         * XXX is it worth storing the hashcode in BufferDesc so we need
         * not recompute it here?  Probably not.
         */
        oldTag = buf->tag;
        oldHash = BufTableHashCode(&oldTag);
        oldPartitionLock = BufMappingPartitionLock(oldHash);

        /*
         * Must lock the lower-numbered partition first to avoid
         * deadlocks.
         */
        if (oldPartitionLock < newPartitionLock)
        {
            LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
            LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
        }
        else if (oldPartitionLock > newPartitionLock)
        {
            LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
            LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
        }
        else
        {
            /* only one partition, only one lock */
            LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
        }
    }
    else
    {
        /* if it wasn't valid, we need only the new partition */
        LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
        /* these just keep the compiler quiet about uninit variables */
        oldHash = 0;
        oldPartitionLock = 0;
    }

    /*
     * Try to make a hashtable entry for the buffer under its new tag.
     * This could fail because while we were writing someone else
     * allocated another buffer for the same block we want to read in.
     * Note that we have not yet removed the hashtable entry for the old
     * tag.
     */
    buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);

    if (buf_id >= 0)
    {
        /*
         * Got a collision.  Someone has already done what we were about to
         * do.  We'll just handle this as if it were found in the buffer
         * pool in the first place.  First, give up the buffer we were
         * planning to use.
         */
        UnpinBuffer(buf, true);

        /* Can give up that buffer's mapping partition lock now */
        if ((oldFlags & BM_TAG_VALID) &&
            oldPartitionLock != newPartitionLock)
            LWLockRelease(oldPartitionLock);

        /* remaining code should match code at top of routine */

        buf = &BufferDescriptors[buf_id];

        valid = PinBuffer(buf, strategy);

        /* Can release the mapping lock as soon as we've pinned it */
        LWLockRelease(newPartitionLock);

        *foundPtr = TRUE;

        if (!valid)
        {
            /*
             * We can only get here if (a) someone else is still reading
             * in the page, or (b) a previous read attempt failed.  We
             * have to wait for any active read attempt to finish, and
             * then set up our own read attempt if the page is still not
             * BM_VALID.  StartBufferIO does it all.
             */
            if (StartBufferIO(buf, true))
            {
                /*
                 * If we get here, previous attempts to read the buffer
                 * must have failed ... but we shall bravely try again.
                 */
                *foundPtr = FALSE;
            }
        }

        return buf;
    }

    /*
     * Need to lock the buffer header too in order to change its tag.
     */
    LockBufHdr(buf);

    /*
     * Somebody could have pinned or re-dirtied the buffer while we were
     * doing the I/O and making the new hashtable entry.  If so, we can't
     * recycle this buffer; we must undo everything we've done and start
     * over with a new victim buffer.
     */
    oldFlags = buf->flags;
    if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
        break;

    UnlockBufHdr(buf);
    BufTableDelete(&newTag, newHash);
    if ((oldFlags & BM_TAG_VALID) &&
        oldPartitionLock != newPartitionLock)
        LWLockRelease(oldPartitionLock);
    LWLockRelease(newPartitionLock);
    UnpinBuffer(buf, true);
}

/*
 * Okay, it's finally safe to rename the buffer.
 *
 * Clearing BM_VALID here is necessary, clearing the dirtybits is just
 * paranoia.  We also reset the usage_count since any recency of use of
 * the old content is no longer relevant.  (The usage_count starts out at
 * 1 so that the buffer can survive one clock-sweep pass.)
 */
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
                BM_CHECKPOINT_NEEDED | BM_IO_ERROR);
buf->flags |= BM_TAG_VALID;
buf->usage_count = 1;

UnlockBufHdr(buf);

if (oldFlags & BM_TAG_VALID)
{
    BufTableDelete(&oldTag, oldHash);
    if (oldPartitionLock != newPartitionLock)
        LWLockRelease(oldPartitionLock);
}

LWLockRelease(newPartitionLock);

/*
 * Buffer contents are currently invalid.  Try to get the io_in_progress
 * lock.  If StartBufferIO returns false, then someone else managed to
 * read it before we did, so there's nothing left for BufferAlloc() to do.
 */
if (StartBufferIO(buf, true))
    *foundPtr = FALSE;
else
    *foundPtr = TRUE;

return buf;
}
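/*
 * Illustrative aside -- not part of the original bufmgr.c.  The
 * partition-lock ordering rule inside BufferAlloc() above is the classic
 * deadlock-avoidance technique of always acquiring multiple locks in a
 * globally agreed order (here: increasing lock ID).  A minimal sketch of
 * the same pattern as a reusable helper; the helper name is hypothetical:
 */
static void
acquire_two_locks_in_order(LWLockId a, LWLockId b)
{
    if (a < b)
    {
        LWLockAcquire(a, LW_EXCLUSIVE);
        LWLockAcquire(b, LW_EXCLUSIVE);
    }
    else if (a > b)
    {
        LWLockAcquire(b, LW_EXCLUSIVE);
        LWLockAcquire(a, LW_EXCLUSIVE);
    }
    else
    {
        /* same lock: acquire it only once to avoid self-deadlock */
        LWLockAcquire(a, LW_EXCLUSIVE);
    }

    /*
     * Because every backend orders its acquisitions the same way, no cycle
     * of lock waits -- and hence no deadlock -- can form between backends.
     */
}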
/*
 * InvalidateBuffer -- mark a shared buffer invalid and return it to the
 * freelist.
 *
 * The buffer header spinlock must be held at entry.  We drop it before
 * returning.  (This is sane because the caller must have locked the
 * buffer in order to be sure it should be dropped.)
 *
 * This is used only in contexts such as dropping a relation.  We assume
 * that no other backend could possibly be interested in using the page,
 * so the only reason the buffer might be pinned is if someone else is
 * trying to write it out.  We have to let them finish before we can
 * reclaim the buffer.
 *
 * The buffer could get reclaimed by someone else while we are waiting
 * to acquire the necessary locks; if so, don't mess it up.
 */
static void
InvalidateBuffer(volatile BufferDesc *buf)
{
    BufferTag   oldTag;
    uint32      oldHash;            /* hash value for oldTag */
    LWLockId    oldPartitionLock;   /* buffer partition lock for it */
    BufFlags    oldFlags;

    /* Save the original buffer tag before dropping the spinlock */
    oldTag = buf->tag;

    UnlockBufHdr(buf);

    /*
     * Need to compute the old tag's hashcode and partition lock ID.  XXX is
     * it worth storing the hashcode in BufferDesc so we need not recompute
     * it here?  Probably not.
     */
    oldHash = BufTableHashCode(&oldTag);
    oldPartitionLock = BufMappingPartitionLock(oldHash);

retry:

    /*
     * Acquire exclusive mapping lock in preparation for changing the
     * buffer's association.
     */
    LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);

    /* Re-lock the buffer header */
    LockBufHdr(buf);

    /* If it's changed while we were waiting for lock, do nothing */
    if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
    {
        UnlockBufHdr(buf);
        LWLockRelease(oldPartitionLock);
        return;
    }

    /*
     * We assume the only reason for it to be pinned is that someone else is
     * flushing the page out.  Wait for them to finish.  (This could be an
     * infinite loop if the refcount is messed up... it would be nice to time
     * out after awhile, but there seems no way to be sure how many loops may
     * be needed.  Note that if the other guy has pinned the buffer but not
     * yet done StartBufferIO, WaitIO will fall through and we'll effectively
     * be busy-looping here.)
     */
    if (buf->refcount != 0)
    {
        UnlockBufHdr(buf);
        LWLockRelease(oldPartitionLock);
        /* safety check: should definitely not be our *own* pin */
        if (PrivateRefCount[buf->buf_id] != 0)
            elog(ERROR, "buffer is pinned in InvalidateBuffer");
        WaitIO(buf);
        goto retry;
    }

    /*
     * Clear out the buffer's tag and flags.  We must do this to ensure that
     * linear scans of the buffer array don't think the buffer is valid.
     */
    oldFlags = buf->flags;
    CLEAR_BUFFERTAG(buf->tag);
    buf->flags = 0;
    buf->usage_count = 0;

    UnlockBufHdr(buf);

    /*
     * Remove the buffer from the lookup hashtable, if it was in there.
     */
    if (oldFlags & BM_TAG_VALID)
        BufTableDelete(&oldTag, oldHash);

    /*
     * Done with mapping lock.
     */
    LWLockRelease(oldPartitionLock);

    /*
     * Insert the buffer at the head of the list of free buffers.
     */
    StrategyFreeBuffer(buf);
}
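/*
 * Illustrative aside -- not part of the original bufmgr.c.  Per the header
 * comment above, callers enter InvalidateBuffer() holding the buffer-header
 * spinlock, and the function releases it on every path.  A minimal sketch of
 * that calling convention; the helper name and its filtering condition are
 * hypothetical, loosely modeled on the relation-drop code paths:
 */
static void
invalidate_if_matches_sketch(volatile BufferDesc *bufHdr, RelFileNode rnode)
{
    LockBufHdr(bufHdr);
    if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
        InvalidateBuffer(bufHdr);       /* drops the header spinlock itself */
    else
        UnlockBufHdr(bufHdr);
}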
/*
 * MarkBufferDirty
 *
 * Marks buffer contents as dirty (actual write happens later).
 *
 * Buffer must be pinned and exclusive-locked.  (If caller does not hold
 * exclusive lock, then somebody could be in process of writing the buffer,
 * leading to risk of bad data written to disk.)
 */
void
MarkBufferDirty(Buffer buffer)
{
    volatile BufferDesc *bufHdr;

    if (!BufferIsValid(buffer))
        elog(ERROR, "bad buffer id: %d", buffer);

    if (BufferIsLocal(buffer))
    {
        MarkLocalBufferDirty(buffer);
        return;
    }

    bufHdr = &BufferDescriptors[buffer - 1];

    Assert(PrivateRefCount[buffer - 1] > 0);
    /* unfortunately we can't check if the lock is held exclusively */
    Assert(LWLockHeldByMe(bufHdr->content_lock));

    LockBufHdr(bufHdr);

    Assert(bufHdr->refcount > 0);

    /*
     * If the buffer was not dirty already, do vacuum cost accounting.
     */
    if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
        VacuumCostBalance += VacuumCostPageDirty;

    bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);

    UnlockBufHdr(bufHdr);
}
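/*
 * Illustrative aside -- not part of the original bufmgr.c.  The contract
 * stated above (buffer pinned and exclusive-locked before dirtying) yields
 * the usual page-modification pattern shown below.  A minimal sketch; the
 * enclosing function is hypothetical, and a real caller would also follow
 * the WAL logging rules around the modification:
 */
static void
modify_block_sketch(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);   /* pins the buffer */

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);     /* exclusive content lock */
    /* ... modify the page contents here ... */
    MarkBufferDirty(buf);                       /* safe: pinned + exclusive */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);                         /* drop the pin */
}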
/*
 * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
 *
 * Formerly, this saved one cycle of acquiring/releasing the BufMgrLock
 * compared to calling the two routines separately.  Now it's mainly just
 * a convenience function.  However, if the passed buffer is valid and
 * already contains the desired block, we just return it as-is; and that
 * does save considerable work compared to a full release and reacquire.
 *
 * Note: it is OK to pass buffer == InvalidBuffer, indicating that no old
 * buffer actually needs to be released.  This case is the same as
 * ReadBuffer, but can save some tests in the caller.
 */
Buffer
ReleaseAndReadBuffer(Buffer buffer,
                     Relation relation,
                     BlockNumber blockNum)
{
    volatile BufferDesc *bufHdr;

    if (BufferIsValid(buffer))
    {
        if (BufferIsLocal(buffer))
        {
            Assert(LocalRefCount[-buffer - 1] > 0);
            bufHdr = &LocalBufferDescriptors[-buffer - 1];
            if (bufHdr->tag.blockNum == blockNum &&
                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
                return buffer;
            ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
            LocalRefCount[-buffer - 1]--;
        }
        else
        {
            Assert(PrivateRefCount[buffer - 1] > 0);
            bufHdr = &BufferDescriptors[buffer - 1];
            /* we have pin, so it's ok to examine tag without spinlock */
            if (bufHdr->tag.blockNum == blockNum &&
                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
                return buffer;
            UnpinBuffer(bufHdr, true);
        }
    }

    return ReadBuffer(relation, blockNum);
}

/*
 * PinBuffer -- make buffer unavailable for replacement.
 *
 * For the default access strategy, the buffer's usage_count is incremented
 * when we first pin it; for other strategies we just make sure the
 * usage_count isn't zero.  (The idea of the latter is that we don't want
 * synchronized heap scans to inflate the count, but we need it to not be
 * zero to discourage other backends from stealing buffers from our ring.
 * As long as we cycle through the ring faster than the global clock-sweep
 * cycles, buffers in our ring won't be chosen as victims for replacement
 * by other backends.)
 *
 * This should be applied only to shared buffers, never local ones.
 *
 * Note that ResourceOwnerEnlargeBuffers must have been done already.
 *
 * Returns TRUE if buffer is BM_VALID, else FALSE.  This provision allows
 * some callers to avoid an extra spinlock cycle.
 */
static bool
PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
{
    int         b = buf->buf_id;
    bool        result;

    if (PrivateRefCount[b] == 0)
    {
        LockBufHdr(buf);
        buf->refcount++;
        if (strategy == NULL)
        {
            if (buf->usage_count < BM_MAX_USAGE_COUNT)
                buf->usage_count++;
        }
        else
        {
            /* non-default strategy: just make sure usage_count isn't zero */
            if (buf->usage_count == 0)
                buf->usage_count = 1;
        }
        result = (buf->flags & BM_VALID) != 0;
        UnlockBufHdr(buf);
    }
    else
    {
        /* If we previously pinned the buffer, it must surely be valid */
        result = true;
    }
    PrivateRefCount[b]++;
    Assert(PrivateRefCount[b] > 0);
    ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                BufferDescriptorGetBuffer(buf));
    return result;
}
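/*
 * Illustrative aside -- not part of the original bufmgr.c.  A typical use of
 * ReleaseAndReadBuffer() above is a block-at-a-time loop: the caller carries
 * one buffer variable along, and whenever the next request happens to be for
 * the block already held, the release/reacquire is skipped entirely.  A
 * minimal sketch with a hypothetical enclosing function:
 */
static void
walk_blocks_sketch(Relation rel, BlockNumber nblocks)
{
    Buffer      buf = InvalidBuffer;    /* nothing to release on first call */
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        buf = ReleaseAndReadBuffer(buf, rel, blkno);
        /* ... inspect the page under the appropriate content lock ... */
    }

    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
}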