bufmgr.c
		{
			if (buf->usage_count == 0)
				buf->usage_count = 1;
		}
		result = (buf->flags & BM_VALID) != 0;
		UnlockBufHdr(buf);
	}
	else
	{
		/* If we previously pinned the buffer, it must surely be valid */
		result = true;
	}
	PrivateRefCount[b]++;
	Assert(PrivateRefCount[b] > 0);
	ResourceOwnerRememberBuffer(CurrentResourceOwner,
								BufferDescriptorGetBuffer(buf));
	return result;
}

/*
 * PinBuffer_Locked -- as above, but caller already locked the buffer header.
 * The spinlock is released before return.
 *
 * Currently, no callers of this function want to modify the buffer's
 * usage_count at all, so there's no need for a strategy parameter.
 * Also we don't bother with a BM_VALID test (the caller could check that for
 * itself).
 *
 * Note: use of this routine is frequently mandatory, not just an optimization
 * to save a spin lock/unlock cycle, because we need to pin a buffer before
 * its state can change under us.
 */
static void
PinBuffer_Locked(volatile BufferDesc *buf)
{
	int			b = buf->buf_id;

	if (PrivateRefCount[b] == 0)
		buf->refcount++;
	UnlockBufHdr(buf);
	PrivateRefCount[b]++;
	Assert(PrivateRefCount[b] > 0);
	ResourceOwnerRememberBuffer(CurrentResourceOwner,
								BufferDescriptorGetBuffer(buf));
}

/*
 * UnpinBuffer -- make buffer available for replacement.
 *
 * This should be applied only to shared buffers, never local ones.
 *
 * Most but not all callers want CurrentResourceOwner to be adjusted.
 * Those that don't should pass fixOwner = FALSE.
 */
static void
UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
{
	int			b = buf->buf_id;

	if (fixOwner)
		ResourceOwnerForgetBuffer(CurrentResourceOwner,
								  BufferDescriptorGetBuffer(buf));

	Assert(PrivateRefCount[b] > 0);
	PrivateRefCount[b]--;
	if (PrivateRefCount[b] == 0)
	{
		/* I'd better not still hold any locks on the buffer */
		Assert(!LWLockHeldByMe(buf->content_lock));
		Assert(!LWLockHeldByMe(buf->io_in_progress_lock));

		LockBufHdr(buf);

		/* Decrement the shared reference count */
		Assert(buf->refcount > 0);
		buf->refcount--;

		/* Support LockBufferForCleanup() */
		if ((buf->flags & BM_PIN_COUNT_WAITER) &&
			buf->refcount == 1)
		{
			/* we just released the last pin other than the waiter's */
			int			wait_backend_pid = buf->wait_backend_pid;

			buf->flags &= ~BM_PIN_COUNT_WAITER;
			UnlockBufHdr(buf);
			ProcSendSignal(wait_backend_pid);
		}
		else
			UnlockBufHdr(buf);
	}
}
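/*
 * [Illustrative sketch, not part of bufmgr.c.]  The pin/unpin routines above
 * keep a backend-local count (PrivateRefCount) in front of the shared
 * refcount, so only the 0 -> 1 and 1 -> 0 transitions have to take the
 * buffer-header spinlock; re-pinning an already-pinned buffer is a plain
 * local increment.  A minimal standalone version of that idea, with
 * hypothetical names (local_pins, shared_pin, shared_unpin), might look
 * like this:
 */
#ifdef BUFMGR_PIN_SKETCH			/* hypothetical guard; sketch only */
#define PIN_SKETCH_NBUFFERS 128

static int	local_pins[PIN_SKETCH_NBUFFERS];	/* per-backend pin counts */

/* Stand-ins for the spinlock-protected shared refcount operations. */
extern void shared_pin(int buf_id);
extern void shared_unpin(int buf_id);

static void
pin_sketch(int buf_id)
{
	/* Only the first local pin needs to touch shared state. */
	if (local_pins[buf_id]++ == 0)
		shared_pin(buf_id);
}

static void
unpin_sketch(int buf_id)
{
	/* Only the last local unpin needs to touch shared state. */
	if (--local_pins[buf_id] == 0)
		shared_unpin(buf_id);
}
#endif							/* BUFMGR_PIN_SKETCH */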
/*
 * BufferSync -- Write out all dirty buffers in the pool.
 *
 * This is called at checkpoint time to write out all dirty shared buffers.
 * The checkpoint request flags should be passed in; currently the only one
 * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes.
 */
static void
BufferSync(int flags)
{
	int			buf_id;
	int			num_to_scan;
	int			num_to_write;
	int			num_written;

	/* Make sure we can handle the pin inside SyncOneBuffer */
	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

	/*
	 * Loop over all buffers, and mark the ones that need to be written with
	 * BM_CHECKPOINT_NEEDED.  Count them as we go (num_to_write), so that we
	 * can estimate how much work needs to be done.
	 *
	 * This allows us to write only those pages that were dirty when the
	 * checkpoint began, and not those that get dirtied while it proceeds.
	 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
	 * later in this function, or by normal backends or the bgwriter cleaning
	 * scan, the flag is cleared.  Any buffer dirtied after this point won't
	 * have the flag set.
	 *
	 * Note that if we fail to write some buffer, we may leave buffers with
	 * BM_CHECKPOINT_NEEDED still set.  This is OK since any such buffer
	 * would certainly need to be written for the next checkpoint attempt,
	 * too.
	 */
	num_to_write = 0;
	for (buf_id = 0; buf_id < NBuffers; buf_id++)
	{
		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];

		/*
		 * Header spinlock is enough to examine BM_DIRTY, see comment in
		 * SyncOneBuffer.
		 */
		LockBufHdr(bufHdr);

		if (bufHdr->flags & BM_DIRTY)
		{
			bufHdr->flags |= BM_CHECKPOINT_NEEDED;
			num_to_write++;
		}

		UnlockBufHdr(bufHdr);
	}

	if (num_to_write == 0)
		return;					/* nothing to do */

	/*
	 * Loop over all buffers again, and write the ones (still) marked with
	 * BM_CHECKPOINT_NEEDED.  In this loop, we start at the clock sweep point
	 * since we might as well dump soon-to-be-recycled buffers first.
	 *
	 * Note that we don't read the buffer alloc count here --- that should be
	 * left untouched till the next BgBufferSync() call.
	 */
	buf_id = StrategySyncStart(NULL, NULL);
	num_to_scan = NBuffers;
	num_written = 0;
	while (num_to_scan-- > 0)
	{
		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];

		/*
		 * We don't need to acquire the lock here, because we're only looking
		 * at a single bit.  It's possible that someone else writes the
		 * buffer and clears the flag right after we check, but that doesn't
		 * matter since SyncOneBuffer will then do nothing.  However, there
		 * is a further race condition: it's conceivable that between the
		 * time we examine the bit here and the time SyncOneBuffer acquires
		 * lock, someone else not only wrote the buffer but replaced it with
		 * another page and dirtied it.  In that improbable case, SyncOneBuffer
		 * will write the buffer though we didn't need to.  It doesn't seem
		 * worth guarding against this, though.
		 */
		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
		{
			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
			{
				BgWriterStats.m_buf_written_checkpoints++;
				num_written++;

				/*
				 * We know there are at most num_to_write buffers with
				 * BM_CHECKPOINT_NEEDED set; so we can stop scanning if
				 * num_written reaches num_to_write.
				 *
				 * Note that num_written doesn't include buffers written by
				 * other backends, or by the bgwriter cleaning scan.  That
				 * means that the estimate of how much progress we've made is
				 * conservative, and also that this test will often fail to
				 * trigger.  But it seems worth making anyway.
				 */
				if (num_written >= num_to_write)
					break;

				/*
				 * Perform normal bgwriter duties and sleep to throttle our
				 * I/O rate.
				 */
				CheckpointWriteDelay(flags,
									 (double) num_written / num_to_write);
			}
		}

		if (++buf_id >= NBuffers)
			buf_id = 0;
	}

	/*
	 * Update checkpoint statistics.  As noted above, this doesn't include
	 * buffers written by other backends or bgwriter scan.
	 */
	CheckpointStats.ckpt_bufs_written += num_written;
}
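/*
 * [Illustrative sketch, not part of bufmgr.c.]  BufferSync's shape is a
 * classic two-pass "mark, then sweep" checkpoint: pass one snapshots which
 * buffers are dirty right now, pass two writes only still-flagged buffers,
 * scanning circularly from the clock-sweep point so soon-to-be-recycled
 * pages go out first.  A standalone version with hypothetical names
 * (needs_ckpt, buf_is_dirty, write_one) might look like this:
 */
#ifdef BUFMGR_CKPT_SKETCH			/* hypothetical guard; sketch only */
#include <stdbool.h>

#define CKPT_SKETCH_NBUFFERS 128

static bool needs_ckpt[CKPT_SKETCH_NBUFFERS];

extern bool buf_is_dirty(int buf_id);
extern bool write_one(int buf_id);	/* clears needs_ckpt[buf_id] on success */

static void
checkpoint_sketch(int sweep_start)
{
	int			i,
				id,
				to_write = 0,
				written = 0;

	/* Pass 1: flag everything that is dirty at checkpoint start. */
	for (i = 0; i < CKPT_SKETCH_NBUFFERS; i++)
	{
		if (buf_is_dirty(i))
		{
			needs_ckpt[i] = true;
			to_write++;
		}
	}

	/* Pass 2: circular scan from the clock-sweep point. */
	for (i = 0, id = sweep_start;
		 i < CKPT_SKETCH_NBUFFERS && written < to_write;
		 i++)
	{
		if (needs_ckpt[id] && write_one(id))
			written++;
		if (++id >= CKPT_SKETCH_NBUFFERS)
			id = 0;				/* wrap around the pool */
	}
}
#endif							/* BUFMGR_CKPT_SKETCH */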
/*
 * BgBufferSync -- Write out some dirty buffers in the pool.
 *
 * This is called periodically by the background writer process.
 */
void
BgBufferSync(void)
{
	/* info obtained from freelist.c */
	int			strategy_buf_id;
	uint32		strategy_passes;
	uint32		recent_alloc;

	/*
	 * Information saved between calls so we can determine the strategy
	 * point's advance rate and avoid scanning already-cleaned buffers.
	 */
	static bool saved_info_valid = false;
	static int	prev_strategy_buf_id;
	static uint32 prev_strategy_passes;
	static int	next_to_clean;
	static uint32 next_passes;

	/* Moving averages of allocation rate and clean-buffer density */
	static float smoothed_alloc = 0;
	static float smoothed_density = 10.0;

	/* Potentially these could be tunables, but for now, not */
	float		smoothing_samples = 16;
	float		scan_whole_pool_milliseconds = 120000.0;

	/* Used to compute how far we scan ahead */
	long		strategy_delta;
	int			bufs_to_lap;
	int			bufs_ahead;
	float		scans_per_alloc;
	int			reusable_buffers_est;
	int			upcoming_alloc_est;
	int			min_scan_buffers;

	/* Variables for the scanning loop proper */
	int			num_to_scan;
	int			num_written;
	int			reusable_buffers;

	/*
	 * Find out where the freelist clock sweep currently is, and how many
	 * buffer allocations have happened since our last call.
	 */
	strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);

	/* Report buffer alloc counts to pgstat */
	BgWriterStats.m_buf_alloc += recent_alloc;

	/*
	 * If we're not running the LRU scan, just stop after doing the stats
	 * stuff.  We mark the saved state invalid so that we can recover sanely
	 * if LRU scan is turned back on later.
	 */
	if (bgwriter_lru_maxpages <= 0)
	{
		saved_info_valid = false;
		return;
	}

	/*
	 * Compute strategy_delta = how many buffers have been scanned by the
	 * clock sweep since last time.  If first time through, assume none.
	 * Then see if we are still ahead of the clock sweep, and if so, how many
	 * buffers we could scan before we'd catch up with it and "lap" it.
	 * Note: the weird-looking coding of the xxx_passes comparisons is to
	 * avoid bogus behavior when the passes counts wrap around.
	 */
	if (saved_info_valid)
	{
		int32		passes_delta = strategy_passes - prev_strategy_passes;

		strategy_delta = strategy_buf_id - prev_strategy_buf_id;
		strategy_delta += (long) passes_delta * NBuffers;

		Assert(strategy_delta >= 0);

		if ((int32) (next_passes - strategy_passes) > 0)
		{
			/* we're one pass ahead of the strategy point */
			bufs_to_lap = strategy_buf_id - next_to_clean;
#ifdef BGW_DEBUG
			elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
				 next_passes, next_to_clean,
				 strategy_passes, strategy_buf_id,
				 strategy_delta, bufs_to_lap);
#endif
		}
		else if (next_passes == strategy_passes &&
				 next_to_clean >= strategy_buf_id)
		{
			/* on same pass, but ahead or at least not behind */
			bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
#ifdef BGW_DEBUG
			elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
				 next_passes, next_to_clean,
				 strategy_passes, strategy_buf_id,
				 strategy_delta, bufs_to_lap);
#endif
		}
		else
		{
			/*
			 * We're behind, so skip forward to the strategy point and start
			 * cleaning from there.
			 */
#ifdef BGW_DEBUG
			elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
				 next_passes, next_to_clean,
				 strategy_passes, strategy_buf_id,
				 strategy_delta);
#endif
			next_to_clean = strategy_buf_id;
			next_passes = strategy_passes;
			bufs_to_lap = NBuffers;
		}
	}
	else
	{
		/*
		 * Initializing at startup or after LRU scanning had been off.
		 * Always start at the strategy point.
		 */
#ifdef BGW_DEBUG
		elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
			 strategy_passes, strategy_buf_id);
#endif
		strategy_delta = 0;
		next_to_clean = strategy_buf_id;
		next_passes = strategy_passes;
		bufs_to_lap = NBuffers;
	}
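
	/*
	 * [Illustrative aside, not part of bufmgr.c.]  The (int32) casts above
	 * implement wraparound-safe ("serial number") comparison of the uint32
	 * pass counters: the subtraction is taken modulo 2^32 and the result is
	 * reinterpreted as signed, so
	 *
	 *		(int32) (a - b) > 0
	 *
	 * reads as "a is ahead of b" and stays correct across wraparound as long
	 * as the two counters are within 2^31 passes of each other.  For
	 * example, with a = 0 and b = 0xFFFFFFFF (b has just wrapped), a - b is
	 * 1 modulo 2^32, so a is correctly seen as one pass ahead of b, even
	 * though a < b numerically.
	 */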

	/* Update saved info for next time */
	prev_strategy_buf_id = strategy_buf_id;
	prev_strategy_passes = strategy_passes;
	saved_info_valid = true;

	/*
	 * Compute how many buffers had to be scanned for each new allocation,
	 * ie, 1/density of reusable buffers, and track a moving average of that.
	 *
	 * If the strategy point didn't move, we don't update the density
	 * estimate.
	 */
	if (strategy_delta > 0 && recent_alloc > 0)
	{
		scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
		smoothed_density += (scans_per_alloc - smoothed_density) /
			smoothing_samples;
	}

	/*
	 * Estimate how many reusable buffers there are between the current
	 * strategy point and where we've scanned ahead to, based on the smoothed
	 * density estimate.
	 */
	bufs_ahead = NBuffers - bufs_to_lap;
	reusable_buffers_est = (float) bufs_ahead / smoothed_density;

	/*
	 * Track a moving average of recent buffer allocations.  Here, rather
	 * than a true average we want a fast-attack, slow-decline behavior: we
	 * immediately follow any increase.
	 */
	if (smoothed_alloc <= (float) recent_alloc)
		smoothed_alloc = recent_alloc;
	else
		smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
			smoothing_samples;

	/* Scale the estimate by a GUC to allow more aggressive tuning. */
	upcoming_alloc_est = smoothed_alloc * bgwriter_lru_multiplier;

	/*
	 * Even in cases where there's been little or no buffer allocation
	 * activity, we want to make a small amount of progress through the
	 * buffer cache so that as many reusable buffers as possible are clean
	 * after an idle period.
	 *
	 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
	 * the BGW will be called during the scan_whole_pool time; slice the
	 * buffer pool into that many sections.
	 */
	min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));

	if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
	{
#ifdef BGW_DEBUG
		elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
			 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
#endif
		upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
	}

	/*
	 * Now write out dirty reusable buffers, working forward from the
	 * next_to_clean point, until we have lapped the strategy scan, or
	 * cleaned enough buffers to match our estimate of the next cycle's
	 * allocation requirements, or hit the bgwriter_lru_maxpages limit.
	 */

	/* Make sure we can handle the pin inside SyncOneBuffer */
	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

	num_to_scan = bufs_to_lap;
	num_written = 0;
	reusable_buffers = reusable_buffers_est;

	/* Execute the LRU scan */
	while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
	{
		int			buffer_state = SyncOneBuffer(next_to_clean, true);

		if (++next_to_clean >= NBuffers)
		{
			next_to_clean = 0;
			next_passes++;
		}
		num_to_scan--;

		if (buffer_state & BUF_WRITTEN)
		{
			reusable_buffers++;
			if (++num_written >= bgwriter_lru_maxpages)
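
/*
 * [Illustrative sketch, not part of bufmgr.c; the listing above is cut off
 * by its source mid-loop.]  Both smoothed_density and smoothed_alloc use the
 * same single-pole filter, x += (sample - x) / N, an exponential moving
 * average weighted toward roughly the last N samples; smoothed_alloc adds a
 * fast-attack rule that jumps straight to any sample above the current
 * average and only decays gradually.  Standalone, with hypothetical names:
 */
#ifdef BUFMGR_EMA_SKETCH			/* hypothetical guard; sketch only */
/* Plain EMA: fold one new sample into the running average. */
static float
ema_update(float avg, float sample, float n_samples)
{
	return avg + (sample - avg) / n_samples;
}

/* Fast-attack variant: follow increases immediately, decline slowly. */
static float
ema_fast_attack(float avg, float sample, float n_samples)
{
	if (sample >= avg)
		return sample;			/* attack: jump up at once */
	return avg + (sample - avg) / n_samples;	/* decay gradually */
}
#endif							/* BUFMGR_EMA_SKETCH */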