bufmgr.c
		{
			if (buf->usage_count == 0)
				buf->usage_count = 1;
		}
		result = (buf->flags & BM_VALID) != 0;
		UnlockBufHdr(buf);
	}
	else
	{
		/* If we previously pinned the buffer, it must surely be valid */
		result = true;
	}
	PrivateRefCount[b]++;
	Assert(PrivateRefCount[b] > 0);
	ResourceOwnerRememberBuffer(CurrentResourceOwner,
								BufferDescriptorGetBuffer(buf));
	return result;
}

/*
 * PinBuffer_Locked -- as above, but caller already locked the buffer header.
 * The spinlock is released before return.
 *
 * Currently, no callers of this function want to modify the buffer's
 * usage_count at all, so there's no need for a strategy parameter.
 * Also we don't bother with a BM_VALID test (the caller could check that for
 * itself).
 *
 * Note: use of this routine is frequently mandatory, not just an optimization
 * to save a spin lock/unlock cycle, because we need to pin a buffer before
 * its state can change under us.
 */
static void
PinBuffer_Locked(volatile BufferDesc *buf)
{
	int			b = buf->buf_id;

	if (PrivateRefCount[b] == 0)
		buf->refcount++;
	UnlockBufHdr(buf);
	PrivateRefCount[b]++;
	Assert(PrivateRefCount[b] > 0);
	ResourceOwnerRememberBuffer(CurrentResourceOwner,
								BufferDescriptorGetBuffer(buf));
}

/*
 * UnpinBuffer -- make buffer available for replacement.
 *
 * This should be applied only to shared buffers, never local ones.
 *
 * Most but not all callers want CurrentResourceOwner to be adjusted.
 * Those that don't should pass fixOwner = FALSE.
 */
static void
UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
{
	int			b = buf->buf_id;

	if (fixOwner)
		ResourceOwnerForgetBuffer(CurrentResourceOwner,
								  BufferDescriptorGetBuffer(buf));

	Assert(PrivateRefCount[b] > 0);
	PrivateRefCount[b]--;
	if (PrivateRefCount[b] == 0)
	{
		/* I'd better not still hold any locks on the buffer */
		Assert(!LWLockHeldByMe(buf->content_lock));
		Assert(!LWLockHeldByMe(buf->io_in_progress_lock));

		LockBufHdr(buf);

		/* Decrement the shared reference count */
		Assert(buf->refcount > 0);
		buf->refcount--;

		/* Support LockBufferForCleanup() */
		if ((buf->flags & BM_PIN_COUNT_WAITER) &&
			buf->refcount == 1)
		{
			/* we just released the last pin other than the waiter's */
			int			wait_backend_pid = buf->wait_backend_pid;

			buf->flags &= ~BM_PIN_COUNT_WAITER;
			UnlockBufHdr(buf);
			ProcSendSignal(wait_backend_pid);
		}
		else
			UnlockBufHdr(buf);
	}
}
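/*
 * [Illustrative sketch, not part of bufmgr.c.]  The pin/unpin routines above
 * keep a backend-local count (PrivateRefCount) in front of the shared
 * refcount, so only the 0 -> 1 and 1 -> 0 transitions have to take the
 * buffer-header spinlock; re-pinning an already-pinned buffer is a plain
 * local increment.  A minimal standalone version of that idea, with
 * hypothetical names (local_pins, shared_pin, shared_unpin), might look
 * like this:
 */
#ifdef BUFMGR_PIN_SKETCH			/* hypothetical guard; sketch only */
#define PIN_SKETCH_NBUFFERS 128

static int	local_pins[PIN_SKETCH_NBUFFERS];	/* per-backend pin counts */

/* Stand-ins for the spinlock-protected shared refcount operations. */
extern void shared_pin(int buf_id);
extern void shared_unpin(int buf_id);

static void
pin_sketch(int buf_id)
{
	/* Only the first local pin needs to touch shared state. */
	if (local_pins[buf_id]++ == 0)
		shared_pin(buf_id);
}

static void
unpin_sketch(int buf_id)
{
	/* Only the last local unpin needs to touch shared state. */
	if (--local_pins[buf_id] == 0)
		shared_unpin(buf_id);
}
#endif							/* BUFMGR_PIN_SKETCH */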
/*
 * BufferSync -- Write out all dirty buffers in the pool.
 *
 * This is called at checkpoint time to write out all dirty shared buffers.
 * The checkpoint request flags should be passed in; currently the only one
 * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes.
 */
static void
BufferSync(int flags)
{
	int			buf_id;
	int			num_to_scan;
	int			num_to_write;
	int			num_written;

	/* Make sure we can handle the pin inside SyncOneBuffer */
	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

	/*
	 * Loop over all buffers, and mark the ones that need to be written with
	 * BM_CHECKPOINT_NEEDED.  Count them as we go (num_to_write), so that we
	 * can estimate how much work needs to be done.
	 *
	 * This allows us to write only those pages that were dirty when the
	 * checkpoint began, and not those that get dirtied while it proceeds.
	 * Whenever a page with BM_CHECKPOINT_NEEDED is written out, either by us
	 * later in this function, or by normal backends or the bgwriter cleaning
	 * scan, the flag is cleared.  Any buffer dirtied after this point won't
	 * have the flag set.
	 *
	 * Note that if we fail to write some buffer, we may leave buffers with
	 * BM_CHECKPOINT_NEEDED still set.  This is OK since any such buffer
	 * would certainly need to be written for the next checkpoint attempt,
	 * too.
	 */
	num_to_write = 0;
	for (buf_id = 0; buf_id < NBuffers; buf_id++)
	{
		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];

		/*
		 * Header spinlock is enough to examine BM_DIRTY, see comment in
		 * SyncOneBuffer.
		 */
		LockBufHdr(bufHdr);

		if (bufHdr->flags & BM_DIRTY)
		{
			bufHdr->flags |= BM_CHECKPOINT_NEEDED;
			num_to_write++;
		}

		UnlockBufHdr(bufHdr);
	}

	if (num_to_write == 0)
		return;					/* nothing to do */

	/*
	 * Loop over all buffers again, and write the ones (still) marked with
	 * BM_CHECKPOINT_NEEDED.  In this loop, we start at the clock sweep point
	 * since we might as well dump soon-to-be-recycled buffers first.
	 *
	 * Note that we don't read the buffer alloc count here --- that should be
	 * left untouched till the next BgBufferSync() call.
	 */
	buf_id = StrategySyncStart(NULL, NULL);
	num_to_scan = NBuffers;
	num_written = 0;
	while (num_to_scan-- > 0)
	{
		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];

		/*
		 * We don't need to acquire the lock here, because we're only looking
		 * at a single bit.  It's possible that someone else writes the
		 * buffer and clears the flag right after we check, but that doesn't
		 * matter since SyncOneBuffer will then do nothing.  However, there
		 * is a further race condition: it's conceivable that between the
		 * time we examine the bit here and the time SyncOneBuffer acquires
		 * lock, someone else not only wrote the buffer but replaced it with
		 * another page and dirtied it.  In that improbable case, SyncOneBuffer
		 * will write the buffer though we didn't need to.  It doesn't seem
		 * worth guarding against this, though.
		 */
		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
		{
			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
			{
				BgWriterStats.m_buf_written_checkpoints++;
				num_written++;

				/*
				 * We know there are at most num_to_write buffers with
				 * BM_CHECKPOINT_NEEDED set; so we can stop scanning if
				 * num_written reaches num_to_write.
				 *
				 * Note that num_written doesn't include buffers written by
				 * other backends, or by the bgwriter cleaning scan.  That
				 * means that the estimate of how much progress we've made is
				 * conservative, and also that this test will often fail to
				 * trigger.  But it seems worth making anyway.
				 */
				if (num_written >= num_to_write)
					break;

				/*
				 * Perform normal bgwriter duties and sleep to throttle our
				 * I/O rate.
				 */
				CheckpointWriteDelay(flags,
									 (double) num_written / num_to_write);
			}
		}

		if (++buf_id >= NBuffers)
			buf_id = 0;
	}

	/*
	 * Update checkpoint statistics.  As noted above, this doesn't include
	 * buffers written by other backends or bgwriter scan.
	 */
	CheckpointStats.ckpt_bufs_written += num_written;
}
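/*
 * [Illustrative sketch, not part of bufmgr.c.]  BufferSync's shape is a
 * classic two-pass "mark, then sweep" checkpoint: pass one snapshots which
 * buffers are dirty right now, pass two writes only still-flagged buffers,
 * scanning circularly from the clock-sweep point so soon-to-be-recycled
 * pages go out first.  A standalone version with hypothetical names
 * (needs_ckpt, buf_is_dirty, write_one) might look like this:
 */
#ifdef BUFMGR_CKPT_SKETCH			/* hypothetical guard; sketch only */
#include <stdbool.h>

#define CKPT_SKETCH_NBUFFERS 128

static bool needs_ckpt[CKPT_SKETCH_NBUFFERS];

extern bool buf_is_dirty(int buf_id);
extern bool write_one(int buf_id);	/* clears needs_ckpt[buf_id] on success */

static void
checkpoint_sketch(int sweep_start)
{
	int			i,
				id,
				to_write = 0,
				written = 0;

	/* Pass 1: flag everything that is dirty at checkpoint start. */
	for (i = 0; i < CKPT_SKETCH_NBUFFERS; i++)
	{
		if (buf_is_dirty(i))
		{
			needs_ckpt[i] = true;
			to_write++;
		}
	}

	/* Pass 2: circular scan from the clock-sweep point. */
	for (i = 0, id = sweep_start;
		 i < CKPT_SKETCH_NBUFFERS && written < to_write;
		 i++)
	{
		if (needs_ckpt[id] && write_one(id))
			written++;
		if (++id >= CKPT_SKETCH_NBUFFERS)
			id = 0;				/* wrap around the pool */
	}
}
#endif							/* BUFMGR_CKPT_SKETCH */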
/*
 * BgBufferSync -- Write out some dirty buffers in the pool.
 *
 * This is called periodically by the background writer process.
 */
void
BgBufferSync(void)
{
	/* info obtained from freelist.c */
	int			strategy_buf_id;
	uint32		strategy_passes;
	uint32		recent_alloc;

	/*
	 * Information saved between calls so we can determine the strategy
	 * point's advance rate and avoid scanning already-cleaned buffers.
	 */
	static bool saved_info_valid = false;
	static int	prev_strategy_buf_id;
	static uint32 prev_strategy_passes;
	static int	next_to_clean;
	static uint32 next_passes;

	/* Moving averages of allocation rate and clean-buffer density */
	static float smoothed_alloc = 0;
	static float smoothed_density = 10.0;

	/* Potentially these could be tunables, but for now, not */
	float		smoothing_samples = 16;
	float		scan_whole_pool_milliseconds = 120000.0;

	/* Used to compute how far we scan ahead */
	long		strategy_delta;
	int			bufs_to_lap;
	int			bufs_ahead;
	float		scans_per_alloc;
	int			reusable_buffers_est;
	int			upcoming_alloc_est;
	int			min_scan_buffers;

	/* Variables for the scanning loop proper */
	int			num_to_scan;
	int			num_written;
	int			reusable_buffers;

	/*
	 * Find out where the freelist clock sweep currently is, and how many
	 * buffer allocations have happened since our last call.
	 */
	strategy_buf_id = StrategySyncStart(&strategy_passes, &recent_alloc);

	/* Report buffer alloc counts to pgstat */
	BgWriterStats.m_buf_alloc += recent_alloc;

	/*
	 * If we're not running the LRU scan, just stop after doing the stats
	 * stuff.  We mark the saved state invalid so that we can recover sanely
	 * if LRU scan is turned back on later.
	 */
	if (bgwriter_lru_maxpages <= 0)
	{
		saved_info_valid = false;
		return;
	}

	/*
	 * Compute strategy_delta = how many buffers have been scanned by the
	 * clock sweep since last time.  If first time through, assume none.
	 * Then see if we are still ahead of the clock sweep, and if so, how many
	 * buffers we could scan before we'd catch up with it and "lap" it.
	 * Note: the weird-looking coding of the xxx_passes comparisons is to
	 * avoid bogus behavior when the passes counts wrap around.
	 */
	if (saved_info_valid)
	{
		int32		passes_delta = strategy_passes - prev_strategy_passes;

		strategy_delta = strategy_buf_id - prev_strategy_buf_id;
		strategy_delta += (long) passes_delta * NBuffers;

		Assert(strategy_delta >= 0);

		if ((int32) (next_passes - strategy_passes) > 0)
		{
			/* we're one pass ahead of the strategy point */
			bufs_to_lap = strategy_buf_id - next_to_clean;
#ifdef BGW_DEBUG
			elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
				 next_passes, next_to_clean,
				 strategy_passes, strategy_buf_id,
				 strategy_delta, bufs_to_lap);
#endif
		}
		else if (next_passes == strategy_passes &&
				 next_to_clean >= strategy_buf_id)
		{
			/* on same pass, but ahead or at least not behind */
			bufs_to_lap = NBuffers - (next_to_clean - strategy_buf_id);
#ifdef BGW_DEBUG
			elog(DEBUG2, "bgwriter ahead: bgw %u-%u strategy %u-%u delta=%ld lap=%d",
				 next_passes, next_to_clean,
				 strategy_passes, strategy_buf_id,
				 strategy_delta, bufs_to_lap);
#endif
		}
		else
		{
			/*
			 * We're behind, so skip forward to the strategy point and start
			 * cleaning from there.
			 */
#ifdef BGW_DEBUG
			elog(DEBUG2, "bgwriter behind: bgw %u-%u strategy %u-%u delta=%ld",
				 next_passes, next_to_clean,
				 strategy_passes, strategy_buf_id,
				 strategy_delta);
#endif
			next_to_clean = strategy_buf_id;
			next_passes = strategy_passes;
			bufs_to_lap = NBuffers;
		}
	}
	else
	{
		/*
		 * Initializing at startup or after LRU scanning had been off.
		 * Always start at the strategy point.
		 */
#ifdef BGW_DEBUG
		elog(DEBUG2, "bgwriter initializing: strategy %u-%u",
			 strategy_passes, strategy_buf_id);
#endif
		strategy_delta = 0;
		next_to_clean = strategy_buf_id;
		next_passes = strategy_passes;
		bufs_to_lap = NBuffers;
	}
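
	/*
	 * [Illustrative aside, not part of bufmgr.c.]  The (int32) casts above
	 * implement wraparound-safe ("serial number") comparison of the uint32
	 * pass counters: the subtraction is taken modulo 2^32 and the result is
	 * reinterpreted as signed, so
	 *
	 *		(int32) (a - b) > 0
	 *
	 * reads as "a is ahead of b" and stays correct across wraparound as long
	 * as the two counters are within 2^31 passes of each other.  For
	 * example, with a = 0 and b = 0xFFFFFFFF (b has just wrapped), a - b is
	 * 1 modulo 2^32, so a is correctly seen as one pass ahead of b, even
	 * though a < b numerically.
	 */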

	/* Update saved info for next time */
	prev_strategy_buf_id = strategy_buf_id;
	prev_strategy_passes = strategy_passes;
	saved_info_valid = true;

	/*
	 * Compute how many buffers had to be scanned for each new allocation,
	 * ie, 1/density of reusable buffers, and track a moving average of that.
	 *
	 * If the strategy point didn't move, we don't update the density
	 * estimate.
	 */
	if (strategy_delta > 0 && recent_alloc > 0)
	{
		scans_per_alloc = (float) strategy_delta / (float) recent_alloc;
		smoothed_density += (scans_per_alloc - smoothed_density) /
			smoothing_samples;
	}

	/*
	 * Estimate how many reusable buffers there are between the current
	 * strategy point and where we've scanned ahead to, based on the smoothed
	 * density estimate.
	 */
	bufs_ahead = NBuffers - bufs_to_lap;
	reusable_buffers_est = (float) bufs_ahead / smoothed_density;

	/*
	 * Track a moving average of recent buffer allocations.  Here, rather
	 * than a true average we want a fast-attack, slow-decline behavior: we
	 * immediately follow any increase.
	 */
	if (smoothed_alloc <= (float) recent_alloc)
		smoothed_alloc = recent_alloc;
	else
		smoothed_alloc += ((float) recent_alloc - smoothed_alloc) /
			smoothing_samples;

	/* Scale the estimate by a GUC to allow more aggressive tuning. */
	upcoming_alloc_est = smoothed_alloc * bgwriter_lru_multiplier;

	/*
	 * Even in cases where there's been little or no buffer allocation
	 * activity, we want to make a small amount of progress through the
	 * buffer cache so that as many reusable buffers as possible are clean
	 * after an idle period.
	 *
	 * (scan_whole_pool_milliseconds / BgWriterDelay) computes how many times
	 * the BGW will be called during the scan_whole_pool time; slice the
	 * buffer pool into that many sections.
	 */
	min_scan_buffers = (int) (NBuffers / (scan_whole_pool_milliseconds / BgWriterDelay));

	if (upcoming_alloc_est < (min_scan_buffers + reusable_buffers_est))
	{
#ifdef BGW_DEBUG
		elog(DEBUG2, "bgwriter: alloc_est=%d too small, using min=%d + reusable_est=%d",
			 upcoming_alloc_est, min_scan_buffers, reusable_buffers_est);
#endif
		upcoming_alloc_est = min_scan_buffers + reusable_buffers_est;
	}

	/*
	 * Now write out dirty reusable buffers, working forward from the
	 * next_to_clean point, until we have lapped the strategy scan, or
	 * cleaned enough buffers to match our estimate of the next cycle's
	 * allocation requirements, or hit the bgwriter_lru_maxpages limit.
	 */

	/* Make sure we can handle the pin inside SyncOneBuffer */
	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);

	num_to_scan = bufs_to_lap;
	num_written = 0;
	reusable_buffers = reusable_buffers_est;

	/* Execute the LRU scan */
	while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
	{
		int			buffer_state = SyncOneBuffer(next_to_clean, true);

		if (++next_to_clean >= NBuffers)
		{
			next_to_clean = 0;
			next_passes++;
		}
		num_to_scan--;

		if (buffer_state & BUF_WRITTEN)
		{
			reusable_buffers++;
			if (++num_written >= bgwriter_lru_maxpages)
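
/*
 * [Illustrative sketch, not part of bufmgr.c; the listing above is cut off
 * by its source mid-loop.]  Both smoothed_density and smoothed_alloc use the
 * same single-pole filter, x += (sample - x) / N, an exponential moving
 * average weighted toward roughly the last N samples; smoothed_alloc adds a
 * fast-attack rule that jumps straight to any sample above the current
 * average and only decays gradually.  Standalone, with hypothetical names:
 */
#ifdef BUFMGR_EMA_SKETCH			/* hypothetical guard; sketch only */
/* Plain EMA: fold one new sample into the running average. */
static float
ema_update(float avg, float sample, float n_samples)
{
	return avg + (sample - avg) / n_samples;
}

/* Fast-attack variant: follow increases immediately, decline slowly. */
static float
ema_fast_attack(float avg, float sample, float n_samples)
{
	if (sample >= avg)
		return sample;			/* attack: jump up at once */
	return avg + (sample - avg) / n_samples;	/* decay gradually */
}
#endif							/* BUFMGR_EMA_SKETCH */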