bufmgr.c
     * above, or even while we are writing it out (since our share-lock
     * won't prevent hint-bit updates).  We will recheck the dirty bit
     * after re-locking the buffer header.
     */
    if (oldFlags & BM_DIRTY)
    {
        /*
         * We need a share-lock on the buffer contents to write it out
         * (else we might write invalid data, eg because someone else is
         * compacting the page contents while we write).  We must use a
         * conditional lock acquisition here to avoid deadlock.  Even
         * though the buffer was not pinned (and therefore surely not
         * locked) when StrategyGetBuffer returned it, someone else could
         * have pinned and exclusive-locked it by the time we get here.  If
         * we try to get the lock unconditionally, we'd block waiting for
         * them; if they later block waiting for us, deadlock ensues.
         * (This has been observed to happen when two backends are both
         * trying to split btree index pages, and the second one just
         * happens to be trying to split the page the first one got from
         * StrategyGetBuffer.)
         */
        if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
        {
            /*
             * If using a nondefault strategy, and writing the buffer
             * would require a WAL flush, let the strategy decide whether
             * to go ahead and write/reuse the buffer or to choose another
             * victim.  We need lock to inspect the page LSN, so this
             * can't be done inside StrategyGetBuffer.
             */
            if (strategy != NULL &&
                XLogNeedsFlush(BufferGetLSN(buf)) &&
                StrategyRejectBuffer(strategy, buf))
            {
                /* Drop lock/pin and loop around for another buffer */
                LWLockRelease(buf->content_lock);
                UnpinBuffer(buf, true);
                continue;
            }

            /* OK, do the I/O */
            FlushBuffer(buf, NULL);
            LWLockRelease(buf->content_lock);
        }
        else
        {
            /*
             * Someone else has locked the buffer, so give it up and loop
             * back to get another one.
             */
            UnpinBuffer(buf, true);
            continue;
        }
    }

    /*
     * To change the association of a valid buffer, we'll need to have
     * exclusive lock on both the old and new mapping partitions.
     */
    if (oldFlags & BM_TAG_VALID)
    {
        /*
         * Need to compute the old tag's hashcode and partition lock ID.
         * XXX is it worth storing the hashcode in BufferDesc so we need
         * not recompute it here?  Probably not.
         */
        oldTag = buf->tag;
        oldHash = BufTableHashCode(&oldTag);
        oldPartitionLock = BufMappingPartitionLock(oldHash);

        /*
         * Must lock the lower-numbered partition first to avoid
         * deadlocks.
         */
        if (oldPartitionLock < newPartitionLock)
        {
            LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
            LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
        }
        else if (oldPartitionLock > newPartitionLock)
        {
            LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
            LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);
        }
        else
        {
            /* only one partition, only one lock */
            LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
        }
    }
    else
    {
        /* if it wasn't valid, we need only the new partition */
        LWLockAcquire(newPartitionLock, LW_EXCLUSIVE);
        /* these just keep the compiler quiet about uninit variables */
        oldHash = 0;
        oldPartitionLock = 0;
    }

    /*
     * Try to make a hashtable entry for the buffer under its new tag.
     * This could fail because while we were writing someone else
     * allocated another buffer for the same block we want to read in.
     * Note that we have not yet removed the hashtable entry for the old
     * tag.
     */
    buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);

    if (buf_id >= 0)
    {
        /*
         * Got a collision.  Someone has already done what we were about to
         * do.  We'll just handle this as if it were found in the buffer
         * pool in the first place.  First, give up the buffer we were
         * planning to use.
         */
        UnpinBuffer(buf, true);

        /* Can give up that buffer's mapping partition lock now */
        if ((oldFlags & BM_TAG_VALID) &&
            oldPartitionLock != newPartitionLock)
            LWLockRelease(oldPartitionLock);

        /* remaining code should match code at top of routine */

        buf = &BufferDescriptors[buf_id];

        valid = PinBuffer(buf, strategy);

        /* Can release the mapping lock as soon as we've pinned it */
        LWLockRelease(newPartitionLock);

        *foundPtr = TRUE;

        if (!valid)
        {
            /*
             * We can only get here if (a) someone else is still reading
             * in the page, or (b) a previous read attempt failed.  We
             * have to wait for any active read attempt to finish, and
             * then set up our own read attempt if the page is still not
             * BM_VALID.  StartBufferIO does it all.
             */
            if (StartBufferIO(buf, true))
            {
                /*
                 * If we get here, previous attempts to read the buffer
                 * must have failed ... but we shall bravely try again.
                 */
                *foundPtr = FALSE;
            }
        }

        return buf;
    }

    /*
     * Need to lock the buffer header too in order to change its tag.
     */
    LockBufHdr(buf);

    /*
     * Somebody could have pinned or re-dirtied the buffer while we were
     * doing the I/O and making the new hashtable entry.  If so, we can't
     * recycle this buffer; we must undo everything we've done and start
     * over with a new victim buffer.
     */
    oldFlags = buf->flags;
    if (buf->refcount == 1 && !(oldFlags & BM_DIRTY))
        break;

    UnlockBufHdr(buf);
    BufTableDelete(&newTag, newHash);
    if ((oldFlags & BM_TAG_VALID) &&
        oldPartitionLock != newPartitionLock)
        LWLockRelease(oldPartitionLock);
    LWLockRelease(newPartitionLock);
    UnpinBuffer(buf, true);
}

/*
 * Okay, it's finally safe to rename the buffer.
 *
 * Clearing BM_VALID here is necessary, clearing the dirtybits is just
 * paranoia.  We also reset the usage_count since any recency of use of
 * the old content is no longer relevant.  (The usage_count starts out at
 * 1 so that the buffer can survive one clock-sweep pass.)
 */
buf->tag = newTag;
buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED |
                BM_CHECKPOINT_NEEDED | BM_IO_ERROR);
buf->flags |= BM_TAG_VALID;
buf->usage_count = 1;

UnlockBufHdr(buf);

if (oldFlags & BM_TAG_VALID)
{
    BufTableDelete(&oldTag, oldHash);
    if (oldPartitionLock != newPartitionLock)
        LWLockRelease(oldPartitionLock);
}

LWLockRelease(newPartitionLock);

/*
 * Buffer contents are currently invalid.  Try to get the io_in_progress
 * lock.  If StartBufferIO returns false, then someone else managed to
 * read it before we did, so there's nothing left for BufferAlloc() to do.
 */
if (StartBufferIO(buf, true))
    *foundPtr = FALSE;
else
    *foundPtr = TRUE;

return buf;
}
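/*
 * Illustrative aside -- not part of the original bufmgr.c.  The
 * partition-lock ordering rule inside BufferAlloc() above is the classic
 * deadlock-avoidance technique of always acquiring multiple locks in a
 * globally agreed order (here: increasing lock ID).  A minimal sketch of
 * the same pattern as a reusable helper; the helper name is hypothetical:
 */
static void
acquire_two_locks_in_order(LWLockId a, LWLockId b)
{
    if (a < b)
    {
        LWLockAcquire(a, LW_EXCLUSIVE);
        LWLockAcquire(b, LW_EXCLUSIVE);
    }
    else if (a > b)
    {
        LWLockAcquire(b, LW_EXCLUSIVE);
        LWLockAcquire(a, LW_EXCLUSIVE);
    }
    else
    {
        /* same lock: acquire it only once to avoid self-deadlock */
        LWLockAcquire(a, LW_EXCLUSIVE);
    }

    /*
     * Because every backend orders its acquisitions the same way, no cycle
     * of lock waits -- and hence no deadlock -- can form between backends.
     */
}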
/*
 * InvalidateBuffer -- mark a shared buffer invalid and return it to the
 * freelist.
 *
 * The buffer header spinlock must be held at entry.  We drop it before
 * returning.  (This is sane because the caller must have locked the
 * buffer in order to be sure it should be dropped.)
 *
 * This is used only in contexts such as dropping a relation.  We assume
 * that no other backend could possibly be interested in using the page,
 * so the only reason the buffer might be pinned is if someone else is
 * trying to write it out.  We have to let them finish before we can
 * reclaim the buffer.
 *
 * The buffer could get reclaimed by someone else while we are waiting
 * to acquire the necessary locks; if so, don't mess it up.
 */
static void
InvalidateBuffer(volatile BufferDesc *buf)
{
    BufferTag   oldTag;
    uint32      oldHash;            /* hash value for oldTag */
    LWLockId    oldPartitionLock;   /* buffer partition lock for it */
    BufFlags    oldFlags;

    /* Save the original buffer tag before dropping the spinlock */
    oldTag = buf->tag;

    UnlockBufHdr(buf);

    /*
     * Need to compute the old tag's hashcode and partition lock ID.  XXX is
     * it worth storing the hashcode in BufferDesc so we need not recompute
     * it here?  Probably not.
     */
    oldHash = BufTableHashCode(&oldTag);
    oldPartitionLock = BufMappingPartitionLock(oldHash);

retry:

    /*
     * Acquire exclusive mapping lock in preparation for changing the
     * buffer's association.
     */
    LWLockAcquire(oldPartitionLock, LW_EXCLUSIVE);

    /* Re-lock the buffer header */
    LockBufHdr(buf);

    /* If it's changed while we were waiting for lock, do nothing */
    if (!BUFFERTAGS_EQUAL(buf->tag, oldTag))
    {
        UnlockBufHdr(buf);
        LWLockRelease(oldPartitionLock);
        return;
    }

    /*
     * We assume the only reason for it to be pinned is that someone else is
     * flushing the page out.  Wait for them to finish.  (This could be an
     * infinite loop if the refcount is messed up... it would be nice to time
     * out after awhile, but there seems no way to be sure how many loops may
     * be needed.  Note that if the other guy has pinned the buffer but not
     * yet done StartBufferIO, WaitIO will fall through and we'll effectively
     * be busy-looping here.)
     */
    if (buf->refcount != 0)
    {
        UnlockBufHdr(buf);
        LWLockRelease(oldPartitionLock);
        /* safety check: should definitely not be our *own* pin */
        if (PrivateRefCount[buf->buf_id] != 0)
            elog(ERROR, "buffer is pinned in InvalidateBuffer");
        WaitIO(buf);
        goto retry;
    }

    /*
     * Clear out the buffer's tag and flags.  We must do this to ensure that
     * linear scans of the buffer array don't think the buffer is valid.
     */
    oldFlags = buf->flags;
    CLEAR_BUFFERTAG(buf->tag);
    buf->flags = 0;
    buf->usage_count = 0;

    UnlockBufHdr(buf);

    /*
     * Remove the buffer from the lookup hashtable, if it was in there.
     */
    if (oldFlags & BM_TAG_VALID)
        BufTableDelete(&oldTag, oldHash);

    /*
     * Done with mapping lock.
     */
    LWLockRelease(oldPartitionLock);

    /*
     * Insert the buffer at the head of the list of free buffers.
     */
    StrategyFreeBuffer(buf);
}
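/*
 * Illustrative aside -- not part of the original bufmgr.c.  Per the header
 * comment above, callers enter InvalidateBuffer() holding the buffer-header
 * spinlock, and the function releases it on every path.  A minimal sketch of
 * that calling convention; the helper name and its filtering condition are
 * hypothetical, loosely modeled on the relation-drop code paths:
 */
static void
invalidate_if_matches_sketch(volatile BufferDesc *bufHdr, RelFileNode rnode)
{
    LockBufHdr(bufHdr);
    if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
        InvalidateBuffer(bufHdr);       /* drops the header spinlock itself */
    else
        UnlockBufHdr(bufHdr);
}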
/*
 * MarkBufferDirty
 *
 * Marks buffer contents as dirty (actual write happens later).
 *
 * Buffer must be pinned and exclusive-locked.  (If caller does not hold
 * exclusive lock, then somebody could be in process of writing the buffer,
 * leading to risk of bad data written to disk.)
 */
void
MarkBufferDirty(Buffer buffer)
{
    volatile BufferDesc *bufHdr;

    if (!BufferIsValid(buffer))
        elog(ERROR, "bad buffer id: %d", buffer);

    if (BufferIsLocal(buffer))
    {
        MarkLocalBufferDirty(buffer);
        return;
    }

    bufHdr = &BufferDescriptors[buffer - 1];

    Assert(PrivateRefCount[buffer - 1] > 0);
    /* unfortunately we can't check if the lock is held exclusively */
    Assert(LWLockHeldByMe(bufHdr->content_lock));

    LockBufHdr(bufHdr);

    Assert(bufHdr->refcount > 0);

    /*
     * If the buffer was not dirty already, do vacuum cost accounting.
     */
    if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
        VacuumCostBalance += VacuumCostPageDirty;

    bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);

    UnlockBufHdr(bufHdr);
}
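/*
 * Illustrative aside -- not part of the original bufmgr.c.  The contract
 * stated above (buffer pinned and exclusive-locked before dirtying) yields
 * the usual page-modification pattern shown below.  A minimal sketch; the
 * enclosing function is hypothetical, and a real caller would also follow
 * the WAL logging rules around the modification:
 */
static void
modify_block_sketch(Relation rel, BlockNumber blkno)
{
    Buffer      buf = ReadBuffer(rel, blkno);   /* pins the buffer */

    LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);     /* exclusive content lock */
    /* ... modify the page contents here ... */
    MarkBufferDirty(buf);                       /* safe: pinned + exclusive */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    ReleaseBuffer(buf);                         /* drop the pin */
}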
/*
 * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer()
 *
 * Formerly, this saved one cycle of acquiring/releasing the BufMgrLock
 * compared to calling the two routines separately.  Now it's mainly just
 * a convenience function.  However, if the passed buffer is valid and
 * already contains the desired block, we just return it as-is; and that
 * does save considerable work compared to a full release and reacquire.
 *
 * Note: it is OK to pass buffer == InvalidBuffer, indicating that no old
 * buffer actually needs to be released.  This case is the same as
 * ReadBuffer, but can save some tests in the caller.
 */
Buffer
ReleaseAndReadBuffer(Buffer buffer,
                     Relation relation,
                     BlockNumber blockNum)
{
    volatile BufferDesc *bufHdr;

    if (BufferIsValid(buffer))
    {
        if (BufferIsLocal(buffer))
        {
            Assert(LocalRefCount[-buffer - 1] > 0);
            bufHdr = &LocalBufferDescriptors[-buffer - 1];
            if (bufHdr->tag.blockNum == blockNum &&
                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
                return buffer;
            ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
            LocalRefCount[-buffer - 1]--;
        }
        else
        {
            Assert(PrivateRefCount[buffer - 1] > 0);
            bufHdr = &BufferDescriptors[buffer - 1];
            /* we have pin, so it's ok to examine tag without spinlock */
            if (bufHdr->tag.blockNum == blockNum &&
                RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
                return buffer;
            UnpinBuffer(bufHdr, true);
        }
    }

    return ReadBuffer(relation, blockNum);
}

/*
 * PinBuffer -- make buffer unavailable for replacement.
 *
 * For the default access strategy, the buffer's usage_count is incremented
 * when we first pin it; for other strategies we just make sure the
 * usage_count isn't zero.  (The idea of the latter is that we don't want
 * synchronized heap scans to inflate the count, but we need it to not be
 * zero to discourage other backends from stealing buffers from our ring.
 * As long as we cycle through the ring faster than the global clock-sweep
 * cycles, buffers in our ring won't be chosen as victims for replacement
 * by other backends.)
 *
 * This should be applied only to shared buffers, never local ones.
 *
 * Note that ResourceOwnerEnlargeBuffers must have been done already.
 *
 * Returns TRUE if buffer is BM_VALID, else FALSE.  This provision allows
 * some callers to avoid an extra spinlock cycle.
 */
static bool
PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
{
    int         b = buf->buf_id;
    bool        result;

    if (PrivateRefCount[b] == 0)
    {
        LockBufHdr(buf);
        buf->refcount++;
        if (strategy == NULL)
        {
            if (buf->usage_count < BM_MAX_USAGE_COUNT)
                buf->usage_count++;
        }
        else
        {
            /* non-default strategy: just make sure usage_count isn't zero */
            if (buf->usage_count == 0)
                buf->usage_count = 1;
        }
        result = (buf->flags & BM_VALID) != 0;
        UnlockBufHdr(buf);
    }
    else
    {
        /* If we previously pinned the buffer, it must surely be valid */
        result = true;
    }
    PrivateRefCount[b]++;
    Assert(PrivateRefCount[b] > 0);
    ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                BufferDescriptorGetBuffer(buf));
    return result;
}
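/*
 * Illustrative aside -- not part of the original bufmgr.c.  A typical use of
 * ReleaseAndReadBuffer() above is a block-at-a-time loop: the caller carries
 * one buffer variable along, and whenever the next request happens to be for
 * the block already held, the release/reacquire is skipped entirely.  A
 * minimal sketch with a hypothetical enclosing function:
 */
static void
walk_blocks_sketch(Relation rel, BlockNumber nblocks)
{
    Buffer      buf = InvalidBuffer;    /* nothing to release on first call */
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        buf = ReleaseAndReadBuffer(buf, rel, blkno);
        /* ... inspect the page under the appropriate content lock ... */
    }

    if (BufferIsValid(buf))
        ReleaseBuffer(buf);
}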