📄 bufmgr.c
字号:
/*------------------------------------------------------------------------- * * bufmgr.c * buffer manager interface routines * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $Header: /usr/local/cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.56 1999/06/29 04:54:47 vadim Exp $ * *------------------------------------------------------------------------- *//* * * BufferAlloc() -- lookup a buffer in the buffer table. If * it isn't there add it, but do not read it into memory. * This is used when we are about to reinitialize the * buffer so don't care what the current disk contents are. * BufferAlloc() pins the new buffer in memory. * * ReadBuffer() -- same as BufferAlloc() but reads the data * on a buffer cache miss. * * ReleaseBuffer() -- unpin the buffer * * WriteNoReleaseBuffer() -- mark the buffer contents as "dirty" * but don't unpin. The disk IO is delayed until buffer * replacement if WriteMode is BUFFER_LATE_WRITE. * * WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer() * * FlushBuffer() -- as above but never delayed write. * * BufferSync() -- flush all dirty buffers in the buffer pool. * * InitBufferPool() -- Init the buffer module. * * See other files: * freelist.c -- chooses victim for buffer replacement * buf_table.c -- manages the buffer lookup table */#include <sys/types.h>#include <sys/file.h>#include <stdio.h>#include <string.h>#include <math.h>#include <signal.h>#include "postgres.h"/* declarations split between these three files */#include "storage/buf.h"#include "storage/buf_internals.h"#include "storage/bufmgr.h"#include "storage/fd.h"#include "storage/ipc.h"#include "storage/s_lock.h"#include "storage/shmem.h"#include "storage/spin.h"#include "storage/smgr.h"#include "storage/lmgr.h"#include "miscadmin.h"#include "utils/builtins.h"#include "utils/hsearch.h"#include "utils/palloc.h"#include "utils/memutils.h"#include "utils/relcache.h"#include "executor/execdebug.h" /* for NDirectFileRead */#include "catalog/catalog.h"extern SPINLOCK BufMgrLock;extern long int ReadBufferCount;extern long int ReadLocalBufferCount;extern long int BufferHitCount;extern long int LocalBufferHitCount;extern long int BufferFlushCount;extern long int LocalBufferFlushCount;/* * It's used to avoid disk writes for read-only transactions * (i.e. when no one shared buffer was changed by transaction). * We set it to true in WriteBuffer/WriteNoReleaseBuffer when * marking shared buffer as dirty. We set it to false in xact.c * after transaction is committed/aborted. */bool SharedBufferChanged = false;static int WriteMode = BUFFER_LATE_WRITE; /* Delayed write is * default */static void WaitIO(BufferDesc *buf, SPINLOCK spinlock);#ifndef HAS_TEST_AND_SETstatic void SignalIO(BufferDesc *buf);extern long *NWaitIOBackendP; /* defined in buf_init.c */#endif /* HAS_TEST_AND_SET */static Buffer ReadBufferWithBufferLock(Relation relation, BlockNumber blockNum, bool bufferLockHeld);static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr, bool bufferLockHeld);static int FlushBuffer(Buffer buffer, bool release);static void BufferSync(void);static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld);void PrintBufferDescs(void);/* not static but used by vacuum only ... */int BlowawayRelationBuffers(Relation rel, BlockNumber block);/* --------------------------------------------------- * RelationGetBufferWithBuffer * see if the given buffer is what we want * if yes, we don't need to bother the buffer manager * --------------------------------------------------- */BufferRelationGetBufferWithBuffer(Relation relation, BlockNumber blockNumber, Buffer buffer){ BufferDesc *bufHdr; if (BufferIsValid(buffer)) { if (!BufferIsLocal(buffer)) { LockRelId *lrelId = &(((LockInfo) (relation->lockInfo))->lockRelId); bufHdr = &BufferDescriptors[buffer - 1]; SpinAcquire(BufMgrLock); if (bufHdr->tag.blockNum == blockNumber && bufHdr->tag.relId.relId == lrelId->relId && bufHdr->tag.relId.dbId == lrelId->dbId) { SpinRelease(BufMgrLock); return buffer; } return ReadBufferWithBufferLock(relation, blockNumber, true); } else { bufHdr = &LocalBufferDescriptors[-buffer - 1]; if (bufHdr->tag.relId.relId == RelationGetRelid(relation) && bufHdr->tag.blockNum == blockNumber) return buffer; } } return ReadBuffer(relation, blockNumber);}/* * ReadBuffer -- returns a buffer containing the requested * block of the requested relation. If the blknum * requested is P_NEW, extend the relation file and * allocate a new block. * * Returns: the buffer number for the buffer containing * the block read or NULL on an error. * * Assume when this function is called, that reln has been * opened already. */extern int ShowPinTrace;#undef ReadBuffer /* conflicts with macro when BUFMGR_DEBUG * defined *//* * ReadBuffer * */BufferReadBuffer(Relation reln, BlockNumber blockNum){ return ReadBufferWithBufferLock(reln, blockNum, false);}/* * is_userbuffer * * XXX caller must have already acquired BufMgrLock */#ifdef NOT_USEDstatic boolis_userbuffer(Buffer buffer){ BufferDesc *buf = &BufferDescriptors[buffer - 1]; if (IsSystemRelationName(buf->sb_relname)) return false; return true;}#endif#ifdef NOT_USEDBufferReadBuffer_Debug(char *file, int line, Relation reln, BlockNumber blockNum){ Buffer buffer; buffer = ReadBufferWithBufferLock(reln, blockNum, false); if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { BufferDesc *buf = &BufferDescriptors[buffer - 1]; fprintf(stderr, "PIN(RD) %ld relname = %s, blockNum = %d, \refcount = %ld, file: %s, line: %d\n", buffer, buf->sb_relname, buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line); } return buffer;}#endif/* * ReadBufferWithBufferLock -- does the work of * ReadBuffer() but with the possibility that * the buffer lock has already been held. this * is yet another effort to reduce the number of * semops in the system. */static BufferReadBufferWithBufferLock(Relation reln, BlockNumber blockNum, bool bufferLockHeld){ BufferDesc *bufHdr; int extend; /* extending the file by one block */ int status; bool found; bool isLocalBuf; extend = (blockNum == P_NEW); isLocalBuf = reln->rd_myxactonly; if (isLocalBuf) { ReadLocalBufferCount++; bufHdr = LocalBufferAlloc(reln, blockNum, &found); if (found) LocalBufferHitCount++; } else { ReadBufferCount++; /* * lookup the buffer. IO_IN_PROGRESS is set if the requested * block is not currently in memory. */ bufHdr = BufferAlloc(reln, blockNum, &found, bufferLockHeld); if (found) BufferHitCount++; } if (!bufHdr) return InvalidBuffer; /* if its already in the buffer pool, we're done */ if (found) { /* * This happens when a bogus buffer was returned previously and is * floating around in the buffer pool. A routine calling this * would want this extended. */ if (extend) { /* new buffers are zero-filled */ MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); smgrextend(DEFAULT_SMGR, reln, (char *) MAKE_PTR(bufHdr->data)); } return BufferDescriptorGetBuffer(bufHdr); } /* * if we have gotten to this point, the reln pointer must be ok and * the relation file must be open. */ if (extend) { /* new buffers are zero-filled */ MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ); status = smgrextend(DEFAULT_SMGR, reln, (char *) MAKE_PTR(bufHdr->data)); } else { status = smgrread(DEFAULT_SMGR, reln, blockNum, (char *) MAKE_PTR(bufHdr->data)); } if (isLocalBuf) return BufferDescriptorGetBuffer(bufHdr); /* lock buffer manager again to update IO IN PROGRESS */ SpinAcquire(BufMgrLock); if (status == SM_FAIL) { /* IO Failed. cleanup the data structures and go home */ if (!BufTableDelete(bufHdr)) { SpinRelease(BufMgrLock); elog(FATAL, "BufRead: buffer table broken after IO error\n"); } /* remember that BufferAlloc() pinned the buffer */ UnpinBuffer(bufHdr); /* * Have to reset the flag so that anyone waiting for the buffer * can tell that the contents are invalid. */ bufHdr->flags |= BM_IO_ERROR; bufHdr->flags &= ~BM_IO_IN_PROGRESS; } else { /* IO Succeeded. clear the flags, finish buffer update */ bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS); } /* If anyone was waiting for IO to complete, wake them up now */#ifdef HAS_TEST_AND_SET S_UNLOCK(&(bufHdr->io_in_progress_lock));#else if (bufHdr->refcount > 1) SignalIO(bufHdr);#endif SpinRelease(BufMgrLock); if (status == SM_FAIL) return InvalidBuffer; return BufferDescriptorGetBuffer(bufHdr);}/* * BufferAlloc -- Get a buffer from the buffer pool but dont * read it. * * Returns: descriptor for buffer * * When this routine returns, the BufMgrLock is guaranteed NOT be held. */static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr, bool bufferLockHeld){ BufferDesc *buf, *buf2; BufferTag newTag; /* identity of requested block */ bool inProgress; /* buffer undergoing IO */ bool newblock = FALSE; /* create a new tag so we can lookup the buffer */ /* assume that the relation is already open */ if (blockNum == P_NEW) { newblock = TRUE; blockNum = smgrnblocks(DEFAULT_SMGR, reln); } INIT_BUFFERTAG(&newTag, reln, blockNum); if (!bufferLockHeld) SpinAcquire(BufMgrLock); /* see if the block is in the buffer pool already */ buf = BufTableLookup(&newTag); if (buf != NULL) { /* * Found it. Now, (a) pin the buffer so no one steals it from the * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting * the buffer into the buffer pool. */ PinBuffer(buf); inProgress = (buf->flags & BM_IO_IN_PROGRESS); *foundPtr = TRUE; if (inProgress) { WaitIO(buf, BufMgrLock); if (buf->flags & BM_IO_ERROR) { /* * wierd race condition: * * We were waiting for someone else to read the buffer. While * we were waiting, the reader boof'd in some way, so the * contents of the buffer are still invalid. By saying * that we didn't find it, we can make the caller * reinitialize the buffer. If two processes are waiting * for this block, both will read the block. The second * one to finish may overwrite any updates made by the * first. (Assume higher level synchronization prevents * this from happening). * * This is never going to happen, don't worry about it. */ *foundPtr = FALSE; } }#ifdef BMTRACE _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCFND);#endif /* BMTRACE */ SpinRelease(BufMgrLock); return buf; } *foundPtr = FALSE; /* * Didn't find it in the buffer pool. We'll have to initialize a new * buffer. First, grab one from the free list. If it's dirty, flush * it to disk. Remember to unlock BufMgr spinlock while doing the IOs. */ inProgress = FALSE; for (buf = (BufferDesc *) NULL; buf == (BufferDesc *) NULL;) { /* GetFreeBuffer will abort if it can't find a free buffer */ buf = GetFreeBuffer(); /* * But it can return buf == NULL if we are in aborting transaction * now and so elog(ERROR,...) in GetFreeBuffer will not abort * again. */ if (buf == NULL) return NULL; /* * There should be exactly one pin on the buffer after it is * allocated -- ours. If it had a pin it wouldn't have been on * the free list. No one else could have pinned it between * GetFreeBuffer and here because we have the BufMgrLock. */ Assert(buf->refcount == 0); buf->refcount = 1; PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1; if (buf->flags & BM_DIRTY) { bool smok; /* * Set BM_IO_IN_PROGRESS to keep anyone from doing anything * with the contents of the buffer while we write it out. We * don't really care if they try to read it, but if they can * complete a BufferAlloc on it they can then scribble into * it, and we'd really like to avoid that while we are * flushing the buffer. Setting this flag should block them * in WaitIO until we're done. */ inProgress = TRUE; buf->flags |= BM_IO_IN_PROGRESS;#ifdef HAS_TEST_AND_SET /* * All code paths that acquire this lock pin the buffer first; * since no one had it pinned (it just came off the free * list), no one else can have this lock. */ Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); S_LOCK(&(buf->io_in_progress_lock));#endif /* HAS_TEST_AND_SET */ /* * Write the buffer out, being careful to release BufMgrLock * before starting the I/O. * * This #ifndef is here because a few extra semops REALLY kill * you on machines that don't have spinlocks. If you don't * operate with much concurrency, well... */ smok = BufferReplace(buf, true);#ifndef OPTIMIZE_SINGLE SpinAcquire(BufMgrLock);#endif /* OPTIMIZE_SINGLE */ if (smok == FALSE) { elog(NOTICE, "BufferAlloc: cannot write block %u for %s/%s", buf->tag.blockNum, buf->sb_dbname, buf->sb_relname); inProgress = FALSE; buf->flags |= BM_IO_ERROR; buf->flags &= ~BM_IO_IN_PROGRESS;#ifdef HAS_TEST_AND_SET S_UNLOCK(&(buf->io_in_progress_lock));#else /* !HAS_TEST_AND_SET */ if (buf->refcount > 1) SignalIO(buf);#endif /* !HAS_TEST_AND_SET */ PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; buf->refcount--; if (buf->refcount == 0) { AddBufferToFreelist(buf); buf->flags |= BM_FREE; } buf = (BufferDesc *) NULL; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -