📄 bufmgr.c
字号:
/* * relation->rd_nblocks should be accurate already if the relation is * new or temp, because no one else should be modifying it. Otherwise * we need to ask the smgr for the current physical file length. * * Don't call smgr on a view, either. */ if (relation->rd_rel->relkind == RELKIND_VIEW) relation->rd_nblocks = 0; else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) relation->rd_nblocks = 0; else if (!relation->rd_isnew && !relation->rd_istemp) relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation); return relation->rd_nblocks;}/* * RelationUpdateNumberOfBlocks * Forcibly update relation->rd_nblocks. * * If the relcache drops an entry for a temp relation, it must call this * routine after recreating the relcache entry, so that rd_nblocks is * re-sync'd with reality. See RelationGetNumberOfBlocks. */voidRelationUpdateNumberOfBlocks(Relation relation){ if (relation->rd_rel->relkind == RELKIND_VIEW) relation->rd_nblocks = 0; else if (relation->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) relation->rd_nblocks = 0; else relation->rd_nblocks = smgrnblocks(DEFAULT_SMGR, relation);}/* --------------------------------------------------------------------- * DropRelationBuffers * * This function removes all the buffered pages for a relation * from the buffer pool. Dirty pages are simply dropped, without * bothering to write them out first. This is NOT rollback-able, * and so should be used only with extreme caution! * * We assume that the caller holds an exclusive lock on the relation, * which should assure that no new buffers will be acquired for the rel * meanwhile. * -------------------------------------------------------------------- */voidDropRelationBuffers(Relation rel){ DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp);}/* --------------------------------------------------------------------- * DropRelFileNodeBuffers * * This is the same as DropRelationBuffers, except that the target * relation is specified by RelFileNode and temp status. * * This is NOT rollback-able. One legitimate use is to clear the * buffer cache of buffers for a relation that is being deleted * during transaction abort. * -------------------------------------------------------------------- */voidDropRelFileNodeBuffers(RelFileNode rnode, bool istemp){ int i; BufferDesc *bufHdr; if (istemp) { for (i = 0; i < NLocBuffer; i++) { bufHdr = &LocalBufferDescriptors[i]; if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) { bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); bufHdr->cntxDirty = false; LocalRefCount[i] = 0; bufHdr->tag.rnode.relNode = InvalidOid; } } return; } LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 1; i <= NBuffers; i++) { bufHdr = &BufferDescriptors[i - 1];recheck: if (RelFileNodeEquals(bufHdr->tag.rnode, rnode)) { /* * If there is I/O in progress, better wait till it's done; * don't want to delete the relation out from under someone * who's just trying to flush the buffer! */ if (bufHdr->flags & BM_IO_IN_PROGRESS) { WaitIO(bufHdr); /* * By now, the buffer very possibly belongs to some other * rel, so check again before proceeding. */ goto recheck; } /* Now we can do what we came for */ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); bufHdr->cntxDirty = false; /* * Release any refcount we may have. If someone else has a * pin on the buffer, we got trouble. */ if (!(bufHdr->flags & BM_FREE)) { /* the sole pin should be ours */ if (bufHdr->refcount != 1 || PrivateRefCount[i - 1] == 0) elog(FATAL, "block %u of %u/%u is still referenced (private %ld, global %d)", bufHdr->tag.blockNum, bufHdr->tag.rnode.tblNode, bufHdr->tag.rnode.relNode, PrivateRefCount[i - 1], bufHdr->refcount); /* Make sure it will be released */ PrivateRefCount[i - 1] = 1; UnpinBuffer(bufHdr); } /* * And mark the buffer as no longer occupied by this rel. */ BufTableDelete(bufHdr); } } LWLockRelease(BufMgrLock);}/* --------------------------------------------------------------------- * DropBuffers * * This function removes all the buffers in the buffer cache for a * particular database. Dirty pages are simply dropped, without * bothering to write them out first. This is used when we destroy a * database, to avoid trying to flush data to disk when the directory * tree no longer exists. Implementation is pretty similar to * DropRelationBuffers() which is for destroying just one relation. * -------------------------------------------------------------------- */voidDropBuffers(Oid dbid){ int i; BufferDesc *bufHdr; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 1; i <= NBuffers; i++) { bufHdr = &BufferDescriptors[i - 1];recheck: /* * We know that currently database OID is tblNode but this * probably will be changed in future and this func will be used * to drop tablespace buffers. */ if (bufHdr->tag.rnode.tblNode == dbid) { /* * If there is I/O in progress, better wait till it's done; * don't want to delete the database out from under someone * who's just trying to flush the buffer! */ if (bufHdr->flags & BM_IO_IN_PROGRESS) { WaitIO(bufHdr); /* * By now, the buffer very possibly belongs to some other * DB, so check again before proceeding. */ goto recheck; } /* Now we can do what we came for */ bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); bufHdr->cntxDirty = false; /* * The thing should be free, if caller has checked that no * backends are running in that database. */ Assert(bufHdr->flags & BM_FREE); /* * And mark the buffer as no longer occupied by this page. */ BufTableDelete(bufHdr); } } LWLockRelease(BufMgrLock);}/* ----------------------------------------------------------------- * PrintBufferDescs * * this function prints all the buffer descriptors, for debugging * use only. * ----------------------------------------------------------------- */#ifdef NOT_USEDvoidPrintBufferDescs(void){ int i; BufferDesc *buf = BufferDescriptors; if (IsUnderPostmaster) { LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 0; i < NBuffers; ++i, ++buf) { elog(LOG, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \blockNum=%u, flags=0x%x, refcount=%d %ld)", i, buf->freeNext, buf->freePrev, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } LWLockRelease(BufMgrLock); } else { /* interactive backend */ for (i = 0; i < NBuffers; ++i, ++buf) { printf("[%-2d] (%u/%u, %u) flags=0x%x, refcnt=%d %ld)\n", i, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } }}#endif#ifdef NOT_USEDvoidPrintPinnedBufs(void){ int i; BufferDesc *buf = BufferDescriptors; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 0; i < NBuffers; ++i, ++buf) { if (PrivateRefCount[i] > 0) elog(WARNING, "[%02d] (freeNext=%d, freePrev=%d, rel=%u/%u, \blockNum=%u, flags=0x%x, refcount=%d %ld)", i, buf->freeNext, buf->freePrev, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } LWLockRelease(BufMgrLock);}#endif/* --------------------------------------------------------------------- * FlushRelationBuffers * * This function writes all dirty pages of a relation out to disk. * Furthermore, pages that have blocknumber >= firstDelBlock are * actually removed from the buffer pool. An error code is returned * if we fail to dump a dirty buffer or if we find one of * the target pages is pinned into the cache. * * This is called by DROP TABLE to clear buffers for the relation * from the buffer pool. Note that we must write dirty buffers, * rather than just dropping the changes, because our transaction * might abort later on; we want to roll back safely in that case. * * This is also called by VACUUM before truncating the relation to the * given number of blocks. It might seem unnecessary for VACUUM to * write dirty pages before firstDelBlock, since VACUUM should already * have committed its changes. However, it is possible for there still * to be dirty pages: if some page had unwritten on-row tuple status * updates from a prior transaction, and VACUUM had no additional * changes to make to that page, then VACUUM won't have written it. * This is harmless in most cases but will break pg_upgrade, which * relies on VACUUM to ensure that *all* tuples have correct on-row * status. So, we check and flush all dirty pages of the rel * regardless of block number. * * In all cases, the caller should be holding AccessExclusiveLock on * the target relation to ensure that no other backend is busy reading * more blocks of the relation (or might do so before we commit). * * Formerly, we considered it an error condition if we found dirty * buffers here. However, since BufferSync no longer forces out all * dirty buffers at every xact commit, it's possible for dirty buffers * to still be present in the cache due to failure of an earlier * transaction. So, must flush dirty buffers without complaint. * * Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED * * XXX currently it sequentially searches the buffer pool, should be * changed to more clever ways of searching. * -------------------------------------------------------------------- */intFlushRelationBuffers(Relation rel, BlockNumber firstDelBlock){ int i; BufferDesc *bufHdr; XLogRecPtr recptr; int status; ErrorContextCallback errcontext; /* Setup error traceback support for ereport() */ errcontext.callback = buffer_write_error_callback; errcontext.arg = NULL; errcontext.previous = error_context_stack; error_context_stack = &errcontext; if (rel->rd_istemp) { for (i = 0; i < NLocBuffer; i++) { bufHdr = &LocalBufferDescriptors[i]; errcontext.arg = bufHdr; if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) { if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { status = smgrwrite(DEFAULT_SMGR, rel, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); if (status == SM_FAIL) { error_context_stack = errcontext.previous; elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is dirty, could not flush it", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum); return (-1); } bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED); bufHdr->cntxDirty = false; } if (LocalRefCount[i] > 0) { error_context_stack = errcontext.previous; elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is referenced (%ld)", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum, LocalRefCount[i]); return (-2); } if (bufHdr->tag.blockNum >= firstDelBlock) bufHdr->tag.rnode.relNode = InvalidOid; } } /* Pop the error context stack */ error_context_stack = errcontext.previous; return 0; } LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); for (i = 0; i < NBuffers; i++) { bufHdr = &BufferDescriptors[i]; errcontext.arg = bufHdr; if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node)) { if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { PinBuffer(bufHdr); if (bufHdr->flags & BM_IO_IN_PROGRESS) WaitIO(bufHdr); LWLockRelease(BufMgrLock); /* * Force XLOG flush for buffer' LSN */ recptr = BufferGetLSN(bufHdr); XLogFlush(recptr); /* * Now it's safe to write buffer to disk */ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); if (bufHdr->flags & BM_IO_IN_PROGRESS) WaitIO(bufHdr); if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty) { bufHdr->flags &= ~BM_JUST_DIRTIED; StartBufferIO(bufHdr, false); /* output IO start */ LWLockRelease(BufMgrLock); status = smgrwrite(DEFAULT_SMGR, rel, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); if (status == SM_FAIL) /* disk failure ?! */ ereport(PANIC, (errcode(ERRCODE_IO_ERROR), errmsg("could not write block %u of %u/%u", bufHdr->tag.blockNum, bufHdr->tag.rnode.tblNode, bufHdr->tag.rnode.relNode))); BufferFlushCount++; LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); bufHdr->flags &= ~BM_IO_IN_PROGRESS; TerminateBufferIO(bufHdr); Assert(!(bufHdr->flags & BM_JUST_DIRTIED)); bufHdr->flags &= ~BM_DIRTY; /* * Note that it's safe to change cntxDirty here * because of we protect it from upper writers by * AccessExclusiveLock and from other bufmgr routines * by BM_IO_IN_PROGRESS */ bufHdr->cntxDirty = false; } UnpinBuffer(bufHdr); } if (!(bufHdr->flags & BM_FREE)) { LWLockRelease(BufMgrLock); error_context_stack = errcontext.previous; elog(WARNING, "FlushRelationBuffers(\"%s\", %u): block %u is referenced (private %ld, global %d)", RelationGetRelationName(rel), firstDelBlock, bufHdr->tag.blockNum, PrivateRefCount[i], bufHdr->refcount); return -2; } if (bufHdr->tag.blockNum >= firstDelBlock) BufTableDelete(bufHdr); } } LWLockRelease(BufMgrLock); /* Pop the error context stack */ error_context_stack = errcontext.previous; return 0;}#undef ReleaseBuffer/* * ReleaseBuffer -- remove the pin on a buffer without * marking it dirty. */intReleaseBuffer(Buffer buffer){ BufferDesc *bufHdr; if (BufferIsLocal(buffer)) { Assert(LocalRefCount[-buffer - 1] > 0); LocalRefCount[-buffer - 1]--; return STATUS_OK; } if (BAD_BUFFER_ID(buffer)) return STATUS_ERROR; bufHdr = &BufferDescriptors[buffer - 1]; Assert(PrivateRefCount[buffer - 1] > 0); if (PrivateRefCount[buffer - 1] > 1) PrivateRefCount[buffer - 1]--; else { LWLockAcquire(BufMgrLock, LW_EXCLUSIVE); UnpinBuffer(bufHdr); LWLockRelease(BufMgrLock); } return STATUS_OK;}#ifdef NOT_USEDvoidIncrBufferRefCount_Debug(char *file, int line, Buffer buffer){ IncrBufferRefCount(buffer); if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { BufferDesc *buf = &BufferDescriptors[buffer - 1]; fprintf(stderr, "PIN(Incr) %d rel = %u/%u, blockNum = %u, \refcount = %ld, file: %s, line: %d\n", buffer, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line); }}#endif#ifdef NOT_USEDvoidReleaseBuffer_Debug(char *file, int line, Buffer buffer){ ReleaseBuffer(buffer); if (ShowPinTrace && !BufferIsLocal(buffer) && is_userbuffer(buffer)) { BufferDesc *buf = &BufferDescriptors[buffer - 1]; fprintf(stderr, "UNPIN(Rel) %d rel = %u/%u, blockNum = %u, \refcount = %ld, file: %s, line: %d\n", buffer, buf->tag.rnode.tblNode, buf->tag.rnode.relNode, buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line); }}#endif#ifdef NOT_USEDBufferReleaseAndReadBuffer_Debug(char *file, int line, Buffer buffer, Relation relation, BlockNumber blockNum){ bool bufferValid; Buffer b; bufferValid = BufferIsValid(buffer); b = ReleaseAndReadBuffer(buffer, relation, blockNum); if (ShowPinTrace && bufferValid && BufferIsLocal(buffer) && is_userbuffer(buffer)) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -