📄 bufmgr.c
字号:
else { /* * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't * be setted by anyone. - vadim 01/17/97 */ if (buf->flags & BM_JUST_DIRTIED) { elog(FATAL, "BufferAlloc: content of block %u (%s) changed while flushing", buf->tag.blockNum, buf->sb_relname); } else buf->flags &= ~BM_DIRTY; } /* * Somebody could have pinned the buffer while we were doing * the I/O and had given up the BufMgrLock (though they would * be waiting for us to clear the BM_IO_IN_PROGRESS flag). * That's why this is a loop -- if so, we need to clear the * I/O flags, remove our pin and start all over again. * * People may be making buffers free at any time, so there's no * reason to think that we have an immediate disaster on our * hands. */ if (buf && buf->refcount > 1) { inProgress = FALSE; buf->flags &= ~BM_IO_IN_PROGRESS;#ifdef HAS_TEST_AND_SET S_UNLOCK(&(buf->io_in_progress_lock));#else /* !HAS_TEST_AND_SET */ if (buf->refcount > 1) SignalIO(buf);#endif /* !HAS_TEST_AND_SET */ PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; buf->refcount--; buf = (BufferDesc *) NULL; } /* * Somebody could have allocated another buffer for the same * block we are about to read in. (While we flush out the * dirty buffer, we don't hold the lock and someone could have * allocated another buffer for the same block. The problem is * we haven't gotten around to insert the new tag into the * buffer table. So we need to check here. -ay 3/95 */ buf2 = BufTableLookup(&newTag); if (buf2 != NULL) { /* * Found it. Someone has already done what we're about to * do. We'll just handle this as if it were found in the * buffer pool in the first place. */ if (buf != NULL) {#ifdef HAS_TEST_AND_SET S_UNLOCK(&(buf->io_in_progress_lock));#else /* !HAS_TEST_AND_SET */ if (buf->refcount > 1) SignalIO(buf);#endif /* !HAS_TEST_AND_SET */ /* give up the buffer since we don't need it any more */ buf->refcount--; PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 0; AddBufferToFreelist(buf); buf->flags |= BM_FREE; buf->flags &= ~BM_IO_IN_PROGRESS; } PinBuffer(buf2); inProgress = (buf2->flags & BM_IO_IN_PROGRESS); *foundPtr = TRUE; if (inProgress) { WaitIO(buf2, BufMgrLock); if (buf2->flags & BM_IO_ERROR) *foundPtr = FALSE; } SpinRelease(BufMgrLock); return buf2; } } } /* * At this point we should have the sole pin on a non-dirty buffer and * we may or may not already have the BM_IO_IN_PROGRESS flag set. */ /* * Change the name of the buffer in the lookup table: * * Need to update the lookup table before the read starts. If someone * comes along looking for the buffer while we are reading it in, we * don't want them to allocate a new buffer. For the same reason, we * didn't want to erase the buf table entry for the buffer we were * writing back until now, either. */ if (!BufTableDelete(buf)) { SpinRelease(BufMgrLock); elog(FATAL, "buffer wasn't in the buffer table\n"); } /* record the database name and relation name for this buffer */ strcpy(buf->sb_relname, reln->rd_rel->relname.data); strcpy(buf->sb_dbname, DatabaseName); INIT_BUFFERTAG(&(buf->tag), reln, blockNum); if (!BufTableInsert(buf)) { SpinRelease(BufMgrLock); elog(FATAL, "Buffer in lookup table twice \n"); } /* * Buffer contents are currently invalid. Have to mark IO IN PROGRESS * so no one fiddles with them until the read completes. If this * routine has been called simply to allocate a buffer, no io will be * attempted, so the flag isnt set. */ if (!inProgress) { buf->flags |= BM_IO_IN_PROGRESS;#ifdef HAS_TEST_AND_SET Assert(S_LOCK_FREE(&(buf->io_in_progress_lock))); S_LOCK(&(buf->io_in_progress_lock));#endif /* HAS_TEST_AND_SET */ }#ifdef BMTRACE _bm_trace((reln->rd_rel->relisshared ? 0 : MyDatabaseId), RelationGetRelid(reln), blockNum, BufferDescriptorGetBuffer(buf), BMT_ALLOCNOTFND);#endif /* BMTRACE */ SpinRelease(BufMgrLock); return buf;}/* * WriteBuffer * * Pushes buffer contents to disk if WriteMode is BUFFER_FLUSH_WRITE. * Otherwise, marks contents as dirty. * * Assume that buffer is pinned. Assume that reln is * valid. * * Side Effects: * Pin count is decremented. */#undef WriteBufferintWriteBuffer(Buffer buffer){ BufferDesc *bufHdr; if (WriteMode == BUFFER_FLUSH_WRITE) return FlushBuffer(buffer, TRUE); else { if (BufferIsLocal(buffer)) return WriteLocalBuffer(buffer, TRUE); if (BAD_BUFFER_ID(buffer)) return FALSE; bufHdr = &BufferDescriptors[buffer - 1]; SharedBufferChanged = true; SpinAcquire(BufMgrLock); Assert(bufHdr->refcount > 0); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); UnpinBuffer(bufHdr); SpinRelease(BufMgrLock); CommitInfoNeedsSave[buffer - 1] = 0; } return TRUE;}#ifdef NOT_USEDvoidWriteBuffer_Debug(char *file, int line, Buffer buffer){ WriteBuffer(buffer); if (ShowPinTrace && BufferIsLocal(buffer) && is_userbuffer(buffer)) { BufferDesc *buf; buf = &BufferDescriptors[buffer - 1]; fprintf(stderr, "UNPIN(WR) %ld relname = %s, blockNum = %d, \refcount = %ld, file: %s, line: %d\n", buffer, buf->sb_relname, buf->tag.blockNum, PrivateRefCount[buffer - 1], file, line); }}#endif/* * DirtyBufferCopy() -- For a given dbid/relid/blockno, if the buffer is * in the cache and is dirty, mark it clean and copy * it to the requested location. This is a logical * write, and has been installed to support the cache * management code for write-once storage managers. * * DirtyBufferCopy() -- Copy a given dirty buffer to the requested * destination. * * We treat this as a write. If the requested buffer is in the pool * and is dirty, we copy it to the location requested and mark it * clean. This routine supports the Sony jukebox storage manager, * which agrees to take responsibility for the data once we mark * it clean. * * NOTE: used by sony jukebox code in postgres 4.2 - ay 2/95 */#ifdef NOT_USEDvoidDirtyBufferCopy(Oid dbid, Oid relid, BlockNumber blkno, char *dest){ BufferDesc *buf; BufferTag btag; btag.relId.relId = relid; btag.relId.dbId = dbid; btag.blockNum = blkno; SpinAcquire(BufMgrLock); buf = BufTableLookup(&btag); if (buf == (BufferDesc *) NULL || !(buf->flags & BM_DIRTY) || !(buf->flags & BM_VALID)) { SpinRelease(BufMgrLock); return; } /* * hate to do this holding the lock, but release and reacquire is * slower */ memmove(dest, (char *) MAKE_PTR(buf->data), BLCKSZ); buf->flags &= ~BM_DIRTY; SpinRelease(BufMgrLock);}#endif/* * FlushBuffer -- like WriteBuffer, but force the page to disk. * * 'buffer' is known to be dirty/pinned, so there should not be a * problem reading the BufferDesc members without the BufMgrLock * (nobody should be able to change tags, flags, etc. out from under * us). */static intFlushBuffer(Buffer buffer, bool release){ BufferDesc *bufHdr; Oid bufdb; Relation bufrel; int status; if (BufferIsLocal(buffer)) return FlushLocalBuffer(buffer, release); if (BAD_BUFFER_ID(buffer)) return STATUS_ERROR; bufHdr = &BufferDescriptors[buffer - 1]; bufdb = bufHdr->tag.relId.dbId; Assert(bufdb == MyDatabaseId || bufdb == (Oid) NULL); bufrel = RelationIdCacheGetRelation(bufHdr->tag.relId.relId); Assert(bufrel != (Relation) NULL); SharedBufferChanged = true; /* To check if block content changed while flushing. - vadim 01/17/97 */ SpinAcquire(BufMgrLock); bufHdr->flags &= ~BM_JUST_DIRTIED; SpinRelease(BufMgrLock); status = smgrflush(DEFAULT_SMGR, bufrel, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); RelationDecrementReferenceCount(bufrel); if (status == SM_FAIL) { elog(ERROR, "FlushBuffer: cannot flush block %u of the relation %s", bufHdr->tag.blockNum, bufHdr->sb_relname); return STATUS_ERROR; } BufferFlushCount++; SpinAcquire(BufMgrLock); /* * If this buffer was marked by someone as DIRTY while we were * flushing it out we must not clear DIRTY flag - vadim 01/17/97 */ if (bufHdr->flags & BM_JUST_DIRTIED) { elog(NOTICE, "FlusfBuffer: content of block %u (%s) changed while flushing", bufHdr->tag.blockNum, bufHdr->sb_relname); } else bufHdr->flags &= ~BM_DIRTY; if (release) UnpinBuffer(bufHdr); SpinRelease(BufMgrLock); CommitInfoNeedsSave[buffer - 1] = 0; return STATUS_OK;}/* * WriteNoReleaseBuffer -- like WriteBuffer, but do not unpin the buffer * when the operation is complete. * * We know that the buffer is for a relation in our private cache, * because this routine is called only to write out buffers that * were changed by the executing backend. */intWriteNoReleaseBuffer(Buffer buffer){ BufferDesc *bufHdr; if (WriteMode == BUFFER_FLUSH_WRITE) return FlushBuffer(buffer, FALSE); else { if (BufferIsLocal(buffer)) return WriteLocalBuffer(buffer, FALSE); if (BAD_BUFFER_ID(buffer)) return STATUS_ERROR; bufHdr = &BufferDescriptors[buffer - 1]; SharedBufferChanged = true; SpinAcquire(BufMgrLock); bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); SpinRelease(BufMgrLock); CommitInfoNeedsSave[buffer - 1] = 0; } return STATUS_OK;}#undef ReleaseAndReadBuffer/* * ReleaseAndReadBuffer -- combine ReleaseBuffer() and ReadBuffer() * so that only one semop needs to be called. * */BufferReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber blockNum){ BufferDesc *bufHdr; Buffer retbuf; if (BufferIsLocal(buffer)) { Assert(LocalRefCount[-buffer - 1] > 0); LocalRefCount[-buffer - 1]--; } else { if (BufferIsValid(buffer)) { bufHdr = &BufferDescriptors[buffer - 1]; Assert(PrivateRefCount[buffer - 1] > 0); PrivateRefCount[buffer - 1]--; if (PrivateRefCount[buffer - 1] == 0 && LastRefCount[buffer - 1] == 0) { /* * only release buffer if it is not pinned in previous * ExecMain level */ SpinAcquire(BufMgrLock); bufHdr->refcount--; if (bufHdr->refcount == 0) { AddBufferToFreelist(bufHdr); bufHdr->flags |= BM_FREE; } if (CommitInfoNeedsSave[buffer - 1]) { bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); CommitInfoNeedsSave[buffer - 1] = 0; } retbuf = ReadBufferWithBufferLock(relation, blockNum, true); return retbuf; } } } return ReadBuffer(relation, blockNum);}/* * BufferSync -- Flush all dirty buffers in the pool. * * This is called at transaction commit time. It does the wrong thing, * right now. We should flush only our own changes to stable storage, * and we should obey the lock protocol on the buffer manager metadata * as we do it. Also, we need to be sure that no other transaction is * modifying the page as we flush it. This is only a problem for objects * that use a non-two-phase locking protocol, like btree indices. For * those objects, we would like to set a write lock for the duration of * our IO. Another possibility is to code updates to btree pages * carefully, so that writing them out out of order cannot cause * any unrecoverable errors. * * I don't want to think hard about this right now, so I will try * to come back to it later. */static voidBufferSync(){ int i; Oid bufdb; Oid bufrel; Relation reln; BufferDesc *bufHdr; int status; SpinAcquire(BufMgrLock); for (i = 0, bufHdr = BufferDescriptors; i < NBuffers; i++, bufHdr++) { if ((bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY)) { bufdb = bufHdr->tag.relId.dbId; bufrel = bufHdr->tag.relId.relId; if (bufdb == MyDatabaseId || bufdb == (Oid) 0) { reln = RelationIdCacheGetRelation(bufrel); /* * We have to pin buffer to keep anyone from stealing it * from the buffer pool while we are flushing it or * waiting in WaitIO. It's bad for GetFreeBuffer in * BufferAlloc, but there is no other way to prevent * writing into disk block data from some other buffer, * getting smgr status of some other block and clearing * BM_DIRTY of ... - VAdim 09/16/96 */ PinBuffer(bufHdr); if (bufHdr->flags & BM_IO_IN_PROGRESS) { WaitIO(bufHdr, BufMgrLock); UnpinBuffer(bufHdr); if (bufHdr->flags & BM_IO_ERROR) { elog(ERROR, "BufferSync: write error %u for %s", bufHdr->tag.blockNum, bufHdr->sb_relname); } if (reln != (Relation) NULL) RelationDecrementReferenceCount(reln); continue; } /* * To check if block content changed while flushing (see * below). - vadim 01/17/97 */ bufHdr->flags &= ~BM_JUST_DIRTIED; /* * If we didn't have the reldesc in our local cache, flush * this page out using the 'blind write' storage manager * routine. If we did find it, use the standard * interface. */#ifndef OPTIMIZE_SINGLE SpinRelease(BufMgrLock);#endif /* OPTIMIZE_SINGLE */ if (reln == (Relation) NULL) { status = smgrblindwrt(DEFAULT_SMGR, bufHdr->sb_dbname, bufHdr->sb_relname, bufdb, bufrel, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); } else { status = smgrwrite(DEFAULT_SMGR, reln, bufHdr->tag.blockNum, (char *) MAKE_PTR(bufHdr->data)); }#ifndef OPTIMIZE_SINGLE SpinAcquire(BufMgrLock);#endif /* OPTIMIZE_SINGLE */ UnpinBuffer(bufHdr); if (status == SM_FAIL) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -