📄 bufmgr.c
        BufferDesc *buf = &BufferDescriptors[buffer - 1];

        fprintf(stderr, "UNPIN(Rel&Rd) %d rel = %u/%u, blockNum = %u, \
refcount = %ld, file: %s, line: %d\n",
                buffer, buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
                buf->tag.blockNum,
                PrivateRefCount[buffer - 1], file, line);
    }
    if (ShowPinTrace && BufferIsLocal(b) && is_userbuffer(b))
    {
        BufferDesc *buf = &BufferDescriptors[b - 1];

        fprintf(stderr, "PIN(Rel&Rd) %d rel = %u/%u, blockNum = %u, \
refcount = %ld, file: %s, line: %d\n",
                b, buf->tag.rnode.tblNode, buf->tag.rnode.relNode,
                buf->tag.blockNum,
                PrivateRefCount[b - 1], file, line);
    }
    return b;
}
#endif

#ifdef BMTRACE

/*
 * trace allocations and deallocations in a circular buffer in
 * shared memory.  check the buffer before doing the allocation,
 * and die if there's anything fishy.
 */
void
_bm_trace(Oid dbId, Oid relId, int blkNo, int bufNo, int allocType)
{
    long        start,
                cur;
    bmtrace    *tb;

    start = *CurTraceBuf;

    if (start > 0)
        cur = start - 1;
    else
        cur = BMT_LIMIT - 1;

    for (;;)
    {
        tb = &TraceBuf[cur];
        if (tb->bmt_op != BMT_NOTUSED)
        {
            if (tb->bmt_buf == bufNo)
            {
                if ((tb->bmt_op == BMT_DEALLOC) ||
                    (tb->bmt_dbid == dbId && tb->bmt_relid == relId &&
                     tb->bmt_blkno == blkNo))
                    goto okay;

                /* die holding the buffer lock */
                _bm_die(dbId, relId, blkNo, bufNo, allocType, start, cur);
            }
        }

        if (cur == start)
            goto okay;

        if (cur == 0)
            cur = BMT_LIMIT - 1;
        else
            cur--;
    }

okay:
    tb = &TraceBuf[start];
    tb->bmt_pid = MyProcPid;
    tb->bmt_buf = bufNo;
    tb->bmt_dbid = dbId;
    tb->bmt_relid = relId;
    tb->bmt_blkno = blkNo;
    tb->bmt_op = allocType;

    *CurTraceBuf = (start + 1) % BMT_LIMIT;
}

void
_bm_die(Oid dbId, Oid relId, int blkNo, int bufNo,
        int allocType, long start, long cur)
{
    FILE       *fp;
    bmtrace    *tb;
    int         i;

    tb = &TraceBuf[cur];

    if ((fp = AllocateFile("/tmp/death_notice", "w")) == NULL)
        elog(FATAL, "buffer alloc trace error and can't open log file");

    fprintf(fp, "buffer alloc trace detected the following error:\n\n");
    fprintf(fp, "    buffer %d being %s inconsistently with a previous %s\n\n",
            bufNo,
            (allocType == BMT_DEALLOC ? "deallocated" : "allocated"),
            (tb->bmt_op == BMT_DEALLOC ? "deallocation" : "allocation"));

    fprintf(fp, "the trace buffer contains:\n");

    i = start;
    for (;;)
    {
        tb = &TraceBuf[i];
        if (tb->bmt_op != BMT_NOTUSED)
        {
            fprintf(fp, "    [%3d]%spid %d buf %2d for <%u,%u,%u> ",
                    i, (i == cur ? " ---> " : "\t"),
                    tb->bmt_pid, tb->bmt_buf,
                    tb->bmt_dbid, tb->bmt_relid, tb->bmt_blkno);

            switch (tb->bmt_op)
            {
                case BMT_ALLOCFND:
                    fprintf(fp, "allocate (found)\n");
                    break;

                case BMT_ALLOCNOTFND:
                    fprintf(fp, "allocate (not found)\n");
                    break;

                case BMT_DEALLOC:
                    fprintf(fp, "deallocate\n");
                    break;

                default:
                    fprintf(fp, "unknown op type %d\n", tb->bmt_op);
                    break;
            }
        }

        i = (i + 1) % BMT_LIMIT;
        if (i == start)
            break;
    }

    fprintf(fp, "\noperation causing error:\n");
    fprintf(fp, "\tpid %d buf %d for <%d,%u,%d> ",
            getpid(), bufNo, dbId, relId, blkNo);

    switch (allocType)
    {
        case BMT_ALLOCFND:
            fprintf(fp, "allocate (found)\n");
            break;

        case BMT_ALLOCNOTFND:
            fprintf(fp, "allocate (not found)\n");
            break;

        case BMT_DEALLOC:
            fprintf(fp, "deallocate\n");
            break;

        default:
            fprintf(fp, "unknown op type %d\n", allocType);
            break;
    }

    FreeFile(fp);

    kill(getpid(), SIGILL);
}

#endif   /* BMTRACE */

/*
 * SetBufferCommitInfoNeedsSave
 *
 *  Mark a buffer dirty when we have updated tuple commit-status bits in it.
 *
 * This is similar to WriteNoReleaseBuffer, except that we have not made a
 * critical change that has to be flushed to disk before xact commit --- the
 * status-bit update could be redone by someone else just as easily.
 *
 * This routine might get called many times on the same page, if we are making
 * the first scan after commit of an xact that added/deleted many tuples.
 * So, be as quick as we can if the buffer is already dirty.
 */
void
SetBufferCommitInfoNeedsSave(Buffer buffer)
{
    BufferDesc *bufHdr;

    if (BufferIsLocal(buffer))
    {
        WriteLocalBuffer(buffer, false);
        return;
    }

    if (BAD_BUFFER_ID(buffer))
        elog(ERROR, "bad buffer id: %d", buffer);

    bufHdr = &BufferDescriptors[buffer - 1];

    if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) !=
        (BM_DIRTY | BM_JUST_DIRTIED))
    {
        LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
        Assert(bufHdr->refcount > 0);
        bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED);
        LWLockRelease(BufMgrLock);
    }
}

/*
 * Release buffer context locks for shared buffers.
 *
 * Used to clean up after errors.
 */
void
UnlockBuffers(void)
{
    BufferDesc *buf;
    int         i;

    for (i = 0; i < NBuffers; i++)
    {
        bits8       buflocks = BufferLocks[i];

        if (buflocks == 0)
            continue;

        Assert(BufferIsValid(i + 1));
        buf = &(BufferDescriptors[i]);

        HOLD_INTERRUPTS();      /* don't want to die() partway through... */

        /*
         * The buffer's cntx_lock has already been released by lwlock.c.
         */

        if (buflocks & BL_PIN_COUNT_LOCK)
        {
            LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);

            /*
             * Don't complain if flag bit not set; it could have been
             * reset but we got a cancel/die interrupt before getting the
             * signal.
             */
            if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
                buf->wait_backend_id == MyBackendId)
                buf->flags &= ~BM_PIN_COUNT_WAITER;
            LWLockRelease(BufMgrLock);
            ProcCancelWaitForSignal();
        }

        BufferLocks[i] = 0;

        RESUME_INTERRUPTS();
    }
}

/*
 * Acquire or release the cntx_lock for the buffer.
 */
void
LockBuffer(Buffer buffer, int mode)
{
    BufferDesc *buf;

    Assert(BufferIsValid(buffer));
    if (BufferIsLocal(buffer))
        return;

    buf = &(BufferDescriptors[buffer - 1]);

    if (mode == BUFFER_LOCK_UNLOCK)
        LWLockRelease(buf->cntx_lock);
    else if (mode == BUFFER_LOCK_SHARE)
        LWLockAcquire(buf->cntx_lock, LW_SHARED);
    else if (mode == BUFFER_LOCK_EXCLUSIVE)
    {
        LWLockAcquire(buf->cntx_lock, LW_EXCLUSIVE);

        /*
         * This is not the best place to set cntxDirty flag (eg indices do
         * not always change buffer they lock in excl mode). But please
         * remember that it's critical to set cntxDirty *before* logging
         * changes with XLogInsert() - see comments in BufferSync().
         */
        buf->cntxDirty = true;
    }
    else
        elog(ERROR, "unrecognized buffer lock mode: %d", mode);
}

/*
 * Acquire the cntx_lock for the buffer, but only if we don't have to wait.
 *
 * This assumes the caller wants BUFFER_LOCK_EXCLUSIVE mode.
 */
bool
ConditionalLockBuffer(Buffer buffer)
{
    BufferDesc *buf;

    Assert(BufferIsValid(buffer));
    if (BufferIsLocal(buffer))
        return true;            /* act as though we got it */

    buf = &(BufferDescriptors[buffer - 1]);

    if (LWLockConditionalAcquire(buf->cntx_lock, LW_EXCLUSIVE))
    {
        /*
         * This is not the best place to set cntxDirty flag (eg indices do
         * not always change buffer they lock in excl mode). But please
         * remember that it's critical to set cntxDirty *before* logging
         * changes with XLogInsert() - see comments in BufferSync().
         */
        buf->cntxDirty = true;

        return true;
    }
    return false;
}

/*
 * LockBufferForCleanup - lock a buffer in preparation for deleting items
 *
 * Items may be deleted from a disk page only when the caller (a) holds an
 * exclusive lock on the buffer and (b) has observed that no other backend
 * holds a pin on the buffer.  If there is a pin, then the other backend
 * might have a pointer into the buffer (for example, a heapscan reference
 * to an item --- see README for more details).
 * It's OK if a pin is added after the cleanup starts, however; the
 * newly-arrived backend will be unable to look at the page until we
 * release the exclusive lock.
 *
 * To implement this protocol, a would-be deleter must pin the buffer and
 * then call LockBufferForCleanup().  LockBufferForCleanup() is similar to
 * LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), except that it loops until
 * it has successfully observed pin count = 1.
 */
void
LockBufferForCleanup(Buffer buffer)
{
    BufferDesc *bufHdr;
    bits8      *buflock;

    Assert(BufferIsValid(buffer));

    if (BufferIsLocal(buffer))
    {
        /* There should be exactly one pin */
        if (LocalRefCount[-buffer - 1] != 1)
            elog(ERROR, "incorrect local pin count: %ld",
                 LocalRefCount[-buffer - 1]);
        /* Nobody else to wait for */
        return;
    }

    /* There should be exactly one local pin */
    if (PrivateRefCount[buffer - 1] != 1)
        elog(ERROR, "incorrect local pin count: %ld",
             PrivateRefCount[buffer - 1]);

    bufHdr = &BufferDescriptors[buffer - 1];
    buflock = &(BufferLocks[buffer - 1]);

    for (;;)
    {
        /* Try to acquire lock */
        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
        LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
        Assert(bufHdr->refcount > 0);
        if (bufHdr->refcount == 1)
        {
            /* Successfully acquired exclusive lock with pincount 1 */
            LWLockRelease(BufMgrLock);
            return;
        }
        /* Failed, so mark myself as waiting for pincount 1 */
        if (bufHdr->flags & BM_PIN_COUNT_WAITER)
        {
            LWLockRelease(BufMgrLock);
            LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
            elog(ERROR, "multiple backends attempting to wait for pincount 1");
        }
        bufHdr->wait_backend_id = MyBackendId;
        bufHdr->flags |= BM_PIN_COUNT_WAITER;
        *buflock |= BL_PIN_COUNT_LOCK;
        LWLockRelease(BufMgrLock);
        LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
        /* Wait to be signaled by UnpinBuffer() */
        ProcWaitForSignal();
        *buflock &= ~BL_PIN_COUNT_LOCK;
        /* Loop back and try again */
    }
}

/*
 *  Functions for IO error handling
 *
 *  Note: We assume that nested buffer IO never occurs,
 *  i.e. at most one io_in_progress lock is held per proc.
 */
static BufferDesc *InProgressBuf = (BufferDesc *) NULL;
static bool IsForInput;

/*
 * Function:StartBufferIO
 *  (Assumptions)
 *  My process is executing no IO
 *  BufMgrLock is held
 *  BM_IO_IN_PROGRESS mask is not set for the buffer
 *  The buffer is Pinned
 *
 * Because BufMgrLock is held, we are already in an interrupt holdoff here,
 * and do not need another.
 */
static void
StartBufferIO(BufferDesc *buf, bool forInput)
{
    Assert(!InProgressBuf);
    Assert(!(buf->flags & BM_IO_IN_PROGRESS));
    buf->flags |= BM_IO_IN_PROGRESS;

    LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);

    InProgressBuf = buf;
    IsForInput = forInput;
}

/*
 * Function:TerminateBufferIO
 *  (Assumptions)
 *  My process is executing IO for the buffer
 *  BufMgrLock is held
 *  The buffer is Pinned
 *
 * Because BufMgrLock is held, we are already in an interrupt holdoff here,
 * and do not need another.
 */
static void
TerminateBufferIO(BufferDesc *buf)
{
    Assert(buf == InProgressBuf);
    LWLockRelease(buf->io_in_progress_lock);
    InProgressBuf = (BufferDesc *) NULL;
}

/*
 * Function:ContinueBufferIO
 *  (Assumptions)
 *  My process is executing IO for the buffer
 *  BufMgrLock is held
 *  The buffer is Pinned
 *
 * Because BufMgrLock is held, we are already in an interrupt holdoff here,
 * and do not need another.
 */
static void
ContinueBufferIO(BufferDesc *buf, bool forInput)
{
    Assert(buf == InProgressBuf);
    Assert(buf->flags & BM_IO_IN_PROGRESS);
    IsForInput = forInput;
}

#ifdef NOT_USED
void
InitBufferIO(void)
{
    InProgressBuf = (BufferDesc *) NULL;
}
#endif

/*
 *  Clean up any active buffer I/O after an error.
 *  BufMgrLock isn't held when this function is called.
 *
 *  If I/O was in progress, we always set BM_IO_ERROR.
 */
void
AbortBufferIO(void)
{
    BufferDesc *buf = InProgressBuf;

    if (buf)
    {
        /*
         * Since LWLockReleaseAll has already been called, we're not
         * holding the buffer's io_in_progress_lock. We have to re-acquire
         * it so that we can use TerminateBufferIO. Anyone who's executing
         * WaitIO on the buffer will be in a busy spin until we succeed in
         * doing this.
         */
        LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);

        LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
        Assert(buf->flags & BM_IO_IN_PROGRESS);
        if (IsForInput)
            Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
        else
        {
            Assert(buf->flags & BM_DIRTY || buf->cntxDirty);
            /* Issue notice if this is not the first failure... */
            if (buf->flags & BM_IO_ERROR)
            {
                ereport(WARNING,
                        (errcode(ERRCODE_IO_ERROR),
                         errmsg("could not write block %u of %u/%u",
                                buf->tag.blockNum,
                                buf->tag.rnode.tblNode,
                                buf->tag.rnode.relNode),
                         errdetail("Multiple failures --- write error may be permanent.")));
            }
            buf->flags |= BM_DIRTY;
        }
        buf->flags |= BM_IO_ERROR;
        buf->flags &= ~BM_IO_IN_PROGRESS;
        TerminateBufferIO(buf);
        LWLockRelease(BufMgrLock);
    }
}

RelFileNode
BufferGetFileNode(Buffer buffer)
{
    BufferDesc *bufHdr;

    if (BufferIsLocal(buffer))
        bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
    else
        bufHdr = &BufferDescriptors[buffer - 1];

    return (bufHdr->tag.rnode);
}

/*
 * Error context callback for errors occurring during buffer writes.
 */
static void
buffer_write_error_callback(void *arg)
{
    BufferDesc *bufHdr = (BufferDesc *) arg;

    if (bufHdr != NULL)
        errcontext("writing block %u of relation %u/%u",
                   bufHdr->tag.blockNum,
                   bufHdr->tag.rnode.tblNode,
                   bufHdr->tag.rnode.relNode);
}
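/* ----------------------------------------------------------------
 * Editor's sketch #1 (not part of bufmgr.c): the BMTRACE code above walks a
 * circular trace buffer backwards from the most recent entry, wrapping at
 * BMT_LIMIT, then records the new event and advances the write position.
 * The standalone program below reproduces only that wrap-around walk with
 * hypothetical names (trace_rec, trace_event, TraceRing); the consistency
 * check and shared-memory plumbing of _bm_trace/_bm_die are omitted.
 * ----------------------------------------------------------------
 */
#include <stdio.h>

#define TRACE_LIMIT 8                   /* stand-in for BMT_LIMIT */

typedef struct
{
    int         used;                   /* 0 = slot never written (BMT_NOTUSED) */
    int         buf;                    /* buffer id this entry refers to */
    int         op;                     /* 1 = allocate, 2 = deallocate */
} trace_rec;

static trace_rec TraceRing[TRACE_LIMIT];
static long CurTrace = 0;               /* stand-in for *CurTraceBuf */

static void
trace_event(int buf, int op)
{
    long        start = CurTrace;
    long        cur = (start > 0) ? start - 1 : TRACE_LIMIT - 1;

    /*
     * Walk backwards from the newest entry, wrapping at the ends, until we
     * either find the latest entry for this buffer (where _bm_trace would
     * run its sanity check) or come all the way around to where we started.
     */
    for (;;)
    {
        if (TraceRing[cur].used && TraceRing[cur].buf == buf)
            break;                      /* latest entry for this buffer */
        if (cur == start)
            break;                      /* scanned the whole ring */
        cur = (cur == 0) ? TRACE_LIMIT - 1 : cur - 1;
    }

    /* Record the new event in the current slot and advance, wrapping. */
    TraceRing[start].used = 1;
    TraceRing[start].buf = buf;
    TraceRing[start].op = op;
    CurTrace = (start + 1) % TRACE_LIMIT;
}

int
main(void)
{
    int         i;

    for (i = 0; i < 12; i++)
        trace_event(i % 3, (i % 2) ? 2 : 1);
    for (i = 0; i < TRACE_LIMIT; i++)
        printf("[%d] buf=%d op=%d\n", i, TraceRing[i].buf, TraceRing[i].op);
    return 0;
}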
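/* ----------------------------------------------------------------
 * Editor's sketch #2 (not part of bufmgr.c): typical calling pattern for the
 * pin/lock API above.  Assumes the ReadBuffer/ReleaseBuffer/WriteBuffer
 * interface of this era of the buffer manager and the headers bufmgr.c
 * already includes; "example_touch_page", "my_rel" and "blkno" are
 * hypothetical names, not functions that exist in PostgreSQL.
 * ----------------------------------------------------------------
 */
static void
example_touch_page(Relation my_rel, BlockNumber blkno)
{
    Buffer      buf;
    Page        page;

    buf = ReadBuffer(my_rel, blkno);        /* pins the buffer */

    /* A shared content lock is enough for reading the page */
    LockBuffer(buf, BUFFER_LOCK_SHARE);
    page = BufferGetPage(buf);
    if (!PageIsNew(page))
        SetBufferCommitInfoNeedsSave(buf);  /* e.g. after flipping a hint bit */
    LockBuffer(buf, BUFFER_LOCK_UNLOCK);

    /*
     * For a real modification, take the exclusive content lock instead,
     * which also sets cntxDirty as shown in LockBuffer() above.
     */
    if (ConditionalLockBuffer(buf))
    {
        /* ... modify the page, XLogInsert() the change ... */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        WriteBuffer(buf);                   /* mark dirty and drop the pin */
    }
    else
        ReleaseBuffer(buf);                 /* just drop the pin */
}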
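/* ----------------------------------------------------------------
 * Editor's sketch #3 (not part of bufmgr.c): the cleanup-lock protocol
 * described in the LockBufferForCleanup() comment.  The would-be deleter
 * pins the page first, then waits until it holds the only pin before
 * physically removing items.  "example_cleanup_page" is a hypothetical
 * caller, assuming the same ReadBuffer/WriteBuffer interface as above.
 * ----------------------------------------------------------------
 */
static void
example_cleanup_page(Relation my_rel, BlockNumber blkno)
{
    Buffer      buf;

    buf = ReadBuffer(my_rel, blkno);        /* (1) take our own pin */
    LockBufferForCleanup(buf);              /* (2) exclusive lock + pin count 1 */

    /* ... now safe to physically remove tuples from the page ... */

    LockBuffer(buf, BUFFER_LOCK_UNLOCK);
    WriteBuffer(buf);                       /* mark dirty and drop the pin */
}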
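/* ----------------------------------------------------------------
 * Editor's sketch #4 (not part of bufmgr.c): buffer_write_error_callback is
 * meant to be pushed onto error_context_stack around the physical write, so
 * that any error raised during the I/O gets the "writing block ..." line
 * added to its report.  This uses the standard ErrorContextCallback
 * mechanism from elog.h; "example_write_with_context" is a hypothetical
 * wrapper standing in for the write path that registers the callback.
 * ----------------------------------------------------------------
 */
static void
example_write_with_context(BufferDesc *buf)
{
    ErrorContextCallback errcontext_cb;

    /* Set up error traceback support for ereport() */
    errcontext_cb.callback = buffer_write_error_callback;
    errcontext_cb.arg = (void *) buf;
    errcontext_cb.previous = error_context_stack;
    error_context_stack = &errcontext_cb;

    /* ... perform the physical write here; errors get the extra context ... */

    /* Pop the error context stack */
    error_context_stack = errcontext_cb.previous;
}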