📄 nbtree.c
字号:
/* --- tail of btendscan(), whose opening lines precede this chunk --- */
		ReleaseBuffer(so->btso_mrkbuf);
		so->btso_mrkbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	/* free the privately allocated scan-key array and opaque state */
	if (so->keyData != (ScanKey) NULL)
		pfree(so->keyData);
	pfree(so);

	PG_RETURN_VOID();
}

/*
 *	btmarkpos() -- save current scan position
 *
 * Releases the pin held for any previous mark, then (if the scan is
 * currently positioned on a valid item) re-pins the current buffer and
 * copies the current index position and heap TID into the mark fields,
 * so btrestrpos() can later return here.
 */
Datum
btmarkpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer iptr;
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pin */
	if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
	{
		ReleaseBuffer(so->btso_mrkbuf);
		so->btso_mrkbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	/* bump pin on current buffer for assignment to mark buffer */
	if (ItemPointerIsValid(&(scan->currentItemData)))
	{
		/* ReadBuffer on the same block just increments the pin count */
		so->btso_mrkbuf = ReadBuffer(scan->indexRelation,
									 BufferGetBlockNumber(so->btso_curbuf));
		scan->currentMarkData = scan->currentItemData;
		so->mrkHeapIptr = so->curHeapIptr;
	}

	PG_RETURN_VOID();
}

/*
 *	btrestrpos() -- restore scan to last saved position
 *
 * Mirror image of btmarkpos(): drops the pin on the current position
 * (if any), then re-pins the marked buffer and copies the saved index
 * position and heap TID back into the current-position fields.
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer iptr;
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pin */
	if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
	{
		ReleaseBuffer(so->btso_curbuf);
		so->btso_curbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	/* bump pin on marked buffer */
	if (ItemPointerIsValid(&(scan->currentMarkData)))
	{
		so->btso_curbuf = ReadBuffer(scan->indexRelation,
									 BufferGetBlockNumber(so->btso_mrkbuf));
		scan->currentItemData = scan->currentMarkData;
		so->curHeapIptr = so->mrkHeapIptr;
	}

	PG_RETURN_VOID();
}

/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
btbulkdelete(PG_FUNCTION_ARGS)
{
	/* args: index relation, tuple-is-dead callback, and its opaque state */
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
	void	   *callback_state = (void *) PG_GETARG_POINTER(2);
	IndexBulkDeleteResult *result;
	double		tuples_removed;
	double		num_index_tuples;
	/* worst case: every line pointer on a page is deletable */
	OffsetNumber deletable[BLCKSZ / sizeof(OffsetNumber)];
	int			ndeletable;
	Buffer		buf;
	BlockNumber num_pages;

	tuples_removed = 0;
	num_index_tuples = 0;

	/*
	 * The outer loop iterates over index leaf pages, the inner over items
	 * on a leaf page.  We issue just one _bt_delitems() call per page, so
	 * as to minimize WAL traffic.
	 *
	 * Note that we exclusive-lock every leaf page containing data items, in
	 * sequence left to right.  It sounds attractive to only
	 * exclusive-lock those containing items we need to delete, but
	 * unfortunately that is not safe: we could then pass a stopped
	 * indexscan, which could in rare cases lead to deleting the item it
	 * needs to find when it resumes.  (See _bt_restscan --- this could
	 * only happen if an indexscan stops on a deletable item and then a
	 * page split moves that item into a page further to its right, which
	 * the indexscan will have no pin on.)  We can skip obtaining
	 * exclusive lock on empty pages though, since no indexscan could be
	 * stopped on those.
	 */
	/* start at the leftmost leaf page (level 0) */
	buf = _bt_get_endpoint(rel, 0, false);
	if (BufferIsValid(buf))		/* check for empty index */
	{
		for (;;)
		{
			Page		page;
			BTPageOpaque opaque;
			OffsetNumber offnum,
						minoff,
						maxoff;
			BlockNumber nextpage;

			CHECK_FOR_INTERRUPTS();

			ndeletable = 0;
			page = BufferGetPage(buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(page);
			/* We probably cannot see deleted pages, but skip 'em if so */
			if (minoff <= maxoff && !P_ISDELETED(opaque))
			{
				/*
				 * Trade in the initial read lock for a super-exclusive
				 * write lock on this page.
				 */
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
				LockBufferForCleanup(buf);

				/*
				 * Recompute minoff/maxoff, both of which could have
				 * changed while we weren't holding the lock.
				 */
				minoff = P_FIRSTDATAKEY(opaque);
				maxoff = PageGetMaxOffsetNumber(page);

				/*
				 * Scan over all items to see which ones need deleted
				 * according to the callback function.
				 */
				for (offnum = minoff;
					 offnum <= maxoff;
					 offnum = OffsetNumberNext(offnum))
				{
					BTItem		btitem;
					ItemPointer htup;

					btitem = (BTItem) PageGetItem(page,
											PageGetItemId(page, offnum));
					htup = &(btitem->bti_itup.t_tid);
					if (callback(htup, callback_state))
					{
						/* remember offset; actual delete is batched below */
						deletable[ndeletable++] = offnum;
						tuples_removed += 1;
					}
					else
						num_index_tuples += 1;
				}
			}

			/*
			 * If we need to delete anything, do it and write the buffer;
			 * else just release the buffer.
			 */
			/* read btpo_next before the buffer is written/released */
			nextpage = opaque->btpo_next;
			if (ndeletable > 0)
			{
				_bt_delitems(rel, buf, deletable, ndeletable);
				_bt_wrtbuf(rel, buf);
			}
			else
				_bt_relbuf(rel, buf);

			/* And advance to next page, if any */
			if (nextpage == P_NONE)
				break;
			buf = _bt_getbuf(rel, nextpage, BT_READ);
		}
	}

	/* return statistics */
	num_pages = RelationGetNumberOfBlocks(rel);
	result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
	result->num_pages = num_pages;
	result->num_index_tuples = num_index_tuples;
	result->tuples_removed = tuples_removed;

	PG_RETURN_POINTER(result);
}

/*
 * Post-VACUUM cleanup.
 *
 * Here, we scan looking for pages we can delete or return to the freelist.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
btvacuumcleanup(PG_FUNCTION_ARGS)
{
	/* args: index relation, vacuum info (full vs. lazy), stats from bulkdelete */
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
	BlockNumber num_pages;
	BlockNumber blkno;
	BlockNumber *freePages;
	int			nFreePages,
				maxFreePages;
	BlockNumber pages_deleted = 0;
	MemoryContext mycontext;
	MemoryContext oldcontext;

	Assert(stats != NULL);

	num_pages = RelationGetNumberOfBlocks(rel);

	/* No point in remembering more than MaxFSMPages pages */
	maxFreePages = MaxFSMPages;
	if ((BlockNumber) maxFreePages > num_pages)
		maxFreePages = (int) num_pages + 1;		/* +1 to avoid palloc(0) */
	freePages = (BlockNumber *) palloc(maxFreePages * sizeof(BlockNumber));
	nFreePages = 0;

	/* Create a temporary memory context to run _bt_pagedel in */
	mycontext = AllocSetContextCreate(CurrentMemoryContext,
									  "_bt_pagedel",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * Scan through all pages of index, except metapage.  (Any pages added
	 * after we start the scan will not be examined; this should be fine,
	 * since they can't possibly be empty.)
	 */
	for (blkno = BTREE_METAPAGE + 1; blkno < num_pages; blkno++)
	{
		Buffer		buf;
		Page		page;
		BTPageOpaque opaque;

		buf = _bt_getbuf(rel, blkno, BT_READ);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		if (_bt_page_recyclable(page))
		{
			/* Okay to recycle this page */
			if (nFreePages < maxFreePages)
				freePages[nFreePages++] = blkno;
			pages_deleted++;
		}
		else if (P_ISDELETED(opaque))
		{
			/* Already deleted, but can't recycle yet */
			pages_deleted++;
		}
		else if ((opaque->btpo_flags & BTP_HALF_DEAD) ||
				 P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page))
		{
			/* Empty, try to delete */
			int			ndel;

			/* Run pagedel in a temp context to avoid memory leakage */
			MemoryContextReset(mycontext);
			oldcontext = MemoryContextSwitchTo(mycontext);

			ndel = _bt_pagedel(rel, buf, info->vacuum_full);

			/* count only this page, else may double-count parent */
			if (ndel)
				pages_deleted++;

			/*
			 * During VACUUM FULL it's okay to recycle deleted pages
			 * immediately, since there can be no other transactions
			 * scanning the index.  Note that we will only recycle the
			 * current page and not any parent pages that _bt_pagedel
			 * might have recursed to; this seems reasonable in the name
			 * of simplicity.  (Trying to do otherwise would mean we'd
			 * have to sort the list of recyclable pages we're building.)
			 */
			if (ndel && info->vacuum_full)
			{
				if (nFreePages < maxFreePages)
					freePages[nFreePages++] = blkno;
			}

			MemoryContextSwitchTo(oldcontext);
			continue;			/* pagedel released buffer */
		}
		_bt_relbuf(rel, buf);
	}

	/*
	 * During VACUUM FULL, we truncate off any recyclable pages at the end
	 * of the index.  In a normal vacuum it'd be unsafe to do this except
	 * by acquiring exclusive lock on the index and then rechecking all
	 * the pages; doesn't seem worth it.
	 */
	if (info->vacuum_full && nFreePages > 0)
	{
		BlockNumber new_pages = num_pages;

		/* peel off trailing free pages (freePages is in ascending order) */
		while (nFreePages > 0 && freePages[nFreePages - 1] == new_pages - 1)
		{
			new_pages--;
			pages_deleted--;
			nFreePages--;
		}
		if (new_pages != num_pages)
		{
			int			i;

			/*
			 * Okay to truncate.
			 *
			 * First, flush any shared buffers for the blocks we intend to
			 * delete.  FlushRelationBuffers is a bit more than we need
			 * for this, since it will also write out dirty buffers for
			 * blocks we aren't deleting, but it's the closest thing in
			 * bufmgr's API.
			 */
			i = FlushRelationBuffers(rel, new_pages);
			if (i < 0)
				elog(ERROR, "FlushRelationBuffers returned %d", i);

			/*
			 * Do the physical truncation.
			 */
			new_pages = smgrtruncate(DEFAULT_SMGR, rel, new_pages);
			rel->rd_nblocks = new_pages;	/* update relcache immediately */
			rel->rd_targblock = InvalidBlockNumber;
			num_pages = new_pages;
		}
	}

	/*
	 * Update the shared Free Space Map with the info we now have about
	 * free pages in the index, discarding any old info the map may have.
	 * We do not need to sort the page numbers; they're in order already.
	 */
	RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);

	pfree(freePages);

	MemoryContextDelete(mycontext);

	/* update statistics */
	stats->num_pages = num_pages;
	stats->pages_deleted = pages_deleted;
	stats->pages_free = nFreePages;

	PG_RETURN_POINTER(stats);
}

/*
 * Restore scan position when btgettuple is called to continue a scan.
 *
 * This is nontrivial because concurrent insertions might have moved the
 * index tuple we stopped on.  We assume the tuple can only have moved to
 * the right from our stop point, because we kept a pin on the buffer,
 * and so no deletion can have occurred on that page.
 *
 * On entry, we have a pin but no read lock on the buffer that contained
 * the index tuple we stopped the scan on.  On exit, we have pin and read
 * lock on the buffer that now contains that index tuple, and the scandesc's
 * current position is updated to point at it.
 */
static void
_bt_restscan(IndexScanDesc scan)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf = so->btso_curbuf;
	Page		page;
	ItemPointer current = &(scan->currentItemData);
	OffsetNumber offnum = ItemPointerGetOffsetNumber(current),
				maxoff;
	BTPageOpaque opaque;
	Buffer		nextbuf;
	/* heap TID of the item we stopped on; our unique re-find key */
	ItemPointer target = &(so->curHeapIptr);
	BTItem		item;
	BlockNumber blkno;

	/*
	 * Reacquire read lock on the buffer.  (We should still have a
	 * reference-count pin on it, so need not get that.)
	 */
	LockBuffer(buf, BT_READ);

	page = BufferGetPage(buf);
	maxoff = PageGetMaxOffsetNumber(page);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/*
	 * We use this as flag when first index tuple on page is deleted but
	 * we do not move left (this would slowdown vacuum) - so we set
	 * current->ip_posid before first index tuple on the current page
	 * (_bt_step will move it right)...	 XXX still needed?
	 */
	if (!ItemPointerIsValid(target))
	{
		ItemPointerSetOffsetNumber(current,
								   OffsetNumberPrev(P_FIRSTDATAKEY(opaque)));
		return;
	}

	/*
	 * The item we were on may have moved right due to insertions.  Find it
	 * again.  We use the heap TID to identify the item uniquely.
	 */
	for (;;)
	{
		/* Check for item on this page */
		for (;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
			if (BTTidSame(item->bti_itup.t_tid, *target))
			{
				/* Found it */
				current->ip_posid = offnum;
				return;
			}
		}

		/*
		 * The item we're looking for moved right at least one page, so
		 * move right.  We are careful here to pin and read-lock the next
		 * non-dead page before releasing the current one.  This ensures
		 * that a concurrent btbulkdelete scan cannot pass our position
		 * --- if it did, it might be able to reach and delete our target
		 * item before we can find it again.
		 */
		if (P_RIGHTMOST(opaque))
			elog(ERROR, "failed to re-find previous key in \"%s\"",
				 RelationGetRelationName(rel));
		/* Advance to next non-dead page --- there must be one */
		nextbuf = InvalidBuffer;
		for (;;)
		{
			blkno = opaque->btpo_next;
			/* release the previous candidate before pinning the next */
			if (nextbuf != InvalidBuffer)
				_bt_relbuf(rel, nextbuf);
			nextbuf = _bt_getbuf(rel, blkno, BT_READ);
			page = BufferGetPage(nextbuf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
				break;
			if (P_RIGHTMOST(opaque))
				elog(ERROR, "fell off the end of \"%s\"",
					 RelationGetRelationName(rel));
		}
		/* only now is it safe to drop the old page's pin and lock */
		_bt_relbuf(rel, buf);
		so->btso_curbuf = buf = nextbuf;
		maxoff = PageGetMaxOffsetNumber(page);
		offnum = P_FIRSTDATAKEY(opaque);
		ItemPointerSet(current, blkno, offnum);
	}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -