nbtree.c
		if (BufferIsValid(so->btso_mrkbuf))
			ReleaseBuffer(so->btso_mrkbuf);
		so->btso_mrkbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	if (so->keyData != NULL)
		pfree(so->keyData);
	pfree(so);

	PG_RETURN_VOID();
}

/*
 *	btmarkpos() -- save current scan position
 */
Datum
btmarkpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer iptr;
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pin */
	if (ItemPointerIsValid(iptr = &(scan->currentMarkData)))
	{
		ReleaseBuffer(so->btso_mrkbuf);
		so->btso_mrkbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	/* bump pin on current buffer for assignment to mark buffer */
	if (ItemPointerIsValid(&(scan->currentItemData)))
	{
		IncrBufferRefCount(so->btso_curbuf);
		so->btso_mrkbuf = so->btso_curbuf;
		scan->currentMarkData = scan->currentItemData;
		so->mrkHeapIptr = so->curHeapIptr;
	}

	PG_RETURN_VOID();
}

/*
 *	btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
	IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
	ItemPointer iptr;
	BTScanOpaque so;

	so = (BTScanOpaque) scan->opaque;

	/* we aren't holding any read locks, but gotta drop the pin */
	if (ItemPointerIsValid(iptr = &(scan->currentItemData)))
	{
		ReleaseBuffer(so->btso_curbuf);
		so->btso_curbuf = InvalidBuffer;
		ItemPointerSetInvalid(iptr);
	}

	/* bump pin on marked buffer */
	if (ItemPointerIsValid(&(scan->currentMarkData)))
	{
		IncrBufferRefCount(so->btso_mrkbuf);
		so->btso_curbuf = so->btso_mrkbuf;
		scan->currentItemData = scan->currentMarkData;
		so->curHeapIptr = so->mrkHeapIptr;
	}

	PG_RETURN_VOID();
}
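/*
 * Illustrative sketch (not part of the original file): how btmarkpos() and
 * btrestrpos() are reached from above.  index_markpos() and index_restrpos()
 * are the real indexam.c entry points that dispatch to the two functions
 * just shown; the wrapper below and its name are hypothetical.  A merge
 * join, for instance, marks a position on its inner scan and rewinds to it
 * when the outer side repeats a key.
 */
static void
example_mark_and_restore(IndexScanDesc scan, ScanDirection dir)
{
	/* remember the current position; btmarkpos() bumps the buffer pin */
	index_markpos(scan);

	/* read ahead... */
	(void) index_getnext(scan, dir);

	/* ...then rewind the scan to the remembered position */
	index_restrpos(scan);
}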
/*
 * Bulk deletion of all index entries pointing to a set of heap tuples.
 * The set of target tuples is specified via a callback routine that tells
 * whether any given heap tuple (identified by ItemPointer) is being deleted.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
btbulkdelete(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
	void	   *callback_state = (void *) PG_GETARG_POINTER(2);
	IndexBulkDeleteResult *result;
	double		tuples_removed;
	double		num_index_tuples;
	OffsetNumber deletable[MaxOffsetNumber];
	int			ndeletable;
	Buffer		buf;
	BlockNumber num_pages;

	tuples_removed = 0;
	num_index_tuples = 0;

	/*
	 * The outer loop iterates over index leaf pages, the inner over items
	 * on a leaf page.  We issue just one _bt_delitems() call per page, so
	 * as to minimize WAL traffic.
	 *
	 * Note that we exclusive-lock every leaf page containing data items, in
	 * sequence left to right.  It sounds attractive to only exclusive-lock
	 * those containing items we need to delete, but unfortunately that is
	 * not safe: we could then pass a stopped indexscan, which could in rare
	 * cases lead to deleting the item it needs to find when it resumes.
	 * (See _bt_restscan --- this could only happen if an indexscan stops on
	 * a deletable item and then a page split moves that item into a page
	 * further to its right, which the indexscan will have no pin on.)  We
	 * can skip obtaining exclusive lock on empty pages though, since no
	 * indexscan could be stopped on those.
	 */
	buf = _bt_get_endpoint(rel, 0, false);

	if (BufferIsValid(buf))		/* check for empty index */
	{
		for (;;)
		{
			Page		page;
			BTPageOpaque opaque;
			OffsetNumber offnum,
						minoff,
						maxoff;
			BlockNumber nextpage;

			ndeletable = 0;
			page = BufferGetPage(buf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			minoff = P_FIRSTDATAKEY(opaque);
			maxoff = PageGetMaxOffsetNumber(page);

			/* We probably cannot see deleted pages, but skip 'em if so */
			if (minoff <= maxoff && !P_ISDELETED(opaque))
			{
				/*
				 * Trade in the initial read lock for a super-exclusive
				 * write lock on this page.
				 */
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
				LockBufferForCleanup(buf);

				/*
				 * Recompute minoff/maxoff, both of which could have changed
				 * while we weren't holding the lock.
				 */
				minoff = P_FIRSTDATAKEY(opaque);
				maxoff = PageGetMaxOffsetNumber(page);

				/*
				 * Scan over all items to see which ones need to be deleted
				 * according to the callback function.
				 */
				for (offnum = minoff;
					 offnum <= maxoff;
					 offnum = OffsetNumberNext(offnum))
				{
					BTItem		btitem;
					ItemPointer htup;

					btitem = (BTItem) PageGetItem(page,
											PageGetItemId(page, offnum));
					htup = &(btitem->bti_itup.t_tid);
					if (callback(htup, callback_state))
					{
						deletable[ndeletable++] = offnum;
						tuples_removed += 1;
					}
					else
						num_index_tuples += 1;
				}
			}

			/*
			 * If we need to delete anything, do it and write the buffer;
			 * else just release the buffer.
			 */
			nextpage = opaque->btpo_next;
			if (ndeletable > 0)
			{
				_bt_delitems(rel, buf, deletable, ndeletable);
				_bt_wrtbuf(rel, buf);
			}
			else
				_bt_relbuf(rel, buf);

			/* call vacuum_delay_point while not holding any buffer lock */
			vacuum_delay_point();

			/* And advance to next page, if any */
			if (nextpage == P_NONE)
				break;
			buf = _bt_getbuf(rel, nextpage, BT_READ);
		}
	}

	/* return statistics */
	num_pages = RelationGetNumberOfBlocks(rel);

	result = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
	result->num_pages = num_pages;
	result->num_index_tuples = num_index_tuples;
	result->tuples_removed = tuples_removed;

	PG_RETURN_POINTER(result);
}
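/*
 * Illustrative sketch (not part of the original file): the shape of the
 * callback btbulkdelete() expects.  genam.h declares it as
 * "typedef bool (*IndexBulkDeleteCallback) (ItemPointer itemptr, void *state)".
 * The struct, names, and linear search below are hypothetical; the real
 * callback used by VACUUM (lazy_tid_reaped in vacuumlazy.c) binary-searches
 * a sorted TID array instead.
 */
typedef struct ExampleDeadTidState
{
	ItemPointerData *dead_tids;	/* TIDs of heap tuples being removed */
	int			num_dead_tids;
} ExampleDeadTidState;

static bool
example_tid_reaped(ItemPointer itemptr, void *state)
{
	ExampleDeadTidState *dt = (ExampleDeadTidState *) state;
	int			i;

	for (i = 0; i < dt->num_dead_tids; i++)
	{
		/* index entry points at a dead heap tuple: caller should delete it */
		if (ItemPointerEquals(itemptr, &dt->dead_tids[i]))
			return true;
	}
	return false;
}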
/*
 * Post-VACUUM cleanup.
 *
 * Here, we scan looking for pages we can delete or return to the freelist.
 *
 * Result: a palloc'd struct containing statistical info for VACUUM displays.
 */
Datum
btvacuumcleanup(PG_FUNCTION_ARGS)
{
	Relation	rel = (Relation) PG_GETARG_POINTER(0);
	IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
	IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
	BlockNumber num_pages;
	BlockNumber blkno;
	BlockNumber *freePages;
	int			nFreePages,
				maxFreePages;
	BlockNumber pages_deleted = 0;
	MemoryContext mycontext;
	MemoryContext oldcontext;
	bool		needLock;

	Assert(stats != NULL);

	/*
	 * First find out the number of pages in the index.  We must acquire the
	 * relation-extension lock while doing this to avoid a race condition:
	 * if someone else is extending the relation, there is a window where
	 * bufmgr/smgr have created a new all-zero page but it hasn't yet been
	 * write-locked by _bt_getbuf().  If we manage to scan such a page here,
	 * we'll improperly assume it can be recycled.  Taking the lock
	 * synchronizes things enough to prevent a problem: either num_pages
	 * won't include the new page, or _bt_getbuf already has write lock on
	 * the buffer and it will be fully initialized before we can examine it.
	 * (See also vacuumlazy.c, which has the same issue.)
	 *
	 * We can skip locking for new or temp relations, however, since no one
	 * else could be accessing them.
	 */
	needLock = !RELATION_IS_LOCAL(rel);

	if (needLock)
		LockRelationForExtension(rel, ExclusiveLock);
	num_pages = RelationGetNumberOfBlocks(rel);
	if (needLock)
		UnlockRelationForExtension(rel, ExclusiveLock);

	/* No point in remembering more than MaxFSMPages pages */
	maxFreePages = MaxFSMPages;
	if ((BlockNumber) maxFreePages > num_pages)
		maxFreePages = (int) num_pages;
	freePages = (BlockNumber *) palloc(maxFreePages * sizeof(BlockNumber));
	nFreePages = 0;

	/* Create a temporary memory context to run _bt_pagedel in */
	mycontext = AllocSetContextCreate(CurrentMemoryContext,
									  "_bt_pagedel",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);

	/*
	 * Scan through all pages of index, except metapage.  (Any pages added
	 * after we start the scan will not be examined; this should be fine,
	 * since they can't possibly be empty.)
	 */
	for (blkno = BTREE_METAPAGE + 1; blkno < num_pages; blkno++)
	{
		Buffer		buf;
		Page		page;
		BTPageOpaque opaque;

		vacuum_delay_point();

		buf = _bt_getbuf(rel, blkno, BT_READ);
		page = BufferGetPage(buf);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);
		if (_bt_page_recyclable(page))
		{
			/* Okay to recycle this page */
			if (nFreePages < maxFreePages)
				freePages[nFreePages++] = blkno;
			pages_deleted++;
		}
		else if (P_ISDELETED(opaque))
		{
			/* Already deleted, but can't recycle yet */
			pages_deleted++;
		}
		else if ((opaque->btpo_flags & BTP_HALF_DEAD) ||
				 P_FIRSTDATAKEY(opaque) > PageGetMaxOffsetNumber(page))
		{
			/* Empty, try to delete */
			int			ndel;

			/* Run pagedel in a temp context to avoid memory leakage */
			MemoryContextReset(mycontext);
			oldcontext = MemoryContextSwitchTo(mycontext);

			ndel = _bt_pagedel(rel, buf, info->vacuum_full);

			/* count only this page, else may double-count parent */
			if (ndel)
				pages_deleted++;

			/*
			 * During VACUUM FULL it's okay to recycle deleted pages
			 * immediately, since there can be no other transactions
			 * scanning the index.  Note that we will only recycle the
			 * current page and not any parent pages that _bt_pagedel might
			 * have recursed to; this seems reasonable in the name of
			 * simplicity.  (Trying to do otherwise would mean we'd have to
			 * sort the list of recyclable pages we're building.)
			 */
			if (ndel && info->vacuum_full)
			{
				if (nFreePages < maxFreePages)
					freePages[nFreePages++] = blkno;
			}

			MemoryContextSwitchTo(oldcontext);
			continue;			/* pagedel released buffer */
		}

		_bt_relbuf(rel, buf);
	}

	/*
	 * During VACUUM FULL, we truncate off any recyclable pages at the end
	 * of the index.  In a normal vacuum it'd be unsafe to do this except by
	 * acquiring exclusive lock on the index and then rechecking all the
	 * pages; doesn't seem worth it.
	 */
	if (info->vacuum_full && nFreePages > 0)
	{
		BlockNumber new_pages = num_pages;

		while (nFreePages > 0 && freePages[nFreePages - 1] == new_pages - 1)
		{
			new_pages--;
			pages_deleted--;
			nFreePages--;
		}
		if (new_pages != num_pages)
		{
			/*
			 * Okay to truncate.
			 */
			RelationTruncate(rel, new_pages);

			/* update statistics */
			stats->pages_removed = num_pages - new_pages;

			num_pages = new_pages;
		}
	}

	/*
	 * Update the shared Free Space Map with the info we now have about free
	 * pages in the index, discarding any old info the map may have.  We do
	 * not need to sort the page numbers; they're in order already.
	 */
	RecordIndexFreeSpace(&rel->rd_node, nFreePages, freePages);

	pfree(freePages);

	MemoryContextDelete(mycontext);

	/* update statistics */
	stats->num_pages = num_pages;
	stats->pages_deleted = pages_deleted;
	stats->pages_free = nFreePages;

	PG_RETURN_POINTER(stats);
}
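/*
 * Illustrative sketch (not part of the original file): the temporary
 * memory-context pattern btvacuumcleanup() uses around _bt_pagedel(),
 * shown in isolation.  Anything palloc'd between the two
 * MemoryContextSwitchTo() calls is freed wholesale by the next
 * MemoryContextReset(), so per-iteration allocations cannot accumulate.
 * The function name and loop here are hypothetical.
 */
static void
example_leakproof_loop(void)
{
	MemoryContext tmpcxt;
	MemoryContext oldcxt;
	int			i;

	tmpcxt = AllocSetContextCreate(CurrentMemoryContext,
								   "example temp context",
								   ALLOCSET_DEFAULT_MINSIZE,
								   ALLOCSET_DEFAULT_INITSIZE,
								   ALLOCSET_DEFAULT_MAXSIZE);
	for (i = 0; i < 100; i++)
	{
		MemoryContextReset(tmpcxt);
		oldcxt = MemoryContextSwitchTo(tmpcxt);
		/* ... work that may palloc freely goes here ... */
		MemoryContextSwitchTo(oldcxt);
	}
	MemoryContextDelete(tmpcxt);
}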
/*
 * Restore scan position when btgettuple is called to continue a scan.
 *
 * This is nontrivial because concurrent insertions might have moved the
 * index tuple we stopped on.  We assume the tuple can only have moved to
 * the right from our stop point, because we kept a pin on the buffer,
 * and so no deletion can have occurred on that page.
 *
 * On entry, we have a pin but no read lock on the buffer that contained
 * the index tuple we stopped the scan on.  On exit, we have pin and read
 * lock on the buffer that now contains that index tuple, and the scandesc's
 * current position is updated to point at it.
 */
static void
_bt_restscan(IndexScanDesc scan)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf = so->btso_curbuf;
	Page		page;
	ItemPointer current = &(scan->currentItemData);
	OffsetNumber offnum = ItemPointerGetOffsetNumber(current),
				maxoff;
	BTPageOpaque opaque;
	Buffer		nextbuf;
	ItemPointer target = &(so->curHeapIptr);
	BTItem		item;
	BlockNumber blkno;

	/*
	 * Reacquire read lock on the buffer.  (We should still have a
	 * reference-count pin on it, so need not get that.)
	 */
	LockBuffer(buf, BT_READ);

	page = BufferGetPage(buf);
	maxoff = PageGetMaxOffsetNumber(page);
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);

	/*
	 * We use this as a flag when the first index tuple on the page is
	 * deleted but we do not move left (that would slow down vacuum) - so
	 * we set current->ip_posid before the first index tuple on the current
	 * page (_bt_step will move it right)...  XXX still needed?
	 */
	if (!ItemPointerIsValid(target))
	{
		ItemPointerSetOffsetNumber(current,
							OffsetNumberPrev(P_FIRSTDATAKEY(opaque)));
		return;
	}

	/*
	 * The item we were on may have moved right due to insertions.  Find it
	 * again.  We use the heap TID to identify the item uniquely.
	 */
	for (;;)
	{
		/* Check for item on this page */
		for (;
			 offnum <= maxoff;
			 offnum = OffsetNumberNext(offnum))
		{
			item = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
			if (BTTidSame(item->bti_itup.t_tid, *target))
			{
				/* Found it */
				current->ip_posid = offnum;
				return;
			}
		}

		/*
		 * The item we're looking for moved right at least one page, so
		 * move right.  We are careful here to pin and read-lock the next
		 * non-dead page before releasing the current one.  This ensures
		 * that a concurrent btbulkdelete scan cannot pass our position ---
		 * if it did, it might be able to reach and delete our target item
		 * before we can find it again.
		 */
		if (P_RIGHTMOST(opaque))
			elog(ERROR, "failed to re-find previous key in \"%s\"",
				 RelationGetRelationName(rel));
		/* Advance to next non-dead page --- there must be one */
		nextbuf = InvalidBuffer;
		for (;;)
		{
			blkno = opaque->btpo_next;
			nextbuf = _bt_relandgetbuf(rel, nextbuf, blkno, BT_READ);
			page = BufferGetPage(nextbuf);
			opaque = (BTPageOpaque) PageGetSpecialPointer(page);
			if (!P_IGNORE(opaque))
				break;
			if (P_RIGHTMOST(opaque))
				elog(ERROR, "fell off the end of \"%s\"",
					 RelationGetRelationName(rel));
		}
		_bt_relbuf(rel, buf);
		so->btso_curbuf = buf = nextbuf;
		maxoff = PageGetMaxOffsetNumber(page);
		offnum = P_FIRSTDATAKEY(opaque);
		ItemPointerSet(current, blkno, offnum);
	}
}
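/*
 * For context (not part of the original file): _bt_restscan() is invoked
 * from btgettuple(), which appears earlier in nbtree.c and is outside this
 * excerpt.  The sketch below condenses that call site; details are
 * abridged and may differ from the actual function.
 */
static bool
example_gettuple_resume(IndexScanDesc scan, ScanDirection dir)
{
	if (ItemPointerIsValid(&(scan->currentItemData)))
	{
		/* mid-scan: re-find the stopped-on tuple, then step from it */
		_bt_restscan(scan);
		return _bt_next(scan, dir);
	}

	/* first call: descend the tree to the scan's starting boundary */
	return _bt_first(scan, dir);
}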