📄 vacuum.c
        vacuum_delay_point();

        buf = ReadBuffer(onerel, blkno);
        page = BufferGetPage(buf);

        /*
         * Since we are holding exclusive lock on the relation, no other
         * backend can be accessing the page; however it is possible that the
         * background writer will try to write the page if it's already marked
         * dirty. To ensure that invalid data doesn't get written to disk, we
         * must take exclusive buffer lock wherever we potentially modify
         * pages.
         */
        LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);

        vacpage->blkno = blkno;
        vacpage->offsets_used = 0;
        vacpage->offsets_free = 0;

        if (PageIsNew(page))
        {
            VacPage     vacpagecopy;

            ereport(WARNING,
                    (errmsg("relation \"%s\" page %u is uninitialized --- fixing",
                            relname, blkno)));
            PageInit(page, BufferGetPageSize(buf), 0);
            vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
            free_space += vacpage->free;
            empty_pages++;
            empty_end_pages++;
            vacpagecopy = copy_vac_page(vacpage);
            vpage_insert(vacuum_pages, vacpagecopy);
            vpage_insert(fraged_pages, vacpagecopy);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            WriteBuffer(buf);
            continue;
        }

        if (PageIsEmpty(page))
        {
            VacPage     vacpagecopy;

            vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
            free_space += vacpage->free;
            empty_pages++;
            empty_end_pages++;
            vacpagecopy = copy_vac_page(vacpage);
            vpage_insert(vacuum_pages, vacpagecopy);
            vpage_insert(fraged_pages, vacpagecopy);
            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
            ReleaseBuffer(buf);
            continue;
        }

        pgchanged = false;
        notup = true;
        maxoff = PageGetMaxOffsetNumber(page);
        for (offnum = FirstOffsetNumber;
             offnum <= maxoff;
             offnum = OffsetNumberNext(offnum))
        {
            ItemId      itemid = PageGetItemId(page, offnum);
            bool        tupgone = false;

            /*
             * Collect un-used items too - it's possible to have indexes
             * pointing here after crash.
             */
            if (!ItemIdIsUsed(itemid))
            {
                vacpage->offsets[vacpage->offsets_free++] = offnum;
                nunused += 1;
                continue;
            }

            tuple.t_datamcxt = NULL;
            tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
            tuple.t_len = ItemIdGetLength(itemid);
            ItemPointerSet(&(tuple.t_self), blkno, offnum);

            switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
            {
                case HEAPTUPLE_DEAD:
                    tupgone = true; /* we can delete the tuple */
                    break;
                case HEAPTUPLE_LIVE:

                    /*
                     * Tuple is good. Consider whether to replace its xmin
                     * value with FrozenTransactionId.
                     */
                    if (TransactionIdIsNormal(HeapTupleHeaderGetXmin(tuple.t_data)) &&
                        TransactionIdPrecedes(HeapTupleHeaderGetXmin(tuple.t_data),
                                              FreezeLimit))
                    {
                        HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                        /* infomask should be okay already */
                        Assert(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED);
                        pgchanged = true;
                    }

                    /*
                     * Other checks...
                     */
                    if (onerel->rd_rel->relhasoids &&
                        !OidIsValid(HeapTupleGetOid(&tuple)))
                        elog(WARNING, "relation \"%s\" TID %u/%u: OID is invalid",
                             relname, blkno, offnum);
                    break;
                case HEAPTUPLE_RECENTLY_DEAD:

                    /*
                     * If tuple is recently deleted then we must not remove it
                     * from relation.
                     */
                    nkeep += 1;

                    /*
                     * If we do shrinking and this tuple is updated one then
                     * remember it to construct updated tuple dependencies.
                     */
                    if (do_shrinking &&
                        !(ItemPointerEquals(&(tuple.t_self),
                                            &(tuple.t_data->t_ctid))))
                    {
                        if (free_vtlinks == 0)
                        {
                            free_vtlinks = 1000;
                            vtlinks = (VTupleLink) repalloc(vtlinks,
                                                            (free_vtlinks + num_vtlinks) *
                                                            sizeof(VTupleLinkData));
                        }
                        vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
                        vtlinks[num_vtlinks].this_tid = tuple.t_self;
                        free_vtlinks--;
                        num_vtlinks++;
                    }
                    break;
                case HEAPTUPLE_INSERT_IN_PROGRESS:

                    /*
                     * This should not happen, since we hold exclusive lock on
                     * the relation; shouldn't we raise an error? (Actually,
                     * it can happen in system catalogs, since we tend to
                     * release write lock before commit there.)
                     */
                    ereport(NOTICE,
                            (errmsg("relation \"%s\" TID %u/%u: InsertTransactionInProgress %u --- can't shrink relation",
                                    relname, blkno, offnum, HeapTupleHeaderGetXmin(tuple.t_data))));
                    do_shrinking = false;
                    break;
                case HEAPTUPLE_DELETE_IN_PROGRESS:

                    /*
                     * This should not happen, since we hold exclusive lock on
                     * the relation; shouldn't we raise an error? (Actually,
                     * it can happen in system catalogs, since we tend to
                     * release write lock before commit there.)
                     */
                    ereport(NOTICE,
                            (errmsg("relation \"%s\" TID %u/%u: DeleteTransactionInProgress %u --- can't shrink relation",
                                    relname, blkno, offnum, HeapTupleHeaderGetXmax(tuple.t_data))));
                    do_shrinking = false;
                    break;
                default:
                    elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
                    break;
            }

            if (tupgone)
            {
                ItemId      lpp;

                /*
                 * Here we are building a temporary copy of the page with dead
                 * tuples removed. Below we will apply
                 * PageRepairFragmentation to the copy, so that we can
                 * determine how much space will be available after removal of
                 * dead tuples. But note we are NOT changing the real page
                 * yet...
                 */
                if (tempPage == NULL)
                {
                    Size        pageSize;

                    pageSize = PageGetPageSize(page);
                    tempPage = (Page) palloc(pageSize);
                    memcpy(tempPage, page, pageSize);
                }

                /* mark it unused on the temp page */
                lpp = PageGetItemId(tempPage, offnum);
                lpp->lp_flags &= ~LP_USED;

                vacpage->offsets[vacpage->offsets_free++] = offnum;
                tups_vacuumed += 1;
            }
            else
            {
                num_tuples += 1;
                notup = false;
                if (tuple.t_len < min_tlen)
                    min_tlen = tuple.t_len;
                if (tuple.t_len > max_tlen)
                    max_tlen = tuple.t_len;
            }
        }                       /* scan along page */

        if (tempPage != NULL)
        {
            /* Some tuples are removable; figure free space after removal */
            PageRepairFragmentation(tempPage, NULL);
            vacpage->free = ((PageHeader) tempPage)->pd_upper - ((PageHeader) tempPage)->pd_lower;
            pfree(tempPage);
            do_reap = true;
        }
        else
        {
            /* Just use current available space */
            vacpage->free = ((PageHeader) page)->pd_upper - ((PageHeader) page)->pd_lower;
            /* Need to reap the page if it has ~LP_USED line pointers */
            do_reap = (vacpage->offsets_free > 0);
        }

        free_space += vacpage->free;

        /*
         * Add the page to fraged_pages if it has a useful amount of free
         * space. "Useful" means enough for a minimal-sized tuple. But we
         * don't know that accurately near the start of the relation, so add
         * pages unconditionally if they have >= BLCKSZ/10 free space.
         */
        do_frag = (vacpage->free >= min_tlen || vacpage->free >= BLCKSZ / 10);

        if (do_reap || do_frag)
        {
            VacPage     vacpagecopy = copy_vac_page(vacpage);

            if (do_reap)
                vpage_insert(vacuum_pages, vacpagecopy);
            if (do_frag)
                vpage_insert(fraged_pages, vacpagecopy);
        }

        /*
         * Include the page in empty_end_pages if it will be empty after
         * vacuuming; this is to keep us from using it as a move destination.
         */
        if (notup)
        {
            empty_pages++;
            empty_end_pages++;
        }
        else
            empty_end_pages = 0;

        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        if (pgchanged)
            WriteBuffer(buf);
        else
            ReleaseBuffer(buf);
    }

    pfree(vacpage);

    /* save stats in the rel list for use later */
    vacrelstats->rel_tuples = num_tuples;
    vacrelstats->rel_pages = nblocks;
    if (num_tuples == 0)
        min_tlen = max_tlen = 0;
    vacrelstats->min_tlen = min_tlen;
    vacrelstats->max_tlen = max_tlen;

    vacuum_pages->empty_end_pages = empty_end_pages;
    fraged_pages->empty_end_pages = empty_end_pages;

    /*
     * Clear the fraged_pages list if we found we couldn't shrink. Else,
     * remove any "empty" end-pages from the list, and compute usable free
     * space = free space in remaining pages.
     */
    if (do_shrinking)
    {
        int         i;

        Assert((BlockNumber) fraged_pages->num_pages >= empty_end_pages);
        fraged_pages->num_pages -= empty_end_pages;
        usable_free_space = 0;
        for (i = 0; i < fraged_pages->num_pages; i++)
            usable_free_space += fraged_pages->pagedesc[i]->free;
    }
    else
    {
        fraged_pages->num_pages = 0;
        usable_free_space = 0;
    }

    /* don't bother to save vtlinks if we will not call repair_frag */
    if (fraged_pages->num_pages > 0 && num_vtlinks > 0)
    {
        qsort((char *) vtlinks, num_vtlinks, sizeof(VTupleLinkData),
              vac_cmp_vtlinks);
        vacrelstats->vtlinks = vtlinks;
        vacrelstats->num_vtlinks = num_vtlinks;
    }
    else
    {
        vacrelstats->vtlinks = NULL;
        vacrelstats->num_vtlinks = 0;
        pfree(vtlinks);
    }

    ereport(elevel,
            (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
                    RelationGetRelationName(onerel),
                    tups_vacuumed, num_tuples, nblocks),
             errdetail("%.0f dead row versions cannot be removed yet.\n"
                       "Nonremovable row versions range from %lu to %lu bytes long.\n"
                       "There were %.0f unused item pointers.\n"
                       "Total free space (including removable row versions) is %.0f bytes.\n"
                       "%u pages are or will become empty, including %u at the end of the table.\n"
                       "%u pages containing %.0f free bytes are potential move destinations.\n"
                       "%s.",
                       nkeep,
                       (unsigned long) min_tlen, (unsigned long) max_tlen,
                       nunused,
                       free_space,
                       empty_pages, empty_end_pages,
                       fraged_pages->num_pages, usable_free_space,
                       pg_rusage_show(&ru0))));
}

/*
 * repair_frag() -- try to repair relation's fragmentation
 *
 * This routine marks dead tuples as unused and tries re-use dead space
 * by moving tuples (and inserting indexes if needed). It constructs
 * Nvacpagelist list of free-ed pages (moved tuples) and clean indexes
 * for them after committing (in hack-manner - without losing locks
 * and freeing memory!) current transaction. It truncates relation
 * if some end-blocks are gone away.
 */
static void
repair_frag(VRelStats *vacrelstats, Relation onerel,
            VacPageList vacuum_pages, VacPageList fraged_pages,
            int nindexes, Relation *Irel)
{
    TransactionId myXID = GetCurrentTransactionId();
    Buffer      dst_buffer = InvalidBuffer;
    BlockNumber nblocks,
                blkno;
    BlockNumber last_move_dest_block = 0,
                last_vacuum_block;
    Page        dst_page = NULL;
    ExecContextData ec;
    VacPageListData Nvacpagelist;
    VacPage     dst_vacpage = NULL,
                last_vacuum_page,
                vacpage,
               *curpage;
    int         i;
    int         num_moved = 0,
                num_fraged_pages,
                vacuumed_pages;
    int         keep_tuples = 0;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    ExecContext_Init(&ec, onerel);

    Nvacpagelist.num_pages = 0;
    num_fraged_pages = fraged_pages->num_pages;
    Assert((BlockNumber) vacuum_pages->num_pages >= vacuum_pages->empty_end_pages);
    vacuumed_pages = vacuum_pages->num_pages - vacuum_pages->empty_end_pages;
    if (vacuumed_pages > 0)
    {
        /* get last reaped page from vacuum_pages */
        last_vacuum_page = vacuum_pages->pagedesc[vacuumed_pages - 1];
        last_vacuum_block = last_vacuum_page->blkno;
    }
    else
    {
        last_vacuum_page = NULL;
        last_vacuum_block = InvalidBlockNumber;
    }

    vacpage = (VacPage) palloc(sizeof(VacPageData) +
                               MaxOffsetNumber * sizeof(OffsetNumber));
    vacpage->offsets_used = vacpage->offsets_free = 0;

    /*
     * Scan pages backwards from the last nonempty page, trying to move tuples
     * down to lower pages. Quit when we reach a page that we have moved any
     * tuples onto, or the first page if we haven't moved anything, or when we
     * find a page we cannot completely empty (this last condition is handled
     * by "break" statements within the loop).
     *
     * NB: this code depends on the vacuum_pages and fraged_pages lists being
     * in order by blkno.
     */
    nblocks = vacrelstats->rel_pages;
    for (blkno = nblocks - vacuum_pages->empty_end_pages - 1;
         blkno > last_move_dest_block;
         blkno--)
    {
        Buffer      buf;
        Page        page;
        OffsetNumber offnum,
                    maxoff;
        bool        isempty,
                    dowrite,
                    chain_tuple_moved;

        vacuum_delay_point();

        /*
         * Forget fraged_pages pages at or after this one; they're no longer
         * useful as move targets, since we only want to move down. Note that
         * since we stop the outer loop at last_move_dest_block, pages removed
         * here cannot have had anything moved onto them already.
         *
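A note on the free-space figure this scan accumulates: vacpage->free is simply the byte gap between the page header's pd_lower (end of the line-pointer array) and pd_upper (start of tuple data), so it measures the contiguous hole in the middle of the page; the temporary-copy pass with PageRepairFragmentation exists to estimate what that gap would be once dead tuples are compacted away. Below is a minimal standalone sketch of that arithmetic, assuming a simplified MiniPageHeader stand-in (hypothetical, for illustration only; it is not the real PageHeaderData layout):

#include <stdint.h>
#include <stdio.h>

/*
 * Simplified stand-in for the two PageHeaderData fields this scan reads.
 * Hypothetical illustration only; the real header has more fields.
 */
typedef struct MiniPageHeader
{
    uint16_t    pd_lower;       /* offset to start of free space */
    uint16_t    pd_upper;       /* offset to end of free space */
} MiniPageHeader;

/* Free space on a page, as scan_heap computes it: pd_upper - pd_lower. */
static size_t
page_free_space(const MiniPageHeader *hdr)
{
    return (size_t) hdr->pd_upper - (size_t) hdr->pd_lower;
}

int
main(void)
{
    /* e.g. line pointers end at offset 64, tuple data starts at 7400 */
    MiniPageHeader hdr = {.pd_lower = 64, .pd_upper = 7400};

    printf("free bytes on page: %zu\n", page_free_space(&hdr));
    return 0;
}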