vacuum.c
    }

    if (Irel != (Relation *) NULL)      /* pfree index' allocations */
    {
        pfree(Idesc);
        pfree(idatum);
        pfree(inulls);
        vc_clsindices(nindices, Irel);
    }

    pfree(vpc);
    if (vacrelstats->vtlinks != NULL)
        pfree(vacrelstats->vtlinks);

}   /* vc_rpfheap */

/*
 *  vc_vacheap() -- free dead tuples
 *
 *      This routine marks dead tuples as unused and truncates the relation
 *      if there are "empty" end-blocks.
 */
static void
vc_vacheap(VRelStats *vacrelstats, Relation onerel, VPageList vacuum_pages)
{
    Buffer      buf;
    Page        page;
    VPageDescr *vpp;
    int         nblocks;
    int         i;

    nblocks = vacuum_pages->vpl_num_pages;
    nblocks -= vacuum_pages->vpl_empty_end_pages;   /* nothing to do with them */

    for (i = 0, vpp = vacuum_pages->vpl_pagedesc; i < nblocks; i++, vpp++)
    {
        if ((*vpp)->vpd_offsets_free > 0)
        {
            buf = ReadBuffer(onerel, (*vpp)->vpd_blkno);
            page = BufferGetPage(buf);
            vc_vacpage(page, *vpp);
            WriteBuffer(buf);
        }
    }

    /* truncate relation if there are some empty end-pages */
    if (vacuum_pages->vpl_empty_end_pages > 0)
    {
        Assert(vacrelstats->num_pages >= vacuum_pages->vpl_empty_end_pages);
        nblocks = vacrelstats->num_pages - vacuum_pages->vpl_empty_end_pages;
        elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
             (RelationGetRelationName(onerel))->data,
             vacrelstats->num_pages, nblocks);

        /*
         * we have to flush "empty" end-pages (if changed, but who knows it)
         * before truncation
         */
        FlushBufferPool(!TransactionFlushEnabled());

        i = BlowawayRelationBuffers(onerel, nblocks);
        if (i < 0)
            elog(FATAL, "VACUUM (vc_vacheap): BlowawayRelationBuffers returned %d", i);

        nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
        Assert(nblocks >= 0);
        vacrelstats->num_pages = nblocks;   /* set new number of blocks */
    }

}   /* vc_vacheap */

/*
 *  vc_vacpage() -- free dead tuples on a page
 *                  and repair its fragmentation.
 */
static void
vc_vacpage(Page page, VPageDescr vpd)
{
    ItemId      itemid;
    int         i;

    for (i = 0; i < vpd->vpd_offsets_free; i++)
    {
        itemid = &(((PageHeader) page)->pd_linp[vpd->vpd_offsets[i] - 1]);
        itemid->lp_flags &= ~LP_USED;
    }
    PageRepairFragmentation(page);

}   /* vc_vacpage */
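/*
 * Illustrative sketch only (not part of the original vacuum.c): a toy model
 * of what vc_vacpage() above does -- clear the "used" flag on the reaped
 * slots, then compact what remains.  This is a loose analogy: the real
 * PageRepairFragmentation() keeps line pointers in place and compacts the
 * tuple storage instead of moving array entries.  All names here (ToyItem,
 * toy_vacpage, VACUUM_DOC_SKETCHES) are invented for the example; the guard
 * macro simply keeps the sketch out of any build.
 */
#ifdef VACUUM_DOC_SKETCHES
#include <stdio.h>

#define TOY_USED 0x01

typedef struct ToyItem
{
    int     flags;              /* TOY_USED if the slot holds a live tuple */
    int     payload;            /* stand-in for the tuple data */
} ToyItem;

/* mark the given slots dead, then compact the live ones to the front */
static int
toy_vacpage(ToyItem *items, int nitems, const int *dead, int ndead)
{
    int     i,
            live = 0;

    for (i = 0; i < ndead; i++)
        items[dead[i]].flags &= ~TOY_USED;  /* like clearing LP_USED */

    for (i = 0; i < nitems; i++)            /* like repairing fragmentation */
        if (items[i].flags & TOY_USED)
            items[live++] = items[i];

    return live;                            /* number of live items left */
}

int
main(void)
{
    ToyItem page[4] = {{TOY_USED, 10}, {TOY_USED, 20}, {TOY_USED, 30}, {TOY_USED, 40}};
    int     dead[2] = {1, 3};               /* slots reported as reaped */
    int     live = toy_vacpage(page, 4, dead, 2);

    printf("%d live items: %d %d\n", live, page[0].payload, page[1].payload);
    return 0;
}
#endif   /* VACUUM_DOC_SKETCHES */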
/*
 *  vc_scanoneind() -- scan one index relation to update statistics.
 *
 */
static void
vc_scanoneind(Relation indrel, int num_tuples)
{
    RetrieveIndexResult res;
    IndexScanDesc iscan;
    int         nitups;
    int         nipages;
    struct rusage ru0,
                ru1;

    getrusage(RUSAGE_SELF, &ru0);

    /* walk through the entire index */
    iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
    nitups = 0;

    while ((res = index_getnext(iscan, ForwardScanDirection))
           != (RetrieveIndexResult) NULL)
    {
        nitups++;
        pfree(res);
    }

    index_endscan(iscan);

    /* now update statistics in pg_class */
    nipages = RelationGetNumberOfBlocks(indrel);
    vc_updstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);

    getrusage(RUSAGE_SELF, &ru1);

    elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. Elapsed %u/%u sec.",
         indrel->rd_rel->relname.data, nipages, nitups,
         ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
         ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);

    if (nitups != num_tuples)
        elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
             indrel->rd_rel->relname.data, nitups, num_tuples);

}   /* vc_scanoneind */

/*
 *  vc_vaconeind() -- vacuum one index relation.
 *
 *      Vpl is the VPageList of the heap we're currently vacuuming.
 *      It's locked. Indrel is an index relation on the vacuumed heap.
 *      We don't set locks on the index relation here, since the indexed
 *      access methods support locking at different granularities.
 *      We let them handle it.
 *
 *      Finally, we arrange to update the index relation's statistics in
 *      pg_class.
 */
static void
vc_vaconeind(VPageList vpl, Relation indrel, int num_tuples, int keep_tuples)
{
    RetrieveIndexResult res;
    IndexScanDesc iscan;
    ItemPointer heapptr;
    int         tups_vacuumed;
    int         num_index_tuples;
    int         num_pages;
    VPageDescr  vp;
    struct rusage ru0,
                ru1;

    getrusage(RUSAGE_SELF, &ru0);

    /* walk through the entire index */
    iscan = index_beginscan(indrel, false, 0, (ScanKey) NULL);
    tups_vacuumed = 0;
    num_index_tuples = 0;

    while ((res = index_getnext(iscan, ForwardScanDirection))
           != (RetrieveIndexResult) NULL)
    {
        heapptr = &res->heap_iptr;

        if ((vp = vc_tidreapped(heapptr, vpl)) != (VPageDescr) NULL)
        {
#ifdef NOT_USED
            elog(DEBUG, "<%x,%x> -> <%x,%x>",
                 ItemPointerGetBlockNumber(&(res->index_iptr)),
                 ItemPointerGetOffsetNumber(&(res->index_iptr)),
                 ItemPointerGetBlockNumber(&(res->heap_iptr)),
                 ItemPointerGetOffsetNumber(&(res->heap_iptr)));
#endif
            if (vp->vpd_offsets_free == 0)
            {                   /* this is EmptyPage !!! */
                elog(NOTICE, "Index %s: pointer to EmptyPage (blk %u off %u) - fixing",
                     indrel->rd_rel->relname.data,
                     vp->vpd_blkno, ItemPointerGetOffsetNumber(heapptr));
            }
            ++tups_vacuumed;
            index_delete(indrel, &res->index_iptr);
        }
        else
            num_index_tuples++;

        pfree(res);
    }

    index_endscan(iscan);

    /* now update statistics in pg_class */
    num_pages = RelationGetNumberOfBlocks(indrel);
    vc_updstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);

    getrusage(RUSAGE_SELF, &ru1);

    elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. Elapsed %u/%u sec.",
         indrel->rd_rel->relname.data, num_pages,
         num_index_tuples - keep_tuples, tups_vacuumed,
         ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
         ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);

    if (num_index_tuples != num_tuples + keep_tuples)
        elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u)",
             indrel->rd_rel->relname.data, num_index_tuples, num_tuples);

}   /* vc_vaconeind */

/*
 *  vc_tidreapped() -- is a particular tid reapped?
 *
 *      vpl->VPageDescr_array is sorted in the right order.
 */
static VPageDescr
vc_tidreapped(ItemPointer itemptr, VPageList vpl)
{
    OffsetNumber ioffno;
    OffsetNumber *voff;
    VPageDescr  vp,
               *vpp;
    VPageDescrData vpd;

    vpd.vpd_blkno = ItemPointerGetBlockNumber(itemptr);
    ioffno = ItemPointerGetOffsetNumber(itemptr);

    vp = &vpd;
    vpp = (VPageDescr *) vc_find_eq((void *) (vpl->vpl_pagedesc),
                                    vpl->vpl_num_pages, sizeof(VPageDescr),
                                    (void *) &vp, vc_cmp_blk);

    if (vpp == (VPageDescr *) NULL)
        return (VPageDescr) NULL;
    vp = *vpp;

    /* ok - we are on true page */

    if (vp->vpd_offsets_free == 0)
    {                           /* this is EmptyPage !!! */
        return vp;
    }

    voff = (OffsetNumber *) vc_find_eq((void *) (vp->vpd_offsets),
                                       vp->vpd_offsets_free, sizeof(OffsetNumber),
                                       (void *) &ioffno, vc_cmp_offno);

    if (voff == (OffsetNumber *) NULL)
        return (VPageDescr) NULL;

    return vp;

}   /* vc_tidreapped */
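/*
 * Illustrative sketch only (not part of the original vacuum.c):
 * vc_tidreapped() above performs a two-level lookup -- a binary search for
 * the page descriptor by block number (vc_find_eq/vc_cmp_blk), then a binary
 * search for the offset inside that page's reaped-offset array
 * (vc_find_eq/vc_cmp_offno), with an "empty page" short-circuit.  The sketch
 * reproduces that shape with the standard bsearch(); SketchPage,
 * sketch_tid_reaped and VACUUM_DOC_SKETCHES are invented names.
 */
#ifdef VACUUM_DOC_SKETCHES
#include <stdlib.h>

typedef struct SketchPage
{
    unsigned        blkno;      /* block number, ascending */
    int             noffsets;   /* number of reaped offsets on this page */
    const unsigned *offsets;    /* reaped offsets, ascending */
} SketchPage;

static int
cmp_blk(const void *key, const void *elem)
{
    unsigned        k = *(const unsigned *) key;
    const SketchPage *p = (const SketchPage *) elem;

    return (k > p->blkno) - (k < p->blkno);
}

static int
cmp_off(const void *key, const void *elem)
{
    unsigned        k = *(const unsigned *) key;
    unsigned        o = *(const unsigned *) elem;

    return (k > o) - (k < o);
}

/* return 1 if (blkno, offset) was reaped, 0 otherwise */
static int
sketch_tid_reaped(unsigned blkno, unsigned offset,
                  const SketchPage *pages, int npages)
{
    const SketchPage *p = bsearch(&blkno, pages, npages,
                                  sizeof(SketchPage), cmp_blk);

    if (p == NULL)
        return 0;
    if (p->noffsets == 0)       /* empty page: every tid on it is reaped */
        return 1;
    return bsearch(&offset, p->offsets, p->noffsets,
                   sizeof(unsigned), cmp_off) != NULL;
}
#endif   /* VACUUM_DOC_SKETCHES */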
/*
 *  vc_attrstats() -- compute column statistics used by the optimizer
 *
 *      We compute the column min, max, null and non-null counts.
 *      Plus we attempt to find the count of the value that occurs most
 *      frequently in each column.
 *      These figures are used to compute the selectivity of the column.
 *
 *      We use a three-bucket cache to get the most frequent item.
 *      The 'guess' buckets count hits.  A cache miss causes guess1
 *      to get the most hit 'guess' item in the most recent cycle, and
 *      the new item goes into guess2.  Whenever the total count of hits
 *      of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
 *
 *      This method works perfectly for columns with unique values, and
 *      columns with only two unique values, plus nulls.
 *
 *      It becomes less perfect as the number of unique values increases
 *      and their distribution in the table becomes more random.
 */
static void
vc_attrstats(Relation onerel, VRelStats *vacrelstats, HeapTuple tuple)
{
    int         i,
                attr_cnt = vacrelstats->va_natts;
    VacAttrStats *vacattrstats = vacrelstats->vacattrstats;
    TupleDesc   tupDesc = onerel->rd_att;
    Datum       value;
    bool        isnull;

    for (i = 0; i < attr_cnt; i++)
    {
        VacAttrStats *stats = &vacattrstats[i];
        bool        value_hit = true;

        value = heap_getattr(tuple, stats->attr->attnum, tupDesc, &isnull);

        if (!VacAttrStatsEqValid(stats))
            continue;

        if (isnull)
            stats->null_cnt++;
        else
        {
            stats->nonnull_cnt++;
            if (stats->initialized == false)
            {
                vc_bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
                /* best_cnt gets incremented later */
                vc_bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
                stats->guess1_cnt = stats->guess1_hits = 1;
                vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
                stats->guess2_hits = 1;
                if (VacAttrStatsLtGtValid(stats))
                {
                    vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
                    vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
                }
                stats->initialized = true;
            }

            if (VacAttrStatsLtGtValid(stats))
            {
                if ((*fmgr_faddr(&stats->f_cmplt)) (value, stats->min))
                {
                    vc_bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
                    stats->min_cnt = 0;
                }
                if ((*fmgr_faddr(&stats->f_cmpgt)) (value, stats->max))
                {
                    vc_bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
                    stats->max_cnt = 0;
                }
                if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->min))
                    stats->min_cnt++;
                else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->max))
                    stats->max_cnt++;
            }

            if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->best))
                stats->best_cnt++;
            else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->guess1))
            {
                stats->guess1_cnt++;
                stats->guess1_hits++;
            }
            else if ((*fmgr_faddr(&stats->f_cmpeq)) (value, stats->guess2))
                stats->guess2_hits++;
            else
                value_hit = false;

            if (stats->guess2_hits > stats->guess1_hits)
            {
                swapDatum(stats->guess1, stats->guess2);
                swapInt(stats->guess1_len, stats->guess2_len);
                stats->guess1_cnt = stats->guess2_hits;
                swapLong(stats->guess1_hits, stats->guess2_hits);
            }
            if (stats->guess1_cnt > stats->best_cnt)
            {
                swapDatum(stats->best, stats->guess1);
                swapInt(stats->best_len, stats->guess1_len);
                swapLong(stats->best_cnt, stats->guess1_cnt);
                stats->guess1_hits = 1;
                stats->guess2_hits = 1;
            }
            if (!value_hit)
            {
                vc_bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
                stats->guess1_hits = 1;
                stats->guess2_hits = 1;
            }
        }
    }
    return;
}
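/*
 * Illustrative sketch only (not part of the original vacuum.c): the
 * three-bucket most-common-value cache described above vc_attrstats(),
 * reduced to plain ints so the best/guess1/guess2 bookkeeping is easier to
 * follow.  ToyMcv, toy_mcv_add and VACUUM_DOC_SKETCHES are invented names;
 * the real code works on Datums via vc_bucketcpy() and the fmgr equality
 * function.
 */
#ifdef VACUUM_DOC_SKETCHES
typedef struct ToyMcv
{
    int     best,
            best_cnt;           /* current most-common-value candidate */
    int     guess1,
            guess1_cnt,
            guess1_hits;
    int     guess2,
            guess2_hits;
    int     initialized;
} ToyMcv;

#define TOY_SWAP(a, b)  do { int _t = (a); (a) = (b); (b) = _t; } while (0)

static void
toy_mcv_add(ToyMcv *s, int value)
{
    int     value_hit = 1;

    if (!s->initialized)
    {
        s->best = s->guess1 = s->guess2 = value;
        s->guess1_cnt = s->guess1_hits = s->guess2_hits = 1;
        s->initialized = 1;     /* best_cnt gets incremented just below */
    }

    if (value == s->best)
        s->best_cnt++;
    else if (value == s->guess1)
    {
        s->guess1_cnt++;
        s->guess1_hits++;
    }
    else if (value == s->guess2)
        s->guess2_hits++;
    else
        value_hit = 0;

    /* keep the hotter guess in guess1 */
    if (s->guess2_hits > s->guess1_hits)
    {
        TOY_SWAP(s->guess1, s->guess2);
        s->guess1_cnt = s->guess2_hits;
        TOY_SWAP(s->guess1_hits, s->guess2_hits);
    }

    /* promote guess1 to best once it has accumulated more hits */
    if (s->guess1_cnt > s->best_cnt)
    {
        TOY_SWAP(s->best, s->guess1);
        TOY_SWAP(s->best_cnt, s->guess1_cnt);
        s->guess1_hits = 1;
        s->guess2_hits = 1;
    }

    /* a complete miss recycles guess2 for the new value */
    if (!value_hit)
    {
        s->guess2 = value;
        s->guess1_hits = 1;
        s->guess2_hits = 1;
    }
}
#endif   /* VACUUM_DOC_SKETCHES */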
/*
 *  vc_bucketcpy() -- copy an attribute value into a statistics bucket,
 *                    (re)allocating the bucket if it isn't big enough.
 */
static void
vc_bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int16 *bucket_len)
{
    if (attr->attbyval && attr->attlen != -1)
        *bucket = value;
    else
    {
        int         len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));

        if (len > *bucket_len)
        {
            if (*bucket_len != 0)
                pfree(DatumGetPointer(*bucket));
            *bucket = PointerGetDatum(palloc(len));
            *bucket_len = len;
        }
        memmove(DatumGetPointer(*bucket), DatumGetPointer(value), len);
    }
}

/*
 *  vc_updstats() -- update pg_class statistics for one relation
 *
 *      This routine works for both index and heap relation entries in
 *      pg_class.  We violate no-overwrite semantics here by storing new
 *      values for num_tuples, num_pages, and hasindex directly in the
 *      pg_class tuple that's already on the page.  The reason for this is
 *      that if we updated these tuples in the usual way, then every tuple
 *      in pg_class would be replaced every day.  This would make planning
 *      and executing historical queries very expensive.  Note that we also
 *      don't use any locking while doing the update.
 */
static void
vc_updstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats)
{
    Relation    rd,
                ad,
                sd;
    HeapScanDesc scan;
    HeapTupleData rtup;
    HeapTuple   ctup,
                atup,
                stup;
    Form_pg_class pgcform;
    ScanKeyData askey;
    Form_pg_attribute attp;
    Buffer      buffer;

    /*
     * update number of tuples and number of pages in pg_class
     */
    ctup = SearchSysCacheTupleCopy(RELOID,
                                   ObjectIdGetDatum(relid),
                                   0, 0, 0);
    if (!HeapTupleIsValid(ctup))
        elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
             relid);

    rd = heap_openr(RelationRelationName);

    /* get the buffer cache tuple */
    rtup.t_self = ctup->t_self;
    heap_fetch(rd, SnapshotNow, &rtup, &buffer);
    pfree(ctup);

    /* overwrite the existing statistics in the tuple */
    pgcform = (Form_pg_class) GETSTRUCT(&rtup);
    pgcform->reltuples = num_tuples;
    pgcform->relpages = num_pages;
    pgcform->relhasindex = hasindex;

    if (vacrelstats != NULL && vacrelstats->va_natts > 0)
    {