analyze.c
来自「PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统」· C语言 代码 · 共 2,242 行 · 第 1/5 页
C
2,242 行
/*-------------------------------------------------------------------------
 *
 * analyze.c
 *    the Postgres statistics generator
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.89.2.1 2005/11/22 18:23:06 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/tuplesort.h"


/* Data structure for Algorithm S from Knuth 3.4.2 */
typedef struct
{
    BlockNumber N;              /* number of blocks, known in advance */
    int         n;              /* desired sample size */
    BlockNumber t;              /* current block number */
    int         m;              /* blocks selected so far */
} BlockSamplerData;

/* Handle type taken by the BlockSampler_* routines declared below */
typedef BlockSamplerData *BlockSampler;

/* Per-index data for ANALYZE */
typedef struct AnlIndexData
{
    IndexInfo  *indexInfo;      /* BuildIndexInfo result */
    double      tupleFract;     /* fraction of rows for partial index */
    VacAttrStats **vacattrstats;    /* index attrs to analyze */
    int         attr_cnt;       /* number of entries filled in vacattrstats */
} AnlIndexData;


/* Default statistics target (GUC parameter) */
int         default_statistics_target = 10;

/*
 * Message level passed to ereport() for progress output.  analyze_rel()
 * assigns it on entry: INFO when the statement is verbose, DEBUG2
 * otherwise.  It holds -1 until that first assignment.
 */
static int  elevel = -1;

/*
 * Memory context used for storing analysis info.  analyze_rel() sets it
 * to CurrentMemoryContext on entry.
 */
static MemoryContext anl_context = NULL;


/* Forward declarations of file-local routines */
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
                              int samplesize);
static bool BlockSampler_HasMore(BlockSampler bs);
static BlockNumber BlockSampler_Next(BlockSampler bs);
/* storage class of the compute_index_stats() prototype on the next line */
static
void compute_index_stats(Relation onerel, double totalrows, AnlIndexData *indexdata, int nindexes, HeapTuple *rows, int numrows, MemoryContext col_context);static VacAttrStats *examine_attribute(Relation onerel, int attnum);static int acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows);static double random_fract(void);static double init_selection_state(int n);static double get_next_S(double t, int n, double *stateptr);static int compare_rows(const void *a, const void *b);static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);static bool std_typanalyze(VacAttrStats *stats);/* * analyze_rel() -- analyze one relation */voidanalyze_rel(Oid relid, VacuumStmt *vacstmt){ Relation onerel; int attr_cnt, tcnt, i, ind; Relation *Irel; int nindexes; bool hasindex; bool analyzableindex; VacAttrStats **vacattrstats; AnlIndexData *indexdata; int targrows, numrows; double totalrows, totaldeadrows; HeapTuple *rows; if (vacstmt->verbose) elevel = INFO; else elevel = DEBUG2; /* * Use the current context for storing analysis info. vacuum.c ensures * that this context will be cleared when I return, thus releasing the * memory allocated here. */ anl_context = CurrentMemoryContext; /* * Check for user-requested abort. Note we want this to be inside a * transaction, so xact.c doesn't issue useless WARNING. */ CHECK_FOR_INTERRUPTS(); /* * Race condition -- if the pg_class tuple has gone away since the last * time we saw it, we don't need to process it. */ if (!SearchSysCacheExists(RELOID, ObjectIdGetDatum(relid), 0, 0, 0)) return; /* * Open the class, getting only a read lock on it, and check permissions. * Permissions check should match vacuum's check! 
*/ onerel = relation_open(relid, AccessShareLock); if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) || (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared))) { /* No need for a WARNING if we already complained during VACUUM */ if (!vacstmt->vacuum) ereport(WARNING, (errmsg("skipping \"%s\" --- only table or database owner can analyze it", RelationGetRelationName(onerel)))); relation_close(onerel, AccessShareLock); return; } /* * Check that it's a plain table; we used to do this in get_rel_oids() but * seems safer to check after we've locked the relation. */ if (onerel->rd_rel->relkind != RELKIND_RELATION) { /* No need for a WARNING if we already complained during VACUUM */ if (!vacstmt->vacuum) ereport(WARNING, (errmsg("skipping \"%s\" --- cannot analyze indexes, views, or special system tables", RelationGetRelationName(onerel)))); relation_close(onerel, AccessShareLock); return; } /* * Silently ignore tables that are temp tables of other backends --- * trying to analyze these is rather pointless, since their contents are * probably not up-to-date on disk. (We don't throw a warning here; it * would just lead to chatter during a database-wide ANALYZE.) */ if (isOtherTempNamespace(RelationGetNamespace(onerel))) { relation_close(onerel, AccessShareLock); return; } /* * We can ANALYZE any table except pg_statistic. See update_attstats */ if (RelationGetRelid(onerel) == StatisticRelationId) { relation_close(onerel, AccessShareLock); return; } ereport(elevel, (errmsg("analyzing \"%s.%s\"", get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel)))); /* * Determine which columns to analyze * * Note that system attributes are never analyzed. 
*/ if (vacstmt->va_cols != NIL) { ListCell *le; vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) * sizeof(VacAttrStats *)); tcnt = 0; foreach(le, vacstmt->va_cols) { char *col = strVal(lfirst(le)); i = attnameAttNum(onerel, col, false); vacattrstats[tcnt] = examine_attribute(onerel, i); if (vacattrstats[tcnt] != NULL) tcnt++; } attr_cnt = tcnt; } else { attr_cnt = onerel->rd_att->natts; vacattrstats = (VacAttrStats **) palloc(attr_cnt * sizeof(VacAttrStats *)); tcnt = 0; for (i = 1; i <= attr_cnt; i++) { vacattrstats[tcnt] = examine_attribute(onerel, i); if (vacattrstats[tcnt] != NULL) tcnt++; } attr_cnt = tcnt; } /* * Open all indexes of the relation, and see if there are any analyzable * columns in the indexes. We do not analyze index columns if there was * an explicit column list in the ANALYZE command, however. */ vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel); hasindex = (nindexes > 0); indexdata = NULL; analyzableindex = false; if (hasindex) { indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData)); for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; IndexInfo *indexInfo; thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]); thisdata->tupleFract = 1.0; /* fix later if partial */ if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL) { ListCell *indexpr_item = list_head(indexInfo->ii_Expressions); thisdata->vacattrstats = (VacAttrStats **) palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *)); tcnt = 0; for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++) { int keycol = indexInfo->ii_KeyAttrNumbers[i]; if (keycol == 0) { /* Found an index expression */ Node *indexkey; if (indexpr_item == NULL) /* shouldn't happen */ elog(ERROR, "too few entries in indexprs list"); indexkey = (Node *) lfirst(indexpr_item); indexpr_item = lnext(indexpr_item); /* * Can't analyze if the opclass uses a storage type * different from the expression result type. 
We'd get * confused because the type shown in pg_attribute for * the index column doesn't match what we are getting * from the expression. Perhaps this can be fixed * someday, but for now, punt. */ if (exprType(indexkey) != Irel[ind]->rd_att->attrs[i]->atttypid) continue; thisdata->vacattrstats[tcnt] = examine_attribute(Irel[ind], i + 1); if (thisdata->vacattrstats[tcnt] != NULL) { tcnt++; analyzableindex = true; } } } thisdata->attr_cnt = tcnt; } } } /* * Quit if no analyzable columns */ if (attr_cnt <= 0 && !analyzableindex) { /* * We report that the table is empty; this is just so that the * autovacuum code doesn't go nuts trying to get stats about a * zero-column table. */ if (!vacstmt->vacuum) pgstat_report_analyze(RelationGetRelid(onerel), onerel->rd_rel->relisshared, 0, 0); vac_close_indexes(nindexes, Irel, AccessShareLock); relation_close(onerel, AccessShareLock); return; } /* * Determine how many rows we need to sample, using the worst case from * all analyzable columns. We use a lower bound of 100 rows to avoid * possible overflow in Vitter's algorithm. */ targrows = 100; for (i = 0; i < attr_cnt; i++) { if (targrows < vacattrstats[i]->minrows) targrows = vacattrstats[i]->minrows; } for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; for (i = 0; i < thisdata->attr_cnt; i++) { if (targrows < thisdata->vacattrstats[i]->minrows) targrows = thisdata->vacattrstats[i]->minrows; } } /* * Acquire the sample rows */ rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple)); numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows, &totaldeadrows); /* * Compute the statistics. Temporary results during the calculations for * each column are stored in a child context. The calc routines are * responsible to make sure that whatever they store into the VacAttrStats * structure is allocated in anl_context. 
*/ if (numrows > 0) { MemoryContext col_context, old_context; col_context = AllocSetContextCreate(anl_context, "Analyze Column", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); old_context = MemoryContextSwitchTo(col_context); for (i = 0; i < attr_cnt; i++) { VacAttrStats *stats = vacattrstats[i]; stats->rows = rows; stats->tupDesc = onerel->rd_att; (*stats->compute_stats) (stats, std_fetch_func, numrows, totalrows); MemoryContextResetAndDeleteChildren(col_context); } if (hasindex) compute_index_stats(onerel, totalrows, indexdata, nindexes, rows, numrows, col_context); MemoryContextSwitchTo(old_context); MemoryContextDelete(col_context); /* * Emit the completed stats rows into pg_statistic, replacing any * previous statistics for the target columns. (If there are stats in * pg_statistic for columns we didn't process, we leave them alone.) */ update_attstats(relid, attr_cnt, vacattrstats); for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; update_attstats(RelationGetRelid(Irel[ind]), thisdata->attr_cnt, thisdata->vacattrstats); } } /* * If we are running a standalone ANALYZE, update pages/tuples stats in * pg_class. We know the accurate page count from the smgr, but only an * approximate number of tuples; therefore, if we are part of VACUUM * ANALYZE do *not* overwrite the accurate count already inserted by * VACUUM. The same consideration applies to indexes. 
*/ if (!vacstmt->vacuum) { vac_update_relstats(RelationGetRelid(onerel), RelationGetNumberOfBlocks(onerel), totalrows, hasindex); for (ind = 0; ind < nindexes; ind++) { AnlIndexData *thisdata = &indexdata[ind]; double totalindexrows; totalindexrows = ceil(thisdata->tupleFract * totalrows); vac_update_relstats(RelationGetRelid(Irel[ind]), RelationGetNumberOfBlocks(Irel[ind]), totalindexrows, false); } /* report results to the stats collector, too */ pgstat_report_analyze(RelationGetRelid(onerel), onerel->rd_rel->relisshared, totalrows, totaldeadrows); } /* Done with indexes */ vac_close_indexes(nindexes, Irel, NoLock); /* * Close source relation now, but keep lock so that no one deletes it * before we commit. (If someone did, they'd fail to clean up the entries * we made in pg_statistic.) */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?