analyze.c

来自「PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统」· C语言 代码 · 共 2,242 行 · 第 1/5 页

C
2,242
字号
/*-------------------------------------------------------------------------
 *
 * analyze.c
 *	  the Postgres statistics generator
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.89.2.1 2005/11/22 18:23:06 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include <math.h>

#include "access/heapam.h"
#include "access/tuptoaster.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "commands/vacuum.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
#include "utils/tuplesort.h"


/*
 * Data structure for Algorithm S from Knuth 3.4.2
 *
 * Holds the running state of one block-sampling pass: the total number of
 * blocks, the desired sample size, the current block position, and how many
 * blocks have been chosen so far.  The state is manipulated through the
 * BlockSampler_Init / BlockSampler_HasMore / BlockSampler_Next routines
 * declared below.
 */
typedef struct
{
	BlockNumber N;				/* number of blocks, known in advance */
	int			n;				/* desired sample size */
	BlockNumber t;				/* current block number */
	int			m;				/* blocks selected so far */
} BlockSamplerData;

/* Pointer form of the sampler state, as taken by the BlockSampler_* routines */
typedef BlockSamplerData *BlockSampler;

/*
 * Per-index data for ANALYZE
 *
 * analyze_rel() allocates a zero-filled array with one entry per open index.
 * vacattrstats and attr_cnt are filled in only for indexes that have
 * expression columns, and only when the ANALYZE command carried no explicit
 * column list; for every other index attr_cnt stays at zero.
 */
typedef struct AnlIndexData
{
	IndexInfo  *indexInfo;		/* BuildIndexInfo result */
	double		tupleFract;		/* fraction of rows for partial index;
								 * analyze_rel() starts it out at 1.0 */
	VacAttrStats **vacattrstats;	/* index attrs to analyze */
	int			attr_cnt;		/* number of valid entries in vacattrstats */
} AnlIndexData;


/* Default statistics target (GUC parameter) */
int			default_statistics_target = 10;

/*
 * Message level used for this file's progress reports.  analyze_rel() sets
 * it to INFO when the command is VERBOSE and to DEBUG2 otherwise; -1 is
 * merely the not-yet-set initial value.
 */
static int	elevel = -1;

/*
 * Memory context in which analysis results are kept.  analyze_rel() points
 * this at CurrentMemoryContext on entry.
 */
static MemoryContext anl_context = NULL;


/* Forward declarations of file-local routines */
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
				  int samplesize);
static bool BlockSampler_HasMore(BlockSampler bs);
static BlockNumber
BlockSampler_Next(BlockSampler bs);static void compute_index_stats(Relation onerel, double totalrows,					AnlIndexData *indexdata, int nindexes,					HeapTuple *rows, int numrows,					MemoryContext col_context);static VacAttrStats *examine_attribute(Relation onerel, int attnum);static int acquire_sample_rows(Relation onerel, HeapTuple *rows,					int targrows, double *totalrows, double *totaldeadrows);static double random_fract(void);static double init_selection_state(int n);static double get_next_S(double t, int n, double *stateptr);static int	compare_rows(const void *a, const void *b);static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);static bool std_typanalyze(VacAttrStats *stats);/* *	analyze_rel() -- analyze one relation */voidanalyze_rel(Oid relid, VacuumStmt *vacstmt){	Relation	onerel;	int			attr_cnt,				tcnt,				i,				ind;	Relation   *Irel;	int			nindexes;	bool		hasindex;	bool		analyzableindex;	VacAttrStats **vacattrstats;	AnlIndexData *indexdata;	int			targrows,				numrows;	double		totalrows,				totaldeadrows;	HeapTuple  *rows;	if (vacstmt->verbose)		elevel = INFO;	else		elevel = DEBUG2;	/*	 * Use the current context for storing analysis info.  vacuum.c ensures	 * that this context will be cleared when I return, thus releasing the	 * memory allocated here.	 */	anl_context = CurrentMemoryContext;	/*	 * Check for user-requested abort.	Note we want this to be inside a	 * transaction, so xact.c doesn't issue useless WARNING.	 */	CHECK_FOR_INTERRUPTS();	/*	 * Race condition -- if the pg_class tuple has gone away since the last	 * time we saw it, we don't need to process it.	 */	if (!SearchSysCacheExists(RELOID,							  ObjectIdGetDatum(relid),							  0, 0, 0))		return;	/*	 * Open the class, getting only a read lock on it, and check permissions.	 
* Permissions check should match vacuum's check!	 */	onerel = relation_open(relid, AccessShareLock);	if (!(pg_class_ownercheck(RelationGetRelid(onerel), GetUserId()) ||		  (pg_database_ownercheck(MyDatabaseId, GetUserId()) && !onerel->rd_rel->relisshared)))	{		/* No need for a WARNING if we already complained during VACUUM */		if (!vacstmt->vacuum)			ereport(WARNING,					(errmsg("skipping \"%s\" --- only table or database owner can analyze it",							RelationGetRelationName(onerel))));		relation_close(onerel, AccessShareLock);		return;	}	/*	 * Check that it's a plain table; we used to do this in get_rel_oids() but	 * seems safer to check after we've locked the relation.	 */	if (onerel->rd_rel->relkind != RELKIND_RELATION)	{		/* No need for a WARNING if we already complained during VACUUM */		if (!vacstmt->vacuum)			ereport(WARNING,					(errmsg("skipping \"%s\" --- cannot analyze indexes, views, or special system tables",							RelationGetRelationName(onerel))));		relation_close(onerel, AccessShareLock);		return;	}	/*	 * Silently ignore tables that are temp tables of other backends ---	 * trying to analyze these is rather pointless, since their contents are	 * probably not up-to-date on disk.  (We don't throw a warning here; it	 * would just lead to chatter during a database-wide ANALYZE.)	 */	if (isOtherTempNamespace(RelationGetNamespace(onerel)))	{		relation_close(onerel, AccessShareLock);		return;	}	/*	 * We can ANALYZE any table except pg_statistic. See update_attstats	 */	if (RelationGetRelid(onerel) == StatisticRelationId)	{		relation_close(onerel, AccessShareLock);		return;	}	ereport(elevel,			(errmsg("analyzing \"%s.%s\"",					get_namespace_name(RelationGetNamespace(onerel)),					RelationGetRelationName(onerel))));	/*	 * Determine which columns to analyze	 *	 * Note that system attributes are never analyzed.	 
*/	if (vacstmt->va_cols != NIL)	{		ListCell   *le;		vacattrstats = (VacAttrStats **) palloc(list_length(vacstmt->va_cols) *												sizeof(VacAttrStats *));		tcnt = 0;		foreach(le, vacstmt->va_cols)		{			char	   *col = strVal(lfirst(le));			i = attnameAttNum(onerel, col, false);			vacattrstats[tcnt] = examine_attribute(onerel, i);			if (vacattrstats[tcnt] != NULL)				tcnt++;		}		attr_cnt = tcnt;	}	else	{		attr_cnt = onerel->rd_att->natts;		vacattrstats = (VacAttrStats **)			palloc(attr_cnt * sizeof(VacAttrStats *));		tcnt = 0;		for (i = 1; i <= attr_cnt; i++)		{			vacattrstats[tcnt] = examine_attribute(onerel, i);			if (vacattrstats[tcnt] != NULL)				tcnt++;		}		attr_cnt = tcnt;	}	/*	 * Open all indexes of the relation, and see if there are any analyzable	 * columns in the indexes.	We do not analyze index columns if there was	 * an explicit column list in the ANALYZE command, however.	 */	vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);	hasindex = (nindexes > 0);	indexdata = NULL;	analyzableindex = false;	if (hasindex)	{		indexdata = (AnlIndexData *) palloc0(nindexes * sizeof(AnlIndexData));		for (ind = 0; ind < nindexes; ind++)		{			AnlIndexData *thisdata = &indexdata[ind];			IndexInfo  *indexInfo;			thisdata->indexInfo = indexInfo = BuildIndexInfo(Irel[ind]);			thisdata->tupleFract = 1.0; /* fix later if partial */			if (indexInfo->ii_Expressions != NIL && vacstmt->va_cols == NIL)			{				ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);				thisdata->vacattrstats = (VacAttrStats **)					palloc(indexInfo->ii_NumIndexAttrs * sizeof(VacAttrStats *));				tcnt = 0;				for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)				{					int			keycol = indexInfo->ii_KeyAttrNumbers[i];					if (keycol == 0)					{						/* Found an index expression */						Node	   *indexkey;						if (indexpr_item == NULL)		/* shouldn't happen */							elog(ERROR, "too few entries in indexprs list");						indexkey = (Node *) lfirst(indexpr_item);						indexpr_item = 
lnext(indexpr_item);						/*						 * Can't analyze if the opclass uses a storage type						 * different from the expression result type. We'd get						 * confused because the type shown in pg_attribute for						 * the index column doesn't match what we are getting						 * from the expression. Perhaps this can be fixed						 * someday, but for now, punt.						 */						if (exprType(indexkey) !=							Irel[ind]->rd_att->attrs[i]->atttypid)							continue;						thisdata->vacattrstats[tcnt] =							examine_attribute(Irel[ind], i + 1);						if (thisdata->vacattrstats[tcnt] != NULL)						{							tcnt++;							analyzableindex = true;						}					}				}				thisdata->attr_cnt = tcnt;			}		}	}	/*	 * Quit if no analyzable columns	 */	if (attr_cnt <= 0 && !analyzableindex)	{		/*		 * We report that the table is empty; this is just so that the		 * autovacuum code doesn't go nuts trying to get stats about a		 * zero-column table.		 */		if (!vacstmt->vacuum)			pgstat_report_analyze(RelationGetRelid(onerel),								  onerel->rd_rel->relisshared,								  0, 0);		vac_close_indexes(nindexes, Irel, AccessShareLock);		relation_close(onerel, AccessShareLock);		return;	}	/*	 * Determine how many rows we need to sample, using the worst case from	 * all analyzable columns.	We use a lower bound of 100 rows to avoid	 * possible overflow in Vitter's algorithm.	 */	targrows = 100;	for (i = 0; i < attr_cnt; i++)	{		if (targrows < vacattrstats[i]->minrows)			targrows = vacattrstats[i]->minrows;	}	for (ind = 0; ind < nindexes; ind++)	{		AnlIndexData *thisdata = &indexdata[ind];		for (i = 0; i < thisdata->attr_cnt; i++)		{			if (targrows < thisdata->vacattrstats[i]->minrows)				targrows = thisdata->vacattrstats[i]->minrows;		}	}	/*	 * Acquire the sample rows	 */	rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));	numrows = acquire_sample_rows(onerel, rows, targrows,								  &totalrows, &totaldeadrows);	/*	 * Compute the statistics.	
Temporary results during the calculations for	 * each column are stored in a child context.  The calc routines are	 * responsible to make sure that whatever they store into the VacAttrStats	 * structure is allocated in anl_context.	 */	if (numrows > 0)	{		MemoryContext col_context,					old_context;		col_context = AllocSetContextCreate(anl_context,											"Analyze Column",											ALLOCSET_DEFAULT_MINSIZE,											ALLOCSET_DEFAULT_INITSIZE,											ALLOCSET_DEFAULT_MAXSIZE);		old_context = MemoryContextSwitchTo(col_context);		for (i = 0; i < attr_cnt; i++)		{			VacAttrStats *stats = vacattrstats[i];			stats->rows = rows;			stats->tupDesc = onerel->rd_att;			(*stats->compute_stats) (stats,									 std_fetch_func,									 numrows,									 totalrows);			MemoryContextResetAndDeleteChildren(col_context);		}		if (hasindex)			compute_index_stats(onerel, totalrows,								indexdata, nindexes,								rows, numrows,								col_context);		MemoryContextSwitchTo(old_context);		MemoryContextDelete(col_context);		/*		 * Emit the completed stats rows into pg_statistic, replacing any		 * previous statistics for the target columns.	(If there are stats in		 * pg_statistic for columns we didn't process, we leave them alone.)		 */		update_attstats(relid, attr_cnt, vacattrstats);		for (ind = 0; ind < nindexes; ind++)		{			AnlIndexData *thisdata = &indexdata[ind];			update_attstats(RelationGetRelid(Irel[ind]),							thisdata->attr_cnt, thisdata->vacattrstats);		}	}	/*	 * If we are running a standalone ANALYZE, update pages/tuples stats in	 * pg_class.  We know the accurate page count from the smgr, but only an	 * approximate number of tuples; therefore, if we are part of VACUUM	 * ANALYZE do *not* overwrite the accurate count already inserted by	 * VACUUM.	The same consideration applies to indexes.	 
*/	if (!vacstmt->vacuum)	{		vac_update_relstats(RelationGetRelid(onerel),							RelationGetNumberOfBlocks(onerel),							totalrows,							hasindex);		for (ind = 0; ind < nindexes; ind++)		{			AnlIndexData *thisdata = &indexdata[ind];			double		totalindexrows;			totalindexrows = ceil(thisdata->tupleFract * totalrows);			vac_update_relstats(RelationGetRelid(Irel[ind]),								RelationGetNumberOfBlocks(Irel[ind]),								totalindexrows,								false);		}		/* report results to the stats collector, too */		pgstat_report_analyze(RelationGetRelid(onerel),							  onerel->rd_rel->relisshared,							  totalrows, totaldeadrows);	}	/* Done with indexes */	vac_close_indexes(nindexes, Irel, NoLock);	/*	 * Close source relation now, but keep lock so that no one deletes it	 * before we commit.  (If someone did, they'd fail to clean up the entries	 * we made in pg_statistic.)	 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?