analyze.c

来自「PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统」· C语言 代码 · 共 2,242 行 · 第 1/5 页

C
2,242
字号
		qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);	/*	 * Estimate total numbers of rows in relation.	 */	if (bs.m > 0)	{		*totalrows = floor((liverows * totalblocks) / bs.m + 0.5);		*totaldeadrows = floor((deadrows * totalblocks) / bs.m + 0.5);	}	else	{		*totalrows = 0.0;		*totaldeadrows = 0.0;	}	/*	 * Emit some interesting relation info	 */	ereport(elevel,			(errmsg("\"%s\": scanned %d of %u pages, "					"containing %.0f live rows and %.0f dead rows; "					"%d rows in sample, %.0f estimated total rows",					RelationGetRelationName(onerel),					bs.m, totalblocks,					liverows, deadrows,					numrows, *totalrows)));	return numrows;}/* Select a random value R uniformly distributed in 0 < R < 1 */static doublerandom_fract(void){	long		z;	/* random() can produce endpoint values, try again if so */	do	{		z = random();	} while (z <= 0 || z >= MAX_RANDOM_VALUE);	return (double) z / (double) MAX_RANDOM_VALUE;}/* * These two routines embody Algorithm Z from "Random sampling with a * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1 * (Mar. 1985), Pages 37-57.  Vitter describes his algorithm in terms * of the count S of records to skip before processing another record. * It is computed primarily based on t, the number of records already read. * The only extra state needed between calls is W, a random state variable. * * init_selection_state computes the initial W value. * * Given that we've already read t records (t >= n), get_next_S * determines the number of records to skip before the next record is * processed. */static doubleinit_selection_state(int n){	/* Initial value of W (for use when Algorithm Z is first applied) */	return exp(-log(random_fract()) / n);}static doubleget_next_S(double t, int n, double *stateptr){	double		S;	/* The magic constant here is T from Vitter's paper */	if (t <= (22.0 * n))	{		/* Process records using Algorithm X until t is large enough */		double		V,					quot;		V = random_fract();		/* Generate V */		S = 0;		t += 1;		/* Note: "num" in Vitter's code is always equal to t - n */		quot = (t - (double) n) / t;		/* Find min S satisfying (4.1) */		while (quot > V)		{			S += 1;			t += 1;			quot *= (t - (double) n) / t;		}	}	else	{		/* Now apply Algorithm Z */		double		W = *stateptr;		double		term = t - (double) n + 1;		for (;;)		{			double		numer,						numer_lim,						denom;			double		U,						X,						lhs,						rhs,						y,						tmp;			/* Generate U and X */			U = random_fract();			X = t * (W - 1.0);			S = floor(X);		/* S is tentatively set to floor(X) */			/* Test if U <= h(S)/cg(X) in the manner of (6.3) */			tmp = (t + 1) / term;			lhs = exp(log(((U * tmp * tmp) * (term + S)) / (t + X)) / n);			rhs = (((t + X) / (term + S)) * term) / t;			if (lhs <= rhs)			{				W = rhs / lhs;				break;			}			/* Test if U <= f(S)/cg(X) */			y = (((U * (t + 1)) / term) * (t + S + 1)) / (t + X);			if ((double) n < S)			{				denom = t;				numer_lim = term + S;			}			else			{				denom = t - (double) n + S;				numer_lim = t + 1;			}			for (numer = t + S; numer >= numer_lim; numer -= 1)			{				y *= numer / denom;				denom -= 1;			}			W = exp(-log(random_fract()) / n);	/* Generate W in advance */			if (exp(log(y) / n) <= (t + X) / t)				break;		}		*stateptr = W;	}	return S;}/* * qsort comparator for sorting rows[] array */static intcompare_rows(const void *a, const void *b){	HeapTuple	ha = *(HeapTuple *) a;	HeapTuple	hb = *(HeapTuple *) b;	BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);	OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);	BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);	OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);	if (ba < bb)		return -1;	if (ba > bb)		return 1;	if (oa < ob)		return -1;	if (oa > ob)		return 1;	return 0;}/* *	update_attstats() -- update attribute statistics for one relation * *		Statistics are stored in several places: the pg_class row for the *		relation has stats about the whole relation, and there is a *		pg_statistic row for each (non-system) attribute that has ever *		been analyzed.	The pg_class values are updated by VACUUM, not here. * *		pg_statistic rows are just added or updated normally.  This means *		that pg_statistic will probably contain some deleted rows at the *		completion of a vacuum cycle, unless it happens to get vacuumed last. * *		To keep things simple, we punt for pg_statistic, and don't try *		to compute or store rows for pg_statistic itself in pg_statistic. *		This could possibly be made to work, but it's not worth the trouble. *		Note analyze_rel() has seen to it that we won't come here when *		vacuuming pg_statistic itself. * *		Note: if two backends concurrently try to analyze the same relation, *		the second one is likely to fail here with a "tuple concurrently *		updated" error.  This is slightly annoying, but no real harm is done. *		We could prevent the problem by using a stronger lock on the *		relation for ANALYZE (ie, ShareUpdateExclusiveLock instead *		of AccessShareLock); but that cure seems worse than the disease, *		especially now that ANALYZE doesn't start a new transaction *		for each relation.	The lock could be held for a long time... */static voidupdate_attstats(Oid relid, int natts, VacAttrStats **vacattrstats){	Relation	sd;	int			attno;	if (natts <= 0)		return;					/* nothing to do */	sd = heap_open(StatisticRelationId, RowExclusiveLock);	for (attno = 0; attno < natts; attno++)	{		VacAttrStats *stats = vacattrstats[attno];		HeapTuple	stup,					oldtup;		int			i,					k,					n;		Datum		values[Natts_pg_statistic];		char		nulls[Natts_pg_statistic];		char		replaces[Natts_pg_statistic];		/* Ignore attr if we weren't able to collect stats */		if (!stats->stats_valid)			continue;		/*		 * Construct a new pg_statistic tuple		 */		for (i = 0; i < Natts_pg_statistic; ++i)		{			nulls[i] = ' ';			replaces[i] = 'r';		}		i = 0;		values[i++] = ObjectIdGetDatum(relid);	/* starelid */		values[i++] = Int16GetDatum(stats->attr->attnum);		/* staattnum */		values[i++] = Float4GetDatum(stats->stanullfrac);		/* stanullfrac */		values[i++] = Int32GetDatum(stats->stawidth);	/* stawidth */		values[i++] = Float4GetDatum(stats->stadistinct);		/* stadistinct */		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)		{			values[i++] = Int16GetDatum(stats->stakind[k]);		/* stakindN */		}		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)		{			values[i++] = ObjectIdGetDatum(stats->staop[k]);	/* staopN */		}		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)		{			int			nnum = stats->numnumbers[k];			if (nnum > 0)			{				Datum	   *numdatums = (Datum *) palloc(nnum * sizeof(Datum));				ArrayType  *arry;				for (n = 0; n < nnum; n++)					numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);				/* XXX knows more than it should about type float4: */				arry = construct_array(numdatums, nnum,									   FLOAT4OID,									   sizeof(float4), false, 'i');				values[i++] = PointerGetDatum(arry);	/* stanumbersN */			}			else			{				nulls[i] = 'n';				values[i++] = (Datum) 0;			}		}		for (k = 0; k < STATISTIC_NUM_SLOTS; k++)		{			if (stats->numvalues[k] > 0)			{				ArrayType  *arry;				arry = construct_array(stats->stavalues[k],									   stats->numvalues[k],									   stats->attr->atttypid,									   stats->attrtype->typlen,									   stats->attrtype->typbyval,									   stats->attrtype->typalign);				values[i++] = PointerGetDatum(arry);	/* stavaluesN */			}			else			{				nulls[i] = 'n';				values[i++] = (Datum) 0;			}		}		/* Is there already a pg_statistic tuple for this attribute? */		oldtup = SearchSysCache(STATRELATT,								ObjectIdGetDatum(relid),								Int16GetDatum(stats->attr->attnum),								0, 0);		if (HeapTupleIsValid(oldtup))		{			/* Yes, replace it */			stup = heap_modifytuple(oldtup,									RelationGetDescr(sd),									values,									nulls,									replaces);			ReleaseSysCache(oldtup);			simple_heap_update(sd, &stup->t_self, stup);		}		else		{			/* No, insert new tuple */			stup = heap_formtuple(sd->rd_att, values, nulls);			simple_heap_insert(sd, stup);		}		/* update indexes too */		CatalogUpdateIndexes(sd, stup);		heap_freetuple(stup);	}	heap_close(sd, RowExclusiveLock);}/* * Standard fetch function for use by compute_stats subroutines. * * This exists to provide some insulation between compute_stats routines * and the actual storage of the sample data. */static Datumstd_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull){	int			attnum = stats->tupattnum;	HeapTuple	tuple = stats->rows[rownum];	TupleDesc	tupDesc = stats->tupDesc;	return heap_getattr(tuple, attnum, tupDesc, isNull);}/* * Fetch function for analyzing index expressions. * * We have not bothered to construct index tuples, instead the data is * just in Datum arrays. */static Datumind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull){	int			i;	/* exprvals and exprnulls are already offset for proper column */	i = rownum * stats->rowstride;	*isNull = stats->exprnulls[i];	return stats->exprvals[i];}/*========================================================================== * * Code below this point represents the "standard" type-specific statistics * analysis algorithms.  This code can be replaced on a per-data-type basis * by setting a nonzero value in pg_type.typanalyze. * *========================================================================== *//* * To avoid consuming too much memory during analysis and/or too much space * in the resulting pg_statistic rows, we ignore varlena datums that are wider * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV * and distinct-value calculations since a wide value is unlikely to be * duplicated at all, much less be a most-common value.  For the same reason, * ignoring wide values will not affect our estimates of histogram bin * boundaries very much. */#define WIDTH_THRESHOLD  1024#define swapInt(a,b)	do {int _tmp; _tmp=a; a=b; b=_tmp;} while(0)#define swapDatum(a,b)	do {Datum _tmp; _tmp=a; a=b; b=_tmp;} while(0)/* * Extra information used by the default analysis routines */typedef struct{	Oid			eqopr;			/* '=' operator for datatype, if any */	Oid			eqfunc;			/* and associated function */	Oid			ltopr;			/* '<' operator for datatype, if any */} StdAnalyzeData;typedef struct{	Datum		value;			/* a data value */	int			tupno;			/* position index for tuple it came from */} ScalarItem;typedef struct{	int			count;			/* # of duplicates */	int			first;			/* values[] index of first occurrence */} ScalarMCVItem;/* context information for compare_scalars() */static FmgrInfo *datumCmpFn;static SortFunctionKind datumCmpFnKind;static int *datumCmpTupnoLink;static void compute_minimal_stats(VacAttrStatsP stats,					  AnalyzeAttrFetchFunc fetchfunc,					  int samplerows,					  double totalrows);static void compute_scalar_stats(VacAttrStatsP stats,					 AnalyzeAttrFetchFunc fetchfunc,					 int samplerows,					 double totalrows);static int	compare_scalars(const void *a, const void *b);static int	compare_mcvs(const void *a, const void *b);/* * std_typanalyze -- the default type-specific typanalyze function */static boolstd_typanalyze(VacAttrStats *stats){	Form_pg_attribute attr = stats->attr;	Operator	func_operator;	Oid			eqopr = InvalidOid;	Oid			eqfunc = InvalidOid;	Oid			ltopr = InvalidOid;	StdAnalyzeData *mystats;	/* If the attstattarget column is negative, use the default value */	/* NB: it is okay to scribble on stats->attr since it's a copy */	if (attr->attstattarget < 0)		attr->attstattarget = default_statistics_target;	/* If column has no "=" operator, we can't do much of anything */	func_operator = equality_oper(attr->atttypid, true);	if (func_operator != NULL)	{		eqopr = oprid(func_operator);

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?