/*
 * analyze.c
 *
 * Excerpt from the PostgreSQL 8.1.4 source code (open-source database
 * system, Linux build).  C source, 2,242 lines in total; this extract is
 * page 1 of 5.
 */
	relation_close(onerel, NoLock);}/* * Compute statistics about indexes of a relation */static voidcompute_index_stats(Relation onerel, double totalrows,					AnlIndexData *indexdata, int nindexes,					HeapTuple *rows, int numrows,					MemoryContext col_context){	MemoryContext ind_context,				old_context;	Datum		values[INDEX_MAX_KEYS];	bool		isnull[INDEX_MAX_KEYS];	int			ind,				i;	ind_context = AllocSetContextCreate(anl_context,										"Analyze Index",										ALLOCSET_DEFAULT_MINSIZE,										ALLOCSET_DEFAULT_INITSIZE,										ALLOCSET_DEFAULT_MAXSIZE);	old_context = MemoryContextSwitchTo(ind_context);	for (ind = 0; ind < nindexes; ind++)	{		AnlIndexData *thisdata = &indexdata[ind];		IndexInfo  *indexInfo = thisdata->indexInfo;		int			attr_cnt = thisdata->attr_cnt;		TupleTableSlot *slot;		EState	   *estate;		ExprContext *econtext;		List	   *predicate;		Datum	   *exprvals;		bool	   *exprnulls;		int			numindexrows,					tcnt,					rowno;		double		totalindexrows;		/* Ignore index if no columns to analyze and not partial */		if (attr_cnt == 0 && indexInfo->ii_Predicate == NIL)			continue;		/*		 * Need an EState for evaluation of index expressions and		 * partial-index predicates.  Create it in the per-index context to be		 * sure it gets cleaned up at the bottom of the loop.		 */		estate = CreateExecutorState();		econtext = GetPerTupleExprContext(estate);		/* Need a slot to hold the current heap tuple, too */		slot = MakeSingleTupleTableSlot(RelationGetDescr(onerel));		/* Arrange for econtext's scan tuple to be the tuple under test */		econtext->ecxt_scantuple = slot;		/* Set up execution state for predicate. 
*/		predicate = (List *)			ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,							estate);		/* Compute and save index expression values */		exprvals = (Datum *) palloc(numrows * attr_cnt * sizeof(Datum));		exprnulls = (bool *) palloc(numrows * attr_cnt * sizeof(bool));		numindexrows = 0;		tcnt = 0;		for (rowno = 0; rowno < numrows; rowno++)		{			HeapTuple	heapTuple = rows[rowno];			/* Set up for predicate or expression evaluation */			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);			/* If index is partial, check predicate */			if (predicate != NIL)			{				if (!ExecQual(predicate, econtext, false))					continue;			}			numindexrows++;			if (attr_cnt > 0)			{				/*				 * Evaluate the index row to compute expression values. We				 * could do this by hand, but FormIndexDatum is convenient.				 */				FormIndexDatum(indexInfo,							   slot,							   estate,							   values,							   isnull);				/*				 * Save just the columns we care about.				 */				for (i = 0; i < attr_cnt; i++)				{					VacAttrStats *stats = thisdata->vacattrstats[i];					int			attnum = stats->attr->attnum;					exprvals[tcnt] = values[attnum - 1];					exprnulls[tcnt] = isnull[attnum - 1];					tcnt++;				}			}		}		/*		 * Having counted the number of rows that pass the predicate in the		 * sample, we can estimate the total number of rows in the index.		 */		thisdata->tupleFract = (double) numindexrows / (double) numrows;		totalindexrows = ceil(thisdata->tupleFract * totalrows);		/*		 * Now we can compute the statistics for the expression columns.		 
*/		if (numindexrows > 0)		{			MemoryContextSwitchTo(col_context);			for (i = 0; i < attr_cnt; i++)			{				VacAttrStats *stats = thisdata->vacattrstats[i];				stats->exprvals = exprvals + i;				stats->exprnulls = exprnulls + i;				stats->rowstride = attr_cnt;				(*stats->compute_stats) (stats,										 ind_fetch_func,										 numindexrows,										 totalindexrows);				MemoryContextResetAndDeleteChildren(col_context);			}		}		/* And clean up */		MemoryContextSwitchTo(ind_context);		ExecDropSingleTupleTableSlot(slot);		FreeExecutorState(estate);		MemoryContextResetAndDeleteChildren(ind_context);	}	MemoryContextSwitchTo(old_context);	MemoryContextDelete(ind_context);}/* * examine_attribute -- pre-analysis of a single column * * Determine whether the column is analyzable; if so, create and initialize * a VacAttrStats struct for it.  If not, return NULL. */static VacAttrStats *examine_attribute(Relation onerel, int attnum){	Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1];	HeapTuple	typtuple;	VacAttrStats *stats;	bool		ok;	/* Never analyze dropped columns */	if (attr->attisdropped)		return NULL;	/* Don't analyze column if user has specified not to */	if (attr->attstattarget == 0)		return NULL;	/*	 * Create the VacAttrStats struct.	 */	stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats));	stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);	memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);	typtuple = SearchSysCache(TYPEOID,							  ObjectIdGetDatum(attr->atttypid),							  0, 0, 0);	if (!HeapTupleIsValid(typtuple))		elog(ERROR, "cache lookup failed for type %u", attr->atttypid);	stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));	memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));	ReleaseSysCache(typtuple);	stats->anl_context = anl_context;	stats->tupattnum = attnum;	/*	 * Call the type-specific typanalyze function.	If none is specified, use	 * std_typanalyze().	 
*/	if (OidIsValid(stats->attrtype->typanalyze))		ok = DatumGetBool(OidFunctionCall1(stats->attrtype->typanalyze,										   PointerGetDatum(stats)));	else		ok = std_typanalyze(stats);	if (!ok || stats->compute_stats == NULL || stats->minrows <= 0)	{		pfree(stats->attrtype);		pfree(stats->attr);		pfree(stats);		return NULL;	}	return stats;}/* * BlockSampler_Init -- prepare for random sampling of blocknumbers * * BlockSampler is used for stage one of our new two-stage tuple * sampling mechanism as discussed on pgsql-hackers 2004-04-02 (subject * "Large DB").  It selects a random sample of samplesize blocks out of * the nblocks blocks in the table.  If the table has less than * samplesize blocks, all blocks are selected. * * Since we know the total number of blocks in advance, we can use the * straightforward Algorithm S from Knuth 3.4.2, rather than Vitter's * algorithm. */static voidBlockSampler_Init(BlockSampler bs, BlockNumber nblocks, int samplesize){	bs->N = nblocks;			/* measured table size */	/*	 * If we decide to reduce samplesize for tables that have less or not much	 * more than samplesize blocks, here is the place to do it.	 */	bs->n = samplesize;	bs->t = 0;					/* blocks scanned so far */	bs->m = 0;					/* blocks selected so far */}static boolBlockSampler_HasMore(BlockSampler bs){	return (bs->t < bs->N) && (bs->m < bs->n);}static BlockNumberBlockSampler_Next(BlockSampler bs){	BlockNumber K = bs->N - bs->t;		/* remaining blocks */	int			k = bs->n - bs->m;		/* blocks still to sample */	double		p;				/* probability to skip block */	double		V;				/* random */	Assert(BlockSampler_HasMore(bs));	/* hence K > 0 and k > 0 */	if ((BlockNumber) k >= K)	{		/* need all the rest */		bs->m++;		return bs->t++;	}	/*----------	 * It is not obvious that this code matches Knuth's Algorithm S.	 * Knuth says to skip the current block with probability 1 - k/K.	 
* If we are to skip, we should advance t (hence decrease K), and	 * repeat the same probabilistic test for the next block.  The naive	 * implementation thus requires a random_fract() call for each block	 * number.	But we can reduce this to one random_fract() call per	 * selected block, by noting that each time the while-test succeeds,	 * we can reinterpret V as a uniform random number in the range 0 to p.	 * Therefore, instead of choosing a new V, we just adjust p to be	 * the appropriate fraction of its former value, and our next loop	 * makes the appropriate probabilistic test.	 *	 * We have initially K > k > 0.  If the loop reduces K to equal k,	 * the next while-test must fail since p will become exactly zero	 * (we assume there will not be roundoff error in the division).	 * (Note: Knuth suggests a "<=" loop condition, but we use "<" just	 * to be doubly sure about roundoff error.)  Therefore K cannot become	 * less than k, which means that we cannot fail to select enough blocks.	 *----------	 */	V = random_fract();	p = 1.0 - (double) k / (double) K;	while (V < p)	{		/* skip */		bs->t++;		K--;					/* keep K == N - t */		/* adjust p to be new cutoff point in reduced range */		p *= 1.0 - (double) k / (double) K;	}	/* select */	bs->m++;	return bs->t++;}/* * acquire_sample_rows -- acquire a random sample of rows from the table * * As of May 2004 we use a new two-stage method:  Stage one selects up * to targrows random blocks (or all blocks, if there aren't so many). * Stage two scans these blocks and uses the Vitter algorithm to create * a random sample of targrows rows (or less, if there are less in the * sample of blocks).  The two stages are executed simultaneously: each * block is processed as soon as stage one returns its number and while * the rows are read stage two controls which ones are to be inserted * into the sample. 
* * Although every row has an equal chance of ending up in the final * sample, this sampling method is not perfect: not every possible * sample has an equal chance of being selected.  For large relations * the number of different blocks represented by the sample tends to be * too small.  We can live with that for now.  Improvements are welcome. * * We also estimate the total numbers of live and dead rows in the table, * and return them into *totalrows and *totaldeadrows, respectively. * * An important property of this sampling method is that because we do * look at a statistically unbiased set of blocks, we should get * unbiased estimates of the average numbers of live and dead rows per * block.  The previous sampling method put too much credence in the row * density near the start of the table. * * The returned list of tuples is in order by physical position in the table. * (We will rely on this later to derive correlation estimates.) */static intacquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,					double *totalrows, double *totaldeadrows){	int			numrows = 0;	/* # rows collected */	double		liverows = 0;	/* # rows seen */	double		deadrows = 0;	/* # dead rows seen */	double		rowstoskip = -1;	/* -1 means not set yet */	BlockNumber totalblocks;	BlockSamplerData bs;	double		rstate;	Assert(targrows > 1);	totalblocks = RelationGetNumberOfBlocks(onerel);	/* Prepare for sampling block numbers */	BlockSampler_Init(&bs, totalblocks, targrows);	/* Prepare for sampling rows */	rstate = init_selection_state(targrows);	/* Outer loop over blocks to sample */	while (BlockSampler_HasMore(&bs))	{		BlockNumber targblock = BlockSampler_Next(&bs);		Buffer		targbuffer;		Page		targpage;		OffsetNumber targoffset,					maxoffset;		vacuum_delay_point();		/*		 * We must maintain a pin on the target page's buffer to ensure that		 * the maxoffset value stays good (else concurrent VACUUM might delete		 * tuples out from under us).  
Hence, pin the page until we are done		 * looking at it.  We don't maintain a lock on the page, so tuples		 * could get added to it, but we ignore such tuples.		 */		targbuffer = ReadBuffer(onerel, targblock);		LockBuffer(targbuffer, BUFFER_LOCK_SHARE);		targpage = BufferGetPage(targbuffer);		maxoffset = PageGetMaxOffsetNumber(targpage);		LockBuffer(targbuffer, BUFFER_LOCK_UNLOCK);		/* Inner loop over all tuples on the selected page */		for (targoffset = FirstOffsetNumber; targoffset <= maxoffset; targoffset++)		{			HeapTupleData targtuple;			ItemPointerSet(&targtuple.t_self, targblock, targoffset);			/* We use heap_release_fetch to avoid useless bufmgr traffic */			if (heap_release_fetch(onerel, SnapshotNow,								   &targtuple, &targbuffer,								   true, NULL))			{				/*				 * The first targrows live rows are simply copied into the				 * reservoir. Then we start replacing tuples in the sample				 * until we reach the end of the relation.	This algorithm is				 * from Jeff Vitter's paper (see full citation below). It				 * works by repeatedly computing the number of tuples to skip				 * before selecting a tuple, which replaces a randomly chosen				 * element of the reservoir (current set of tuples).  At all				 * times the reservoir is a true random sample of the tuples				 * we've passed over so far, so when we fall off the end of				 * the relation we're done.				 */				if (numrows < targrows)					rows[numrows++] = heap_copytuple(&targtuple);				else				{					/*					 * t in Vitter's paper is the number of records already					 * processed.  If we need to compute a new S value, we					 * must use the not-yet-incremented value of liverows as					 * t.					 
*/					if (rowstoskip < 0)						rowstoskip = get_next_S(liverows, targrows, &rstate);					if (rowstoskip <= 0)					{						/*						 * Found a suitable tuple, so save it, replacing one						 * old tuple at random						 */						int			k = (int) (targrows * random_fract());						Assert(k >= 0 && k < targrows);						heap_freetuple(rows[k]);						rows[k] = heap_copytuple(&targtuple);					}					rowstoskip -= 1;				}				liverows += 1;			}			else			{				/* Count dead rows, but not empty slots */				if (targtuple.t_data != NULL)					deadrows += 1;			}		}		/* Now release the pin on the page */		ReleaseBuffer(targbuffer);	}	/*	 * If we didn't find as many tuples as we wanted then we're done. No sort	 * is needed, since they're already in order.	 *	 * Otherwise we need to sort the collected tuples by position	 * (itempointer). It's not worth worrying about corner cases where the	 * tuples are already sorted.	 */	if (numrows == targrows)

/*
 * NOTE(extraction): the remainder of this page was code-viewer chrome, not
 * source text (keyboard shortcuts: copy Ctrl+C, search Ctrl+F, full screen
 * F11, larger font Ctrl+=, smaller font Ctrl+-, show shortcuts "?").
 * acquire_sample_rows() is truncated above; the source continues on page
 * 2 of 5.
 */