selfuncs.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,362 行 · 第 1/5 页

C
2,362
字号
	mcv_selec = 0.0;	sumcommon = 0.0;	if (HeapTupleIsValid(vardata->statsTuple) &&		get_attstatsslot(vardata->statsTuple,						 vardata->atttype, vardata->atttypmod,						 STATISTIC_KIND_MCV, InvalidOid,						 &values, &nvalues,						 &numbers, &nnumbers))	{		for (i = 0; i < nvalues; i++)		{			if (varonleft ?				DatumGetBool(FunctionCall2(opproc,										   values[i],										   constval)) :				DatumGetBool(FunctionCall2(opproc,										   constval,										   values[i])))				mcv_selec += numbers[i];			sumcommon += numbers[i];		}		free_attstatsslot(vardata->atttype, values, nvalues,						  numbers, nnumbers);	}	*sumcommonp = sumcommon;	return mcv_selec;}/* *	histogram_selectivity	- Examine the histogram for selectivity estimates * * Determine the fraction of the variable's histogram entries that satisfy * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. * * This code will work for any boolean-returning predicate operator, whether * or not it has anything to do with the histogram sort operator.  We are * essentially using the histogram just as a representative sample.  However, * small histograms are unlikely to be all that representative, so the caller * should specify a minimum histogram size to use, and fall back on some * other approach if this routine fails. * * The caller also specifies n_skip, which causes us to ignore the first and * last n_skip histogram elements, on the grounds that they are outliers and * hence not very representative.  If in doubt, min_hist_size = 100 and * n_skip = 1 are reasonable values. * * The function result is the selectivity, or -1 if there is no histogram * or it's smaller than min_hist_size. * * Note that the result disregards both the most-common-values (if any) and * null entries.  The caller is expected to combine this result with * statistics for those portions of the column population.	It may also be * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs. 
*/doublehistogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,					  Datum constval, bool varonleft,					  int min_hist_size, int n_skip){	double		result;	Datum	   *values;	int			nvalues;	/* check sanity of parameters */	Assert(n_skip >= 0);	Assert(min_hist_size > 2 * n_skip);	if (HeapTupleIsValid(vardata->statsTuple) &&		get_attstatsslot(vardata->statsTuple,						 vardata->atttype, vardata->atttypmod,						 STATISTIC_KIND_HISTOGRAM, InvalidOid,						 &values, &nvalues,						 NULL, NULL))	{		if (nvalues >= min_hist_size)		{			int			nmatch = 0;			int			i;			for (i = n_skip; i < nvalues - n_skip; i++)			{				if (varonleft ?					DatumGetBool(FunctionCall2(opproc,											   values[i],											   constval)) :					DatumGetBool(FunctionCall2(opproc,											   constval,											   values[i])))					nmatch++;			}			result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip));		}		else			result = -1;		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);	}	else		result = -1;	return result;}/* *	ineq_histogram_selectivity	- Examine the histogram for scalarineqsel * * Determine the fraction of the variable's histogram population that * satisfies the inequality condition, ie, VAR < CONST or VAR > CONST. * * Returns zero if there is no histogram (valid results will always be * greater than zero). * * Note that the result disregards both the most-common-values (if any) and * null entries.  The caller is expected to combine this result with * statistics for those portions of the column population. */static doubleineq_histogram_selectivity(VariableStatData *vardata,						   FmgrInfo *opproc, bool isgt,						   Datum constval, Oid consttype){	double		hist_selec;	Datum	   *values;	int			nvalues;	hist_selec = 0.0;	/*	 * Someday, ANALYZE might store more than one histogram per rel/att,	 * corresponding to more than one possible sort ordering defined for the	 * column type.  
However, to make that work we will need to figure out	 * which staop to search for --- it's not necessarily the one we have at	 * hand!  (For example, we might have a '<=' operator rather than the '<'	 * operator that will appear in staop.)  For now, assume that whatever	 * appears in pg_statistic is sorted the same way our operator sorts, or	 * the reverse way if isgt is TRUE.	 */	if (HeapTupleIsValid(vardata->statsTuple) &&		get_attstatsslot(vardata->statsTuple,						 vardata->atttype, vardata->atttypmod,						 STATISTIC_KIND_HISTOGRAM, InvalidOid,						 &values, &nvalues,						 NULL, NULL))	{		if (nvalues > 1)		{			/*			 * Use binary search to find proper location, ie, the first slot			 * at which the comparison fails.  (If the given operator isn't			 * actually sort-compatible with the histogram, you'll get garbage			 * results ... but probably not any more garbage-y than you would			 * from the old linear search.)			 */			double		histfrac;			int			lobound = 0;	/* first possible slot to search */			int			hibound = nvalues;		/* last+1 slot to search */			while (lobound < hibound)			{				int			probe = (lobound + hibound) / 2;				bool		ltcmp;				ltcmp = DatumGetBool(FunctionCall2(opproc,												   values[probe],												   constval));				if (isgt)					ltcmp = !ltcmp;				if (ltcmp)					lobound = probe + 1;				else					hibound = probe;			}			if (lobound <= 0)			{				/* Constant is below lower histogram boundary. */				histfrac = 0.0;			}			else if (lobound >= nvalues)			{				/* Constant is above upper histogram boundary. */				histfrac = 1.0;			}			else			{				int			i = lobound;				double		val,							high,							low;				double		binfrac;				/*				 * We have values[i-1] < constant < values[i].				 *				 * Convert the constant and the two nearest bin boundary				 * values to a uniform comparison scale, and do a linear				 * interpolation within this bin.				 
*/				if (convert_to_scalar(constval, consttype, &val,									  values[i - 1], values[i],									  vardata->vartype,									  &low, &high))				{					if (high <= low)					{						/* cope if bin boundaries appear identical */						binfrac = 0.5;					}					else if (val <= low)						binfrac = 0.0;					else if (val >= high)						binfrac = 1.0;					else					{						binfrac = (val - low) / (high - low);						/*						 * Watch out for the possibility that we got a NaN or						 * Infinity from the division.	This can happen						 * despite the previous checks, if for example "low"						 * is -Infinity.						 */						if (isnan(binfrac) ||							binfrac < 0.0 || binfrac > 1.0)							binfrac = 0.5;					}				}				else				{					/*					 * Ideally we'd produce an error here, on the grounds that					 * the given operator shouldn't have scalarXXsel					 * registered as its selectivity func unless we can deal					 * with its operand types.	But currently, all manner of					 * stuff is invoking scalarXXsel, so give a default					 * estimate until that can be fixed.					 */					binfrac = 0.5;				}				/*				 * Now, compute the overall selectivity across the values				 * represented by the histogram.  We have i-1 full bins and				 * binfrac partial bin below the constant.				 */				histfrac = (double) (i - 1) + binfrac;				histfrac /= (double) (nvalues - 1);			}			/*			 * Now histfrac = fraction of histogram entries below the			 * constant.			 *			 * Account for "<" vs ">"			 */			hist_selec = isgt ? (1.0 - histfrac) : histfrac;			/*			 * The histogram boundaries are only approximate to begin with,			 * and may well be out of date anyway.	Therefore, don't believe			 * extremely small or large selectivity estimates.			 */			if (hist_selec < 0.0001)				hist_selec = 0.0001;			else if (hist_selec > 0.9999)				hist_selec = 0.9999;		}		free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0);	}	return hist_selec;}/* *		scalarltsel		- Selectivity of "<" (also "<=") for scalars. 
*/
Datum
scalarltsel(PG_FUNCTION_ARGS)
{
	/*
	 * Positional call arguments: 0 = planner info, 1 = operator OID,
	 * 2 = the operator's argument list, 3 = varRelid.  The result is
	 * returned as a float8 selectivity.
	 */
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum		constval;
	Oid			consttype;
	bool		isgt;
	double		selec;

	/*
	 * If expression is not variable op something or something op variable,
	 * then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
	}

	/*
	 * If the constant is NULL, assume operator is strict and return zero, ie,
	 * operator will never return TRUE.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constval = ((Const *) other)->constvalue;
	consttype = ((Const *) other)->consttype;

	/*
	 * Force the var to be on the left to simplify logic in scalarineqsel.
	 */
	if (varonleft)
	{
		/* we have var < other */
		isgt = false;
	}
	else
	{
		/* we have other < var, commute to make var > other */
		operator = get_commutator(operator);
		if (!operator)
		{
			/* Use default selectivity (should we raise an error instead?) */
			ReleaseVariableStats(vardata);
			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
		}
		isgt = true;
	}

	selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);

	/* Every exit path after a successful lookup releases vardata */
	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 *		scalargtsel		- Selectivity of ">" (also ">=") for scalars.
*/
Datum
scalargtsel(PG_FUNCTION_ARGS)
{
	/*
	 * Positional call arguments: 0 = planner info, 1 = operator OID,
	 * 2 = the operator's argument list, 3 = varRelid.  The result is
	 * returned as a float8 selectivity.
	 */
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum		constval;
	Oid			consttype;
	bool		isgt;
	double		selec;

	/*
	 * If expression is not variable op something or something op variable,
	 * then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
	}

	/*
	 * If the constant is NULL, assume operator is strict and return zero, ie,
	 * operator will never return TRUE.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constval = ((Const *) other)->constvalue;
	consttype = ((Const *) other)->consttype;

	/*
	 * Force the var to be on the left to simplify logic in scalarineqsel.
	 */
	if (varonleft)
	{
		/* we have var > other */
		isgt = true;
	}
	else
	{
		/* we have other > var, commute to make var < other */
		operator = get_commutator(operator);
		if (!operator)
		{
			/* Use default selectivity (should we raise an error instead?) */
			ReleaseVariableStats(vardata);
			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
		}
		isgt = false;
	}

	selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype);

	/* Every exit path after a successful lookup releases vardata */
	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 * patternsel			- Generic code for pattern-match selectivity.
*/static doublepatternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate){	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	int			varRelid = PG_GETARG_INT32(3);	VariableStatData vardata;	Node	   *variable;	Node	   *other;	bool		varonleft;	Datum		constval;	Oid			consttype;	Oid			vartype;	Oid			opfamily;	Pattern_Prefix_Status pstatus;	Const	   *patt = NULL;	Const	   *prefix = NULL;	Const	   *rest = NULL;	double		result;	/*	 * If this is for a NOT LIKE or similar operator, get the corresponding	 * positive-match operator and work with that.	Set result to the correct	 * default estimate, too.	 */	if (negate)	{		operator = get_negator(operator);		if (!OidIsValid(operator))			elog(ERROR, "patternsel called for operator without a negator");		result = 1.0 - DEFAULT_MATCH_SEL;	}	else	{		result = DEFAULT_MATCH_SEL;	}	/*	 * If expression is not variable op constant, then punt and return a	 * default estimate.	 */	if (!get_restriction_variable(root, args, varRelid,								  &vardata, &other, &varonleft))

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?