selfuncs.c

来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 2,384 行 · 第 1/5 页

C
2,384
字号
		/* no stats available, so default result */		return DEFAULT_INEQ_SEL;	}	stats = (Form_pg_statistic) GETSTRUCT(statsTuple);	fmgr_info(get_opcode(operator), &opproc);	/*	 * If we have most-common-values info, add up the fractions of the MCV	 * entries that satisfy MCV OP CONST.  These fractions contribute	 * directly to the result selectivity.	Also add up the total fraction	 * represented by MCV entries.	 */	mcv_selec = 0.0;	sumcommon = 0.0;	if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,						 STATISTIC_KIND_MCV, InvalidOid,						 &values, &nvalues,						 &numbers, &nnumbers))	{		for (i = 0; i < nvalues; i++)		{			if (DatumGetBool(FunctionCall2(&opproc,										   values[i],										   constval)))				mcv_selec += numbers[i];			sumcommon += numbers[i];		}		free_attstatsslot(var->vartype, values, nvalues, numbers, nnumbers);	}	/*	 * If there is a histogram, determine which bin the constant falls in,	 * and compute the resulting contribution to selectivity.	 *	 * Someday, VACUUM might store more than one histogram per rel/att,	 * corresponding to more than one possible sort ordering defined for	 * the column type.  However, to make that work we will need to figure	 * out which staop to search for --- it's not necessarily the one we	 * have at hand!  (For example, we might have a '<=' operator rather	 * than the '<' operator that will appear in staop.)  For now, assume	 * that whatever appears in pg_statistic is sorted the same way our	 * operator sorts, or the reverse way if isgt is TRUE.	 */	hist_selec = 0.0;	if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,						 STATISTIC_KIND_HISTOGRAM, InvalidOid,						 &values, &nvalues,						 NULL, NULL))	{		if (nvalues > 1)		{			double		histfrac;			bool		ltcmp;			ltcmp = DatumGetBool(FunctionCall2(&opproc,											   values[0],											   constval));			if (isgt)				ltcmp = !ltcmp;			if (!ltcmp)			{				/* Constant is below lower histogram boundary. */				histfrac = 0.0;			}			else			{				/*				 * Scan to find proper location.  This could be made				 * faster by using a binary-search method, but it's				 * probably not worth the trouble for typical histogram				 * sizes.				 */				for (i = 1; i < nvalues; i++)				{					ltcmp = DatumGetBool(FunctionCall2(&opproc,													   values[i],													   constval));					if (isgt)						ltcmp = !ltcmp;					if (!ltcmp)						break;				}				if (i >= nvalues)				{					/* Constant is above upper histogram boundary. */					histfrac = 1.0;				}				else				{					double		val,								high,								low;					double		binfrac;					/*					 * We have values[i-1] < constant < values[i].					 *					 * Convert the constant and the two nearest bin boundary					 * values to a uniform comparison scale, and do a					 * linear interpolation within this bin.					 */					if (convert_to_scalar(constval, consttype, &val,										  values[i - 1], values[i],										  var->vartype,										  &low, &high))					{						if (high <= low)						{							/* cope if bin boundaries appear identical */							binfrac = 0.5;						}						else if (val <= low)							binfrac = 0.0;						else if (val >= high)							binfrac = 1.0;						else						{							binfrac = (val - low) / (high - low);							/*							 * Watch out for the possibility that we got a							 * NaN or Infinity from the division.  This							 * can happen despite the previous checks, if							 * for example "low" is -Infinity.							 */							if (isnan(binfrac) ||								binfrac < 0.0 || binfrac > 1.0)								binfrac = 0.5;						}					}					else					{						/*						 * Ideally we'd produce an error here, on the						 * grounds that the given operator shouldn't have						 * scalarXXsel registered as its selectivity func						 * unless we can deal with its operand types.  But						 * currently, all manner of stuff is invoking						 * scalarXXsel, so give a default estimate until						 * that can be fixed.						 */						binfrac = 0.5;					}					/*					 * Now, compute the overall selectivity across the					 * values represented by the histogram.  We have i-1					 * full bins and binfrac partial bin below the					 * constant.					 */					histfrac = (double) (i - 1) + binfrac;					histfrac /= (double) (nvalues - 1);				}			}			/*			 * Now histfrac = fraction of histogram entries below the			 * constant.			 *			 * Account for "<" vs ">"			 */			hist_selec = isgt ? (1.0 - histfrac) : histfrac;			/*			 * The histogram boundaries are only approximate to begin			 * with, and may well be out of date anyway.  Therefore, don't			 * believe extremely small or large selectivity estimates.			 */			if (hist_selec < 0.0001)				hist_selec = 0.0001;			else if (hist_selec > 0.9999)				hist_selec = 0.9999;		}		free_attstatsslot(var->vartype, values, nvalues, NULL, 0);	}	/*	 * Now merge the results from the MCV and histogram calculations,	 * realizing that the histogram covers only the non-null values that	 * are not listed in MCV.	 */	selec = 1.0 - stats->stanullfrac - sumcommon;	if (hist_selec > 0.0)		selec *= hist_selec;	else	{		/*		 * If no histogram but there are values not accounted for by MCV,		 * arbitrarily assume half of them will match.		 */		selec *= 0.5;	}	selec += mcv_selec;	ReleaseSysCache(statsTuple);	/* result should be in range, but make sure... */	CLAMP_PROBABILITY(selec);	return selec;}/* *		scalarltsel		- Selectivity of "<" (also "<=") for scalars. */Datumscalarltsel(PG_FUNCTION_ARGS){	Query	   *root = (Query *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	int			varRelid = PG_GETARG_INT32(3);	Var		   *var;	Node	   *other;	Datum		constval;	Oid			consttype;	bool		varonleft;	bool		isgt;	double		selec;	/*	 * If expression is not var op something or something op var for a	 * simple var of a real relation (no subqueries, for now), then punt	 * and return a default estimate.	 */	if (!get_restriction_var(args, varRelid,							 &var, &other, &varonleft))		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);	/*	 * Can't do anything useful if the something is not a constant,	 * either.	 */	if (!IsA(other, Const))		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);	/*	 * If the constant is NULL, assume operator is strict and return zero,	 * ie, operator will never return TRUE.	 */	if (((Const *) other)->constisnull)		PG_RETURN_FLOAT8(0.0);	constval = ((Const *) other)->constvalue;	consttype = ((Const *) other)->consttype;	/*	 * Force the var to be on the left to simplify logic in scalarineqsel.	 */	if (varonleft)	{		/* we have var < other */		isgt = false;	}	else	{		/* we have other < var, commute to make var > other */		operator = get_commutator(operator);		if (!operator)		{			/* Use default selectivity (should we raise an error instead?) */			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);		}		isgt = true;	}	selec = scalarineqsel(root, operator, isgt, var, constval, consttype);	PG_RETURN_FLOAT8((float8) selec);}/* *		scalargtsel		- Selectivity of ">" (also ">=") for integers. */Datumscalargtsel(PG_FUNCTION_ARGS){	Query	   *root = (Query *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	int			varRelid = PG_GETARG_INT32(3);	Var		   *var;	Node	   *other;	Datum		constval;	Oid			consttype;	bool		varonleft;	bool		isgt;	double		selec;	/*	 * If expression is not var op something or something op var for a	 * simple var of a real relation (no subqueries, for now), then punt	 * and return a default estimate.	 */	if (!get_restriction_var(args, varRelid,							 &var, &other, &varonleft))		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);	/*	 * Can't do anything useful if the something is not a constant,	 * either.	 */	if (!IsA(other, Const))		PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);	/*	 * If the constant is NULL, assume operator is strict and return zero,	 * ie, operator will never return TRUE.	 */	if (((Const *) other)->constisnull)		PG_RETURN_FLOAT8(0.0);	constval = ((Const *) other)->constvalue;	consttype = ((Const *) other)->consttype;	/*	 * Force the var to be on the left to simplify logic in scalarineqsel.	 */	if (varonleft)	{		/* we have var > other */		isgt = true;	}	else	{		/* we have other > var, commute to make var < other */		operator = get_commutator(operator);		if (!operator)		{			/* Use default selectivity (should we raise an error instead?) */			PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);		}		isgt = false;	}	selec = scalarineqsel(root, operator, isgt, var, constval, consttype);	PG_RETURN_FLOAT8((float8) selec);}/* * patternsel			- Generic code for pattern-match selectivity. */static doublepatternsel(PG_FUNCTION_ARGS, Pattern_Type ptype){	Query	   *root = (Query *) PG_GETARG_POINTER(0);#ifdef NOT_USED	Oid			operator = PG_GETARG_OID(1);#endif	List	   *args = (List *) PG_GETARG_POINTER(2);	int			varRelid = PG_GETARG_INT32(3);	Var		   *var;	Node	   *other;	bool		varonleft;	Oid			relid;	Datum		constval;	Oid			consttype;	Oid			vartype;	Oid			opclass;	Pattern_Prefix_Status pstatus;	Const	   *patt = NULL;	Const	   *prefix = NULL;	Const	   *rest = NULL;	double		result;	/*	 * If expression is not var op constant for a simple var of a real	 * relation (no subqueries, for now), then punt and return a default	 * estimate.	 */	if (!get_restriction_var(args, varRelid,							 &var, &other, &varonleft))		return DEFAULT_MATCH_SEL;	if (!varonleft || !IsA(other, Const))		return DEFAULT_MATCH_SEL;	relid = getrelid(var->varno, root->rtable);	if (relid == InvalidOid)		return DEFAULT_MATCH_SEL;	/*	 * If the constant is NULL, assume operator is strict and return zero,	 * ie, operator will never return TRUE.	 */	if (((Const *) other)->constisnull)		return 0.0;	constval = ((Const *) other)->constvalue;	consttype = ((Const *) other)->consttype;	/*	 * The right-hand const is type text or bytea for all supported	 * operators.  We do not expect to see binary-compatible types here,	 * since const-folding should have relabeled the const to exactly	 * match the operator's declared type.	 */	if (consttype != TEXTOID && consttype != BYTEAOID)		return DEFAULT_MATCH_SEL;	/*	 * The var, on the other hand, might be a binary-compatible type;	 * particularly a domain.  Try to fold it if it's not recognized	 * immediately.	 */	vartype = var->vartype;	if (vartype != consttype)		vartype = getBaseType(vartype);	/*	 * We should now be able to recognize the var's datatype.  Choose the	 * index opclass from which we must draw the comparison operators.	 *	 * NOTE: It would be more correct to use the PATTERN opclasses than the	 * simple ones, but at the moment ANALYZE will not generate statistics	 * for the PATTERN operators.  But our results are so approximate	 * anyway that it probably hardly matters.	 */	switch (vartype)	{		case TEXTOID:			opclass = TEXT_BTREE_OPS_OID;			break;		case VARCHAROID:			opclass = VARCHAR_BTREE_OPS_OID;			break;		case BPCHAROID:			opclass = BPCHAR_BTREE_OPS_OID;			break;		case NAMEOID:			opclass = NAME_BTREE_OPS_OID;			break;		case BYTEAOID:			opclass = BYTEA_BTREE_OPS_OID;			break;		default:			return DEFAULT_MATCH_SEL;	}	/* divide pattern into fixed prefix and remainder */	patt = (Const *) other;	pstatus = pattern_fixed_prefix(patt, ptype, &prefix, &rest);	/*	 * If necessary, coerce the prefix constant to the right type. (The	 * "rest" constant need not be changed.)	 */	if (prefix && prefix->consttype != vartype)	{		char	   *prefixstr;		switch (prefix->consttype)		{			case TEXTOID:				prefixstr = DatumGetCString(DirectFunctionCall1(textout,													prefix->constvalue));				break;			case BYTEAOID:				prefixstr = DatumGetCString(DirectFunctionCall1(byteaout,													prefix->constvalue));				break;			default:				elog(ERROR, "unrecognized consttype: %u",					 prefix->consttype);				return DEFAULT_MATCH_SEL;		}		prefix = string_to_const(prefixstr, vartype);		pfree(prefixstr);	}	if (pstatus == Pattern_Prefix_Exact)	{		/*		 * Pattern specifies an exact match, so pretend operator is '='

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?