selfuncs.c

来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 2,384 行 · 第 1/5 页

C
2,384
字号
/*-------------------------------------------------------------------------
 *
 * selfuncs.c
 *	  Selectivity functions and index cost estimation functions for
 *	  standard operators and index access methods.
 *
 *	  Selectivity routines are registered in the pg_operator catalog
 *	  in the "oprrest" and "oprjoin" attributes.
 *
 *	  Index cost functions are registered in the pg_am catalog
 *	  in the "amcostestimate" attribute.
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.147.2.3 2004/02/27 21:44:44 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

/*----------
 * Operator selectivity estimation functions are called to estimate the
 * selectivity of WHERE clauses whose top-level operator is their operator.
 * We divide the problem into two cases:
 *		Restriction clause estimation: the clause involves vars of just
 *			one relation.
 *		Join clause estimation: the clause involves vars of multiple rels.
 * Join selectivity estimation is far more difficult and usually less accurate
 * than restriction estimation.
 *
 * When dealing with the inner scan of a nestloop join, we consider the
 * join's joinclauses as restriction clauses for the inner relation, and
 * treat vars of the outer relation as parameters (a/k/a constants of unknown
 * values).  So, restriction estimators need to be able to accept an argument
 * telling which relation is to be treated as the variable.
 *
 * The call convention for a restriction estimator (oprrest function) is
 *
 *		Selectivity oprrest (Query *root,
 *							 Oid operator,
 *							 List *args,
 *							 int varRelid);
 *
 * root: general information about the query (rtable and RelOptInfo lists
 * are particularly important for the estimator).
 * operator: OID of the specific operator in question.
 * args: argument list from the operator clause.
 * varRelid: if not zero, the relid (rtable index) of the relation to
 * be treated as the variable relation.  May be zero if the args list
 * is known to contain vars of only one relation.
 *
 * This is represented at the SQL level (in pg_proc) as
 *
 *		float8 oprrest (internal, oid, internal, int4);
 *
 * The call convention for a join estimator (oprjoin function) is similar
 * except that varRelid is not needed, and instead the join type is
 * supplied:
 *
 *		Selectivity oprjoin (Query *root,
 *							 Oid operator,
 *							 List *args,
 *							 JoinType jointype);
 *
 *		float8 oprjoin (internal, oid, internal, int2);
 *
 * (We deliberately make the SQL signature different to facilitate
 * catching errors.)
 *----------
 */

#include "postgres.h"

#include <ctype.h>
#include <math.h>

#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/tuptoaster.h"
#include "catalog/catname.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/prep.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parse_expr.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


/*
 * Note: the default selectivity estimates are not chosen entirely at random.
 * We want them to be small enough to ensure that indexscans will be used if
 * available, for typical table densities of ~100 tuples/page.	Thus, for
 * example, 0.01 is not quite small enough, since that makes it appear that
 * nearly all pages will be hit anyway.  Also, since we sometimes estimate
 * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal
 * 1/DEFAULT_EQ_SEL.
 */

/* default selectivity estimate for equalities such as "A = b" */
#define DEFAULT_EQ_SEL	0.005

/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL  (1.0 / 3.0)

/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL	0.005

/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT  200

/* default selectivity estimate for boolean and null test nodes */
#define DEFAULT_UNK_SEL			0.005
#define DEFAULT_NOT_UNK_SEL		(1.0 - DEFAULT_UNK_SEL)
#define DEFAULT_BOOL_SEL		0.5

/*
 * Clamp a computed probability estimate (which may suffer from roundoff or
 * estimation errors) to valid range.  Argument must be a float variable.
 *
 * The argument is expanded several times and is not parenthesized, so pass
 * only a plain variable name, never an expression with side effects.
 */
#define CLAMP_PROBABILITY(p) \
	do { \
		if (p < 0.0) \
			p = 0.0; \
		else if (p > 1.0) \
			p = 1.0; \
	} while (0)


/* Forward declarations of file-local helper routines. */
static bool get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid);
static void convert_string_to_scalar(unsigned char *value,
						 double *scaledvalue,
						 unsigned char *lobound,
						 double *scaledlobound,
						 unsigned char *hibound,
						 double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound);
static double convert_one_string_to_scalar(unsigned char *value,
							 int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi);
static
unsigned char *convert_string_datum(Datum value, Oid typid);static double convert_timevalue_to_scalar(Datum value, Oid typid);static double get_att_numdistinct(Query *root, Var *var,					Form_pg_statistic stats);static bool get_restriction_var(List *args, int varRelid,					Var **var, Node **other,					bool *varonleft);static void get_join_vars(List *args, Var **var1, Var **var2);static Selectivity prefix_selectivity(Query *root, Var *var,				   Oid opclass, Const *prefix);static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);static Datum string_to_datum(const char *str, Oid datatype);static Const *string_to_const(const char *str, Oid datatype);static Const *string_to_bytea_const(const char *str, size_t str_len);/* *		eqsel			- Selectivity of "=" for any data types. * * Note: this routine is also used to estimate selectivity for some * operators that are not "=" but have comparable selectivity behavior, * such as "~=" (geometric approximate-match).	Even for "=", we must * keep in mind that the left and right datatypes may differ. */Datumeqsel(PG_FUNCTION_ARGS){	Query	   *root = (Query *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	int			varRelid = PG_GETARG_INT32(3);	Var		   *var;	Node	   *other;	bool		varonleft;	Oid			relid;	HeapTuple	statsTuple;	Datum	   *values;	int			nvalues;	float4	   *numbers;	int			nnumbers;	double		selec;	/*	 * If expression is not var = something or something = var for a	 * simple var of a real relation (no subqueries, for now), then punt	 * and return a default estimate.	 */	if (!get_restriction_var(args, varRelid,							 &var, &other, &varonleft))		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);	relid = getrelid(var->varno, root->rtable);	if (relid == InvalidOid)		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);	/*	 * If the something is a NULL constant, assume operator is strict and	 * return zero, ie, operator will never return TRUE.	 
*/	if (IsA(other, Const) &&		((Const *) other)->constisnull)		PG_RETURN_FLOAT8(0.0);	/* get stats for the attribute, if available */	statsTuple = SearchSysCache(STATRELATT,								ObjectIdGetDatum(relid),								Int16GetDatum(var->varattno),								0, 0);	if (HeapTupleIsValid(statsTuple))	{		Form_pg_statistic stats;		stats = (Form_pg_statistic) GETSTRUCT(statsTuple);		if (IsA(other, Const))		{			/* Var is being compared to a known non-null constant */			Datum		constval = ((Const *) other)->constvalue;			bool		match = false;			int			i;			/*			 * Is the constant "=" to any of the column's most common			 * values?	(Although the given operator may not really be			 * "=", we will assume that seeing whether it returns TRUE is			 * an appropriate test.  If you don't like this, maybe you			 * shouldn't be using eqsel for your operator...)			 */			if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,								 STATISTIC_KIND_MCV, InvalidOid,								 &values, &nvalues,								 &numbers, &nnumbers))			{				FmgrInfo	eqproc;				fmgr_info(get_opcode(operator), &eqproc);				for (i = 0; i < nvalues; i++)				{					/* be careful to apply operator right way 'round */					if (varonleft)						match = DatumGetBool(FunctionCall2(&eqproc,														   values[i],														   constval));					else						match = DatumGetBool(FunctionCall2(&eqproc,														   constval,														   values[i]));					if (match)						break;				}			}			else			{				/* no most-common-value info available */				values = NULL;				numbers = NULL;				i = nvalues = nnumbers = 0;			}			if (match)			{				/*				 * Constant is "=" to this common value.  We know				 * selectivity exactly (or as exactly as VACUUM could				 * calculate it, anyway).				 */				selec = numbers[i];			}			else			{				/*				 * Comparison is against a constant that is neither NULL				 * nor any of the common values.  
Its selectivity cannot				 * be more than this:				 */				double		sumcommon = 0.0;				double		otherdistinct;				for (i = 0; i < nnumbers; i++)					sumcommon += numbers[i];				selec = 1.0 - sumcommon - stats->stanullfrac;				CLAMP_PROBABILITY(selec);				/*				 * and in fact it's probably a good deal less. We				 * approximate that all the not-common values share this				 * remaining fraction equally, so we divide by the number				 * of other distinct values.				 */				otherdistinct = get_att_numdistinct(root, var, stats)					- nnumbers;				if (otherdistinct > 1)					selec /= otherdistinct;				/*				 * Another cross-check: selectivity shouldn't be estimated				 * as more than the least common "most common value".				 */				if (nnumbers > 0 && selec > numbers[nnumbers - 1])					selec = numbers[nnumbers - 1];			}			free_attstatsslot(var->vartype, values, nvalues,							  numbers, nnumbers);		}		else		{			double		ndistinct;			/*			 * Search is for a value that we do not know a priori, but we			 * will assume it is not NULL.	Estimate the selectivity as			 * non-null fraction divided by number of distinct values, so			 * that we get a result averaged over all possible values			 * whether common or uncommon.	(Essentially, we are assuming			 * that the not-yet-known comparison value is equally likely			 * to be any of the possible values, regardless of their			 * frequency in the table.	Is that a good idea?)			 */			selec = 1.0 - stats->stanullfrac;			ndistinct = get_att_numdistinct(root, var, stats);			if (ndistinct > 1)				selec /= ndistinct;			/*			 * Cross-check: selectivity should never be estimated as more			 * than the most common value's.			 
*/			if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod,								 STATISTIC_KIND_MCV, InvalidOid,								 NULL, NULL,								 &numbers, &nnumbers))			{				if (nnumbers > 0 && selec > numbers[0])					selec = numbers[0];				free_attstatsslot(var->vartype, NULL, 0, numbers, nnumbers);			}		}		ReleaseSysCache(statsTuple);	}	else	{		/*		 * No VACUUM ANALYZE stats available, so make a guess using		 * estimated number of distinct values and assuming they are		 * equally common.	(The guess is unlikely to be very good, but we		 * do know a few special cases.)		 */		selec = 1.0 / get_att_numdistinct(root, var, NULL);	}	/* result should be in range, but make sure... */	CLAMP_PROBABILITY(selec);	PG_RETURN_FLOAT8((float8) selec);}/* *		neqsel			- Selectivity of "!=" for any data types. * * This routine is also used for some operators that are not "!=" * but have comparable selectivity behavior.  See above comments * for eqsel(). */Datumneqsel(PG_FUNCTION_ARGS){	Query	   *root = (Query *) PG_GETARG_POINTER(0);	Oid			operator = PG_GETARG_OID(1);	List	   *args = (List *) PG_GETARG_POINTER(2);	int			varRelid = PG_GETARG_INT32(3);	Oid			eqop;	float8		result;	/*	 * We want 1 - eqsel() where the equality operator is the one	 * associated with this != operator, that is, its negator.	 */	eqop = get_negator(operator);	if (eqop)	{		result = DatumGetFloat8(DirectFunctionCall4(eqsel,													PointerGetDatum(root),												  ObjectIdGetDatum(eqop),													PointerGetDatum(args),											   Int32GetDatum(varRelid)));	}	else	{		/* Use default selectivity (should we raise an error instead?) */		result = DEFAULT_EQ_SEL;	}	result = 1.0 - result;	PG_RETURN_FLOAT8(result);}/* *	scalarineqsel		- Selectivity of "<", "<=", ">", ">=" for scalars. * * This is the guts of both scalarltsel and scalargtsel.  The caller has * commuted the clause, if necessary, so that we can treat the Var as * being on the left.  
The caller must also make sure that the other side * of the clause is a non-null Const, and dissect same into a value and * datatype. * * This routine works for any datatype (or pair of datatypes) known to * convert_to_scalar().  If it is applied to some other datatype, * it will return a default estimate. */static doublescalarineqsel(Query *root, Oid operator, bool isgt,			  Var *var, Datum constval, Oid consttype){	Oid			relid;	HeapTuple	statsTuple;	Form_pg_statistic stats;	FmgrInfo	opproc;	Datum	   *values;	int			nvalues;	float4	   *numbers;	int			nnumbers;	double		mcv_selec,				hist_selec,				sumcommon;	double		selec;	int			i;	/*	 * If expression is not var op something or something op var for a	 * simple var of a real relation (no subqueries, for now), then punt	 * and return a default estimate.	 */	relid = getrelid(var->varno, root->rtable);	if (relid == InvalidOid)		return DEFAULT_INEQ_SEL;	/* get stats for the attribute */	statsTuple = SearchSysCache(STATRELATT,								ObjectIdGetDatum(relid),								Int16GetDatum(var->varattno),								0, 0);	if (!HeapTupleIsValid(statsTuple))	{

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?