⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 selfuncs.c

📁 PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统
💻 C
📖 第 1 页 / 共 5 页
字号:
/*-------------------------------------------------------------------------
 *
 * selfuncs.c
 *	  Selectivity functions and index cost estimation functions for
 *	  standard operators and index access methods.
 *
 *	  Selectivity routines are registered in the pg_operator catalog
 *	  in the "oprrest" and "oprjoin" attributes.
 *
 *	  Index cost functions are registered in the pg_am catalog
 *	  in the "amcostestimate" attribute.
 *
 * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.191.2.2 2006/05/02 04:34:24 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

/*----------
 * Operator selectivity estimation functions are called to estimate the
 * selectivity of WHERE clauses whose top-level operator is their operator.
 * We divide the problem into two cases:
 *		Restriction clause estimation: the clause involves vars of just
 *			one relation.
 *		Join clause estimation: the clause involves vars of multiple rels.
 * Join selectivity estimation is far more difficult and usually less accurate
 * than restriction estimation.
 *
 * When dealing with the inner scan of a nestloop join, we consider the
 * join's joinclauses as restriction clauses for the inner relation, and
 * treat vars of the outer relation as parameters (a/k/a constants of unknown
 * values).  So, restriction estimators need to be able to accept an argument
 * telling which relation is to be treated as the variable.
 *
 * The call convention for a restriction estimator (oprrest function) is
 *
 *		Selectivity oprrest (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 int varRelid);
 *
 * root: general information about the query (rtable and RelOptInfo lists
 * are particularly important for the estimator).
 * operator: OID of the specific operator in question.
 * args: argument list from the operator clause.
 * varRelid: if not zero, the relid (rtable index) of the relation to
 * be treated as the variable relation.  May be zero if the args list
 * is known to contain vars of only one relation.
 *
 * This is represented at the SQL level (in pg_proc) as
 *
 *		float8 oprrest (internal, oid, internal, int4);
 *
 * The call convention for a join estimator (oprjoin function) is similar
 * except that varRelid is not needed, and instead the join type is
 * supplied:
 *
 *		Selectivity oprjoin (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 JoinType jointype);
 *
 *		float8 oprjoin (internal, oid, internal, int2);
 *
 * (We deliberately make the SQL signature different to facilitate
 * catching errors.)
 *----------
 */

#include "postgres.h"

#include <ctype.h>
#include <math.h>

#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/tuptoaster.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parse_expr.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


/* Return data from examine_variable and friends */
typedef struct
{
	Node	   *var;			/* the Var or expression tree */
	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
	HeapTuple	statsTuple;		/* pg_statistic tuple, or NULL if none */
	/* NB: if statsTuple!=NULL, it must be freed when caller is done */
	Oid			vartype;		/* exposed type of expression */
	Oid			atttype;		/* type to pass to get_attstatsslot */
	int32		atttypmod;		/* typmod to pass to get_attstatsslot */
	bool		isunique;		/* true if matched to a unique index */
} VariableStatData;

/*
 * Release the pg_statistic tuple held in a VariableStatData, if any.
 * Every path that filled in a VariableStatData must run this before
 * returning (see the NB note on statsTuple above).
 */
#define ReleaseVariableStats(vardata)  \
	do { \
		if (HeapTupleIsValid((vardata).statsTuple)) \
			ReleaseSysCache((vardata).statsTuple); \
	} while(0)


static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid);
static void convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound);
static double convert_one_string_to_scalar(char *value,
							 int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
						 VariableStatData *vardata, Node **other,
						 bool *varonleft);
static void get_join_variables(PlannerInfo *root, List *args,
				   VariableStatData *vardata1,
				   VariableStatData *vardata2);
static void examine_variable(PlannerInfo *root, Node *node, int varRelid,
				 VariableStatData *vardata);
static double get_variable_numdistinct(VariableStatData *vardata);
static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
					 Oid sortop, Datum *max);
static Selectivity prefix_selectivity(PlannerInfo *root, Node *variable,
				   Oid opclass, Const *prefix);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);


/*
 *		eqsel			- Selectivity of "=" for any data types.
 *
 * Note: this routine is also used to estimate selectivity for some
 * operators that are not "=" but have comparable selectivity behavior,
 * such as "~=" (geometric approximate-match).	Even for "=", we must
 * keep in mind that the left and right datatypes may differ.
 *
 * fmgr arguments, in order: PlannerInfo *root, operator OID, List *args,
 * int varRelid.  The result is a float8 selectivity that has been passed
 * through CLAMP_PROBABILITY, except for the two early exits, which return
 * DEFAULT_EQ_SEL (clause is not "variable op something") and 0.0
 * (comparison against a NULL constant) directly.
 */
Datum
eqsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	double		selec;

	/*
	 * If expression is not variable = something or something = variable, then
	 * punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * If the something is a NULL constant, assume operator is strict and
	 * return zero, ie, operator will never return TRUE.
	 */
	if (IsA(other, Const) &&
		((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	if (HeapTupleIsValid(vardata.statsTuple))
	{
		Form_pg_statistic stats;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);

		if (IsA(other, Const))
		{
			/* Variable is being compared to a known non-null constant */
			Datum		constval = ((Const *) other)->constvalue;
			bool		match = false;
			int			i;

			/*
			 * Is the constant "=" to any of the column's most common values?
			 * (Although the given operator may not really be "=", we will
			 * assume that seeing whether it returns TRUE is an appropriate
			 * test.  If you don't like this, maybe you shouldn't be using
			 * eqsel for your operator...)
			 */
			if (get_attstatsslot(vardata.statsTuple,
								 vardata.atttype, vardata.atttypmod,
								 STATISTIC_KIND_MCV, InvalidOid,
								 &values, &nvalues,
								 &numbers, &nnumbers))
			{
				FmgrInfo	eqproc;

				fmgr_info(get_opcode(operator), &eqproc);

				for (i = 0; i < nvalues; i++)
				{
					/* be careful to apply operator right way 'round */
					if (varonleft)
						match = DatumGetBool(FunctionCall2(&eqproc,
														   values[i],
														   constval));
					else
						match = DatumGetBool(FunctionCall2(&eqproc,
														   constval,
														   values[i]));
					if (match)
						break;
				}
			}
			else
			{
				/* no most-common-value info available */
				values = NULL;
				numbers = NULL;
				i = nvalues = nnumbers = 0;
			}

			if (match)
			{
				/*
				 * Constant is "=" to this common value.  We know selectivity
				 * exactly (or as exactly as VACUUM could calculate it,
				 * anyway).
				 */
				selec = numbers[i];
			}
			else
			{
				/*
				 * Comparison is against a constant that is neither NULL nor
				 * any of the common values.  Its selectivity cannot be more
				 * than this:
				 */
				double		sumcommon = 0.0;
				double		otherdistinct;

				for (i = 0; i < nnumbers; i++)
					sumcommon += numbers[i];
				selec = 1.0 - sumcommon - stats->stanullfrac;
				CLAMP_PROBABILITY(selec);

				/*
				 * and in fact it's probably a good deal less. We approximate
				 * that all the not-common values share this remaining
				 * fraction equally, so we divide by the number of other
				 * distinct values.
				 */
				otherdistinct = get_variable_numdistinct(&vardata)
					- nnumbers;
				if (otherdistinct > 1)
					selec /= otherdistinct;

				/*
				 * Another cross-check: selectivity shouldn't be estimated as
				 * more than the least common "most common value".
				 */
				if (nnumbers > 0 && selec > numbers[nnumbers - 1])
					selec = numbers[nnumbers - 1];
			}

			free_attstatsslot(vardata.atttype, values, nvalues,
							  numbers, nnumbers);
		}
		else
		{
			double		ndistinct;

			/*
			 * Search is for a value that we do not know a priori, but we will
			 * assume it is not NULL.  Estimate the selectivity as non-null
			 * fraction divided by number of distinct values, so that we get a
			 * result averaged over all possible values whether common or
			 * uncommon.  (Essentially, we are assuming that the not-yet-known
			 * comparison value is equally likely to be any of the possible
			 * values, regardless of their frequency in the table.	Is that a
			 * good idea?)
			 */
			selec = 1.0 - stats->stanullfrac;
			ndistinct = get_variable_numdistinct(&vardata);
			if (ndistinct > 1)
				selec /= ndistinct;

			/*
			 * Cross-check: selectivity should never be estimated as more than
			 * the most common value's.
			 */
			if (get_attstatsslot(vardata.statsTuple,
								 vardata.atttype, vardata.atttypmod,
								 STATISTIC_KIND_MCV, InvalidOid,
								 NULL, NULL,
								 &numbers, &nnumbers))
			{
				if (nnumbers > 0 && selec > numbers[0])
					selec = numbers[0];
				free_attstatsslot(vardata.atttype, NULL, 0, numbers, nnumbers);
			}
		}
	}
	else
	{
		/*
		 * No VACUUM ANALYZE stats available, so make a guess using estimated
		 * number of distinct values and assuming they are equally common.
		 * (The guess is unlikely to be very good, but we do know a few
		 * special cases.)
		 */
		selec = 1.0 / get_variable_numdistinct(&vardata);
	}

	ReleaseVariableStats(vardata);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 *		neqsel			- Selectivity of "!=" for any data types.
 *
 * This routine is also used for some operators that are not "!="
 * but have comparable selectivity behavior.  See above comments
 * for eqsel().
 *
 * Takes the same four fmgr arguments as eqsel() and returns a float8.
 *
 * NOTE(review): the result is computed as 1 - eqsel() with no adjustment
 * for the column's NULL fraction and no final clamp; confirm whether rows
 * with NULL should be excluded from the "!=" estimate.
 */
Datum
neqsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	Oid			eqop;
	float8		result;

	/*
	 * We want 1 - eqsel() where the equality operator is the one associated
	 * with this != operator, that is, its negator.
	 */
	eqop = get_negator(operator);
	if (eqop)
	{
		result = DatumGetFloat8(DirectFunctionCall4(eqsel,
													PointerGetDatum(root),
													ObjectIdGetDatum(eqop),
													PointerGetDatum(args),
													Int32GetDatum(varRelid)));
	}
	else
	{
		/* Use default selectivity (should we raise an error instead?) */
		result = DEFAULT_EQ_SEL;
	}
	result = 1.0 - result;
	PG_RETURN_FLOAT8(result);
}

/*
 *	scalarineqsel		- Selectivity of "<", "<=", ">", ">=" for scalars.
 *
 * This is the guts of both scalarltsel and scalargtsel.  The caller has
 * commuted the clause, if necessary, so that we can treat the variable as
 * being on the left.  The caller must also make sure that the other side
 * of the clause is a non-null Const, and dissect same into a value and
 * datatype.
 *
 * This routine works for any datatype (or pair of datatypes) known to
 * convert_to_scalar().  If it is applied to some other datatype,
 * it will return a default estimate.
*/static doublescalarineqsel(PlannerInfo *root, Oid operator, bool isgt,			  VariableStatData *vardata, Datum constval, Oid consttype){	Form_pg_statistic stats;	FmgrInfo	opproc;	Datum	   *values;	int			nvalues;	float4	   *numbers;	int			nnumbers;	double		mcv_selec,				hist_selec,				sumcommon;	double		selec;	int			i;	if (!HeapTupleIsValid(vardata->statsTuple))	{		/* no stats available, so default result */		return DEFAULT_INEQ_SEL;	}	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);	fmgr_info(get_opcode(operator), &opproc);	/*	 * If we have most-common-values info, add up the fractions of the MCV	 * entries that satisfy MCV OP CONST.  These fractions contribute directly	 * to the result selectivity.  Also add up the total fraction represented	 * by MCV entries.	 */	mcv_selec = 0.0;	sumcommon = 0.0;	if (get_attstatsslot(vardata->statsTuple,						 vardata->atttype, vardata->atttypmod,						 STATISTIC_KIND_MCV, InvalidOid,						 &values, &nvalues,						 &numbers, &nnumbers))	{		for (i = 0; i < nvalues; i++)		{			if (DatumGetBool(FunctionCall2(&opproc,										   values[i],

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -