📄 selfuncs.c
字号:
/*------------------------------------------------------------------------- * * selfuncs.c * Selectivity functions and index cost estimation functions for * standard operators and index access methods. * * Selectivity routines are registered in the pg_operator catalog * in the "oprrest" and "oprjoin" attributes. * * Index cost functions are registered in the pg_am catalog * in the "amcostestimate" attribute. * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.191.2.2 2006/05/02 04:34:24 tgl Exp $ * *------------------------------------------------------------------------- *//*---------- * Operator selectivity estimation functions are called to estimate the * selectivity of WHERE clauses whose top-level operator is their operator. * We divide the problem into two cases: * Restriction clause estimation: the clause involves vars of just * one relation. * Join clause estimation: the clause involves vars of multiple rels. * Join selectivity estimation is far more difficult and usually less accurate * than restriction estimation. * * When dealing with the inner scan of a nestloop join, we consider the * join's joinclauses as restriction clauses for the inner relation, and * treat vars of the outer relation as parameters (a/k/a constants of unknown * values). So, restriction estimators need to be able to accept an argument * telling which relation is to be treated as the variable. * * The call convention for a restriction estimator (oprrest function) is * * Selectivity oprrest (PlannerInfo *root, * Oid operator, * List *args, * int varRelid); * * root: general information about the query (rtable and RelOptInfo lists * are particularly important for the estimator). * operator: OID of the specific operator in question. * args: argument list from the operator clause. 
* varRelid: if not zero, the relid (rtable index) of the relation to
 *		be treated as the variable relation.  May be zero if the args list
 *		is known to contain vars of only one relation.
 *
 * This is represented at the SQL level (in pg_proc) as
 *
 *		float8 oprrest (internal, oid, internal, int4);
 *
 * The call convention for a join estimator (oprjoin function) is similar
 * except that varRelid is not needed, and instead the join type is
 * supplied:
 *
 *		Selectivity oprjoin (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 JoinType jointype);
 *
 *		float8 oprjoin (internal, oid, internal, int2);
 *
 * (We deliberately make the SQL signature different to facilitate
 * catching errors.)
 *----------
 */

#include "postgres.h"

#include <ctype.h>
#include <math.h>

#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/tuptoaster.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "optimizer/var.h"
#include "parser/parse_expr.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/int8.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


/*
 * VariableStatData - return data from examine_variable and friends.
 *
 * Of the helpers declared below, get_restriction_variable(),
 * get_join_variables(), examine_variable(), get_variable_numdistinct() and
 * get_variable_maximum() all take a pointer to this struct.
 */
typedef struct
{
	Node	   *var;			/* the Var or expression tree */
	RelOptInfo *rel;			/* Relation, or NULL if not identifiable */
	HeapTuple	statsTuple;		/* pg_statistic tuple, or NULL if none */
	/*
	 * NB: if statsTuple!=NULL, it must be freed when caller is done
	 * (ReleaseVariableStats, defined just below, does that).
	 */
	Oid			vartype;		/* exposed type of expression */
	Oid			atttype;		/* type to pass to get_attstatsslot */
	int32		atttypmod;		/* typmod to pass to get_attstatsslot */
	bool		isunique;		/* true if matched to a unique index */
} VariableStatData;

/*
 * ReleaseVariableStats - hand the statsTuple of a VariableStatData back via
 * ReleaseSysCache(), but only when HeapTupleIsValid() says there is one.
 *
 * The argument is the struct itself (it is accessed with "."), not a
 * pointer to it.  It is expanded twice, so it should not have side effects.
 * The do { ... } while(0) wrapper makes the expansion a single statement.
 */
#define ReleaseVariableStats(vardata) \
	do { \
		if (HeapTupleIsValid((vardata).statsTuple)) \
			ReleaseSysCache((vardata).statsTuple); \
	} while(0)


/* Forward declarations of the file-local (static) helper functions. */
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid);
static void convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound);
static double convert_one_string_to_scalar(char *value,
							 int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static bool get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
						 VariableStatData *vardata, Node **other,
						 bool *varonleft);
static void get_join_variables(PlannerInfo *root, List *args,
				   VariableStatData *vardata1,
				   VariableStatData *vardata2);
static void examine_variable(PlannerInfo *root, Node *node, int varRelid,
				 VariableStatData *vardata);
static double get_variable_numdistinct(VariableStatData *vardata);
static bool get_variable_maximum(PlannerInfo *root, VariableStatData *vardata,
					 Oid sortop, Datum *max);
static Selectivity prefix_selectivity(PlannerInfo *root, Node *variable,
				   Oid opclass, Const *prefix);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
static Datum string_to_datum(const char *str, Oid datatype);
static Const
*string_to_const(const char *str, Oid datatype);static Const *string_to_bytea_const(const char *str, size_t str_len);/* * eqsel - Selectivity of "=" for any data types. * * Note: this routine is also used to estimate selectivity for some * operators that are not "=" but have comparable selectivity behavior, * such as "~=" (geometric approximate-match). Even for "=", we must * keep in mind that the left and right datatypes may differ. */Datumeqsel(PG_FUNCTION_ARGS){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Datum *values; int nvalues; float4 *numbers; int nnumbers; double selec; /* * If expression is not variable = something or something = variable, then * punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); /* * If the something is a NULL constant, assume operator is strict and * return zero, ie, operator will never return TRUE. */ if (IsA(other, Const) && ((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); if (IsA(other, Const)) { /* Variable is being compared to a known non-null constant */ Datum constval = ((Const *) other)->constvalue; bool match = false; int i; /* * Is the constant "=" to any of the column's most common values? * (Although the given operator may not really be "=", we will * assume that seeing whether it returns TRUE is an appropriate * test. If you don't like this, maybe you shouldn't be using * eqsel for your operator...) 
*/ if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values, &nvalues, &numbers, &nnumbers)) { FmgrInfo eqproc; fmgr_info(get_opcode(operator), &eqproc); for (i = 0; i < nvalues; i++) { /* be careful to apply operator right way 'round */ if (varonleft) match = DatumGetBool(FunctionCall2(&eqproc, values[i], constval)); else match = DatumGetBool(FunctionCall2(&eqproc, constval, values[i])); if (match) break; } } else { /* no most-common-value info available */ values = NULL; numbers = NULL; i = nvalues = nnumbers = 0; } if (match) { /* * Constant is "=" to this common value. We know selectivity * exactly (or as exactly as VACUUM could calculate it, * anyway). */ selec = numbers[i]; } else { /* * Comparison is against a constant that is neither NULL nor * any of the common values. Its selectivity cannot be more * than this: */ double sumcommon = 0.0; double otherdistinct; for (i = 0; i < nnumbers; i++) sumcommon += numbers[i]; selec = 1.0 - sumcommon - stats->stanullfrac; CLAMP_PROBABILITY(selec); /* * and in fact it's probably a good deal less. We approximate * that all the not-common values share this remaining * fraction equally, so we divide by the number of other * distinct values. */ otherdistinct = get_variable_numdistinct(&vardata) - nnumbers; if (otherdistinct > 1) selec /= otherdistinct; /* * Another cross-check: selectivity shouldn't be estimated as * more than the least common "most common value". */ if (nnumbers > 0 && selec > numbers[nnumbers - 1]) selec = numbers[nnumbers - 1]; } free_attstatsslot(vardata.atttype, values, nvalues, numbers, nnumbers); } else { double ndistinct; /* * Search is for a value that we do not know a priori, but we will * assume it is not NULL. Estimate the selectivity as non-null * fraction divided by number of distinct values, so that we get a * result averaged over all possible values whether common or * uncommon. 
(Essentially, we are assuming that the not-yet-known * comparison value is equally likely to be any of the possible * values, regardless of their frequency in the table. Is that a * good idea?) */ selec = 1.0 - stats->stanullfrac; ndistinct = get_variable_numdistinct(&vardata); if (ndistinct > 1) selec /= ndistinct; /* * Cross-check: selectivity should never be estimated as more than * the most common value's. */ if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, NULL, &numbers, &nnumbers)) { if (nnumbers > 0 && selec > numbers[0]) selec = numbers[0]; free_attstatsslot(vardata.atttype, NULL, 0, numbers, nnumbers); } } } else { /* * No VACUUM ANALYZE stats available, so make a guess using estimated * number of distinct values and assuming they are equally common. * (The guess is unlikely to be very good, but we do know a few * special cases.) */ selec = 1.0 / get_variable_numdistinct(&vardata); } ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec);}/* * neqsel - Selectivity of "!=" for any data types. * * This routine is also used for some operators that are not "!=" * but have comparable selectivity behavior. See above comments * for eqsel(). */Datumneqsel(PG_FUNCTION_ARGS){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); Oid eqop; float8 result; /* * We want 1 - eqsel() where the equality operator is the one associated * with this != operator, that is, its negator. */ eqop = get_negator(operator); if (eqop) { result = DatumGetFloat8(DirectFunctionCall4(eqsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), Int32GetDatum(varRelid))); } else { /* Use default selectivity (should we raise an error instead?) 
*/ result = DEFAULT_EQ_SEL; } result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars. * * This is the guts of both scalarltsel and scalargtsel. The caller has * commuted the clause, if necessary, so that we can treat the variable as * being on the left. The caller must also make sure that the other side * of the clause is a non-null Const, and dissect same into a value and * datatype. * * This routine works for any datatype (or pair of datatypes) known to * convert_to_scalar(). If it is applied to some other datatype, * it will return a default estimate. */static doublescalarineqsel(PlannerInfo *root, Oid operator, bool isgt, VariableStatData *vardata, Datum constval, Oid consttype){ Form_pg_statistic stats; FmgrInfo opproc; Datum *values; int nvalues; float4 *numbers; int nnumbers; double mcv_selec, hist_selec, sumcommon; double selec; int i; if (!HeapTupleIsValid(vardata->statsTuple)) { /* no stats available, so default result */ return DEFAULT_INEQ_SEL; } stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); fmgr_info(get_opcode(operator), &opproc); /* * If we have most-common-values info, add up the fractions of the MCV * entries that satisfy MCV OP CONST. These fractions contribute directly * to the result selectivity. Also add up the total fraction represented * by MCV entries. */ mcv_selec = 0.0; sumcommon = 0.0; if (get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values, &nvalues, &numbers, &nnumbers)) { for (i = 0; i < nvalues; i++) { if (DatumGetBool(FunctionCall2(&opproc, values[i],
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -