selfuncs.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 2,384 行 · 第 1/5 页
C
2,384 行
/*------------------------------------------------------------------------- * * selfuncs.c * Selectivity functions and index cost estimation functions for * standard operators and index access methods. * * Selectivity routines are registered in the pg_operator catalog * in the "oprrest" and "oprjoin" attributes. * * Index cost functions are registered in the pg_am catalog * in the "amcostestimate" attribute. * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.147.2.3 2004/02/27 21:44:44 tgl Exp $ * *------------------------------------------------------------------------- *//*---------- * Operator selectivity estimation functions are called to estimate the * selectivity of WHERE clauses whose top-level operator is their operator. * We divide the problem into two cases: * Restriction clause estimation: the clause involves vars of just * one relation. * Join clause estimation: the clause involves vars of multiple rels. * Join selectivity estimation is far more difficult and usually less accurate * than restriction estimation. * * When dealing with the inner scan of a nestloop join, we consider the * join's joinclauses as restriction clauses for the inner relation, and * treat vars of the outer relation as parameters (a/k/a constants of unknown * values). So, restriction estimators need to be able to accept an argument * telling which relation is to be treated as the variable. * * The call convention for a restriction estimator (oprrest function) is * * Selectivity oprrest (Query *root, * Oid operator, * List *args, * int varRelid); * * root: general information about the query (rtable and RelOptInfo lists * are particularly important for the estimator). * operator: OID of the specific operator in question. * args: argument list from the operator clause. * varRelid: if not zero, the relid (rtable index) of the relation to * be treated as the variable relation. May be zero if the args list * is known to contain vars of only one relation. * * This is represented at the SQL level (in pg_proc) as * * float8 oprrest (internal, oid, internal, int4); * * The call convention for a join estimator (oprjoin function) is similar * except that varRelid is not needed, and instead the join type is * supplied: * * Selectivity oprjoin (Query *root, * Oid operator, * List *args, * JoinType jointype); * * float8 oprjoin (internal, oid, internal, int2); * * (We deliberately make the SQL signature different to facilitate * catching errors.) *---------- */#include "postgres.h"#include <ctype.h>#include <math.h>#include "access/heapam.h"#include "access/nbtree.h"#include "access/tuptoaster.h"#include "catalog/catname.h"#include "catalog/pg_namespace.h"#include "catalog/pg_opclass.h"#include "catalog/pg_operator.h"#include "catalog/pg_proc.h"#include "catalog/pg_statistic.h"#include "catalog/pg_type.h"#include "mb/pg_wchar.h"#include "nodes/makefuncs.h"#include "optimizer/clauses.h"#include "optimizer/cost.h"#include "optimizer/pathnode.h"#include "optimizer/paths.h"#include "optimizer/plancat.h"#include "optimizer/prep.h"#include "optimizer/tlist.h"#include "optimizer/var.h"#include "parser/parse_expr.h"#include "parser/parse_func.h"#include "parser/parse_oper.h"#include "parser/parsetree.h"#include "utils/builtins.h"#include "utils/date.h"#include "utils/datum.h"#include "utils/int8.h"#include "utils/lsyscache.h"#include "utils/pg_locale.h"#include "utils/selfuncs.h"#include "utils/syscache.h"/* * Note: the default selectivity estimates are not chosen entirely at random. * We want them to be small enough to ensure that indexscans will be used if * available, for typical table densities of ~100 tuples/page. Thus, for * example, 0.01 is not quite small enough, since that makes it appear that * nearly all pages will be hit anyway. Also, since we sometimes estimate * eqsel as 1/num_distinct, we probably want DEFAULT_NUM_DISTINCT to equal * 1/DEFAULT_EQ_SEL. *//* default selectivity estimate for equalities such as "A = b" */#define DEFAULT_EQ_SEL 0.005/* default selectivity estimate for inequalities such as "A < b" */#define DEFAULT_INEQ_SEL (1.0 / 3.0)/* default selectivity estimate for pattern-match operators such as LIKE */#define DEFAULT_MATCH_SEL 0.005/* default number of distinct values in a table */#define DEFAULT_NUM_DISTINCT 200/* default selectivity estimate for boolean and null test nodes */#define DEFAULT_UNK_SEL 0.005#define DEFAULT_NOT_UNK_SEL (1.0 - DEFAULT_UNK_SEL)#define DEFAULT_BOOL_SEL 0.5/* * Clamp a computed probability estimate (which may suffer from roundoff or * estimation errors) to valid range. Argument must be a float variable. */#define CLAMP_PROBABILITY(p) \ do { \ if (p < 0.0) \ p = 0.0; \ else if (p > 1.0) \ p = 1.0; \ } while (0)static bool get_var_maximum(Query *root, Var *var, Oid sortop, Datum *max);static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue, Datum lobound, Datum hibound, Oid boundstypid, double *scaledlobound, double *scaledhibound);static double convert_numeric_to_scalar(Datum value, Oid typid);static void convert_string_to_scalar(unsigned char *value, double *scaledvalue, unsigned char *lobound, double *scaledlobound, unsigned char *hibound, double *scaledhibound);static void convert_bytea_to_scalar(Datum value, double *scaledvalue, Datum lobound, double *scaledlobound, Datum hibound, double *scaledhibound);static double convert_one_string_to_scalar(unsigned char *value, int rangelo, int rangehi);static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen, int rangelo, int rangehi);static unsigned char *convert_string_datum(Datum value, Oid typid);static double convert_timevalue_to_scalar(Datum value, Oid typid);static double get_att_numdistinct(Query *root, Var *var, Form_pg_statistic stats);static bool get_restriction_var(List *args, int varRelid, Var **var, Node **other, bool *varonleft);static void get_join_vars(List *args, Var **var1, Var **var2);static Selectivity prefix_selectivity(Query *root, Var *var, Oid opclass, Const *prefix);static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);static Datum string_to_datum(const char *str, Oid datatype);static Const *string_to_const(const char *str, Oid datatype);static Const *string_to_bytea_const(const char *str, size_t str_len);/* * eqsel - Selectivity of "=" for any data types. * * Note: this routine is also used to estimate selectivity for some * operators that are not "=" but have comparable selectivity behavior, * such as "~=" (geometric approximate-match). Even for "=", we must * keep in mind that the left and right datatypes may differ. */Datumeqsel(PG_FUNCTION_ARGS){ Query *root = (Query *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); Var *var; Node *other; bool varonleft; Oid relid; HeapTuple statsTuple; Datum *values; int nvalues; float4 *numbers; int nnumbers; double selec; /* * If expression is not var = something or something = var for a * simple var of a real relation (no subqueries, for now), then punt * and return a default estimate. */ if (!get_restriction_var(args, varRelid, &var, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); relid = getrelid(var->varno, root->rtable); if (relid == InvalidOid) PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); /* * If the something is a NULL constant, assume operator is strict and * return zero, ie, operator will never return TRUE. */ if (IsA(other, Const) && ((Const *) other)->constisnull) PG_RETURN_FLOAT8(0.0); /* get stats for the attribute, if available */ statsTuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(var->varattno), 0, 0); if (HeapTupleIsValid(statsTuple)) { Form_pg_statistic stats; stats = (Form_pg_statistic) GETSTRUCT(statsTuple); if (IsA(other, Const)) { /* Var is being compared to a known non-null constant */ Datum constval = ((Const *) other)->constvalue; bool match = false; int i; /* * Is the constant "=" to any of the column's most common * values? (Although the given operator may not really be * "=", we will assume that seeing whether it returns TRUE is * an appropriate test. If you don't like this, maybe you * shouldn't be using eqsel for your operator...) */ if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod, STATISTIC_KIND_MCV, InvalidOid, &values, &nvalues, &numbers, &nnumbers)) { FmgrInfo eqproc; fmgr_info(get_opcode(operator), &eqproc); for (i = 0; i < nvalues; i++) { /* be careful to apply operator right way 'round */ if (varonleft) match = DatumGetBool(FunctionCall2(&eqproc, values[i], constval)); else match = DatumGetBool(FunctionCall2(&eqproc, constval, values[i])); if (match) break; } } else { /* no most-common-value info available */ values = NULL; numbers = NULL; i = nvalues = nnumbers = 0; } if (match) { /* * Constant is "=" to this common value. We know * selectivity exactly (or as exactly as VACUUM could * calculate it, anyway). */ selec = numbers[i]; } else { /* * Comparison is against a constant that is neither NULL * nor any of the common values. Its selectivity cannot * be more than this: */ double sumcommon = 0.0; double otherdistinct; for (i = 0; i < nnumbers; i++) sumcommon += numbers[i]; selec = 1.0 - sumcommon - stats->stanullfrac; CLAMP_PROBABILITY(selec); /* * and in fact it's probably a good deal less. We * approximate that all the not-common values share this * remaining fraction equally, so we divide by the number * of other distinct values. */ otherdistinct = get_att_numdistinct(root, var, stats) - nnumbers; if (otherdistinct > 1) selec /= otherdistinct; /* * Another cross-check: selectivity shouldn't be estimated * as more than the least common "most common value". */ if (nnumbers > 0 && selec > numbers[nnumbers - 1]) selec = numbers[nnumbers - 1]; } free_attstatsslot(var->vartype, values, nvalues, numbers, nnumbers); } else { double ndistinct; /* * Search is for a value that we do not know a priori, but we * will assume it is not NULL. Estimate the selectivity as * non-null fraction divided by number of distinct values, so * that we get a result averaged over all possible values * whether common or uncommon. (Essentially, we are assuming * that the not-yet-known comparison value is equally likely * to be any of the possible values, regardless of their * frequency in the table. Is that a good idea?) */ selec = 1.0 - stats->stanullfrac; ndistinct = get_att_numdistinct(root, var, stats); if (ndistinct > 1) selec /= ndistinct; /* * Cross-check: selectivity should never be estimated as more * than the most common value's. */ if (get_attstatsslot(statsTuple, var->vartype, var->vartypmod, STATISTIC_KIND_MCV, InvalidOid, NULL, NULL, &numbers, &nnumbers)) { if (nnumbers > 0 && selec > numbers[0]) selec = numbers[0]; free_attstatsslot(var->vartype, NULL, 0, numbers, nnumbers); } } ReleaseSysCache(statsTuple); } else { /* * No VACUUM ANALYZE stats available, so make a guess using * estimated number of distinct values and assuming they are * equally common. (The guess is unlikely to be very good, but we * do know a few special cases.) */ selec = 1.0 / get_att_numdistinct(root, var, NULL); } /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec);}/* * neqsel - Selectivity of "!=" for any data types. * * This routine is also used for some operators that are not "!=" * but have comparable selectivity behavior. See above comments * for eqsel(). */Datumneqsel(PG_FUNCTION_ARGS){ Query *root = (Query *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); Oid eqop; float8 result; /* * We want 1 - eqsel() where the equality operator is the one * associated with this != operator, that is, its negator. */ eqop = get_negator(operator); if (eqop) { result = DatumGetFloat8(DirectFunctionCall4(eqsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), Int32GetDatum(varRelid))); } else { /* Use default selectivity (should we raise an error instead?) */ result = DEFAULT_EQ_SEL; } result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars. * * This is the guts of both scalarltsel and scalargtsel. The caller has * commuted the clause, if necessary, so that we can treat the Var as * being on the left. The caller must also make sure that the other side * of the clause is a non-null Const, and dissect same into a value and * datatype. * * This routine works for any datatype (or pair of datatypes) known to * convert_to_scalar(). If it is applied to some other datatype, * it will return a default estimate. */static doublescalarineqsel(Query *root, Oid operator, bool isgt, Var *var, Datum constval, Oid consttype){ Oid relid; HeapTuple statsTuple; Form_pg_statistic stats; FmgrInfo opproc; Datum *values; int nvalues; float4 *numbers; int nnumbers; double mcv_selec, hist_selec, sumcommon; double selec; int i; /* * If expression is not var op something or something op var for a * simple var of a real relation (no subqueries, for now), then punt * and return a default estimate. */ relid = getrelid(var->varno, root->rtable); if (relid == InvalidOid) return DEFAULT_INEQ_SEL; /* get stats for the attribute */ statsTuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(var->varattno), 0, 0); if (!HeapTupleIsValid(statsTuple)) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?