selfuncs.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,362 行 · 第 1/5 页
C
2,362 行
/*-------------------------------------------------------------------------
 *
 * selfuncs.c
 *	  Selectivity functions and index cost estimation functions for
 *	  standard operators and index access methods.
 *
 *	  Selectivity routines are registered in the pg_operator catalog
 *	  in the "oprrest" and "oprjoin" attributes.
 *
 *	  Index cost functions are registered in the pg_am catalog
 *	  in the "amcostestimate" attribute.
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.243.2.1 2008/07/07 20:25:06 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

/*----------
 * Operator selectivity estimation functions are called to estimate the
 * selectivity of WHERE clauses whose top-level operator is their operator.
 * We divide the problem into two cases:
 *		Restriction clause estimation: the clause involves vars of just
 *			one relation.
 *		Join clause estimation: the clause involves vars of multiple rels.
 * Join selectivity estimation is far more difficult and usually less accurate
 * than restriction estimation.
 *
 * When dealing with the inner scan of a nestloop join, we consider the
 * join's joinclauses as restriction clauses for the inner relation, and
 * treat vars of the outer relation as parameters (a/k/a constants of unknown
 * values).  So, restriction estimators need to be able to accept an argument
 * telling which relation is to be treated as the variable.
 *
 * The call convention for a restriction estimator (oprrest function) is
 *
 *		Selectivity oprrest (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 int varRelid);
 *
 * root: general information about the query (rtable and RelOptInfo lists
 * are particularly important for the estimator).
 * operator: OID of the specific operator in question.
 * args: argument list from the operator clause.
 * varRelid: if not zero, the relid (rtable index) of the relation to
 * be treated as the variable relation.  May be zero if the args list
 * is known to contain vars of only one relation.
 *
 * This is represented at the SQL level (in pg_proc) as
 *
 *		float8 oprrest (internal, oid, internal, int4);
 *
 * The call convention for a join estimator (oprjoin function) is similar
 * except that varRelid is not needed, and instead the join type is
 * supplied:
 *
 *		Selectivity oprjoin (PlannerInfo *root,
 *							 Oid operator,
 *							 List *args,
 *							 JoinType jointype);
 *
 *		float8 oprjoin (internal, oid, internal, int2);
 *
 * (We deliberately make the SQL signature different to facilitate
 * catching errors.)
 *----------
 */

#include "postgres.h"

#include <ctype.h>
#include <math.h>

#include "catalog/pg_opfamily.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
#include "mb/pg_wchar.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "optimizer/predtest.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parse_coerce.h"
#include "parser/parse_expr.h"
#include "parser/parsetree.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h"
#include "utils/nabstime.h"
#include "utils/pg_locale.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


static double ineq_histogram_selectivity(VariableStatData *vardata,
						   FmgrInfo *opproc, bool isgt,
						   Datum constval, Oid consttype);
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid);
static void convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound);
static double convert_one_string_to_scalar(char *value,
							 int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
				   Oid sortop, Datum *min, Datum *max);
static Selectivity prefix_selectivity(VariableStatData *vardata,
				   Oid vartype, Oid opfamily, Const *prefixcon);
static Selectivity pattern_selectivity(Const *patt, Pattern_Type ptype);
static Datum string_to_datum(const char *str, Oid datatype);
static Const *string_to_const(const char *str, Oid datatype);
static Const *string_to_bytea_const(const char *str, size_t str_len);


/*
 *		eqsel			- Selectivity of "=" for any data types.
 *
 * Restriction estimator taking the four fmgr arguments described in the
 * calling-convention comment at the top of this file: root, operator OID,
 * clause argument list, and varRelid.  Returns a float8 in [0, 1].
 *
 * The same routine serves operators that are not literally "=" but whose
 * selectivity behaves alike, such as "~=" (geometric approximate-match).
 * Even for a true "=", the two sides may have different datatypes.
 *
 * Three estimation paths are used, depending on what is known:
 *	- no pg_statistic row:		1 / (estimated number of distinct values)
 *	- row present, other side is a Const:	MCV lookup, else a share of
 *										the non-MCV, non-null fraction
 *	- row present, other side unknown:	non-null fraction / ndistinct,
 *										capped by the top MCV frequency
 */
Datum
eqsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	double		selec;

	/*
	 * The clause must look like "variable = something" or "something =
	 * variable"; anything else gets the stock default.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * Comparison against a NULL constant: the operator is assumed strict, so
	 * it can never yield TRUE and the selectivity is zero.
	 */
	if (IsA(other, Const) &&
		((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	if (!HeapTupleIsValid(vardata.statsTuple))
	{
		/*
		 * Nothing from ANALYZE to go on.  Guess from the estimated number of
		 * distinct values, treating them all as equally frequent.  (Not a
		 * great guess in general, though a few special cases are known.)
		 */
		selec = 1.0 / get_variable_numdistinct(&vardata);
	}
	else if (IsA(other, Const))
	{
		/* The variable is compared with a known, non-null constant. */
		Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		Datum		constval = ((Const *) other)->constvalue;
		bool		hit = false;
		int			idx;

		/*
		 * Probe the most-common-values list with the supplied operator.  The
		 * operator might not be a genuine "=", but a TRUE result from it is
		 * taken as "this MCV entry is the one being asked about"; operators
		 * for which that is wrong should not be using eqsel.
		 */
		if (get_attstatsslot(vardata.statsTuple,
							 vardata.atttype, vardata.atttypmod,
							 STATISTIC_KIND_MCV, InvalidOid,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			FmgrInfo	eqproc;

			fmgr_info(get_opcode(operator), &eqproc);

			idx = 0;
			while (idx < nvalues)
			{
				/* keep the operands on the sides the clause had them */
				Datum		lhs = varonleft ? values[idx] : constval;
				Datum		rhs = varonleft ? constval : values[idx];

				hit = DatumGetBool(FunctionCall2(&eqproc, lhs, rhs));
				if (hit)
					break;
				idx++;
			}
		}
		else
		{
			/* column has no MCV slot; behave as if the list were empty */
			values = NULL;
			numbers = NULL;
			idx = nvalues = nnumbers = 0;
		}

		if (hit)
		{
			/*
			 * The constant is one of the common values, so its frequency is
			 * known as precisely as ANALYZE measured it.
			 */
			selec = numbers[idx];
		}
		else
		{
			/*
			 * The constant is neither NULL nor a listed common value.  Start
			 * from the upper bound: whatever fraction of the column is left
			 * after removing the MCV entries and the nulls.
			 */
			double		mcv_total = 0.0;
			double		rest_distinct;

			for (idx = 0; idx < nnumbers; idx++)
				mcv_total += numbers[idx];
			selec = 1.0 - mcv_total - stats->stanullfrac;
			CLAMP_PROBABILITY(selec);

			/*
			 * Spread that remainder evenly over the distinct values that are
			 * not in the MCV list.
			 */
			rest_distinct = get_variable_numdistinct(&vardata) - nnumbers;
			if (rest_distinct > 1)
				selec /= rest_distinct;

			/*
			 * Sanity cap: a value absent from the MCV list should not be
			 * estimated as more frequent than the list's last entry.
			 */
			if (nnumbers > 0 && selec > numbers[nnumbers - 1])
				selec = numbers[nnumbers - 1];
		}

		free_attstatsslot(vardata.atttype, values, nvalues,
						  numbers, nnumbers);
	}
	else
	{
		/*
		 * The comparison value is not known at plan time; it is assumed to be
		 * non-null.  Use the non-null fraction divided by the number of
		 * distinct values, i.e. an average over every possible value, common
		 * or not.  This treats the unknown value as equally likely to be any
		 * of them regardless of frequency, which is debatable.
		 */
		Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		double		ndistinct;

		selec = 1.0 - stats->stanullfrac;
		ndistinct = get_variable_numdistinct(&vardata);
		if (ndistinct > 1)
			selec /= ndistinct;

		/*
		 * Sanity cap: never estimate above the frequency of the single most
		 * common value.  Only the frequency array is fetched here.
		 */
		if (get_attstatsslot(vardata.statsTuple,
							 vardata.atttype, vardata.atttypmod,
							 STATISTIC_KIND_MCV, InvalidOid,
							 NULL, NULL,
							 &numbers, &nnumbers))
		{
			if (nnumbers > 0 && selec > numbers[0])
				selec = numbers[0];
			free_attstatsslot(vardata.atttype, NULL, 0, numbers, nnumbers);
		}
	}

	ReleaseVariableStats(vardata);

	/* the estimate ought to be a probability already; enforce it anyway */
	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}

/*
 *		neqsel			- Selectivity of "!=" for any data types.
 *
 * Computed as 1 - eqsel() applied to the negator of the given operator.
 * Like eqsel(), this is also used for operators that are not literally
 * "!=" but have comparable selectivity behavior.
 */
Datum
neqsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	Oid			eqop;
	float8		eq_selec;

	/* The matching equality operator is this operator's negator. */
	eqop = get_negator(operator);

	if (!eqop)
	{
		/* No negator on record: fall back to the default equality guess. */
		eq_selec = DEFAULT_EQ_SEL;
	}
	else
	{
		eq_selec = DatumGetFloat8(DirectFunctionCall4(eqsel,
													  PointerGetDatum(root),
													  ObjectIdGetDatum(eqop),
													  PointerGetDatum(args),
													  Int32GetDatum(varRelid)));
	}

	eq_selec = 1.0 - eq_selec;

	PG_RETURN_FLOAT8(eq_selec);
}

/*
 *	scalarineqsel		- Selectivity of "<", "<=", ">", ">=" for scalars.
 *
 * Shared core of scalarltsel and scalargtsel.  By the time we get here the
 * caller has commuted the clause if needed so the variable is on the left,
 * has verified that the other side is a non-null Const, and has split that
 * Const into constval and consttype.
 *
 * Any datatype (or pair of datatypes) that convert_to_scalar() understands
 * is supported; for others a default estimate comes back.
 *
 * The result combines two pieces: the summed frequency of MCV entries that
 * satisfy the operator, plus the histogram's estimate applied to the part
 * of the column that is neither null nor in the MCV list.
 */
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt,
			  VariableStatData *vardata, Datum constval, Oid consttype)
{
	Form_pg_statistic stats;
	FmgrInfo	opproc;
	double		mcv_selec;
	double		hist_selec;
	double		sumcommon;
	double		selec;

	/* Without a statistics row there is nothing to compute from. */
	if (!HeapTupleIsValid(vardata->statsTuple))
		return DEFAULT_INEQ_SEL;

	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

	fmgr_info(get_opcode(operator), &opproc);

	/*
	 * MCV part: total frequency of the common values for which
	 * "MCV OP CONST" holds.  sumcommon receives the frequency of the whole
	 * MCV list, matching or not.
	 */
	mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
								&sumcommon);

	/*
	 * Histogram part: locate the constant's bin and derive the fraction of
	 * histogram population on the qualifying side.
	 */
	hist_selec = ineq_histogram_selectivity(vardata, &opproc, isgt,
											constval, consttype);

	/*
	 * Combine.  The histogram describes only values that are non-null and
	 * not in the MCV list, so scale it by that fraction.  When no histogram
	 * estimate is available (hist_selec is not positive), arbitrarily take
	 * half of those leftover values as matching.
	 */
	selec = 1.0 - stats->stanullfrac - sumcommon;
	selec *= (hist_selec > 0.0) ? hist_selec : 0.5;
	selec += mcv_selec;

	/* the estimate ought to be a probability already; enforce it anyway */
	CLAMP_PROBABILITY(selec);

	return selec;
}

/*
 *	mcv_selectivity			- Examine the MCV list for selectivity estimates
 *
 * Determine the fraction of the variable's MCV population that satisfies
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
 * compute the fraction of the total column population represented by the MCV
 * list.  This code will work for any boolean-returning predicate operator.
 *
 * The function result is the MCV selectivity, and the fraction of the
 * total population is returned into *sumcommonp.  Zeroes are returned
 * if there is no MCV list.
 */
double
mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
				Datum constval, bool varonleft,
				double *sumcommonp)
{
	double		mcv_selec,
				sumcommon;
	Datum	   *values;
	int			nvalues;
	float4	   *numbers;
	int			nnumbers;
	int			i;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?