selfuncs.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,362 行 · 第 1/5 页
C
2,362 行
mcv_selec = 0.0; sumcommon = 0.0; if (HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values, &nvalues, &numbers, &nnumbers)) { for (i = 0; i < nvalues; i++) { if (varonleft ? DatumGetBool(FunctionCall2(opproc, values[i], constval)) : DatumGetBool(FunctionCall2(opproc, constval, values[i]))) mcv_selec += numbers[i]; sumcommon += numbers[i]; } free_attstatsslot(vardata->atttype, values, nvalues, numbers, nnumbers); } *sumcommonp = sumcommon; return mcv_selec;}/* * histogram_selectivity - Examine the histogram for selectivity estimates * * Determine the fraction of the variable's histogram entries that satisfy * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. * * This code will work for any boolean-returning predicate operator, whether * or not it has anything to do with the histogram sort operator. We are * essentially using the histogram just as a representative sample. However, * small histograms are unlikely to be all that representative, so the caller * should specify a minimum histogram size to use, and fall back on some * other approach if this routine fails. * * The caller also specifies n_skip, which causes us to ignore the first and * last n_skip histogram elements, on the grounds that they are outliers and * hence not very representative. If in doubt, min_hist_size = 100 and * n_skip = 1 are reasonable values. * * The function result is the selectivity, or -1 if there is no histogram * or it's smaller than min_hist_size. * * Note that the result disregards both the most-common-values (if any) and * null entries. The caller is expected to combine this result with * statistics for those portions of the column population. It may also be * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs. */doublehistogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Datum constval, bool varonleft, int min_hist_size, int n_skip){ double result; Datum *values; int nvalues; /* check sanity of parameters */ Assert(n_skip >= 0); Assert(min_hist_size > 2 * n_skip); if (HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, &values, &nvalues, NULL, NULL)) { if (nvalues >= min_hist_size) { int nmatch = 0; int i; for (i = n_skip; i < nvalues - n_skip; i++) { if (varonleft ? DatumGetBool(FunctionCall2(opproc, values[i], constval)) : DatumGetBool(FunctionCall2(opproc, constval, values[i]))) nmatch++; } result = ((double) nmatch) / ((double) (nvalues - 2 * n_skip)); } else result = -1; free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } else result = -1; return result;}/* * ineq_histogram_selectivity - Examine the histogram for scalarineqsel * * Determine the fraction of the variable's histogram population that * satisfies the inequality condition, ie, VAR < CONST or VAR > CONST. * * Returns zero if there is no histogram (valid results will always be * greater than zero). * * Note that the result disregards both the most-common-values (if any) and * null entries. The caller is expected to combine this result with * statistics for those portions of the column population. */static doubleineq_histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc, bool isgt, Datum constval, Oid consttype){ double hist_selec; Datum *values; int nvalues; hist_selec = 0.0; /* * Someday, ANALYZE might store more than one histogram per rel/att, * corresponding to more than one possible sort ordering defined for the * column type. However, to make that work we will need to figure out * which staop to search for --- it's not necessarily the one we have at * hand! (For example, we might have a '<=' operator rather than the '<' * operator that will appear in staop.) For now, assume that whatever * appears in pg_statistic is sorted the same way our operator sorts, or * the reverse way if isgt is TRUE. */ if (HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, &values, &nvalues, NULL, NULL)) { if (nvalues > 1) { /* * Use binary search to find proper location, ie, the first slot * at which the comparison fails. (If the given operator isn't * actually sort-compatible with the histogram, you'll get garbage * results ... but probably not any more garbage-y than you would * from the old linear search.) */ double histfrac; int lobound = 0; /* first possible slot to search */ int hibound = nvalues; /* last+1 slot to search */ while (lobound < hibound) { int probe = (lobound + hibound) / 2; bool ltcmp; ltcmp = DatumGetBool(FunctionCall2(opproc, values[probe], constval)); if (isgt) ltcmp = !ltcmp; if (ltcmp) lobound = probe + 1; else hibound = probe; } if (lobound <= 0) { /* Constant is below lower histogram boundary. */ histfrac = 0.0; } else if (lobound >= nvalues) { /* Constant is above upper histogram boundary. */ histfrac = 1.0; } else { int i = lobound; double val, high, low; double binfrac; /* * We have values[i-1] < constant < values[i]. * * Convert the constant and the two nearest bin boundary * values to a uniform comparison scale, and do a linear * interpolation within this bin. */ if (convert_to_scalar(constval, consttype, &val, values[i - 1], values[i], vardata->vartype, &low, &high)) { if (high <= low) { /* cope if bin boundaries appear identical */ binfrac = 0.5; } else if (val <= low) binfrac = 0.0; else if (val >= high) binfrac = 1.0; else { binfrac = (val - low) / (high - low); /* * Watch out for the possibility that we got a NaN or * Infinity from the division. This can happen * despite the previous checks, if for example "low" * is -Infinity. */ if (isnan(binfrac) || binfrac < 0.0 || binfrac > 1.0) binfrac = 0.5; } } else { /* * Ideally we'd produce an error here, on the grounds that * the given operator shouldn't have scalarXXsel * registered as its selectivity func unless we can deal * with its operand types. But currently, all manner of * stuff is invoking scalarXXsel, so give a default * estimate until that can be fixed. */ binfrac = 0.5; } /* * Now, compute the overall selectivity across the values * represented by the histogram. We have i-1 full bins and * binfrac partial bin below the constant. */ histfrac = (double) (i - 1) + binfrac; histfrac /= (double) (nvalues - 1); } /* * Now histfrac = fraction of histogram entries below the * constant. * * Account for "<" vs ">" */ hist_selec = isgt ? (1.0 - histfrac) : histfrac; /* * The histogram boundaries are only approximate to begin with, * and may well be out of date anyway. Therefore, don't believe * extremely small or large selectivity estimates. */ if (hist_selec < 0.0001) hist_selec = 0.0001; else if (hist_selec > 0.9999) hist_selec = 0.9999; } free_attstatsslot(vardata->atttype, values, nvalues, NULL, 0); } return hist_selec;}/* * scalarltsel - Selectivity of "<" (also "<=") for scalars. */Datumscalarltsel(PG_FUNCTION_ARGS){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Datum constval; Oid consttype; bool isgt; double selec; /* * If expression is not variable op something or something op variable, * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } /* * If the constant is NULL, assume operator is strict and return zero, ie, * operator will never return TRUE. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } constval = ((Const *) other)->constvalue; consttype = ((Const *) other)->consttype; /* * Force the var to be on the left to simplify logic in scalarineqsel. */ if (varonleft) { /* we have var < other */ isgt = false; } else { /* we have other < var, commute to make var > other */ operator = get_commutator(operator); if (!operator) { /* Use default selectivity (should we raise an error instead?) */ ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } isgt = true; } selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8((float8) selec);}/* * scalargtsel - Selectivity of ">" (also ">=") for integers. */Datumscalargtsel(PG_FUNCTION_ARGS){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Datum constval; Oid consttype; bool isgt; double selec; /* * If expression is not variable op something or something op variable, * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } /* * If the constant is NULL, assume operator is strict and return zero, ie, * operator will never return TRUE. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } constval = ((Const *) other)->constvalue; consttype = ((Const *) other)->consttype; /* * Force the var to be on the left to simplify logic in scalarineqsel. */ if (varonleft) { /* we have var > other */ isgt = true; } else { /* we have other > var, commute to make var < other */ operator = get_commutator(operator); if (!operator) { /* Use default selectivity (should we raise an error instead?) */ ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL); } isgt = false; } selec = scalarineqsel(root, operator, isgt, &vardata, constval, consttype); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8((float8) selec);}/* * patternsel - Generic code for pattern-match selectivity. */static doublepatternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *variable; Node *other; bool varonleft; Datum constval; Oid consttype; Oid vartype; Oid opfamily; Pattern_Prefix_Status pstatus; Const *patt = NULL; Const *prefix = NULL; Const *rest = NULL; double result; /* * If this is for a NOT LIKE or similar operator, get the corresponding * positive-match operator and work with that. Set result to the correct * default estimate, too. */ if (negate) { operator = get_negator(operator); if (!OidIsValid(operator)) elog(ERROR, "patternsel called for operator without a negator"); result = 1.0 - DEFAULT_MATCH_SEL; } else { result = DEFAULT_MATCH_SEL; } /* * If expression is not variable op constant, then punt and return a * default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft))
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?