📄 selfuncs.c
字号:
*/ Oid eqopr = get_opclass_member(opclass, InvalidOid, BTEqualStrategyNumber); List *eqargs; if (eqopr == InvalidOid) elog(ERROR, "no = operator for opclass %u", opclass); eqargs = list_make2(variable, prefix); result = DatumGetFloat8(DirectFunctionCall4(eqsel, PointerGetDatum(root), ObjectIdGetDatum(eqopr), PointerGetDatum(eqargs), Int32GetDatum(varRelid))); } else { /* * Not exact-match pattern. We estimate selectivity of the fixed * prefix and remainder of pattern separately, then combine the two. */ Selectivity prefixsel; Selectivity restsel; Selectivity selec; if (pstatus == Pattern_Prefix_Partial) prefixsel = prefix_selectivity(root, variable, opclass, prefix); else prefixsel = 1.0; restsel = pattern_selectivity(rest, ptype); selec = prefixsel * restsel; /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); result = selec; } if (prefix) { pfree(DatumGetPointer(prefix->constvalue)); pfree(prefix); } ReleaseVariableStats(vardata); return result;}/* * regexeqsel - Selectivity of regular-expression pattern match. */Datumregexeqsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex));}/* * icregexeqsel - Selectivity of case-insensitive regex match. */Datumicregexeqsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC));}/* * likesel - Selectivity of LIKE pattern match. */Datumlikesel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like));}/* * iclikesel - Selectivity of ILIKE pattern match. */Datumiclikesel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC));}/* * regexnesel - Selectivity of regular-expression pattern non-match. */Datumregexnesel(PG_FUNCTION_ARGS){ double result; result = patternsel(fcinfo, Pattern_Type_Regex); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * icregexnesel - Selectivity of case-insensitive regex non-match. */Datumicregexnesel(PG_FUNCTION_ARGS){ double result; result = patternsel(fcinfo, Pattern_Type_Regex_IC); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * nlikesel - Selectivity of LIKE pattern non-match. */Datumnlikesel(PG_FUNCTION_ARGS){ double result; result = patternsel(fcinfo, Pattern_Type_Like); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * icnlikesel - Selectivity of ILIKE pattern non-match. */Datumicnlikesel(PG_FUNCTION_ARGS){ double result; result = patternsel(fcinfo, Pattern_Type_Like_IC); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * booltestsel - Selectivity of BooleanTest Node. */Selectivitybooltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg, int varRelid, JoinType jointype){ VariableStatData vardata; double selec; examine_variable(root, arg, varRelid, &vardata); if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; double freq_null; Datum *values; int nvalues; float4 *numbers; int nnumbers; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values, &nvalues, &numbers, &nnumbers) && nnumbers > 0) { double freq_true; double freq_false; /* * Get first MCV frequency and derive frequency for true. */ if (DatumGetBool(values[0])) freq_true = numbers[0]; else freq_true = 1.0 - numbers[0] - freq_null; /* * Next derive frequency for false. Then use these as appropriate * to derive frequency for each case. */ freq_false = 1.0 - freq_true - freq_null; switch (booltesttype) { case IS_UNKNOWN: /* select only NULL values */ selec = freq_null; break; case IS_NOT_UNKNOWN: /* select non-NULL values */ selec = 1.0 - freq_null; break; case IS_TRUE: /* select only TRUE values */ selec = freq_true; break; case IS_NOT_TRUE: /* select non-TRUE values */ selec = 1.0 - freq_true; break; case IS_FALSE: /* select only FALSE values */ selec = freq_false; break; case IS_NOT_FALSE: /* select non-FALSE values */ selec = 1.0 - freq_false; break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } free_attstatsslot(vardata.atttype, values, nvalues, numbers, nnumbers); } else { /* * No most-common-value info available. Still have null fraction * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust * for null fraction and assume an even split for boolean tests. */ switch (booltesttype) { case IS_UNKNOWN: /* * Use freq_null directly. */ selec = freq_null; break; case IS_NOT_UNKNOWN: /* * Select not unknown (not null) values. Calculate from * freq_null. */ selec = 1.0 - freq_null; break; case IS_TRUE: case IS_NOT_TRUE: case IS_FALSE: case IS_NOT_FALSE: selec = (1.0 - freq_null) / 2.0; break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } } } else { /* * If we can't get variable statistics for the argument, perhaps * clause_selectivity can do something with it. We ignore the * possibility of a NULL value when using clause_selectivity, and just * assume the value is either TRUE or FALSE. */ switch (booltesttype) { case IS_UNKNOWN: selec = DEFAULT_UNK_SEL; break; case IS_NOT_UNKNOWN: selec = DEFAULT_NOT_UNK_SEL; break; case IS_TRUE: case IS_NOT_FALSE: selec = (double) clause_selectivity(root, arg, varRelid, jointype); break; case IS_FALSE: case IS_NOT_TRUE: selec = 1.0 - (double) clause_selectivity(root, arg, varRelid, jointype); break; default: elog(ERROR, "unrecognized booltesttype: %d", (int) booltesttype); selec = 0.0; /* Keep compiler quiet */ break; } } ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); return (Selectivity) selec;}/* * nulltestsel - Selectivity of NullTest Node. */Selectivitynulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg, int varRelid){ VariableStatData vardata; double selec; examine_variable(root, arg, varRelid, &vardata); if (HeapTupleIsValid(vardata.statsTuple)) { Form_pg_statistic stats; double freq_null; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); freq_null = stats->stanullfrac; switch (nulltesttype) { case IS_NULL: /* * Use freq_null directly. */ selec = freq_null; break; case IS_NOT_NULL: /* * Select not unknown (not null) values. Calculate from * freq_null. */ selec = 1.0 - freq_null; break; default: elog(ERROR, "unrecognized nulltesttype: %d", (int) nulltesttype); return (Selectivity) 0; /* keep compiler quiet */ } } else { /* * No VACUUM ANALYZE stats available, so make a guess */ switch (nulltesttype) { case IS_NULL: selec = DEFAULT_UNK_SEL; break; case IS_NOT_NULL: selec = DEFAULT_NOT_UNK_SEL; break; default: elog(ERROR, "unrecognized nulltesttype: %d", (int) nulltesttype); return (Selectivity) 0; /* keep compiler quiet */ } } ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); return (Selectivity) selec;}/* * eqjoinsel - Join selectivity of "=" */Datumeqjoinsel(PG_FUNCTION_ARGS){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); double selec; VariableStatData vardata1; VariableStatData vardata2; double nd1; double nd2; Form_pg_statistic stats1 = NULL; Form_pg_statistic stats2 = NULL; bool have_mcvs1 = false; Datum *values1 = NULL; int nvalues1 = 0; float4 *numbers1 = NULL; int nnumbers1 = 0; bool have_mcvs2 = false; Datum *values2 = NULL; int nvalues2 = 0; float4 *numbers2 = NULL; int nnumbers2 = 0; get_join_variables(root, args, &vardata1, &vardata2); nd1 = get_variable_numdistinct(&vardata1); nd2 = get_variable_numdistinct(&vardata2); if (HeapTupleIsValid(vardata1.statsTuple)) { stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple); have_mcvs1 = get_attstatsslot(vardata1.statsTuple, vardata1.atttype, vardata1.atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values1, &nvalues1, &numbers1, &nnumbers1); } if (HeapTupleIsValid(vardata2.statsTuple)) { stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple); have_mcvs2 = get_attstatsslot(vardata2.statsTuple, vardata2.atttype, vardata2.atttypmod, STATISTIC_KIND_MCV, InvalidOid, &values2, &nvalues2, &numbers2, &nnumbers2); } if (have_mcvs1 && have_mcvs2) { /* * We have most-common-value lists for both relations. Run through * the lists to see which MCVs actually join to each other with the * given operator. This allows us to determine the exact join * selectivity for the portion of the relations represented by the MCV * lists. We still have to estimate for the remaining population, but * in a skewed distribution this gives us a big leg up in accuracy. * For motivation see the analysis in Y. Ioannidis and S. * Christodoulakis, "On the propagation of errors in the size of join * results", Technical Report 1018, Computer Science Dept., University * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu). */ FmgrInfo eqproc; bool *hasmatch1; bool *hasmatch2; double nullfrac1 = stats1->stanullfrac; double nullfrac2 = stats2->stanullfrac; double matchprodfreq, matchfreq1, matchfreq2, unmatchfreq1, unmatchfreq2, otherfreq1, otherfreq2, totalsel1, totalsel2; int i, nmatches; fmgr_info(get_opcode(operator), &eqproc); hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); /* * If we are doing any variant of JOIN_IN, pretend all the values of * the righthand relation are unique (ie, act as if it's been * DISTINCT'd). *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -