📄 selfuncs.c
字号:
* NOTE: it might seem that we should unique-ify the lefthand input * when considering JOIN_REVERSE_IN. But this is not so, because the * join clause we've been handed has not been commuted from the way * the parser originally wrote it. We know that the unique side of * the IN clause is *always* on the right. * * NOTE: it would be dangerous to try to be smart about JOIN_LEFT or * JOIN_RIGHT here, because we do not have enough information to * determine which var is really on which side of the join. Perhaps * someday we should pass in more information. */ if (jointype == JOIN_IN || jointype == JOIN_REVERSE_IN || jointype == JOIN_UNIQUE_INNER || jointype == JOIN_UNIQUE_OUTER) { float4 oneovern = 1.0 / nd2; for (i = 0; i < nvalues2; i++) numbers2[i] = oneovern; nullfrac2 = oneovern; } /* * Note we assume that each MCV will match at most one member of the * other MCV list. If the operator isn't really equality, there could * be multiple matches --- but we don't look for them, both for speed * and because the math wouldn't add up... */ matchprodfreq = 0.0; nmatches = 0; for (i = 0; i < nvalues1; i++) { int j; for (j = 0; j < nvalues2; j++) { if (hasmatch2[j]) continue; if (DatumGetBool(FunctionCall2(&eqproc, values1[i], values2[j]))) { hasmatch1[i] = hasmatch2[j] = true; matchprodfreq += numbers1[i] * numbers2[j]; nmatches++; break; } } } CLAMP_PROBABILITY(matchprodfreq); /* Sum up frequencies of matched and unmatched MCVs */ matchfreq1 = unmatchfreq1 = 0.0; for (i = 0; i < nvalues1; i++) { if (hasmatch1[i]) matchfreq1 += numbers1[i]; else unmatchfreq1 += numbers1[i]; } CLAMP_PROBABILITY(matchfreq1); CLAMP_PROBABILITY(unmatchfreq1); matchfreq2 = unmatchfreq2 = 0.0; for (i = 0; i < nvalues2; i++) { if (hasmatch2[i]) matchfreq2 += numbers2[i]; else unmatchfreq2 += numbers2[i]; } CLAMP_PROBABILITY(matchfreq2); CLAMP_PROBABILITY(unmatchfreq2); pfree(hasmatch1); pfree(hasmatch2); /* * Compute total frequency of non-null values that are not in the MCV * lists. */ otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1; otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2; CLAMP_PROBABILITY(otherfreq1); CLAMP_PROBABILITY(otherfreq2); /* * We can estimate the total selectivity from the point of view of * relation 1 as: the known selectivity for matched MCVs, plus * unmatched MCVs that are assumed to match against random members of * relation 2's non-MCV population, plus non-MCV values that are * assumed to match against random members of relation 2's unmatched * MCVs plus non-MCV values. */ totalsel1 = matchprodfreq; if (nd2 > nvalues2) totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2); if (nd2 > nmatches) totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) / (nd2 - nmatches); /* Same estimate from the point of view of relation 2. */ totalsel2 = matchprodfreq; if (nd1 > nvalues1) totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1); if (nd1 > nmatches) totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) / (nd1 - nmatches); /* * Use the smaller of the two estimates. This can be justified in * essentially the same terms as given below for the no-stats case: to * a first approximation, we are estimating from the point of view of * the relation with smaller nd. */ selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2; } else { /* * We do not have MCV lists for both sides. Estimate the join * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This * is plausible if we assume that the join operator is strict and the * non-null values are about equally distributed: a given non-null * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows * of rel2, so total join rows are at most * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression * with MIN() is an upper bound. Using the MIN() means we estimate * from the point of view of the relation with smaller nd (since the * larger nd is determining the MIN). It is reasonable to assume that * most tuples in this rel will have join partners, so the bound is * probably reasonably tight and should be taken as-is. * * XXX Can we be smarter if we have an MCV list for just one side? It * seems that if we assume equal distribution for the other side, we * end up with the same answer anyway. */ double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0; selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); if (nd1 > nd2) selec /= nd1; else selec /= nd2; } if (have_mcvs1) free_attstatsslot(vardata1.atttype, values1, nvalues1, numbers1, nnumbers1); if (have_mcvs2) free_attstatsslot(vardata2.atttype, values2, nvalues2, numbers2, nnumbers2); ReleaseVariableStats(vardata1); ReleaseVariableStats(vardata2); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec);}/* * neqjoinsel - Join selectivity of "!=" */Datumneqjoinsel(PG_FUNCTION_ARGS){ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); Oid eqop; float8 result; /* * We want 1 - eqjoinsel() where the equality operator is the one * associated with this != operator, that is, its negator. */ eqop = get_negator(operator); if (eqop) { result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), Int16GetDatum(jointype))); } else { /* Use default selectivity (should we raise an error instead?) */ result = DEFAULT_EQ_SEL; } result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * scalarltjoinsel - Join selectivity of "<" and "<=" for scalars */Datumscalarltjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* * scalargtjoinsel - Join selectivity of ">" and ">=" for scalars */Datumscalargtjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* * regexeqjoinsel - Join selectivity of regular-expression pattern match. */Datumregexeqjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * icregexeqjoinsel - Join selectivity of case-insensitive regex match. */Datumicregexeqjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * likejoinsel - Join selectivity of LIKE pattern match. */Datumlikejoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * iclikejoinsel - Join selectivity of ILIKE pattern match. */Datumiclikejoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * regexnejoinsel - Join selectivity of regex non-match. */Datumregexnejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(regexeqjoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * icregexnejoinsel - Join selectivity of case-insensitive regex non-match. */Datumicregexnejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(icregexeqjoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * nlikejoinsel - Join selectivity of LIKE pattern non-match. */Datumnlikejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(likejoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * icnlikejoinsel - Join selectivity of ILIKE pattern non-match. */Datumicnlikejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(iclikejoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * mergejoinscansel - Scan selectivity of merge join. * * A merge join will stop as soon as it exhausts either input stream. * Therefore, if we can estimate the ranges of both input variables, * we can estimate how much of the input will actually be read. This * can have a considerable impact on the cost when using indexscans. * * clause should be a clause already known to be mergejoinable. * * *leftscan is set to the fraction of the left-hand variable expected * to be scanned (0 to 1), and similarly *rightscan for the right-hand * variable. */voidmergejoinscansel(PlannerInfo *root, Node *clause, Selectivity *leftscan, Selectivity *rightscan){ Node *left, *right; VariableStatData leftvar, rightvar; Oid lefttype, righttype; Oid opno, lsortop, rsortop, ltop, gtop, leop, revgtop, revleop; Datum leftmax, rightmax; double selec; /* Set default results if we can't figure anything out. */ *leftscan = *rightscan = 1.0; /* Deconstruct the merge clause */ if (!is_opclause(clause)) return; /* shouldn't happen */ opno = ((OpExpr *) clause)->opno; left = get_leftop((Expr *) clause); right = get_rightop((Expr *) clause); if (!right) return; /* shouldn't happen */ /* Look for stats for the inputs */ examine_variable(root, left, 0, &leftvar); examine_variable(root, right, 0, &rightvar); /* Get the direct input types of the operator */ lefttype = exprType(left); righttype = exprType(right); /* Verify mergejoinability and get left and right "<" operators */ if (!op_mergejoinable(opno, &lsortop, &rsortop)) goto fail; /* shouldn't happen */ /* Try to get maximum values of both inputs */ if (!get_variable_maximum(root, &leftvar, lsortop, &leftmax)) goto fail; /* no max available from stats */ if (!get_variable_maximum(root, &rightvar, rsortop, &rightmax)) goto fail; /* no max available from stats */ /* Look up the "left < right" and "left > right" operators */ op_mergejoin_crossops(opno, <op, >op, NULL, NULL); /* Look up the "left <= right" operator */ leop = get_negator(gtop); if (!OidIsValid(leop)) goto fail; /* insufficient info in catalogs */ /* Look up the "right > left" operator */ revgtop = get_commutator(ltop); if (!OidIsValid(revgtop)) goto fail; /* insufficient info in catalogs */ /* Look up the "right <= left" operator */ revleop = get_negator(revgtop); if (!OidIsValid(revleop)) goto fail; /* insufficient info in catalogs */ /* * Now, the fraction of the left variable that will be scanned is the * fraction that's <= the right-side maximum value. But only believe * non-default estimates, else stick with our 1.0. */ selec = scalarineqsel(root, leop, false, &leftvar, rightmax, righttype); if (selec != DEFAULT_INEQ_SEL) *leftscan = selec; /* And similarly for the right variable. */ selec = scalarineqsel(root, revleop, false, &rightvar, leftmax, lefttype); if (selec != DEFAULT_INEQ_SEL) *rightscan = selec; /* * Only one of the two fractions can really be less than 1.0; believe the * smaller estimate and reset the other one to exactly 1.0. If we get * exactly equal estimates (as can easily happen with self-joins), believe * neither. */ if (*leftscan > *rightscan) *leftscan = 1.0; else if (*leftscan < *rightscan) *rightscan = 1.0; else *leftscan = *rightscan = 1.0;fail: ReleaseVariableStats(leftvar); ReleaseVariableStats(rightvar);}/* * Helper routine for estimate_num_groups: add an item to a list of * GroupVarInfos, but only if it's not known equal to any of the existing * entries. */typedef struct{ Node *var; /* might be an expression, not just a Var */ RelOptInfo *rel; /* relation it belongs to */ double ndistinct; /* # distinct values */} GroupVarInfo;static List *add_unique_group_var(PlannerInfo *root, List *varinfos, Node *var, VariableStatData *vardata){ GroupVarInfo *varinfo; double ndistinct; ListCell *lc; ndistinct = get_variable_numdistinct(vardata); /* cannot use foreach here because of possible list_delete */ lc = list_head(varinfos); while (lc) { varinfo = (GroupVarInfo *) lfirst(lc); /* must advance lc before list_delete possibly pfree's it */ lc = lnext(lc); /* Drop exact duplicates */ if (equal(var, varinfo->var)) return varinfos; /* * Drop known-equal vars, but only if they belong to different * relations (see comments for estimate_num_groups) */ if (vardata->rel != varinfo->rel && exprs_known_equal(root, var, varinfo->var))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -