selfuncs.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 2,384 行 · 第 1/5 页
C
2,384 行
float4 *numbers2 = NULL; int nnumbers2 = 0; if (var1 != NULL) { /* get stats for the attribute, if available */ Oid relid1 = getrelid(var1->varno, root->rtable); if (relid1 != InvalidOid) { statsTuple1 = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0); if (HeapTupleIsValid(statsTuple1)) { stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1); have_mcvs1 = get_attstatsslot(statsTuple1, var1->vartype, var1->vartypmod, STATISTIC_KIND_MCV, InvalidOid, &values1, &nvalues1, &numbers1, &nnumbers1); } nd1 = get_att_numdistinct(root, var1, stats1); } } if (var2 != NULL) { /* get stats for the attribute, if available */ Oid relid2 = getrelid(var2->varno, root->rtable); if (relid2 != InvalidOid) { statsTuple2 = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0); if (HeapTupleIsValid(statsTuple2)) { stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2); have_mcvs2 = get_attstatsslot(statsTuple2, var2->vartype, var2->vartypmod, STATISTIC_KIND_MCV, InvalidOid, &values2, &nvalues2, &numbers2, &nnumbers2); } nd2 = get_att_numdistinct(root, var2, stats2); } } if (have_mcvs1 && have_mcvs2) { /* * We have most-common-value lists for both relations. Run * through the lists to see which MCVs actually join to each * other with the given operator. This allows us to determine * the exact join selectivity for the portion of the relations * represented by the MCV lists. We still have to estimate * for the remaining population, but in a skewed distribution * this gives us a big leg up in accuracy. For motivation see * the analysis in Y. Ioannidis and S. Christodoulakis, "On * the propagation of errors in the size of join results", * Technical Report 1018, Computer Science Dept., University * of Wisconsin, Madison, March 1991 (available from * ftp.cs.wisc.edu). */ FmgrInfo eqproc; bool *hasmatch1; bool *hasmatch2; double nullfrac1 = stats1->stanullfrac; double nullfrac2 = stats2->stanullfrac; double matchprodfreq, matchfreq1, matchfreq2, unmatchfreq1, unmatchfreq2, otherfreq1, otherfreq2, totalsel1, totalsel2; int i, nmatches; fmgr_info(get_opcode(operator), &eqproc); hasmatch1 = (bool *) palloc0(nvalues1 * sizeof(bool)); hasmatch2 = (bool *) palloc0(nvalues2 * sizeof(bool)); /* * If we are doing any variant of JOIN_IN, pretend all the * values of the righthand relation are unique (ie, act as if * it's been DISTINCT'd). * * NOTE: it might seem that we should unique-ify the lefthand * input when considering JOIN_REVERSE_IN. But this is not * so, because the join clause we've been handed has not been * commuted from the way the parser originally wrote it. We * know that the unique side of the IN clause is *always* on * the right. * * NOTE: it would be dangerous to try to be smart about JOIN_LEFT * or JOIN_RIGHT here, because we do not have enough * information to determine which var is really on which side * of the join. Perhaps someday we should pass in more * information. */ if (jointype == JOIN_IN || jointype == JOIN_REVERSE_IN || jointype == JOIN_UNIQUE_INNER || jointype == JOIN_UNIQUE_OUTER) { float4 oneovern = 1.0 / nd2; for (i = 0; i < nvalues2; i++) numbers2[i] = oneovern; nullfrac2 = oneovern; } /* * Note we assume that each MCV will match at most one member * of the other MCV list. If the operator isn't really * equality, there could be multiple matches --- but we don't * look for them, both for speed and because the math wouldn't * add up... */ matchprodfreq = 0.0; nmatches = 0; for (i = 0; i < nvalues1; i++) { int j; for (j = 0; j < nvalues2; j++) { if (hasmatch2[j]) continue; if (DatumGetBool(FunctionCall2(&eqproc, values1[i], values2[j]))) { hasmatch1[i] = hasmatch2[j] = true; matchprodfreq += numbers1[i] * numbers2[j]; nmatches++; break; } } } CLAMP_PROBABILITY(matchprodfreq); /* Sum up frequencies of matched and unmatched MCVs */ matchfreq1 = unmatchfreq1 = 0.0; for (i = 0; i < nvalues1; i++) { if (hasmatch1[i]) matchfreq1 += numbers1[i]; else unmatchfreq1 += numbers1[i]; } CLAMP_PROBABILITY(matchfreq1); CLAMP_PROBABILITY(unmatchfreq1); matchfreq2 = unmatchfreq2 = 0.0; for (i = 0; i < nvalues2; i++) { if (hasmatch2[i]) matchfreq2 += numbers2[i]; else unmatchfreq2 += numbers2[i]; } CLAMP_PROBABILITY(matchfreq2); CLAMP_PROBABILITY(unmatchfreq2); pfree(hasmatch1); pfree(hasmatch2); /* * Compute total frequency of non-null values that are not in * the MCV lists. */ otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1; otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2; CLAMP_PROBABILITY(otherfreq1); CLAMP_PROBABILITY(otherfreq2); /* * We can estimate the total selectivity from the point of * view of relation 1 as: the known selectivity for matched * MCVs, plus unmatched MCVs that are assumed to match against * random members of relation 2's non-MCV population, plus * non-MCV values that are assumed to match against random * members of relation 2's unmatched MCVs plus non-MCV values. */ totalsel1 = matchprodfreq; if (nd2 > nvalues2) totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - nvalues2); if (nd2 > nmatches) totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) / (nd2 - nmatches); /* Same estimate from the point of view of relation 2. */ totalsel2 = matchprodfreq; if (nd1 > nvalues1) totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - nvalues1); if (nd1 > nmatches) totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) / (nd1 - nmatches); /* * Use the smaller of the two estimates. This can be * justified in essentially the same terms as given below for * the no-stats case: to a first approximation, we are * estimating from the point of view of the relation with * smaller nd. */ selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2; } else { /* * We do not have MCV lists for both sides. Estimate the join * selectivity as * MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This is * plausible if we assume that the join operator is strict and * the non-null values are about equally distributed: a given * non-null tuple of rel1 will join to either zero or * N2*(1-nullfrac2)/nd2 rows of rel2, so total join rows are * at most N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join * selectivity of not more than * (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it is * not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the * expression with MIN() is an upper bound. Using the MIN() * means we estimate from the point of view of the relation * with smaller nd (since the larger nd is determining the * MIN). It is reasonable to assume that most tuples in this * rel will have join partners, so the bound is probably * reasonably tight and should be taken as-is. * * XXX Can we be smarter if we have an MCV list for just one * side? It seems that if we assume equal distribution for the * other side, we end up with the same answer anyway. */ double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0; double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0; selec = (1.0 - nullfrac1) * (1.0 - nullfrac2); if (nd1 > nd2) selec /= nd1; else selec /= nd2; } if (have_mcvs1) free_attstatsslot(var1->vartype, values1, nvalues1, numbers1, nnumbers1); if (have_mcvs2) free_attstatsslot(var2->vartype, values2, nvalues2, numbers2, nnumbers2); if (HeapTupleIsValid(statsTuple1)) ReleaseSysCache(statsTuple1); if (HeapTupleIsValid(statsTuple2)) ReleaseSysCache(statsTuple2); } CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec);}/* * neqjoinsel - Join selectivity of "!=" */Datumneqjoinsel(PG_FUNCTION_ARGS){ Query *root = (Query *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); JoinType jointype = (JoinType) PG_GETARG_INT16(3); Oid eqop; float8 result; /* * We want 1 - eqjoinsel() where the equality operator is the one * associated with this != operator, that is, its negator. */ eqop = get_negator(operator); if (eqop) { result = DatumGetFloat8(DirectFunctionCall4(eqjoinsel, PointerGetDatum(root), ObjectIdGetDatum(eqop), PointerGetDatum(args), Int16GetDatum(jointype))); } else { /* Use default selectivity (should we raise an error instead?) */ result = DEFAULT_EQ_SEL; } result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * scalarltjoinsel - Join selectivity of "<" and "<=" for scalars */Datumscalarltjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* * scalargtjoinsel - Join selectivity of ">" and ">=" for scalars */Datumscalargtjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);}/* * regexeqjoinsel - Join selectivity of regular-expression pattern match. */Datumregexeqjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * icregexeqjoinsel - Join selectivity of case-insensitive regex match. */Datumicregexeqjoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * likejoinsel - Join selectivity of LIKE pattern match. */Datumlikejoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * iclikejoinsel - Join selectivity of ILIKE pattern match. */Datumiclikejoinsel(PG_FUNCTION_ARGS){ PG_RETURN_FLOAT8(DEFAULT_MATCH_SEL);}/* * regexnejoinsel - Join selectivity of regex non-match. */Datumregexnejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(regexeqjoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * icregexnejoinsel - Join selectivity of case-insensitive regex non-match. */Datumicregexnejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(icregexeqjoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * nlikejoinsel - Join selectivity of LIKE pattern non-match. */Datumnlikejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(likejoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * icnlikejoinsel - Join selectivity of ILIKE pattern non-match. */Datumicnlikejoinsel(PG_FUNCTION_ARGS){ float8 result; result = DatumGetFloat8(iclikejoinsel(fcinfo)); result = 1.0 - result; PG_RETURN_FLOAT8(result);}/* * mergejoinscansel - Scan selectivity of merge join. * * A merge join will stop as soon as it exhausts either input stream. * Therefore, if we can estimate the ranges of both input variables, * we can estimate how much of the input will actually be read. This * can have a considerable impact on the cost when using indexscans. * * clause should be a clause already known to be mergejoinable. * * *leftscan is set to the fraction of the left-hand variable expected * to be scanned (0 to 1), and similarly *rightscan for the right-hand * variable. */voidmergejoinscansel(Query *root, Node *clause, Selectivity *leftscan, Selectivity *rightscan){ Var *left, *right; Oid lefttype, righttype; Oid opno, lsortop, rsortop, ltop, gtop, leop, revgtop, revleop; Datum leftmax, rightmax; double selec; /* Set default results if we can't figure anything out. */ *leftscan = *rightscan = 1.0; /* Deconstruct the merge clause */ if (!is_opclause(clause)) return; /* shouldn't happen */ opno = ((OpExpr *) clause)->opno; left = (Var *) get_leftop((Expr *) clause); right = (Var *) get_rightop((Expr *) clause); if (!right) return; /* shouldn't happen */ /* Save the direct input types of the operator */ lefttype = exprType((Node *) left); righttype = exprType((Node *) right); /* * Now skip any binary-compatible relabeling; there can only be one * level since constant-expression folder eliminates adjacent * RelabelTypes. */ if (IsA(left, RelabelType)) left = (Var *) ((RelabelType *) left)->arg; if (IsA(right, RelabelType)) right = (Var *) ((RelabelType *) right)->arg; /* Can't do anything if inputs are not Vars */ if (!IsA(left, Var) || !IsA(right, Var)) return; /* Verify mergejoinability and get left and right "<" operators */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?