📄 costsize.c
字号:
* actually need to scan. NOTE: this logic should agree with the * estimates used by make_subplan() in plan/subselect.c. */ Cost plan_run_cost = plan->total_cost - plan->startup_cost; if (subplan->subLinkType == EXISTS_SUBLINK) { /* we only need to fetch 1 tuple */ total->per_tuple += plan_run_cost / plan->plan_rows; } else if (subplan->subLinkType == ALL_SUBLINK || subplan->subLinkType == ANY_SUBLINK) { /* assume we need 50% of the tuples */ total->per_tuple += 0.50 * plan_run_cost; /* also charge a cpu_operator_cost per row examined */ total->per_tuple += 0.50 * plan->plan_rows * cpu_operator_cost; } else { /* assume we need all tuples */ total->per_tuple += plan_run_cost; } /* * Also account for subplan's startup cost. If the subplan is * uncorrelated or undirect correlated, AND its topmost node is a * Sort or Material node, assume that we'll only need to pay its * startup cost once; otherwise assume we pay the startup cost * every time. */ if (subplan->parParam == NIL && (IsA(plan, Sort) || IsA(plan, Material))) total->startup += plan->startup_cost; else total->per_tuple += plan->startup_cost; } } return expression_tree_walker(node, cost_qual_eval_walker, (void *) total);}/* * approx_selectivity * Quick-and-dirty estimation of clause selectivities. * The input can be either an implicitly-ANDed list of boolean * expressions, or a list of RestrictInfo nodes (typically the latter). * * This is quick-and-dirty because we bypass clauselist_selectivity, and * simply multiply the independent clause selectivities together. Now * clauselist_selectivity often can't do any better than that anyhow, but * for some situations (such as range constraints) it is smarter. However, * we can't effectively cache the results of clauselist_selectivity, whereas * the individual clause selectivities can be and are cached. * * Since we are only using the results to estimate how many potential * output tuples are generated and passed through qpqual checking, it * seems OK to live with the approximation. */static Selectivityapprox_selectivity(PlannerInfo *root, List *quals, JoinType jointype){ Selectivity total = 1.0; ListCell *l; foreach(l, quals) { Node *qual = (Node *) lfirst(l); /* Note that clause_selectivity will be able to cache its result */ total *= clause_selectivity(root, qual, 0, jointype); } return total;}/* * set_baserel_size_estimates * Set the size estimates for the given base relation. * * The rel's targetlist and restrictinfo list must have been constructed * already. * * We set the following fields of the rel node: * rows: the estimated number of output tuples (after applying * restriction clauses). * width: the estimated average output tuple width in bytes. * baserestrictcost: estimated cost of evaluating baserestrictinfo clauses. */voidset_baserel_size_estimates(PlannerInfo *root, RelOptInfo *rel){ double nrows; /* Should only be applied to base relations */ Assert(rel->relid > 0); nrows = rel->tuples * clauselist_selectivity(root, rel->baserestrictinfo, 0, JOIN_INNER); rel->rows = clamp_row_est(nrows); cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo); set_rel_width(root, rel);}/* * set_joinrel_size_estimates * Set the size estimates for the given join relation. * * The rel's targetlist must have been constructed already, and a * restriction clause list that matches the given component rels must * be provided. * * Since there is more than one way to make a joinrel for more than two * base relations, the results we get here could depend on which component * rel pair is provided. In theory we should get the same answers no matter * which pair is provided; in practice, since the selectivity estimation * routines don't handle all cases equally well, we might not. But there's * not much to be done about it. (Would it make sense to repeat the * calculations for each pair of input rels that's encountered, and somehow * average the results? Probably way more trouble than it's worth.) * * It's important that the results for symmetric JoinTypes be symmetric, * eg, (rel1, rel2, JOIN_LEFT) should produce the same result as (rel2, * rel1, JOIN_RIGHT). Also, JOIN_IN should produce the same result as * JOIN_UNIQUE_INNER, likewise JOIN_REVERSE_IN == JOIN_UNIQUE_OUTER. * * We set only the rows field here. The width field was already set by * build_joinrel_tlist, and baserestrictcost is not used for join rels. */voidset_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *outer_rel, RelOptInfo *inner_rel, JoinType jointype, List *restrictlist){ Selectivity selec; double nrows; UniquePath *upath; /* * Compute joinclause selectivity. Note that we are only considering * clauses that become restriction clauses at this join level; we are not * double-counting them because they were not considered in estimating the * sizes of the component rels. */ selec = clauselist_selectivity(root, restrictlist, 0, jointype); /* * Basically, we multiply size of Cartesian product by selectivity. * * If we are doing an outer join, take that into account: the output must * be at least as large as the non-nullable input. (Is there any chance * of being even smarter?) * * For JOIN_IN and variants, the Cartesian product is figured with respect * to a unique-ified input, and then we can clamp to the size of the other * input. */ switch (jointype) { case JOIN_INNER: nrows = outer_rel->rows * inner_rel->rows * selec; break; case JOIN_LEFT: nrows = outer_rel->rows * inner_rel->rows * selec; if (nrows < outer_rel->rows) nrows = outer_rel->rows; break; case JOIN_RIGHT: nrows = outer_rel->rows * inner_rel->rows * selec; if (nrows < inner_rel->rows) nrows = inner_rel->rows; break; case JOIN_FULL: nrows = outer_rel->rows * inner_rel->rows * selec; if (nrows < outer_rel->rows) nrows = outer_rel->rows; if (nrows < inner_rel->rows) nrows = inner_rel->rows; break; case JOIN_IN: case JOIN_UNIQUE_INNER: upath = create_unique_path(root, inner_rel, inner_rel->cheapest_total_path); nrows = outer_rel->rows * upath->rows * selec; if (nrows > outer_rel->rows) nrows = outer_rel->rows; break; case JOIN_REVERSE_IN: case JOIN_UNIQUE_OUTER: upath = create_unique_path(root, outer_rel, outer_rel->cheapest_total_path); nrows = upath->rows * inner_rel->rows * selec; if (nrows > inner_rel->rows) nrows = inner_rel->rows; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); nrows = 0; /* keep compiler quiet */ break; } rel->rows = clamp_row_est(nrows);}/* * join_in_selectivity * Determines the factor by which a JOIN_IN join's result is expected * to be smaller than an ordinary inner join. * * 'path' is already filled in except for the cost fields */static Selectivityjoin_in_selectivity(JoinPath *path, PlannerInfo *root){ RelOptInfo *innerrel; UniquePath *innerunique; Selectivity selec; double nrows; /* Return 1.0 whenever it's not JOIN_IN */ if (path->jointype != JOIN_IN) return 1.0; /* * Return 1.0 if the inner side is already known unique. The case where * the inner path is already a UniquePath probably cannot happen in * current usage, but check it anyway for completeness. The interesting * case is where we've determined the inner relation itself is unique, * which we can check by looking at the rows estimate for its UniquePath. */ if (IsA(path->innerjoinpath, UniquePath)) return 1.0; innerrel = path->innerjoinpath->parent; innerunique = create_unique_path(root, innerrel, innerrel->cheapest_total_path); if (innerunique->rows >= innerrel->rows) return 1.0; /* * Compute same result set_joinrel_size_estimates would compute for * JOIN_INNER. Note that we use the input rels' absolute size estimates, * not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be * double-counting the effects of any join clauses used in input scans. */ selec = clauselist_selectivity(root, path->joinrestrictinfo, 0, JOIN_INNER); nrows = path->outerjoinpath->parent->rows * innerrel->rows * selec; nrows = clamp_row_est(nrows); /* See if it's larger than the actual JOIN_IN size estimate */ if (nrows > path->path.parent->rows) return path->path.parent->rows / nrows; else return 1.0;}/* * set_function_size_estimates * Set the size estimates for a base relation that is a function call. * * The rel's targetlist and restrictinfo list must have been constructed * already. * * We set the same fields as set_baserel_size_estimates. */voidset_function_size_estimates(PlannerInfo *root, RelOptInfo *rel){ RangeTblEntry *rte; /* Should only be applied to base relations that are functions */ Assert(rel->relid > 0); rte = rt_fetch(rel->relid, root->parse->rtable); Assert(rte->rtekind == RTE_FUNCTION); /* * Estimate number of rows the function itself will return. * * XXX no idea how to do this yet; but we can at least check whether * function returns set or not... */ if (expression_returns_set(rte->funcexpr)) rel->tuples = 1000; else rel->tuples = 1; /* Now estimate number of output rows, etc */ set_baserel_size_estimates(root, rel);}/* * set_rel_width * Set the estimated output width of a base relation. * * NB: this works best on plain relations because it prefers to look at * real Vars. It will fail to make use of pg_statistic info when applied * to a subquery relation, even if the subquery outputs are simple vars * that we could have gotten info for. Is it worth trying to be smarter * about subqueries? * * The per-attribute width estimates are cached for possible re-use while * building join relations. */static voidset_rel_width(PlannerInfo *root, RelOptInfo *rel){ int32 tuple_width = 0; ListCell *tllist; foreach(tllist, rel->reltargetlist) { Var *var = (Var *) lfirst(tllist); int ndx; Oid relid; int32 item_width; /* For now, punt on whole-row child Vars */ if (!IsA(var, Var)) { tuple_width += 32; /* arbitrary */ continue; } ndx = var->varattno - rel->min_attr; /* * The width probably hasn't been cached yet, but may as well check */ if (rel->attr_widths[ndx] > 0) { tuple_width += rel->attr_widths[ndx]; continue; } relid = getrelid(var->varno, root->parse->rtable); if (relid != InvalidOid) { item_width = get_attavgwidth(relid, var->varattno); if (item_width > 0) { rel->attr_widths[ndx] = item_width; tuple_width += item_width; continue; } } /* * Not a plain relation, or can't find statistics for it. Estimate * using just the type info. */ item_width = get_typavgwidth(var->vartype, var->vartypmod); Assert(item_width > 0); rel->attr_widths[ndx] = item_width; tuple_width += item_width; } Assert(tuple_width >= 0); rel->width = tuple_width;}/* * relation_byte_size * Estimate the storage space in bytes for a given number of tuples * of a given width (size in bytes). */static doublerelation_byte_size(double tuples, int width){ return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleHeaderData)));}/* * page_size * Returns an estimate of the number of pages covered by a given * number of tuples of a given width (size in bytes). */static doublepage_size(double tuples, int width){ return ceil(relation_byte_size(tuples, width) / BLCKSZ);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -