planner.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 1,553 行 · 第 1/4 页
C
1,553 行
else { /* OFFSET is an expression ... punt ... */ limit_fraction = 0.10; } } } } else { /* LIMIT is an expression ... punt ... */ limit_fraction = 0.10; } if (limit_fraction > 0.0) { /* * If we have absolute limits from both caller and LIMIT, * use the smaller value; if one is fractional and the * other absolute, treat the fraction as a fraction of the * absolute value; else we can multiply the two fractions * together. */ if (tuple_fraction >= 1.0) { if (limit_fraction >= 1.0) { /* both absolute */ tuple_fraction = Min(tuple_fraction, limit_fraction); } else { /* caller absolute, limit fractional */ tuple_fraction *= limit_fraction; if (tuple_fraction < 1.0) tuple_fraction = 1.0; } } else if (tuple_fraction > 0.0) { if (limit_fraction >= 1.0) { /* caller fractional, limit absolute */ tuple_fraction *= limit_fraction; if (tuple_fraction < 1.0) tuple_fraction = 1.0; } else { /* both fractional */ tuple_fraction *= limit_fraction; } } else { /* no info from caller, just use limit */ tuple_fraction = limit_fraction; } } } /* * With grouping or aggregation, the tuple fraction to pass to * query_planner() may be different from what it is at top level. */ sub_tuple_fraction = tuple_fraction; if (parse->groupClause) { /* * In GROUP BY mode, we have the little problem that we don't * really know how many input tuples will be needed to make a * group, so we can't translate an output LIMIT count into an * input count. For lack of a better idea, assume 25% of the * input data will be processed if there is any output limit. * However, if the caller gave us a fraction rather than an * absolute count, we can keep using that fraction (which * amounts to assuming that all the groups are about the same * size). */ if (sub_tuple_fraction >= 1.0) sub_tuple_fraction = 0.25; /* * If both GROUP BY and ORDER BY are specified, we will need * two levels of sort --- and, therefore, certainly need to * read all the input tuples --- unless ORDER BY is a subset * of GROUP BY. (We have not yet canonicalized the pathkeys, * so must use the slower noncanonical comparison method.) */ if (parse->groupClause && parse->sortClause && !noncanonical_pathkeys_contained_in(sort_pathkeys, group_pathkeys)) sub_tuple_fraction = 0.0; } else if (parse->hasAggs) { /* * Ungrouped aggregate will certainly want all the input * tuples. */ sub_tuple_fraction = 0.0; } else if (parse->distinctClause) { /* * SELECT DISTINCT, like GROUP, will absorb an unpredictable * number of input tuples per output tuple. Handle the same * way. */ if (sub_tuple_fraction >= 1.0) sub_tuple_fraction = 0.25; } /* * Generate the best unsorted and presorted paths for this Query * (but note there may not be any presorted path). */ query_planner(parse, sub_tlist, sub_tuple_fraction, &cheapest_path, &sorted_path); /* * We couldn't canonicalize group_pathkeys and sort_pathkeys * before running query_planner(), so do it now. */ group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys); sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys); /* * Consider whether we might want to use hashed grouping. */ if (parse->groupClause) { List *groupExprs; double cheapest_path_rows; int cheapest_path_width; /* * Beware in this section of the possibility that * cheapest_path->parent is NULL. This could happen if user * does something silly like SELECT 'foo' GROUP BY 1; */ if (cheapest_path->parent) { cheapest_path_rows = cheapest_path->parent->rows; cheapest_path_width = cheapest_path->parent->width; } else { cheapest_path_rows = 1; /* assume non-set result */ cheapest_path_width = 100; /* arbitrary */ } /* * Always estimate the number of groups. We can't do this * until after running query_planner(), either. */ groupExprs = get_sortgrouplist_exprs(parse->groupClause, parse->targetList); dNumGroups = estimate_num_groups(parse, groupExprs, cheapest_path_rows); /* Also want it as a long int --- but 'ware overflow! */ numGroups = (long) Min(dNumGroups, (double) LONG_MAX); /* * Check can't-do-it conditions, including whether the * grouping operators are hashjoinable. * * Executor doesn't support hashed aggregation with DISTINCT * aggregates. (Doing so would imply storing *all* the input * values in the hash table, which seems like a certain * loser.) */ if (!enable_hashagg || !hash_safe_grouping(parse)) use_hashed_grouping = false; else if (parse->hasAggs && (contain_distinct_agg_clause((Node *) tlist) || contain_distinct_agg_clause(parse->havingQual))) use_hashed_grouping = false; else { /* * Use hashed grouping if (a) we think we can fit the * hashtable into SortMem, *and* (b) the estimated cost is * no more than doing it the other way. While avoiding * the need for sorted input is usually a win, the fact * that the output won't be sorted may be a loss; so we * need to do an actual cost comparison. * * In most cases we have no good way to estimate the size of * the transition value needed by an aggregate; * arbitrarily assume it is 100 bytes. Also set the * overhead per hashtable entry at 64 bytes. */ int hashentrysize = cheapest_path_width + 64 + numAggs * 100; if (hashentrysize * dNumGroups <= SortMem * 1024L) { /* * Okay, do the cost comparison. We need to consider * cheapest_path + hashagg [+ final sort] versus * either cheapest_path [+ sort] + group or agg [+ * final sort] or presorted_path + group or agg [+ * final sort] where brackets indicate a step that may * not be needed. We assume query_planner() will have * returned a presorted path only if it's a winner * compared to cheapest_path for this purpose. * * These path variables are dummies that just hold cost * fields; we don't make actual Paths for these steps. */ Path hashed_p; Path sorted_p; cost_agg(&hashed_p, parse, AGG_HASHED, numAggs, numGroupCols, dNumGroups, cheapest_path->startup_cost, cheapest_path->total_cost, cheapest_path_rows); /* Result of hashed agg is always unsorted */ if (sort_pathkeys) cost_sort(&hashed_p, parse, sort_pathkeys, hashed_p.total_cost, dNumGroups, cheapest_path_width); if (sorted_path) { sorted_p.startup_cost = sorted_path->startup_cost; sorted_p.total_cost = sorted_path->total_cost; current_pathkeys = sorted_path->pathkeys; } else { sorted_p.startup_cost = cheapest_path->startup_cost; sorted_p.total_cost = cheapest_path->total_cost; current_pathkeys = cheapest_path->pathkeys; } if (!pathkeys_contained_in(group_pathkeys, current_pathkeys)) { cost_sort(&sorted_p, parse, group_pathkeys, sorted_p.total_cost, cheapest_path_rows, cheapest_path_width); current_pathkeys = group_pathkeys; } if (parse->hasAggs) cost_agg(&sorted_p, parse, AGG_SORTED, numAggs, numGroupCols, dNumGroups, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); else cost_group(&sorted_p, parse, numGroupCols, dNumGroups, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); /* The Agg or Group node will preserve ordering */ if (sort_pathkeys && !pathkeys_contained_in(sort_pathkeys, current_pathkeys)) { cost_sort(&sorted_p, parse, sort_pathkeys, sorted_p.total_cost, dNumGroups, cheapest_path_width); } /* * Now make the decision using the top-level tuple * fraction. First we have to convert an absolute * count (LIMIT) into fractional form. */ if (tuple_fraction >= 1.0) tuple_fraction /= dNumGroups; if (compare_fractional_path_costs(&hashed_p, &sorted_p, tuple_fraction) < 0) { /* Hashed is cheaper, so use it */ use_hashed_grouping = true; } } } } /* * Select the best path and create a plan to execute it. * * If we are doing hashed grouping, we will always read all the input * tuples, so use the cheapest-total path. Otherwise, trust * query_planner's decision about which to use. */ if (sorted_path && !use_hashed_grouping) { result_plan = create_plan(parse, sorted_path); current_pathkeys = sorted_path->pathkeys; } else { result_plan = create_plan(parse, cheapest_path); current_pathkeys = cheapest_path->pathkeys; } /* * create_plan() returns a plan with just a "flat" tlist of * required Vars. Usually we need to insert the sub_tlist as the * tlist of the top plan node. However, we can skip that if we * determined that whatever query_planner chose to return will be * good enough. */ if (need_tlist_eval) { /* * If the top-level plan node is one that cannot do expression * evaluation, we must insert a Result node to project the * desired tlist. Currently, the only plan node we might see * here that falls into that category is Append. */ if (IsA(result_plan, Append)) { result_plan = (Plan *) make_result(sub_tlist, NULL, result_plan); } else { /* * Otherwise, just replace the subplan's flat tlist with * the desired tlist. */ result_plan->targetlist = sub_tlist; } /* * Also, account for the cost of evaluation of the sub_tlist. * * Up to now, we have only been dealing with "flat" tlists, * containing just Vars. So their evaluation cost is zero * according to the model used by cost_qual_eval() (or if you * prefer, the cost is factored into cpu_tuple_cost). Thus we * can avoid accounting for tlist cost throughout * query_planner() and subroutines. But now we've inserted a * tlist that might contain actual operators, sub-selects, etc * --- so we'd better account for its cost. * * Below this point, any tlist eval cost for added-on nodes * should be accounted for as we create those nodes. * Presently, of the node types we can add on, only Agg and * Group project new tlists (the rest just copy their input * tuples) --- so make_agg() and make_group() are responsible * for computing the added cost. */ cost_qual_eval(&tlist_cost, sub_tlist); result_plan->startup_cost += tlist_cost.startup; result_plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple * result_plan->plan_rows; } else { /* * Since we're using query_planner's tlist and not the one * make_subplanTargetList calculated, we have to refigure any * grouping-column indexes make_subplanTargetList computed. */ locate_grouping_columns(parse, tlist, result_plan->targetlist, groupColIdx); } /* * Insert AGG or GROUP node if needed, plus an explicit sort step * if necessary. * * HAVING clause, if any, becomes qual of the Agg node */ if (use_hashed_grouping) { /* Hashed aggregate plan --- no sort needed */ result_plan = (Plan *) make_agg(parse, tlist, (List *) parse->havingQual, AGG_HASHED, numGroupCols, groupColIdx, numGroups,
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?