📄 planner.c
字号:
* together; likewise if they are both fractional. If one is * fractional and the other absolute, we want to take the larger, and * we heuristically assume that's the fractional one. */ if (tuple_fraction >= 1.0) { if (limit_fraction >= 1.0) { /* both absolute, so add them together */ tuple_fraction += limit_fraction; } else { /* caller absolute, limit fractional; use limit */ tuple_fraction = limit_fraction; } } else { if (limit_fraction >= 1.0) { /* caller fractional, limit absolute; use caller's value */ } else { /* both fractional, so add them together */ tuple_fraction += limit_fraction; if (tuple_fraction >= 1.0) tuple_fraction = 0.0; /* assume fetch all */ } } } return tuple_fraction;}/* * choose_hashed_grouping - should we use hashed grouping? */static boolchoose_hashed_grouping(PlannerInfo *root, double tuple_fraction, Path *cheapest_path, Path *sorted_path, double dNumGroups, AggClauseCounts *agg_counts){ int numGroupCols = list_length(root->parse->groupClause); double cheapest_path_rows; int cheapest_path_width; Size hashentrysize; List *current_pathkeys; Path hashed_p; Path sorted_p; /* * Check can't-do-it conditions, including whether the grouping operators * are hashjoinable. * * Executor doesn't support hashed aggregation with DISTINCT aggregates. * (Doing so would imply storing *all* the input values in the hash table, * which seems like a certain loser.) */ if (!enable_hashagg) return false; if (agg_counts->numDistinctAggs != 0) return false; if (!hash_safe_grouping(root)) return false; /* * Don't do it if it doesn't look like the hashtable will fit into * work_mem. * * Beware here of the possibility that cheapest_path->parent is NULL. This * could happen if user does something silly like SELECT 'foo' GROUP BY 1; */ if (cheapest_path->parent) { cheapest_path_rows = cheapest_path->parent->rows; cheapest_path_width = cheapest_path->parent->width; } else { cheapest_path_rows = 1; /* assume non-set result */ cheapest_path_width = 100; /* arbitrary */ } /* Estimate per-hash-entry space at tuple width... */ hashentrysize = cheapest_path_width; /* plus space for pass-by-ref transition values... */ hashentrysize += agg_counts->transitionSpace; /* plus the per-hash-entry overhead */ hashentrysize += hash_agg_entry_size(agg_counts->numAggs); if (hashentrysize * dNumGroups > work_mem * 1024L) return false; /* * See if the estimated cost is no more than doing it the other way. While * avoiding the need for sorted input is usually a win, the fact that the * output won't be sorted may be a loss; so we need to do an actual cost * comparison. * * We need to consider cheapest_path + hashagg [+ final sort] versus * either cheapest_path [+ sort] + group or agg [+ final sort] or * presorted_path + group or agg [+ final sort] where brackets indicate a * step that may not be needed. We assume query_planner() will have * returned a presorted path only if it's a winner compared to * cheapest_path for this purpose. * * These path variables are dummies that just hold cost fields; we don't * make actual Paths for these steps. */ cost_agg(&hashed_p, root, AGG_HASHED, agg_counts->numAggs, numGroupCols, dNumGroups, cheapest_path->startup_cost, cheapest_path->total_cost, cheapest_path_rows); /* Result of hashed agg is always unsorted */ if (root->sort_pathkeys) cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost, dNumGroups, cheapest_path_width); if (sorted_path) { sorted_p.startup_cost = sorted_path->startup_cost; sorted_p.total_cost = sorted_path->total_cost; current_pathkeys = sorted_path->pathkeys; } else { sorted_p.startup_cost = cheapest_path->startup_cost; sorted_p.total_cost = cheapest_path->total_cost; current_pathkeys = cheapest_path->pathkeys; } if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost, cheapest_path_rows, cheapest_path_width); current_pathkeys = root->group_pathkeys; } if (root->parse->hasAggs) cost_agg(&sorted_p, root, AGG_SORTED, agg_counts->numAggs, numGroupCols, dNumGroups, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); else cost_group(&sorted_p, root, numGroupCols, dNumGroups, sorted_p.startup_cost, sorted_p.total_cost, cheapest_path_rows); /* The Agg or Group node will preserve ordering */ if (root->sort_pathkeys && !pathkeys_contained_in(root->sort_pathkeys, current_pathkeys)) cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost, dNumGroups, cheapest_path_width); /* * Now make the decision using the top-level tuple fraction. First we * have to convert an absolute count (LIMIT) into fractional form. */ if (tuple_fraction >= 1.0) tuple_fraction /= dNumGroups; if (compare_fractional_path_costs(&hashed_p, &sorted_p, tuple_fraction) < 0) { /* Hashed is cheaper, so use it */ return true; } return false;}/* * hash_safe_grouping - are grouping operators hashable? * * We assume hashed aggregation will work if the datatype's equality operator * is marked hashjoinable. */static boolhash_safe_grouping(PlannerInfo *root){ ListCell *gl; foreach(gl, root->parse->groupClause) { GroupClause *grpcl = (GroupClause *) lfirst(gl); TargetEntry *tle = get_sortgroupclause_tle(grpcl, root->parse->targetList); Operator optup; bool oprcanhash; optup = equality_oper(exprType((Node *) tle->expr), true); if (!optup) return false; oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash; ReleaseSysCache(optup); if (!oprcanhash) return false; } return true;}/*--------------- * make_subplanTargetList * Generate appropriate target list when grouping is required. * * When grouping_planner inserts Aggregate, Group, or Result plan nodes * above the result of query_planner, we typically want to pass a different * target list to query_planner than the outer plan nodes should have. * This routine generates the correct target list for the subplan. * * The initial target list passed from the parser already contains entries * for all ORDER BY and GROUP BY expressions, but it will not have entries * for variables used only in HAVING clauses; so we need to add those * variables to the subplan target list. Also, we flatten all expressions * except GROUP BY items into their component variables; the other expressions * will be computed by the inserted nodes rather than by the subplan. * For example, given a query like * SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; * we want to pass this targetlist to the subplan: * a,b,c,d,a+b * where the a+b target will be used by the Sort/Group steps, and the * other targets will be used for computing the final results. (In the * above example we could theoretically suppress the a and b targets and * pass down only c,d,a+b, but it's not really worth the trouble to * eliminate simple var references from the subplan. We will avoid doing * the extra computation to recompute a+b at the outer level; see * replace_vars_with_subplan_refs() in setrefs.c.) * * If we are grouping or aggregating, *and* there are no non-Var grouping * expressions, then the returned tlist is effectively dummy; we do not * need to force it to be evaluated, because all the Vars it contains * should be present in the output of query_planner anyway. * * 'tlist' is the query's target list. * 'groupColIdx' receives an array of column numbers for the GROUP BY * expressions (if there are any) in the subplan's target list. * 'need_tlist_eval' is set true if we really need to evaluate the * result tlist. * * The result is the targetlist to be passed to the subplan. *--------------- */static List *make_subplanTargetList(PlannerInfo *root, List *tlist, AttrNumber **groupColIdx, bool *need_tlist_eval){ Query *parse = root->parse; List *sub_tlist; List *extravars; int numCols; *groupColIdx = NULL; /* * If we're not grouping or aggregating, there's nothing to do here; * query_planner should receive the unmodified target list. */ if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual) { *need_tlist_eval = true; return tlist; } /* * Otherwise, start with a "flattened" tlist (having just the vars * mentioned in the targetlist and HAVING qual --- but not upper- level * Vars; they will be replaced by Params later on). */ sub_tlist = flatten_tlist(tlist); extravars = pull_var_clause(parse->havingQual, false); sub_tlist = add_to_flat_tlist(sub_tlist, extravars); list_free(extravars); *need_tlist_eval = false; /* only eval if not flat tlist */ /* * If grouping, create sub_tlist entries for all GROUP BY expressions * (GROUP BY items that are simple Vars should be in the list already), * and make an array showing where the group columns are in the sub_tlist. */ numCols = list_length(parse->groupClause); if (numCols > 0) { int keyno = 0; AttrNumber *grpColIdx; ListCell *gl; grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols); *groupColIdx = grpColIdx; foreach(gl, parse->groupClause) { GroupClause *grpcl = (GroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; ListCell *sl; /* Find or make a matching sub_tlist entry */ foreach(sl, sub_tlist) { te = (TargetEntry *) lfirst(sl); if (equal(groupexpr, te->expr)) break; } if (!sl) { te = makeTargetEntry((Expr *) groupexpr, list_length(sub_tlist) + 1, NULL, false); sub_tlist = lappend(sub_tlist, te); *need_tlist_eval = true; /* it's not flat anymore */ } /* and save its resno */ grpColIdx[keyno++] = te->resno; } } return sub_tlist;}/* * locate_grouping_columns * Locate grouping columns in the tlist chosen by query_planner. * * This is only needed if we don't use the sub_tlist chosen by * make_subplanTargetList. We have to forget the column indexes found * by that routine and re-locate the grouping vars in the real sub_tlist. */static voidlocate_grouping_columns(PlannerInfo *root, List *tlist, List *sub_tlist, AttrNumber *groupColIdx){ int keyno = 0; ListCell *gl; /* * No work unless grouping. */ if (!root->parse->groupClause) { Assert(groupColIdx == NULL); return; } Assert(groupColIdx != NULL); foreach(gl, root->parse->groupClause) { GroupClause *grpcl = (GroupClause *) lfirst(gl); Node *groupexpr = get_sortgroupclause_expr(grpcl, tlist); TargetEntry *te = NULL; ListCell *sl; foreach(sl, sub_tlist) { te = (TargetEntry *) lfirst(sl); if (equal(groupexpr, te->expr)) break; } if (!sl) elog(ERROR, "failed to locate grouping columns"); groupColIdx[keyno++] = te->resno; }}/* * postprocess_setop_tlist * Fix up targetlist returned by plan_set_operations(). * * We need to transpose sort key info from the orig_tlist into new_tlist. * NOTE: this would not be good enough if we supported resjunk sort keys * for results of set operations --- then, we'd need to project a whole * new tlist to evaluate the resjunk columns. For now, just ereport if we * find any resjunk columns in orig_tlist. */static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist){ ListCell *l; ListCell *orig_tlist_item = list_head(orig_tlist); foreach(l, new_tlist) { TargetEntry *new_tle = (TargetEntry *) lfirst(l); TargetEntry *orig_tle; /* ignore resjunk columns in setop result */ if (new_tle->resjunk) continue; Assert(orig_tlist_item != NULL); orig_tle = (TargetEntry *) lfirst(orig_tlist_item); orig_tlist_item = lnext(orig_tlist_item); if (orig_tle->resjunk) /* should not happen */ elog(ERROR, "resjunk output columns are not implemented"); Assert(new_tle->resno == orig_tle->resno); new_tle->ressortgroupref = orig_tle->ressortgroupref; } if (orig_tlist_item != NULL) elog(ERROR, "resjunk output columns are not implemented"); return new_tlist;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -