📄 planagg.c
字号:
/*------------------------------------------------------------------------- * * planagg.c * Special planning for aggregate queries. * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.10.2.2 2006/04/28 20:57:59 tgl Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include "access/skey.h"#include "catalog/pg_aggregate.h"#include "catalog/pg_type.h"#include "nodes/makefuncs.h"#include "optimizer/clauses.h"#include "optimizer/cost.h"#include "optimizer/pathnode.h"#include "optimizer/paths.h"#include "optimizer/planmain.h"#include "optimizer/subselect.h"#include "parser/parsetree.h"#include "parser/parse_clause.h"#include "parser/parse_expr.h"#include "utils/lsyscache.h"#include "utils/syscache.h"typedef struct{ Oid aggfnoid; /* pg_proc Oid of the aggregate */ Oid aggsortop; /* Oid of its sort operator */ Expr *target; /* expression we are aggregating on */ IndexPath *path; /* access path for index scan */ Cost pathcost; /* estimated cost to fetch first row */ Param *param; /* param for subplan's output */} MinMaxAggInfo;static bool find_minmax_aggs_walker(Node *node, List **context);static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info);static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol);static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info, List *constant_quals);static Node *replace_aggs_with_params_mutator(Node *node, List **context);static Oid fetch_agg_sort_op(Oid aggfnoid);/* * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes * * This checks to see if we can replace MIN/MAX aggregate functions by * subqueries of the form * (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1) * Given a suitable index on tab.col, this can be much faster than the * generic scan-all-the-rows plan. * * We are passed the preprocessed tlist, and the best path * devised for computing the input of a standard Agg node. If we are able * to optimize all the aggregates, and the result is estimated to be cheaper * than the generic aggregate method, then generate and return a Plan that * does it that way. Otherwise, return NULL. */Plan *optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path){ Query *parse = root->parse; RangeTblRef *rtr; RangeTblEntry *rte; RelOptInfo *rel; List *aggs_list; ListCell *l; Cost total_cost; Path agg_p; Plan *plan; Node *hqual; QualCost tlist_cost; List *constant_quals; /* Nothing to do if query has no aggregates */ if (!parse->hasAggs) return NULL; Assert(!parse->setOperations); /* shouldn't get here if a setop */ Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ /* * Reject unoptimizable cases. * * We don't handle GROUP BY, because our current implementations of * grouping require looking at all the rows anyway, and so there's not * much point in optimizing MIN/MAX. */ if (parse->groupClause) return NULL; /* * We also restrict the query to reference exactly one table, since join * conditions can't be handled reasonably. (We could perhaps handle a * query containing cartesian-product joins, but it hardly seems worth the * trouble.) */ Assert(parse->jointree != NULL && IsA(parse->jointree, FromExpr)); if (list_length(parse->jointree->fromlist) != 1) return NULL; rtr = (RangeTblRef *) linitial(parse->jointree->fromlist); if (!IsA(rtr, RangeTblRef)) return NULL; rte = rt_fetch(rtr->rtindex, parse->rtable); if (rte->rtekind != RTE_RELATION || rte->inh) return NULL; rel = find_base_rel(root, rtr->rtindex); /* * Since this optimization is not applicable all that often, we want to * fall out before doing very much work if possible. Therefore we do the * work in several passes. The first pass scans the tlist and HAVING qual * to find all the aggregates and verify that each of them is a MIN/MAX * aggregate. If that succeeds, the second pass looks at each aggregate * to see if it is optimizable; if so we make an IndexPath describing how * we would scan it. (We do not try to optimize if only some aggs are * optimizable, since that means we'll have to scan all the rows anyway.) * If that succeeds, we have enough info to compare costs against the * generic implementation. Only if that test passes do we build a Plan. */ /* Pass 1: find all the aggregates */ aggs_list = NIL; if (find_minmax_aggs_walker((Node *) tlist, &aggs_list)) return NULL; if (find_minmax_aggs_walker(parse->havingQual, &aggs_list)) return NULL; /* Pass 2: see if each one is optimizable */ total_cost = 0; foreach(l, aggs_list) { MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l); if (!build_minmax_path(root, rel, info)) return NULL; total_cost += info->pathcost; } /* * Make the cost comparison. * * Note that we don't include evaluation cost of the tlist here; this is * OK since it isn't included in best_path's cost either, and should be * the same in either case. */ cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list), 0, 0, best_path->startup_cost, best_path->total_cost, best_path->parent->rows); if (total_cost > agg_p.total_cost) return NULL; /* too expensive */ /* * OK, we are going to generate an optimized plan. The first thing we * need to do is look for any non-variable WHERE clauses that * query_planner might have removed from the basic plan. (Normal WHERE * clauses will be properly incorporated into the sub-plans by * create_plan.) If there are any, they will be in a gating Result node * atop the best_path. They have to be incorporated into a gating Result * in each sub-plan in order to produce the semantically correct result. */ if (IsA(best_path, ResultPath)) { constant_quals = ((ResultPath *) best_path)->constantqual; /* no need to do this more than once: */ constant_quals = order_qual_clauses(root, constant_quals); } else constant_quals = NIL; /* Pass 3: generate subplans and output Param nodes */ foreach(l, aggs_list) { make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l), constant_quals); } /* * Modify the targetlist and HAVING qual to reference subquery outputs */ tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist, &aggs_list); hqual = replace_aggs_with_params_mutator(parse->havingQual, &aggs_list); /* * Generate the output plan --- basically just a Result */ plan = (Plan *) make_result(tlist, hqual, NULL); /* Account for evaluation cost of the tlist (make_result did the rest) */ cost_qual_eval(&tlist_cost, tlist); plan->startup_cost += tlist_cost.startup; plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple; return plan;}/* * find_minmax_aggs_walker * Recursively scan the Aggref nodes in an expression tree, and check * that each one is a MIN/MAX aggregate. If so, build a list of the * distinct aggregate calls in the tree. * * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise. * (This seemingly-backward definition is used because expression_tree_walker * aborts the scan on TRUE return, which is what we want.) * * Found aggregates are added to the list at *context; it's up to the caller * to initialize the list to NIL. * * This does not descend into subqueries, and so should be used only after * reduction of sublinks to subplans. There mustn't be outer-aggregate * references either. */static boolfind_minmax_aggs_walker(Node *node, List **context){ if (node == NULL) return false; if (IsA(node, Aggref)) { Aggref *aggref = (Aggref *) node; Oid aggsortop; MinMaxAggInfo *info; ListCell *l; Assert(aggref->agglevelsup == 0); if (aggref->aggstar) return true; /* foo(*) is surely not optimizable */ /* note: we do not care if DISTINCT is mentioned ... */ aggsortop = fetch_agg_sort_op(aggref->aggfnoid); if (!OidIsValid(aggsortop)) return true; /* not a MIN/MAX aggregate */ /* * Check whether it's already in the list, and add it if not. */ foreach(l, *context) { info = (MinMaxAggInfo *) lfirst(l); if (info->aggfnoid == aggref->aggfnoid && equal(info->target, aggref->target)) return false; } info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo)); info->aggfnoid = aggref->aggfnoid; info->aggsortop = aggsortop; info->target = aggref->target; *context = lappend(*context, info); /* * We need not recurse into the argument, since it can't contain any * aggregates. */ return false; } Assert(!IsA(node, SubLink)); return expression_tree_walker(node, find_minmax_aggs_walker, (void *) context);}/* * build_minmax_path * Given a MIN/MAX aggregate, try to find an index it can be optimized * with. Build a Path describing the best such index path. * * Returns TRUE if successful, FALSE if not. In the TRUE case, info->path * is filled in. * * XXX look at sharing more code with indxpath.c. *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -