planner.c

来自「PostgreSQL 7.4.6 for Linux」· C语言 代码 · 共 1,553 行 · 第 1/4 页

C
1,553
字号
						else						{							/* OFFSET is an expression ... punt ... */							limit_fraction = 0.10;						}					}				}			}			else			{				/* LIMIT is an expression ... punt ... */				limit_fraction = 0.10;			}			if (limit_fraction > 0.0)			{				/*				 * If we have absolute limits from both caller and LIMIT,				 * use the smaller value; if one is fractional and the				 * other absolute, treat the fraction as a fraction of the				 * absolute value; else we can multiply the two fractions				 * together.				 */				if (tuple_fraction >= 1.0)				{					if (limit_fraction >= 1.0)					{						/* both absolute */						tuple_fraction = Min(tuple_fraction, limit_fraction);					}					else					{						/* caller absolute, limit fractional */						tuple_fraction *= limit_fraction;						if (tuple_fraction < 1.0)							tuple_fraction = 1.0;					}				}				else if (tuple_fraction > 0.0)				{					if (limit_fraction >= 1.0)					{						/* caller fractional, limit absolute */						tuple_fraction *= limit_fraction;						if (tuple_fraction < 1.0)							tuple_fraction = 1.0;					}					else					{						/* both fractional */						tuple_fraction *= limit_fraction;					}				}				else				{					/* no info from caller, just use limit */					tuple_fraction = limit_fraction;				}			}		}		/*		 * With grouping or aggregation, the tuple fraction to pass to		 * query_planner() may be different from what it is at top level.		 */		sub_tuple_fraction = tuple_fraction;		if (parse->groupClause)		{			/*			 * In GROUP BY mode, we have the little problem that we don't			 * really know how many input tuples will be needed to make a			 * group, so we can't translate an output LIMIT count into an			 * input count.  For lack of a better idea, assume 25% of the			 * input data will be processed if there is any output limit.			 * However, if the caller gave us a fraction rather than an			 * absolute count, we can keep using that fraction (which			 * amounts to assuming that all the groups are about the same			 * size).			 
*/			if (sub_tuple_fraction >= 1.0)				sub_tuple_fraction = 0.25;			/*			 * If both GROUP BY and ORDER BY are specified, we will need			 * two levels of sort --- and, therefore, certainly need to			 * read all the input tuples --- unless ORDER BY is a subset			 * of GROUP BY.  (We have not yet canonicalized the pathkeys,			 * so must use the slower noncanonical comparison method.)			 */			if (parse->groupClause && parse->sortClause &&				!noncanonical_pathkeys_contained_in(sort_pathkeys,													group_pathkeys))				sub_tuple_fraction = 0.0;		}		else if (parse->hasAggs)		{			/*			 * Ungrouped aggregate will certainly want all the input			 * tuples.			 */			sub_tuple_fraction = 0.0;		}		else if (parse->distinctClause)		{			/*			 * SELECT DISTINCT, like GROUP, will absorb an unpredictable			 * number of input tuples per output tuple.  Handle the same			 * way.			 */			if (sub_tuple_fraction >= 1.0)				sub_tuple_fraction = 0.25;		}		/*		 * Generate the best unsorted and presorted paths for this Query		 * (but note there may not be any presorted path).		 */		query_planner(parse, sub_tlist, sub_tuple_fraction,					  &cheapest_path, &sorted_path);		/*		 * We couldn't canonicalize group_pathkeys and sort_pathkeys		 * before running query_planner(), so do it now.		 */		group_pathkeys = canonicalize_pathkeys(parse, group_pathkeys);		sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);		/*		 * Consider whether we might want to use hashed grouping.		 */		if (parse->groupClause)		{			List	   *groupExprs;			double		cheapest_path_rows;			int			cheapest_path_width;			/*			 * Beware in this section of the possibility that			 * cheapest_path->parent is NULL.  
This could happen if user			 * does something silly like SELECT 'foo' GROUP BY 1;			 */			if (cheapest_path->parent)			{				cheapest_path_rows = cheapest_path->parent->rows;				cheapest_path_width = cheapest_path->parent->width;			}			else			{				cheapest_path_rows = 1; /* assume non-set result */				cheapest_path_width = 100;		/* arbitrary */			}			/*			 * Always estimate the number of groups.  We can't do this			 * until after running query_planner(), either.			 */			groupExprs = get_sortgrouplist_exprs(parse->groupClause,												 parse->targetList);			dNumGroups = estimate_num_groups(parse,											 groupExprs,											 cheapest_path_rows);			/* Also want it as a long int --- but 'ware overflow! */			numGroups = (long) Min(dNumGroups, (double) LONG_MAX);			/*			 * Check can't-do-it conditions, including whether the			 * grouping operators are hashjoinable.			 *			 * Executor doesn't support hashed aggregation with DISTINCT			 * aggregates.	(Doing so would imply storing *all* the input			 * values in the hash table, which seems like a certain			 * loser.)			 */			if (!enable_hashagg || !hash_safe_grouping(parse))				use_hashed_grouping = false;			else if (parse->hasAggs &&					 (contain_distinct_agg_clause((Node *) tlist) ||					  contain_distinct_agg_clause(parse->havingQual)))				use_hashed_grouping = false;			else			{				/*				 * Use hashed grouping if (a) we think we can fit the				 * hashtable into SortMem, *and* (b) the estimated cost is				 * no more than doing it the other way.  While avoiding				 * the need for sorted input is usually a win, the fact				 * that the output won't be sorted may be a loss; so we				 * need to do an actual cost comparison.				 *				 * In most cases we have no good way to estimate the size of				 * the transition value needed by an aggregate;				 * arbitrarily assume it is 100 bytes.	Also set the				 * overhead per hashtable entry at 64 bytes.				 
*/				int			hashentrysize = cheapest_path_width + 64 + numAggs * 100;				if (hashentrysize * dNumGroups <= SortMem * 1024L)				{					/*					 * Okay, do the cost comparison.  We need to consider					 * cheapest_path + hashagg [+ final sort] versus					 * either cheapest_path [+ sort] + group or agg [+					 * final sort] or presorted_path + group or agg [+					 * final sort] where brackets indicate a step that may					 * not be needed. We assume query_planner() will have					 * returned a presorted path only if it's a winner					 * compared to cheapest_path for this purpose.					 *					 * These path variables are dummies that just hold cost					 * fields; we don't make actual Paths for these steps.					 */					Path		hashed_p;					Path		sorted_p;					cost_agg(&hashed_p, parse,							 AGG_HASHED, numAggs,							 numGroupCols, dNumGroups,							 cheapest_path->startup_cost,							 cheapest_path->total_cost,							 cheapest_path_rows);					/* Result of hashed agg is always unsorted */					if (sort_pathkeys)						cost_sort(&hashed_p, parse, sort_pathkeys,								  hashed_p.total_cost,								  dNumGroups,								  cheapest_path_width);					if (sorted_path)					{						sorted_p.startup_cost = sorted_path->startup_cost;						sorted_p.total_cost = sorted_path->total_cost;						current_pathkeys = sorted_path->pathkeys;					}					else					{						sorted_p.startup_cost = cheapest_path->startup_cost;						sorted_p.total_cost = cheapest_path->total_cost;						current_pathkeys = cheapest_path->pathkeys;					}					if (!pathkeys_contained_in(group_pathkeys,											   current_pathkeys))					{						cost_sort(&sorted_p, parse, group_pathkeys,								  sorted_p.total_cost,								  cheapest_path_rows,								  cheapest_path_width);						current_pathkeys = group_pathkeys;					}					if (parse->hasAggs)						cost_agg(&sorted_p, parse,								 AGG_SORTED, numAggs,								 numGroupCols, dNumGroups,								 sorted_p.startup_cost,								 sorted_p.total_cost,								 cheapest_path_rows);					
else						cost_group(&sorted_p, parse,								   numGroupCols, dNumGroups,								   sorted_p.startup_cost,								   sorted_p.total_cost,								   cheapest_path_rows);					/* The Agg or Group node will preserve ordering */					if (sort_pathkeys &&						!pathkeys_contained_in(sort_pathkeys,											   current_pathkeys))					{						cost_sort(&sorted_p, parse, sort_pathkeys,								  sorted_p.total_cost,								  dNumGroups,								  cheapest_path_width);					}					/*					 * Now make the decision using the top-level tuple					 * fraction.  First we have to convert an absolute					 * count (LIMIT) into fractional form.					 */					if (tuple_fraction >= 1.0)						tuple_fraction /= dNumGroups;					if (compare_fractional_path_costs(&hashed_p, &sorted_p,													  tuple_fraction) < 0)					{						/* Hashed is cheaper, so use it */						use_hashed_grouping = true;					}				}			}		}		/*		 * Select the best path and create a plan to execute it.		 *		 * If we are doing hashed grouping, we will always read all the input		 * tuples, so use the cheapest-total path.	Otherwise, trust		 * query_planner's decision about which to use.		 */		if (sorted_path && !use_hashed_grouping)		{			result_plan = create_plan(parse, sorted_path);			current_pathkeys = sorted_path->pathkeys;		}		else		{			result_plan = create_plan(parse, cheapest_path);			current_pathkeys = cheapest_path->pathkeys;		}		/*		 * create_plan() returns a plan with just a "flat" tlist of		 * required Vars.  Usually we need to insert the sub_tlist as the		 * tlist of the top plan node.	However, we can skip that if we		 * determined that whatever query_planner chose to return will be		 * good enough.		 */		if (need_tlist_eval)		{			/*			 * If the top-level plan node is one that cannot do expression			 * evaluation, we must insert a Result node to project the			 * desired tlist. Currently, the only plan node we might see			 * here that falls into that category is Append.			 
*/			if (IsA(result_plan, Append))			{				result_plan = (Plan *) make_result(sub_tlist, NULL,												   result_plan);			}			else			{				/*				 * Otherwise, just replace the subplan's flat tlist with				 * the desired tlist.				 */				result_plan->targetlist = sub_tlist;			}			/*			 * Also, account for the cost of evaluation of the sub_tlist.			 *			 * Up to now, we have only been dealing with "flat" tlists,			 * containing just Vars.  So their evaluation cost is zero			 * according to the model used by cost_qual_eval() (or if you			 * prefer, the cost is factored into cpu_tuple_cost).  Thus we			 * can avoid accounting for tlist cost throughout			 * query_planner() and subroutines.  But now we've inserted a			 * tlist that might contain actual operators, sub-selects, etc			 * --- so we'd better account for its cost.			 *			 * Below this point, any tlist eval cost for added-on nodes			 * should be accounted for as we create those nodes.			 * Presently, of the node types we can add on, only Agg and			 * Group project new tlists (the rest just copy their input			 * tuples) --- so make_agg() and make_group() are responsible			 * for computing the added cost.			 */			cost_qual_eval(&tlist_cost, sub_tlist);			result_plan->startup_cost += tlist_cost.startup;			result_plan->total_cost += tlist_cost.startup +				tlist_cost.per_tuple * result_plan->plan_rows;		}		else		{			/*			 * Since we're using query_planner's tlist and not the one			 * make_subplanTargetList calculated, we have to refigure any			 * grouping-column indexes make_subplanTargetList computed.			 */			locate_grouping_columns(parse, tlist, result_plan->targetlist,									groupColIdx);		}		/*		 * Insert AGG or GROUP node if needed, plus an explicit sort step		 * if necessary.		 
*		 * HAVING clause, if any, becomes qual of the Agg node		 */		if (use_hashed_grouping)		{			/* Hashed aggregate plan --- no sort needed */			result_plan = (Plan *) make_agg(parse,											tlist,											(List *) parse->havingQual,											AGG_HASHED,											numGroupCols,											groupColIdx,											numGroups,

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?