planner.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 1,820 行 · 第 1/4 页

C
1,820
字号
			else			{				/* caller absolute, limit fractional; use caller's value */			}		}		else if (tuple_fraction > 0.0)		{			if (limit_fraction >= 1.0)			{				/* caller fractional, limit absolute; use limit */				tuple_fraction = limit_fraction;			}			else			{				/* both fractional */				tuple_fraction = Min(tuple_fraction, limit_fraction);			}		}		else		{			/* no info from caller, just use limit */			tuple_fraction = limit_fraction;		}	}	else if (*offset_est != 0 && tuple_fraction > 0.0)	{		/*		 * We have an OFFSET but no LIMIT.	This acts entirely differently		 * from the LIMIT case: here, we need to increase rather than decrease		 * the caller's tuple_fraction, because the OFFSET acts to cause more		 * tuples to be fetched instead of fewer.  This only matters if we got		 * a tuple_fraction > 0, however.		 *		 * As above, use 10% if OFFSET is present but unestimatable.		 */		if (*offset_est < 0)			limit_fraction = 0.10;		else			limit_fraction = (double) *offset_est;		/*		 * If we have absolute counts from both caller and OFFSET, add them		 * together; likewise if they are both fractional.	If one is		 * fractional and the other absolute, we want to take the larger, and		 * we heuristically assume that's the fractional one.		 */		if (tuple_fraction >= 1.0)		{			if (limit_fraction >= 1.0)			{				/* both absolute, so add them together */				tuple_fraction += limit_fraction;			}			else			{				/* caller absolute, limit fractional; use limit */				tuple_fraction = limit_fraction;			}		}		else		{			if (limit_fraction >= 1.0)			{				/* caller fractional, limit absolute; use caller's value */			}			else			{				/* both fractional, so add them together */				tuple_fraction += limit_fraction;				if (tuple_fraction >= 1.0)					tuple_fraction = 0.0;		/* assume fetch all */			}		}	}	return tuple_fraction;}/* * extract_grouping_ops - make an array of the equality operator OIDs *		for the GROUP BY clause */static Oid *extract_grouping_ops(List *groupClause){	int			numCols = list_length(groupClause);	int			colno = 0;	Oid		   *groupOperators;	ListCell   *glitem;	groupOperators = (Oid *) palloc(sizeof(Oid) * numCols);	foreach(glitem, groupClause)	{		GroupClause *groupcl = (GroupClause *) lfirst(glitem);		groupOperators[colno] = get_equality_op_for_ordering_op(groupcl->sortop);		if (!OidIsValid(groupOperators[colno])) /* shouldn't happen */			elog(ERROR, "could not find equality operator for ordering operator %u",				 groupcl->sortop);		colno++;	}	return groupOperators;}/* * choose_hashed_grouping - should we use hashed grouping? */static boolchoose_hashed_grouping(PlannerInfo *root,					   double tuple_fraction, double limit_tuples,					   Path *cheapest_path, Path *sorted_path,					   Oid *groupOperators, double dNumGroups,					   AggClauseCounts *agg_counts){	int			numGroupCols = list_length(root->parse->groupClause);	double		cheapest_path_rows;	int			cheapest_path_width;	Size		hashentrysize;	List	   *current_pathkeys;	Path		hashed_p;	Path		sorted_p;	int			i;	/*	 * Check can't-do-it conditions, including whether the grouping operators	 * are hashjoinable.  (We assume hashing is OK if they are marked	 * oprcanhash.	If there isn't actually a supporting hash function, the	 * executor will complain at runtime.)	 *	 * Executor doesn't support hashed aggregation with DISTINCT aggregates.	 * (Doing so would imply storing *all* the input values in the hash table,	 * which seems like a certain loser.)	 */	if (!enable_hashagg)		return false;	if (agg_counts->numDistinctAggs != 0)		return false;	for (i = 0; i < numGroupCols; i++)	{		if (!op_hashjoinable(groupOperators[i]))			return false;	}	/*	 * Don't do it if it doesn't look like the hashtable will fit into	 * work_mem.	 *	 * Beware here of the possibility that cheapest_path->parent is NULL. This	 * could happen if user does something silly like SELECT 'foo' GROUP BY 1;	 */	if (cheapest_path->parent)	{		cheapest_path_rows = cheapest_path->parent->rows;		cheapest_path_width = cheapest_path->parent->width;	}	else	{		cheapest_path_rows = 1; /* assume non-set result */		cheapest_path_width = 100;		/* arbitrary */	}	/* Estimate per-hash-entry space at tuple width... */	hashentrysize = MAXALIGN(cheapest_path_width) + MAXALIGN(sizeof(MinimalTupleData));	/* plus space for pass-by-ref transition values... */	hashentrysize += agg_counts->transitionSpace;	/* plus the per-hash-entry overhead */	hashentrysize += hash_agg_entry_size(agg_counts->numAggs);	if (hashentrysize * dNumGroups > work_mem * 1024L)		return false;	/*	 * See if the estimated cost is no more than doing it the other way. While	 * avoiding the need for sorted input is usually a win, the fact that the	 * output won't be sorted may be a loss; so we need to do an actual cost	 * comparison.	 *	 * We need to consider cheapest_path + hashagg [+ final sort] versus	 * either cheapest_path [+ sort] + group or agg [+ final sort] or	 * presorted_path + group or agg [+ final sort] where brackets indicate a	 * step that may not be needed. We assume query_planner() will have	 * returned a presorted path only if it's a winner compared to	 * cheapest_path for this purpose.	 *	 * These path variables are dummies that just hold cost fields; we don't	 * make actual Paths for these steps.	 */	cost_agg(&hashed_p, root, AGG_HASHED, agg_counts->numAggs,			 numGroupCols, dNumGroups,			 cheapest_path->startup_cost, cheapest_path->total_cost,			 cheapest_path_rows);	/* Result of hashed agg is always unsorted */	if (root->sort_pathkeys)		cost_sort(&hashed_p, root, root->sort_pathkeys, hashed_p.total_cost,				  dNumGroups, cheapest_path_width, limit_tuples);	if (sorted_path)	{		sorted_p.startup_cost = sorted_path->startup_cost;		sorted_p.total_cost = sorted_path->total_cost;		current_pathkeys = sorted_path->pathkeys;	}	else	{		sorted_p.startup_cost = cheapest_path->startup_cost;		sorted_p.total_cost = cheapest_path->total_cost;		current_pathkeys = cheapest_path->pathkeys;	}	if (!pathkeys_contained_in(root->group_pathkeys, current_pathkeys))	{		cost_sort(&sorted_p, root, root->group_pathkeys, sorted_p.total_cost,				  cheapest_path_rows, cheapest_path_width, -1.0);		current_pathkeys = root->group_pathkeys;	}	if (root->parse->hasAggs)		cost_agg(&sorted_p, root, AGG_SORTED, agg_counts->numAggs,				 numGroupCols, dNumGroups,				 sorted_p.startup_cost, sorted_p.total_cost,				 cheapest_path_rows);	else		cost_group(&sorted_p, root, numGroupCols, dNumGroups,				   sorted_p.startup_cost, sorted_p.total_cost,				   cheapest_path_rows);	/* The Agg or Group node will preserve ordering */	if (root->sort_pathkeys &&		!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))		cost_sort(&sorted_p, root, root->sort_pathkeys, sorted_p.total_cost,				  dNumGroups, cheapest_path_width, limit_tuples);	/*	 * Now make the decision using the top-level tuple fraction.  First we	 * have to convert an absolute count (LIMIT) into fractional form.	 */	if (tuple_fraction >= 1.0)		tuple_fraction /= dNumGroups;	if (compare_fractional_path_costs(&hashed_p, &sorted_p,									  tuple_fraction) < 0)	{		/* Hashed is cheaper, so use it */		return true;	}	return false;}/*--------------- * make_subplanTargetList *	  Generate appropriate target list when grouping is required. * * When grouping_planner inserts Aggregate, Group, or Result plan nodes * above the result of query_planner, we typically want to pass a different * target list to query_planner than the outer plan nodes should have. * This routine generates the correct target list for the subplan. * * The initial target list passed from the parser already contains entries * for all ORDER BY and GROUP BY expressions, but it will not have entries * for variables used only in HAVING clauses; so we need to add those * variables to the subplan target list.  Also, we flatten all expressions * except GROUP BY items into their component variables; the other expressions * will be computed by the inserted nodes rather than by the subplan. * For example, given a query like *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b; * we want to pass this targetlist to the subplan: *		a,b,c,d,a+b * where the a+b target will be used by the Sort/Group steps, and the * other targets will be used for computing the final results.	(In the * above example we could theoretically suppress the a and b targets and * pass down only c,d,a+b, but it's not really worth the trouble to * eliminate simple var references from the subplan.  We will avoid doing * the extra computation to recompute a+b at the outer level; see * fix_upper_expr() in setrefs.c.) * * If we are grouping or aggregating, *and* there are no non-Var grouping * expressions, then the returned tlist is effectively dummy; we do not * need to force it to be evaluated, because all the Vars it contains * should be present in the output of query_planner anyway. * * 'tlist' is the query's target list. * 'groupColIdx' receives an array of column numbers for the GROUP BY *			expressions (if there are any) in the subplan's target list. * 'need_tlist_eval' is set true if we really need to evaluate the *			result tlist. * * The result is the targetlist to be passed to the subplan. *--------------- */static List *make_subplanTargetList(PlannerInfo *root,					   List *tlist,					   AttrNumber **groupColIdx,					   bool *need_tlist_eval){	Query	   *parse = root->parse;	List	   *sub_tlist;	List	   *extravars;	int			numCols;	*groupColIdx = NULL;	/*	 * If we're not grouping or aggregating, there's nothing to do here;	 * query_planner should receive the unmodified target list.	 */	if (!parse->hasAggs && !parse->groupClause && !root->hasHavingQual)	{		*need_tlist_eval = true;		return tlist;	}	/*	 * Otherwise, start with a "flattened" tlist (having just the vars	 * mentioned in the targetlist and HAVING qual --- but not upper- level	 * Vars; they will be replaced by Params later on).	 */	sub_tlist = flatten_tlist(tlist);	extravars = pull_var_clause(parse->havingQual, false);	sub_tlist = add_to_flat_tlist(sub_tlist, extravars);	list_free(extravars);	*need_tlist_eval = false;	/* only eval if not flat tlist */	/*	 * If grouping, create sub_tlist entries for all GROUP BY expressions	 * (GROUP BY items that are simple Vars should be in the list already),	 * and make an array showing where the group columns are in the sub_tlist.	 */	numCols = list_length(parse->groupClause);	if (numCols > 0)	{		int			keyno = 0;		AttrNumber *grpColIdx;		ListCell   *gl;		grpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);		*groupColIdx = grpColIdx;		foreach(gl, parse->groupClause)		{			GroupClause *grpcl = (GroupClause *) lfirst(gl);			Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);			TargetEntry *te = NULL;			ListCell   *sl;			/* Find or make a matching sub_tlist entry */			foreach(sl, sub_tlist)			{				te = (TargetEntry *) lfirst(sl);				if (equal(groupexpr, te->expr))					break;			}			if (!sl)			{				te = makeTargetEntry((Expr *) groupexpr,									 list_length(sub_tlist) + 1,									 NULL,									 false);				sub_tlist = lappend(sub_tlist, te);				*need_tlist_eval = true;		/* it's not flat anymore */			}			/* and save its resno */			grpColIdx[keyno++] = te->resno;		}	}	return sub_tlist;}/* * locate_grouping_columns *		Locate grouping columns in the tlist chosen by query_planner. * * This is only needed if we don't use the sub_tlist chosen by * make_subplanTargetList.	We have to forget the column indexes found * by that routine and re-locate the grouping vars in the real sub_tlist. */static voidlocate_grouping_columns(PlannerInfo *root,						List *tlist,						List *sub_tlist,						AttrNumber *groupColIdx){	int			keyno = 0;	ListCell   *gl;	/*	 * No work unless grouping.	 */	if (!root->parse->groupClause)	{		Assert(groupColIdx == NULL);		return;	}	Assert(groupColIdx != NULL);	foreach(gl, root->parse->groupClause)	{		GroupClause *grpcl = (GroupClause *) lfirst(gl);		Node	   *groupexpr = get_sortgroupclause_expr(grpcl, tlist);		TargetEntry *te = NULL;		ListCell   *sl;		foreach(sl, sub_tlist)		{			te = (TargetEntry *) lfirst(sl);			if (equal(groupexpr, te->expr))				break;		}		if (!sl)			elog(ERROR, "failed to locate grouping columns");		groupColIdx[keyno++] = te->resno;	}}/* * postprocess_setop_tlist *	  Fix up targetlist returned by plan_set_operations(). * * We need to transpose sort key info from the orig_tlist into new_tlist. * NOTE: this would not be good enough if we supported resjunk sort keys * for results of set operations --- then, we'd need to project a whole * new tlist to evaluate the resjunk columns.  For now, just ereport if we * find any resjunk columns in orig_tlist. */static List *postprocess_setop_tlist(List *new_tlist, List *orig_tlist){	ListCell   *l;	ListCell   *orig_tlist_item = list_head(orig_tlist);	foreach(l, new_tlist)	{		TargetEntry *new_tle = (TargetEntry *) lfirst(l);		TargetEntry *orig_tle;		/* ignore resjunk columns in setop result */		if (new_tle->resjunk)			continue;		Assert(orig_tlist_item != NULL);		orig_tle = (TargetEntry *) lfirst(orig_tlist_item);		orig_tlist_item = lnext(orig_tlist_item);		if (orig_tle->resjunk)	/* should not happen */			elog(ERROR, "resjunk output columns are not implemented");		Assert(new_tle->resno == orig_tle->resno);		new_tle->ressortgroupref = orig_tle->ressortgroupref;	}	if (orig_tlist_item != NULL)		elog(ERROR, "resjunk output columns are not implemented");	return new_tlist;}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?