costsize.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,021 行 · 第 1/5 页

C
2,021
字号
voidcost_group(Path *path, PlannerInfo *root,		   int numGroupCols, double numGroups,		   Cost input_startup_cost, Cost input_total_cost,		   double input_tuples){	Cost		startup_cost;	Cost		total_cost;	startup_cost = input_startup_cost;	total_cost = input_total_cost;	/*	 * Charge one cpu_operator_cost per comparison per input tuple. We assume	 * all columns get compared at most of the tuples.	 */	total_cost += cpu_operator_cost * input_tuples * numGroupCols;	path->startup_cost = startup_cost;	path->total_cost = total_cost;}/* * If a nestloop's inner path is an indexscan, be sure to use its estimated * output row count, which may be lower than the restriction-clause-only row * count of its parent.  (We don't include this case in the PATH_ROWS macro * because it applies *only* to a nestloop's inner relation.)  We have to * be prepared to recurse through Append nodes in case of an appendrel. */static doublenestloop_inner_path_rows(Path *path){	double		result;	if (IsA(path, IndexPath))		result = ((IndexPath *) path)->rows;	else if (IsA(path, BitmapHeapPath))		result = ((BitmapHeapPath *) path)->rows;	else if (IsA(path, AppendPath))	{		ListCell   *l;		result = 0;		foreach(l, ((AppendPath *) path)->subpaths)		{			result += nestloop_inner_path_rows((Path *) lfirst(l));		}	}	else		result = PATH_ROWS(path);	return result;}/* * cost_nestloop *	  Determines and returns the cost of joining two relations using the *	  nested loop algorithm. * * 'path' is already filled in except for the cost fields */voidcost_nestloop(NestPath *path, PlannerInfo *root){	Path	   *outer_path = path->outerjoinpath;	Path	   *inner_path = path->innerjoinpath;	Cost		startup_cost = 0;	Cost		run_cost = 0;	Cost		cpu_per_tuple;	QualCost	restrict_qual_cost;	double		outer_path_rows = PATH_ROWS(outer_path);	double		inner_path_rows = nestloop_inner_path_rows(inner_path);	double		ntuples;	Selectivity joininfactor;	if (!enable_nestloop)		startup_cost += disable_cost;	/*	 * If we're doing JOIN_IN then we will stop scanning inner tuples for an	 * outer tuple as soon as we have one match.  Account for the effects of	 * this by scaling down the cost estimates in proportion to the JOIN_IN	 * selectivity.  (This assumes that all the quals attached to the join are	 * IN quals, which should be true.)	 */	joininfactor = join_in_selectivity(path, root);	/* cost of source data */	/*	 * NOTE: clearly, we must pay both outer and inner paths' startup_cost	 * before we can start returning tuples, so the join's startup cost is	 * their sum.  What's not so clear is whether the inner path's	 * startup_cost must be paid again on each rescan of the inner path. This	 * is not true if the inner path is materialized or is a hashjoin, but	 * probably is true otherwise.	 */	startup_cost += outer_path->startup_cost + inner_path->startup_cost;	run_cost += outer_path->total_cost - outer_path->startup_cost;	if (IsA(inner_path, MaterialPath) ||		IsA(inner_path, HashPath))	{		/* charge only run cost for each iteration of inner path */	}	else	{		/*		 * charge startup cost for each iteration of inner path, except we		 * already charged the first startup_cost in our own startup		 */		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;	}	run_cost += outer_path_rows *		(inner_path->total_cost - inner_path->startup_cost) * joininfactor;	/*	 * Compute number of tuples processed (not number emitted!)	 */	ntuples = outer_path_rows * inner_path_rows * joininfactor;	/* CPU costs */	cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root);	startup_cost += restrict_qual_cost.startup;	cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;	run_cost += cpu_per_tuple * ntuples;	path->path.startup_cost = startup_cost;	path->path.total_cost = startup_cost + run_cost;}/* * cost_mergejoin *	  Determines and returns the cost of joining two relations using the *	  merge join algorithm. * * 'path' is already filled in except for the cost fields * * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list; * outersortkeys and innersortkeys are lists of the keys to be used * to sort the outer and inner relations, or NIL if no explicit * sort is needed because the source path is already ordered. */voidcost_mergejoin(MergePath *path, PlannerInfo *root){	Path	   *outer_path = path->jpath.outerjoinpath;	Path	   *inner_path = path->jpath.innerjoinpath;	List	   *mergeclauses = path->path_mergeclauses;	List	   *outersortkeys = path->outersortkeys;	List	   *innersortkeys = path->innersortkeys;	Cost		startup_cost = 0;	Cost		run_cost = 0;	Cost		cpu_per_tuple;	Selectivity merge_selec;	QualCost	merge_qual_cost;	QualCost	qp_qual_cost;	double		outer_path_rows = PATH_ROWS(outer_path);	double		inner_path_rows = PATH_ROWS(inner_path);	double		outer_rows,				inner_rows,				outer_skip_rows,				inner_skip_rows;	double		mergejointuples,				rescannedtuples;	double		rescanratio;	Selectivity outerstartsel,				outerendsel,				innerstartsel,				innerendsel;	Selectivity joininfactor;	Path		sort_path;		/* dummy for result of cost_sort */	/* Protect some assumptions below that rowcounts aren't zero */	if (outer_path_rows <= 0)		outer_path_rows = 1;	if (inner_path_rows <= 0)		inner_path_rows = 1;	if (!enable_mergejoin)		startup_cost += disable_cost;	/*	 * Compute cost and selectivity of the mergequals and qpquals (other	 * restriction clauses) separately.  We use approx_selectivity here for	 * speed --- in most cases, any errors won't affect the result much.	 *	 * Note: it's probably bogus to use the normal selectivity calculation	 * here when either the outer or inner path is a UniquePath.	 */	merge_selec = approx_selectivity(root, mergeclauses,									 path->jpath.jointype);	cost_qual_eval(&merge_qual_cost, mergeclauses, root);	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root);	qp_qual_cost.startup -= merge_qual_cost.startup;	qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;	/* approx # tuples passing the merge quals */	mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);	/*	 * When there are equal merge keys in the outer relation, the mergejoin	 * must rescan any matching tuples in the inner relation. This means	 * re-fetching inner tuples.  Our cost model for this is that a re-fetch	 * costs the same as an original fetch, which is probably an overestimate;	 * but on the other hand we ignore the bookkeeping costs of mark/restore.	 * Not clear if it's worth developing a more refined model.	 *	 * The number of re-fetches can be estimated approximately as size of	 * merge join output minus size of inner relation.	Assume that the	 * distinct key values are 1, 2, ..., and denote the number of values of	 * each key in the outer relation as m1, m2, ...; in the inner relation,	 * n1, n2, ... Then we have	 *	 * size of join = m1 * n1 + m2 * n2 + ...	 *	 * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *	 * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner	 * relation	 *	 * This equation works correctly for outer tuples having no inner match	 * (nk = 0), but not for inner tuples having no outer match (mk = 0); we	 * are effectively subtracting those from the number of rescanned tuples,	 * when we should not.	Can we do better without expensive selectivity	 * computations?	 */	if (IsA(outer_path, UniquePath))		rescannedtuples = 0;	else	{		rescannedtuples = mergejointuples - inner_path_rows;		/* Must clamp because of possible underestimate */		if (rescannedtuples < 0)			rescannedtuples = 0;	}	/* We'll inflate inner run cost this much to account for rescanning */	rescanratio = 1.0 + (rescannedtuples / inner_path_rows);	/*	 * A merge join will stop as soon as it exhausts either input stream	 * (unless it's an outer join, in which case the outer side has to be	 * scanned all the way anyway).  Estimate fraction of the left and right	 * inputs that will actually need to be scanned.  Likewise, we can	 * estimate the number of rows that will be skipped before the first	 * join pair is found, which should be factored into startup cost.	 * We use only the first (most significant) merge clause for this purpose.	 * Since mergejoinscansel() is a fairly expensive computation, we cache	 * the results in the merge clause RestrictInfo.	 */	if (mergeclauses && path->jpath.jointype != JOIN_FULL)	{		RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses);		List	   *opathkeys;		List	   *ipathkeys;		PathKey    *opathkey;		PathKey    *ipathkey;		MergeScanSelCache *cache;		/* Get the input pathkeys to determine the sort-order details */		opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys;		ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys;		Assert(opathkeys);		Assert(ipathkeys);		opathkey = (PathKey *) linitial(opathkeys);		ipathkey = (PathKey *) linitial(ipathkeys);		/* debugging check */		if (opathkey->pk_opfamily != ipathkey->pk_opfamily ||			opathkey->pk_strategy != ipathkey->pk_strategy ||			opathkey->pk_nulls_first != ipathkey->pk_nulls_first)			elog(ERROR, "left and right pathkeys do not match in mergejoin");		/* Get the selectivity with caching */		cache = cached_scansel(root, firstclause, opathkey);		if (bms_is_subset(firstclause->left_relids,						  outer_path->parent->relids))		{			/* left side of clause is outer */			outerstartsel = cache->leftstartsel;			outerendsel = cache->leftendsel;			innerstartsel = cache->rightstartsel;			innerendsel = cache->rightendsel;		}		else		{			/* left side of clause is inner */			outerstartsel = cache->rightstartsel;			outerendsel = cache->rightendsel;			innerstartsel = cache->leftstartsel;			innerendsel = cache->leftendsel;		}		if (path->jpath.jointype == JOIN_LEFT)		{			outerstartsel = 0.0;			outerendsel = 1.0;		}		else if (path->jpath.jointype == JOIN_RIGHT)		{			innerstartsel = 0.0;			innerendsel = 1.0;		}	}	else	{		/* cope with clauseless or full mergejoin */		outerstartsel = innerstartsel = 0.0;		outerendsel = innerendsel = 1.0;	}	/*	 * Convert selectivities to row counts.  We force outer_rows and	 * inner_rows to be at least 1, but the skip_rows estimates can be zero.	 */	outer_skip_rows = rint(outer_path_rows * outerstartsel);	inner_skip_rows = rint(inner_path_rows * innerstartsel);	outer_rows = clamp_row_est(outer_path_rows * outerendsel);	inner_rows = clamp_row_est(inner_path_rows * innerendsel);	Assert(outer_skip_rows <= outer_rows);	Assert(inner_skip_rows <= inner_rows);	/*	 * Readjust scan selectivities to account for above rounding.  This is	 * normally an insignificant effect, but when there are only a few rows in	 * the inputs, failing to do this makes for a large percentage error.	 */	outerstartsel = outer_skip_rows / outer_path_rows;	innerstartsel = inner_skip_rows / inner_path_rows;	outerendsel = outer_rows / outer_path_rows;	innerendsel = inner_rows / inner_path_rows;	Assert(outerstartsel <= outerendsel);	Assert(innerstartsel <= innerendsel);	/* cost of source data */	if (outersortkeys)			/* do we need to sort outer? */	{		cost_sort(&sort_path,				  root,				  outersortkeys,				  outer_path->total_cost,				  outer_path_rows,				  outer_path->parent->width,				  -1.0);		startup_cost += sort_path.startup_cost;		startup_cost += (sort_path.total_cost - sort_path.startup_cost)			* outerstartsel;		run_cost += (sort_path.total_cost - sort_path.startup_cost)			* (outerendsel - outerstartsel);	}	else	{		startup_cost += outer_path->startup_cost;		startup_cost += (outer_path->total_cost - outer_path->startup_cost)			* outerstartsel;		run_cost += (outer_path->total_cost - outer_path->startup_cost)			* (outerendsel - outerstartsel);	}	if (innersortkeys)			/* do we need to sort inner? */	{		cost_sort(&sort_path,				  root,				  innersortkeys,				  inner_path->total_cost,				  inner_path_rows,				  inner_path->parent->width,				  -1.0);		startup_cost += sort_path.startup_cost;		startup_cost += (sort_path.total_cost - sort_path.startup_cost)			* innerstartsel * rescanratio;		run_cost += (sort_path.total_cost - sort_path.startup_cost)			* (innerendsel - innerstartsel) * rescanratio;	}	else	{		startup_cost += inner_path->startup_cost;		startup_cost += (inner_path->total_cost - inner_path->startup_cost)			* innerstartsel * rescanratio;		run_cost += (inner_path->total_cost - inner_path->startup_cost)			* (innerendsel - innerstartsel) * rescanratio;	}	/* CPU costs */	/*	 * If we're doing JOIN_IN then we will stop outputting inner tuples for an	 * outer tuple as soon as we have one match.  Account for the effects of	 * this by scaling down the cost estimates in proportion to the expected	 * output size.  (This assumes that all the quals attached to the join are	 * IN quals, which should be true.)	 */	joininfactor = join_in_selectivity(&path->jpath, root);	/*	 * The number of tuple comparisons needed is approximately number of outer	 * rows plus number of inner rows plus number of rescanned tuples (can we	 * refine this?).  At each one, we need to evaluate the mergejoin quals.	 * NOTE: JOIN_IN mode does not save any work here, so do NOT include	 * joininfactor.	 */	startup_cost += merge_qual_cost.startup;	startup_cost += merge_qual_cost.per_tuple *		(outer_skip_rows + inner_skip_rows * rescanratio);	run_cost += merge_qual_cost.per_tuple *

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?