costsize.c
 *      the cost of reading the input data.
 *
 * If the total volume of data to materialize exceeds work_mem, we will need
 * to write it to disk, so the cost is much higher in that case.
 */
void
cost_material(Path *path,
              Cost input_cost, double tuples, int width)
{
    Cost        startup_cost = input_cost;
    Cost        run_cost = 0;
    double      nbytes = relation_byte_size(tuples, width);
    long        work_mem_bytes = work_mem * 1024L;

    /* disk costs */
    if (nbytes > work_mem_bytes)
    {
        double      npages = ceil(nbytes / BLCKSZ);

        /* We'll write during startup and read during retrieval */
        startup_cost += npages;
        run_cost += npages;
    }

    /*
     * Charge a very small amount per inserted tuple, to reflect bookkeeping
     * costs.  We use cpu_tuple_cost/10 for this.  This is needed to break
     * the tie that would otherwise exist between nestloop with A outer,
     * materialized B inner and nestloop with B outer, materialized A inner.
     * The extra cost ensures we'll prefer materializing the smaller rel.
     */
    startup_cost += cpu_tuple_cost * 0.1 * tuples;

    /*
     * Also charge a small amount per extracted tuple.  We use cpu_tuple_cost
     * so that it doesn't appear worthwhile to materialize a bare seqscan.
     */
    run_cost += cpu_tuple_cost * tuples;

    path->startup_cost = startup_cost;
    path->total_cost = startup_cost + run_cost;
}
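/*
 * Illustrative sketch (not part of costsize.c): the same arithmetic as
 * cost_material above, with the planner GUCs and helpers replaced by
 * hard-coded stand-ins so it compiles on its own.  The parameter values
 * (cpu_tuple_cost = 0.01, work_mem = 1024 kB, BLCKSZ = 8192) match common
 * defaults but are assumptions here, as is the crude relation_byte_size()
 * stand-in of (width + 28) bytes per tuple.
 */
#include <math.h>
#include <stdio.h>

static double
material_total_cost(double input_cost, double tuples, int width)
{
    const double cpu_tuple_cost = 0.01;         /* assumed default */
    const long   work_mem_bytes = 1024 * 1024L; /* work_mem = 1024 kB */
    const double blcksz = 8192;                 /* assumed BLCKSZ */
    double       startup_cost = input_cost;
    double       run_cost = 0;
    /* crude stand-in for relation_byte_size(): per-tuple overhead assumed */
    double       nbytes = tuples * (width + 28);

    if (nbytes > work_mem_bytes)
    {
        double      npages = ceil(nbytes / blcksz);

        startup_cost += npages;     /* written once during startup */
        run_cost += npages;         /* read back during retrieval */
    }
    startup_cost += cpu_tuple_cost * 0.1 * tuples;  /* tie-breaker charge */
    run_cost += cpu_tuple_cost * tuples;            /* per-extracted-tuple */
    return startup_cost + run_cost;
}

int
main(void)
{
    /* 10,000 100-byte tuples fit in work_mem; 100,000 spill to disk */
    printf("in-memory: %.2f\n", material_total_cost(100.0, 10000.0, 100));
    printf("spilled:   %.2f\n", material_total_cost(100.0, 100000.0, 100));
    return 0;
}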
/*
 * cost_agg
 *      Determines and returns the cost of performing an Agg plan node,
 *      including the cost of its input.
 *
 * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
 * are for appropriately-sorted input.
 */
void
cost_agg(Path *path, PlannerInfo *root,
         AggStrategy aggstrategy, int numAggs,
         int numGroupCols, double numGroups,
         Cost input_startup_cost, Cost input_total_cost,
         double input_tuples)
{
    Cost        startup_cost;
    Cost        total_cost;

    /*
     * We charge one cpu_operator_cost per aggregate function per input
     * tuple, and another one per output tuple (corresponding to transfn and
     * finalfn calls respectively).  If we are grouping, we charge an
     * additional cpu_operator_cost per grouping column per input tuple for
     * grouping comparisons.
     *
     * We will produce a single output tuple if not grouping, and a tuple per
     * group otherwise.  We charge cpu_tuple_cost for each output tuple.
     *
     * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
     * same total CPU cost, but AGG_SORTED has lower startup cost.  If the
     * input path is already sorted appropriately, AGG_SORTED should be
     * preferred (since it has no risk of memory overflow).  This will happen
     * as long as the computed total costs are indeed exactly equal --- but
     * if there's roundoff error we might do the wrong thing.  So be sure
     * that the computations below form the same intermediate values in the
     * same order.
     */
    if (aggstrategy == AGG_PLAIN)
    {
        startup_cost = input_total_cost;
        startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
        /* we aren't grouping */
        total_cost = startup_cost + cpu_tuple_cost;
    }
    else if (aggstrategy == AGG_SORTED)
    {
        /* Here we are able to deliver output on-the-fly */
        startup_cost = input_startup_cost;
        total_cost = input_total_cost;
        /* calcs phrased this way to match HASHED case, see note above */
        total_cost += cpu_operator_cost * input_tuples * numGroupCols;
        total_cost += cpu_operator_cost * input_tuples * numAggs;
        total_cost += cpu_operator_cost * numGroups * numAggs;
        total_cost += cpu_tuple_cost * numGroups;
    }
    else
    {
        /* must be AGG_HASHED */
        startup_cost = input_total_cost;
        startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
        startup_cost += cpu_operator_cost * input_tuples * numAggs;
        total_cost = startup_cost;
        total_cost += cpu_operator_cost * numGroups * numAggs;
        total_cost += cpu_tuple_cost * numGroups;
    }

    path->startup_cost = startup_cost;
    path->total_cost = total_cost;
}
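/*
 * Illustrative sketch (not part of costsize.c): the AGG_SORTED and
 * AGG_HASHED arms of cost_agg above charge the same four CPU terms, just
 * split differently between startup and run.  This stand-alone stand-in
 * (cpu_operator_cost = 0.0025 and cpu_tuple_cost = 0.01 are assumed
 * defaults; the input numbers are made up) computes both totals so the
 * intended exact tie can be checked directly.
 */
#include <stdio.h>

int
main(void)
{
    const double cpu_operator_cost = 0.0025;    /* assumed default */
    const double cpu_tuple_cost = 0.01;         /* assumed default */
    double      input_startup_cost = 0.0;
    double      input_total_cost = 1000.0;
    double      input_tuples = 100000.0;
    double      numGroups = 500.0;
    int         numAggs = 2;
    int         numGroupCols = 1;
    double      sorted_total,
                hashed_startup,
                hashed_total;

    /* AGG_SORTED: everything beyond the input is run cost */
    sorted_total = input_total_cost;
    sorted_total += cpu_operator_cost * input_tuples * numGroupCols;
    sorted_total += cpu_operator_cost * input_tuples * numAggs;
    sorted_total += cpu_operator_cost * numGroups * numAggs;
    sorted_total += cpu_tuple_cost * numGroups;

    /* AGG_HASHED: the per-input-tuple work moves into startup */
    hashed_startup = input_total_cost;
    hashed_startup += cpu_operator_cost * input_tuples * numGroupCols;
    hashed_startup += cpu_operator_cost * input_tuples * numAggs;
    hashed_total = hashed_startup;
    hashed_total += cpu_operator_cost * numGroups * numAggs;
    hashed_total += cpu_tuple_cost * numGroups;

    /* totals match exactly; only the startup costs differ */
    printf("sorted total = %.4f, hashed total = %.4f\n",
           sorted_total, hashed_total);
    printf("sorted startup = %.4f, hashed startup = %.4f\n",
           input_startup_cost, hashed_startup);
    return 0;
}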
/*
 * cost_group
 *      Determines and returns the cost of performing a Group plan node,
 *      including the cost of its input.
 *
 * Note: caller must ensure that input costs are for appropriately-sorted
 * input.
 */
void
cost_group(Path *path, PlannerInfo *root,
           int numGroupCols, double numGroups,
           Cost input_startup_cost, Cost input_total_cost,
           double input_tuples)
{
    Cost        startup_cost;
    Cost        total_cost;

    startup_cost = input_startup_cost;
    total_cost = input_total_cost;

    /*
     * Charge one cpu_operator_cost per comparison per input tuple.  We
     * assume all columns get compared at most of the tuples.
     */
    total_cost += cpu_operator_cost * input_tuples * numGroupCols;

    path->startup_cost = startup_cost;
    path->total_cost = total_cost;
}

/*
 * cost_nestloop
 *      Determines and returns the cost of joining two relations using the
 *      nested loop algorithm.
 *
 * 'path' is already filled in except for the cost fields
 */
void
cost_nestloop(NestPath *path, PlannerInfo *root)
{
    Path       *outer_path = path->outerjoinpath;
    Path       *inner_path = path->innerjoinpath;
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    QualCost    restrict_qual_cost;
    double      outer_path_rows = PATH_ROWS(outer_path);
    double      inner_path_rows = PATH_ROWS(inner_path);
    double      ntuples;
    Selectivity joininfactor;

    /*
     * If inner path is an indexscan, be sure to use its estimated output row
     * count, which may be lower than the restriction-clause-only row count
     * of its parent.  (We don't include this case in the PATH_ROWS macro
     * because it applies *only* to a nestloop's inner relation.)
     */
    if (IsA(inner_path, IndexPath))
        inner_path_rows = ((IndexPath *) inner_path)->rows;
    else if (IsA(inner_path, BitmapHeapPath))
        inner_path_rows = ((BitmapHeapPath *) inner_path)->rows;

    if (!enable_nestloop)
        startup_cost += disable_cost;

    /*
     * If we're doing JOIN_IN then we will stop scanning inner tuples for an
     * outer tuple as soon as we have one match.  Account for the effects of
     * this by scaling down the cost estimates in proportion to the JOIN_IN
     * selectivity.  (This assumes that all the quals attached to the join
     * are IN quals, which should be true.)
     */
    joininfactor = join_in_selectivity(path, root);

    /* cost of source data */

    /*
     * NOTE: clearly, we must pay both outer and inner paths' startup_cost
     * before we can start returning tuples, so the join's startup cost is
     * their sum.  What's not so clear is whether the inner path's
     * startup_cost must be paid again on each rescan of the inner path.
     * This is not true if the inner path is materialized or is a hashjoin,
     * but probably is true otherwise.
     */
    startup_cost += outer_path->startup_cost + inner_path->startup_cost;
    run_cost += outer_path->total_cost - outer_path->startup_cost;
    if (IsA(inner_path, MaterialPath) ||
        IsA(inner_path, HashPath))
    {
        /* charge only run cost for each iteration of inner path */
    }
    else
    {
        /*
         * charge startup cost for each iteration of inner path, except we
         * already charged the first startup_cost in our own startup
         */
        run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
    }
    run_cost += outer_path_rows *
        (inner_path->total_cost - inner_path->startup_cost) * joininfactor;

    /*
     * Compute number of tuples processed (not number emitted!)
     */
    ntuples = outer_path_rows * inner_path_rows * joininfactor;

    /* CPU costs */
    cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo);
    startup_cost += restrict_qual_cost.startup;
    cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
    run_cost += cpu_per_tuple * ntuples;

    path->path.startup_cost = startup_cost;
    path->path.total_cost = startup_cost + run_cost;
}
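/*
 * Illustrative sketch (not part of costsize.c): the core nestloop
 * arithmetic from cost_nestloop above, with path nodes reduced to plain
 * numbers.  The inner_is_material flag stands in for the
 * IsA(MaterialPath/HashPath) test; cpu_tuple_cost = 0.01, a zero qual
 * cost, and joininfactor = 1.0 (a plain inner join) are assumptions.
 */
#include <stdbool.h>
#include <stdio.h>

static double
nestloop_total_cost(double outer_startup, double outer_total,
                    double inner_startup, double inner_total,
                    double outer_rows, double inner_rows,
                    bool inner_is_material)
{
    const double cpu_tuple_cost = 0.01; /* assumed default; quals ignored */
    double      startup_cost = outer_startup + inner_startup;
    double      run_cost = outer_total - outer_startup;

    /*
     * Pay inner startup again on each rescan, unless the inner side
     * materializes (or hashes) its input and so restarts for free.
     */
    if (!inner_is_material)
        run_cost += (outer_rows - 1) * inner_startup;
    run_cost += outer_rows * (inner_total - inner_startup);

    /* per-tuple CPU charge on every inner tuple processed */
    run_cost += cpu_tuple_cost * outer_rows * inner_rows;
    return startup_cost + run_cost;
}

int
main(void)
{
    /* a costly inner startup makes avoiding the rescan charge pay off */
    printf("plain inner:        %.2f\n",
           nestloop_total_cost(0, 100, 50, 60, 1000, 10, false));
    printf("materialized inner: %.2f\n",
           nestloop_total_cost(0, 100, 50, 60, 1000, 10, true));
    return 0;
}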
/*
 * cost_mergejoin
 *      Determines and returns the cost of joining two relations using the
 *      merge join algorithm.
 *
 * 'path' is already filled in except for the cost fields
 *
 * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
 * outersortkeys and innersortkeys are lists of the keys to be used
 * to sort the outer and inner relations, or NIL if no explicit
 * sort is needed because the source path is already ordered.
 */
void
cost_mergejoin(MergePath *path, PlannerInfo *root)
{
    Path       *outer_path = path->jpath.outerjoinpath;
    Path       *inner_path = path->jpath.innerjoinpath;
    List       *mergeclauses = path->path_mergeclauses;
    List       *outersortkeys = path->outersortkeys;
    List       *innersortkeys = path->innersortkeys;
    Cost        startup_cost = 0;
    Cost        run_cost = 0;
    Cost        cpu_per_tuple;
    Selectivity merge_selec;
    QualCost    merge_qual_cost;
    QualCost    qp_qual_cost;
    RestrictInfo *firstclause;
    double      outer_path_rows = PATH_ROWS(outer_path);
    double      inner_path_rows = PATH_ROWS(inner_path);
    double      outer_rows,
                inner_rows;
    double      mergejointuples,
                rescannedtuples;
    double      rescanratio;
    Selectivity outerscansel,
                innerscansel;
    Selectivity joininfactor;
    Path        sort_path;      /* dummy for result of cost_sort */

    if (!enable_mergejoin)
        startup_cost += disable_cost;

    /*
     * Compute cost and selectivity of the mergequals and qpquals (other
     * restriction clauses) separately.  We use approx_selectivity here for
     * speed --- in most cases, any errors won't affect the result much.
     *
     * Note: it's probably bogus to use the normal selectivity calculation
     * here when either the outer or inner path is a UniquePath.
     */
    merge_selec = approx_selectivity(root, mergeclauses,
                                     path->jpath.jointype);
    cost_qual_eval(&merge_qual_cost, mergeclauses);
    cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo);
    qp_qual_cost.startup -= merge_qual_cost.startup;
    qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple;

    /* approx # tuples passing the merge quals */
    mergejointuples = clamp_row_est(merge_selec *
                                    outer_path_rows * inner_path_rows);

    /*
     * When there are equal merge keys in the outer relation, the mergejoin
     * must rescan any matching tuples in the inner relation.  This means
     * re-fetching inner tuples.  Our cost model for this is that a re-fetch
     * costs the same as an original fetch, which is probably an
     * overestimate; but on the other hand we ignore the bookkeeping costs
     * of mark/restore.  Not clear if it's worth developing a more refined
     * model.
     *
     * The number of re-fetches can be estimated approximately as size of
     * merge join output minus size of inner relation.  Assume that the
     * distinct key values are 1, 2, ..., and denote the number of values of
     * each key in the outer relation as m1, m2, ...; in the inner relation,
     * n1, n2, ...  Then we have
     *
     * size of join = m1 * n1 + m2 * n2 + ...
     *
     * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ...
     *     = m1 * n1 + m2 * n2 + ... - (n1 + n2 + ...)
     *     = size of join - size of inner relation
     *
     * This equation works correctly for outer tuples having no inner match
     * (nk = 0), but not for inner tuples having no outer match (mk = 0); we
     * are effectively subtracting those from the number of rescanned
     * tuples, when we should not.  Can we do better without expensive
     * selectivity computations?
     */
    if (IsA(outer_path, UniquePath))
        rescannedtuples = 0;
    else
    {
        rescannedtuples = mergejointuples - inner_path_rows;
        /* Must clamp because of possible underestimate */
        if (rescannedtuples < 0)
            rescannedtuples = 0;
    }
    /* We'll inflate inner run cost this much to account for rescanning */
    rescanratio = 1.0 + (rescannedtuples / inner_path_rows);

    /*
     * A merge join will stop as soon as it exhausts either input stream
     * (unless it's an outer join, in which case the outer side has to be
     * scanned all the way anyway).  Estimate fraction of the left and right
     * inputs that will actually need to be scanned.  We use only the first
     * (most significant) merge clause for this purpose.
     *
     * Since this calculation is somewhat expensive, and will be the same
     * for all mergejoin paths associated with the merge clause, we cache
     * the results in the RestrictInfo node.
     */
    if (mergeclauses && path->jpath.jointype != JOIN_FULL)
    {
        firstclause = (RestrictInfo *) linitial(mergeclauses);
        if (firstclause->left_mergescansel < 0) /* not computed yet? */
            mergejoinscansel(root, (Node *) firstclause->clause,
                             &firstclause->left_mergescansel,
                             &firstclause->right_mergescansel);

        if (bms_is_subset(firstclause->left_relids,
                          outer_path->parent->relids))
        {
            /* left side of clause is outer */
            outerscansel = firstclause->left_mergescansel;
            innerscansel = firstclause->right_mergescansel;
        }
        else
        {
            /* left side of clause is inner */
            outerscansel = firstclause->right_mergescansel;
            innerscansel = firstclause->left_mergescansel;
        }
        if (path->jpath.jointype == JOIN_LEFT)
            outerscansel = 1.0;
        else if (path->jpath.jointype == JOIN_RIGHT)
            innerscansel = 1.0;
    }
    else
    {
        /* cope with clauseless or full mergejoin */
        outerscansel = innerscansel = 1.0;
    }

    /* convert selectivity to row count; must scan at least one row */
    outer_rows = clamp_row_est(outer_path_rows * outerscansel);
    inner_rows = clamp_row_est(inner_path_rows * innerscansel);

    /*
     * Readjust scan selectivities to account for above rounding.  This is
     * normally an insignificant effect, but when there are only a few rows
     * in the inputs, failing to do this makes for a large percentage error.
     */
    outerscansel = outer_rows / outer_path_rows;
    innerscansel = inner_rows / inner_path_rows;

    /* cost of source data */

    if (outersortkeys)          /* do we need to sort outer? */
    {
        cost_sort(&sort_path,
                  root,
                  outersortkeys,
                  outer_path->total_cost,
                  outer_path_rows,
                  outer_path->parent->width);
        startup_cost += sort_path.startup_cost;
        run_cost += (sort_path.total_cost - sort_path.startup_cost)
            * outerscansel;
    }
    else
    {
        startup_cost += outer_path->startup_cost;
        run_cost += (outer_path->total_cost - outer_path->startup_cost)
            * outerscansel;
    }
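/*
 * Illustrative sketch (not part of costsize.c): a worked instance of the
 * rescan estimate derived in the comment inside cost_mergejoin above.
 * With outer key multiplicities m = {2, 3} and inner multiplicities
 * n = {4, 5}, the join produces m1*n1 + m2*n2 = 8 + 15 = 23 tuples, the
 * inner relation holds n1 + n2 = 9 tuples, and the rescanned count is
 * 23 - 9 = 14, exactly (m1-1)*n1 + (m2-1)*n2.  The code below checks
 * that identity and the resulting rescanratio; the numbers are made up.
 */
#include <stdio.h>

int
main(void)
{
    double      m[] = {2, 3};   /* outer multiplicity per key value */
    double      n[] = {4, 5};   /* inner multiplicity per key value */
    double      join_size = 0,
                inner_size = 0,
                rescanned = 0;
    int         i;

    for (i = 0; i < 2; i++)
    {
        join_size += m[i] * n[i];
        inner_size += n[i];
        rescanned += (m[i] - 1) * n[i];
    }
    /* both counting methods print 14 */
    printf("join - inner = %.0f\n", join_size - inner_size);
    printf("direct count = %.0f\n", rescanned);
    printf("rescanratio  = %.4f\n",
           1.0 + (join_size - inner_size) / inner_size);
    return 0;
}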