costsize.c

From "PostgreSQL 7.4.6 for Linux" · C source code · 1,993 lines total · page 1 of 5

C
1,993
Font size
	/* NOTE(review): tail of cost_tidscan — its opening is on an earlier page */
	/* Should only be applied to base relations */
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_RELATION);

	if (!enable_tidscan)
		startup_cost += disable_cost;

	/* disk costs --- assume each tuple on a different page */
	run_cost += random_page_cost * ntuples;

	/* CPU costs */
	startup_cost += baserel->baserestrictcost.startup;
	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
	run_cost += cpu_per_tuple * ntuples;

	path->startup_cost = startup_cost;
	path->total_cost = startup_cost + run_cost;
}

/*
 * cost_subqueryscan
 *	  Determines and returns the cost of scanning a subquery RTE.
 *
 * 'path' is filled in with the computed startup_cost and total_cost.
 * 'baserel' must be a base relation of kind RTE_SUBQUERY whose subplan
 * has already been costed.
 */
void
cost_subqueryscan(Path *path, RelOptInfo *baserel)
{
	Cost		startup_cost;
	Cost		run_cost;
	Cost		cpu_per_tuple;

	/* Should only be applied to base relations that are subqueries */
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_SUBQUERY);

	/*
	 * Cost of path is cost of evaluating the subplan, plus cost of
	 * evaluating any restriction clauses that will be attached to the
	 * SubqueryScan node, plus cpu_tuple_cost to account for selection and
	 * projection overhead.
	 */
	path->startup_cost = baserel->subplan->startup_cost;
	path->total_cost = baserel->subplan->total_cost;

	startup_cost = baserel->baserestrictcost.startup;
	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
	run_cost = cpu_per_tuple * baserel->tuples;

	path->startup_cost += startup_cost;
	path->total_cost += startup_cost + run_cost;
}

/*
 * cost_functionscan
 *	  Determines and returns the cost of scanning a function RTE.
 */
void
cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
{
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		cpu_per_tuple;

	/* Should only be applied to base relations that are functions */
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_FUNCTION);

	/*
	 * For now, estimate function's cost at one operator eval per function
	 * call.  Someday we should revive the function cost estimate columns
	 * in pg_proc...
	 */
	cpu_per_tuple = cpu_operator_cost;

	/* Add scanning CPU costs */
	startup_cost += baserel->baserestrictcost.startup;
	cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
	run_cost += cpu_per_tuple * baserel->tuples;

	path->startup_cost = startup_cost;
	path->total_cost = startup_cost + run_cost;
}

/*
 * cost_sort
 *	  Determines and returns the cost of sorting a relation, including
 *	  the cost of reading the input data.
 *
 * If the total volume of data to sort is less than SortMem, we will do
 * an in-memory sort, which requires no I/O and about t*log2(t) tuple
 * comparisons for t tuples.
 *
 * If the total volume exceeds SortMem, we switch to a tape-style merge
 * algorithm.  There will still be about t*log2(t) tuple comparisons in
 * total, but we will also need to write and read each tuple once per
 * merge pass.	We expect about ceil(log6(r)) merge passes where r is the
 * number of initial runs formed (log6 because tuplesort.c uses six-tape
 * merging).  Since the average initial run should be about twice SortMem,
 * we have
 *		disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem)))
 *		cpu = comparison_cost * t * log2(t)
 *
 * The disk traffic is assumed to be half sequential and half random
 * accesses (XXX can't we refine that guess?)
 *
 * We charge two operator evals per tuple comparison, which should be in
 * the right ballpark in most cases.
 *
 * 'pathkeys' is a list of sort keys
 * 'input_cost' is the total cost for reading the input data
 * 'tuples' is the number of tuples in the relation
 * 'width' is the average tuple width in bytes
 *
 * NOTE: some callers currently pass NIL for pathkeys because they
 * can't conveniently supply the sort keys.  Since this routine doesn't
 * currently do anything with pathkeys anyway, that doesn't matter...
 * but if it ever does, it should react gracefully to lack of key data.
 * (Actually, the thing we'd most likely be interested in is just the number
 * of sort keys, which all callers *could* supply.)
 */
void
cost_sort(Path *path, Query *root,
		  List *pathkeys, Cost input_cost, double tuples, int width)
{
	Cost		startup_cost = input_cost;
	Cost		run_cost = 0;
	double		nbytes = relation_byte_size(tuples, width);
	long		sortmembytes = SortMem * 1024L;	/* SortMem GUC is in kilobytes */

	if (!enable_sort)
		startup_cost += disable_cost;

	/*
	 * We want to be sure the cost of a sort is never estimated as zero,
	 * even if passed-in tuple count is zero.  Besides, mustn't do
	 * log(0)...
	 */
	if (tuples < 2.0)
		tuples = 2.0;

	/*
	 * CPU costs
	 *
	 * Assume about two operator evals per tuple comparison and N log2 N
	 * comparisons
	 */
	startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);

	/* disk costs */
	if (nbytes > sortmembytes)
	{
		double		npages = ceil(nbytes / BLCKSZ);
		/* average initial run is about twice SortMem, per header comment */
		double		nruns = nbytes / (sortmembytes * 2);
		double		log_runs = ceil(LOG6(nruns));
		double		npageaccesses;

		/* at least one merge pass even when all data fits in one run */
		if (log_runs < 1.0)
			log_runs = 1.0;
		npageaccesses = 2.0 * npages * log_runs;
		/* Assume half are sequential (cost 1), half are not */
		startup_cost += npageaccesses *
			(1.0 + cost_nonsequential_access(npages)) * 0.5;
	}

	/*
	 * Also charge a small amount (arbitrarily set equal to operator cost)
	 * per extracted tuple.
	 */
	run_cost += cpu_operator_cost * tuples;

	path->startup_cost = startup_cost;
	path->total_cost = startup_cost + run_cost;
}

/*
 * cost_material
 *	  Determines and returns the cost of materializing a relation, including
 *	  the cost of reading the input data.
 *
 * If the total volume of data to materialize exceeds SortMem, we will need
 * to write it to disk, so the cost is much higher in that case.
 */
void
cost_material(Path *path,
			  Cost input_cost, double tuples, int width)
{
	Cost		startup_cost = input_cost;
	Cost		run_cost = 0;
	double		nbytes = relation_byte_size(tuples, width);
	long		sortmembytes = SortMem * 1024L;	/* SortMem GUC is in kilobytes */

	/* disk costs */
	if (nbytes > sortmembytes)
	{
		double		npages = ceil(nbytes / BLCKSZ);

		/* We'll write during startup and read during retrieval */
		startup_cost += npages;
		run_cost += npages;
	}

	/*
	 * Also charge a small amount per extracted tuple.	We use
	 * cpu_tuple_cost so that it doesn't appear worthwhile to materialize
	 * a bare seqscan.
	 */
	run_cost += cpu_tuple_cost * tuples;

	path->startup_cost = startup_cost;
	path->total_cost = startup_cost + run_cost;
}

/*
 * cost_agg
 *		Determines and returns the cost of performing an Agg plan node,
 *		including the cost of its input.
 *
 * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
 * are for appropriately-sorted input.
 */
void
cost_agg(Path *path, Query *root,
		 AggStrategy aggstrategy, int numAggs,
		 int numGroupCols, double numGroups,
		 Cost input_startup_cost, Cost input_total_cost,
		 double input_tuples)
{
	Cost		startup_cost;
	Cost		total_cost;

	/*
	 * We charge one cpu_operator_cost per aggregate function per input
	 * tuple, and another one per output tuple (corresponding to transfn
	 * and finalfn calls respectively).  If we are grouping, we charge an
	 * additional cpu_operator_cost per grouping column per input tuple
	 * for grouping comparisons.
	 *
	 * We will produce a single output tuple if not grouping, and a tuple per
	 * group otherwise.
	 *
	 * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
	 * same total CPU cost, but AGG_SORTED has lower startup cost.	If the
	 * input path is already sorted appropriately, AGG_SORTED should be
	 * preferred (since it has no risk of memory overflow).  This will
	 * happen as long as the computed total costs are indeed exactly equal
	 * --- but if there's roundoff error we might do the wrong thing.  So
	 * be sure that the computations below form the same intermediate
	 * values in the same order.
	 */
	if (aggstrategy == AGG_PLAIN)
	{
		/* all input must be consumed before the single output tuple emerges */
		startup_cost = input_total_cost;
		startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
		/* we aren't grouping */
		total_cost = startup_cost;
	}
	else if (aggstrategy == AGG_SORTED)
	{
		/* Here we are able to deliver output on-the-fly */
		startup_cost = input_startup_cost;
		total_cost = input_total_cost;
		/* calcs phrased this way to match HASHED case, see note above */
		total_cost += cpu_operator_cost * input_tuples * numGroupCols;
		total_cost += cpu_operator_cost * input_tuples * numAggs;
		total_cost += cpu_operator_cost * numGroups * numAggs;
	}
	else
	{
		/* must be AGG_HASHED: hash table must be built before any output */
		startup_cost = input_total_cost;
		startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
		startup_cost += cpu_operator_cost * input_tuples * numAggs;
		total_cost = startup_cost;
		total_cost += cpu_operator_cost * numGroups * numAggs;
	}

	path->startup_cost = startup_cost;
	path->total_cost = total_cost;
}

/*
 * cost_group
 *		Determines and returns the cost of performing a Group plan node,
 *		including the cost of its input.
 *
 * Note: caller must ensure that input costs are for appropriately-sorted
 * input.
 */
void
cost_group(Path *path, Query *root,
		   int numGroupCols, double numGroups,
		   Cost input_startup_cost, Cost input_total_cost,
		   double input_tuples)
{
	Cost		startup_cost;
	Cost		total_cost;

	startup_cost = input_startup_cost;
	total_cost = input_total_cost;

	/*
	 * Charge one cpu_operator_cost per comparison per input tuple. We
	 * assume all columns get compared at most of the tuples.
	 */
	total_cost += cpu_operator_cost * input_tuples * numGroupCols;

	path->startup_cost = startup_cost;
	path->total_cost = total_cost;
}

/*
 * cost_nestloop
 *	  Determines and returns the cost of joining two relations using the
 *	  nested loop algorithm.
 *
 * 'path' is already filled in except for the cost fields
 */
void
cost_nestloop(NestPath *path, Query *root)
{
	Path	   *outer_path = path->outerjoinpath;
	Path	   *inner_path = path->innerjoinpath;
	List	   *restrictlist = path->joinrestrictinfo;
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		cpu_per_tuple;
	QualCost	restrict_qual_cost;
	double		outer_path_rows = PATH_ROWS(outer_path);
	double		inner_path_rows = PATH_ROWS(inner_path);
	double		ntuples;
	Selectivity joininfactor;

	if (!enable_nestloop)
		startup_cost += disable_cost;

	/*
	 * If we're doing JOIN_IN then we will stop scanning inner tuples for
	 * an outer tuple as soon as we have one match.  Account for the
	 * effects of this by scaling down the cost estimates in proportion to
	 * the expected output size.  (This assumes that all the quals
	 * attached to the join are IN quals, which should be true.)
	 *
	 * Note: it's probably bogus to use the normal selectivity calculation
	 * here when either the outer or inner path is a UniquePath.
	 */
	if (path->jointype == JOIN_IN)
	{
		Selectivity qual_selec = approx_selectivity(root, restrictlist,
													path->jointype);
		double		qptuples;

		qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);

		if (qptuples > path->path.parent->rows)
			joininfactor = path->path.parent->rows / qptuples;
		else
			joininfactor = 1.0;
	}
	else
		joininfactor = 1.0;

	/* cost of source data */

	/*
	 * NOTE: clearly, we must pay both outer and inner paths' startup_cost
	 * before we can start returning tuples, so the join's startup cost is
	 * their sum.  What's not so clear is whether the inner path's
	 * startup_cost must be paid again on each rescan of the inner path.
	 * This is not true if the inner path is materialized or is a
	 * hashjoin, but probably is true otherwise.
	 */
	startup_cost += outer_path->startup_cost + inner_path->startup_cost;
	run_cost += outer_path->total_cost - outer_path->startup_cost;
	if (IsA(inner_path, MaterialPath) ||
		IsA(inner_path, HashPath))
	{
		/* charge only run cost for each iteration of inner path */
	}
	else
	{
		/*
		 * charge startup cost for each iteration of inner path, except we
		 * already charged the first startup_cost in our own startup
		 */
		run_cost += (outer_path_rows - 1) * inner_path->startup_cost;
	}
	run_cost += outer_path_rows *
		(inner_path->total_cost - inner_path->startup_cost) * joininfactor;

	/*
	 * Compute number of tuples processed (not number emitted!). If inner
	 * path is an indexscan, be sure to use its estimated output row
	 * count, which may be lower than the restriction-clause-only row
	 * count of its parent.  (We don't include this case in the PATH_ROWS
	 * macro because it applies *only* to a nestloop's inner relation.)
	 * Note: it is correct to use the unadjusted inner_path_rows in the
	 * above calculation for joininfactor, since otherwise we'd be
	 * double-counting the selectivity of the join clause being used for
	 * the index.
	 */
	if (IsA(inner_path, IndexPath))

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?