/*
 * costsize.c
 *
 * Excerpt from PostgreSQL 7.4.6 for Linux
 * (src/backend/optimizer/path/costsize.c, 1,993 lines total;
 * this capture is page 1 of 5 from a web code viewer).
 */
		inner_path_rows = ((IndexPath *) inner_path)->rows;

	ntuples = inner_path_rows * outer_path_rows;

	/* CPU costs */
	cost_qual_eval(&restrict_qual_cost, restrictlist);
	startup_cost += restrict_qual_cost.startup;
	cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple;
	run_cost += cpu_per_tuple * ntuples;

	path->path.startup_cost = startup_cost;
	path->path.total_cost = startup_cost + run_cost;
}

/*
 * cost_mergejoin
 *	  Determines and returns the cost of joining two relations using the
 *	  merge join algorithm.
 *
 * 'path' is already filled in except for the cost fields
 *
 * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list;
 * outersortkeys and innersortkeys are lists of the keys to be used
 * to sort the outer and inner relations, or NIL if no explicit
 * sort is needed because the source path is already ordered.
 */
void
cost_mergejoin(MergePath *path, Query *root)
{
	Path	   *outer_path = path->jpath.outerjoinpath;
	Path	   *inner_path = path->jpath.innerjoinpath;
	List	   *restrictlist = path->jpath.joinrestrictinfo;
	List	   *mergeclauses = path->path_mergeclauses;
	List	   *outersortkeys = path->outersortkeys;
	List	   *innersortkeys = path->innersortkeys;
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		cpu_per_tuple;
	Selectivity merge_selec;
	Selectivity qp_selec;
	QualCost	merge_qual_cost;
	QualCost	qp_qual_cost;
	RestrictInfo *firstclause;
	List	   *qpquals;
	double		outer_path_rows = PATH_ROWS(outer_path);
	double		inner_path_rows = PATH_ROWS(inner_path);
	double		outer_rows,
				inner_rows;
	double		mergejointuples,
				rescannedtuples;
	double		qptuples;
	double		rescanratio;
	Selectivity outerscansel,
				innerscansel;
	Selectivity joininfactor;
	Path		sort_path;		/* dummy for result of cost_sort */

	if (!enable_mergejoin)
		startup_cost += disable_cost;

	/*
	 * Compute cost and selectivity of the mergequals and qpquals (other
	 * restriction clauses) separately.  We use approx_selectivity here
	 * for speed --- in most cases, any errors won't affect the result
	 * much.
	 *
	 * Note: it's probably bogus to use the normal selectivity calculation
	 * here when either the outer or inner path is a UniquePath.
	 */
	merge_selec = approx_selectivity(root, mergeclauses,
									 path->jpath.jointype);
	cost_qual_eval(&merge_qual_cost, mergeclauses);
	/* qpquals = joinrestrictinfo minus the merge clauses themselves */
	qpquals = set_ptrDifference(restrictlist, mergeclauses);
	qp_selec = approx_selectivity(root, qpquals,
								  path->jpath.jointype);
	cost_qual_eval(&qp_qual_cost, qpquals);
	freeList(qpquals);

	/* approx # tuples passing the merge quals */
	mergejointuples = ceil(merge_selec * outer_path_rows * inner_path_rows);
	/* approx # tuples passing qpquals as well */
	qptuples = ceil(mergejointuples * qp_selec);

	/*
	 * When there are equal merge keys in the outer relation, the
	 * mergejoin must rescan any matching tuples in the inner relation.
	 * This means re-fetching inner tuples.  Our cost model for this is
	 * that a re-fetch costs the same as an original fetch, which is
	 * probably an overestimate; but on the other hand we ignore the
	 * bookkeeping costs of mark/restore. Not clear if it's worth
	 * developing a more refined model.
	 *
	 * The number of re-fetches can be estimated approximately as size of
	 * merge join output minus size of inner relation.	Assume that the
	 * distinct key values are 1, 2, ..., and denote the number of values
	 * of each key in the outer relation as m1, m2, ...; in the inner
	 * relation, n1, n2, ...  Then we have
	 *
	 * size of join = m1 * n1 + m2 * n2 + ...
	 *
	 * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
	 * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
	 * relation
	 *
	 * This equation works correctly for outer tuples having no inner match
	 * (nk = 0), but not for inner tuples having no outer match (mk = 0);
	 * we are effectively subtracting those from the number of rescanned
	 * tuples, when we should not.	Can we do better without expensive
	 * selectivity computations?
	 */
	/* A unique-ified outer path has no duplicate keys, hence no rescans */
	if (IsA(outer_path, UniquePath))
		rescannedtuples = 0;
	else
	{
		rescannedtuples = mergejointuples - inner_path_rows;
		/* Must clamp because of possible underestimate */
		if (rescannedtuples < 0)
			rescannedtuples = 0;
	}
	/*
	 * We'll inflate inner run cost this much to account for rescanning.
	 * NOTE(review): assumes inner_path_rows >= 1 so the division is safe;
	 * PATH_ROWS estimates appear to be clamped elsewhere --- confirm.
	 */
	rescanratio = 1.0 + (rescannedtuples / inner_path_rows);

	/*
	 * A merge join will stop as soon as it exhausts either input stream.
	 * Estimate fraction of the left and right inputs that will actually
	 * need to be scanned.	We use only the first (most significant) merge
	 * clause for this purpose.
	 *
	 * Since this calculation is somewhat expensive, and will be the same for
	 * all mergejoin paths associated with the merge clause, we cache the
	 * results in the RestrictInfo node.
	 */
	if (mergeclauses)
	{
		firstclause = (RestrictInfo *) lfirst(mergeclauses);
		if (firstclause->left_mergescansel < 0)	/* not computed yet? */
			mergejoinscansel(root, (Node *) firstclause->clause,
							 &firstclause->left_mergescansel,
							 &firstclause->right_mergescansel);

		/* Match the clause's left/right sides to the outer/inner inputs */
		if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids))
		{
			/* left side of clause is outer */
			outerscansel = firstclause->left_mergescansel;
			innerscansel = firstclause->right_mergescansel;
		}
		else
		{
			/* left side of clause is inner */
			outerscansel = firstclause->right_mergescansel;
			innerscansel = firstclause->left_mergescansel;
		}
	}
	else
	{
		/* cope with clauseless mergejoin */
		outerscansel = innerscansel = 1.0;
	}

	/* convert selectivity to row count; must scan at least one row */
	outer_rows = ceil(outer_path_rows * outerscansel);
	if (outer_rows < 1)
		outer_rows = 1;
	inner_rows = ceil(inner_path_rows * innerscansel);
	if (inner_rows < 1)
		inner_rows = 1;

	/*
	 * Readjust scan selectivities to account for above rounding.  This is
	 * normally an insignificant effect, but when there are only a few
	 * rows in the inputs, failing to do this makes for a large percentage
	 * error.
	 */
	outerscansel = outer_rows / outer_path_rows;
	innerscansel = inner_rows / inner_path_rows;

	/* cost of source data */

	if (outersortkeys)			/* do we need to sort outer? */
	{
		cost_sort(&sort_path,
				  root,
				  outersortkeys,
				  outer_path->total_cost,
				  outer_path_rows,
				  outer_path->parent->width);
		startup_cost += sort_path.startup_cost;
		/* only the scanned fraction of the sorted run cost is charged */
		run_cost += (sort_path.total_cost - sort_path.startup_cost)
			* outerscansel;
	}
	else
	{
		startup_cost += outer_path->startup_cost;
		run_cost += (outer_path->total_cost - outer_path->startup_cost)
			* outerscansel;
	}

	if (innersortkeys)			/* do we need to sort inner? */
	{
		cost_sort(&sort_path,
				  root,
				  innersortkeys,
				  inner_path->total_cost,
				  inner_path_rows,
				  inner_path->parent->width);
		startup_cost += sort_path.startup_cost;
		/* inner run cost is inflated by rescanratio for duplicate keys */
		run_cost += (sort_path.total_cost - sort_path.startup_cost)
			* innerscansel * rescanratio;
	}
	else
	{
		startup_cost += inner_path->startup_cost;
		run_cost += (inner_path->total_cost - inner_path->startup_cost)
			* innerscansel * rescanratio;
	}

	/* CPU costs */

	/*
	 * If we're doing JOIN_IN then we will stop outputting inner tuples
	 * for an outer tuple as soon as we have one match.  Account for the
	 * effects of this by scaling down the cost estimates in proportion to
	 * the expected output size.  (This assumes that all the quals
	 * attached to the join are IN quals, which should be true.)
	 */
	if (path->jpath.jointype == JOIN_IN &&
		qptuples > path->jpath.path.parent->rows)
		joininfactor = path->jpath.path.parent->rows / qptuples;
	else
		joininfactor = 1.0;

	/*
	 * The number of tuple comparisons needed is approximately number of
	 * outer rows plus number of inner rows plus number of rescanned
	 * tuples (can we refine this?).  At each one, we need to evaluate the
	 * mergejoin quals.  NOTE: JOIN_IN mode does not save any work here,
	 * so do NOT include joininfactor.
	 */
	startup_cost += merge_qual_cost.startup;
	run_cost += merge_qual_cost.per_tuple *
		(outer_rows + inner_rows * rescanratio);

	/*
	 * For each tuple that gets through the mergejoin proper, we charge
	 * cpu_tuple_cost plus the cost of evaluating additional restriction
	 * clauses that are to be applied at the join.	(This is pessimistic
	 * since not all of the quals may get evaluated at each tuple.)  This
	 * work is skipped in JOIN_IN mode, so apply the factor.
	 */
	startup_cost += qp_qual_cost.startup;
	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
	run_cost += cpu_per_tuple * mergejointuples * joininfactor;

	path->jpath.path.startup_cost = startup_cost;
	path->jpath.path.total_cost = startup_cost + run_cost;
}

/*
 * cost_hashjoin
 *	  Determines and returns the cost of joining two relations using the
 *	  hash join algorithm.
 *
 * 'path' is already filled in except for the cost fields
 *
 * Note: path's hashclauses should be a subset of the joinrestrictinfo list
 */
void
cost_hashjoin(HashPath *path, Query *root)
{
	Path	   *outer_path = path->jpath.outerjoinpath;
	Path	   *inner_path = path->jpath.innerjoinpath;
	List	   *restrictlist = path->jpath.joinrestrictinfo;
	List	   *hashclauses = path->path_hashclauses;
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		cpu_per_tuple;
	Selectivity hash_selec;
	Selectivity qp_selec;
	QualCost	hash_qual_cost;
	QualCost	qp_qual_cost;
	double		hashjointuples;
	double		qptuples;
	double		outer_path_rows = PATH_ROWS(outer_path);
	double		inner_path_rows = PATH_ROWS(inner_path);
	double		outerbytes = relation_byte_size(outer_path_rows,
											  outer_path->parent->width);
	double		innerbytes = relation_byte_size(inner_path_rows,
											  inner_path->parent->width);
	int			num_hashclauses = length(hashclauses);
	int			virtualbuckets;
	int			physicalbuckets;
	int			numbatches;
	Selectivity innerbucketsize;
	Selectivity joininfactor;
	List	   *hcl;
	List	   *qpquals;

	if
(!enable_hashjoin)		startup_cost += disable_cost;	/*	 * Compute cost and selectivity of the hashquals and qpquals (other	 * restriction clauses) separately.  We use approx_selectivity here	 * for speed --- in most cases, any errors won't affect the result	 * much.	 *	 * Note: it's probably bogus to use the normal selectivity calculation	 * here when either the outer or inner path is a UniquePath.	 */	hash_selec = approx_selectivity(root, hashclauses,									path->jpath.jointype);	cost_qual_eval(&hash_qual_cost, hashclauses);	qpquals = set_ptrDifference(restrictlist, hashclauses);	qp_selec = approx_selectivity(root, qpquals,								  path->jpath.jointype);	cost_qual_eval(&qp_qual_cost, qpquals);	freeList(qpquals);	/* approx # tuples passing the hash quals */	hashjointuples = ceil(hash_selec * outer_path_rows * inner_path_rows);	/* approx # tuples passing qpquals as well */	qptuples = ceil(hashjointuples * qp_selec);	/* cost of source data */	startup_cost += outer_path->startup_cost;	run_cost += outer_path->total_cost - outer_path->startup_cost;	startup_cost += inner_path->total_cost;	/*	 * Cost of computing hash function: must do it once per input tuple.	 * We charge one cpu_operator_cost for each column's hash function.	 *	 * XXX when a hashclause is more complex than a single operator, we	 * really should charge the extra eval costs of the left or right	 * side, as appropriate, here.	This seems more work than it's worth	 * at the moment.	 */	startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;	run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;	/* Get hash table size that executor would use for inner relation */	ExecChooseHashTableSize(inner_path_rows,							inner_path->parent->width,							&virtualbuckets,							&physicalbuckets,							&numbatches);	/*	 * Determine bucketsize fraction for inner relation.  We use the	 * smallest bucketsize estimated for any individual hashclause; this	 * is undoubtedly conservative.	 
*	 * BUT: if inner relation has been unique-ified, we can assume it's good	 * for hashing.  This is important both because it's the right answer,	 * and because we avoid contaminating the cache with a value that's	 * wrong for non-unique-ified paths.	 */	if (IsA(inner_path, UniquePath))		innerbucketsize = 1.0 / virtualbuckets;	else	{		innerbucketsize = 1.0;		foreach(hcl, hashclauses)		{			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);			Selectivity thisbucketsize;			Assert(IsA(restrictinfo, RestrictInfo));			/*			 * First we have to figure out which side of the hashjoin			 * clause is the inner side.			 *			 * Since we tend to visit the same clauses over and over when			 * planning a large query, we cache the bucketsize estimate in			 * the RestrictInfo node to avoid repeated lookups of			 * statistics.			 */			if (bms_is_subset(restrictinfo->right_relids,							  inner_path->parent->relids))			{				/* righthand side is inner */				thisbucketsize = restrictinfo->right_bucketsize;				if (thisbucketsize < 0)				{					/* not cached yet */					thisbucketsize =						estimate_hash_bucketsize(root,							   (Var *) get_rightop(restrictinfo->clause),												 virtualbuckets);					restrictinfo->right_bucketsize = thisbucketsize;				}			}			else			{				Assert(bms_is_subset(restrictinfo->left_relids,									 inner_path->parent->relids));				/* lefthand side is inner */

/*
 * [End of excerpt.  The remainder of cost_hashjoin and the rest of the
 * file (pages 2-5 of the original capture) are not included here.]
 */