⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 costsize.c

📁 PostgreSQL 8.1.4的源码 适用于Linux下的开源数据库系统
💻 C
📖 第 1 页 / 共 5 页
字号:
			/* NOTE(review): chunk opens mid-way through cost_mergejoin; the
			 * statement continued here (run_cost += ... * outerscansel)
			 * begins above the visible region — confirm against full file. */
			* outerscansel;
	}

	if (innersortkeys)			/* do we need to sort inner? */
	{
		cost_sort(&sort_path,
				  root,
				  innersortkeys,
				  inner_path->total_cost,
				  inner_path_rows,
				  inner_path->parent->width);
		startup_cost += sort_path.startup_cost;
		run_cost += (sort_path.total_cost - sort_path.startup_cost)
			* innerscansel * rescanratio;
	}
	else
	{
		startup_cost += inner_path->startup_cost;
		run_cost += (inner_path->total_cost - inner_path->startup_cost)
			* innerscansel * rescanratio;
	}

	/* CPU costs */

	/*
	 * If we're doing JOIN_IN then we will stop outputting inner tuples for an
	 * outer tuple as soon as we have one match.  Account for the effects of
	 * this by scaling down the cost estimates in proportion to the expected
	 * output size.  (This assumes that all the quals attached to the join are
	 * IN quals, which should be true.)
	 */
	joininfactor = join_in_selectivity(&path->jpath, root);

	/*
	 * The number of tuple comparisons needed is approximately number of outer
	 * rows plus number of inner rows plus number of rescanned tuples (can we
	 * refine this?).  At each one, we need to evaluate the mergejoin quals.
	 * NOTE: JOIN_IN mode does not save any work here, so do NOT include
	 * joininfactor.
	 */
	startup_cost += merge_qual_cost.startup;
	run_cost += merge_qual_cost.per_tuple *
		(outer_rows + inner_rows * rescanratio);

	/*
	 * For each tuple that gets through the mergejoin proper, we charge
	 * cpu_tuple_cost plus the cost of evaluating additional restriction
	 * clauses that are to be applied at the join.  (This is pessimistic since
	 * not all of the quals may get evaluated at each tuple.)  This work is
	 * skipped in JOIN_IN mode, so apply the factor.
	 */
	startup_cost += qp_qual_cost.startup;
	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
	run_cost += cpu_per_tuple * mergejointuples * joininfactor;

	path->jpath.path.startup_cost = startup_cost;
	path->jpath.path.total_cost = startup_cost + run_cost;
}

/*
 * cost_hashjoin
 *	  Determines and returns the cost of joining two relations using the
 *	  hash join algorithm.
 *
 * 'path' is already filled in except for the cost fields
 *
 * Note: path's hashclauses should be a subset of the joinrestrictinfo list
 */
void
cost_hashjoin(HashPath *path, PlannerInfo *root)
{
	Path	   *outer_path = path->jpath.outerjoinpath;
	Path	   *inner_path = path->jpath.innerjoinpath;
	List	   *hashclauses = path->path_hashclauses;
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		cpu_per_tuple;
	Selectivity hash_selec;
	QualCost	hash_qual_cost;
	QualCost	qp_qual_cost;
	double		hashjointuples;
	double		outer_path_rows = PATH_ROWS(outer_path);
	double		inner_path_rows = PATH_ROWS(inner_path);
	double		outerbytes = relation_byte_size(outer_path_rows,
												outer_path->parent->width);
	double		innerbytes = relation_byte_size(inner_path_rows,
												inner_path->parent->width);
	int			num_hashclauses = list_length(hashclauses);
	int			numbuckets;
	int			numbatches;
	double		virtualbuckets;
	Selectivity innerbucketsize;
	Selectivity joininfactor;
	ListCell   *hcl;

	/* disable_cost makes the path unattractive without forbidding it */
	if (!enable_hashjoin)
		startup_cost += disable_cost;

	/*
	 * Compute cost and selectivity of the hashquals and qpquals (other
	 * restriction clauses) separately.  We use approx_selectivity here for
	 * speed --- in most cases, any errors won't affect the result much.
	 *
	 * Note: it's probably bogus to use the normal selectivity calculation
	 * here when either the outer or inner path is a UniquePath.
	 */
	hash_selec = approx_selectivity(root, hashclauses,
									path->jpath.jointype);
	cost_qual_eval(&hash_qual_cost, hashclauses);
	cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo);
	/* qpquals = all join quals minus the hashquals; subtract their cost */
	qp_qual_cost.startup -= hash_qual_cost.startup;
	qp_qual_cost.per_tuple -= hash_qual_cost.per_tuple;

	/* approx # tuples passing the hash quals */
	hashjointuples = clamp_row_est(hash_selec * outer_path_rows * inner_path_rows);

	/* cost of source data */
	startup_cost += outer_path->startup_cost;
	run_cost += outer_path->total_cost - outer_path->startup_cost;
	/* inner rel must be fully read to build the hash table before output */
	startup_cost += inner_path->total_cost;

	/*
	 * Cost of computing hash function: must do it once per input tuple. We
	 * charge one cpu_operator_cost for each column's hash function.
	 *
	 * XXX when a hashclause is more complex than a single operator, we really
	 * should charge the extra eval costs of the left or right side, as
	 * appropriate, here.  This seems more work than it's worth at the moment.
	 */
	startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
	run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;

	/* Get hash table size that executor would use for inner relation */
	ExecChooseHashTableSize(inner_path_rows,
							inner_path->parent->width,
							&numbuckets,
							&numbatches);
	virtualbuckets = (double) numbuckets *(double) numbatches;

	/*
	 * Determine bucketsize fraction for inner relation.  We use the smallest
	 * bucketsize estimated for any individual hashclause; this is undoubtedly
	 * conservative.
	 *
	 * BUT: if inner relation has been unique-ified, we can assume it's good
	 * for hashing.  This is important both because it's the right answer, and
	 * because we avoid contaminating the cache with a value that's wrong for
	 * non-unique-ified paths.
	 */
	if (IsA(inner_path, UniquePath))
		innerbucketsize = 1.0 / virtualbuckets;
	else
	{
		innerbucketsize = 1.0;
		foreach(hcl, hashclauses)
		{
			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
			Selectivity thisbucketsize;

			Assert(IsA(restrictinfo, RestrictInfo));

			/*
			 * First we have to figure out which side of the hashjoin clause
			 * is the inner side.
			 *
			 * Since we tend to visit the same clauses over and over when
			 * planning a large query, we cache the bucketsize estimate in the
			 * RestrictInfo node to avoid repeated lookups of statistics.
			 */
			if (bms_is_subset(restrictinfo->right_relids,
							  inner_path->parent->relids))
			{
				/* righthand side is inner */
				thisbucketsize = restrictinfo->right_bucketsize;
				if (thisbucketsize < 0)
				{
					/* not cached yet */
					thisbucketsize =
						estimate_hash_bucketsize(root,
										   get_rightop(restrictinfo->clause),
												 virtualbuckets);
					restrictinfo->right_bucketsize = thisbucketsize;
				}
			}
			else
			{
				Assert(bms_is_subset(restrictinfo->left_relids,
									 inner_path->parent->relids));
				/* lefthand side is inner */
				thisbucketsize = restrictinfo->left_bucketsize;
				if (thisbucketsize < 0)
				{
					/* not cached yet */
					thisbucketsize =
						estimate_hash_bucketsize(root,
											get_leftop(restrictinfo->clause),
												 virtualbuckets);
					restrictinfo->left_bucketsize = thisbucketsize;
				}
			}

			/* keep the smallest (most pessimistic) bucketsize seen */
			if (innerbucketsize > thisbucketsize)
				innerbucketsize = thisbucketsize;
		}
	}

	/*
	 * If inner relation is too big then we will need to "batch" the join,
	 * which implies writing and reading most of the tuples to disk an extra
	 * time.  Charge one cost unit per page of I/O (correct since it should be
	 * nice and sequential...).  Writing the inner rel counts as startup cost,
	 * all the rest as run cost.
	 */
	if (numbatches > 1)
	{
		double		outerpages = page_size(outer_path_rows,
										   outer_path->parent->width);
		double		innerpages = page_size(inner_path_rows,
										   inner_path->parent->width);

		startup_cost += innerpages;
		run_cost += innerpages + 2 * outerpages;
	}

	/* CPU costs */

	/*
	 * If we're doing JOIN_IN then we will stop comparing inner tuples to an
	 * outer tuple as soon as we have one match.  Account for the effects of
	 * this by scaling down the cost estimates in proportion to the expected
	 * output size.  (This assumes that all the quals attached to the join are
	 * IN quals, which should be true.)
	 */
	joininfactor = join_in_selectivity(&path->jpath, root);

	/*
	 * The number of tuple comparisons needed is the number of outer tuples
	 * times the typical number of tuples in a hash bucket, which is the inner
	 * relation size times its bucketsize fraction.  At each one, we need to
	 * evaluate the hashjoin quals.  (Note: charging the full qual eval cost
	 * at each tuple is pessimistic, since we don't evaluate the quals unless
	 * the hash values match exactly.)
	 */
	startup_cost += hash_qual_cost.startup;
	run_cost += hash_qual_cost.per_tuple *
		outer_path_rows * clamp_row_est(inner_path_rows * innerbucketsize) *
		joininfactor;

	/*
	 * For each tuple that gets through the hashjoin proper, we charge
	 * cpu_tuple_cost plus the cost of evaluating additional restriction
	 * clauses that are to be applied at the join.  (This is pessimistic since
	 * not all of the quals may get evaluated at each tuple.)
	 */
	startup_cost += qp_qual_cost.startup;
	cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
	run_cost += cpu_per_tuple * hashjointuples * joininfactor;

	/*
	 * Bias against putting larger relation on inside.  We don't want an
	 * absolute prohibition, though, since larger relation might have better
	 * bucketsize --- and we can't trust the size estimates unreservedly,
	 * anyway.  Instead, inflate the run cost by the square root of the size
	 * ratio.  (Why square root?  No real good reason, but it seems
	 * reasonable...)
	 *
	 * Note: before 7.4 we implemented this by inflating startup cost; but if
	 * there's a disable_cost component in the input paths' startup cost, that
	 * unfairly penalizes the hash.  Probably it'd be better to keep track of
	 * disable penalty separately from cost.
	 */
	if (innerbytes > outerbytes && outerbytes > 0)
		run_cost *= sqrt(innerbytes / outerbytes);

	path->jpath.path.startup_cost = startup_cost;
	path->jpath.path.total_cost = startup_cost + run_cost;
}

/*
 * cost_qual_eval
 *		Estimate the CPU costs of evaluating a WHERE clause.
 *		The input can be either an implicitly-ANDed list of boolean
 *		expressions, or a list of RestrictInfo nodes.
 *		The result includes both a one-time (startup) component,
 *		and a per-evaluation component.
 */
void
cost_qual_eval(QualCost *cost, List *quals)
{
	ListCell   *l;

	cost->startup = 0;
	cost->per_tuple = 0;

	/* We don't charge any cost for the implicit ANDing at top level ... */

	foreach(l, quals)
	{
		Node	   *qual = (Node *) lfirst(l);

		/*
		 * RestrictInfo nodes contain an eval_cost field reserved for this
		 * routine's use, so that it's not necessary to evaluate the qual
		 * clause's cost more than once.  If the clause's cost hasn't been
		 * computed yet, the field's startup value will contain -1.
		 */
		if (qual && IsA(qual, RestrictInfo))
		{
			RestrictInfo *restrictinfo = (RestrictInfo *) qual;

			if (restrictinfo->eval_cost.startup < 0)
			{
				restrictinfo->eval_cost.startup = 0;
				restrictinfo->eval_cost.per_tuple = 0;
				cost_qual_eval_walker((Node *) restrictinfo->clause,
									  &restrictinfo->eval_cost);
			}
			cost->startup += restrictinfo->eval_cost.startup;
			cost->per_tuple += restrictinfo->eval_cost.per_tuple;
		}
		else
		{
			/* If it's a bare expression, must always do it the hard way */
			cost_qual_eval_walker(qual, cost);
		}
	}
}

static bool
cost_qual_eval_walker(Node *node, QualCost *total)
{
	if (node == NULL)
		return false;

	/*
	 * Our basic strategy is to charge one cpu_operator_cost for each operator
	 * or function node in the given tree.  Vars and Consts are charged zero,
	 * and so are boolean operators (AND, OR, NOT). Simplistic, but a lot
	 * better than no model at all.
	 *
	 * Should we try to account for the possibility of short-circuit
	 * evaluation of AND/OR?
	 */
	if (IsA(node, FuncExpr) ||
		IsA(node, OpExpr) ||
		IsA(node, DistinctExpr) ||
		IsA(node, NullIfExpr))
		total->per_tuple += cpu_operator_cost;
	else if (IsA(node, ScalarArrayOpExpr))
	{
		/* should charge more than 1 op cost, but how many? */
		total->per_tuple += cpu_operator_cost * 10;
	}
	else if (IsA(node, SubLink))
	{
		/* This routine should not be applied to un-planned expressions */
		elog(ERROR, "cannot handle unplanned sub-select");
	}
	else if (IsA(node, SubPlan))
	{
		/*
		 * A subplan node in an expression typically indicates that the
		 * subplan will be executed on each evaluation, so charge accordingly.
		 * (Sub-selects that can be executed as InitPlans have already been
		 * removed from the expression.)
		 *
		 * An exception occurs when we have decided we can implement the
		 * subplan by hashing.
		 *
		 */
		SubPlan    *subplan = (SubPlan *) node;
		Plan	   *plan = subplan->plan;

		if (subplan->useHashTable)
		{
			/*
			 * If we are using a hash table for the subquery outputs, then the
			 * cost of evaluating the query is a one-time cost. We charge one
			 * cpu_operator_cost per tuple for the work of loading the
			 * hashtable, too.
			 */
			total->startup += plan->total_cost +
				cpu_operator_cost * plan->plan_rows;

			/*
			 * The per-tuple costs include the cost of evaluating the lefthand
			 * expressions, plus the cost of probing the hashtable. Recursion
			 * into the exprs list will handle the lefthand expressions
			 * properly, and will count one cpu_operator_cost for each
			 * comparison operator.  That is probably too low for the probing
			 * cost, but it's hard to make a better estimate, so live with it
			 * for now.
			 */
		}
		else
		{
			/*
			 * NOTE(review): chunk is truncated below — this comment and the
			 * else-branch continue past the visible region.
			 *
			 * Otherwise we will be rescanning the subplan output on each
			 * evaluation.	We need to estimate how much of the output we will

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -