/*-------------------------------------------------------------------------
 *
 * costsize.c
 *	  Routines to compute (and set) relation sizes and path costs
 *
 * Provenance: PostgreSQL 8.3.4 source code (open-source database).
 * This is page 1 of 5 of the original 2,021-line file; the text below
 * begins mid-way through the header comment of index_pages_fetched().
 *
 *-------------------------------------------------------------------------
 */
 * and in any case counting all the tables may well be an overestimate, since * depending on the join plan not all the tables may be scanned concurrently.) * * The product Ns is the number of tuples fetched; we pass in that * product rather than calculating it here.  "pages" is the number of pages * in the object under consideration (either an index or a table). * "index_pages" is the amount to add to the total table space, which was * computed for us by query_planner. * * Caller is expected to have ensured that tuples_fetched is greater than zero * and rounded to integer (see clamp_row_est).	The result will likewise be * greater than zero and integral. */doubleindex_pages_fetched(double tuples_fetched, BlockNumber pages,					double index_pages, PlannerInfo *root){	double		pages_fetched;	double		total_pages;	double		T,				b;	/* T is # pages in table, but don't allow it to be zero */	T = (pages > 1) ? (double) pages : 1.0;	/* Compute number of pages assumed to be competing for cache space */	total_pages = root->total_table_pages + index_pages;	total_pages = Max(total_pages, 1.0);	Assert(T <= total_pages);	/* b is pro-rated share of effective_cache_size */	b = (double) effective_cache_size *T / total_pages;	/* force it positive and integral */	if (b <= 1.0)		b = 1.0;	else		b = ceil(b);	/* This part is the Mackert and Lohman formula */	if (T <= b)	{		pages_fetched =			(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);		if (pages_fetched >= T)			pages_fetched = T;		else			pages_fetched = ceil(pages_fetched);	}	else	{		double		lim;		lim = (2.0 * T * b) / (2.0 * T - b);		if (tuples_fetched <= lim)		{			pages_fetched =				(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);		}		else		{			pages_fetched =				b + (tuples_fetched - lim) * (T - b) / T;		}		pages_fetched = ceil(pages_fetched);	}	return pages_fetched;}/* * get_indexpath_pages *		Determine the total size of the indexes used in a bitmap index path. 
* * Note: if the same index is used more than once in a bitmap tree, we will * count it multiple times, which perhaps is the wrong thing ... but it's * not completely clear, and detecting duplicates is difficult, so ignore it * for now. */static doubleget_indexpath_pages(Path *bitmapqual){	double		result = 0;	ListCell   *l;	if (IsA(bitmapqual, BitmapAndPath))	{		BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;		foreach(l, apath->bitmapquals)		{			result += get_indexpath_pages((Path *) lfirst(l));		}	}	else if (IsA(bitmapqual, BitmapOrPath))	{		BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;		foreach(l, opath->bitmapquals)		{			result += get_indexpath_pages((Path *) lfirst(l));		}	}	else if (IsA(bitmapqual, IndexPath))	{		IndexPath  *ipath = (IndexPath *) bitmapqual;		result = (double) ipath->indexinfo->pages;	}	else		elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));	return result;}/* * cost_bitmap_heap_scan *	  Determines and returns the cost of scanning a relation using a bitmap *	  index-then-heap plan. * * 'baserel' is the relation to be scanned * 'bitmapqual' is a tree of IndexPaths, BitmapAndPaths, and BitmapOrPaths * 'outer_rel' is the outer relation when we are considering using the bitmap *		scan as the inside of a nestloop join (hence, some of the indexQuals *		are join clauses, and we should expect repeated scans of the table); *		NULL for a plain bitmap scan * * Note: if this is a join inner path, the component IndexPaths in bitmapqual * should have been costed accordingly. 
 */
void
cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
					  Path *bitmapqual, RelOptInfo *outer_rel)
{
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		indexTotalCost;
	Selectivity indexSelectivity;
	Cost		cpu_per_tuple;
	Cost		cost_per_page;
	double		tuples_fetched;
	double		pages_fetched;
	double		T;				/* # pages in table, at least 1 */

	/* Should only be applied to base relations */
	Assert(IsA(baserel, RelOptInfo));
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_RELATION);

	/* Penalize (but don't forbid) the path if bitmap scans are disabled */
	if (!enable_bitmapscan)
		startup_cost += disable_cost;

	/*
	 * Fetch total cost of obtaining the bitmap, as well as its total
	 * selectivity.  The whole bitmap must be built before any heap page can
	 * be returned, so it counts as startup cost.
	 */
	cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);

	startup_cost += indexTotalCost;

	/*
	 * Estimate number of main-table pages fetched.
	 */
	tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);

	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;

	if (outer_rel != NULL && outer_rel->rows > 1)
	{
		/*
		 * For repeated bitmap scans, scale up the number of tuples fetched in
		 * the Mackert and Lohman formula by the number of scans, so that we
		 * estimate the number of pages fetched by all the scans. Then
		 * pro-rate for one scan.
		 */
		double		num_scans = outer_rel->rows;

		pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
											baserel->pages,
											get_indexpath_pages(bitmapqual),
											root);
		pages_fetched /= num_scans;
	}
	else
	{
		/*
		 * For a single scan, the number of heap pages that need to be fetched
		 * is the same as the Mackert and Lohman formula for the case T <= b
		 * (ie, no re-reads needed).
		 */
		pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
	}
	/* Clamp to table size, and round up to a whole number of pages */
	if (pages_fetched >= T)
		pages_fetched = T;
	else
		pages_fetched = ceil(pages_fetched);

	/*
	 * For small numbers of pages we should charge random_page_cost apiece,
	 * while if nearly all the table's pages are being read, it's more
	 * appropriate to charge seq_page_cost apiece.  The effect is nonlinear,
	 * too. For lack of a better idea, interpolate like this to determine the
	 * cost per page.
	 */
	if (pages_fetched >= 2.0)
		cost_per_page = random_page_cost -
			(random_page_cost - seq_page_cost) * sqrt(pages_fetched / T);
	else
		cost_per_page = random_page_cost;

	run_cost += pages_fetched * cost_per_page;

	/*
	 * Estimate CPU costs per tuple.
	 *
	 * Often the indexquals don't need to be rechecked at each tuple ... but
	 * not always, especially not if there are enough tuples involved that the
	 * bitmaps become lossy.  For the moment, just assume they will be
	 * rechecked always.
	 */
	startup_cost += baserel->baserestrictcost.startup;
	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;

	run_cost += cpu_per_tuple * tuples_fetched;

	path->startup_cost = startup_cost;
	path->total_cost = startup_cost + run_cost;
}

/*
 * cost_bitmap_tree_node
 *		Extract cost and selectivity from a bitmap tree node (index/and/or)
 */
void
cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
{
	if (IsA(path, IndexPath))
	{
		*cost = ((IndexPath *) path)->indextotalcost;
		*selec = ((IndexPath *) path)->indexselectivity;

		/*
		 * Charge a small amount per retrieved tuple to reflect the costs of
		 * manipulating the bitmap.  This is mostly to make sure that a bitmap
		 * scan doesn't look to be the same cost as an indexscan to retrieve a
		 * single tuple.
		 */
		*cost += 0.1 * cpu_operator_cost * ((IndexPath *) path)->rows;
	}
	else if (IsA(path, BitmapAndPath))
	{
		/* AND/OR nodes carry their own precomputed cost and selectivity */
		*cost = path->total_cost;
		*selec = ((BitmapAndPath *) path)->bitmapselectivity;
	}
	else if (IsA(path, BitmapOrPath))
	{
		*cost = path->total_cost;
		*selec = ((BitmapOrPath *) path)->bitmapselectivity;
	}
	else
	{
		elog(ERROR, "unrecognized node type: %d", nodeTag(path));
		*cost = *selec = 0;		/* keep compiler quiet */
	}
}

/*
 * cost_bitmap_and_node
 *		Estimate the cost of a BitmapAnd node
 *
 * Note that this considers only the costs of index scanning and bitmap
 * creation, not the eventual heap access.
 * In that sense the object isn't
 * truly a Path, but it has enough path-like properties (costs in particular)
 * to warrant treating it as one.
 */
void
cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
{
	Cost		totalCost;
	Selectivity selec;
	ListCell   *l;

	/*
	 * We estimate AND selectivity on the assumption that the inputs are
	 * independent.  This is probably often wrong, but we don't have the info
	 * to do better.
	 *
	 * The runtime cost of the BitmapAnd itself is estimated at 100x
	 * cpu_operator_cost for each tbm_intersect needed.  Probably too small,
	 * definitely too simplistic?
	 */
	totalCost = 0.0;
	selec = 1.0;
	foreach(l, path->bitmapquals)
	{
		Path	   *subpath = (Path *) lfirst(l);
		Cost		subCost;
		Selectivity subselec;

		cost_bitmap_tree_node(subpath, &subCost, &subselec);

		/* independence assumption: multiply the selectivities */
		selec *= subselec;

		totalCost += subCost;
		/* one tbm_intersect per input after the first */
		if (l != list_head(path->bitmapquals))
			totalCost += 100.0 * cpu_operator_cost;
	}
	path->bitmapselectivity = selec;
	path->path.startup_cost = totalCost;
	path->path.total_cost = totalCost;
}

/*
 * cost_bitmap_or_node
 *		Estimate the cost of a BitmapOr node
 *
 * See comments for cost_bitmap_and_node.
 */
void
cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
{
	Cost		totalCost;
	Selectivity selec;
	ListCell   *l;

	/*
	 * We estimate OR selectivity on the assumption that the inputs are
	 * non-overlapping, since that's often the case in "x IN (list)" type
	 * situations.  Of course, we clamp to 1.0 at the end.
	 *
	 * The runtime cost of the BitmapOr itself is estimated at 100x
	 * cpu_operator_cost for each tbm_union needed.  Probably too small,
	 * definitely too simplistic?  We are aware that the tbm_unions are
	 * optimized out when the inputs are BitmapIndexScans.
	 */
	totalCost = 0.0;
	selec = 0.0;
	foreach(l, path->bitmapquals)
	{
		Path	   *subpath = (Path *) lfirst(l);
		Cost		subCost;
		Selectivity subselec;

		cost_bitmap_tree_node(subpath, &subCost, &subselec);

		/* non-overlap assumption: add the selectivities (clamped below) */
		selec += subselec;

		totalCost += subCost;
		/* tbm_union needed only for non-leaf inputs after the first */
		if (l != list_head(path->bitmapquals) &&
			!IsA(subpath, IndexPath))
			totalCost += 100.0 * cpu_operator_cost;
	}
	path->bitmapselectivity = Min(selec, 1.0);
	path->path.startup_cost = totalCost;
	path->path.total_cost = totalCost;
}

/*
 * cost_tidscan
 *	  Determines and returns the cost of scanning a relation using TIDs.
 */
void
cost_tidscan(Path *path, PlannerInfo *root,
			 RelOptInfo *baserel, List *tidquals)
{
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	bool		isCurrentOf = false;
	Cost		cpu_per_tuple;
	QualCost	tid_qual_cost;
	int			ntuples;
	ListCell   *l;

	/* Should only be applied to base relations */
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_RELATION);

	/* Count how many tuples we expect to retrieve */
	ntuples = 0;
	foreach(l, tidquals)
	{
		if (IsA(lfirst(l), ScalarArrayOpExpr))
		{
			/* Each element of the array yields 1 tuple */
			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) lfirst(l);
			Node	   *arraynode = (Node *) lsecond(saop->args);

			ntuples += estimate_array_length(arraynode);
		}
		else if (IsA(lfirst(l), CurrentOfExpr))
		{
			/* CURRENT OF yields 1 tuple */
			isCurrentOf = true;
			ntuples++;
		}
		else
		{
			/* It's just CTID = something, count 1 tuple */
			ntuples++;
		}
	}

	/*
	 * We must force TID scan for WHERE CURRENT OF, because only nodeTidscan.c
	 * understands how to do it correctly.  Therefore, honor enable_tidscan
	 * only when CURRENT OF isn't present.  Also note that cost_qual_eval

 *
 * [TRUNCATED: this is page 1 of 5 of the original 2,021-line file.
 *  cost_tidscan() continues, mid-comment, on the next page; the web
 *  viewer's keyboard-shortcut help text that appeared here was removed.]