costsize.c

来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,021 行 · 第 1/5 页

C
2,021
字号
/*-------------------------------------------------------------------------
 *
 * costsize.c
 *	  Routines to compute (and set) relation sizes and path costs
 *
 * Path costs are measured in arbitrary units established by these basic
 * parameters:
 *
 *	seq_page_cost		Cost of a sequential page fetch
 *	random_page_cost	Cost of a non-sequential page fetch
 *	cpu_tuple_cost		Cost of typical CPU time to process a tuple
 *	cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
 *	cpu_operator_cost	Cost of CPU time to execute an operator or function
 *
 * We expect that the kernel will typically do some amount of read-ahead
 * optimization; this in conjunction with seek costs means that seq_page_cost
 * is normally considerably less than random_page_cost.  (However, if the
 * database is fully cached in RAM, it is reasonable to set them equal.)
 *
 * We also use a rough estimate "effective_cache_size" of the number of
 * disk pages in Postgres + OS-level disk cache.  (We can't simply use
 * NBuffers for this purpose because that would ignore the effects of
 * the kernel's disk cache.)
 *
 * Obviously, taking constants for these values is an oversimplification,
 * but it's tough enough to get any useful estimates even at this level of
 * detail.  Note that all of these parameters are user-settable, in case
 * the default values are drastically off for a particular platform.
 *
 * We compute two separate costs for each path:
 *		total_cost: total estimated cost to fetch all tuples
 *		startup_cost: cost that is expended before first tuple is fetched
 * In some scenarios, such as when there is a LIMIT or we are implementing
 * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
 * path's result.  A caller can estimate the cost of fetching a partial
 * result by interpolating between startup_cost and total_cost.  In detail:
 *		actual_cost = startup_cost +
 *			(total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
 * Note that a base relation's rows count (and, by extension, plan_rows for
 * plan nodes below the LIMIT node) are set without regard to any LIMIT, so
 * that this equation works properly.  (Also, these routines guarantee not to
 * set the rows count to zero, so there will be no zero divide.)  The LIMIT is
 * applied as a top-level plan node.
 *
 * For largely historical reasons, most of the routines in this module use
 * the passed result Path only to store their startup_cost and total_cost
 * results into.  All the input data they need is passed as separate
 * parameters, even though much of it could be extracted from the Path.
 * An exception is made for the cost_XXXjoin() routines, which expect all
 * the non-cost fields of the passed XXXPath to be filled in.
 *
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.191.2.1 2008/03/24 21:53:12 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include <math.h>

#include "executor/nodeHash.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/planmain.h"
#include "parser/parsetree.h"
#include "parser/parse_expr.h"
#include "utils/lsyscache.h"
#include "utils/selfuncs.h"
#include "utils/tuplesort.h"


/* Base-2 logarithm of x; the constant 0.693147180559945 is ln(2). */
#define LOG2(x)  (log(x) / 0.693147180559945)

/*
 * Some Paths return less than the nominal number of rows of their parent
 * relations; join nodes need to do this to get the correct input count:
 */
#define PATH_ROWS(path) \
	(IsA(path, UniquePath) ? \
	 ((UniquePath *) (path))->rows : \
	 (path)->parent->rows)


/*
 * Cost-model parameters.  All are user-settable GUC variables; the
 * DEFAULT_* values come from optimizer/cost.h.
 */
double		seq_page_cost = DEFAULT_SEQ_PAGE_COST;
double		random_page_cost = DEFAULT_RANDOM_PAGE_COST;
double		cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
double		cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
double		cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;

int			effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;

/*
 * Added to a path's cost when its plan type is disabled by an enable_XXX
 * GUC: large enough to steer the planner away, but the plan type remains
 * usable when no alternative exists.
 */
Cost		disable_cost = 100000000.0;

/* enable_XXX GUCs: allow individual plan types to be discouraged */
bool		enable_seqscan = true;
bool		enable_indexscan = true;
bool		enable_bitmapscan = true;
bool		enable_tidscan = true;
bool		enable_sort = true;
bool		enable_hashagg = true;
bool		enable_nestloop = true;
bool		enable_mergejoin = true;
bool		enable_hashjoin = true;

/* Accumulator state passed down the tree by cost_qual_eval_walker */
typedef struct
{
	PlannerInfo *root;
	QualCost	total;
} cost_qual_eval_context;

static MergeScanSelCache *cached_scansel(PlannerInfo *root,
			   RestrictInfo *rinfo,
			   PathKey *pathkey);
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
static Selectivity approx_selectivity(PlannerInfo *root, List *quals,
				   JoinType jointype);
static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root);
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
static double relation_byte_size(double tuples, int width);
static double page_size(double tuples, int width);


/*
 * clamp_row_est
 *		Force a row-count estimate to a sane value.
 */
double
clamp_row_est(double nrows)
{
	/*
	 * Force estimate to be at least one row, to make explain output look
	 * better and to avoid possible divide-by-zero when interpolating costs.
	 * Make it an integer, too.
	 */
	if (nrows <= 1.0)
		nrows = 1.0;
	else
		nrows = rint(nrows);

	return nrows;
}

/*
 * cost_seqscan
 *	  Determines and returns the cost of scanning a relation sequentially.
*/voidcost_seqscan(Path *path, PlannerInfo *root,			 RelOptInfo *baserel){	Cost		startup_cost = 0;	Cost		run_cost = 0;	Cost		cpu_per_tuple;	/* Should only be applied to base relations */	Assert(baserel->relid > 0);	Assert(baserel->rtekind == RTE_RELATION);	if (!enable_seqscan)		startup_cost += disable_cost;	/*	 * disk costs	 */	run_cost += seq_page_cost * baserel->pages;	/* CPU costs */	startup_cost += baserel->baserestrictcost.startup;	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;	run_cost += cpu_per_tuple * baserel->tuples;	path->startup_cost = startup_cost;	path->total_cost = startup_cost + run_cost;}/* * cost_index *	  Determines and returns the cost of scanning a relation using an index. * * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) * 'outer_rel' is the outer relation when we are considering using the index *		scan as the inside of a nestloop join (hence, some of the indexQuals *		are join clauses, and we should expect repeated scans of the index); *		NULL for a plain index scan * * cost_index() takes an IndexPath not just a Path, because it sets a few * additional fields of the IndexPath besides startup_cost and total_cost. * These fields are needed if the IndexPath is used in a BitmapIndexScan. * * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. * Any additional quals evaluated as qpquals may reduce the number of returned * tuples, but they won't reduce the number of tuples we have to fetch from * the table, so they don't reduce the scan cost. * * NOTE: as of 8.0, indexQuals is a list of RestrictInfo nodes, where formerly * it was a list of bare clause expressions. 
 */
void
cost_index(IndexPath *path, PlannerInfo *root,
		   IndexOptInfo *index,
		   List *indexQuals,
		   RelOptInfo *outer_rel)
{
	RelOptInfo *baserel = index->rel;
	Cost		startup_cost = 0;
	Cost		run_cost = 0;
	Cost		indexStartupCost;
	Cost		indexTotalCost;
	Selectivity indexSelectivity;
	double		indexCorrelation,
				csquared;
	Cost		min_IO_cost,
				max_IO_cost;
	Cost		cpu_per_tuple;
	double		tuples_fetched;
	double		pages_fetched;

	/* Should only be applied to base relations */
	Assert(IsA(baserel, RelOptInfo) &&
		   IsA(index, IndexOptInfo));
	Assert(baserel->relid > 0);
	Assert(baserel->rtekind == RTE_RELATION);

	/* If index scans are disabled, make this path look very expensive. */
	if (!enable_indexscan)
		startup_cost += disable_cost;

	/*
	 * Call index-access-method-specific code to estimate the processing cost
	 * for scanning the index, as well as the selectivity of the index (ie,
	 * the fraction of main-table tuples we will have to retrieve) and its
	 * correlation to the main-table tuple order.
	 */
	OidFunctionCall8(index->amcostestimate,
					 PointerGetDatum(root),
					 PointerGetDatum(index),
					 PointerGetDatum(indexQuals),
					 PointerGetDatum(outer_rel),
					 PointerGetDatum(&indexStartupCost),
					 PointerGetDatum(&indexTotalCost),
					 PointerGetDatum(&indexSelectivity),
					 PointerGetDatum(&indexCorrelation));

	/*
	 * Save amcostestimate's results for possible use in bitmap scan planning.
	 * We don't bother to save indexStartupCost or indexCorrelation, because a
	 * bitmap scan doesn't care about either.
	 */
	path->indextotalcost = indexTotalCost;
	path->indexselectivity = indexSelectivity;

	/* all costs for touching index itself included here */
	startup_cost += indexStartupCost;
	run_cost += indexTotalCost - indexStartupCost;

	/* estimate number of main-table tuples fetched */
	tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples);

	/*----------
	 * Estimate number of main-table pages fetched, and compute I/O cost.
	 *
	 * When the index ordering is uncorrelated with the table ordering,
	 * we use an approximation proposed by Mackert and Lohman (see
	 * index_pages_fetched() for details) to compute the number of pages
	 * fetched, and then charge random_page_cost per page fetched.
	 *
	 * When the index ordering is exactly correlated with the table ordering
	 * (just after a CLUSTER, for example), the number of pages fetched should
	 * be exactly selectivity * table_size.  What's more, all but the first
	 * will be sequential fetches, not the random fetches that occur in the
	 * uncorrelated case.  So if the number of pages is more than 1, we
	 * ought to charge
	 *		random_page_cost + (pages_fetched - 1) * seq_page_cost
	 * For partially-correlated indexes, we ought to charge somewhere between
	 * these two estimates.  We currently interpolate linearly between the
	 * estimates based on the correlation squared (XXX is that appropriate?).
	 *----------
	 */
	if (outer_rel != NULL && outer_rel->rows > 1)
	{
		/*
		 * For repeated indexscans, the appropriate estimate for the
		 * uncorrelated case is to scale up the number of tuples fetched in
		 * the Mackert and Lohman formula by the number of scans, so that we
		 * estimate the number of pages fetched by all the scans; then
		 * pro-rate the costs for one scan.  In this case we assume all the
		 * fetches are random accesses.
		 */
		double		num_scans = outer_rel->rows;

		pages_fetched = index_pages_fetched(tuples_fetched * num_scans,
											baserel->pages,
											(double) index->pages,
											root);

		max_IO_cost = (pages_fetched * random_page_cost) / num_scans;

		/*
		 * In the perfectly correlated case, the number of pages touched by
		 * each scan is selectivity * table_size, and we can use the Mackert
		 * and Lohman formula at the page level to estimate how much work is
		 * saved by caching across scans.  We still assume all the fetches are
		 * random, though, which is an overestimate that's hard to correct for
		 * without double-counting the cache effects.  (But in most cases
		 * where such a plan is actually interesting, only one page would get
		 * fetched per scan anyway, so it shouldn't matter much.)
		 */
		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);

		pages_fetched = index_pages_fetched(pages_fetched * num_scans,
											baserel->pages,
											(double) index->pages,
											root);

		min_IO_cost = (pages_fetched * random_page_cost) / num_scans;
	}
	else
	{
		/*
		 * Normal case: apply the Mackert and Lohman formula, and then
		 * interpolate between that and the correlation-derived result.
		 */
		pages_fetched = index_pages_fetched(tuples_fetched,
											baserel->pages,
											(double) index->pages,
											root);

		/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
		max_IO_cost = pages_fetched * random_page_cost;

		/* min_IO_cost is for the perfectly correlated case (csquared=1) */
		pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
		min_IO_cost = random_page_cost;
		if (pages_fetched > 1)
			min_IO_cost += (pages_fetched - 1) * seq_page_cost;
	}

	/*
	 * Now interpolate based on estimated index order correlation to get total
	 * disk I/O cost for main table accesses.
	 */
	csquared = indexCorrelation * indexCorrelation;

	run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);

	/*
	 * Estimate CPU costs per tuple.
	 *
	 * Normally the indexquals will be removed from the list of restriction
	 * clauses that we have to evaluate as qpquals, so we should subtract
	 * their costs from baserestrictcost.  But if we are doing a join then
	 * some of the indexquals are join clauses and shouldn't be subtracted.
	 * Rather than work out exactly how much to subtract, we don't subtract
	 * anything.
	 */
	startup_cost += baserel->baserestrictcost.startup;
	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;

	if (outer_rel == NULL)
	{
		QualCost	index_qual_cost;

		cost_qual_eval(&index_qual_cost, indexQuals, root);
		/* any startup cost still has to be paid ... */
		cpu_per_tuple -= index_qual_cost.per_tuple;
	}

	run_cost += cpu_per_tuple * tuples_fetched;

	path->path.startup_cost = startup_cost;
	path->path.total_cost = startup_cost + run_cost;
}

/*
 * index_pages_fetched
 *	  Estimate the number of pages actually fetched after accounting for
 *	  cache effects.
 *
 * We use an approximation proposed by Mackert and Lohman, "Index Scans
 * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
 * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
 * The Mackert and Lohman approximation is that the number of pages
 * fetched is
 *	PF =
 *		min(2TNs/(2T+Ns), T)			when T <= b
 *		2TNs/(2T+Ns)					when T > b and Ns <= 2Tb/(2T-b)
 *		b + (Ns - 2Tb/(2T-b))*(T-b)/T	when T > b and Ns > 2Tb/(2T-b)
 * where
 *		T = # pages in table
 *		N = # tuples in table
 *		s = selectivity = fraction of table to be scanned
 *		b = # buffer pages available (we include kernel space here)
 *
 * We assume that effective_cache_size is the total number of buffer pages
 * available for the whole query, and pro-rate that space across all the
 * tables in the query and the index currently under consideration.  (This
 * ignores space needed for other indexes used by the query, but since we
 * don't know which indexes will get used, we can't estimate that very well;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?