costsize.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,021 行 · 第 1/5 页
C
2,021 行
/*------------------------------------------------------------------------- * * costsize.c * Routines to compute (and set) relation sizes and path costs * * Path costs are measured in arbitrary units established by these basic * parameters: * * seq_page_cost Cost of a sequential page fetch * random_page_cost Cost of a non-sequential page fetch * cpu_tuple_cost Cost of typical CPU time to process a tuple * cpu_index_tuple_cost Cost of typical CPU time to process an index tuple * cpu_operator_cost Cost of CPU time to execute an operator or function * * We expect that the kernel will typically do some amount of read-ahead * optimization; this in conjunction with seek costs means that seq_page_cost * is normally considerably less than random_page_cost. (However, if the * database is fully cached in RAM, it is reasonable to set them equal.) * * We also use a rough estimate "effective_cache_size" of the number of * disk pages in Postgres + OS-level disk cache. (We can't simply use * NBuffers for this purpose because that would ignore the effects of * the kernel's disk cache.) * * Obviously, taking constants for these values is an oversimplification, * but it's tough enough to get any useful estimates even at this level of * detail. Note that all of these parameters are user-settable, in case * the default values are drastically off for a particular platform. * * We compute two separate costs for each path: * total_cost: total estimated cost to fetch all tuples * startup_cost: cost that is expended before first tuple is fetched * In some scenarios, such as when there is a LIMIT or we are implementing * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the * path's result. A caller can estimate the cost of fetching a partial * result by interpolating between startup_cost and total_cost. In detail: * actual_cost = startup_cost + * (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows; * Note that a base relation's rows count (and, by extension, plan_rows for * plan nodes below the LIMIT node) are set without regard to any LIMIT, so * that this equation works properly. (Also, these routines guarantee not to * set the rows count to zero, so there will be no zero divide.) The LIMIT is * applied as a top-level plan node. * * For largely historical reasons, most of the routines in this module use * the passed result Path only to store their startup_cost and total_cost * results into. All the input data they need is passed as separate * parameters, even though much of it could be extracted from the Path. * An exception is made for the cost_XXXjoin() routines, which expect all * the non-cost fields of the passed XXXPath to be filled in. * * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION * $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.191.2.1 2008/03/24 21:53:12 tgl Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <math.h>#include "executor/nodeHash.h"#include "miscadmin.h"#include "optimizer/clauses.h"#include "optimizer/cost.h"#include "optimizer/pathnode.h"#include "optimizer/planmain.h"#include "parser/parsetree.h"#include "parser/parse_expr.h"#include "utils/lsyscache.h"#include "utils/selfuncs.h"#include "utils/tuplesort.h"#define LOG2(x) (log(x) / 0.693147180559945)/* * Some Paths return less than the nominal number of rows of their parent * relations; join nodes need to do this to get the correct input count: */#define PATH_ROWS(path) \ (IsA(path, UniquePath) ? \ ((UniquePath *) (path))->rows : \ (path)->parent->rows)double seq_page_cost = DEFAULT_SEQ_PAGE_COST;double random_page_cost = DEFAULT_RANDOM_PAGE_COST;double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;Cost disable_cost = 100000000.0;bool enable_seqscan = true;bool enable_indexscan = true;bool enable_bitmapscan = true;bool enable_tidscan = true;bool enable_sort = true;bool enable_hashagg = true;bool enable_nestloop = true;bool enable_mergejoin = true;bool enable_hashjoin = true;typedef struct{ PlannerInfo *root; QualCost total;} cost_qual_eval_context;static MergeScanSelCache *cached_scansel(PlannerInfo *root, RestrictInfo *rinfo, PathKey *pathkey);static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);static Selectivity approx_selectivity(PlannerInfo *root, List *quals, JoinType jointype);static Selectivity join_in_selectivity(JoinPath *path, PlannerInfo *root);static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);static double relation_byte_size(double tuples, int width);static double page_size(double tuples, int width);/* * clamp_row_est * Force a row-count estimate to a sane value. */doubleclamp_row_est(double nrows){ /* * Force estimate to be at least one row, to make explain output look * better and to avoid possible divide-by-zero when interpolating costs. * Make it an integer, too. */ if (nrows <= 1.0) nrows = 1.0; else nrows = rint(nrows); return nrows;}/* * cost_seqscan * Determines and returns the cost of scanning a relation sequentially. */voidcost_seqscan(Path *path, PlannerInfo *root, RelOptInfo *baserel){ Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; /* Should only be applied to base relations */ Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_RELATION); if (!enable_seqscan) startup_cost += disable_cost; /* * disk costs */ run_cost += seq_page_cost * baserel->pages; /* CPU costs */ startup_cost += baserel->baserestrictcost.startup; cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; run_cost += cpu_per_tuple * baserel->tuples; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost;}/* * cost_index * Determines and returns the cost of scanning a relation using an index. * * 'index' is the index to be used * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics) * 'outer_rel' is the outer relation when we are considering using the index * scan as the inside of a nestloop join (hence, some of the indexQuals * are join clauses, and we should expect repeated scans of the index); * NULL for a plain index scan * * cost_index() takes an IndexPath not just a Path, because it sets a few * additional fields of the IndexPath besides startup_cost and total_cost. * These fields are needed if the IndexPath is used in a BitmapIndexScan. * * NOTE: 'indexQuals' must contain only clauses usable as index restrictions. * Any additional quals evaluated as qpquals may reduce the number of returned * tuples, but they won't reduce the number of tuples we have to fetch from * the table, so they don't reduce the scan cost. * * NOTE: as of 8.0, indexQuals is a list of RestrictInfo nodes, where formerly * it was a list of bare clause expressions. */voidcost_index(IndexPath *path, PlannerInfo *root, IndexOptInfo *index, List *indexQuals, RelOptInfo *outer_rel){ RelOptInfo *baserel = index->rel; Cost startup_cost = 0; Cost run_cost = 0; Cost indexStartupCost; Cost indexTotalCost; Selectivity indexSelectivity; double indexCorrelation, csquared; Cost min_IO_cost, max_IO_cost; Cost cpu_per_tuple; double tuples_fetched; double pages_fetched; /* Should only be applied to base relations */ Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo)); Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_RELATION); if (!enable_indexscan) startup_cost += disable_cost; /* * Call index-access-method-specific code to estimate the processing cost * for scanning the index, as well as the selectivity of the index (ie, * the fraction of main-table tuples we will have to retrieve) and its * correlation to the main-table tuple order. */ OidFunctionCall8(index->amcostestimate, PointerGetDatum(root), PointerGetDatum(index), PointerGetDatum(indexQuals), PointerGetDatum(outer_rel), PointerGetDatum(&indexStartupCost), PointerGetDatum(&indexTotalCost), PointerGetDatum(&indexSelectivity), PointerGetDatum(&indexCorrelation)); /* * Save amcostestimate's results for possible use in bitmap scan planning. * We don't bother to save indexStartupCost or indexCorrelation, because a * bitmap scan doesn't care about either. */ path->indextotalcost = indexTotalCost; path->indexselectivity = indexSelectivity; /* all costs for touching index itself included here */ startup_cost += indexStartupCost; run_cost += indexTotalCost - indexStartupCost; /* estimate number of main-table tuples fetched */ tuples_fetched = clamp_row_est(indexSelectivity * baserel->tuples); /*---------- * Estimate number of main-table pages fetched, and compute I/O cost. * * When the index ordering is uncorrelated with the table ordering, * we use an approximation proposed by Mackert and Lohman (see * index_pages_fetched() for details) to compute the number of pages * fetched, and then charge random_page_cost per page fetched. * * When the index ordering is exactly correlated with the table ordering * (just after a CLUSTER, for example), the number of pages fetched should * be exactly selectivity * table_size. What's more, all but the first * will be sequential fetches, not the random fetches that occur in the * uncorrelated case. So if the number of pages is more than 1, we * ought to charge * random_page_cost + (pages_fetched - 1) * seq_page_cost * For partially-correlated indexes, we ought to charge somewhere between * these two estimates. We currently interpolate linearly between the * estimates based on the correlation squared (XXX is that appropriate?). *---------- */ if (outer_rel != NULL && outer_rel->rows > 1) { /* * For repeated indexscans, the appropriate estimate for the * uncorrelated case is to scale up the number of tuples fetched in * the Mackert and Lohman formula by the number of scans, so that we * estimate the number of pages fetched by all the scans; then * pro-rate the costs for one scan. In this case we assume all the * fetches are random accesses. */ double num_scans = outer_rel->rows; pages_fetched = index_pages_fetched(tuples_fetched * num_scans, baserel->pages, (double) index->pages, root); max_IO_cost = (pages_fetched * random_page_cost) / num_scans; /* * In the perfectly correlated case, the number of pages touched by * each scan is selectivity * table_size, and we can use the Mackert * and Lohman formula at the page level to estimate how much work is * saved by caching across scans. We still assume all the fetches are * random, though, which is an overestimate that's hard to correct for * without double-counting the cache effects. (But in most cases * where such a plan is actually interesting, only one page would get * fetched per scan anyway, so it shouldn't matter much.) */ pages_fetched = ceil(indexSelectivity * (double) baserel->pages); pages_fetched = index_pages_fetched(pages_fetched * num_scans, baserel->pages, (double) index->pages, root); min_IO_cost = (pages_fetched * random_page_cost) / num_scans; } else { /* * Normal case: apply the Mackert and Lohman formula, and then * interpolate between that and the correlation-derived result. */ pages_fetched = index_pages_fetched(tuples_fetched, baserel->pages, (double) index->pages, root); /* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */ max_IO_cost = pages_fetched * random_page_cost; /* min_IO_cost is for the perfectly correlated case (csquared=1) */ pages_fetched = ceil(indexSelectivity * (double) baserel->pages); min_IO_cost = random_page_cost; if (pages_fetched > 1) min_IO_cost += (pages_fetched - 1) * seq_page_cost; } /* * Now interpolate based on estimated index order correlation to get total * disk I/O cost for main table accesses. */ csquared = indexCorrelation * indexCorrelation; run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost); /* * Estimate CPU costs per tuple. * * Normally the indexquals will be removed from the list of restriction * clauses that we have to evaluate as qpquals, so we should subtract * their costs from baserestrictcost. But if we are doing a join then * some of the indexquals are join clauses and shouldn't be subtracted. * Rather than work out exactly how much to subtract, we don't subtract * anything. */ startup_cost += baserel->baserestrictcost.startup; cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; if (outer_rel == NULL) { QualCost index_qual_cost; cost_qual_eval(&index_qual_cost, indexQuals, root); /* any startup cost still has to be paid ... */ cpu_per_tuple -= index_qual_cost.per_tuple; } run_cost += cpu_per_tuple * tuples_fetched; path->path.startup_cost = startup_cost; path->path.total_cost = startup_cost + run_cost;}/* * index_pages_fetched * Estimate the number of pages actually fetched after accounting for * cache effects. * * We use an approximation proposed by Mackert and Lohman, "Index Scans * Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions * on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424. * The Mackert and Lohman approximation is that the number of pages * fetched is * PF = * min(2TNs/(2T+Ns), T) when T <= b * 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b) * b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b) * where * T = # pages in table * N = # tuples in table * s = selectivity = fraction of table to be scanned * b = # buffer pages available (we include kernel space here) * * We assume that effective_cache_size is the total number of buffer pages * available for the whole query, and pro-rate that space across all the * tables in the query and the index currently under consideration. (This * ignores space needed for other indexes used by the query, but since we * don't know which indexes will get used, we can't estimate that very well;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?