costsize.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 1,993 行 · 第 1/5 页
C
1,993 行
/* Should only be applied to base relations */ Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_RELATION); if (!enable_tidscan) startup_cost += disable_cost; /* disk costs --- assume each tuple on a different page */ run_cost += random_page_cost * ntuples; /* CPU costs */ startup_cost += baserel->baserestrictcost.startup; cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; run_cost += cpu_per_tuple * ntuples; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost;}/* * cost_subqueryscan * Determines and returns the cost of scanning a subquery RTE. */voidcost_subqueryscan(Path *path, RelOptInfo *baserel){ Cost startup_cost; Cost run_cost; Cost cpu_per_tuple; /* Should only be applied to base relations that are subqueries */ Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_SUBQUERY); /* * Cost of path is cost of evaluating the subplan, plus cost of * evaluating any restriction clauses that will be attached to the * SubqueryScan node, plus cpu_tuple_cost to account for selection and * projection overhead. */ path->startup_cost = baserel->subplan->startup_cost; path->total_cost = baserel->subplan->total_cost; startup_cost = baserel->baserestrictcost.startup; cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple; run_cost = cpu_per_tuple * baserel->tuples; path->startup_cost += startup_cost; path->total_cost += startup_cost + run_cost;}/* * cost_functionscan * Determines and returns the cost of scanning a function RTE. */voidcost_functionscan(Path *path, Query *root, RelOptInfo *baserel){ Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; /* Should only be applied to base relations that are functions */ Assert(baserel->relid > 0); Assert(baserel->rtekind == RTE_FUNCTION); /* * For now, estimate function's cost at one operator eval per function * call. Someday we should revive the function cost estimate columns * in pg_proc... */ cpu_per_tuple = cpu_operator_cost; /* Add scanning CPU costs */ startup_cost += baserel->baserestrictcost.startup; cpu_per_tuple += cpu_tuple_cost + baserel->baserestrictcost.per_tuple; run_cost += cpu_per_tuple * baserel->tuples; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost;}/* * cost_sort * Determines and returns the cost of sorting a relation, including * the cost of reading the input data. * * If the total volume of data to sort is less than SortMem, we will do * an in-memory sort, which requires no I/O and about t*log2(t) tuple * comparisons for t tuples. * * If the total volume exceeds SortMem, we switch to a tape-style merge * algorithm. There will still be about t*log2(t) tuple comparisons in * total, but we will also need to write and read each tuple once per * merge pass. We expect about ceil(log6(r)) merge passes where r is the * number of initial runs formed (log6 because tuplesort.c uses six-tape * merging). Since the average initial run should be about twice SortMem, * we have * disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem))) * cpu = comparison_cost * t * log2(t) * * The disk traffic is assumed to be half sequential and half random * accesses (XXX can't we refine that guess?) * * We charge two operator evals per tuple comparison, which should be in * the right ballpark in most cases. * * 'pathkeys' is a list of sort keys * 'input_cost' is the total cost for reading the input data * 'tuples' is the number of tuples in the relation * 'width' is the average tuple width in bytes * * NOTE: some callers currently pass NIL for pathkeys because they * can't conveniently supply the sort keys. Since this routine doesn't * currently do anything with pathkeys anyway, that doesn't matter... * but if it ever does, it should react gracefully to lack of key data. * (Actually, the thing we'd most likely be interested in is just the number * of sort keys, which all callers *could* supply.) */voidcost_sort(Path *path, Query *root, List *pathkeys, Cost input_cost, double tuples, int width){ Cost startup_cost = input_cost; Cost run_cost = 0; double nbytes = relation_byte_size(tuples, width); long sortmembytes = SortMem * 1024L; if (!enable_sort) startup_cost += disable_cost; /* * We want to be sure the cost of a sort is never estimated as zero, * even if passed-in tuple count is zero. Besides, mustn't do * log(0)... */ if (tuples < 2.0) tuples = 2.0; /* * CPU costs * * Assume about two operator evals per tuple comparison and N log2 N * comparisons */ startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples); /* disk costs */ if (nbytes > sortmembytes) { double npages = ceil(nbytes / BLCKSZ); double nruns = nbytes / (sortmembytes * 2); double log_runs = ceil(LOG6(nruns)); double npageaccesses; if (log_runs < 1.0) log_runs = 1.0; npageaccesses = 2.0 * npages * log_runs; /* Assume half are sequential (cost 1), half are not */ startup_cost += npageaccesses * (1.0 + cost_nonsequential_access(npages)) * 0.5; } /* * Also charge a small amount (arbitrarily set equal to operator cost) * per extracted tuple. */ run_cost += cpu_operator_cost * tuples; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost;}/* * cost_material * Determines and returns the cost of materializing a relation, including * the cost of reading the input data. * * If the total volume of data to materialize exceeds SortMem, we will need * to write it to disk, so the cost is much higher in that case. */voidcost_material(Path *path, Cost input_cost, double tuples, int width){ Cost startup_cost = input_cost; Cost run_cost = 0; double nbytes = relation_byte_size(tuples, width); long sortmembytes = SortMem * 1024L; /* disk costs */ if (nbytes > sortmembytes) { double npages = ceil(nbytes / BLCKSZ); /* We'll write during startup and read during retrieval */ startup_cost += npages; run_cost += npages; } /* * Also charge a small amount per extracted tuple. We use * cpu_tuple_cost so that it doesn't appear worthwhile to materialize * a bare seqscan. */ run_cost += cpu_tuple_cost * tuples; path->startup_cost = startup_cost; path->total_cost = startup_cost + run_cost;}/* * cost_agg * Determines and returns the cost of performing an Agg plan node, * including the cost of its input. * * Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs * are for appropriately-sorted input. */voidcost_agg(Path *path, Query *root, AggStrategy aggstrategy, int numAggs, int numGroupCols, double numGroups, Cost input_startup_cost, Cost input_total_cost, double input_tuples){ Cost startup_cost; Cost total_cost; /* * We charge one cpu_operator_cost per aggregate function per input * tuple, and another one per output tuple (corresponding to transfn * and finalfn calls respectively). If we are grouping, we charge an * additional cpu_operator_cost per grouping column per input tuple * for grouping comparisons. * * We will produce a single output tuple if not grouping, and a tuple per * group otherwise. * * Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the * same total CPU cost, but AGG_SORTED has lower startup cost. If the * input path is already sorted appropriately, AGG_SORTED should be * preferred (since it has no risk of memory overflow). This will * happen as long as the computed total costs are indeed exactly equal * --- but if there's roundoff error we might do the wrong thing. So * be sure that the computations below form the same intermediate * values in the same order. */ if (aggstrategy == AGG_PLAIN) { startup_cost = input_total_cost; startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs; /* we aren't grouping */ total_cost = startup_cost; } else if (aggstrategy == AGG_SORTED) { /* Here we are able to deliver output on-the-fly */ startup_cost = input_startup_cost; total_cost = input_total_cost; /* calcs phrased this way to match HASHED case, see note above */ total_cost += cpu_operator_cost * input_tuples * numGroupCols; total_cost += cpu_operator_cost * input_tuples * numAggs; total_cost += cpu_operator_cost * numGroups * numAggs; } else { /* must be AGG_HASHED */ startup_cost = input_total_cost; startup_cost += cpu_operator_cost * input_tuples * numGroupCols; startup_cost += cpu_operator_cost * input_tuples * numAggs; total_cost = startup_cost; total_cost += cpu_operator_cost * numGroups * numAggs; } path->startup_cost = startup_cost; path->total_cost = total_cost;}/* * cost_group * Determines and returns the cost of performing a Group plan node, * including the cost of its input. * * Note: caller must ensure that input costs are for appropriately-sorted * input. */voidcost_group(Path *path, Query *root, int numGroupCols, double numGroups, Cost input_startup_cost, Cost input_total_cost, double input_tuples){ Cost startup_cost; Cost total_cost; startup_cost = input_startup_cost; total_cost = input_total_cost; /* * Charge one cpu_operator_cost per comparison per input tuple. We * assume all columns get compared at most of the tuples. */ total_cost += cpu_operator_cost * input_tuples * numGroupCols; path->startup_cost = startup_cost; path->total_cost = total_cost;}/* * cost_nestloop * Determines and returns the cost of joining two relations using the * nested loop algorithm. * * 'path' is already filled in except for the cost fields */voidcost_nestloop(NestPath *path, Query *root){ Path *outer_path = path->outerjoinpath; Path *inner_path = path->innerjoinpath; List *restrictlist = path->joinrestrictinfo; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; QualCost restrict_qual_cost; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double ntuples; Selectivity joininfactor; if (!enable_nestloop) startup_cost += disable_cost; /* * If we're doing JOIN_IN then we will stop scanning inner tuples for * an outer tuple as soon as we have one match. Account for the * effects of this by scaling down the cost estimates in proportion to * the expected output size. (This assumes that all the quals * attached to the join are IN quals, which should be true.) * * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ if (path->jointype == JOIN_IN) { Selectivity qual_selec = approx_selectivity(root, restrictlist, path->jointype); double qptuples; qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows); if (qptuples > path->path.parent->rows) joininfactor = path->path.parent->rows / qptuples; else joininfactor = 1.0; } else joininfactor = 1.0; /* cost of source data */ /* * NOTE: clearly, we must pay both outer and inner paths' startup_cost * before we can start returning tuples, so the join's startup cost is * their sum. What's not so clear is whether the inner path's * startup_cost must be paid again on each rescan of the inner path. * This is not true if the inner path is materialized or is a * hashjoin, but probably is true otherwise. */ startup_cost += outer_path->startup_cost + inner_path->startup_cost; run_cost += outer_path->total_cost - outer_path->startup_cost; if (IsA(inner_path, MaterialPath) || IsA(inner_path, HashPath)) { /* charge only run cost for each iteration of inner path */ } else { /* * charge startup cost for each iteration of inner path, except we * already charged the first startup_cost in our own startup */ run_cost += (outer_path_rows - 1) * inner_path->startup_cost; } run_cost += outer_path_rows * (inner_path->total_cost - inner_path->startup_cost) * joininfactor; /* * Compute number of tuples processed (not number emitted!). If inner * path is an indexscan, be sure to use its estimated output row * count, which may be lower than the restriction-clause-only row * count of its parent. (We don't include this case in the PATH_ROWS * macro because it applies *only* to a nestloop's inner relation.) * Note: it is correct to use the unadjusted inner_path_rows in the * above calculation for joininfactor, since otherwise we'd be * double-counting the selectivity of the join clause being used for * the index. */ if (IsA(inner_path, IndexPath))
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?