costsize.c
来自「postgresql8.3.4源码,开源数据库」· C语言 代码 · 共 2,021 行 · 第 1/5 页
C
2,021 行
voidcost_group(Path *path, PlannerInfo *root, int numGroupCols, double numGroups, Cost input_startup_cost, Cost input_total_cost, double input_tuples){ Cost startup_cost; Cost total_cost; startup_cost = input_startup_cost; total_cost = input_total_cost; /* * Charge one cpu_operator_cost per comparison per input tuple. We assume * all columns get compared at most of the tuples. */ total_cost += cpu_operator_cost * input_tuples * numGroupCols; path->startup_cost = startup_cost; path->total_cost = total_cost;}/* * If a nestloop's inner path is an indexscan, be sure to use its estimated * output row count, which may be lower than the restriction-clause-only row * count of its parent. (We don't include this case in the PATH_ROWS macro * because it applies *only* to a nestloop's inner relation.) We have to * be prepared to recurse through Append nodes in case of an appendrel. */static doublenestloop_inner_path_rows(Path *path){ double result; if (IsA(path, IndexPath)) result = ((IndexPath *) path)->rows; else if (IsA(path, BitmapHeapPath)) result = ((BitmapHeapPath *) path)->rows; else if (IsA(path, AppendPath)) { ListCell *l; result = 0; foreach(l, ((AppendPath *) path)->subpaths) { result += nestloop_inner_path_rows((Path *) lfirst(l)); } } else result = PATH_ROWS(path); return result;}/* * cost_nestloop * Determines and returns the cost of joining two relations using the * nested loop algorithm. * * 'path' is already filled in except for the cost fields */voidcost_nestloop(NestPath *path, PlannerInfo *root){ Path *outer_path = path->outerjoinpath; Path *inner_path = path->innerjoinpath; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; QualCost restrict_qual_cost; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = nestloop_inner_path_rows(inner_path); double ntuples; Selectivity joininfactor; if (!enable_nestloop) startup_cost += disable_cost; /* * If we're doing JOIN_IN then we will stop scanning inner tuples for an * outer tuple as soon as we have one match. Account for the effects of * this by scaling down the cost estimates in proportion to the JOIN_IN * selectivity. (This assumes that all the quals attached to the join are * IN quals, which should be true.) */ joininfactor = join_in_selectivity(path, root); /* cost of source data */ /* * NOTE: clearly, we must pay both outer and inner paths' startup_cost * before we can start returning tuples, so the join's startup cost is * their sum. What's not so clear is whether the inner path's * startup_cost must be paid again on each rescan of the inner path. This * is not true if the inner path is materialized or is a hashjoin, but * probably is true otherwise. */ startup_cost += outer_path->startup_cost + inner_path->startup_cost; run_cost += outer_path->total_cost - outer_path->startup_cost; if (IsA(inner_path, MaterialPath) || IsA(inner_path, HashPath)) { /* charge only run cost for each iteration of inner path */ } else { /* * charge startup cost for each iteration of inner path, except we * already charged the first startup_cost in our own startup */ run_cost += (outer_path_rows - 1) * inner_path->startup_cost; } run_cost += outer_path_rows * (inner_path->total_cost - inner_path->startup_cost) * joininfactor; /* * Compute number of tuples processed (not number emitted!) */ ntuples = outer_path_rows * inner_path_rows * joininfactor; /* CPU costs */ cost_qual_eval(&restrict_qual_cost, path->joinrestrictinfo, root); startup_cost += restrict_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple; run_cost += cpu_per_tuple * ntuples; path->path.startup_cost = startup_cost; path->path.total_cost = startup_cost + run_cost;}/* * cost_mergejoin * Determines and returns the cost of joining two relations using the * merge join algorithm. * * 'path' is already filled in except for the cost fields * * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list; * outersortkeys and innersortkeys are lists of the keys to be used * to sort the outer and inner relations, or NIL if no explicit * sort is needed because the source path is already ordered. */voidcost_mergejoin(MergePath *path, PlannerInfo *root){ Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *mergeclauses = path->path_mergeclauses; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; Selectivity merge_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, inner_rows, outer_skip_rows, inner_skip_rows; double mergejointuples, rescannedtuples; double rescanratio; Selectivity outerstartsel, outerendsel, innerstartsel, innerendsel; Selectivity joininfactor; Path sort_path; /* dummy for result of cost_sort */ /* Protect some assumptions below that rowcounts aren't zero */ if (outer_path_rows <= 0) outer_path_rows = 1; if (inner_path_rows <= 0) inner_path_rows = 1; if (!enable_mergejoin) startup_cost += disable_cost; /* * Compute cost and selectivity of the mergequals and qpquals (other * restriction clauses) separately. We use approx_selectivity here for * speed --- in most cases, any errors won't affect the result much. * * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ merge_selec = approx_selectivity(root, mergeclauses, path->jpath.jointype); cost_qual_eval(&merge_qual_cost, mergeclauses, root); cost_qual_eval(&qp_qual_cost, path->jpath.joinrestrictinfo, root); qp_qual_cost.startup -= merge_qual_cost.startup; qp_qual_cost.per_tuple -= merge_qual_cost.per_tuple; /* approx # tuples passing the merge quals */ mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows); /* * When there are equal merge keys in the outer relation, the mergejoin * must rescan any matching tuples in the inner relation. This means * re-fetching inner tuples. Our cost model for this is that a re-fetch * costs the same as an original fetch, which is probably an overestimate; * but on the other hand we ignore the bookkeeping costs of mark/restore. * Not clear if it's worth developing a more refined model. * * The number of re-fetches can be estimated approximately as size of * merge join output minus size of inner relation. Assume that the * distinct key values are 1, 2, ..., and denote the number of values of * each key in the outer relation as m1, m2, ...; in the inner relation, * n1, n2, ... Then we have * * size of join = m1 * n1 + m2 * n2 + ... * * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 * * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner * relation * * This equation works correctly for outer tuples having no inner match * (nk = 0), but not for inner tuples having no outer match (mk = 0); we * are effectively subtracting those from the number of rescanned tuples, * when we should not. Can we do better without expensive selectivity * computations? */ if (IsA(outer_path, UniquePath)) rescannedtuples = 0; else { rescannedtuples = mergejointuples - inner_path_rows; /* Must clamp because of possible underestimate */ if (rescannedtuples < 0) rescannedtuples = 0; } /* We'll inflate inner run cost this much to account for rescanning */ rescanratio = 1.0 + (rescannedtuples / inner_path_rows); /* * A merge join will stop as soon as it exhausts either input stream * (unless it's an outer join, in which case the outer side has to be * scanned all the way anyway). Estimate fraction of the left and right * inputs that will actually need to be scanned. Likewise, we can * estimate the number of rows that will be skipped before the first * join pair is found, which should be factored into startup cost. * We use only the first (most significant) merge clause for this purpose. * Since mergejoinscansel() is a fairly expensive computation, we cache * the results in the merge clause RestrictInfo. */ if (mergeclauses && path->jpath.jointype != JOIN_FULL) { RestrictInfo *firstclause = (RestrictInfo *) linitial(mergeclauses); List *opathkeys; List *ipathkeys; PathKey *opathkey; PathKey *ipathkey; MergeScanSelCache *cache; /* Get the input pathkeys to determine the sort-order details */ opathkeys = outersortkeys ? outersortkeys : outer_path->pathkeys; ipathkeys = innersortkeys ? innersortkeys : inner_path->pathkeys; Assert(opathkeys); Assert(ipathkeys); opathkey = (PathKey *) linitial(opathkeys); ipathkey = (PathKey *) linitial(ipathkeys); /* debugging check */ if (opathkey->pk_opfamily != ipathkey->pk_opfamily || opathkey->pk_strategy != ipathkey->pk_strategy || opathkey->pk_nulls_first != ipathkey->pk_nulls_first) elog(ERROR, "left and right pathkeys do not match in mergejoin"); /* Get the selectivity with caching */ cache = cached_scansel(root, firstclause, opathkey); if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids)) { /* left side of clause is outer */ outerstartsel = cache->leftstartsel; outerendsel = cache->leftendsel; innerstartsel = cache->rightstartsel; innerendsel = cache->rightendsel; } else { /* left side of clause is inner */ outerstartsel = cache->rightstartsel; outerendsel = cache->rightendsel; innerstartsel = cache->leftstartsel; innerendsel = cache->leftendsel; } if (path->jpath.jointype == JOIN_LEFT) { outerstartsel = 0.0; outerendsel = 1.0; } else if (path->jpath.jointype == JOIN_RIGHT) { innerstartsel = 0.0; innerendsel = 1.0; } } else { /* cope with clauseless or full mergejoin */ outerstartsel = innerstartsel = 0.0; outerendsel = innerendsel = 1.0; } /* * Convert selectivities to row counts. We force outer_rows and * inner_rows to be at least 1, but the skip_rows estimates can be zero. */ outer_skip_rows = rint(outer_path_rows * outerstartsel); inner_skip_rows = rint(inner_path_rows * innerstartsel); outer_rows = clamp_row_est(outer_path_rows * outerendsel); inner_rows = clamp_row_est(inner_path_rows * innerendsel); Assert(outer_skip_rows <= outer_rows); Assert(inner_skip_rows <= inner_rows); /* * Readjust scan selectivities to account for above rounding. This is * normally an insignificant effect, but when there are only a few rows in * the inputs, failing to do this makes for a large percentage error. */ outerstartsel = outer_skip_rows / outer_path_rows; innerstartsel = inner_skip_rows / inner_path_rows; outerendsel = outer_rows / outer_path_rows; innerendsel = inner_rows / inner_path_rows; Assert(outerstartsel <= outerendsel); Assert(innerstartsel <= innerendsel); /* cost of source data */ if (outersortkeys) /* do we need to sort outer? */ { cost_sort(&sort_path, root, outersortkeys, outer_path->total_cost, outer_path_rows, outer_path->parent->width, -1.0); startup_cost += sort_path.startup_cost; startup_cost += (sort_path.total_cost - sort_path.startup_cost) * outerstartsel; run_cost += (sort_path.total_cost - sort_path.startup_cost) * (outerendsel - outerstartsel); } else { startup_cost += outer_path->startup_cost; startup_cost += (outer_path->total_cost - outer_path->startup_cost) * outerstartsel; run_cost += (outer_path->total_cost - outer_path->startup_cost) * (outerendsel - outerstartsel); } if (innersortkeys) /* do we need to sort inner? */ { cost_sort(&sort_path, root, innersortkeys, inner_path->total_cost, inner_path_rows, inner_path->parent->width, -1.0); startup_cost += sort_path.startup_cost; startup_cost += (sort_path.total_cost - sort_path.startup_cost) * innerstartsel * rescanratio; run_cost += (sort_path.total_cost - sort_path.startup_cost) * (innerendsel - innerstartsel) * rescanratio; } else { startup_cost += inner_path->startup_cost; startup_cost += (inner_path->total_cost - inner_path->startup_cost) * innerstartsel * rescanratio; run_cost += (inner_path->total_cost - inner_path->startup_cost) * (innerendsel - innerstartsel) * rescanratio; } /* CPU costs */ /* * If we're doing JOIN_IN then we will stop outputting inner tuples for an * outer tuple as soon as we have one match. Account for the effects of * this by scaling down the cost estimates in proportion to the expected * output size. (This assumes that all the quals attached to the join are * IN quals, which should be true.) */ joininfactor = join_in_selectivity(&path->jpath, root); /* * The number of tuple comparisons needed is approximately number of outer * rows plus number of inner rows plus number of rescanned tuples (can we * refine this?). At each one, we need to evaluate the mergejoin quals. * NOTE: JOIN_IN mode does not save any work here, so do NOT include * joininfactor. */ startup_cost += merge_qual_cost.startup; startup_cost += merge_qual_cost.per_tuple * (outer_skip_rows + inner_skip_rows * rescanratio); run_cost += merge_qual_cost.per_tuple *
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?