costsize.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 1,993 行 · 第 1/5 页
C
1,993 行
inner_path_rows = ((IndexPath *) inner_path)->rows; ntuples = inner_path_rows * outer_path_rows; /* CPU costs */ cost_qual_eval(&restrict_qual_cost, restrictlist); startup_cost += restrict_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + restrict_qual_cost.per_tuple; run_cost += cpu_per_tuple * ntuples; path->path.startup_cost = startup_cost; path->path.total_cost = startup_cost + run_cost;}/* * cost_mergejoin * Determines and returns the cost of joining two relations using the * merge join algorithm. * * 'path' is already filled in except for the cost fields * * Notes: path's mergeclauses should be a subset of the joinrestrictinfo list; * outersortkeys and innersortkeys are lists of the keys to be used * to sort the outer and inner relations, or NIL if no explicit * sort is needed because the source path is already ordered. */voidcost_mergejoin(MergePath *path, Query *root){ Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *restrictlist = path->jpath.joinrestrictinfo; List *mergeclauses = path->path_mergeclauses; List *outersortkeys = path->outersortkeys; List *innersortkeys = path->innersortkeys; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; Selectivity merge_selec; Selectivity qp_selec; QualCost merge_qual_cost; QualCost qp_qual_cost; RestrictInfo *firstclause; List *qpquals; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outer_rows, inner_rows; double mergejointuples, rescannedtuples; double qptuples; double rescanratio; Selectivity outerscansel, innerscansel; Selectivity joininfactor; Path sort_path; /* dummy for result of cost_sort */ if (!enable_mergejoin) startup_cost += disable_cost; /* * Compute cost and selectivity of the mergequals and qpquals (other * restriction clauses) separately. We use approx_selectivity here * for speed --- in most cases, any errors won't affect the result * much. * * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ merge_selec = approx_selectivity(root, mergeclauses, path->jpath.jointype); cost_qual_eval(&merge_qual_cost, mergeclauses); qpquals = set_ptrDifference(restrictlist, mergeclauses); qp_selec = approx_selectivity(root, qpquals, path->jpath.jointype); cost_qual_eval(&qp_qual_cost, qpquals); freeList(qpquals); /* approx # tuples passing the merge quals */ mergejointuples = ceil(merge_selec * outer_path_rows * inner_path_rows); /* approx # tuples passing qpquals as well */ qptuples = ceil(mergejointuples * qp_selec); /* * When there are equal merge keys in the outer relation, the * mergejoin must rescan any matching tuples in the inner relation. * This means re-fetching inner tuples. Our cost model for this is * that a re-fetch costs the same as an original fetch, which is * probably an overestimate; but on the other hand we ignore the * bookkeeping costs of mark/restore. Not clear if it's worth * developing a more refined model. * * The number of re-fetches can be estimated approximately as size of * merge join output minus size of inner relation. Assume that the * distinct key values are 1, 2, ..., and denote the number of values * of each key in the outer relation as m1, m2, ...; in the inner * relation, n1, n2, ... Then we have * * size of join = m1 * n1 + m2 * n2 + ... * * number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 * * n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner * relation * * This equation works correctly for outer tuples having no inner match * (nk = 0), but not for inner tuples having no outer match (mk = 0); * we are effectively subtracting those from the number of rescanned * tuples, when we should not. Can we do better without expensive * selectivity computations? */ if (IsA(outer_path, UniquePath)) rescannedtuples = 0; else { rescannedtuples = mergejointuples - inner_path_rows; /* Must clamp because of possible underestimate */ if (rescannedtuples < 0) rescannedtuples = 0; } /* We'll inflate inner run cost this much to account for rescanning */ rescanratio = 1.0 + (rescannedtuples / inner_path_rows); /* * A merge join will stop as soon as it exhausts either input stream. * Estimate fraction of the left and right inputs that will actually * need to be scanned. We use only the first (most significant) merge * clause for this purpose. * * Since this calculation is somewhat expensive, and will be the same for * all mergejoin paths associated with the merge clause, we cache the * results in the RestrictInfo node. */ if (mergeclauses) { firstclause = (RestrictInfo *) lfirst(mergeclauses); if (firstclause->left_mergescansel < 0) /* not computed yet? */ mergejoinscansel(root, (Node *) firstclause->clause, &firstclause->left_mergescansel, &firstclause->right_mergescansel); if (bms_is_subset(firstclause->left_relids, outer_path->parent->relids)) { /* left side of clause is outer */ outerscansel = firstclause->left_mergescansel; innerscansel = firstclause->right_mergescansel; } else { /* left side of clause is inner */ outerscansel = firstclause->right_mergescansel; innerscansel = firstclause->left_mergescansel; } } else { /* cope with clauseless mergejoin */ outerscansel = innerscansel = 1.0; } /* convert selectivity to row count; must scan at least one row */ outer_rows = ceil(outer_path_rows * outerscansel); if (outer_rows < 1) outer_rows = 1; inner_rows = ceil(inner_path_rows * innerscansel); if (inner_rows < 1) inner_rows = 1; /* * Readjust scan selectivities to account for above rounding. This is * normally an insignificant effect, but when there are only a few * rows in the inputs, failing to do this makes for a large percentage * error. */ outerscansel = outer_rows / outer_path_rows; innerscansel = inner_rows / inner_path_rows; /* cost of source data */ if (outersortkeys) /* do we need to sort outer? */ { cost_sort(&sort_path, root, outersortkeys, outer_path->total_cost, outer_path_rows, outer_path->parent->width); startup_cost += sort_path.startup_cost; run_cost += (sort_path.total_cost - sort_path.startup_cost) * outerscansel; } else { startup_cost += outer_path->startup_cost; run_cost += (outer_path->total_cost - outer_path->startup_cost) * outerscansel; } if (innersortkeys) /* do we need to sort inner? */ { cost_sort(&sort_path, root, innersortkeys, inner_path->total_cost, inner_path_rows, inner_path->parent->width); startup_cost += sort_path.startup_cost; run_cost += (sort_path.total_cost - sort_path.startup_cost) * innerscansel * rescanratio; } else { startup_cost += inner_path->startup_cost; run_cost += (inner_path->total_cost - inner_path->startup_cost) * innerscansel * rescanratio; } /* CPU costs */ /* * If we're doing JOIN_IN then we will stop outputting inner tuples * for an outer tuple as soon as we have one match. Account for the * effects of this by scaling down the cost estimates in proportion to * the expected output size. (This assumes that all the quals * attached to the join are IN quals, which should be true.) */ if (path->jpath.jointype == JOIN_IN && qptuples > path->jpath.path.parent->rows) joininfactor = path->jpath.path.parent->rows / qptuples; else joininfactor = 1.0; /* * The number of tuple comparisons needed is approximately number of * outer rows plus number of inner rows plus number of rescanned * tuples (can we refine this?). At each one, we need to evaluate the * mergejoin quals. NOTE: JOIN_IN mode does not save any work here, * so do NOT include joininfactor. */ startup_cost += merge_qual_cost.startup; run_cost += merge_qual_cost.per_tuple * (outer_rows + inner_rows * rescanratio); /* * For each tuple that gets through the mergejoin proper, we charge * cpu_tuple_cost plus the cost of evaluating additional restriction * clauses that are to be applied at the join. (This is pessimistic * since not all of the quals may get evaluated at each tuple.) This * work is skipped in JOIN_IN mode, so apply the factor. */ startup_cost += qp_qual_cost.startup; cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple; run_cost += cpu_per_tuple * mergejointuples * joininfactor; path->jpath.path.startup_cost = startup_cost; path->jpath.path.total_cost = startup_cost + run_cost;}/* * cost_hashjoin * Determines and returns the cost of joining two relations using the * hash join algorithm. * * 'path' is already filled in except for the cost fields * * Note: path's hashclauses should be a subset of the joinrestrictinfo list */voidcost_hashjoin(HashPath *path, Query *root){ Path *outer_path = path->jpath.outerjoinpath; Path *inner_path = path->jpath.innerjoinpath; List *restrictlist = path->jpath.joinrestrictinfo; List *hashclauses = path->path_hashclauses; Cost startup_cost = 0; Cost run_cost = 0; Cost cpu_per_tuple; Selectivity hash_selec; Selectivity qp_selec; QualCost hash_qual_cost; QualCost qp_qual_cost; double hashjointuples; double qptuples; double outer_path_rows = PATH_ROWS(outer_path); double inner_path_rows = PATH_ROWS(inner_path); double outerbytes = relation_byte_size(outer_path_rows, outer_path->parent->width); double innerbytes = relation_byte_size(inner_path_rows, inner_path->parent->width); int num_hashclauses = length(hashclauses); int virtualbuckets; int physicalbuckets; int numbatches; Selectivity innerbucketsize; Selectivity joininfactor; List *hcl; List *qpquals; if (!enable_hashjoin) startup_cost += disable_cost; /* * Compute cost and selectivity of the hashquals and qpquals (other * restriction clauses) separately. We use approx_selectivity here * for speed --- in most cases, any errors won't affect the result * much. * * Note: it's probably bogus to use the normal selectivity calculation * here when either the outer or inner path is a UniquePath. */ hash_selec = approx_selectivity(root, hashclauses, path->jpath.jointype); cost_qual_eval(&hash_qual_cost, hashclauses); qpquals = set_ptrDifference(restrictlist, hashclauses); qp_selec = approx_selectivity(root, qpquals, path->jpath.jointype); cost_qual_eval(&qp_qual_cost, qpquals); freeList(qpquals); /* approx # tuples passing the hash quals */ hashjointuples = ceil(hash_selec * outer_path_rows * inner_path_rows); /* approx # tuples passing qpquals as well */ qptuples = ceil(hashjointuples * qp_selec); /* cost of source data */ startup_cost += outer_path->startup_cost; run_cost += outer_path->total_cost - outer_path->startup_cost; startup_cost += inner_path->total_cost; /* * Cost of computing hash function: must do it once per input tuple. * We charge one cpu_operator_cost for each column's hash function. * * XXX when a hashclause is more complex than a single operator, we * really should charge the extra eval costs of the left or right * side, as appropriate, here. This seems more work than it's worth * at the moment. */ startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows; run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows; /* Get hash table size that executor would use for inner relation */ ExecChooseHashTableSize(inner_path_rows, inner_path->parent->width, &virtualbuckets, &physicalbuckets, &numbatches); /* * Determine bucketsize fraction for inner relation. We use the * smallest bucketsize estimated for any individual hashclause; this * is undoubtedly conservative. * * BUT: if inner relation has been unique-ified, we can assume it's good * for hashing. This is important both because it's the right answer, * and because we avoid contaminating the cache with a value that's * wrong for non-unique-ified paths. */ if (IsA(inner_path, UniquePath)) innerbucketsize = 1.0 / virtualbuckets; else { innerbucketsize = 1.0; foreach(hcl, hashclauses) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); Selectivity thisbucketsize; Assert(IsA(restrictinfo, RestrictInfo)); /* * First we have to figure out which side of the hashjoin * clause is the inner side. * * Since we tend to visit the same clauses over and over when * planning a large query, we cache the bucketsize estimate in * the RestrictInfo node to avoid repeated lookups of * statistics. */ if (bms_is_subset(restrictinfo->right_relids, inner_path->parent->relids)) { /* righthand side is inner */ thisbucketsize = restrictinfo->right_bucketsize; if (thisbucketsize < 0) { /* not cached yet */ thisbucketsize = estimate_hash_bucketsize(root, (Var *) get_rightop(restrictinfo->clause), virtualbuckets); restrictinfo->right_bucketsize = thisbucketsize; } } else { Assert(bms_is_subset(restrictinfo->left_relids, inner_path->parent->relids)); /* lefthand side is inner */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?