joinpath.c
来自「PostgreSQL7.4.6 for Linux」· C语言 代码 · 共 874 行 · 第 1/2 页
C
874 行
/*------------------------------------------------------------------------- * * joinpath.c * Routines to find all possible paths for processing a set of joins * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.82.2.1 2004/04/06 18:46:25 tgl Exp $ * *------------------------------------------------------------------------- */#include "postgres.h"#include <math.h>#include "optimizer/clauses.h"#include "optimizer/cost.h"#include "optimizer/pathnode.h"#include "optimizer/paths.h"#include "parser/parsetree.h"#include "utils/lsyscache.h"static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype);static void match_unsorted_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype);static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype);static List *select_mergejoin_clauses(RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype);/* * add_paths_to_joinrel * Given a join relation and two component rels from which it can be made, * consider all possible paths that use the two component rels as outer * and inner rel respectively. Add these paths to the join rel's pathlist * if they survive comparison with other paths (and remove any existing * paths that are dominated by these paths). * * Modifies the pathlist field of the joinrel node to contain the best * paths found so far. */voidadd_paths_to_joinrel(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, List *restrictlist){ List *mergeclause_list = NIL; /* * Find potential mergejoin clauses. We can skip this if we are not * interested in doing a mergejoin. However, mergejoin is currently * our only way of implementing full outer joins, so override * mergejoin disable if it's a full join. */ if (enable_mergejoin || jointype == JOIN_FULL) mergeclause_list = select_mergejoin_clauses(joinrel, outerrel, innerrel, restrictlist, jointype); /* * 1. Consider mergejoin paths where both relations must be explicitly * sorted. */ sort_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype); /* * 2. Consider paths where the outer relation need not be explicitly * sorted. This includes both nestloops and mergejoins where the outer * path is already ordered. */ match_unsorted_outer(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype);#ifdef NOT_USED /* * 3. Consider paths where the inner relation need not be explicitly * sorted. This includes mergejoins only (nestloops were already * built in match_unsorted_outer). * * Diked out as redundant 2/13/2000 -- tgl. There isn't any really * significant difference between the inner and outer side of a * mergejoin, so match_unsorted_inner creates no paths that aren't * equivalent to those made by match_unsorted_outer when * add_paths_to_joinrel() is invoked with the two rels given in the * other order. */ match_unsorted_inner(root, joinrel, outerrel, innerrel, restrictlist, mergeclause_list, jointype);#endif /* * 4. Consider paths where both outer and inner relations must be * hashed before being joined. */ if (enable_hashjoin) hash_inner_and_outer(root, joinrel, outerrel, innerrel, restrictlist, jointype);}/* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and * inner join relations on each available merge ordering. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do */static voidsort_inner_and_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype){ bool useallclauses; Path *outer_path; Path *inner_path; List *all_pathkeys; List *i; /* * If we are doing a right or full join, we must use *all* the * mergeclauses as join clauses, else we will not have a valid plan. */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_IN: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: useallclauses = true; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); useallclauses = false; /* keep compiler quiet */ break; } /* * We only consider the cheapest-total-cost input paths, since we are * assuming here that a sort is required. We will consider * cheapest-startup-cost input paths later, and only if they don't * need a sort. * * If unique-ification is requested, do it and then handle as a plain * inner join. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; if (jointype == JOIN_UNIQUE_OUTER) { outer_path = (Path *) create_unique_path(root, outerrel, outer_path); jointype = JOIN_INNER; } else if (jointype == JOIN_UNIQUE_INNER) { inner_path = (Path *) create_unique_path(root, innerrel, inner_path); jointype = JOIN_INNER; } /* * Each possible ordering of the available mergejoin clauses will * generate a differently-sorted result path at essentially the same * cost. We have no basis for choosing one over another at this level * of joining, but some sort orders may be more useful than others for * higher-level mergejoins, so it's worth considering multiple * orderings. * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be * redundant. Therefore, what we do is convert the mergeclause list * to a list of canonical pathkeys, and then consider different * orderings of the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For * now, we generate one path for each pathkey, listing that pathkey * first and the rest in random order. This should allow at least a * one-clause mergejoin without re-sorting against any other possible * mergejoin partner path. But if we've not guessed the right * ordering of secondary keys, we may end up evaluating clauses as * qpquals when they could have been done as mergeclauses. We need to * figure out a better way. (Two possible approaches: look at all the * relevant index relations to suggest plausible sort orders, or make * just one output path and somehow mark it as having a sort-order * that can be rearranged freely.) */ all_pathkeys = make_pathkeys_for_mergeclauses(root, mergeclause_list, outerrel); foreach(i, all_pathkeys) { List *front_pathkey = lfirst(i); List *cur_pathkeys; List *cur_mergeclauses; List *outerkeys; List *innerkeys; List *merge_pathkeys; /* Make a pathkey list with this guy first. */ if (i != all_pathkeys) cur_pathkeys = lcons(front_pathkey, lremove(front_pathkey, listCopy(all_pathkeys))); else cur_pathkeys = all_pathkeys; /* no work at first one... */ /* * Select mergeclause(s) that match this sort ordering. If we had * redundant merge clauses then we will get a subset of the * original clause list. There had better be some match, * however... */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, cur_pathkeys, mergeclause_list); Assert(cur_mergeclauses != NIL); /* Forget it if can't use all the clauses in right/full join */ if (useallclauses && length(cur_mergeclauses) != length(mergeclause_list)) continue; /* * Build sort pathkeys for both sides. * * Note: it's possible that the cheapest paths will already be sorted * properly. create_mergejoin_path will detect that case and * suppress an explicit sort step, so we needn't do so here. */ outerkeys = make_pathkeys_for_mergeclauses(root, cur_mergeclauses, outerrel); innerkeys = make_pathkeys_for_mergeclauses(root, cur_mergeclauses, innerrel); /* Build pathkeys representing output sort order. */ merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys); /* * And now we can make the path. */ add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, outer_path, inner_path, restrictlist, merge_pathkeys, cur_mergeclauses, outerkeys, innerkeys)); }}/* * match_unsorted_outer * Creates possible join paths for processing a single join relation * 'joinrel' by employing either iterative substitution or * mergejoining on each of its possible outer paths (considering * only outer paths that are already ordered well enough for merging). * * We always generate a nestloop path for each available outer path. * In fact we may generate as many as four: one on the cheapest-total-cost * inner path, one on the same with materialization, one on the * cheapest-startup-cost inner path (if different), * and one on the best inner-indexscan path (if any). * * We also consider mergejoins if mergejoin clauses are available. We have * two ways to generate the inner path for a mergejoin: sort the cheapest * inner path, or use an inner path that is already suitably ordered for the * merge. If we have several mergeclauses, it could be that there is no inner * path (or only a very expensive one) for the full list of mergeclauses, but * better paths exist if we truncate the mergeclause list (thereby discarding * some sort key requirements). So, we consider truncations of the * mergeclause list as well as the full list. (Ideally we'd consider all * subsets of the mergeclause list, but that seems way too expensive.) * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do */static voidmatch_unsorted_outer(Query *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype){ JoinType save_jointype = jointype; bool nestjoinOK; bool useallclauses; Path *inner_cheapest_startup = innerrel->cheapest_startup_path; Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *matpath = NULL; Path *bestinnerjoin = NULL; List *i; /* * Nestloop only supports inner, left, and IN joins. Also, if we are * doing a right or full join, we must use *all* the mergeclauses as * join clauses, else we will not have a valid plan. (Although these * two flags are currently inverses, keep them separate for clarity * and possible future changes.) */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_IN: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: nestjoinOK = true; useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: nestjoinOK = false; useallclauses = true; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); nestjoinOK = false; /* keep compiler quiet */ useallclauses = false; break; } /* * If we need to unique-ify the inner path, we will consider only the * cheapest inner. */ if (jointype == JOIN_UNIQUE_INNER) { inner_cheapest_total = (Path *) create_unique_path(root, innerrel, inner_cheapest_total); inner_cheapest_startup = inner_cheapest_total; jointype = JOIN_INNER; } else if (nestjoinOK) { /* * If the cheapest inner path is a join or seqscan, we should * consider materializing it. (This is a heuristic: we could * consider it always, but for inner indexscans it's probably a * waste of time.) */ if (!(IsA(inner_cheapest_total, IndexPath) || IsA(inner_cheapest_total, TidPath))) matpath = (Path *) create_material_path(innerrel, inner_cheapest_total); /* * Get the best innerjoin indexpath (if any) for this outer rel. * It's the same for all outer paths. */ bestinnerjoin = best_inner_indexscan(root, innerrel, outerrel->relids, jointype); } foreach(i, outerrel->pathlist) { Path *outerpath = (Path *) lfirst(i); List *merge_pathkeys; List *mergeclauses; List *innersortkeys; List *trialsortkeys; Path *cheapest_startup_inner; Path *cheapest_total_inner; int num_sortkeys; int sortkeycnt; /* * If we need to unique-ify the outer path, it's pointless to * consider any but the cheapest outer. */ if (save_jointype == JOIN_UNIQUE_OUTER) { if (outerpath != outerrel->cheapest_total_path) continue; outerpath = (Path *) create_unique_path(root, outerrel, outerpath); jointype = JOIN_INNER; } /* * The result will have this sort order (even if it is implemented * as a nestloop, and even if some of the mergeclauses are * implemented by qpquals rather than as true mergeclauses): */ merge_pathkeys = build_join_pathkeys(root, joinrel, outerpath->pathkeys); if (nestjoinOK)
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?