nodemergejoin.c

来自「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」· C语言代码 · 共 1,701 行 · 第 1/4 页
1,701 行
/*------------------------------------------------------------------------- * * nodeMergejoin.c *	  routines supporting merge joins * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION *	  $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.75.2.2 2006/03/17 19:38:20 tgl Exp $ * *------------------------------------------------------------------------- *//* * INTERFACE ROUTINES *		ExecMergeJoin			mergejoin outer and inner relations. *		ExecInitMergeJoin		creates and initializes run time states *		ExecEndMergeJoin		cleans up the node. * * NOTES * *		Merge-join is done by joining the inner and outer tuples satisfying *		join clauses of the form ((= outerKey innerKey) ...). *		The join clause list is provided by the query planner and may contain *		more than one (= outerKey innerKey) clause (for composite sort key). * *		However, the query executor needs to know whether an outer *		tuple is "greater/smaller" than an inner tuple so that it can *		"synchronize" the two relations. For example, consider the following *		relations: * *				outer: (0 ^1 1 2 5 5 5 6 6 7)	current tuple: 1 *				inner: (1 ^3 5 5 5 5 6)			current tuple: 3 * *		To continue the merge-join, the executor needs to scan both inner *		and outer relations till the matching tuples 5. It needs to know *		that currently inner tuple 3 is "greater" than outer tuple 1 and *		therefore it should scan the outer relation first to find a *		matching tuple and so on. * *		Therefore, when initializing the merge-join node, we look up the *		associated sort operators.	We assume the planner has seen to it *		that the inputs are correctly sorted by these operators.  Rather *		than directly executing the merge join clauses, we evaluate the *		left and right key expressions separately and then compare the *		columns one at a time (see MJCompare). * * *		Consider the above relations and suppose that the executor has *		just joined the first outer "5" with the last inner "5". The *		next step is of course to join the second outer "5" with all *		the inner "5's". This requires repositioning the inner "cursor" *		to point at the first inner "5". This is done by "marking" the *		first inner 5 so we can restore the "cursor" to it before joining *		with the second outer 5. The access method interface provides *		routines to mark and restore to a tuple. * * *		Essential operation of the merge join algorithm is as follows: * *		Join { *			get initial outer and inner tuples				INITIALIZE *			do forever { *				while (outer != inner) {					SKIP_TEST *					if (outer < inner) *						advance outer						SKIPOUTER_ADVANCE *					else *						advance inner						SKIPINNER_ADVANCE *				} *				mark inner position							SKIP_TEST *				do forever { *					while (outer == inner) { *						join tuples							JOINTUPLES *						advance inner position				NEXTINNER *					} *					advance outer position					NEXTOUTER *					if (outer == mark)						TESTOUTER *						restore inner position to mark		TESTOUTER *					else *						break	// return to top of outer loop *				} *			} *		} * *		The merge join operation is coded in the fashion *		of a state machine.  At each state, we do something and then *		proceed to another state.  This state is stored in the node's *		execution state information and is preserved across calls to *		ExecMergeJoin. -cim 10/31/89 */#include "postgres.h"#include "access/heapam.h"#include "access/nbtree.h"#include "access/printtup.h"#include "catalog/pg_amop.h"#include "catalog/pg_operator.h"#include "executor/execdebug.h"#include "executor/execdefs.h"#include "executor/nodeMergejoin.h"#include "miscadmin.h"#include "utils/acl.h"#include "utils/catcache.h"#include "utils/lsyscache.h"#include "utils/memutils.h"#include "utils/syscache.h"/* * Comparison strategies supported by MJCompare * * XXX eventually should extend these to support descending-order sorts. * There are some tricky issues however about being sure we are on the same * page as the underlying sort or index as to which end NULLs sort to. */typedef enum{	MERGEFUNC_LT,				/* raw "<" operator */	MERGEFUNC_CMP				/* -1 / 0 / 1 three-way comparator */} MergeFunctionKind;/* Runtime data for each mergejoin clause */typedef struct MergeJoinClauseData{	/* Executable expression trees */	ExprState  *lexpr;			/* left-hand (outer) input expression */	ExprState  *rexpr;			/* right-hand (inner) input expression */	/*	 * If we have a current left or right input tuple, the values of the	 * expressions are loaded into these fields:	 */	Datum		ldatum;			/* current left-hand value */	Datum		rdatum;			/* current right-hand value */	bool		lisnull;		/* and their isnull flags */	bool		risnull;	/*	 * Remember whether mergejoin operator is strict (usually it will be).	 * NOTE: if it's not strict, we still assume it cannot return true for one	 * null and one non-null input.	 */	bool		mergestrict;	/*	 * The comparison strategy in use, and the lookup info to let us call the	 * needed comparison routines.	eqfinfo is the "=" operator itself.	 * cmpfinfo is either the btree comparator or the "<" operator.	 */	MergeFunctionKind cmpstrategy;	FmgrInfo	eqfinfo;	FmgrInfo	cmpfinfo;} MergeJoinClauseData;#define MarkInnerTuple(innerTupleSlot, mergestate) \	ExecCopySlot((mergestate)->mj_MarkedTupleSlot, (innerTupleSlot))/* * MJExamineQuals * * This deconstructs the list of mergejoinable expressions, which is given * to us by the planner in the form of a list of "leftexpr = rightexpr" * expression trees in the order matching the sort columns of the inputs. * We build an array of MergeJoinClause structs containing the information * we will need at runtime.  Each struct essentially tells us how to compare * the two expressions from the original clause. * * The best, most efficient way to compare two expressions is to use a btree * comparison support routine, since that requires only one function call * per comparison.	Hence we try to find a btree opclass that matches the * mergejoinable operator.	If we cannot find one, we'll have to call both * the "=" and (often) the "<" operator for each comparison. */static MergeJoinClauseMJExamineQuals(List *qualList, PlanState *parent){	MergeJoinClause clauses;	int			nClauses = list_length(qualList);	int			iClause;	ListCell   *l;	clauses = (MergeJoinClause) palloc0(nClauses * sizeof(MergeJoinClauseData));	iClause = 0;	foreach(l, qualList)	{		OpExpr	   *qual = (OpExpr *) lfirst(l);		MergeJoinClause clause = &clauses[iClause];		Oid			ltop;		Oid			gtop;		RegProcedure ltproc;		RegProcedure gtproc;		AclResult	aclresult;		CatCList   *catlist;		int			i;		if (!IsA(qual, OpExpr))			elog(ERROR, "mergejoin clause is not an OpExpr");		/*		 * Prepare the input expressions for execution.		 */		clause->lexpr = ExecInitExpr((Expr *) linitial(qual->args), parent);		clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent);		/*		 * Check permission to call the mergejoinable operator. For		 * predictability, we check this even if we end up not using it.		 */		aclresult = pg_proc_aclcheck(qual->opfuncid, GetUserId(), ACL_EXECUTE);		if (aclresult != ACLCHECK_OK)			aclcheck_error(aclresult, ACL_KIND_PROC,						   get_func_name(qual->opfuncid));		/* Set up the fmgr lookup information */		fmgr_info(qual->opfuncid, &(clause->eqfinfo));		/* And remember strictness */		clause->mergestrict = clause->eqfinfo.fn_strict;		/*		 * Lookup the comparison operators that go with the mergejoinable		 * top-level operator.	(This will elog if the operator isn't		 * mergejoinable, which would be the planner's mistake.)		 */		op_mergejoin_crossops(qual->opno,							  &ltop,							  &gtop,							  &ltproc,							  &gtproc);		clause->cmpstrategy = MERGEFUNC_LT;		/*		 * Look for a btree opclass including all three operators. This is		 * much like SelectSortFunction except we insist on matching all the		 * operators provided, and it can be a cross-type opclass.		 *		 * XXX for now, insist on forward sort so that NULLs can be counted on		 * to be high.		 */		catlist = SearchSysCacheList(AMOPOPID, 1,									 ObjectIdGetDatum(qual->opno),									 0, 0, 0);		for (i = 0; i < catlist->n_members; i++)		{			HeapTuple	tuple = &catlist->members[i]->tuple;			Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);			Oid			opcid = aform->amopclaid;			if (aform->amopstrategy != BTEqualStrategyNumber)				continue;			if (!opclass_is_btree(opcid))				continue;			if (get_op_opclass_strategy(ltop, opcid) == BTLessStrategyNumber &&			 get_op_opclass_strategy(gtop, opcid) == BTGreaterStrategyNumber)			{				clause->cmpstrategy = MERGEFUNC_CMP;				ltproc = get_opclass_proc(opcid, aform->amopsubtype,										  BTORDER_PROC);				Assert(RegProcedureIsValid(ltproc));				break;			/* done looking */			}		}		ReleaseSysCacheList(catlist);		/* Check permission to call "<" operator or cmp function */		aclresult = pg_proc_aclcheck(ltproc, GetUserId(), ACL_EXECUTE);		if (aclresult != ACLCHECK_OK)			aclcheck_error(aclresult, ACL_KIND_PROC,						   get_func_name(ltproc));		/* Set up the fmgr lookup information */		fmgr_info(ltproc, &(clause->cmpfinfo));		iClause++;	}	return clauses;}/* * MJEvalOuterValues * * Compute the values of the mergejoined expressions for the current * outer tuple.  We also detect whether it's impossible for the current * outer tuple to match anything --- this is true if it yields a NULL * input for any strict mergejoin operator. * * We evaluate the values in OuterEContext, which can be reset each * time we move to a new tuple. */static boolMJEvalOuterValues(MergeJoinState *mergestate){	ExprContext *econtext = mergestate->mj_OuterEContext;	bool		canmatch = true;	int			i;	MemoryContext oldContext;	ResetExprContext(econtext);	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);	econtext->ecxt_outertuple = mergestate->mj_OuterTupleSlot;	for (i = 0; i < mergestate->mj_NumClauses; i++)	{		MergeJoinClause clause = &mergestate->mj_Clauses[i];		clause->ldatum = ExecEvalExpr(clause->lexpr, econtext,									  &clause->lisnull, NULL);		if (clause->lisnull && clause->mergestrict)			canmatch = false;	}	MemoryContextSwitchTo(oldContext);	return canmatch;}/* * MJEvalInnerValues * * Same as above, but for the inner tuple.	Here, we have to be prepared * to load data from either the true current inner, or the marked inner, * so caller must tell us which slot to load from. */static boolMJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot){	ExprContext *econtext = mergestate->mj_InnerEContext;	bool		canmatch = true;	int			i;	MemoryContext oldContext;	ResetExprContext(econtext);	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);	econtext->ecxt_innertuple = innerslot;	for (i = 0; i < mergestate->mj_NumClauses; i++)	{		MergeJoinClause clause = &mergestate->mj_Clauses[i];		clause->rdatum = ExecEvalExpr(clause->rexpr, econtext,									  &clause->risnull, NULL);		if (clause->risnull && clause->mergestrict)			canmatch = false;	}	MemoryContextSwitchTo(oldContext);	return canmatch;}/* * MJCompare * * Compare the mergejoinable values of the current two input tuples * and return 0 if they are equal (ie, the mergejoin equalities all * succeed), +1 if outer > inner, -1 if outer < inner. * * MJEvalOuterValues and MJEvalInnerValues must already have been called * for the current outer and inner tuples, respectively. */static intMJCompare(MergeJoinState *mergestate){	int			result = 0;	bool		nulleqnull = false;	ExprContext *econtext = mergestate->js.ps.ps_ExprContext;	int			i;	MemoryContext oldContext;	FunctionCallInfoData fcinfo;	/*	 * Call the comparison functions in short-lived context, in case they leak	 * memory.	 */	ResetExprContext(econtext);	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);	for (i = 0; i < mergestate->mj_NumClauses; i++)	{		MergeJoinClause clause = &mergestate->mj_Clauses[i];		Datum		fresult;		/*		 * Deal with null inputs.  We treat NULL as sorting after non-NULL.		 *		 * If both inputs are NULL, and the comparison function isn't strict,		 * then we call it and check for a true result (this allows operators		 * that behave like IS NOT DISTINCT to be mergejoinable). If the		 * function is strict or returns false, we temporarily pretend NULL ==		 * NULL and contine checking remaining columns.		 */		if (clause->lisnull)		{			if (clause->risnull)			{				if (!clause->eqfinfo.fn_strict)				{					InitFunctionCallInfoData(fcinfo, &(clause->eqfinfo), 2,											 NULL, NULL);					fcinfo.arg[0] = clause->ldatum;					fcinfo.arg[1] = clause->rdatum;					fcinfo.argnull[0] = true;					fcinfo.argnull[1] = true;					fresult = FunctionCallInvoke(&fcinfo);					if (!fcinfo.isnull && DatumGetBool(fresult))					{						/* treat nulls as really equal */						continue;					}				}				nulleqnull = true;				continue;			}
nodemergejoin.c - 源码说明

本页面展示了「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」中的 nodemergejoin.c 源码文件，采用 C语言编程语言编写，共 1,701 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?