📄 nodemergejoin.c

📁 PostgreSQL7.4.6 for Linux
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*------------------------------------------------------------------------- * * nodeMergejoin.c *	  routines supporting merge joins * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.62 2003/09/25 06:57:59 petere Exp $ * *------------------------------------------------------------------------- *//* * INTERFACE ROUTINES *		ExecMergeJoin			mergejoin outer and inner relations. *		ExecInitMergeJoin		creates and initializes run time states *		ExecEndMergeJoin		cleans up the node. * * NOTES *		Essential operation of the merge join algorithm is as follows: * *		Join {												   - *			get initial outer and inner tuples				INITIALIZE *			Skip Inner										SKIPINNER *			mark inner position								JOINMARK *			do forever {									   - *				while (outer == inner) {					JOINTEST *					join tuples								JOINTUPLES *					advance inner position					NEXTINNER *				}											   - *				advance outer position						NEXTOUTER *				if (outer == mark) {						TESTOUTER *					restore inner position to mark			TESTOUTER *					continue								   - *				} else {									   - *					Skip Outer								SKIPOUTER *					mark inner position						JOINMARK *				}											   - *			}												   - *		}													   - * *		Skip Outer {										SKIPOUTER_BEGIN *			if (inner == outer) Join Tuples					JOINTUPLES *			while (outer < inner)							SKIPOUTER_TEST *				advance outer								SKIPOUTER_ADVANCE *			if (outer > inner)								SKIPOUTER_TEST *				Skip Inner									SKIPINNER *		}													   - * *		Skip Inner {										SKIPINNER_BEGIN *			if (inner == outer) Join Tuples					JOINTUPLES *			while (outer > inner)							SKIPINNER_TEST *				advance inner								SKIPINNER_ADVANCE *			if (outer < inner)								SKIPINNER_TEST *				Skip Outer									SKIPOUTER *		}													   - * *		The merge join operation is coded in the fashion *		of a state machine.  At each state, we do something and then *		proceed to another state.  This state is stored in the node's *		execution state information and is preserved across calls to *		ExecMergeJoin. -cim 10/31/89 * */#include "postgres.h"#include "access/heapam.h"#include "access/printtup.h"#include "catalog/pg_operator.h"#include "executor/execdebug.h"#include "executor/execdefs.h"#include "executor/nodeMergejoin.h"#include "utils/lsyscache.h"#include "utils/syscache.h"static bool MergeCompare(List *eqQual, List *compareQual, ExprContext *econtext);#define MarkInnerTuple(innerTupleSlot, mergestate) \( \	ExecStoreTuple(heap_copytuple((innerTupleSlot)->val), \				   (mergestate)->mj_MarkedTupleSlot, \				   InvalidBuffer, \				   true) \)/* ---------------------------------------------------------------- *		MJFormSkipQuals * *		This takes the mergeclause which is a qualification of the *		form ((= expr expr) (= expr expr) ...) and forms new lists *		of the forms ((< expr expr) (< expr expr) ...) and *		((> expr expr) (> expr expr) ...).	These lists will be used *		by ExecMergeJoin() to determine if we should skip tuples. *		(We expect there to be suitable operators because the "=" operators *		were marked mergejoinable; however, there might be a different *		one needed in each qual clause.) * ---------------------------------------------------------------- */static voidMJFormSkipQuals(List *qualList, List **ltQuals, List **gtQuals,				PlanState *parent){	List	   *ltexprs,			   *gtexprs,			   *ltcdr,			   *gtcdr;	/*	 * Make modifiable copies of the qualList.	 */	ltexprs = (List *) copyObject((Node *) qualList);	gtexprs = (List *) copyObject((Node *) qualList);	/*	 * Scan both lists in parallel, so that we can update the operators	 * with the minimum number of syscache searches.	 */	ltcdr = ltexprs;	foreach(gtcdr, gtexprs)	{		OpExpr	   *ltop = (OpExpr *) lfirst(ltcdr);		OpExpr	   *gtop = (OpExpr *) lfirst(gtcdr);		/*		 * The two ops should be identical, so use either one for lookup.		 */		if (!IsA(ltop, OpExpr))			elog(ERROR, "mergejoin clause is not an OpExpr");		/*		 * Lookup the operators, and replace the data in the copied		 * operator nodes.		 */		op_mergejoin_crossops(ltop->opno,							  &ltop->opno,							  &gtop->opno,							  &ltop->opfuncid,							  &gtop->opfuncid);		ltcdr = lnext(ltcdr);	}	/*	 * Prepare both lists for execution.	 */	*ltQuals = (List *) ExecInitExpr((Expr *) ltexprs, parent);	*gtQuals = (List *) ExecInitExpr((Expr *) gtexprs, parent);}/* ---------------------------------------------------------------- *		MergeCompare * *		Compare the keys according to 'compareQual' which is of the *		form: { (key1a > key2a) (key1b > key2b) ... }. * *		(actually, it could also be of the form (key1a < key2a)...) * *		This is different from calling ExecQual because ExecQual returns *		true only if ALL the comparison clauses are satisfied. *		However, there is an order of significance among the keys with *		the first keys being most significant. Therefore, the clauses *		are evaluated in order and the 'compareQual' is satisfied *		if (key1i > key2i) is true and (key1j = key2j) for 0 < j < i. *		We use the original mergeclause items to detect equality. * ---------------------------------------------------------------- */static boolMergeCompare(List *eqQual, List *compareQual, ExprContext *econtext){	bool		result;	MemoryContext oldContext;	List	   *clause;	List	   *eqclause;	/*	 * Do expression eval in short-lived context.	 */	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);	/*	 * for each pair of clauses, test them until our compare conditions	 * are satisfied. if we reach the end of the list, none of our key	 * greater-than conditions were satisfied so we return false.	 */	result = false;				/* assume 'false' result */	eqclause = eqQual;	foreach(clause, compareQual)	{		Datum		const_value;		bool		isNull;		/*		 * first test if our compare clause is satisfied. if so then		 * return true.		 *		 * A NULL result is considered false.		 */		const_value = ExecEvalExpr((ExprState *) lfirst(clause),								   econtext,								   &isNull,								   NULL);		if (DatumGetBool(const_value) && !isNull)		{			result = true;			break;		}		/*-----------		 * ok, the compare clause failed so we test if the keys are		 * equal... if key1 != key2, we return false. otherwise		 * key1 = key2 so we move on to the next pair of keys.		 *-----------		 */		const_value = ExecEvalExpr((ExprState *) lfirst(eqclause),								   econtext,								   &isNull,								   NULL);		if (!DatumGetBool(const_value) || isNull)			break;				/* return false */		eqclause = lnext(eqclause);	}	MemoryContextSwitchTo(oldContext);	return result;}/* ---------------------------------------------------------------- *		ExecMergeTupleDump * *		This function is called through the MJ_dump() macro *		when EXEC_MERGEJOINDEBUG is defined * ---------------------------------------------------------------- */#ifdef EXEC_MERGEJOINDEBUGstatic voidExecMergeTupleDumpOuter(MergeJoinState *mergestate){	TupleTableSlot *outerSlot = mergestate->mj_OuterTupleSlot;	printf("==== outer tuple ====\n");	if (TupIsNull(outerSlot))		printf("(nil)\n");	else		MJ_debugtup(outerSlot->val,					outerSlot->ttc_tupleDescriptor);}static voidExecMergeTupleDumpInner(MergeJoinState *mergestate){	TupleTableSlot *innerSlot = mergestate->mj_InnerTupleSlot;	printf("==== inner tuple ====\n");	if (TupIsNull(innerSlot))		printf("(nil)\n");	else		MJ_debugtup(innerSlot->val,					innerSlot->ttc_tupleDescriptor);}static voidExecMergeTupleDumpMarked(MergeJoinState *mergestate){	TupleTableSlot *markedSlot = mergestate->mj_MarkedTupleSlot;	printf("==== marked tuple ====\n");	if (TupIsNull(markedSlot))		printf("(nil)\n");	else		MJ_debugtup(markedSlot->val,					markedSlot->ttc_tupleDescriptor);}static voidExecMergeTupleDump(MergeJoinState *mergestate){	printf("******** ExecMergeTupleDump ********\n");	ExecMergeTupleDumpOuter(mergestate);	ExecMergeTupleDumpInner(mergestate);	ExecMergeTupleDumpMarked(mergestate);	printf("******** \n");}#endif/* ---------------------------------------------------------------- *		ExecMergeJoin * * old comments *		Details of the merge-join routines: * *		(1) ">" and "<" operators * *		Merge-join is done by joining the inner and outer tuples satisfying *		the join clauses of the form ((= outerKey innerKey) ...). *		The join clauses is provided by the query planner and may contain *		more than one (= outerKey innerKey) clauses (for composite key). * *		However, the query executor needs to know whether an outer *		tuple is "greater/smaller" than an inner tuple so that it can *		"synchronize" the two relations. For e.g., consider the following *		relations: * *				outer: (0 ^1 1 2 5 5 5 6 6 7)	current tuple: 1 *				inner: (1 ^3 5 5 5 5 6)			current tuple: 3 * *		To continue the merge-join, the executor needs to scan both inner *		and outer relations till the matching tuples 5. It needs to know *		that currently inner tuple 3 is "greater" than outer tuple 1 and *		therefore it should scan the outer relation first to find a *		matching tuple and so on. * *		Therefore, when initializing the merge-join node, the executor *		creates the "greater/smaller" clause by substituting the "=" *		operator in the join clauses with the corresponding ">" operator. *		The opposite "smaller/greater" clause is formed by substituting "<". * *		Note: prior to v6.5, the relational clauses were formed using the *		sort op used to sort the inner relation, which of course would fail *		if the outer and inner keys were of different data types. *		In the current code, we instead assume that operators named "<" and ">" *		will do the right thing.  This should be true since the mergejoin "=" *		operator's pg_operator entry will have told the planner to sort by *		"<" for each of the left and right sides. * *		(2) repositioning inner "cursor" * *		Consider the above relations and suppose that the executor has *		just joined the first outer "5" with the last inner "5". The *		next step is of course to join the second outer "5" with all *		the inner "5's". This requires repositioning the inner "cursor" *		to point at the first inner "5". This is done by "marking" the *		first inner 5 and restore the "cursor" to it before joining *		with the second outer 5. The access method interface provides *		routines to mark and restore to a tuple. * ---------------------------------------------------------------- */TupleTableSlot *ExecMergeJoin(MergeJoinState *node){	EState	   *estate;	ScanDirection direction;	List	   *innerSkipQual;	List	   *outerSkipQual;	List	   *mergeclauses;	List	   *joinqual;	List	   *otherqual;	bool		qualResult;	bool		compareResult;	PlanState  *innerPlan;	TupleTableSlot *innerTupleSlot;	PlanState  *outerPlan;	TupleTableSlot *outerTupleSlot;	ExprContext *econtext;	bool		doFillOuter;	bool		doFillInner;	/*	 * get information from node	 */	estate = node->js.ps.state;	direction = estate->es_direction;	innerPlan = innerPlanState(node);	outerPlan = outerPlanState(node);	econtext = node->js.ps.ps_ExprContext;	mergeclauses = node->mergeclauses;	joinqual = node->js.joinqual;	otherqual = node->js.ps.qual;	switch (node->js.jointype)	{		case JOIN_INNER:		case JOIN_IN:			doFillOuter = false;			doFillInner = false;			break;		case JOIN_LEFT:			doFillOuter = true;			doFillInner = false;			break;		case JOIN_FULL:			doFillOuter = true;			doFillInner = true;			break;		case JOIN_RIGHT:			doFillOuter = false;			doFillInner = true;			break;		default:			elog(ERROR, "unrecognized join type: %d",				 (int) node->js.jointype);			doFillOuter = false;	/* keep compiler quiet */			doFillInner = false;			break;	}	if (ScanDirectionIsForward(direction))	{		outerSkipQual = node->mj_OuterSkipQual;		innerSkipQual = node->mj_InnerSkipQual;	}	else	{		outerSkipQual = node->mj_InnerSkipQual;		innerSkipQual = node->mj_OuterSkipQual;	}	/*	 * Check to see if we're still projecting out tuples from a previous	 * join tuple (because there is a function-returning-set in the	 * projection expressions).  If so, try to project another one.	 */	if (node->js.ps.ps_TupFromTlist)	{		TupleTableSlot *result;		ExprDoneCond isDone;		result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);		if (isDone == ExprMultipleResult)			return result;		/* Done with that source tuple... */		node->js.ps.ps_TupFromTlist = false;	}	/*	 * Reset per-tuple memory context to free any expression evaluation	 * storage allocated in the previous tuple cycle.  Note this can't	 * happen until we're done projecting out tuples from a join tuple.	 */	ResetExprContext(econtext);	/*	 * ok, everything is setup.. let's go to work	 */	for (;;)	{		/*		 * get the current state of the join and do things accordingly.		 * Note: The join states are highlighted with 32-* comments for		 * improved readability.		 */		MJ_dump(node);		switch (node->mj_JoinState)		{				/*				 * EXEC_MJ_INITIALIZE means that this is the first time				 * ExecMergeJoin() has been called and so we have to fetch				 * the first tuple for both outer and inner subplans. If				 * we fail to get a tuple here, then that subplan is				 * empty, and we either end the join or go to one of the				 * fill-remaining-tuples states.				 */			case EXEC_MJ_INITIALIZE:				MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE\n");				outerTupleSlot = ExecProcNode(outerPlan);				node->mj_OuterTupleSlot = outerTupleSlot;				if (TupIsNull(outerTupleSlot))				{					MJ_printf("ExecMergeJoin: outer subplan is empty\n");					if (doFillInner)					{						/*						 * Need to emit right-join tuples for remaining						 * inner tuples.  We set MatchedInner = true to						 * force the ENDOUTER state to advance inner.						 */						node->mj_JoinState = EXEC_MJ_ENDOUTER;						node->mj_MatchedInner = true;						break;					}					/* Otherwise we're done. */					return NULL;				}				innerTupleSlot = ExecProcNode(innerPlan);				node->mj_InnerTupleSlot = innerTupleSlot;				if (TupIsNull(innerTupleSlot))				{					MJ_printf("ExecMergeJoin: inner subplan is empty\n");					if (doFillOuter)					{						/*						 * Need to emit left-join tuples for all outer						 * tuples, including the one we just fetched.  We						 * set MatchedOuter = false to force the ENDINNER						 * state to emit this tuple before advancing						 * outer.						 */						node->mj_JoinState = EXEC_MJ_ENDINNER;						node->mj_MatchedOuter = false;						break;					}					/* Otherwise we're done. */					return NULL;				}				/*				 * OK, we have the initial tuples.	Begin by skipping				 * unmatched inner tuples.				 */				node->mj_JoinState = EXEC_MJ_SKIPINNER_BEGIN;				break;				/*				 * EXEC_MJ_JOINMARK means we have just found a new outer				 * tuple and a possible matching inner tuple. This is the				 * case after the INITIALIZE, SKIPOUTER or SKIPINNER				 * states.				 */			case EXEC_MJ_JOINMARK:				MJ_printf("ExecMergeJoin: EXEC_MJ_JOINMARK\n");				ExecMarkPos(innerPlan);				MarkInnerTuple(node->mj_InnerTupleSlot, node);				node->mj_JoinState = EXEC_MJ_JOINTEST;				break;				/*				 * EXEC_MJ_JOINTEST means we have two tuples which might				 * satisfy the merge clause, so we test them.				 *
12 3 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -