nodehashjoin.c

来自「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」· C语言代码 · 共 883 行 · 第 1/2 页
883 行
	hjstate->hj_CurTuple = NULL;	/*	 * Deconstruct the hash clauses into outer and inner argument values, so	 * that we can evaluate those subexpressions separately.  Also make a list	 * of the hash operator OIDs, in preparation for looking up the hash	 * functions to use.	 */	lclauses = NIL;	rclauses = NIL;	hoperators = NIL;	foreach(l, hjstate->hashclauses)	{		FuncExprState *fstate = (FuncExprState *) lfirst(l);		OpExpr	   *hclause;		Assert(IsA(fstate, FuncExprState));		hclause = (OpExpr *) fstate->xprstate.expr;		Assert(IsA(hclause, OpExpr));		lclauses = lappend(lclauses, linitial(fstate->args));		rclauses = lappend(rclauses, lsecond(fstate->args));		hoperators = lappend_oid(hoperators, hclause->opno);	}	hjstate->hj_OuterHashKeys = lclauses;	hjstate->hj_InnerHashKeys = rclauses;	hjstate->hj_HashOperators = hoperators;	/* child Hash node needs to evaluate inner hash keys, too */	((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;	hjstate->js.ps.ps_OuterTupleSlot = NULL;	hjstate->js.ps.ps_TupFromTlist = false;	hjstate->hj_NeedNewOuter = true;	hjstate->hj_MatchedOuter = false;	hjstate->hj_OuterNotEmpty = false;	return hjstate;}intExecCountSlotsHashJoin(HashJoin *node){	return ExecCountSlotsNode(outerPlan(node)) +		ExecCountSlotsNode(innerPlan(node)) +		HASHJOIN_NSLOTS;}/* ---------------------------------------------------------------- *		ExecEndHashJoin * *		clean up routine for HashJoin node * ---------------------------------------------------------------- */voidExecEndHashJoin(HashJoinState *node){	/*	 * Free hash table	 */	if (node->hj_HashTable)	{		ExecHashTableDestroy(node->hj_HashTable);		node->hj_HashTable = NULL;	}	/*	 * Free the exprcontext	 */	ExecFreeExprContext(&node->js.ps);	/*	 * clean out the tuple table	 */	ExecClearTuple(node->js.ps.ps_ResultTupleSlot);	ExecClearTuple(node->hj_OuterTupleSlot);	ExecClearTuple(node->hj_HashTupleSlot);	/*	 * clean up subtrees	 */	ExecEndNode(outerPlanState(node));	ExecEndNode(innerPlanState(node));}/* * 
ExecHashJoinOuterGetTuple * *		get the next outer tuple for hashjoin: either by *		executing a plan node in the first pass, or from *		the temp files for the hashjoin batches. * * Returns a null slot if no more outer tuples.  On success, the tuple's * hash value is stored at *hashvalue --- this is either originally computed, * or re-read from the temp file. */static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,						  HashJoinState *hjstate,						  uint32 *hashvalue){	HashJoinTable hashtable = hjstate->hj_HashTable;	int			curbatch = hashtable->curbatch;	TupleTableSlot *slot;	if (curbatch == 0)	{							/* if it is the first pass */		/*		 * Check to see if first outer tuple was already fetched by		 * ExecHashJoin() and not used yet.		 */		slot = hjstate->hj_FirstOuterTupleSlot;		if (!TupIsNull(slot))			hjstate->hj_FirstOuterTupleSlot = NULL;		else			slot = ExecProcNode(outerNode);		if (!TupIsNull(slot))		{			/*			 * We have to compute the tuple's hash value.			 */			ExprContext *econtext = hjstate->js.ps.ps_ExprContext;			econtext->ecxt_outertuple = slot;			*hashvalue = ExecHashGetHashValue(hashtable, econtext,											  hjstate->hj_OuterHashKeys);			/* remember outer relation is not empty for possible rescan */			hjstate->hj_OuterNotEmpty = true;			return slot;		}		/*		 * We have just reached the end of the first pass. Try to switch to a		 * saved batch.		 */		curbatch = ExecHashJoinNewBatch(hjstate);	}	/*	 * Try to read from a temp file. Loop allows us to advance to new batches	 * as needed.  NOTE: nbatch could increase inside ExecHashJoinNewBatch, so	 * don't try to optimize this loop.	 */	while (curbatch < hashtable->nbatch)	{		slot = ExecHashJoinGetSavedTuple(hjstate,										 hashtable->outerBatchFile[curbatch],										 hashvalue,										 hjstate->hj_OuterTupleSlot);		if (!TupIsNull(slot))			return slot;		curbatch = ExecHashJoinNewBatch(hjstate);	}	/* Out of batches... 
*/	return NULL;}/* * ExecHashJoinNewBatch *		switch to a new hashjoin batch * * Returns the number of the new batch (1..nbatch-1), or nbatch if no more. * We will never return a batch number that has an empty outer batch file. */static intExecHashJoinNewBatch(HashJoinState *hjstate){	HashJoinTable hashtable = hjstate->hj_HashTable;	int			nbatch;	int			curbatch;	BufFile    *innerFile;	TupleTableSlot *slot;	uint32		hashvalue;start_over:	nbatch = hashtable->nbatch;	curbatch = hashtable->curbatch;	if (curbatch > 0)	{		/*		 * We no longer need the previous outer batch file; close it right		 * away to free disk space.		 */		if (hashtable->outerBatchFile[curbatch])			BufFileClose(hashtable->outerBatchFile[curbatch]);		hashtable->outerBatchFile[curbatch] = NULL;	}	/*	 * We can always skip over any batches that are completely empty on both	 * sides.  We can sometimes skip over batches that are empty on only one	 * side, but there are exceptions:	 *	 * 1. In a LEFT JOIN, we have to process outer batches even if the inner	 * batch is empty.	 *	 * 2. If we have increased nbatch since the initial estimate, we have to	 * scan inner batches since they might contain tuples that need to be	 * reassigned to later inner batches.	 *	 * 3. Similarly, if we have increased nbatch since starting the outer	 * scan, we have to rescan outer batches in case they contain tuples that	 * need to be reassigned.	 */	curbatch++;	while (curbatch < nbatch &&		   (hashtable->outerBatchFile[curbatch] == NULL ||			hashtable->innerBatchFile[curbatch] == NULL))	{		if (hashtable->outerBatchFile[curbatch] &&			hjstate->js.jointype == JOIN_LEFT)			break;				/* must process due to rule 1 */		if (hashtable->innerBatchFile[curbatch] &&			nbatch != hashtable->nbatch_original)			break;				/* must process due to rule 2 */		if (hashtable->outerBatchFile[curbatch] &&			nbatch != hashtable->nbatch_outstart)			break;				/* must process due to rule 3 */		/* We can ignore this batch. 
*/		/* Release associated temp files right away. */		if (hashtable->innerBatchFile[curbatch])			BufFileClose(hashtable->innerBatchFile[curbatch]);		hashtable->innerBatchFile[curbatch] = NULL;		if (hashtable->outerBatchFile[curbatch])			BufFileClose(hashtable->outerBatchFile[curbatch]);		hashtable->outerBatchFile[curbatch] = NULL;		curbatch++;	}	if (curbatch >= nbatch)		return curbatch;		/* no more batches */	hashtable->curbatch = curbatch;	/*	 * Reload the hash table with the new inner batch (which could be empty)	 */	ExecHashTableReset(hashtable);	innerFile = hashtable->innerBatchFile[curbatch];	if (innerFile != NULL)	{		if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))			ereport(ERROR,					(errcode_for_file_access(),				   errmsg("could not rewind hash-join temporary file: %m")));		while ((slot = ExecHashJoinGetSavedTuple(hjstate,												 innerFile,												 &hashvalue,												 hjstate->hj_HashTupleSlot)))		{			/*			 * NOTE: some tuples may be sent to future batches.  Also, it is			 * possible for hashtable->nbatch to be increased here!			 */			ExecHashTableInsert(hashtable,								ExecFetchSlotTuple(slot),								hashvalue);		}		/*		 * after we build the hash table, the inner batch file is no longer		 * needed		 */		BufFileClose(innerFile);		hashtable->innerBatchFile[curbatch] = NULL;	}	/*	 * If there's no outer batch file, advance to next batch.	 */	if (hashtable->outerBatchFile[curbatch] == NULL)		goto start_over;	/*	 * Rewind outer batch file, so that we can start reading it.	 */	if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not rewind hash-join temporary file: %m")));	return curbatch;}/* * ExecHashJoinSaveTuple *		save a tuple to a batch file. * * The data recorded in the file for each tuple is its hash value, * then an image of its HeapTupleData (with meaningless t_data pointer) * followed by the HeapTupleHeader and tuple data. 
* * Note: it is important always to call this in the regular executor * context, not in a shorter-lived context; else the temp file buffers * will get messed up. */voidExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,					  BufFile **fileptr){	BufFile    *file = *fileptr;	size_t		written;	if (file == NULL)	{		/* First write to this batch file, so open it. */		file = BufFileCreateTemp(false);		*fileptr = file;	}	written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));	if (written != sizeof(uint32))		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not write to hash-join temporary file: %m")));	written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));	if (written != sizeof(HeapTupleData))		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not write to hash-join temporary file: %m")));	written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);	if (written != (size_t) heapTuple->t_len)		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not write to hash-join temporary file: %m")));}/* * ExecHashJoinGetSavedTuple *		read the next tuple from a batch file.	Return NULL if no more. * * On success, *hashvalue is set to the tuple's hash value, and the tuple * itself is stored in the given slot. 
*/static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,						  BufFile *file,						  uint32 *hashvalue,						  TupleTableSlot *tupleSlot){	HeapTupleData htup;	size_t		nread;	HeapTuple	heapTuple;	nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32));	if (nread == 0)		return NULL;			/* end of file */	if (nread != sizeof(uint32))		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not read from hash-join temporary file: %m")));	nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));	if (nread != sizeof(HeapTupleData))		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not read from hash-join temporary file: %m")));	heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);	memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));	heapTuple->t_datamcxt = CurrentMemoryContext;	heapTuple->t_data = (HeapTupleHeader)		((char *) heapTuple + HEAPTUPLESIZE);	nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);	if (nread != (size_t) htup.t_len)		ereport(ERROR,				(errcode_for_file_access(),				 errmsg("could not read from hash-join temporary file: %m")));	return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);}voidExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt){	/*	 * In a multi-batch join, we currently have to do rescans the hard way,	 * primarily because batch temp files may have already been released. But	 * if it's a single-batch join, and there is no parameter change for the	 * inner subnode, then we can just re-use the existing hash table without	 * rebuilding it.	 */	if (node->hj_HashTable != NULL)	{		if (node->hj_HashTable->nbatch == 1 &&			((PlanState *) node)->righttree->chgParam == NULL)		{			/*			 * okay to reuse the hash table; needn't rescan inner, either.			 *			 * What we do need to do is reset our state about the emptiness			 * of the outer relation, so that the new scan of the outer will			 * update it correctly if it turns out to be empty this time.			 
* (There's no harm in clearing it now because ExecHashJoin won't			 * need the info.  In the other cases, where the hash table			 * doesn't exist or we are destroying it, we leave this state			 * alone because ExecHashJoin will need it the first time			 * through.)			 */			node->hj_OuterNotEmpty = false;		}		else		{			/* must destroy and rebuild hash table */			ExecHashTableDestroy(node->hj_HashTable);			node->hj_HashTable = NULL;			/*			 * if chgParam of subnode is not null then plan will be re-scanned			 * by first ExecProcNode.			 */			if (((PlanState *) node)->righttree->chgParam == NULL)				ExecReScan(((PlanState *) node)->righttree, exprCtxt);		}	}	/* Always reset intra-tuple state */	node->hj_CurHashValue = 0;	node->hj_CurBucketNo = 0;	node->hj_CurTuple = NULL;	node->js.ps.ps_OuterTupleSlot = NULL;	node->js.ps.ps_TupFromTlist = false;	node->hj_NeedNewOuter = true;	node->hj_MatchedOuter = false;	node->hj_FirstOuterTupleSlot = NULL;	/*	 * if chgParam of subnode is not null then plan will be re-scanned by	 * first ExecProcNode.	 */	if (((PlanState *) node)->lefttree->chgParam == NULL)		ExecReScan(((PlanState *) node)->lefttree, exprCtxt);}
nodehashjoin.c - 源码说明

本页面展示了「PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统」中的 nodehashjoin.c 源码文件，采用 C 语言编写，共 883 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与PostgreSQL相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?