📄 nodehash.c
字号:
* Target in-memory hashtable size is SortMem kilobytes. */
	/*
	 * NOTE(review): the opening of the enclosing sizing routine lies before
	 * this chunk; only its tail is visible here.  From the outputs below it
	 * computes the virtual/physical bucket counts and batch count for a
	 * hybrid hash join, given an estimated inner-relation size.
	 */
	hash_table_bytes = SortMem * 1024L;

	/*
	 * Count the number of hash buckets we want for the whole relation,
	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
	 * bucket!).  It has to fit in an int, however.
	 */
	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
	if (dtmp < INT_MAX)
		totalbuckets = (int) dtmp;
	else
		totalbuckets = INT_MAX;
	if (totalbuckets <= 0)
		totalbuckets = 1;		/* guard against zero/negative estimates */

	/*
	 * Count the number of buckets we think will actually fit in the
	 * target memory size, at a loading of NTUP_PER_BUCKET (physical
	 * buckets).  NOTE: FUDGE_FAC here determines the fraction of the
	 * hashtable space reserved to allow for nonuniform distribution of
	 * hash values.  Perhaps this should be a different number from the
	 * other uses of FUDGE_FAC, but since we have no real good way to pick
	 * either one...
	 */
	bucketsize = NTUP_PER_BUCKET * tupsize;
	nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
	if (nbuckets <= 0)
		nbuckets = 1;

	if (totalbuckets <= nbuckets)
	{
		/*
		 * We have enough space, so no batching.  In theory we could even
		 * reduce nbuckets, but since that could lead to poor behavior if
		 * estimated ntuples is much less than reality, it seems better to
		 * make more buckets instead of fewer.
		 */
		totalbuckets = nbuckets;
		nbatch = 0;
	}
	else
	{
		/*
		 * Need to batch; compute how many batches we want to use.  Note
		 * that nbatch doesn't have to have anything to do with the ratio
		 * totalbuckets/nbuckets; in fact, it is the number of groups we
		 * will use for the part of the data that doesn't fall into the
		 * first nbuckets hash buckets.  We try to set it to make all the
		 * batches the same size.
		 */
		dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
					hash_table_bytes);
		if (dtmp < INT_MAX)
			nbatch = (int) dtmp;
		else
			nbatch = INT_MAX;
		if (nbatch <= 0)
			nbatch = 1;
	}

	/*
	 * Now, totalbuckets is the number of (virtual) hashbuckets for the
	 * whole relation, and nbuckets is the number of physical hashbuckets
	 * we will use in the first pass.  Data falling into the first
	 * nbuckets virtual hashbuckets gets handled in the first pass;
	 * everything else gets divided into nbatch batches to be processed in
	 * additional passes.
	 */
	*virtualbuckets = totalbuckets;
	*physicalbuckets = nbuckets;
	*numbatches = nbatch;
}

/* ----------------------------------------------------------------
 *		ExecHashTableDestroy
 *
 *		destroy a hash table
 * ----------------------------------------------------------------
 */
void
ExecHashTableDestroy(HashJoinTable hashtable)
{
	int			i;

	/* Make sure all the temp files are closed */
	for (i = 0; i < hashtable->nbatch; i++)
	{
		if (hashtable->innerBatchFile[i])
			BufFileClose(hashtable->innerBatchFile[i]);
		if (hashtable->outerBatchFile[i])
			BufFileClose(hashtable->outerBatchFile[i]);
	}

	/* Release working memory (batchCxt is a child, so it goes away too) */
	MemoryContextDelete(hashtable->hashCxt);

	/* And drop the control block */
	pfree(hashtable);
}

/* ----------------------------------------------------------------
 *		ExecHashTableInsert
 *
 *		insert a tuple into the hash table depending on the hash value
 *		it may just go to a tmp file for other batches
 * ----------------------------------------------------------------
 */
void
ExecHashTableInsert(HashJoinTable hashtable,
					ExprContext *econtext,
					List *hashkeys)
{
	int			bucketno = ExecHashGetBucket(hashtable, econtext, hashkeys);
	int			batchno = ExecHashGetBatch(bucketno, hashtable);
	TupleTableSlot *slot = econtext->ecxt_innertuple;
	HeapTuple	heapTuple = slot->val;

	/*
	 * decide whether to put the tuple in the hash table or a tmp file
	 */
	if (batchno < 0)
	{
		/*
		 * put the tuple in hash table
		 */
		HashJoinTuple hashTuple;
		int			hashTupleSize;

		/*
		 * Copy the tuple into the batch context: the HashJoinTuple header
		 * is followed (MAXALIGN'd) by the tuple body, and the copied
		 * header's t_data pointer is fixed up to point at that body.
		 */
		hashTupleSize = MAXALIGN(sizeof(*hashTuple)) + heapTuple->t_len;
		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
													   hashTupleSize);
		memcpy((char *) &hashTuple->htup,
			   (char *) heapTuple,
			   sizeof(hashTuple->htup));
		hashTuple->htup.t_datamcxt = hashtable->batchCxt;
		hashTuple->htup.t_data = (HeapTupleHeader)
			(((char *) hashTuple) + MAXALIGN(sizeof(*hashTuple)));
		memcpy((char *) hashTuple->htup.t_data,
			   (char *) heapTuple->t_data,
			   heapTuple->t_len);
		/* push onto the head of the bucket's singly-linked chain */
		hashTuple->next = hashtable->buckets[bucketno];
		hashtable->buckets[bucketno] = hashTuple;
	}
	else
	{
		/*
		 * put the tuple into a tmp file for later batches
		 */
		hashtable->innerBatchSize[batchno]++;
		ExecHashJoinSaveTuple(heapTuple,
							  hashtable->innerBatchFile[batchno]);
	}
}

/* ----------------------------------------------------------------
 *		ExecHashGetBucket
 *
 *		Get the hash value for a tuple
 * ----------------------------------------------------------------
 */
int
ExecHashGetBucket(HashJoinTable hashtable,
				  ExprContext *econtext,
				  List *hashkeys)
{
	uint32		hashkey = 0;
	int			bucketno;
	List	   *hk;
	int			i = 0;			/* index into hashfunctions[], one per key */
	MemoryContext oldContext;

	/*
	 * We reset the eval context each time to reclaim any memory leaked in
	 * the hashkey expressions.
	 */
	ResetExprContext(econtext);
	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);

	foreach(hk, hashkeys)
	{
		Datum		keyval;
		bool		isNull;

		/* rotate hashkey left 1 bit at each step */
		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);

		/*
		 * Get the join attribute value of the tuple
		 */
		keyval = ExecEvalExpr((ExprState *) lfirst(hk),
							  econtext, &isNull, NULL);

		/*
		 * Compute the hash function
		 */
		if (!isNull)			/* treat nulls as having hash key 0 */
		{
			uint32		hkey;

			hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
												keyval));
			hashkey ^= hkey;
		}

		i++;
	}

	/* reduce to a virtual bucket number (0 .. totalbuckets-1) */
	bucketno = hashkey % (uint32) hashtable->totalbuckets;

#ifdef HJDEBUG
	if (bucketno >= hashtable->nbuckets)
		printf("hash(%u) = %d SAVED\n", hashkey, bucketno);
	else
		printf("hash(%u) = %d\n", hashkey, bucketno);
#endif

	MemoryContextSwitchTo(oldContext);

	return bucketno;
}

/* ----------------------------------------------------------------
 *		ExecHashGetBatch
 *
 *		determine the batch number for a bucketno
 *
 * Returns -1 if bucket belongs to initial (or current) batch,
 * else 0..nbatch-1 corresponding to external batch file number for bucket.
 * ----------------------------------------------------------------
 */
int
ExecHashGetBatch(int bucketno, HashJoinTable hashtable)
{
	if (bucketno < hashtable->nbuckets)
		return -1;

	/* overflow buckets are spread round-robin across the batch files */
	return (bucketno - hashtable->nbuckets) % hashtable->nbatch;
}

/* ----------------------------------------------------------------
 *		ExecScanHashBucket
 *
 *		scan a hash bucket of matches
 * ----------------------------------------------------------------
 */
HeapTuple
ExecScanHashBucket(HashJoinState *hjstate,
				   List *hjclauses,
				   ExprContext *econtext)
{
	HashJoinTable hashtable = hjstate->hj_HashTable;
	HashJoinTuple hashTuple = hjstate->hj_CurTuple;

	/*
	 * hj_CurTuple is NULL to start scanning a new bucket, or the address
	 * of the last tuple returned from the current bucket.
	 */
	if (hashTuple == NULL)
		hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
	else
		hashTuple = hashTuple->next;

	/* walk the chain, returning the first tuple that passes the quals */
	while (hashTuple != NULL)
	{
		HeapTuple	heapTuple = &hashTuple->htup;
		TupleTableSlot *inntuple;

		/* insert hashtable's tuple into exec slot so ExecQual sees it */
		inntuple = ExecStoreTuple(heapTuple,
								  hjstate->hj_HashTupleSlot,
								  InvalidBuffer,
								  false);	/* do not pfree this tuple */
		econtext->ecxt_innertuple = inntuple;

		/* reset temp memory each time to avoid leaks from qual expression */
		ResetExprContext(econtext);

		if (ExecQual(hjclauses, econtext, false))
		{
			/* remember position so the next call resumes after this tuple */
			hjstate->hj_CurTuple = hashTuple;
			return heapTuple;
		}

		hashTuple = hashTuple->next;
	}

	/*
	 * no match
	 */
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecHashTableReset
 *
 *		reset hash table header for new batch
 *
 *		ntuples is the number of tuples in the inner relation's batch
 *		(which we currently don't actually use...)
 * ----------------------------------------------------------------
 */
void
ExecHashTableReset(HashJoinTable hashtable, long ntuples)
{
	MemoryContext oldcxt;
	int			nbuckets = hashtable->nbuckets;

	/*
	 * Release all the hash buckets and tuples acquired in the prior pass,
	 * and reinitialize the context for a new pass.
	 */
	MemoryContextReset(hashtable->batchCxt);
	oldcxt = MemoryContextSwitchTo(hashtable->batchCxt);

	/*
	 * We still use the same number of physical buckets as in the first
	 * pass.  (It could be different; but we already decided how many
	 * buckets would be appropriate for the allowed memory, so stick with
	 * that number.)  We MUST set totalbuckets to equal nbuckets, because
	 * from now on no tuples will go out to temp files; there are no more
	 * virtual buckets, only real buckets.  (This implies that tuples will
	 * go into different bucket numbers than they did on the first pass,
	 * but that's OK.)
	 */
	hashtable->totalbuckets = nbuckets;

	/* Reallocate and reinitialize the hash bucket headers. */
	hashtable->buckets = (HashJoinTuple *)
		palloc0(nbuckets * sizeof(HashJoinTuple));

	MemoryContextSwitchTo(oldcxt);
}

/* ----------------------------------------------------------------
 *		ExecReScanHash
 *
 *		rescan: delegate to the subplan unless a parameter change
 *		will force a rescan at the next ExecProcNode anyway
 * ----------------------------------------------------------------
 */
void
ExecReScanHash(HashState *node, ExprContext *exprCtxt)
{
	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (((PlanState *) node)->lefttree->chgParam == NULL)
		ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -