nodehash.c

来自「关系型数据库 Postgresql 6.5.2」· C语言代码 · 共 734 行 · 第 1/2 页
734 行
/*------------------------------------------------------------------------- * * nodeHash.c *	  Routines to hash relations for hashjoin * * Copyright (c) 1994, Regents of the University of California * * *	$Id: nodeHash.c,v 1.36 1999/05/25 16:08:41 momjian Exp $ * *------------------------------------------------------------------------- *//* * INTERFACE ROUTINES *		ExecHash		- generate an in-memory hash table of the relation *		ExecInitHash	- initialize node and subnodes.. *		ExecEndHash		- shutdown node and subnodes * */#include <sys/types.h>#include <stdio.h>#include <math.h>#include <string.h>#include "postgres.h"#include "miscadmin.h"#include "executor/execdebug.h"#include "executor/executor.h"#include "executor/nodeHash.h"#include "executor/nodeHashjoin.h"#include "utils/hsearch.h"#include "utils/portal.h"extern int	SortMem;static int	hashFunc(Datum key, int len, bool byVal);/* ---------------------------------------------------------------- *		ExecHash * *		build hash table for hashjoin, all do partitioning if more *		than one batches are required. * ---------------------------------------------------------------- */TupleTableSlot *ExecHash(Hash *node){	EState	   *estate;	HashState  *hashstate;	Plan	   *outerNode;	Var		   *hashkey;	HashJoinTable hashtable;	TupleTableSlot *slot;	ExprContext *econtext;	int			nbatch;	int			i;	/* ----------------	 *	get state info from node	 * ----------------	 */	hashstate = node->hashstate;	estate = node->plan.state;	outerNode = outerPlan(node);	hashtable = hashstate->hashtable;	if (hashtable == NULL)		elog(ERROR, "ExecHash: hash table is NULL.");	nbatch = hashtable->nbatch;	if (nbatch > 0)	{		/* ----------------		 * Open temp files for inner batches, if needed.		 * Note that file buffers are palloc'd in regular executor context.		 * ----------------		 */		for (i = 0; i < nbatch; i++)		{			File		tfile = OpenTemporaryFile();			Assert(tfile >= 0);			hashtable->innerBatchFile[i] = BufFileCreate(tfile);		}	}	/* ----------------	 *	set expression context	 * ----------------	 */	hashkey = node->hashkey;	econtext = hashstate->cstate.cs_ExprContext;	/* ----------------	 *	get all inner tuples and insert into the hash table (or temp files)	 * ----------------	 */	for (;;)	{		slot = ExecProcNode(outerNode, (Plan *) node);		if (TupIsNull(slot))			break;		econtext->ecxt_innertuple = slot;		ExecHashTableInsert(hashtable, econtext, hashkey);		ExecClearTuple(slot);	}	/* ---------------------	 *	Return the slot so that we have the tuple descriptor	 *	when we need to save/restore them.	-Jeff 11 July 1991	 * ---------------------	 */	return slot;}/* ---------------------------------------------------------------- *		ExecInitHash * *		Init routine for Hash node * ---------------------------------------------------------------- */boolExecInitHash(Hash *node, EState *estate, Plan *parent){	HashState  *hashstate;	Plan	   *outerPlan;	SO1_printf("ExecInitHash: %s\n",			   "initializing hash node");	/* ----------------	 *	assign the node's execution state	 * ----------------	 */	node->plan.state = estate;	/* ----------------	 * create state structure	 * ----------------	 */	hashstate = makeNode(HashState);	node->hashstate = hashstate;	hashstate->hashtable = NULL;	/* ----------------	 *	Miscellaneous initialization	 *	 *		 +	assign node's base_id	 *		 +	assign debugging hooks and	 *		 +	create expression context for node	 * ----------------	 */	ExecAssignNodeBaseInfo(estate, &hashstate->cstate, parent);	ExecAssignExprContext(estate, &hashstate->cstate);	/* ----------------	 * initialize our result slot	 * ----------------	 */	ExecInitResultTupleSlot(estate, &hashstate->cstate);	/* ----------------	 * initializes child nodes	 * ----------------	 */	outerPlan = outerPlan(node);	ExecInitNode(outerPlan, estate, (Plan *) node);	/* ----------------	 *	initialize tuple type. no need to initialize projection	 *	info because this node doesn't do projections	 * ----------------	 */	ExecAssignResultTypeFromOuterPlan((Plan *) node, &hashstate->cstate);	hashstate->cstate.cs_ProjInfo = NULL;	return TRUE;}intExecCountSlotsHash(Hash *node){#define HASH_NSLOTS 1	return ExecCountSlotsNode(outerPlan(node)) +	ExecCountSlotsNode(innerPlan(node)) +	HASH_NSLOTS;}/* --------------------------------------------------------------- *		ExecEndHash * *		clean up routine for Hash node * ---------------------------------------------------------------- */voidExecEndHash(Hash *node){	HashState  *hashstate;	Plan	   *outerPlan;	/* ----------------	 *	get info from the hash state	 * ----------------	 */	hashstate = node->hashstate;	/* ----------------	 *	free projection info.  no need to free result type info	 *	because that came from the outer plan...	 * ----------------	 */	ExecFreeProjectionInfo(&hashstate->cstate);	/* ----------------	 *	shut down the subplan	 * ----------------	 */	outerPlan = outerPlan(node);	ExecEndNode(outerPlan, (Plan *) node);}/* ---------------------------------------------------------------- *		ExecHashTableCreate * *		create a hashtable in shared memory for hashjoin. * ---------------------------------------------------------------- */#define NTUP_PER_BUCKET			10#define FUDGE_FAC				2.0HashJoinTableExecHashTableCreate(Hash *node){	Plan	   *outerNode;	int			ntuples;	int			tupsize;	double		inner_rel_bytes;	double		hash_table_bytes;	int			nbatch;	HashJoinTable hashtable;	int			nbuckets;	int			totalbuckets;	int			bucketsize;	int			i;	Portal		myPortal;	char		myPortalName[64];	MemoryContext oldcxt;	/* ----------------	 *	Get information about the size of the relation to be hashed	 *	(it's the "outer" subtree of this node, but the inner relation of	 *	the hashjoin).	 *	Caution: this is only the planner's estimates, and so	 *	can't be trusted too far.  Apply a healthy fudge factor.	 * ----------------	 */	outerNode = outerPlan(node);	ntuples = outerNode->plan_size;	if (ntuples <= 0)			/* force a plausible size if no info */		ntuples = 1000;	/*	 * estimate tupsize based on footprint of tuple in hashtable... but	 * what about palloc overhead?	 */	tupsize = MAXALIGN(outerNode->plan_width) +		MAXALIGN(sizeof(HashJoinTupleData));	inner_rel_bytes = (double) ntuples *tupsize * FUDGE_FAC;	/*	 * Target hashtable size is SortMem kilobytes, but not less than	 * sqrt(estimated inner rel size), so as to avoid horrible	 * performance.	 */	hash_table_bytes = sqrt(inner_rel_bytes);	if (hash_table_bytes < (SortMem * 1024L))		hash_table_bytes = SortMem * 1024L;	/*	 * Count the number of hash buckets we want for the whole relation,	 * for an average bucket load of NTUP_PER_BUCKET (per virtual	 * bucket!).	 */	totalbuckets = (int) ceil((double) ntuples * FUDGE_FAC / NTUP_PER_BUCKET);	/*	 * Count the number of buckets we think will actually fit in the	 * target memory size, at a loading of NTUP_PER_BUCKET (physical	 * buckets). NOTE: FUDGE_FAC here determines the fraction of the	 * hashtable space reserved to allow for nonuniform distribution of	 * hash values. Perhaps this should be a different number from the	 * other uses of FUDGE_FAC, but since we have no real good way to pick	 * either one...	 */	bucketsize = NTUP_PER_BUCKET * tupsize;	nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));	if (nbuckets <= 0)		nbuckets = 1;	if (totalbuckets <= nbuckets)	{		/*		 * We have enough space, so no batching.  In theory we could even		 * reduce nbuckets, but since that could lead to poor behavior if		 * estimated ntuples is much less than reality, it seems better to		 * make more buckets instead of fewer.		 */		totalbuckets = nbuckets;		nbatch = 0;	}	else	{		/*		 * Need to batch; compute how many batches we want to use. Note		 * that nbatch doesn't have to have anything to do with the ratio		 * totalbuckets/nbuckets; in fact, it is the number of groups we		 * will use for the part of the data that doesn't fall into the		 * first nbuckets hash buckets.		 */		nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /							hash_table_bytes);		if (nbatch <= 0)			nbatch = 1;	}	/*	 * Now, totalbuckets is the number of (virtual) hashbuckets for the	 * whole relation, and nbuckets is the number of physical hashbuckets	 * we will use in the first pass.  Data falling into the first	 * nbuckets virtual hashbuckets gets handled in the first pass;	 * everything else gets divided into nbatch batches to be processed in	 * additional passes.	 */#ifdef HJDEBUG	printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",		   nbatch, totalbuckets, nbuckets);#endif	/* ----------------	 *	Initialize the hash table control block.	 *	The hashtable control block is just palloc'd from executor memory.	 * ----------------	 */	hashtable = (HashJoinTable) palloc(sizeof(HashTableData));	hashtable->nbuckets = nbuckets;	hashtable->totalbuckets = totalbuckets;	hashtable->buckets = NULL;	hashtable->nbatch = nbatch;	hashtable->curbatch = 0;	hashtable->innerBatchFile = NULL;	hashtable->outerBatchFile = NULL;	hashtable->innerBatchSize = NULL;	hashtable->outerBatchSize = NULL;	/* ----------------	 *	Create a named portal in which to keep the hashtable working storage.	 *	Each hashjoin must have its own portal, so be wary of name conflicts.	 * ----------------	 */	i = 0;	do
nodehash.c - 源码说明

本页面展示了「关系型数据库 Postgresql 6.5.2」中的 nodehash.c 源码文件，采用 C语言编程语言编写，共 734 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与Postgresql相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?