📄 hashjoin.h

📁 PostgreSQL 8.1.4的源码适用于Linux下的开源数据库系统
💻 H
字号:
/*------------------------------------------------------------------------- * * hashjoin.h *	  internal structures for hash joins * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.37 2005/10/15 02:49:44 momjian Exp $ * *------------------------------------------------------------------------- */#ifndef HASHJOIN_H#define HASHJOIN_H#include "access/htup.h"#include "storage/buffile.h"/* ---------------------------------------------------------------- *				hash-join hash table structures * * Each active hashjoin has a HashJoinTable control block, which is * palloc'd in the executor's per-query context.  All other storage needed * for the hashjoin is kept in private memory contexts, two for each hashjoin. * This makes it easy and fast to release the storage when we don't need it * anymore.  (Exception: data associated with the temp files lives in the * per-query context too, since we always call buffile.c in that context.) * * The hashtable contexts are made children of the per-query context, ensuring * that they will be discarded at end of statement even if the join is * aborted early by an error.  (Likewise, any temporary files we make will * be cleaned up by the virtual file manager in event of an error.) * * Storage that should live through the entire join is allocated from the * "hashCxt", while storage that is only wanted for the current batch is * allocated in the "batchCxt".  By resetting the batchCxt at the end of * each batch, we free all the per-batch storage reliably and without tedium. * * During first scan of inner relation, we get its tuples from executor. * If nbatch > 1 then tuples that don't belong in first batch get saved * into inner-batch temp files. The same statements apply for the * first scan of the outer relation, except we write tuples to outer-batch * temp files.	After finishing the first scan, we do the following for * each remaining batch: *	1. Read tuples from inner batch file, load into hash buckets. *	2. Read tuples from outer batch file, match to hash buckets and output. * * It is possible to increase nbatch on the fly if the in-memory hash table * gets too big.  The hash-value-to-batch computation is arranged so that this * can only cause a tuple to go into a later batch than previously thought, * never into an earlier batch.  When we increase nbatch, we rescan the hash * table and dump out any tuples that are now of a later batch to the correct * inner batch file.  Subsequently, while reading either inner or outer batch * files, we might find tuples that no longer belong to the current batch; * if so, we just dump them out to the correct batch file. * ---------------------------------------------------------------- *//* these are in nodes/execnodes.h: *//* typedef struct HashJoinTupleData *HashJoinTuple; *//* typedef struct HashJoinTableData *HashJoinTable; */typedef struct HashJoinTupleData{	struct HashJoinTupleData *next;		/* link to next tuple in same bucket */	uint32		hashvalue;		/* tuple's hash code */	HeapTupleData htup;			/* tuple header */} HashJoinTupleData;typedef struct HashJoinTableData{	int			nbuckets;		/* # buckets in the in-memory hash table */	/* buckets[i] is head of list of tuples in i'th in-memory bucket */	struct HashJoinTupleData **buckets;	/* buckets array is per-batch storage, as are all the tuples */	int			nbatch;			/* number of batches */	int			curbatch;		/* current batch #; 0 during 1st pass */	int			nbatch_original;	/* nbatch when we started inner scan */	int			nbatch_outstart;	/* nbatch when we started outer scan */	bool		growEnabled;	/* flag to shut off nbatch increases */	double		totalTuples;	/* # tuples obtained from inner plan */	/*	 * These arrays are allocated for the life of the hash join, but only if	 * nbatch > 1.	A file is opened only when we first write a tuple into it	 * (otherwise its pointer remains NULL).  Note that the zero'th array	 * elements never get used, since we will process rather than dump out any	 * tuples of batch zero.	 */	BufFile   **innerBatchFile; /* buffered virtual temp file per batch */	BufFile   **outerBatchFile; /* buffered virtual temp file per batch */	/*	 * Info about the datatype-specific hash functions for the datatypes being	 * hashed.	We assume that the inner and outer sides of each hashclause	 * are the same type, or at least share the same hash function. This is an	 * array of the same length as the number of hash keys.	 */	FmgrInfo   *hashfunctions;	/* lookup data for hash functions */	Size		spaceUsed;		/* memory space currently used by tuples */	Size		spaceAllowed;	/* upper limit for space used */	MemoryContext hashCxt;		/* context for whole-hash-join storage */	MemoryContext batchCxt;		/* context for this-batch-only storage */} HashJoinTableData;#endif   /* HASHJOIN_H */
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -