dynahash.c

From "PostgreSQL 8.3.4 source code, open-source database" · C code · 1,529 lines total · page 1 of 3

C
1,529
Font size
/*-------------------------------------------------------------------------
 *
 * dynahash.c
 *	  dynamic hash tables
 *
 * dynahash.c supports both local-to-a-backend hash tables and hash tables in
 * shared memory.  For shared hash tables, it is the caller's responsibility
 * to provide appropriate access interlocking.	The simplest convention is
 * that a single LWLock protects the whole hash table.	Searches (HASH_FIND or
 * hash_seq_search) need only shared lock, but any update requires exclusive
 * lock.  For heavily-used shared tables, the single-lock approach creates a
 * concurrency bottleneck, so we also support "partitioned" locking wherein
 * there are multiple LWLocks guarding distinct subsets of the table.  To use
 * a hash table in partitioned mode, the HASH_PARTITION flag must be given
 * to hash_create.	This prevents any attempt to split buckets on-the-fly.
 * Therefore, each hash bucket chain operates independently, and no fields
 * of the hash header change after init except nentries and freeList.
 * A partitioned table uses a spinlock to guard changes of those two fields.
 * This lets any subset of the hash buckets be treated as a separately
 * lockable partition.	We expect callers to use the low-order bits of a
 * lookup key's hash value as a partition number --- this will work because
 * of the way calc_bucket() maps hash values to bucket numbers.
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL: pgsql/src/backend/utils/hash/dynahash.c,v 1.78 2008/01/01 19:45:53 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */

/*
 * Original comments:
 *
 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
 * Coded into C, with minor code improvements, and with hsearch(3) interface,
 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
 * also, hcreate/hdestroy routines added to simulate hsearch(3).
 *
 * These routines simulate hsearch(3) and family, with the important
 * difference that the hash table is dynamic - can grow indefinitely
 * beyond its original size (as supplied to hcreate()).
 *
 * Performance appears to be comparable to that of hsearch(3).
 * The 'source-code' options referred to in hsearch(3)'s 'man' page
 * are not implemented; otherwise functionality is identical.
 *
 * Compilation controls:
 * DEBUG controls some informative traces, mainly for debugging.
 * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
 * when combined with HASH_DEBUG, these are displayed by hdestroy().
 *
 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
 * concatenation property, in probably unnecessary code 'optimisation'.
 *
 * Modified margo@postgres.berkeley.edu February 1990
 *		added multiple table interface
 * Modified by sullivan@postgres.berkeley.edu April 1990
 *		changed ctl structure for shared memory
 */

#include "postgres.h"

#include "access/xact.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"


/*
 * Constants
 *
 * A hash table has a top-level "directory", each of whose entries points
 * to a "segment" of ssize bucket headers.	The maximum number of hash
 * buckets is thus dsize * ssize (but dsize may be expansible).  Of course,
 * the number of records in the table can be larger, but we don't want a
 * whole lot of records per bucket or performance goes down.
 *
 * In a hash table allocated in shared memory, the directory cannot be
 * expanded because it must stay at a fixed address.  The directory size
 * should be selected using hash_select_dirsize (and you'd better have
 * a good idea of the maximum number of entries!).	For non-shared hash
 * tables, the initial directory size can be left at the default.
 */

#define DEF_SEGSIZE			   256
#define DEF_SEGSIZE_SHIFT	   8	/* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE			   256
#define DEF_FFACTOR			   1	/* default fill factor */


/* A hash bucket is a linked list of HASHELEMENTs */
typedef HASHELEMENT *HASHBUCKET;

/* A hash segment is an array of bucket headers */
typedef HASHBUCKET *HASHSEGMENT;

/*
 * Header structure for a hash table --- contains all changeable info
 *
 * In a shared-memory hash table, the HASHHDR is in shared memory, while
 * each backend has a local HTAB struct.  For a non-shared table, there isn't
 * any functional difference between HASHHDR and HTAB, but we separate them
 * anyway to share code between shared and non-shared tables.
 */
struct HASHHDR
{
	/* In a partitioned table, take this lock to touch nentries or freeList */
	slock_t		mutex;			/* unused if not partitioned table */

	/* These fields change during entry addition/deletion */
	long		nentries;		/* number of entries in hash table */
	HASHELEMENT *freeList;		/* linked list of free elements */

	/* These fields can change, but not in a partitioned table */
	/* Also, dsize can't change in a shared table, even if unpartitioned */
	long		dsize;			/* directory size */
	long		nsegs;			/* number of allocated segments (<= dsize) */
	uint32		max_bucket;		/* ID of maximum bucket in use */
	uint32		high_mask;		/* mask to modulo into entire table */
	uint32		low_mask;		/* mask to modulo into lower half of table */

	/* These fields are fixed at hashtable creation */
	Size		keysize;		/* hash key length in bytes */
	Size		entrysize;		/* total user element size in bytes */
	long		num_partitions; /* # partitions (must be power of 2), or 0 */
	long		ffactor;		/* target fill factor */
	long		max_dsize;		/* 'dsize' limit if directory is fixed size */
	long		ssize;			/* segment size --- must be power of 2 */
	int			sshift;			/* segment shift = log2(ssize) */
	int			nelem_alloc;	/* number of entries to allocate at once */

#ifdef HASH_STATISTICS

	/*
	 * Count statistics here.  NB: stats code doesn't bother with mutex, so
	 * counts could be corrupted a bit in a partitioned table.
	 */
	long		accesses;
	long		collisions;
#endif
};

#define IS_PARTITIONED(hctl)  ((hctl)->num_partitions != 0)

/*
 * Top control structure for a hashtable --- in a shared table, each backend
 * has its own copy (OK since no fields change at runtime)
 */
struct HTAB
{
	HASHHDR    *hctl;			/* => shared control information */
	HASHSEGMENT *dir;			/* directory of segment starts */
	HashValueFunc hash;			/* hash function */
	HashCompareFunc match;		/* key comparison function */
	HashCopyFunc keycopy;		/* key copying function */
	HashAllocFunc alloc;		/* memory allocator */
	MemoryContext hcxt;			/* memory context if default allocator used */
	char	   *tabname;		/* table name (for error messages) */
	bool		isshared;		/* true if table is in shared memory */

	/* freezing a shared table isn't allowed, so we can keep state here */
	bool		frozen;			/* true = no more inserts allowed */

	/* We keep local copies of these fixed values to reduce contention */
	Size		keysize;		/* hash key length in bytes */
	long		ssize;			/* segment size --- must be power of 2 */
	int			sshift;			/* segment shift = log2(ssize) */
};

/*
 * Key (also entry) part of a HASHELEMENT
 * (the caller's entry data starts right after the aligned element header)
 */
#define ELEMENTKEY(helem)  (((char *)(helem)) + MAXALIGN(sizeof(HASHELEMENT)))

/*
 * Fast MOD arithmetic, assuming that y is a power of 2 !
*/#define MOD(x,y)			   ((x) & ((y)-1))#if HASH_STATISTICSstatic long hash_accesses,			hash_collisions,			hash_expansions;#endif/* * Private function prototypes */static void *DynaHashAlloc(Size size);static HASHSEGMENT seg_alloc(HTAB *hashp);static bool element_alloc(HTAB *hashp, int nelem);static bool dir_realloc(HTAB *hashp);static bool expand_table(HTAB *hashp);static HASHBUCKET get_hash_entry(HTAB *hashp);static void hdefault(HTAB *hashp);static int	choose_nelem_alloc(Size entrysize);static bool init_htab(HTAB *hashp, long nelem);static void hash_corrupted(HTAB *hashp);static void register_seq_scan(HTAB *hashp);static void deregister_seq_scan(HTAB *hashp);static bool has_seq_scans(HTAB *hashp);/* * memory allocation support */static MemoryContext CurrentDynaHashCxt = NULL;static void *DynaHashAlloc(Size size){	Assert(MemoryContextIsValid(CurrentDynaHashCxt));	return MemoryContextAlloc(CurrentDynaHashCxt, size);}/* * HashCompareFunc for string keys * * Because we copy keys with strlcpy(), they will be truncated at keysize-1 * bytes, so we can only compare that many ... hence strncmp is almost but * not quite the right thing. */static intstring_compare(const char *key1, const char *key2, Size keysize){	return strncmp(key1, key2, keysize - 1);}/************************** CREATE ROUTINES **********************//* * hash_create -- create a new dynamic hash table * *	tabname: a name for the table (for debugging purposes) *	nelem: maximum number of elements expected *	*info: additional table parameters, as indicated by flags *	flags: bitmask indicating which parameters to take from *info * * Note: for a shared-memory hashtable, nelem needs to be a pretty good * estimate, since we can't expand the table on the fly.  But an unshared * hashtable can be expanded on-the-fly, so it's better for nelem to be * on the small side and let the table grow if it's exceeded.  An overly * large nelem will penalize hash_seq_search speed without buying much. 
 */
HTAB *
hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
{
	HTAB	   *hashp;
	HASHHDR    *hctl;

	/*
	 * For shared hash tables, we have a local hash header (HTAB struct) that
	 * we allocate in TopMemoryContext; all else is in shared memory.
	 *
	 * For non-shared hash tables, everything including the hash header is in
	 * a memory context created specially for the hash table --- this makes
	 * hash_destroy very simple.  The memory context is made a child of either
	 * a context specified by the caller, or TopMemoryContext if nothing is
	 * specified.
	 */
	if (flags & HASH_SHARED_MEM)
	{
		/* Set up to allocate the hash header */
		CurrentDynaHashCxt = TopMemoryContext;
	}
	else
	{
		/* Create the hash table's private memory context */
		if (flags & HASH_CONTEXT)
			CurrentDynaHashCxt = info->hcxt;
		else
			CurrentDynaHashCxt = TopMemoryContext;
		CurrentDynaHashCxt = AllocSetContextCreate(CurrentDynaHashCxt,
												   tabname,
												   ALLOCSET_DEFAULT_MINSIZE,
												   ALLOCSET_DEFAULT_INITSIZE,
												   ALLOCSET_DEFAULT_MAXSIZE);
	}

	/* Initialize the hash header, plus a copy of the table name */
	/* (the name is stored immediately after the HTAB struct) */
	hashp = (HTAB *) DynaHashAlloc(sizeof(HTAB) + strlen(tabname) + 1);
	MemSet(hashp, 0, sizeof(HTAB));

	hashp->tabname = (char *) (hashp + 1);
	strcpy(hashp->tabname, tabname);

	if (flags & HASH_FUNCTION)
		hashp->hash = info->hash;
	else
		hashp->hash = string_hash;		/* default hash function */

	/*
	 * If you don't specify a match function, it defaults to string_compare if
	 * you used string_hash (either explicitly or by default) and to memcmp
	 * otherwise.  (Prior to PostgreSQL 7.4, memcmp was always used.)
	 */
	if (flags & HASH_COMPARE)
		hashp->match = info->match;
	else if (hashp->hash == string_hash)
		hashp->match = (HashCompareFunc) string_compare;
	else
		hashp->match = memcmp;

	/*
	 * Similarly, the key-copying function defaults to strlcpy or memcpy.
	 */
	if (flags & HASH_KEYCOPY)
		hashp->keycopy = info->keycopy;
	else if (hashp->hash == string_hash)
		hashp->keycopy = (HashCopyFunc) strlcpy;
	else
		hashp->keycopy = memcpy;

	if (flags & HASH_ALLOC)
		hashp->alloc = info->alloc;
	else
		hashp->alloc = DynaHashAlloc;

	if (flags & HASH_SHARED_MEM)
	{
		/*
		 * ctl structure and directory are preallocated for shared memory
		 * tables.	Note that HASH_DIRSIZE and HASH_ALLOC had better be set as
		 * well.
		 */
		hashp->hctl = info->hctl;
		hashp->dir = (HASHSEGMENT *) (((char *) info->hctl) + sizeof(HASHHDR));
		hashp->hcxt = NULL;
		hashp->isshared = true;

		/* hash table already exists, we're just attaching to it */
		if (flags & HASH_ATTACH)
		{
			/* make local copies of some heavily-used values */
			hctl = hashp->hctl;
			hashp->keysize = hctl->keysize;
			hashp->ssize = hctl->ssize;
			hashp->sshift = hctl->sshift;

			return hashp;
		}
	}
	else
	{
		/* setup hash table defaults */
		hashp->hctl = NULL;
		hashp->dir = NULL;
		hashp->hcxt = CurrentDynaHashCxt;
		hashp->isshared = false;
	}

	/* allocate the header in-context unless it was preallocated above */
	if (!hashp->hctl)
	{
		hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
		if (!hashp->hctl)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	hashp->frozen = false;

	/* fill in defaults first; flag-driven overrides follow */
	hdefault(hashp);

	hctl = hashp->hctl;

	if (flags & HASH_PARTITION)
	{
		/* Doesn't make sense to partition a local hash table */
		Assert(flags & HASH_SHARED_MEM);

		/* # of partitions had better be a power of 2 */
		Assert(info->num_partitions == (1L << my_log2(info->num_partitions)));

		hctl->num_partitions = info->num_partitions;
	}

	if (flags & HASH_SEGMENT)
	{
		hctl->ssize = info->ssize;
		hctl->sshift = my_log2(info->ssize);
		/* ssize had better be a power of 2 */
		Assert(hctl->ssize == (1L << hctl->sshift));
	}

	if (flags & HASH_FFACTOR)
		hctl->ffactor = info->ffactor;

	/*
	 * SHM hash tables have fixed directory size passed by the caller.
	 */
	if (flags & HASH_DIRSIZE)
	{
		hctl->max_dsize = info->max_dsize;
		hctl->dsize = info->dsize;
	}

	/*
	 * hash table now allocates space for key and data but you have to say how
	 * much space to allocate
	 */
	if (flags & HASH_ELEM)
	{
		Assert(info->entrysize >= info->keysize);
		hctl->keysize = info->keysize;
		hctl->entrysize = info->entrysize;
	}

	/* make local copies of heavily-used constant fields */
	hashp->keysize = hctl->keysize;
	hashp->ssize = hctl->ssize;
	hashp->sshift = hctl->sshift;

	/* Build the hash directory structure */
	if (!init_htab(hashp, nelem))
		elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);

	/*
	 * For a shared hash table, preallocate the requested number of elements.
	 * This reduces problems with run-time out-of-shared-memory conditions.
	 *
	 * For a non-shared hash table, preallocate the requested number of
	 * elements if it's less than our chosen nelem_alloc.  This avoids wasting
	 * space if the caller correctly estimates a small table size.
	 */
	if ((flags & HASH_SHARED_MEM) ||
		nelem < hctl->nelem_alloc)
	{
		if (!element_alloc(hashp, (int) nelem))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
	}

	return hashp;
}

/*
 * Set default HASHHDR parameters.
 */
static void
hdefault(HTAB *hashp)
{
	HASHHDR    *hctl = hashp->hctl;

	MemSet(hctl, 0, sizeof(HASHHDR));

	hctl->nentries = 0;
	hctl->freeList = NULL;

	hctl->dsize = DEF_DIRSIZE;
	hctl->nsegs = 0;

	/* rather pointless defaults for key & entry size */
	hctl->keysize = sizeof(char *);
	hctl->entrysize = 2 * sizeof(char *);

	hctl->num_partitions = 0;	/* not partitioned */

	hctl->ffactor = DEF_FFACTOR;

	/* table has no fixed maximum size */
	hctl->max_dsize = NO_MAX_DSIZE;

	hctl->ssize = DEF_SEGSIZE;
	hctl->sshift = DEF_SEGSIZE_SHIFT;

#ifdef HASH_STATISTICS
	hctl->accesses = hctl->collisions = 0;
#endif
}

/*
 * Given the user-specified entry size, choose nelem_alloc, ie, how many
 * elements to add to the hash table when we need more.
*/static intchoose_nelem_alloc(Size entrysize){	int			nelem_alloc;	Size		elementSize;	Size		allocSize;	/* Each element has a HASHELEMENT header plus user data. */	/* NB: this had better match element_alloc() */	elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(entrysize);	/*	 * The idea here is to choose nelem_alloc at least 32, but round up so	 * that the allocation request will be a power of 2 or just less. This	 * makes little difference for hash tables in shared memory, but for hash	 * tables managed by palloc, the allocation request will be rounded up to	 * a power of 2 anyway.  If we fail to take this into account, we'll waste	 * as much as half the allocated space.	 */	allocSize = 32 * 4;			/* assume elementSize at least 8 */	do	{		allocSize <<= 1;		nelem_alloc = allocSize / elementSize;	} while (nelem_alloc < 32);	return nelem_alloc;}/* * Compute derived fields of hctl and build the initial directory/segment

⌨️ Keyboard shortcuts

Copy code: Ctrl + C
Search code: Ctrl + F
Full-screen mode: F11
Increase font size: Ctrl + =
Decrease font size: Ctrl + -
Show shortcuts: ?