📄 dynahash.c
/*-------------------------------------------------------------------------
 *
 * dynahash.c
 *    dynamic hashing
 *
 * Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    $Header: /usr/local/cvsroot/pgsql/src/backend/utils/hash/dynahash.c,v 1.23.2.1 1999/08/02 05:25:08 scrappy Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 *
 * Dynamic hashing, after CACM April 1988 pp 446-457, by Per-Ake Larson.
 * Coded into C, with minor code improvements, and with hsearch(3) interface,
 * by ejp@ausmelb.oz, Jul 26, 1988: 13:16;
 * also, hcreate/hdestroy routines added to simulate hsearch(3).
 *
 * These routines simulate hsearch(3) and family, with the important
 * difference that the hash table is dynamic - can grow indefinitely
 * beyond its original size (as supplied to hcreate()).
 *
 * Performance appears to be comparable to that of hsearch(3).
 * The 'source-code' options referred to in hsearch(3)'s 'man' page
 * are not implemented; otherwise functionality is identical.
 *
 * Compilation controls:
 * DEBUG controls some informative traces, mainly for debugging.
 * HASH_STATISTICS causes HashAccesses and HashCollisions to be maintained;
 * when combined with HASH_DEBUG, these are displayed by hdestroy().
 *
 * Problems & fixes to ejp@ausmelb.oz. WARNING: relies on pre-processor
 * concatenation property, in probably unnecessary code 'optimisation'.
 *
 * Modified margo@postgres.berkeley.edu February 1990
 *    added multiple table interface
 * Modified by sullivan@postgres.berkeley.edu April 1990
 *    changed ctl structure for shared memory
 */

#include <sys/types.h>

#include "postgres.h"
#include "utils/dynahash.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"

/*
 * Fast MOD arithmetic, assuming that y is a power of 2 !
 */
#define MOD(x,y)    ((x) & ((y)-1))

/*
 * Private function prototypes
 */
static long *DynaHashAlloc(unsigned int size);
static void DynaHashFree(Pointer ptr);
static uint32 call_hash(HTAB *hashp, char *k);
static SEG_OFFSET seg_alloc(HTAB *hashp);
static int bucket_alloc(HTAB *hashp);
static int dir_realloc(HTAB *hashp);
static int expand_table(HTAB *hashp);
static int hdefault(HTAB *hashp);
static int init_htab(HTAB *hashp, int nelem);

typedef long *((*dhalloc_ptr) ());

#ifndef FRONTEND

/* ----------------
 * memory allocation routines
 *
 * for postgres: all hash elements have to be in
 * the global cache context.  Otherwise the postgres
 * garbage collector is going to corrupt them. -wei
 *
 * ??? the "cache" memory context is intended to store only
 *     system cache information.  The user of the hashing
 *     routines should specify which context to use or we
 *     should create a separate memory context for these
 *     hash routines.  For now I have modified this code to
 *     do the latter -cim 1/19/91
 * ----------------
 */
GlobalMemory DynaHashCxt = (GlobalMemory) NULL;

static long *
DynaHashAlloc(unsigned int size)
{
    if (!DynaHashCxt)
        DynaHashCxt = CreateGlobalMemory("DynaHash");

    return (long *)
        MemoryContextAlloc((MemoryContext) DynaHashCxt, size);
}

static void
DynaHashFree(Pointer ptr)
{
    MemoryContextFree((MemoryContext) DynaHashCxt, ptr);
}

#define MEM_ALLOC       DynaHashAlloc
#define MEM_FREE        DynaHashFree

#else                           /* FRONTEND */

#define MEM_ALLOC       palloc
#define MEM_FREE        pfree

#endif                          /* FRONTEND */
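/*
 * Editor's note: the following usage sketch is illustrative and not part of
 * the original file.  It shows how a backend caller typically drives this
 * hsearch(3)-style interface.  It assumes the hash_search() signature and
 * the HASH_ENTER action declared in utils/hsearch.h of this era;
 * ExampleEntry, example_table, and the helper routines are hypothetical
 * names used only for the example.
 */
#if 0
typedef struct ExampleEntry
{
    char        name[NAMEDATALEN];  /* hash key: keysize bytes come first */
    int         value;              /* user data: datasize bytes follow */
} ExampleEntry;

static HTAB *example_table = NULL;

static void
example_init(void)
{
    HASHCTL     info;

    MemSet(&info, 0, sizeof(info));
    info.keysize = NAMEDATALEN;
    info.datasize = sizeof(int);

    /* string_hash is the default, so HASH_FUNCTION need not be passed */
    example_table = hash_create(128, &info, HASH_ELEM);
}

static void
example_insert(char *name, int value)
{
    char        key[NAMEDATALEN];
    bool        found;
    ExampleEntry *entry;

    /* the key argument must supply a full keysize bytes */
    MemSet(key, 0, NAMEDATALEN);
    strncpy(key, name, NAMEDATALEN - 1);

    entry = (ExampleEntry *) hash_search(example_table, key,
                                         HASH_ENTER, &found);
    if (entry != NULL)
        entry->value = value;   /* fill the data part of the entry */
}
#endif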
/*
 * pointer access macros.  Shared memory implementation cannot
 * store pointers in the hash table data structures because
 * pointer values will be different in different address spaces.
 * these macros convert offsets to pointers and pointers to offsets.
 * Shared memory need not be contiguous, but all addresses must be
 * calculated relative to some offset (segbase).
 */
#define GET_SEG(hp,seg_num)\
    (SEGMENT) (((unsigned long) (hp)->segbase) + (hp)->dir[seg_num])

#define GET_BUCKET(hp,bucket_offs)\
    (ELEMENT *) (((unsigned long) (hp)->segbase) + bucket_offs)

#define MAKE_HASHOFFSET(hp,ptr)\
    ( ((unsigned long) ptr) - ((unsigned long) (hp)->segbase) )

#if HASH_STATISTICS
static long hash_accesses,
            hash_collisions,
            hash_expansions;
#endif

/************************** CREATE ROUTINES **********************/

HTAB *
hash_create(int nelem, HASHCTL *info, int flags)
{
    HHDR       *hctl;
    HTAB       *hashp;

    hashp = (HTAB *) MEM_ALLOC((unsigned long) sizeof(HTAB));
    MemSet(hashp, 0, sizeof(HTAB));

    if (flags & HASH_FUNCTION)
        hashp->hash = info->hash;
    else
    {
        /* default */
        hashp->hash = string_hash;
    }

    if (flags & HASH_SHARED_MEM)
    {
        /*
         * ctl structure is preallocated for shared memory tables. Note
         * that HASH_DIRSIZE had better be set as well.
         */
        hashp->hctl = (HHDR *) info->hctl;
        hashp->segbase = (char *) info->segbase;
        hashp->alloc = info->alloc;
        hashp->dir = (SEG_OFFSET *) info->dir;

        /* hash table already exists, we're just attaching to it */
        if (flags & HASH_ATTACH)
            return hashp;
    }
    else
    {
        /* setup hash table defaults */
        hashp->hctl = NULL;
        hashp->alloc = (dhalloc_ptr) MEM_ALLOC;
        hashp->dir = NULL;
        hashp->segbase = NULL;
    }

    if (!hashp->hctl)
    {
        hashp->hctl = (HHDR *) hashp->alloc((unsigned long) sizeof(HHDR));
        if (!hashp->hctl)
            return 0;
    }

    if (!hdefault(hashp))
        return 0;
    hctl = hashp->hctl;
#ifdef HASH_STATISTICS
    hctl->accesses = hctl->collisions = 0;
#endif

    if (flags & HASH_SEGMENT)
    {
        hctl->ssize = info->ssize;
        hctl->sshift = my_log2(info->ssize);
        /* ssize had better be a power of 2 */
        Assert(hctl->ssize == (1L << hctl->sshift));
    }
    if (flags & HASH_FFACTOR)
        hctl->ffactor = info->ffactor;

    /*
     * SHM hash tables have fixed directory size passed by the caller.
     */
    if (flags & HASH_DIRSIZE)
    {
        hctl->max_dsize = info->max_dsize;
        hctl->dsize = info->dsize;
    }

    /*
     * hash table now allocates space for key and data but you have to say
     * how much space to allocate
     */
    if (flags & HASH_ELEM)
    {
        hctl->keysize = info->keysize;
        hctl->datasize = info->datasize;
    }
    if (flags & HASH_ALLOC)
        hashp->alloc = info->alloc;

    if (init_htab(hashp, nelem))
    {
        hash_destroy(hashp);
        return 0;
    }
    return hashp;
}

/*
 * Set default HHDR parameters.
 */
static int
hdefault(HTAB *hashp)
{
    HHDR       *hctl;

    MemSet(hashp->hctl, 0, sizeof(HHDR));

    hctl = hashp->hctl;
    hctl->ssize = DEF_SEGSIZE;
    hctl->sshift = DEF_SEGSIZE_SHIFT;
    hctl->dsize = DEF_DIRSIZE;
    hctl->ffactor = DEF_FFACTOR;
    hctl->nkeys = 0;
    hctl->nsegs = 0;

    /* I added these MS. */

    /* default memory allocation for hash buckets */
    hctl->keysize = sizeof(char *);
    hctl->datasize = sizeof(char *);

    /* table has no fixed maximum size */
    hctl->max_dsize = NO_MAX_DSIZE;

    /* garbage collection for HASH_REMOVE */
    hctl->freeBucketIndex = INVALID_INDEX;

    return 1;
}
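/*
 * Editor's note: illustrative sketch, not part of the original file.  The
 * table is addressed directory -> segment -> bucket chain: a bucket number
 * is split into a segment number and an index within that segment using
 * sshift and ssize, and the macros above convert the stored offsets back
 * into pointers relative to segbase.  hash_search() (not shown in this
 * excerpt) walks the chain the same way; example_first_element is a
 * hypothetical name.
 */
#if 0
static ELEMENT *
example_first_element(HTAB *hashp, long bucket)
{
    HHDR       *hctl = hashp->hctl;
    long        segment_num = bucket >> hctl->sshift;
    long        segment_ndx = MOD(bucket, hctl->ssize);
    SEGMENT     segp = GET_SEG(hashp, segment_num);
    BUCKET_INDEX curIndex = segp[segment_ndx];

    if (curIndex == INVALID_INDEX)
        return NULL;            /* bucket chain is empty */
    return GET_BUCKET(hashp, curIndex); /* offset -> pointer */
}
#endif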
static int
init_htab(HTAB *hashp, int nelem)
{
    SEG_OFFSET *segp;
    int         nbuckets;
    int         nsegs;
    HHDR       *hctl;

    hctl = hashp->hctl;

    /*
     * Divide number of elements by the fill factor to determine a desired
     * number of buckets.  Allocate space for the next greater power of
     * two number of buckets
     */
    nelem = (nelem - 1) / hctl->ffactor + 1;

    nbuckets = 1 << my_log2(nelem);

    hctl->max_bucket = hctl->low_mask = nbuckets - 1;
    hctl->high_mask = (nbuckets << 1) - 1;

    /*
     * Figure number of directory segments needed, round up to a power of
     * 2
     */
    nsegs = (nbuckets - 1) / hctl->ssize + 1;
    nsegs = 1 << my_log2(nsegs);

    /*
     * Make sure directory is big enough.  If pre-allocated directory is
     * too small, choke (caller screwed up).
     */
    if (nsegs > hctl->dsize)
    {
        if (!(hashp->dir))
            hctl->dsize = nsegs;
        else
            return -1;
    }

    /* Allocate a directory */
    if (!(hashp->dir))
    {
        hashp->dir = (SEG_OFFSET *)
            hashp->alloc(hctl->dsize * sizeof(SEG_OFFSET));
        if (!hashp->dir)
            return -1;
    }

    /* Allocate initial segments */
    for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
    {
        *segp = seg_alloc(hashp);
        if (*segp == (SEG_OFFSET) 0)
        {
            hash_destroy(hashp);
            return 0;
        }
    }

#if HASH_DEBUG
    fprintf(stderr, "%s\n%s%x\n%s%d\n%s%d\n%s%d\n%s%d\n%s%d\n%s%x\n%s%x\n%s%d\n%s%d\n",
            "init_htab:",
            "TABLE POINTER   ", hashp,
            "DIRECTORY SIZE  ", hctl->dsize,
            "SEGMENT SIZE    ", hctl->ssize,
            "SEGMENT SHIFT   ", hctl->sshift,
            "FILL FACTOR     ", hctl->ffactor,
            "MAX BUCKET      ", hctl->max_bucket,
            "HIGH MASK       ", hctl->high_mask,
            "LOW MASK        ", hctl->low_mask,
            "NSEGS           ", hctl->nsegs,
            "NKEYS           ", hctl->nkeys);
#endif
    return 0;
}

/*
 * Estimate the space needed for a hashtable containing the given number
 * of entries of given size.
 * NOTE: this is used to estimate the footprint of hashtables in shared
 * memory; therefore it does not count HTAB which is in local memory.
 * NB: assumes that all hash structure parameters have default values!
 */
long
hash_estimate_size(long num_entries, long keysize, long datasize)
{
    long        size = 0;
    long        nBuckets,
                nSegments,
                nDirEntries,
                nRecordAllocs,
                recordSize;

    /* estimate number of buckets wanted */
    nBuckets = 1L << my_log2((num_entries - 1) / DEF_FFACTOR + 1);
    /* # of segments needed for nBuckets */
    nSegments = 1L << my_log2((nBuckets - 1) / DEF_SEGSIZE + 1);
    /* directory entries */
    nDirEntries = DEF_DIRSIZE;
    while (nDirEntries < nSegments)
        nDirEntries <<= 1;      /* dir_alloc doubles dsize at each call */

    /* fixed control info */
    size += MAXALIGN(sizeof(HHDR));     /* but not HTAB, per above */
    /* directory */
    size += MAXALIGN(nDirEntries * sizeof(SEG_OFFSET));
    /* segments */
    size += nSegments * MAXALIGN(DEF_SEGSIZE * sizeof(BUCKET_INDEX));
    /* records --- allocated in groups of BUCKET_ALLOC_INCR */
    recordSize = sizeof(BUCKET_INDEX) + keysize + datasize;
    recordSize = MAXALIGN(recordSize);
    nRecordAllocs = (num_entries - 1) / BUCKET_ALLOC_INCR + 1;
    size += nRecordAllocs * BUCKET_ALLOC_INCR * recordSize;

    return size;
}
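/*
 * Editor's note: illustrative sketch, not part of the original file.
 * init_htab above sets max_bucket, low_mask and high_mask for Larson's
 * linear-hashing scheme: a hash value is first masked with high_mask, and
 * if that selects a bucket beyond max_bucket (one that has not been split
 * into existence yet), it is remapped with low_mask into the existing part
 * of the table.  call_hash, in the SEARCH ROUTINES section below, performs
 * this calculation; example_bucket_for is a hypothetical name.
 */
#if 0
static long
example_bucket_for(uint32 hash_val, HHDR *hctl)
{
    long        bucket = hash_val & hctl->high_mask;

    if (bucket > hctl->max_bucket)
        bucket = bucket & hctl->low_mask;   /* fold back into live buckets */
    return bucket;
}
#endif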
/********************** DESTROY ROUTINES ************************/

/*
 * XXX this sure looks thoroughly broken to me --- tgl 2/99.
 * It's freeing every entry individually --- but they weren't
 * allocated individually, see bucket_alloc!!  Why doesn't it crash?
 * ANSWER: it probably does crash, but is never invoked in normal
 * operations...
 */
void
hash_destroy(HTAB *hashp)
{
    if (hashp != NULL)
    {
        SEG_OFFSET  segNum;
        SEGMENT     segp;
        int         nsegs = hashp->hctl->nsegs;
        int         j;
        BUCKET_INDEX *elp,
                    p,
                    q;
        ELEMENT    *curr;

        /* cannot destroy a shared memory hash table */
        Assert(!hashp->segbase);
        /* allocation method must be one we know how to free, too */
        Assert(hashp->alloc == (dhalloc_ptr) MEM_ALLOC);

        hash_stats("destroy", hashp);

        for (segNum = 0; nsegs > 0; nsegs--, segNum++)
        {
            segp = GET_SEG(hashp, segNum);
            for (j = 0, elp = segp; j < hashp->hctl->ssize; j++, elp++)
            {
                for (p = *elp; p != INVALID_INDEX; p = q)
                {
                    curr = GET_BUCKET(hashp, p);
                    q = curr->next;
                    MEM_FREE((char *) curr);
                }
            }
            MEM_FREE((char *) segp);
        }

        MEM_FREE((char *) hashp->dir);
        MEM_FREE((char *) hashp->hctl);
        MEM_FREE((char *) hashp);
    }
}

void
hash_stats(char *where, HTAB *hashp)
{
#if HASH_STATISTICS

    fprintf(stderr, "%s: this HTAB -- accesses %ld collisions %ld\n",
            where, hashp->hctl->accesses, hashp->hctl->collisions);

    fprintf(stderr, "hash_stats: keys %ld keysize %ld maxp %d segmentcount %d\n",
            hashp->hctl->nkeys, hashp->hctl->keysize,
            hashp->hctl->max_bucket, hashp->hctl->nsegs);
    fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
            where, hash_accesses, hash_collisions);
    fprintf(stderr, "hash_stats: total expansions %ld\n",
            hash_expansions);

#endif
}

/*******************************SEARCH ROUTINES *****************************/

static uint32
call_hash(HTAB *hashp, char *k)
{
    HHDR       *hctl = hashp->hctl;
    long        hash_val,
                bucket;

    hash_val = hashp->hash(k, (int) hctl->keysize);

    bucket = hash_val & hctl->high_mask;
    if (bucket > hctl->max_bucket)