📄 libchash.c

📁 google hash table算法
💻 C
📖 第 1 页 / 共 5 页
字号:
static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets){   ulong i, j;   WRITE_UL(fp, cBuckets);   for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )      for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )	 WRITE_UL(fp, binSparse[i].bmOccupied[j]);}static ulong SparseRead(FILE *fp, SparseBin **pbinSparse){   ulong i, j, cBuckets;   READ_UL(fp, cBuckets);                /* actually, cBuckets is stored */   cBuckets = SparseAllocate(pbinSparse, cBuckets);   for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ )   {      for ( j = 0; j < (1<<LOG_BM_WORDS); j++ )	 READ_UL(fp, (*pbinSparse)[i].bmOccupied[j]);      (*pbinSparse)[i].cOccupied =	 SPARSE_POS_TO_OFFSET((*pbinSparse)[i].bmOccupied,1<<LOG_LOW_BIN_SIZE);      (*pbinSparse)[i].binSparse =	 (SparseBucket *) HTsmalloc(sizeof(*((*pbinSparse)[i].binSparse)) *				    (*pbinSparse)[i].cOccupied);   }   return cBuckets;}/*************************************************************************\| SparseMemory()                                                          ||     SparseMemory() tells us how much memory is being allocated for      ||     the dense table.  You need to tell me not only how many buckets     ||     there are, but how many are occupied.                               |\*************************************************************************/static ulong SparseMemory(ulong cBuckets, ulong cOccupied){   return ( cOccupied * sizeof(SparseBucket) +	    SPARSE_GROUPS(cBuckets) * sizeof(SparseBin) );}/*  Just for fun, I also provide support for dense tables.  These are *  just regulr arrays.  Access is fast, but they can get big. *  Use Table(x) at the top of chash.h to decide which you want. *  A disadvantage is we need to steal more of the data space for *  indicating empty buckets.  We choose -3. */#ifndef DenseBucket             /* by default, each bucket holds an HTItem */#define DenseBucket             HTItem#endiftypedef struct DenseBin {       /* needs to be a struct for C typing reasons */   DenseBucket *rgBuckets;      /* A bin is an array of buckets */} DenseBin;typedef struct DenseIterator {   long pos;               /* the actual iterator */   DenseBin *bin;          /* state info, to avoid args for NextBucket() */   ulong cBuckets;} DenseIterator;#define DENSE_IS_EMPTY(bin, i)     ( (bin)[i].data == EMPTY )#define DENSE_SET_EMPTY(bin, i)    (bin)[i].data = EMPTY      /* fks-hash.h */#define DENSE_SET_OCCUPIED(bin, i) (bin)[i].data = 1          /* not EMPTY */static void DenseClear(DenseBin *bin, ulong cBuckets){   while ( cBuckets-- )      DENSE_SET_EMPTY(bin->rgBuckets, cBuckets);}static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets){   *pbin = (DenseBin *) HTsmalloc(sizeof(*pbin));   (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets)						  * cBuckets);   DenseClear(*pbin, cBuckets);   return cBuckets;}static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets){   HTfree(bin->rgBuckets, sizeof(*bin->rgBuckets) * cBuckets);   HTfree(bin, sizeof(*bin));   return NULL;}static int DenseIsEmpty(DenseBin *bin, ulong location){   return DENSE_IS_EMPTY(bin->rgBuckets, location);}static DenseBucket *DenseFind(DenseBin *bin, ulong location){   if ( DenseIsEmpty(bin, location) )      return NULL;   return bin->rgBuckets + location;}static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert,				ulong location, int *pfOverwrite){   DenseBucket *bckPlace;   bckPlace = DenseFind(bin, location);   if ( bckPlace )                /* means something is already there */   {      if ( *pfOverwrite )	 *bckPlace = *bckInsert;      *pfOverwrite = 1;           /* set to 1 to indicate someone was there */      return bckPlace;   }   else   {      bin->rgBuckets[location] = *bckInsert;      *pfOverwrite = 0;      return bin->rgBuckets + location;   }}static DenseBucket *DenseNextBucket(DenseIterator *iter){   for ( iter->pos++; iter->pos < iter->cBuckets; iter->pos++ )      if ( !DenseIsEmpty(iter->bin, iter->pos) )	 return iter->bin->rgBuckets + iter->pos;   return NULL;                        /* all remaining groups were empty */}static DenseBucket *DenseFirstBucket(DenseIterator *iter,				     DenseBin *bin, ulong cBuckets){   iter->bin = bin;                    /* set it up for NextBucket() */   iter->cBuckets = cBuckets;   iter->pos = -1;                     /* thus the next bucket will be 0 */   return DenseNextBucket(iter);}static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets){   ulong pos = 0, bit, bm;   WRITE_UL(fp, cBuckets);   while ( pos < cBuckets )   {      bm = 0;      for ( bit = 0; bit < 8*sizeof(ulong); bit++ )      {	 if ( !DenseIsEmpty(bin, pos) )	    SET_BITMAP(&bm, bit);                /* in fks-hash.h */	 if ( ++pos == cBuckets )	    break;      }      WRITE_UL(fp, bm);   }}static ulong DenseRead(FILE *fp, DenseBin **pbin){   ulong pos = 0, bit, bm, cBuckets;   READ_UL(fp, cBuckets);   cBuckets = DenseAllocate(pbin, cBuckets);   while ( pos < cBuckets )   {      READ_UL(fp, bm);      for ( bit = 0; bit < 8*sizeof(ulong); bit++ )      {	 if ( TEST_BITMAP(&bm, bit) )            /* in fks-hash.h */	    DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos);	 else	    DENSE_SET_EMPTY((*pbin)->rgBuckets, pos);	 if ( ++pos == cBuckets )	    break;      }   }   return cBuckets;}static ulong DenseMemory(ulong cBuckets, ulong cOccupied){   return cBuckets * sizeof(DenseBucket);}/* ======================================================================== *//*                          HASHING ROUTINES                                *//*                       ----------------------                             *//*  Implements a simple quadratic hashing scheme.  We have a single hash *  table of size t and a single hash function h(x).  When inserting an *  item, first we try h(x) % t.  If it's occupied, we try h(x) +  *  i*(i-1)/2 % t for increasing values of i until we hit a not-occupied *  space.  To make this dynamic, we double the size of the hash table as *  soon as more than half the cells are occupied.  When deleting, we can *  choose to shrink the hashtable when less than a quarter of the *  cells are occupied, or we can choose never to shrink the hashtable. *  For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until *  we get a match or we hit an empty space.  Note that as a result, *  we can't make a cell empty on deletion, or lookups may end prematurely. *  Instead we mark the cell as "deleted."  We thus steal the value *  DELETED as a possible "data" value.  As long as data are pointers, *  that's ok. *     The hash increment we use, i(i-1)/2, is not the standard quadratic *  hash increment, which is i^2.  i(i-1)/2 covers the entire bucket space *  when the hashtable size is a power of two, as it is for us.  In fact, *  the first n probes cover n distinct buckets; then it repeats.  This *  guarantees insertion will always succeed. *     If you linear hashing, set JUMP in chash.h.  You can also change *  various other parameters there. *//*************************************************************************\| Hash()                                                                  ||     The hash function I use is due to Bob Jenkins (see                  ||     http://burtleburtle.net/bob/hash/evahash.html                       ||     According to http://burtleburtle.net/bob/c/lookup2.c,               ||     his implementation is public domain.)                               ||     It takes 36 instructions, in 18 cycles if you're lucky.             ||        hashing depends on the fact the hashtable size is always a       ||     power of 2.  cBuckets is probably ht->cBuckets.                     |\*************************************************************************/#if LOG_WORD_SIZE == 5                      /* 32 bit words */#define mix(a,b,c) \{ \  a -= b; a -= c; a ^= (c>>13); \  b -= c; b -= a; b ^= (a<<8); \  c -= a; c -= b; c ^= (b>>13); \  a -= b; a -= c; a ^= (c>>12);  \  b -= c; b -= a; b ^= (a<<16); \  c -= a; c -= b; c ^= (b>>5); \  a -= b; a -= c; a ^= (c>>3);  \  b -= c; b -= a; b ^= (a<<10); \  c -= a; c -= b; c ^= (b>>15); \}#ifdef WORD_HASH                 /* play with this on little-endian machines */#define WORD_AT(ptr)    ( *(ulong *)(ptr) )#else#define WORD_AT(ptr)    ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \			  ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) )#endif#elif LOG_WORD_SIZE == 6        /* 64 bit words */#define mix(a,b,c) \{ \  a -= b; a -= c; a ^= (c>>43); \  b -= c; b -= a; b ^= (a<<9); \  c -= a; c -= b; c ^= (b>>8); \  a -= b; a -= c; a ^= (c>>38); \  b -= c; b -= a; b ^= (a<<23); \  c -= a; c -= b; c ^= (b>>5); \  a -= b; a -= c; a ^= (c>>35); \  b -= c; b -= a; b ^= (a<<49); \  c -= a; c -= b; c ^= (b>>11); \  a -= b; a -= c; a ^= (c>>12); \  b -= c; b -= a; b ^= (a<<18); \  c -= a; c -= b; c ^= (b>>22); \}#ifdef WORD_HASH                 /* alpha is little-endian, btw */#define WORD_AT(ptr)    ( *(ulong *)(ptr) )#else#define WORD_AT(ptr)    ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \			  ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \			  ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \			  ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) )#endif#else                            /* neither 32 or 64 bit words */#error This hash function can only hash 32 or 64 bit words.  Sorry.#endifstatic ulong Hash(HashTable *ht, char *key, ulong cBuckets){   ulong a, b, c, cchKey, cchKeyOrig;   cchKeyOrig = ht->cchKey == NULL_TERMINATED ? strlen(key) : ht->cchKey;   a = b = c = 0x9e3779b9;       /* the golden ratio; an arbitrary value */   for ( cchKey = cchKeyOrig;  cchKey >= 3 * sizeof(ulong);	 cchKey -= 3 * sizeof(ulong),  key += 3 * sizeof(ulong) )   {      a += WORD_AT(key);      b += WORD_AT(key + sizeof(ulong));      c += WORD_AT(key + sizeof(ulong)*2);      mix(a,b,c);   }   c += cchKeyOrig;   switch ( cchKey ) {           /* deal with rest.  Cases fall through */#if LOG_WORD_SIZE == 5      case 11: c += (ulong)key[10]<<24;      case 10: c += (ulong)key[9]<<16;      case 9 : c += (ulong)key[8]<<8;               /* the first byte of c is reserved for the length */      case 8 : b += WORD_AT(key+4);  a+= WORD_AT(key);  break;      case 7 : b += (ulong)key[6]<<16;      case 6 : b += (ulong)key[5]<<8;      case 5 : b += key[4];      case 4 : a += WORD_AT(key);  break;      case 3 : a += (ulong)key[2]<<16;
💿 文件大小 23 K
👤 上传用户 gjinli88
📂 所属分类 Linux/Unix编程
📄 代码行数 1,525 行
💻 语言类型 C语言
🏷️ 相关标签

#google #table #hash #算法
更多google资源 →
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -