📄 libchash.c
字号:
static void SparseWrite(FILE *fp, SparseBin *binSparse, ulong cBuckets){ ulong i, j; WRITE_UL(fp, cBuckets); for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ ) for ( j = 0; j < (1<<LOG_BM_WORDS); j++ ) WRITE_UL(fp, binSparse[i].bmOccupied[j]);}static ulong SparseRead(FILE *fp, SparseBin **pbinSparse){ ulong i, j, cBuckets; READ_UL(fp, cBuckets); /* actually, cBuckets is stored */ cBuckets = SparseAllocate(pbinSparse, cBuckets); for ( i = 0; i < SPARSE_GROUPS(cBuckets); i++ ) { for ( j = 0; j < (1<<LOG_BM_WORDS); j++ ) READ_UL(fp, (*pbinSparse)[i].bmOccupied[j]); (*pbinSparse)[i].cOccupied = SPARSE_POS_TO_OFFSET((*pbinSparse)[i].bmOccupied,1<<LOG_LOW_BIN_SIZE); (*pbinSparse)[i].binSparse = (SparseBucket *) HTsmalloc(sizeof(*((*pbinSparse)[i].binSparse)) * (*pbinSparse)[i].cOccupied); } return cBuckets;}/*************************************************************************\| SparseMemory() || SparseMemory() tells us how much memory is being allocated for || the dense table. You need to tell me not only how many buckets || there are, but how many are occupied. |\*************************************************************************/static ulong SparseMemory(ulong cBuckets, ulong cOccupied){ return ( cOccupied * sizeof(SparseBucket) + SPARSE_GROUPS(cBuckets) * sizeof(SparseBin) );}/* Just for fun, I also provide support for dense tables. These are * just regulr arrays. Access is fast, but they can get big. * Use Table(x) at the top of chash.h to decide which you want. * A disadvantage is we need to steal more of the data space for * indicating empty buckets. We choose -3. */#ifndef DenseBucket /* by default, each bucket holds an HTItem */#define DenseBucket HTItem#endiftypedef struct DenseBin { /* needs to be a struct for C typing reasons */ DenseBucket *rgBuckets; /* A bin is an array of buckets */} DenseBin;typedef struct DenseIterator { long pos; /* the actual iterator */ DenseBin *bin; /* state info, to avoid args for NextBucket() */ ulong cBuckets;} DenseIterator;#define DENSE_IS_EMPTY(bin, i) ( (bin)[i].data == EMPTY )#define DENSE_SET_EMPTY(bin, i) (bin)[i].data = EMPTY /* fks-hash.h */#define DENSE_SET_OCCUPIED(bin, i) (bin)[i].data = 1 /* not EMPTY */static void DenseClear(DenseBin *bin, ulong cBuckets){ while ( cBuckets-- ) DENSE_SET_EMPTY(bin->rgBuckets, cBuckets);}static ulong DenseAllocate(DenseBin **pbin, ulong cBuckets){ *pbin = (DenseBin *) HTsmalloc(sizeof(*pbin)); (*pbin)->rgBuckets = (DenseBucket *) HTsmalloc(sizeof(*(*pbin)->rgBuckets) * cBuckets); DenseClear(*pbin, cBuckets); return cBuckets;}static DenseBin *DenseFree(DenseBin *bin, ulong cBuckets){ HTfree(bin->rgBuckets, sizeof(*bin->rgBuckets) * cBuckets); HTfree(bin, sizeof(*bin)); return NULL;}static int DenseIsEmpty(DenseBin *bin, ulong location){ return DENSE_IS_EMPTY(bin->rgBuckets, location);}static DenseBucket *DenseFind(DenseBin *bin, ulong location){ if ( DenseIsEmpty(bin, location) ) return NULL; return bin->rgBuckets + location;}static DenseBucket *DenseInsert(DenseBin *bin, DenseBucket *bckInsert, ulong location, int *pfOverwrite){ DenseBucket *bckPlace; bckPlace = DenseFind(bin, location); if ( bckPlace ) /* means something is already there */ { if ( *pfOverwrite ) *bckPlace = *bckInsert; *pfOverwrite = 1; /* set to 1 to indicate someone was there */ return bckPlace; } else { bin->rgBuckets[location] = *bckInsert; *pfOverwrite = 0; return bin->rgBuckets + location; }}static DenseBucket *DenseNextBucket(DenseIterator *iter){ for ( iter->pos++; iter->pos < iter->cBuckets; iter->pos++ ) if ( !DenseIsEmpty(iter->bin, iter->pos) ) return iter->bin->rgBuckets + iter->pos; return NULL; /* all remaining groups were empty */}static DenseBucket *DenseFirstBucket(DenseIterator *iter, DenseBin *bin, ulong cBuckets){ iter->bin = bin; /* set it up for NextBucket() */ iter->cBuckets = cBuckets; iter->pos = -1; /* thus the next bucket will be 0 */ return DenseNextBucket(iter);}static void DenseWrite(FILE *fp, DenseBin *bin, ulong cBuckets){ ulong pos = 0, bit, bm; WRITE_UL(fp, cBuckets); while ( pos < cBuckets ) { bm = 0; for ( bit = 0; bit < 8*sizeof(ulong); bit++ ) { if ( !DenseIsEmpty(bin, pos) ) SET_BITMAP(&bm, bit); /* in fks-hash.h */ if ( ++pos == cBuckets ) break; } WRITE_UL(fp, bm); }}static ulong DenseRead(FILE *fp, DenseBin **pbin){ ulong pos = 0, bit, bm, cBuckets; READ_UL(fp, cBuckets); cBuckets = DenseAllocate(pbin, cBuckets); while ( pos < cBuckets ) { READ_UL(fp, bm); for ( bit = 0; bit < 8*sizeof(ulong); bit++ ) { if ( TEST_BITMAP(&bm, bit) ) /* in fks-hash.h */ DENSE_SET_OCCUPIED((*pbin)->rgBuckets, pos); else DENSE_SET_EMPTY((*pbin)->rgBuckets, pos); if ( ++pos == cBuckets ) break; } } return cBuckets;}static ulong DenseMemory(ulong cBuckets, ulong cOccupied){ return cBuckets * sizeof(DenseBucket);}/* ======================================================================== *//* HASHING ROUTINES *//* ---------------------- *//* Implements a simple quadratic hashing scheme. We have a single hash * table of size t and a single hash function h(x). When inserting an * item, first we try h(x) % t. If it's occupied, we try h(x) + * i*(i-1)/2 % t for increasing values of i until we hit a not-occupied * space. To make this dynamic, we double the size of the hash table as * soon as more than half the cells are occupied. When deleting, we can * choose to shrink the hashtable when less than a quarter of the * cells are occupied, or we can choose never to shrink the hashtable. * For lookup, we check h(x) + i*(i-1)/2 % t (starting with i=0) until * we get a match or we hit an empty space. Note that as a result, * we can't make a cell empty on deletion, or lookups may end prematurely. * Instead we mark the cell as "deleted." We thus steal the value * DELETED as a possible "data" value. As long as data are pointers, * that's ok. * The hash increment we use, i(i-1)/2, is not the standard quadratic * hash increment, which is i^2. i(i-1)/2 covers the entire bucket space * when the hashtable size is a power of two, as it is for us. In fact, * the first n probes cover n distinct buckets; then it repeats. This * guarantees insertion will always succeed. * If you linear hashing, set JUMP in chash.h. You can also change * various other parameters there. *//*************************************************************************\| Hash() || The hash function I use is due to Bob Jenkins (see || http://burtleburtle.net/bob/hash/evahash.html || According to http://burtleburtle.net/bob/c/lookup2.c, || his implementation is public domain.) || It takes 36 instructions, in 18 cycles if you're lucky. || hashing depends on the fact the hashtable size is always a || power of 2. cBuckets is probably ht->cBuckets. |\*************************************************************************/#if LOG_WORD_SIZE == 5 /* 32 bit words */#define mix(a,b,c) \{ \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ c -= a; c -= b; c ^= (b>>13); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<16); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>3); \ b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \}#ifdef WORD_HASH /* play with this on little-endian machines */#define WORD_AT(ptr) ( *(ulong *)(ptr) )#else#define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \ ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) )#endif#elif LOG_WORD_SIZE == 6 /* 64 bit words */#define mix(a,b,c) \{ \ a -= b; a -= c; a ^= (c>>43); \ b -= c; b -= a; b ^= (a<<9); \ c -= a; c -= b; c ^= (b>>8); \ a -= b; a -= c; a ^= (c>>38); \ b -= c; b -= a; b ^= (a<<23); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>35); \ b -= c; b -= a; b ^= (a<<49); \ c -= a; c -= b; c ^= (b>>11); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<18); \ c -= a; c -= b; c ^= (b>>22); \}#ifdef WORD_HASH /* alpha is little-endian, btw */#define WORD_AT(ptr) ( *(ulong *)(ptr) )#else#define WORD_AT(ptr) ( (ptr)[0] + ((ulong)(ptr)[1]<<8) + \ ((ulong)(ptr)[2]<<16) + ((ulong)(ptr)[3]<<24) + \ ((ulong)(ptr)[4]<<32) + ((ulong)(ptr)[5]<<40) + \ ((ulong)(ptr)[6]<<48) + ((ulong)(ptr)[7]<<56) )#endif#else /* neither 32 or 64 bit words */#error This hash function can only hash 32 or 64 bit words. Sorry.#endifstatic ulong Hash(HashTable *ht, char *key, ulong cBuckets){ ulong a, b, c, cchKey, cchKeyOrig; cchKeyOrig = ht->cchKey == NULL_TERMINATED ? strlen(key) : ht->cchKey; a = b = c = 0x9e3779b9; /* the golden ratio; an arbitrary value */ for ( cchKey = cchKeyOrig; cchKey >= 3 * sizeof(ulong); cchKey -= 3 * sizeof(ulong), key += 3 * sizeof(ulong) ) { a += WORD_AT(key); b += WORD_AT(key + sizeof(ulong)); c += WORD_AT(key + sizeof(ulong)*2); mix(a,b,c); } c += cchKeyOrig; switch ( cchKey ) { /* deal with rest. Cases fall through */#if LOG_WORD_SIZE == 5 case 11: c += (ulong)key[10]<<24; case 10: c += (ulong)key[9]<<16; case 9 : c += (ulong)key[8]<<8; /* the first byte of c is reserved for the length */ case 8 : b += WORD_AT(key+4); a+= WORD_AT(key); break; case 7 : b += (ulong)key[6]<<16; case 6 : b += (ulong)key[5]<<8; case 5 : b += key[4]; case 4 : a += WORD_AT(key); break; case 3 : a += (ulong)key[2]<<16;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -