📄 libchash.c
字号:
unsigned long retval; memcpy(&retval, ul, sizeof(retval)); return retval;}/*************************************************************************\| HTSetupKeyTrunc() || If keys are stored directly but cchKey is less than || sizeof(ulong), this cuts off the bits at the end. |\*************************************************************************/ static void HTSetupKeyTrunc(void){ int i, j; for ( i = 0; i < sizeof(unsigned long); i++ ) for ( j = 0; j < sizeof(unsigned long); j++ ) grgKeyTruncMask[i][j] = j < i ? 255 : 0; /* chars have 8 bits */}/* ======================================================================== *//* TABLE ROUTINES *//* -------------------- *//* The idea is that a hashtable with (logically) t buckets is divided * into t/M groups of M buckets each. (M is a constant set in * LOG_BM_WORDS for efficiency.) Each group is stored sparsely. * Thus, inserting into the table causes some array to grow, which is * slow but still constant time. Lookup involves doing a * logical-position-to-sparse-position lookup, which is also slow but * constant time. The larger M is, the slower these operations are * but the less overhead (slightly). * * To store the sparse array, we store a bitmap B, where B[i] = 1 iff * bucket i is non-empty. Then to look up bucket i we really look up * array[# of 1s before i in B]. This is constant time for fixed M. * * Terminology: the position of an item in the overall table (from * 1 .. t) is called its "location." The logical position in a group * (from 1 .. M ) is called its "position." The actual location in * the array (from 1 .. # of non-empty buckets in the group) is * called its "offset." * * The following operations are supported: * o Allocate an array with t buckets, all empty * o Free a array (but not whatever was stored in the buckets) * o Tell whether or not a bucket is empty * o Return a bucket with a given location * o Set the value of a bucket at a given location * o Iterate through all the buckets in the array * o Read and write an occupancy bitmap to disk * o Return how much memory is being allocated by the array structure */#ifndef SparseBucket /* by default, each bucket holds an HTItem */#define SparseBucket HTItem#endiftypedef struct SparseBin { SparseBucket *binSparse; HTBitmap bmOccupied; /* bmOccupied[i] is 1 if bucket i has an item */ short cOccupied; /* size of binSparse; useful for iterators, eg */} SparseBin;typedef struct SparseIterator { long posGroup; long posOffset; SparseBin *binSparse; /* state info, to avoid args for NextBucket() */ ulong cBuckets;} SparseIterator;#define LOG_LOW_BIN_SIZE ( LOG_BM_WORDS+LOG_WORD_SIZE )#define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 ) /* we need a small function to figure out # of items set in the bm */static HTOffset EntriesUpto(HTBitmapPart *bm, int i){ /* returns # of set bits in 0..i-1 */ HTOffset retval = 0; static HTOffset rgcBits[256] = /* # of bits set in one char */ {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; if ( i == 0 ) return 0; for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ ) { /* think of it as loop unrolling */#if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */ retval += rgcBits[*bm & 255]; /* get the low byte */#if LOG_WORD_SIZE >= 4 /* at least 2 bytes */ retval += rgcBits[(*bm >> 8) & 255];#if LOG_WORD_SIZE >= 5 /* at least 4 bytes */ retval += rgcBits[(*bm >> 16) & 255]; retval += rgcBits[(*bm >> 24) & 255];#if LOG_WORD_SIZE >= 6 /* 8 bytes! */ retval += rgcBits[(*bm >> 32) & 255]; retval += rgcBits[(*bm >> 40) & 255]; retval += rgcBits[(*bm >> 48) & 255]; retval += rgcBits[(*bm >> 56) & 255];#if LOG_WORD_SIZE >= 7 /* not a concern for a while... */#error Need to rewrite EntriesUpto to support such big words#endif /* >8 bytes */#endif /* 8 bytes */#endif /* 4 bytes */#endif /* 2 bytes */#endif /* 1 byte */ } switch ( i ) { /* from 0 to 63 */ case 0: return retval;#if LOG_WORD_SIZE >= 3 /* 1 byte per word, or more */ case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8: return (retval + rgcBits[*bm & ((1 << i)-1)]);#if LOG_WORD_SIZE >= 4 /* at least 2 bytes */ case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]);#if LOG_WORD_SIZE >= 5 /* at least 4 bytes */ case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]); case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]);#if LOG_WORD_SIZE >= 6 /* 8 bytes! */ case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]); case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]); case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] + rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]); case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64: return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] + rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] + rgcBits[(*bm >> 48) & 255] + rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]);#endif /* 8 bytes */#endif /* 4 bytes */#endif /* 2 bytes */#endif /* 1 byte */ } assert("" == "word size is too big in EntriesUpto()"); return -1;}#define SPARSE_POS_TO_OFFSET(bm, i) ( EntriesUpto(&((bm)[0]), i) )#define SPARSE_BUCKET(bin, location) \ ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse + \ SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied, \ MOD2(location, LOG_LOW_BIN_SIZE)) )/*************************************************************************\| SparseAllocate() || SparseFree() || Allocates, sets-to-empty, and frees a sparse array. All you need || to tell me is how many buckets you want. I return the number of || buckets I actually allocated, setting the array as a parameter. || Note that you have to set auxilliary parameters, like cOccupied. |\*************************************************************************/static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets){ int cGroups = SPARSE_GROUPS(cBuckets); *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups); return cGroups << LOG_LOW_BIN_SIZE;}static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets){ ulong iGroup, cGroups = SPARSE_GROUPS(cBuckets); for ( iGroup = 0; iGroup < cGroups; iGroup++ ) HTfree(binSparse[iGroup].binSparse, (sizeof(*binSparse[iGroup].binSparse) * binSparse[iGroup].cOccupied)); HTfree(binSparse, sizeof(*binSparse) * cGroups); return NULL;}/*************************************************************************\| SparseIsEmpty() || SparseFind() || You give me a location (ie a number between 1 and t), and I || return the bucket at that location, or NULL if the bucket is || empty. It's OK to call Find() on an empty table. |\*************************************************************************/static int SparseIsEmpty(SparseBin *binSparse, ulong location){ return !TEST_BITMAP(binSparse[location>>LOG_LOW_BIN_SIZE].bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));}static SparseBucket *SparseFind(SparseBin *binSparse, ulong location){ if ( SparseIsEmpty(binSparse, location) ) return NULL; return SPARSE_BUCKET(binSparse, location);}/*************************************************************************\| SparseInsert() || You give me a location, and contents to put there, and I insert || into that location and RETURN a pointer to the location. If || bucket was already occupied, I write over the contents only if || *pfOverwrite is 1. We set *pfOverwrite to 1 if there was someone || there (whether or not we overwrote) and 0 else. |\*************************************************************************/static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert, ulong location, int *pfOverwrite){ SparseBucket *bckPlace; HTOffset offset; bckPlace = SparseFind(binSparse, location); if ( bckPlace ) /* means we replace old contents */ { if ( *pfOverwrite ) *bckPlace = *bckInsert; *pfOverwrite = 1; return bckPlace; } binSparse += (location >> LOG_LOW_BIN_SIZE); offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE)); binSparse->binSparse = (SparseBucket *) HTsrealloc(binSparse->binSparse, sizeof(*binSparse->binSparse) * ++binSparse->cOccupied, sizeof(*binSparse->binSparse)); memmove(binSparse->binSparse + offset+1, binSparse->binSparse + offset, (binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse)); binSparse->binSparse[offset] = *bckInsert; SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE)); *pfOverwrite = 0; return binSparse->binSparse + offset;}/*************************************************************************\| SparseFirstBucket() || SparseNextBucket() || SparseCurrentBit() || Iterate through the occupied buckets of a dense hashtable. You || must, of course, have allocated space yourself for the iterator. |\*************************************************************************/static SparseBucket *SparseNextBucket(SparseIterator *iter){ if ( iter->posOffset != -1 && /* not called from FirstBucket()? */ (++iter->posOffset < iter->binSparse[iter->posGroup].cOccupied) ) return iter->binSparse[iter->posGroup].binSparse + iter->posOffset; iter->posOffset = 0; /* start the next group */ for ( iter->posGroup++; iter->posGroup < SPARSE_GROUPS(iter->cBuckets); iter->posGroup++ ) if ( iter->binSparse[iter->posGroup].cOccupied > 0 ) return iter->binSparse[iter->posGroup].binSparse; /* + 0 */ return NULL; /* all remaining groups were empty */}static SparseBucket *SparseFirstBucket(SparseIterator *iter, SparseBin *binSparse, ulong cBuckets){ iter->binSparse = binSparse; /* set it up for NextBucket() */ iter->cBuckets = cBuckets; iter->posOffset = -1; /* when we advance, we're at 0 */ iter->posGroup = -1; return SparseNextBucket(iter);}/*************************************************************************\| SparseWrite() || SparseRead() || These are routines for storing a sparse hashtable onto disk. We || store the number of buckets and a bitmap indicating which buckets || are allocated (occupied). The actual contents of the buckets || must be stored separately. |\*************************************************************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -