📄 libchash.c

📁 google hash table算法
💻 C
📖 第 1 页 / 共 5 页
字号:
   unsigned long retval;   memcpy(&retval, ul, sizeof(retval));   return retval;}/*************************************************************************\| HTSetupKeyTrunc()                                                       ||     If keys are stored directly but cchKey is less than                 ||     sizeof(ulong), this cuts off the bits at the end.                   |\*************************************************************************/   static void HTSetupKeyTrunc(void){   int i, j;   for ( i = 0; i < sizeof(unsigned long); i++ )      for ( j = 0; j < sizeof(unsigned long); j++ )	 grgKeyTruncMask[i][j] = j < i ? 255 : 0;   /* chars have 8 bits */}/* ======================================================================== *//*                            TABLE ROUTINES                                *//*                         --------------------                             *//*  The idea is that a hashtable with (logically) t buckets is divided *  into t/M groups of M buckets each.  (M is a constant set in *  LOG_BM_WORDS for efficiency.)  Each group is stored sparsely. *  Thus, inserting into the table causes some array to grow, which is *  slow but still constant time.  Lookup involves doing a *  logical-position-to-sparse-position lookup, which is also slow but *  constant time.  The larger M is, the slower these operations are *  but the less overhead (slightly). * *  To store the sparse array, we store a bitmap B, where B[i] = 1 iff *  bucket i is non-empty.  Then to look up bucket i we really look up *  array[# of 1s before i in B].  This is constant time for fixed M. * *  Terminology: the position of an item in the overall table (from *  1 .. t) is called its "location."  The logical position in a group *  (from 1 .. M ) is called its "position."  The actual location in *  the array (from 1 .. # of non-empty buckets in the group) is *  called its "offset." * *  The following operations are supported: *     o Allocate an array with t buckets, all empty *     o Free a array (but not whatever was stored in the buckets) *     o Tell whether or not a bucket is empty *     o Return a bucket with a given location *     o Set the value of a bucket at a given location *     o Iterate through all the buckets in the array *     o Read and write an occupancy bitmap to disk *     o Return how much memory is being allocated by the array structure */#ifndef SparseBucket            /* by default, each bucket holds an HTItem */#define SparseBucket            HTItem#endiftypedef struct SparseBin {   SparseBucket *binSparse;   HTBitmap bmOccupied;      /* bmOccupied[i] is 1 if bucket i has an item */   short cOccupied;          /* size of binSparse; useful for iterators, eg */} SparseBin;typedef struct SparseIterator {   long posGroup;   long posOffset;   SparseBin *binSparse;     /* state info, to avoid args for NextBucket() */   ulong cBuckets;} SparseIterator;#define LOG_LOW_BIN_SIZE        ( LOG_BM_WORDS+LOG_WORD_SIZE )#define SPARSE_GROUPS(cBuckets) ( (((cBuckets)-1) >> LOG_LOW_BIN_SIZE) + 1 )   /* we need a small function to figure out # of items set in the bm */static HTOffset EntriesUpto(HTBitmapPart *bm, int i){                                       /* returns # of set bits in 0..i-1 */   HTOffset retval = 0;    static HTOffset rgcBits[256] =             /* # of bits set in one char */      {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,       1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,       2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,       3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,       4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};   if ( i == 0 ) return 0;   for ( ; i > sizeof(*bm)*8; i -= sizeof(*bm)*8, bm++ )   {                                       /* think of it as loop unrolling */#if LOG_WORD_SIZE >= 3                     /* 1 byte per word, or more */      retval += rgcBits[*bm & 255];        /* get the low byte */#if LOG_WORD_SIZE >= 4                     /* at least 2 bytes */      retval += rgcBits[(*bm >> 8) & 255];#if LOG_WORD_SIZE >= 5                     /* at least 4 bytes */      retval += rgcBits[(*bm >> 16) & 255];      retval += rgcBits[(*bm >> 24) & 255];#if LOG_WORD_SIZE >= 6                     /* 8 bytes! */      retval += rgcBits[(*bm >> 32) & 255];      retval += rgcBits[(*bm >> 40) & 255];      retval += rgcBits[(*bm >> 48) & 255];      retval += rgcBits[(*bm >> 56) & 255];#if LOG_WORD_SIZE >= 7                     /* not a concern for a while... */#error Need to rewrite EntriesUpto to support such big words#endif   /* >8 bytes */#endif   /* 8 bytes */#endif   /* 4 bytes */#endif   /* 2 bytes */#endif   /* 1 byte */   }   switch ( i ) {                         /* from 0 to 63 */      case 0:	 return retval;#if LOG_WORD_SIZE >= 3                     /* 1 byte per word, or more */      case 1: case 2: case 3: case 4: case 5: case 6: case 7: case 8:	 return (retval + rgcBits[*bm & ((1 << i)-1)]);#if LOG_WORD_SIZE >= 4                     /* at least 2 bytes */      case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16:	 return (retval + rgcBits[*bm & 255] + 		 rgcBits[(*bm >> 8) & ((1 << (i-8))-1)]);#if LOG_WORD_SIZE >= 5                     /* at least 4 bytes */      case 17: case 18: case 19: case 20: case 21: case 22: case 23: case 24:	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +		 rgcBits[(*bm >> 16) & ((1 << (i-16))-1)]);      case 25: case 26: case 27: case 28: case 29: case 30: case 31: case 32:	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +		 rgcBits[(*bm >> 16) & 255] + 		 rgcBits[(*bm >> 24) & ((1 << (i-24))-1)]);#if LOG_WORD_SIZE >= 6                     /* 8 bytes! */      case 33: case 34: case 35: case 36: case 37: case 38: case 39: case 40:	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + 		 rgcBits[(*bm >> 32) & ((1 << (i-32))-1)]);      case 41: case 42: case 43: case 44: case 45: case 46: case 47: case 48:	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + 		 rgcBits[(*bm >> 32) & 255] +		 rgcBits[(*bm >> 40) & ((1 << (i-40))-1)]);      case 49: case 50: case 51: case 52: case 53: case 54: case 55: case 56:	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + 		 rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +		 rgcBits[(*bm >> 48) & ((1 << (i-48))-1)]);      case 57: case 58: case 59: case 60: case 61: case 62: case 63: case 64:	 return (retval + rgcBits[*bm & 255] + rgcBits[(*bm >> 8) & 255] +		 rgcBits[(*bm >> 16) & 255] + rgcBits[(*bm >> 24) & 255] + 		 rgcBits[(*bm >> 32) & 255] + rgcBits[(*bm >> 40) & 255] +		 rgcBits[(*bm >> 48) & 255] + 		 rgcBits[(*bm >> 56) & ((1 << (i-56))-1)]);#endif   /* 8 bytes */#endif   /* 4 bytes */#endif   /* 2 bytes */#endif   /* 1 byte */   }   assert("" == "word size is too big in EntriesUpto()");   return -1;}#define SPARSE_POS_TO_OFFSET(bm, i)   ( EntriesUpto(&((bm)[0]), i) )#define SPARSE_BUCKET(bin, location)  \   ( (bin)[(location) >> LOG_LOW_BIN_SIZE].binSparse +                     \      SPARSE_POS_TO_OFFSET((bin)[(location)>>LOG_LOW_BIN_SIZE].bmOccupied, \		           MOD2(location, LOG_LOW_BIN_SIZE)) )/*************************************************************************\| SparseAllocate()                                                        || SparseFree()                                                            ||     Allocates, sets-to-empty, and frees a sparse array.  All you need   ||     to tell me is how many buckets you want.  I return the number of    ||     buckets I actually allocated, setting the array as a parameter.     ||     Note that you have to set auxilliary parameters, like cOccupied.    |\*************************************************************************/static ulong SparseAllocate(SparseBin **pbinSparse, ulong cBuckets){   int cGroups = SPARSE_GROUPS(cBuckets);   *pbinSparse = (SparseBin *) HTscalloc(sizeof(**pbinSparse) * cGroups);   return cGroups << LOG_LOW_BIN_SIZE;}static SparseBin *SparseFree(SparseBin *binSparse, ulong cBuckets){   ulong iGroup, cGroups = SPARSE_GROUPS(cBuckets);   for ( iGroup = 0; iGroup < cGroups; iGroup++ )      HTfree(binSparse[iGroup].binSparse, (sizeof(*binSparse[iGroup].binSparse)					   * binSparse[iGroup].cOccupied));   HTfree(binSparse, sizeof(*binSparse) * cGroups);   return NULL;}/*************************************************************************\| SparseIsEmpty()                                                         || SparseFind()                                                            ||     You give me a location (ie a number between 1 and t), and I         ||     return the bucket at that location, or NULL if the bucket is        ||     empty.  It's OK to call Find() on an empty table.                   |\*************************************************************************/static int SparseIsEmpty(SparseBin *binSparse, ulong location){   return !TEST_BITMAP(binSparse[location>>LOG_LOW_BIN_SIZE].bmOccupied,		       MOD2(location, LOG_LOW_BIN_SIZE));}static SparseBucket *SparseFind(SparseBin *binSparse, ulong location){   if ( SparseIsEmpty(binSparse, location) )      return NULL;   return SPARSE_BUCKET(binSparse, location);}/*************************************************************************\| SparseInsert()                                                          ||     You give me a location, and contents to put there, and I insert     ||     into that location and RETURN a pointer to the location.  If        ||     bucket was already occupied, I write over the contents only if      ||     *pfOverwrite is 1.  We set *pfOverwrite to 1 if there was someone   ||     there (whether or not we overwrote) and 0 else.                     |\*************************************************************************/static SparseBucket *SparseInsert(SparseBin *binSparse, SparseBucket *bckInsert,				  ulong location, int *pfOverwrite){   SparseBucket *bckPlace;   HTOffset offset;   bckPlace = SparseFind(binSparse, location);   if ( bckPlace )                /* means we replace old contents */   {      if ( *pfOverwrite )	 *bckPlace = *bckInsert;      *pfOverwrite = 1;      return bckPlace;   }   binSparse += (location >> LOG_LOW_BIN_SIZE);   offset = SPARSE_POS_TO_OFFSET(binSparse->bmOccupied,				 MOD2(location, LOG_LOW_BIN_SIZE));   binSparse->binSparse = (SparseBucket *)       HTsrealloc(binSparse->binSparse,		 sizeof(*binSparse->binSparse) * ++binSparse->cOccupied,		 sizeof(*binSparse->binSparse));   memmove(binSparse->binSparse + offset+1,	   binSparse->binSparse + offset,	   (binSparse->cOccupied-1 - offset) * sizeof(*binSparse->binSparse));   binSparse->binSparse[offset] = *bckInsert;   SET_BITMAP(binSparse->bmOccupied, MOD2(location, LOG_LOW_BIN_SIZE));   *pfOverwrite = 0;   return binSparse->binSparse + offset;}/*************************************************************************\| SparseFirstBucket()                                                     || SparseNextBucket()                                                      || SparseCurrentBit()                                                      ||     Iterate through the occupied buckets of a dense hashtable.  You     ||     must, of course, have allocated space yourself for the iterator.    |\*************************************************************************/static SparseBucket *SparseNextBucket(SparseIterator *iter){   if ( iter->posOffset != -1 &&      /* not called from FirstBucket()? */        (++iter->posOffset < iter->binSparse[iter->posGroup].cOccupied) )      return iter->binSparse[iter->posGroup].binSparse + iter->posOffset;   iter->posOffset = 0;                         /* start the next group */   for ( iter->posGroup++;  iter->posGroup < SPARSE_GROUPS(iter->cBuckets);	 iter->posGroup++ )      if ( iter->binSparse[iter->posGroup].cOccupied > 0 )	 return iter->binSparse[iter->posGroup].binSparse; /* + 0 */   return NULL;                      /* all remaining groups were empty */}static SparseBucket *SparseFirstBucket(SparseIterator *iter,				       SparseBin *binSparse, ulong cBuckets){   iter->binSparse = binSparse;        /* set it up for NextBucket() */   iter->cBuckets = cBuckets;   iter->posOffset = -1;               /* when we advance, we're at 0 */   iter->posGroup = -1;   return SparseNextBucket(iter);}/*************************************************************************\| SparseWrite()                                                           || SparseRead()                                                            ||     These are routines for storing a sparse hashtable onto disk.  We    ||     store the number of buckets and a bitmap indicating which buckets   ||     are allocated (occupied).  The actual contents of the buckets       ||     must be stored separately.                                          |\*************************************************************************/
💿 文件大小 23 K
👤 上传用户 gjinli88
📂 所属分类 Linux/Unix编程
📄 代码行数 1,525 行
💻 语言类型 C语言
🏷️ 相关标签

#google #table #hash #算法
更多google资源 →
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -