📄 libchash.c

📁 google hash table算法
💻 C
📖 第 1 页 / 共 5 页
字号:
      case 2 : a += (ulong)key[1]<<8;      case 1 : a += key[0];   /* case 0 : nothing left to add */#elif LOG_WORD_SIZE == 6      case 23: c += (ulong)key[22]<<56;      case 22: c += (ulong)key[21]<<48;      case 21: c += (ulong)key[20]<<40;      case 20: c += (ulong)key[19]<<32;      case 19: c += (ulong)key[18]<<24;      case 18: c += (ulong)key[17]<<16;      case 17: c += (ulong)key[16]<<8;               /* the first byte of c is reserved for the length */      case 16: b += WORD_AT(key+8);  a+= WORD_AT(key);  break;      case 15: b += (ulong)key[14]<<48;      case 14: b += (ulong)key[13]<<40;      case 13: b += (ulong)key[12]<<32;      case 12: b += (ulong)key[11]<<24;      case 11: b += (ulong)key[10]<<16;      case 10: b += (ulong)key[ 9]<<8;      case  9: b += (ulong)key[ 8];      case  8: a += WORD_AT(key);  break;      case  7: a += (ulong)key[ 6]<<48;      case  6: a += (ulong)key[ 5]<<40;      case  5: a += (ulong)key[ 4]<<32;      case  4: a += (ulong)key[ 3]<<24;      case  3: a += (ulong)key[ 2]<<16;      case  2: a += (ulong)key[ 1]<<8;      case  1: a += (ulong)key[ 0];   /* case 0: nothing left to add */#endif   }   mix(a,b,c);   return c & (cBuckets-1);}/*************************************************************************\| Rehash()                                                                ||     You give me a hashtable, a new size, and a bucket to follow, and    ||     I resize the hashtable's bin to be the new size, rehashing          ||     everything in it.  I keep particular track of the bucket you pass   ||     in, and RETURN a pointer to where the item in the bucket got to.    ||     (If you pass in NULL, I return an arbitrary pointer.)               |\*************************************************************************/static HTItem *Rehash(HashTable *ht, ulong cNewBuckets, HTItem *bckWatch){   Table *tableNew;   ulong iBucketFirst;   HTItem *bck, *bckNew = NULL;   ulong offset;                         /* the i in h(x) + i*(i-1)/2 */   int fOverwrite = 0;    /* not an issue: there can be no collisions */   assert( ht->table );   cNewBuckets = Table(Allocate)(&tableNew, cNewBuckets);      /* Since we RETURN the new position of bckWatch, we want  *       * to make sure it doesn't get moved due to some table    *       * rehashing that comes after it's inserted.  Thus, we    *       * have to put it in last.  This makes the loop weird.    */   for ( bck = HashFirstBucket(ht); ; bck = HashNextBucket(ht) )   {      if ( bck == NULL )      /* we're done iterating, so look at bckWatch */      {	 bck = bckWatch;	 if ( bck == NULL )           /* I guess bckWatch wasn't specified */	    break;      }      else if ( bck == bckWatch )	 continue;             /* ignore if we see it during the iteration */      offset = 0;                              /* a new i for a new bucket */      for ( iBucketFirst = Hash(ht, KEY_PTR(ht, bck->key), cNewBuckets);	    !Table(IsEmpty)(tableNew, iBucketFirst);	    iBucketFirst = (iBucketFirst + JUMP(KEY_PTR(ht,bck->key), offset))	                   & (cNewBuckets-1) )	 ;      bckNew = Table(Insert)(tableNew, bck, iBucketFirst, &fOverwrite);      if ( bck == bckWatch )       /* we're done with the last thing to do */	 break;   }   Table(Free)(ht->table, ht->cBuckets);   ht->table = tableNew;   ht->cBuckets = cNewBuckets;   ht->cDeletedItems = 0;   return bckNew;     /* new position of bckWatch, which was inserted last */}/*************************************************************************\| Find()                                                                  ||     Does the quadratic searching stuff.  RETURNS NULL if we don't       ||     find an object with the given key, and a pointer to the Item        ||     holding the key, if we do.  Also sets posLastFind.  If piEmpty is   ||     non-NULL, we set it to the first open bucket we pass; helpful for   ||     doing a later insert if the search fails, for instance.             |\*************************************************************************/static HTItem *Find(HashTable *ht, ulong key, ulong *piEmpty){   ulong iBucketFirst;   HTItem *item;   ulong offset = 0;              /* the i in h(x) + i*(i-1)/2 */   int fFoundEmpty = 0;           /* set when we pass over an empty bucket */   ht->posLastFind = NULL;        /* set up for failure: a new find starts */   if ( ht->table == NULL )       /* empty hash table: find is bound to fail */      return NULL;   iBucketFirst = Hash(ht, KEY_PTR(ht, key), ht->cBuckets);   while ( 1 )                    /* now try all i > 0 */   {      item = Table(Find)(ht->table, iBucketFirst);      if ( item == NULL )         /* it's not in the table */      {	 if ( piEmpty && !fFoundEmpty ) *piEmpty = iBucketFirst;	 return NULL;      }      else      {	 if ( IS_BCK_DELETED(item) )      /* always 0 ifdef INSERT_ONLY */	 {	    if ( piEmpty && !fFoundEmpty )	    {	       *piEmpty = iBucketFirst;	       fFoundEmpty = 1;	    }	 } else	    if ( !KEY_CMP(ht, key, item->key) )     /* must be occupied */	    {	       ht->posLastFind = item;	       return item;               /* we found it! */	    }      }      iBucketFirst = ((iBucketFirst + JUMP(KEY_PTR(ht, key), offset))		      & (ht->cBuckets-1));   }}/*************************************************************************\| Insert()                                                                ||     If an item with the key already exists in the hashtable, RETURNS    ||     a pointer to the item (replacing its data if fOverwrite is 1).      ||     If not, we find the first place-to-insert (which Find() is nice     ||     enough to set for us) and insert the item there, RETURNing a        ||     pointer to the item.  We might grow the hashtable if it's getting   ||     full.  Note we include buckets holding DELETED when determining     ||     fullness, because they slow down searching.                         |\*************************************************************************/static ulong NextPow2(ulong x)    /* returns next power of 2 > x, or 2^31 */{   if ( ((x << 1) >> 1) != x )    /* next power of 2 overflows */      x >>= 1;                    /* so we return highest power of 2 we can */   while ( (x & (x-1)) != 0 )     /* blacks out all but the top bit */      x &= (x-1);   return x << 1;                 /* makes it the *next* power of 2 */}static HTItem *Insert(HashTable *ht, ulong key, ulong data, int fOverwrite){   HTItem *item, bckInsert;   ulong iEmpty;                  /* first empty bucket key probes */   if ( ht->table == NULL )       /* empty hash table: find is bound to fail */      return NULL;   item = Find(ht, key, &iEmpty);   ht->posLastFind = NULL;        /* last operation is insert, not find */   if ( item )   {      if ( fOverwrite )	 item->data = data;       /* key already matches */      return item;   }   COPY_KEY(ht, bckInsert.key, key);    /* make our own copy of the key */   bckInsert.data = data;               /* oh, and the data too */   item = Table(Insert)(ht->table, &bckInsert, iEmpty, &fOverwrite);   if ( fOverwrite )                    /* we overwrote a deleted bucket */      ht->cDeletedItems--;   ht->cItems++;                        /* insert couldn't have overwritten */   if ( ht->cDeltaGoalSize > 0 )  /* closer to our goal size */      ht->cDeltaGoalSize--;   if ( ht->cItems + ht->cDeletedItems >= ht->cBuckets * OCCUPANCY_PCT        || ht->cDeltaGoalSize < 0 ) /* we must've overestimated # of deletes */      item = Rehash(ht, 		    NextPow2((ulong)(((ht->cDeltaGoalSize > 0 ?				       ht->cDeltaGoalSize : 0)				      + ht->cItems) / OCCUPANCY_PCT)),		    item);   return item;}/*************************************************************************\| Delete()                                                                ||     Removes the item from the hashtable, and if fShrink is 1, will      ||     shrink the hashtable if it's too small (ie even after halving,      ||     the ht would be less than half full, though in order to avoid       ||     oscillating table size, we insist that after halving the ht would   ||     be less than 40% full).  RETURNS 1 if the item was found, 0 else.   ||        If fLastFindSet is true, then this function is basically         ||     DeleteLastFind.                                                     |\*************************************************************************/static int Delete(HashTable *ht, ulong key, int fShrink, int fLastFindSet){   if ( !fLastFindSet && !Find(ht, key, NULL) )      return 0;   SET_BCK_DELETED(ht, ht->posLastFind);       /* find set this, how nice */   ht->cItems--;    ht->cDeletedItems++;   if ( ht->cDeltaGoalSize < 0 )  /* heading towards our goal of deletion */      ht->cDeltaGoalSize++;   if ( fShrink && ht->cItems < ht->cBuckets * OCCUPANCY_PCT*0.4         && ht->cDeltaGoalSize >= 0       /* wait until we're done deleting */        && (ht->cBuckets >> 1) >= MIN_HASH_SIZE )                /* shrink */      Rehash(ht,	     NextPow2((ulong)((ht->cItems+ht->cDeltaGoalSize)/OCCUPANCY_PCT)),	     NULL);   ht->posLastFind = NULL;           /* last operation is delete, not find */   return 1;}/* ======================================================================== *//*                          USER-VISIBLE API                                *//*                       ----------------------                             *//*************************************************************************\| AllocateHashTable()                                                     || ClearHashTable()                                                        || FreeHashTable()                                                         ||     Allocate() allocates a hash table and sets up size parameters.      ||     Free() frees it.  Clear() deletes all the items from the hash       ||     table, but frees not.                                               ||        cchKey is < 0 if the keys you send me are meant to be pointers   ||     to \0-terminated strings.  Then -cchKey is the maximum key size.    ||     If cchKey < one word (ulong), the keys you send me are the keys     ||     themselves; else the keys you send me are pointers to the data.     ||        If fSaveKeys is 1, we copy any keys given to us to insert.  We   ||     also free these keys when freeing the hash table.  If it's 0, the   ||     user is responsible for key space management.                       ||        AllocateHashTable() RETURNS a hash table; the others TAKE one.   |\*************************************************************************/HashTable *AllocateHashTable(int cchKey, int fSaveKeys){   HashTable *ht;   ht = (HashTable *) HTsmalloc(sizeof(*ht));   /* set everything to 0 */   ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);   ht->cchKey = cchKey <= 0 ? NULL_TERMINATED : cchKey;   ht->cItems = 0;   ht->cDeletedItems = 0;   ht->fSaveKeys = fSaveKeys;   ht->cDeltaGoalSize = 0;   ht->iter = HTsmalloc( sizeof(TableIterator) );   ht->fpData = NULL;                           /* set by HashLoad, maybe */   ht->bckData.data = (ulong) NULL;             /* this must be done */   HTSetupKeyTrunc();                           /* in util.c */   return ht;}void ClearHashTable(HashTable *ht){   HTItem *bck;   if ( STORES_PTR(ht) && ht->fSaveKeys )       /* need to free keys */      for ( bck = HashFirstBucket(ht); bck; bck = HashNextBucket(ht) )      {	 FREE_KEY(ht, bck->key);	 if ( ht->fSaveKeys == 2 )  /* this means key stored in one block */	    break;                  /* ...so only free once */      }   Table(Free)(ht->table, ht->cBuckets);   ht->cBuckets = Table(Allocate)(&ht->table, MIN_HASH_SIZE);   ht->cItems = 0;   ht->cDeletedItems = 0;   ht->cDeltaGoalSize = 0;   ht->posLastFind = NULL;   ht->fpData = NULL;               /* no longer HashLoading */   if ( ht->bckData.data )  free( (char *)(ht)->bckData.data);   ht->bckData.data = (ulong) NULL;}void FreeHashTable(HashTable *ht){   ClearHashTable(ht);   if ( ht->iter )    HTfree(ht->iter, sizeof(TableIterator));   if ( ht->table )   Table(Free)(ht->table, ht->cBuckets);   free(ht);}/*************************************************************************\| HashFind()                                                              || HashFindLast()                                                          ||     HashFind(): looks in h(x) + i(i-1)/2 % t as i goes up from 0        ||     until we either find the key or hit an empty bucket.  RETURNS a     ||     pointer to the item in the hit bucket, if we find it, else          ||     RETURNS NULL.                                                       ||        HashFindLast() returns the item returned by the last             ||     HashFind(), which may be NULL if the last HashFind() failed.        ||        LOAD_AND_RETURN reads the data from off disk, if necessary.      |\*************************************************************************/
💿 文件大小 23 K
👤 上传用户 gjinli88
📂 所属分类 Linux/Unix编程
📄 代码行数 1,525 行
💻 语言类型 C语言
🏷️ 相关标签

#google #table #hash #算法
更多google资源 →
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -