📄 hash.c
字号:
/* * Hash Table Data Type * Copyright (C) 1997 Kaz Kylheku <kaz@ashi.footprints.net> * * Free Software License: * * All rights are reserved by the author, with the following exceptions: * Permission is granted to freely reproduce and distribute this software, * possibly in exchange for a fee, provided that this copyright notice appears * intact. Permission is also granted to adapt this software to produce * derivative works, as long as the modified versions carry this copyright * notice and additional notices stating that the work has been modified. * The copyright extends to translations of this work into other languages, * including machine languages. * * $Id: hash.c,v 1.2 2002/03/11 00:51:44 jick Exp $ * $Name: $ */#include <stdlib.h>#include <stddef.h>#include <assert.h>#include <string.h>#define HASH_IMPLEMENTATION#include "hash.h"static const char rcsid[] = "$Id: hash.c,v 1.2 2002/03/11 00:51:44 jick Exp $";static const char right[] = "Copyright (C) 1997 Kaz Kylheku";#define INIT_BITS 6#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */#define INIT_MASK ((INIT_SIZE) - 1)static hnode_t *hnode_alloc(void *context);static void hnode_free(hnode_t *node, void *context);static hash_val_t hash_fun_default(const void *key);static int hash_comp_default(const void *key1, const void *key2);int hash_val_t_bit;/* * Compute the number of bits in the hash_val_t type. We know that hash_val_t * is an unsigned integral type. Thus the highest value it can hold is a * Mersenne number (power of two, less one). We initialize a hash_val_t * object with this value and then shift bits out one by one while counting. * Notes: * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power * of two. This means that its binary representation consists of all one * bits, and hence ``val'' is initialized to all one bits. * 2. We reset the bit count to zero in case this function is invoked more than * once. * 3. While bits remain in val, we increment the bit count and shift it to the * right, replacing the topmost bit by zero. */static void compute_bits(void){ hash_val_t val = HASH_VAL_T_MAX; /* 1 */ int bits = 0; while (val) { /* 3 */ bits++; val >>= 1; } hash_val_t_bit = bits;}/* * Verify whether the given argument is a power of two. */static int is_power_of_two(hash_val_t arg){ if (arg == 0) return 0; while ((arg & 1) == 0) arg >>= 1; return (arg == 1);}/* * Compute a shift amount from a given table size */static hash_val_t compute_mask(hashcount_t size){ hash_val_t mask = size; assert (is_power_of_two(size)); assert (size >= 2); mask /= 2; while ((mask & 1) == 0) mask |= (mask >> 1); return mask;}/* * Initialize the table of pointers to null. */static void clear_table(hash_t *hash){ hash_val_t i; for (i = 0; i < hash->nchains; i++) hash->table[i] = NULL;}/* * Double the size of a dynamic table. This works as follows. Each chain splits * into two adjacent chains. The shift amount increases by one, exposing an * additional bit of each hashed key. For each node in the original chain, the * value of this newly exposed bit will decide which of the two new chains will * receive the node: if the bit is 1, the chain with the higher index will have * the node, otherwise the lower chain will receive the node. In this manner, * the hash table will continue to function exactly as before without having to * rehash any of the keys. * Notes: * 1. Overflow check. * 2. The new number of chains is twice the old number of chains. * 3. The new mask is one bit wider than the previous, revealing a * new bit in all hashed keys. * 4. Allocate a new table of chain pointers that is twice as large as the * previous one. * 5. If the reallocation was successful, we perform the rest of the growth * algorithm, otherwise we do nothing. * 6. The exposed_bit variable holds a mask with which each hashed key can be * AND-ed to test the value of its newly exposed bit. * 7. Loop over the lower half of the table, which, at first, holds all of * the chains. * 8. Each chain from the original table must be split into two chains. * The nodes of each chain are examined in turn. The ones whose key value's * newly exposed bit is 1 are removed from the chain and put into newchain * (Steps 9 through 14). After this separation, the new chain is assigned * into its appropriate place in the upper half of the table (Step 15). * 9. Since we have relocated the table of pointers, we have to fix the * back-reference from the first node of each non-empty chain so it * properly refers to the moved pointer. * 10. We loop over the even chain looking for any nodes whose exposed bit is * set. Such nodes are removed from the lower-half chain and put into its * upper-half sister. * 11. Before moving the node to the other chain, we remember what the next * node is so we can coninue the loop. We have to do this because we will * be overwriting the node's next pointer when we insert it to its new * home. * 12. The next node's back pointer must be updated to skip to the previous * node. * 13. The deleted node's back pointer must be updated to refer to the next * node. * 14. We insert the node at the beginning of the new chain. * 15. Place the new chain into an upper-half slot. * 16. We have finished dealing with the chains and nodes. We now update * the various bookeeping fields of the hash structure. */static void grow_table(hash_t *hash){ hnode_t **newtable; assert (2 * hash->nchains > hash->nchains); /* 1 */ newtable = realloc(hash->table, sizeof *newtable * hash->nchains * 2); /* 4 */ if (newtable) { /* 5 */ hash_val_t mask = (hash->mask << 1) | 1; /* 3 */ hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */ hash_val_t chain; assert (mask != hash->mask); for (chain = 0; chain < hash->nchains; chain++) { /* 7 */ hnode_t *hptr = newtable[chain]; /* 8 */ hnode_t *newchain = NULL; if (hptr) /* 9 */ hptr->pself = &newtable[chain]; while (hptr) { /* 10 */ if ((hptr->hkey & exposed_bit)) { hnode_t *next = hptr->next; /* 11 */ if (next) /* 12 */ next->pself = hptr->pself; *hptr->pself = next; /* 13 */ hptr->next = newchain; /* 14 */ if (newchain) newchain->pself = &hptr->next; newchain = hptr; hptr->pself = &newchain; hptr = next; } else { hptr = hptr->next; } } newtable[chain + hash->nchains] = newchain; /* 15 */ if (newchain) newchain->pself = &newtable[chain + hash->nchains]; } hash->table = newtable; /* 16 */ hash->mask = mask; hash->nchains *= 2; hash->lowmark *= 2; hash->highmark *= 2; } assert (hash_verify(hash));}/* * Cut a table size in half. This is done by folding together adjacent chains * and populating the lower half of the table with these chains. The chains are * simply spliced together. Once this is done, the whole table is reallocated * to a smaller object. * Notes: * 1. It is illegal to have a hash table with one slot. This would mean that * hash->shift is equal to hash_val_t_bit, an illegal shift value. * Also, other things could go wrong, such as hash->lowmark becoming zero. * 2. Looping over each adjacent chain of pairs, the lo_chain is set to * reference the lower-numbered member of the pair, whereas hi_chain * is the index of the higher-numbered one. * 3. The intent here is to compute a pointer to the last node of the * lower chain into the lo_tail variable. If this chain is empty, * lo_tail ends up with a null value. * 4. If the lower chain is not empty, we have to merge chains, but only * if the upper chain is also not empty. In either case, the lower chain * will come first, with the upper one appended to it. * 5. The first part of the join is done by having the tail node of the lower * chain point to the head node of the upper chain. If the upper chain * is empty, this is remains a null pointer. * 6. If the upper chain is non-empty, we must do the additional house-keeping * task of ensuring that the upper chain's first node's back-pointer * references the tail node of the lower chain properly. * 8. If the low chain is empty, but the high chain is not, then the * high chain simply becomes the new chain. * 9. Otherwise if both chains are empty, then the merged chain is also empty. * 10. All the chain pointers are in the lower half of the table now, so * we reallocate it to a smaller object. This, of course, invalidates * all pointer-to-pointers which reference into the table from the * first node of each chain. * 11. Though it's unlikely, the reallocation may fail. In this case we * pretend that the table _was_ reallocated to a smaller object. * 12. This loop performs the housekeeping task of updating the back pointers * from the first node of each chain so that they reference their * corresponding table entries. * 13. Finally, update the various table parameters to reflect the new size. */static void shrink_table(hash_t *hash){ hash_val_t chain, nchains; hnode_t **newtable, *lo_tail, *lo_chain, *hi_chain; assert (hash->nchains >= 2); /* 1 */ nchains = hash->nchains / 2; for (chain = 0; chain < nchains; chain++) { lo_chain = hash->table[chain]; /* 2 */ hi_chain = hash->table[chain + nchains]; for (lo_tail=lo_chain; lo_tail && lo_tail->next; lo_tail=lo_tail->next) ; /* 3 */ if (lo_chain) { /* 4 */ lo_tail->next = hi_chain; /* 5 */ if (hi_chain) /* 6 */ hi_chain->pself = &lo_tail->next; } else if (hi_chain) { /* 8 */ hash->table[chain] = hi_chain; } else { hash->table[chain] = NULL; /* 9 */ } } newtable = realloc(hash->table, sizeof *newtable * nchains); /* 10 */ if (newtable) /* 11 */ hash->table = newtable; for (chain = 0; chain < nchains; chain++) /* 12 */ if (hash->table[chain]) hash->table[chain]->pself = &hash->table[chain]; hash->mask >>= 1; /* 13 */ hash->nchains = nchains; hash->lowmark /= 2; hash->highmark /= 2; assert (hash_verify(hash));}/* * Create a dynamic hash table. Both the hash table structure and the table * itself are dynamically allocated. Furthermore, the table is extendible in * that it will automatically grow as its load factor increases beyond a * certain threshold. * Notes: * 1. If the number of bits in the hash_val_t type has not been computed yet, * we do so here, because this is likely to be the first function that the * user calls. * 2. Safe malloc is used for added checking. The checking is disabled by * defining NDEBUG, which turns the safe malloc routines (via the * preprocessor) into direct calls to malloc, free, etc. * 3. If a hash table control structure is successfully allocated, we * proceed to initialize it. Otherwise we return a null pointer. * 4. Using the safe allocator, we try to allocate the table of hash * chains. * 5. If we were able to allocate the hash chain table, we can finish * initializing the hash structure and the table. Otherwise, we must * backtrack by freeing the hash structure. * 6. INIT_SIZE should be a power of two. The high and low marks are always set * to be twice the table size and half the table size respectively. When the * number of nodes in the table grows beyond the high size (beyond load * factor 2), it will double in size to cut the load factor down to about * about 1. If the table shrinks down to or beneath load factor 0.5, * it will shrink, bringing the load up to about 1. However, the table * will never shrink beneath INIT_SIZE even if it's emptied. * 7. This indicates that the table is dynamically allocated and dynamically * resized on the fly. A table that has this value set to zero is * assumed to be statically allocated and will not be resized. * 8. The table of chains must be properly reset to all null pointers. */hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun, hash_fun_t hashfun){ hash_t *hash; if (hash_val_t_bit == 0) /* 1 */ compute_bits(); hash = malloc(sizeof *hash); /* 2 */ if (hash) { /* 3 */ hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ if (hash->table) { /* 5 */ hash->nchains = INIT_SIZE; /* 6 */ hash->highmark = INIT_SIZE * 2; hash->lowmark = INIT_SIZE / 2; hash->count = 0; hash->maxcount = maxcount; hash->compare = compfun ? compfun : hash_comp_default; hash->hash = hashfun ? hashfun : hash_fun_default; hash->allocnode = hnode_alloc; hash->freenode = hnode_free; hash->context = NULL; hash->mask = INIT_MASK; hash->dynamic = 1; /* 7 */ clear_table(hash); /* 8 */ assert (hash_verify(hash)); return hash; } free(hash); } return NULL;}/* * Select a different set of node allocator routines. */void hash_set_allocator(hash_t *hash, hnode_alloc_t al, hnode_free_t fr, void *context){ assert (hash_count(hash) == 0); assert ((al == 0 && fr == 0) || (al != 0 && fr != 0)); hash->allocnode = al ? al : hnode_alloc; hash->freenode = fr ? fr : hnode_free; hash->context = context;}void hash_free(hash_t *hash){ hscan_t hs; hnode_t *node; hash_scan_begin(&hs, hash); while ((node = hash_scan_next(&hs))) { hash_scan_delete(hash, node); hash->freenode(node, hash->context); } hash_destroy(hash);}/* * Free a dynamic hash table structure. */void hash_destroy(hash_t *hash){ assert (hash_val_t_bit != 0); assert (hash_isempty(hash)); free(hash->table); free(hash);}/* * Initialize a user supplied hash structure. The user also supplies a table of * chains which is assigned to the hash structure. The table is static---it * will not grow or shrink. * 1. See note 1. in hash_create(). * 2. The user supplied array of pointers hopefully contains nchains nodes. * 3. See note 7. in hash_create(). * 4. We must dynamically compute the mask from the given power of two table * size. * 5. The user supplied table can't be assumed to contain null pointers, * so we reset it here. */hash_t *hash_init(hash_t *hash, hashcount_t maxcount, hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, hashcount_t nchains){ if (hash_val_t_bit == 0) /* 1 */ compute_bits(); assert (is_power_of_two(nchains)); hash->table = table; /* 2 */ hash->nchains = nchains; hash->count = 0; hash->maxcount = maxcount; hash->compare = compfun ? compfun : hash_comp_default; hash->hash = hashfun ? hashfun : hash_fun_default; hash->dynamic = 0; /* 3 */ hash->mask = compute_mask(nchains); /* 4 */ clear_table(hash); /* 5 */ assert (hash_verify(hash)); return hash;}/* * Reset the hash scanner so that the next element retrieved by * hash_scan_next() shall be the first element on the first non-empty chain. * Notes: * 1. Locate the first non empty chain. * 2. If an empty chain is found, remember which one it is and set the next * pointer to refer to its first element. * 3. Otherwise if a chain is not found, set the next pointer to NULL * so that hash_scan_next() shall indicate failure. */void hash_scan_begin(hscan_t *scan, hash_t *hash){ hash_val_t nchains = hash->nchains; hash_val_t chain; scan->hash = hash; /* 1 */ for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) ; if (chain < nchains) { /* 2 */ scan->chain = chain; scan->next = hash->table[chain]; } else { /* 3 */ scan->next = NULL; }}/* * Retrieve the next node from the hash table, and update the pointer * for the next invocation of hash_scan_next(). * Notes: * 1. Remember the next pointer in a temporary value so that it can be * returned. * 2. This assertion essentially checks whether the module has been properly * initialized. The first point of interaction with the module should be * either hash_create() or hash_init(), both of which set hash_val_t_bit to * a non zero value. * 3. If the next pointer we are returning is not NULL, then the user is * allowed to call hash_scan_next() again. We prepare the new next pointer * for that call right now. That way the user is allowed to delete the node * we are about to return, since we will no longer be needing it to locate * the next node. * 4. If there is a next node in the chain (next->next), then that becomes the * new next node, otherwise ... * 5. We have exhausted the current chain, and must locate the next subsequent * non-empty chain in the table. * 6. If a non-empty chain is found, the first element of that chain becomes * the new next node. Otherwise there is no new next node and we set the * pointer to NULL so that the next time hash_scan_next() is called, a null * pointer shall be immediately returned. */hnode_t *hash_scan_next(hscan_t *scan){ hnode_t *next = scan->next; /* 1 */ hash_t *hash = scan->hash; hash_val_t chain = scan->chain + 1; hash_val_t nchains = hash->nchains; assert (hash_val_t_bit != 0); /* 2 */ if (next) { /* 3 */ if (next->next) { /* 4 */ scan->next = next->next; } else { while (chain < nchains && hash->table[chain] == 0) /* 5 */ chain++; if (chain < nchains) { /* 6 */ scan->chain = chain; scan->next = hash->table[chain]; } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -