📄 hash.c
字号:
ht->resize_threshold = newsize * HASH_MAX_FULLNESS; cells = xnew_array (struct cell, newsize); memset (cells, INVALID_PTR_CHAR, newsize * sizeof (struct cell)); ht->cells = cells; for (c = old_cells; c < old_end; c++) if (CELL_OCCUPIED (c)) { struct cell *new_c; /* We don't need to test for uniqueness of keys because they come from the hash table and are therefore known to be unique. */ new_c = cells + HASH_POSITION (c->key, hasher, newsize); FOREACH_OCCUPIED_ADJACENT (new_c, cells, newsize) ; *new_c = *c; } xfree (old_cells);}/* Put VALUE in the hash table HT under the key KEY. This regrows the table if necessary. */voidhash_table_put (struct hash_table *ht, const void *key, void *value){ struct cell *c = find_cell (ht, key); if (CELL_OCCUPIED (c)) { /* update existing item */ c->key = (void *)key; /* const? */ c->value = value; return; } /* If adding the item would make the table exceed max. fullness, grow the table first. */ if (ht->count >= ht->resize_threshold) { grow_hash_table (ht); c = find_cell (ht, key); } /* add new item */ ++ht->count; c->key = (void *)key; /* const? */ c->value = value;}/* Remove KEY->value mapping from HT. Return 0 if there was no such entry; return 1 if an entry was removed. */inthash_table_remove (struct hash_table *ht, const void *key){ struct cell *c = find_cell (ht, key); if (!CELL_OCCUPIED (c)) return 0; else { int size = ht->size; struct cell *cells = ht->cells; hashfun_t hasher = ht->hash_function; CLEAR_CELL (c); --ht->count; /* Rehash all the entries following C. The alternative approach is to mark the entry as deleted, i.e. create a "tombstone". That speeds up removal, but leaves a lot of garbage and slows down hash_table_get and hash_table_put. */ c = NEXT_CELL (c, cells, size); FOREACH_OCCUPIED_ADJACENT (c, cells, size) { const void *key2 = c->key; struct cell *c_new; /* Find the new location for the key. */ c_new = cells + HASH_POSITION (key2, hasher, size); FOREACH_OCCUPIED_ADJACENT (c_new, cells, size) if (key2 == c_new->key) /* The cell C (key2) is already where we want it (in C_NEW's "chain" of keys.) */ goto next_rehash; *c_new = *c; CLEAR_CELL (c); next_rehash: ; } return 1; }}/* Clear HT of all entries. After calling this function, the count and the fullness of the hash table will be zero. The size will remain unchanged. */voidhash_table_clear (struct hash_table *ht){ memset (ht->cells, INVALID_PTR_CHAR, ht->size * sizeof (struct cell)); ht->count = 0;}/* Call FN for each entry in HT. FN is called with three arguments: the key, the value, and ARG. When FN returns a non-zero value, the mapping stops. It is undefined what happens if you add or remove entries in the hash table while hash_table_for_each is running. The exception is the entry you're currently mapping over; you may call hash_table_put or hash_table_remove on that entry's key. That is also the reason why this function cannot be implemented in terms of hash_table_iterate. */voidhash_table_for_each (struct hash_table *ht, int (*fn) (void *, void *, void *), void *arg){ struct cell *c = ht->cells; struct cell *end = ht->cells + ht->size; for (; c < end; c++) if (CELL_OCCUPIED (c)) { void *key; repeat: key = c->key; if (fn (key, c->value, arg)) return; /* hash_table_remove might have moved the adjacent cells. */ if (c->key != key && CELL_OCCUPIED (c)) goto repeat; }}/* Initiate iteration over HT. Entries are obtained with hash_table_iter_next, a typical iteration loop looking like this: hash_table_iterator iter; for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) ... do something with iter.key and iter.value ... The iterator does not need to be deallocated after use. The hash table must not be modified while being iterated over. */voidhash_table_iterate (struct hash_table *ht, hash_table_iterator *iter){ iter->pos = ht->cells; iter->end = ht->cells + ht->size;}/* Get the next hash table entry. ITER is an iterator object initialized using hash_table_iterate. While there are more entries, the key and value pointers are stored to ITER->key and ITER->value respectively and 1 is returned. When there are no more entries, 0 is returned. If the hash table is modified between calls to this function, the result is undefined. */inthash_table_iter_next (hash_table_iterator *iter){ struct cell *c = iter->pos; struct cell *end = iter->end; for (; c < end; c++) if (CELL_OCCUPIED (c)) { iter->key = c->key; iter->value = c->value; iter->pos = c + 1; return 1; } return 0;}/* Return the number of elements in the hash table. This is not the same as the physical size of the hash table, which is always greater than the number of elements. */inthash_table_count (const struct hash_table *ht){ return ht->count;}/* Functions from this point onward are meant for convenience and don't strictly belong to this file. However, this is as good a place for them as any. *//* Guidelines for creating custom hash and test functions: - The test function returns non-zero for keys that are considered "equal", zero otherwise. - The hash function returns a number that represents the "distinctness" of the object. In more precise terms, it means that for any two objects that test "equal" under the test function, the hash function MUST produce the same result. This does not mean that all different objects must produce different values (that would be "perfect" hashing), only that non-distinct objects must produce the same values! For instance, a hash function that returns 0 for any given object is a perfectly valid (albeit extremely bad) hash function. A hash function that hashes a string by adding up all its characters is another example of a valid (but still quite bad) hash function. It is not hard to make hash and test functions agree about equality. For example, if the test function compares strings case-insensitively, the hash function can lower-case the characters when calculating the hash value. That ensures that two strings differing only in case will hash the same. - To prevent performance degradation, choose a hash function with as good "spreading" as possible. A good hash function will use all the bits of the input when calculating the hash, and will react to even small changes in input with a completely different output. But don't make the hash function itself overly slow, because you'll be incurring a non-negligible overhead to all hash table operations. *//* * Support for hash tables whose keys are strings. * */ /* Base 31 hash function. Taken from Gnome's glib, modified to use standard C types. We used to use the popular hash function from the Dragon Book, but this one seems to perform much better, both by being faster and by generating less collisions. */static unsigned longhash_string (const void *key){ const char *p = key; unsigned int h = *p; if (h) for (p += 1; *p != '\0'; p++) h = (h << 5) - h + *p; return h;}/* Frontend for strcmp usable for hash tables. */static intcmp_string (const void *s1, const void *s2){ return !strcmp ((const char *)s1, (const char *)s2);}/* Return a hash table of preallocated to store at least ITEMS items suitable to use strings as keys. */struct hash_table *make_string_hash_table (int items){ return hash_table_new (items, hash_string, cmp_string);}/* * Support for hash tables whose keys are strings, but which are * compared case-insensitively. * *//* Like hash_string, but produce the same hash regardless of the case. */static unsigned longhash_string_nocase (const void *key){ const char *p = key; unsigned int h = TOLOWER (*p); if (h) for (p += 1; *p != '\0'; p++) h = (h << 5) - h + TOLOWER (*p); return h;}/* Like string_cmp, but doing case-insensitive compareison. */static intstring_cmp_nocase (const void *s1, const void *s2){ return !strcasecmp ((const char *)s1, (const char *)s2);}/* Like make_string_hash_table, but uses string_hash_nocase and string_cmp_nocase. */struct hash_table *make_nocase_string_hash_table (int items){ return hash_table_new (items, hash_string_nocase, string_cmp_nocase);}/* Hashing of numeric values, such as pointers and integers. This implementation is the Robert Jenkins' 32 bit Mix Function, with a simple adaptation for 64-bit values. According to Jenkins it should offer excellent spreading of values. Unlike the popular Knuth's multiplication hash, this function doesn't need to know the hash table size to work. */unsigned longhash_pointer (const void *ptr){ uintptr_t key = (uintptr_t) ptr; key += (key << 12); key ^= (key >> 22); key += (key << 4); key ^= (key >> 9); key += (key << 10); key ^= (key >> 2); key += (key << 7); key ^= (key >> 12);#if SIZEOF_VOID_P > 4 key += (key << 44); key ^= (key >> 54); key += (key << 36); key ^= (key >> 41); key += (key << 42); key ^= (key >> 34); key += (key << 39); key ^= (key >> 44);#endif return (unsigned long) key;}static intcmp_pointer (const void *ptr1, const void *ptr2){ return ptr1 == ptr2;}#ifdef TEST#include <stdio.h>#include <string.h>voidprint_hash (struct hash_table *sht){ hash_table_iterator iter; int count = 0; for (hash_table_iterate (sht, &iter); hash_table_iter_next (&iter); ++count) printf ("%s: %s\n", iter.key, iter.value); assert (count == sht->count);}intmain (void){ struct hash_table *ht = make_string_hash_table (0); char line[80]; while ((fgets (line, sizeof (line), stdin))) { int len = strlen (line); if (len <= 1) continue; line[--len] = '\0'; if (!hash_table_contains (ht, line)) hash_table_put (ht, strdup (line), "here I am!");#if 1 if (len % 5 == 0) { char *line_copy; if (hash_table_get_pair (ht, line, &line_copy, NULL)) { hash_table_remove (ht, line); xfree (line_copy); } }#endif }#if 0 print_hash (ht);#endif#if 1 printf ("%d %d\n", ht->count, ht->size);#endif return 0;}#endif /* TEST */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -