📄 aht.c
字号:
/* An implementation of in-memory hash tables: * Copyright (c) 2000-2004 Salvatore Sanfilippo <antirez@invece.org> * * -- VERSION 2004.05.22 -- * * COPYRIGHT AND PERMISSION NOTICE * ------------------------------- * * Copyright (c) 2000 Salvatore Sanfilippo <antirez@invece.org> * Copyright (c) 2001 Salvatore Sanfilippo <antirez@invece.org> * Copyright (c) 2002 Salvatore Sanfilippo <antirez@invece.org> * Copyright (c) 2003 Salvatore Sanfilippo <antirez@invece.org> * Copyright (c) 2004 Salvatore Sanfilippo <antirez@invece.org> * * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, and/or sell copies of the Software, and to permit persons * to whom the Software is furnished to do so, provided that the above * copyright notice(s) and this permission notice appear in all copies of * the Software and that both the above copyright notice(s) and this * permission notice appear in supporting documentation. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * * Except as contained in this notice, the name of a copyright holder * shall not be used in advertising or otherwise to promote the sale, use * or other dealings in this Software without prior written authorization * of the copyright holder. * * CHANGELOG * --------- * * 22May2004 - Fixed a but in ht_destroy(). Now after this call the * hashtable is really ready to be reused. Fixed also a memory leak * in the same function. Luckly this function is only called at exit * in many programs. * * OVERVIEW * -------- * * AHT is an implementation of a dictionary with support for * INSERT, DELETE and SEARCH operations. It uses the hash table * as base data structure to provide almost constant times for * the three operations. AHT also automatically care about the * size of the current key-values set increasing the hash table * as needed. * * DESIGN PRINCIPLE * ---------------- * * - AHT try to resist to attacker-induced worst-case behaviour * trought the randomization of the hash-function. This is * optional. * * - AHT take care of the hash table expansion when needed. * The hash table load ranges from 0 to 0.5, the hash table * size is a power of two. * * - A simple implementation. The collisions resolution used * is a simple linear probing, that takes advantage of * the modern CPU caches, the low hash table max load and * the use of a strong hash function provided with this library * (ht_strong_hash), should mitigate the primary clustering * enough. Experimental results shown that double hashing * was a performance lost with common key types in modern * CPUs. * * - Moderatly method oriented, it is possible to define the hash * function, key/value destructors, key compare function, for a * given hash table, but not with a per-element base. * * === WARNING === * = Before to use this library, think about the -fact- that the * = worst case is O(N). Like for the quick sort algorithm, it may * = be a bad idea to use this library in medical software, or other * = software for wich the worst case should be taken in account * = even if not likely to happen. * = Good alternatives are red-black trees, and other trees with * = a good worst-case behavior. * =============== * * TODO * ---- * * - Write the documentation * - ht_copy() to copy an element between hash tables * - ht_dup() to duplicate an entire hash table * - ht_merge() to add the content of one hash table to another * - disk operations, the ability to save an hashtable from the * memory to the disk and the reverse operation. * * Most of this features needs additional methods, like one * to copy an object, and should return an error if such methods * are not defined. * */#include <sys/types.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include "aht.h"/* -------------------------- private prototypes ---------------------------- */static int ht_expand_if_needed(struct hashtable *t);static unsigned int next_power(unsigned int size);static int ht_insert(struct hashtable *t, void *key, unsigned int *avail_index);/* The special ht_free_element pointer is used to mark * a freed element in the hash table (note that the elements * neven used are just NULL pointers) */static struct ht_ele *ht_free_element = (void*) -1;/* -------------------------- hash functions -------------------------------- *//* The djb hash function, that's under public domain */u_int32_t djb_hash(unsigned char *buf, size_t len){ u_int32_t h = 5381; while(len--) h = (h + (h << 5)) ^ *buf++; return h;}u_int32_t djb_hashR(unsigned char *buf, size_t len){ u_int32_t h = 5381; buf += len-1; while(len--) h = (h + (h << 5)) ^ *buf--; return h;}/* Another trivial hash function */#define ROT32R(x,n) (((x)>>n)|(x<<(32-n)))u_int32_t trivial_hash(unsigned char *buf, size_t len){ u_int32_t h = 0; while(len--) { h = h + *buf++; h = ROT32R(h, 3); } return h;}u_int32_t trivial_hashR(unsigned char *buf, size_t len){ u_int32_t h = 0; buf += len-1; while(len--) { h = h + *buf--; h = ROT32R(h, 3); } return h;}/* A strong hash function that should be the default with this * hashtable implementation. Our hash tables does not support * double hashing for design: the idea is to avoid double * hashing and use a bit slower but very strong hash function like * this. This should provide quite good performances with * all the kinds of keys if you take the default max load of 50%. * * For more information see: http://burtleburtle.net/bob/hash/evahash.html *//* The mixing step */#define mix(a,b,c) \{ \ a=a-b; a=a-c; a=a^(c>>13); \ b=b-c; b=b-a; b=b^(a<<8); \ c=c-a; c=c-b; c=c^(b>>13); \ a=a-b; a=a-c; a=a^(c>>12); \ b=b-c; b=b-a; b=b^(a<<16); \ c=c-a; c=c-b; c=c^(b>>5); \ a=a-b; a=a-c; a=a^(c>>3); \ b=b-c; b=b-a; b=b^(a<<10); \ c=c-a; c=c-b; c=c^(b>>15); \}/* The whole new hash function */u_int32_t __ht_strong_hash(u_int8_t *k, u_int32_t length, u_int32_t initval){ u_int32_t a,b,c; /* the internal state */ u_int32_t len; /* how many key bytes still need mixing */ /* Set up the internal state */ len = length; a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ c = initval; /* variable initialization of internal state */ /*---------------------------------------- handle most of the key */ while (len >= 12) { a=a+(k[0]+((u_int32_t)k[1]<<8)+((u_int32_t)k[2]<<16)+ ((u_int32_t)k[3]<<24)); b=b+(k[4]+((u_int32_t)k[5]<<8)+((u_int32_t)k[6]<<16)+ ((u_int32_t)k[7]<<24)); c=c+(k[8]+((u_int32_t)k[9]<<8)+((u_int32_t)k[10]<<16)+ ((u_int32_t)k[11]<<24)); mix(a,b,c); k = k+12; len = len-12; } /*------------------------------------- handle the last 11 bytes */ c = c+length; switch(len) /* all the case statements fall through */ { case 11: c=c+((u_int32_t)k[10]<<24); case 10: c=c+((u_int32_t)k[9]<<16); case 9 : c=c+((u_int32_t)k[8]<<8); /* the first byte of c is reserved for the length */ case 8 : b=b+((u_int32_t)k[7]<<24); case 7 : b=b+((u_int32_t)k[6]<<16); case 6 : b=b+((u_int32_t)k[5]<<8); case 5 : b=b+k[4]; case 4 : a=a+((u_int32_t)k[3]<<24); case 3 : a=a+((u_int32_t)k[2]<<16); case 2 : a=a+((u_int32_t)k[1]<<8); case 1 : a=a+k[0]; /* case 0: nothing left to add */ } mix(a,b,c); /*-------------------------------------------- report the result */ return c;}/* ----------------------------- API implementation ------------------------- *//* reset an hashtable already initialized with ht_init(). * NOTE: This function should only called by ht_destroy(). */static void ht_reset(struct hashtable *t){ t->table = NULL; t->size = 0; t->sizemask = 0; t->used = 0; t->collisions = 0;}/* Initialize the hash table */int ht_init(struct hashtable *t){ ht_reset(t); t->hashf = ht_hash_pointer; t->key_destructor = ht_no_destructor; t->val_destructor = ht_no_destructor; t->key_compare = ht_compare_ptr; return HT_OK;}/* Resize the table to the minimal size that contains all the elements */int ht_resize(struct hashtable *t){ int minimal = (t->used * 2)+1; if (minimal < HT_INITIAL_SIZE) minimal = HT_INITIAL_SIZE; return ht_expand(t, minimal);}/* Move an element accross hash tables */int ht_move(struct hashtable *orig, struct hashtable *dest, unsigned int index){ int ret; unsigned int new_index; /* If the element isn't in the table ht_search will store * the index of the free ht_ele in the integer pointer by *index */ ret = ht_insert(dest, orig->table[index]->key, &new_index); if (ret != HT_OK) return ret; /* Move the element */ dest->table[new_index] = orig->table[index]; orig->table[index] = ht_free_element; orig->used--; dest->used++; return HT_OK;}/* Expand or create the hashtable */int ht_expand(struct hashtable *t, size_t size){ struct hashtable n; /* the new hashtable */ unsigned int realsize = next_power(size), i; /* the size is invalid if it is smaller than the number of * elements already inside the hashtable */ if (t->used >= size) return HT_INVALID; ht_init(&n); n.size = realsize; n.sizemask = realsize-1; n.table = malloc(realsize*sizeof(struct ht_ele*)); if (n.table == NULL) return HT_NOMEM; /* Copy methods */ n.hashf = t->hashf; n.key_destructor = t->key_destructor; n.val_destructor = t->val_destructor; n.key_compare= t->key_compare; /* Initialize all the pointers to NULL */ memset(n.table, 0, realsize*sizeof(struct ht_ele*)); /* Copy all the elements from the old to the new table: * note that if the old hash table is empty t->size is zero, * so ht_expand() acts like an ht_create() */ n.used = t->used; for (i = 0; i < t->size && t->used > 0; i++) { if (t->table[i] != NULL && t->table[i] != ht_free_element) { u_int32_t h; /* Get the new element index: note that we * know that there aren't freed elements in 'n' */ h = n.hashf(t->table[i]->key) & n.sizemask;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -