📄 libchash.c
字号:
/* Copyright (c) 1998 - 2005, Google Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * --- * Author: Craig Silverstein * * This library is intended to be used for in-memory hash tables, * though it provides rudimentary permanent-storage capabilities. * It attempts to be fast, portable, and small. The best algorithm * to fulfill these goals is an internal probing hashing algorithm, * as in Knuth, _Art of Computer Programming_, vol III. Unlike * chained (open) hashing, it doesn't require a pointer for every * item, yet it is still constant time lookup in practice. * * Also to save space, we let the contents (both data and key) that * you insert be a union: if the key/data is small, we store it * directly in the hashtable, otherwise we store a pointer to it. * To keep you from having to figure out which, use KEY_PTR and * PTR_KEY to convert between the arguments to these functions and * a pointer to the real data. For instance: * char key[] = "ab", *key2; * HTItem *bck; HashTable *ht; * HashInsert(ht, PTR_KEY(ht, key), 0); * bck = HashFind(ht, PTR_KEY(ht, "ab")); * key2 = KEY_PTR(ht, bck->key); * * There are a rich set of operations supported: * AllocateHashTable() -- Allocates a hashtable structure and * returns it. * cchKey: if it's a positive number, then each key is a * fixed-length record of that length. If it's 0, * the key is assumed to be a \0-terminated string. * fSaveKey: normally, you are responsible for allocating * space for the key. If this is 1, we make a * copy of the key for you. * ClearHashTable() -- Removes everything from a hashtable * FreeHashTable() -- Frees memory used by a hashtable * * HashFind() -- takes a key (use PTR_KEY) and returns the * HTItem containing that key, or NULL if the * key is not in the hashtable. * HashFindLast() -- returns the item found by last HashFind() * HashFindOrInsert() -- inserts the key/data pair if the key * is not already in the hashtable, or * returns the appropraite HTItem if it is. * HashFindOrInsertItem() -- takes key/data as an HTItem. * HashInsert() -- adds a key/data pair to the hashtable. What * it does if the key is already in the table * depends on the value of SAMEKEY_OVERWRITE. * HashInsertItem() -- takes key/data as an HTItem. * HashDelete() -- removes a key/data pair from the hashtable, * if it's there. RETURNS 1 if it was there, * 0 else. * If you use sparse tables and never delete, the full data * space is available. Otherwise we steal -2 (maybe -3), * so you can't have data fields with those values. * HashDeleteLast() -- deletes the item returned by the last Find(). * * HashFirstBucket() -- used to iterate over the buckets in a * hashtable. DON'T INSERT OR DELETE WHILE * ITERATING! You can't nest iterations. * HashNextBucket() -- RETURNS NULL at the end of iterating. * * HashSetDeltaGoalSize() -- if you're going to insert 1000 items * at once, call this fn with arg 1000. * It grows the table more intelligently. * * HashSave() -- saves the hashtable to a file. It saves keys ok, * but it doesn't know how to interpret the data field, * so if the data field is a pointer to some complex * structure, you must send a function that takes a * file pointer and a pointer to the structure, and * write whatever you want to write. It should return * the number of bytes written. If the file is NULL, * it should just return the number of bytes it would * write, without writing anything. * If your data field is just an integer, not a * pointer, just send NULL for the function. * HashLoad() -- loads a hashtable. It needs a function that takes * a file and the size of the structure, and expects * you to read in the structure and return a pointer * to it. You must do memory allocation, etc. If * the data is just a number, send NULL. * HashLoadKeys() -- unlike HashLoad(), doesn't load the data off disk * until needed. This saves memory, but if you look * up the same key a lot, it does a disk access each * time. * You can't do Insert() or Delete() on hashtables that were loaded * from disk. * * See libchash.h for parameters you can modify. Make sure LOG_WORD_SIZE * is defined correctly for your machine! (5 for 32 bit words, 6 for 64). */#include <stdlib.h>#include <stdio.h>#include <string.h> /* for strcmp, memcmp, etc */#ifdef WIN32#else#include <sys/types.h> /* ULTRIX needs this for in.h */#include <netinet/in.h> /* for reading/writing hashtables */#endif#include <assert.h>#include "libchash.h" /* all the types */ /* if keys are stored directly but cchKey is less than sizeof(ulong), */ /* this cuts off the bits at the end */char grgKeyTruncMask[sizeof(ulong)][sizeof(ulong)];#define KEY_TRUNC(ht, key) \ ( STORES_PTR(ht) || (ht)->cchKey == sizeof(ulong) \ ? (key) : ((key) & *(ulong *)&(grgKeyTruncMask[(ht)->cchKey][0])) ) /* round num up to a multiple of wordsize. (LOG_WORD_SIZE-3 is in bytes) */#define WORD_ROUND(num) ( ((num-1) | ((1<<(LOG_WORD_SIZE-3))-1)) + 1 )#define NULL_TERMINATED 0 /* val of cchKey if keys are null-term strings */ /* Useful operations we do to keys: compare them, copy them, free them */#define KEY_CMP(ht, key1, key2) ( !STORES_PTR(ht) ? (key1) - (key2) : \ (key1) == (key2) ? 0 : \ HashKeySize(ht) == NULL_TERMINATED ? \ strcmp((char *)key1, (char *)key2) :\ memcmp((void *)key1, (void *)key2, \ HashKeySize(ht)) )#define COPY_KEY(ht, keyTo, keyFrom) do \ if ( !STORES_PTR(ht) || !(ht)->fSaveKeys ) \ (keyTo) = (keyFrom); /* just copy pointer or info */\ else if ( (ht)->cchKey == NULL_TERMINATED ) /* copy 0-term.ed str */\ { \ (keyTo) = (ulong)HTsmalloc( WORD_ROUND(strlen((char *)(keyFrom))+1) ); \ strcpy((char *)(keyTo), (char *)(keyFrom)); \ } \ else \ { \ (keyTo) = (ulong) HTsmalloc( WORD_ROUND((ht)->cchKey) ); \ memcpy( (char *)(keyTo), (char *)(keyFrom), (ht)->cchKey); \ } \ while ( 0 )#define FREE_KEY(ht, key) do \ if ( STORES_PTR(ht) && (ht)->fSaveKeys ) \ if ( (ht)->cchKey == NULL_TERMINATED ) \ HTfree((char *)(key), WORD_ROUND(strlen((char *)(key))+1)); \ else \ HTfree((char *)(key), WORD_ROUND((ht)->cchKey)); \ while ( 0 ) /* the following are useful for bitmaps */ /* Format is like this (if 1 word = 4 bits): 3210 7654 ba98 fedc ... */typedef ulong HTBitmapPart; /* this has to be unsigned, for >> */typedef HTBitmapPart HTBitmap[1<<LOG_BM_WORDS];typedef ulong HTOffset; /* something big enough to hold offsets */#define BM_BYTES(cBuckets) /* we must ensure it's a multiple of word size */\ ( (((cBuckets) + 8*sizeof(ulong)-1) >> LOG_WORD_SIZE) << (LOG_WORD_SIZE-3) )#define MOD2(i, logmod) ( (i) & ((1<<(logmod))-1) )#define DIV_NUM_ENTRIES(i) ( (i) >> LOG_WORD_SIZE )#define MOD_NUM_ENTRIES(i) ( MOD2(i, LOG_WORD_SIZE) )#define MODBIT(i) ( ((ulong)1) << MOD_NUM_ENTRIES(i) )#define TEST_BITMAP(bm, i) ( (bm)[DIV_NUM_ENTRIES(i)] & MODBIT(i) ? 1 : 0 )#define SET_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] |= MODBIT(i)#define CLEAR_BITMAP(bm, i) (bm)[DIV_NUM_ENTRIES(i)] &= ~MODBIT(i) /* the following are useful for reading and writing hashtables */#define READ_UL(fp, data) \ do { \ long _ul; \ fread(&_ul, sizeof(_ul), 1, (fp)); \ data = ntohl(_ul); \ } while (0)#define WRITE_UL(fp, data) \ do { \ long _ul = htonl((long)(data)); \ fwrite(&_ul, sizeof(_ul), 1, (fp)); \ } while (0) /* Moves data from disk to memory if necessary. Note dataRead cannot be * * NULL, because then we might as well (and do) load the data into memory */#define LOAD_AND_RETURN(ht, loadCommand) /* lC returns an HTItem * */ \ if ( !(ht)->fpData ) /* data is stored in memory */ \ return (loadCommand); \ else /* must read data off of disk */ \ { \ int cchData; \ HTItem *bck; \ if ( (ht)->bckData.data ) free((char *)(ht)->bckData.data); \ ht->bckData.data = (ulong)NULL; /* needed if loadCommand fails */ \ bck = (loadCommand); \ if ( bck == NULL ) /* loadCommand failed: key not found */ \ return NULL; \ else \ (ht)->bckData = *bck; \ fseek(ht->fpData, (ht)->bckData.data, SEEK_SET); \ READ_UL((ht)->fpData, cchData); \ (ht)->bckData.data = (ulong)(ht)->dataRead((ht)->fpData, cchData); \ return &((ht)->bckData); \ }/* ======================================================================== *//* UTILITY ROUTINES *//* ---------------------- *//* HTsmalloc() -- safe malloc * allocates memory, or crashes if the allocation fails. */static void *HTsmalloc(unsigned long size){ void *retval; if ( size == 0 ) return NULL; retval = (void *)malloc(size); if ( !retval ) { fprintf(stderr, "HTsmalloc: Unable to allocate %lu bytes of memory\n", size); exit(1); } return retval;}/* HTscalloc() -- safe calloc * allocates memory and initializes it to 0, or crashes if * the allocation fails. */static void *HTscalloc(unsigned long size){ void *retval; retval = (void *)calloc(size, 1); if ( !retval && size > 0 ) { fprintf(stderr, "HTscalloc: Unable to allocate %lu bytes of memory\n", size); exit(1); } return retval;}/* HTsrealloc() -- safe calloc * grows the amount of memory from a source, or crashes if * the allocation fails. */static void *HTsrealloc(void *ptr, unsigned long new_size, long delta){ if ( ptr == NULL ) return HTsmalloc(new_size); ptr = realloc(ptr, new_size); if ( !ptr && new_size > 0 ) { fprintf(stderr, "HTsrealloc: Unable to reallocate %lu bytes of memory\n", new_size); exit(1); } return ptr;}/* HTfree() -- keep track of memory use * frees memory using free, but updates count of how much memory * is being used. */static void HTfree(void *ptr, unsigned long size){ if ( size > 0 ) /* some systems seem to not like freeing NULL */ free(ptr);}/*************************************************************************\| HTcopy() || Sometimes we interpret data as a ulong. But ulongs must be || aligned on some machines, so instead of casting we copy. |\*************************************************************************/unsigned long HTcopy(char *ul){
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -