hash.c

来自「CMU大名鼎鼎的SPHINX-3大词汇量连续语音识别系统」· C语言 代码 · 共 380 行

C
380
字号
/* ==================================================================== * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer.  * * 2. Redistributions in binary form must reproduce the above copyright *    notice, this list of conditions and the following disclaimer in *    the documentation and/or other materials provided with the *    distribution. * * This work was supported in part by funding from the Defense Advanced  * Research Projects Agency and the National Science Foundation of the  * United States of America, and the CMU Sphinx Speech Consortium. * * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * ==================================================================== * *//* * hash.c -- Hash table module. * * ********************************************** * CMU ARPA Speech Project * * Copyright (c) 1999 Carnegie Mellon University. * ALL RIGHTS RESERVED. * ********************************************** *  * HISTORY *  * 05-May-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon * 		Removed hash_key2hash().  Added hash_enter_bkey() and hash_lookup_bkey(), * 		and len attribute to hash_entry_t. *  * 30-Apr-1999	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon * 		Added hash_key2hash(). *  * 18-Jun-97	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon * 		Included case sensitive/insensitive option.  Removed local, static * 		maintenance of all hash tables. *  * 31-Jul-95	M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon * 		Created. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include "hash.h"#include "err.h"#include "ckd_alloc.h"#include "case.h"#if 0static void prime_sieve (int32 max){    char *notprime;    int32 p, pp;        notprime = (char *) ckd_calloc (max+1, 1);    p = 2;    for (;;) {	printf ("%d\n", p);	for (pp = p+p; pp <= max; pp += p)	    notprime[pp] = 1;	for (++p; (p <= max) && notprime[p]; p++);	if (p > max)	    break;    }}#endif/* * HACK!!  Initial hash table size is restricted by this set of primes.  (Of course, * collision resolution by chaining will accommodate more entries indefinitely, but * efficiency will drop.) */const int32 prime[] = {    101, 211, 307, 401, 503, 601, 701, 809, 907,    1009, 1201, 1601, 2003, 2411, 3001, 4001, 5003, 6007, 7001, 8009, 9001,    10007, 12007, 16001, 20011, 24001, 30011, 40009, 50021, 60013, 70001, 80021, 90001,    100003, 120011, 160001, 200003, 240007, 300007, 400009, 500009, 600011, 700001, 800011, 900001,    -1};static int32 prime_size (int32 size){    int32 i;        for (i = 0; (prime[i] > 0) && (prime[i] < size); i++);    if (prime[i] <= 0) {	E_WARN("Very large hash table requested (%d entries)\n", size);	--i;    }    return (prime[i]);}hash_table_t *hash_new (int32 size, int32 casearg){    hash_table_t *h;        h = (hash_table_t *) ckd_calloc (1, sizeof(hash_table_t));    h->size = prime_size (size+(size>>1));    h->nocase = (casearg == HASH_CASE_NO);    h->table = (hash_entry_t *) ckd_calloc (h->size, sizeof(hash_entry_t));    /* The above calloc clears h->table[*].key and .next to NULL, i.e. an empty table */    return h;}/* * Compute hash value for given key string. * Somewhat tuned for English text word strings. */static uint32 key2hash (hash_table_t *h, const char *key){    register const char *cp;    register char c;    register int32 s;    register uint32 hash;        hash = 0;    s = 0;        if (h->nocase) {	for (cp = key; *cp; cp++) {	    c = *cp;	    c = UPPER_CASE(c);	    hash += c << s;	    s += 5;	    if (s >= 25)		s -= 24;	}    } else {	for (cp = key; *cp; cp++) {	    hash += (*cp) << s;	    s += 5;	    if (s >= 25)		s -= 24;	}    }        return (hash % h->size);}static char *makekey (uint8 *data, int32 len, char *key){    int32 i, j;        if (! key)	key = (char *) ckd_calloc (len*2 + 1, sizeof(char));        for (i = 0, j = 0; i < len; i++, j += 2) {	key[j] = 'A' + (data[i] & 0x000f);	key[j+1] = 'J' + ((data[i] >> 4) & 0x000f);    }    key[j] = '\0';        return key;}static int32 keycmp_nocase (hash_entry_t *entry, const char *key){    char c1, c2;    int32 i;    const char *str;        str = entry->key;    for (i = 0; i < entry->len; i++) {	c1 = *(str++);	c1 = UPPER_CASE(c1);	c2 = *(key++);	c2 = UPPER_CASE(c2);	if (c1 != c2)	    return (c1-c2);    }        return 0;}static int32 keycmp_case (hash_entry_t *entry, const char *key){    char c1, c2;    int32 i;    const char *str;        str = entry->key;    for (i = 0; i < entry->len; i++) {	c1 = *(str++);	c2 = *(key++);	if (c1 != c2)	    return (c1-c2);    }        return 0;}/* * Lookup chained entries with hash-value hash in table h for given key and return * associated value in *val. * Return value: 0 if key found in hash table, else -1. */static int32 lookup (hash_table_t *h, uint32 hash, const char *key, int32 len, int32 *val){    hash_entry_t *entry;        entry = &(h->table[hash]);    if (entry->key == NULL)	return -1;        if (h->nocase) {	while (entry && ((entry->len != len) || (keycmp_nocase (entry, key) != 0)))	    entry = entry->next;    } else {	while (entry && ((entry->len != len) || (keycmp_case (entry, key) != 0)))	    entry = entry->next;    }        if (entry) {	*val = entry->val;	return 0;    } else	return -1;}int32 hash_lookup (hash_table_t *h, const char *key, int32 *val){    uint32 hash;    int32 len;        hash = key2hash (h, key);    len = strlen(key);        return (lookup (h, hash, key, len, val));}int32 hash_lookup_bkey (hash_table_t *h, const char *key, int32 len, int32 *val){    uint32 hash;    char *str;        str = makekey ((uint8 *)key, len, NULL);    hash = key2hash (h, str);    ckd_free (str);        return (lookup (h, hash, key, len, val));}static int32 enter (hash_table_t *h, uint32 hash, const char *key, int32 len, int32 val){    int32 old;    hash_entry_t *cur, *new;        if (lookup (h, hash, key, len, &old) == 0) {	/* Key already exists */	return old;    }        cur = &(h->table[hash]);    if (cur->key == NULL) {	/* Empty slot at hashed location; add this entry */	cur->key = key;	cur->len = len;	cur->val = val;    } else {	/* Key collision; create new entry and link to hashed location */	new = (hash_entry_t *) ckd_calloc (1, sizeof(hash_entry_t));	new->key = key;	new->len = len;	new->val = val;	new->next = cur->next;	cur->next = new;    }    return val;}int32 hash_enter (hash_table_t *h, const char *key, int32 val){    uint32 hash;    int32 len;        hash = key2hash (h, key);    len = strlen(key);    return (enter (h, hash, key, len, val));}int32 hash_enter_bkey (hash_table_t *h, const char *key, int32 len, int32 val){    uint32 hash;    char *str;        str = makekey ((uint8 *)key, len, NULL);    hash = key2hash (h, str);    ckd_free (str);        return (enter (h, hash, key, len, val));}glist_t hash_tolist (hash_table_t *h, int32 *count){    glist_t g;    hash_entry_t *e;    int32 i, j;        g = NULL;        j = 0;    for (i = 0; i < h->size; i++) {	e = &(h->table[i]);		if (e->key != NULL) {	    g = glist_add_ptr (g, (void *)e);	    j++;	    	    for (e = e->next; e; e = e->next) {		g = glist_add_ptr (g, (void *)e);		j++;	    }	}    }        *count = j;        return g;}void hash_free (hash_table_t *h){    hash_entry_t *e, *e2;    int32 i;        /* Free additional entries created for key collision cases */    for (i = 0; i < h->size; i++) {	for (e = h->table[i].next; e; e = e2) {	    e2 = e->next;	    ckd_free ((void *) e);	}    }        ckd_free ((void *) h->table);    ckd_free ((void *) h);}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?