📄 jsdhash.h
字号:
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla JavaScript code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1999-2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Brendan Eich <brendan@mozilla.org> (Original Author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

/* Include guard: each directive must sit on its own line to take effect. */
#ifndef jsdhash_h___
#define jsdhash_h___
/*
 * Double hashing, a la Knuth 6.
*/#include "jstypes.h"JS_BEGIN_EXTERN_C#if defined(__GNUC__) && defined(__i386__) && (__GNUC__ >= 3) && !defined(XP_OS2)#define JS_DHASH_FASTCALL __attribute__ ((regparm (3),stdcall))#elif defined(XP_WIN)#define JS_DHASH_FASTCALL __fastcall#else#define JS_DHASH_FASTCALL#endif#ifdef DEBUG_XXXbrendan#define JS_DHASHMETER 1#endif/* Table size limit, do not equal or exceed (see min&maxAlphaFrac, below). */#undef JS_DHASH_SIZE_LIMIT#define JS_DHASH_SIZE_LIMIT JS_BIT(24)/* Minimum table size, or gross entry count (net is at most .75 loaded). */#ifndef JS_DHASH_MIN_SIZE#define JS_DHASH_MIN_SIZE 16#elif (JS_DHASH_MIN_SIZE & (JS_DHASH_MIN_SIZE - 1)) != 0#error "JS_DHASH_MIN_SIZE must be a power of two!"#endif/* * Multiplicative hash uses an unsigned 32 bit integer and the golden ratio, * expressed as a fixed-point 32-bit fraction. */#define JS_DHASH_BITS 32#define JS_DHASH_GOLDEN_RATIO 0x9E3779B9U/* Primitive and forward-struct typedefs. */typedef uint32 JSDHashNumber;typedef struct JSDHashEntryHdr JSDHashEntryHdr;typedef struct JSDHashEntryStub JSDHashEntryStub;typedef struct JSDHashTable JSDHashTable;typedef struct JSDHashTableOps JSDHashTableOps;/* * Table entry header structure. * * In order to allow in-line allocation of key and value, we do not declare * either here. Instead, the API uses const void *key as a formal parameter, * and asks each entry for its key when necessary via a getKey callback, used * when growing or shrinking the table. Other callback types are defined * below and grouped into the JSDHashTableOps structure, for single static * initialization per hash table sub-type. * * Each hash table sub-type should nest the JSDHashEntryHdr structure at the * front of its particular entry type. The keyHash member contains the result * of multiplying the hash code returned from the hashKey callback (see below) * by JS_DHASH_GOLDEN_RATIO, then constraining the result to avoid the magic 0 * and 1 values. 
The stored keyHash value is table size invariant, and it is * maintained automatically by JS_DHashTableOperate -- users should never set * it, and its only uses should be via the entry macros below. * * The JS_DHASH_ENTRY_IS_LIVE macro tests whether entry is neither free nor * removed. An entry may be either busy or free; if busy, it may be live or * removed. Consumers of this API should not access members of entries that * are not live. * * However, use JS_DHASH_ENTRY_IS_BUSY for faster liveness testing of entries * returned by JS_DHashTableOperate, as JS_DHashTableOperate never returns a * non-live, busy (i.e., removed) entry pointer to its caller. See below for * more details on JS_DHashTableOperate's calling rules. */struct JSDHashEntryHdr { JSDHashNumber keyHash; /* every entry must begin like this */};#define JS_DHASH_ENTRY_IS_FREE(entry) ((entry)->keyHash == 0)#define JS_DHASH_ENTRY_IS_BUSY(entry) (!JS_DHASH_ENTRY_IS_FREE(entry))#define JS_DHASH_ENTRY_IS_LIVE(entry) ((entry)->keyHash >= 2)/* * A JSDHashTable is currently 8 words (without the JS_DHASHMETER overhead) * on most architectures, and may be allocated on the stack or within another * structure or class (see below for the Init and Finish functions to use). * * To decide whether to use double hashing vs. chaining, we need to develop a * trade-off relation, as follows: * * Let alpha be the load factor, esize the entry size in words, count the * entry count, and pow2 the power-of-two table size in entries. 
*
 *   (JSDHashTable overhead)    > (JSHashTable overhead)
 *   (unused table entry space) > (malloc and .next overhead per entry) +
 *                                (buckets overhead)
 *   (1 - alpha) * esize * pow2 > 2 * count + pow2
 *
 * Notice that alpha is by definition (count / pow2):
 *
 *   (1 - alpha) * esize * pow2 > 2 * alpha * pow2 + pow2
 *   (1 - alpha) * esize        > 2 * alpha + 1
 *
 *   esize > (1 + 2 * alpha) / (1 - alpha)
 *
 * This assumes both tables must keep keyHash, key, and value for each entry,
 * where key and value point to separately allocated strings or structures.
 * If key and value can be combined into one pointer, then the trade-off is:
 *
 *   esize > (1 + 3 * alpha) / (1 - alpha)
 *
 * If the entry value can be a subtype of JSDHashEntryHdr, rather than a type
 * that must be allocated separately and referenced by an entry.value pointer
 * member, and provided key's allocation can be fused with its entry's, then
 * k (the words wasted per entry with chaining) is 4.
 *
 * To see these curves, feed gnuplot input like so:
 *
 *   gnuplot> f(x,k) = (1 + k * x) / (1 - x)
 *   gnuplot> plot [0:.75] f(x,2), f(x,3), f(x,4)
 *
 * For k of 2 and a well-loaded table (alpha > .5), esize must be more than 4
 * words for chaining to be more space-efficient than double hashing.
 *
 * Solving for alpha helps us decide when to shrink an underloaded table:
 *
 *   esize                     > (1 + k * alpha) / (1 - alpha)
 *   esize - alpha * esize     > 1 + k * alpha
 *   esize - 1                 > (k + esize) * alpha
 *   (esize - 1) / (k + esize) > alpha
 *
 *   alpha < (esize - 1) / (esize + k)
 *
 * Therefore double hashing should keep alpha >= (esize - 1) / (esize + k),
 * assuming esize is not too large (in which case, chaining should probably be
 * used for any alpha). For esize=2 and k=3, we want alpha >= .2; for esize=3
 * and k=2, we want alpha >= .4. For k=4, esize could be 6, and alpha >= .5
 * would still obtain. See the JS_DHASH_MIN_ALPHA macro further below.
* * The current implementation uses a configurable lower bound on alpha, which * defaults to .25, when deciding to shrink the table (while still respecting * JS_DHASH_MIN_SIZE). * * Note a qualitative difference between chaining and double hashing: under * chaining, entry addresses are stable across table shrinks and grows. With * double hashing, you can't safely hold an entry pointer and use it after an * ADD or REMOVE operation, unless you sample table->generation before adding * or removing, and compare the sample after, dereferencing the entry pointer * only if table->generation has not changed. * * The moral of this story: there is no one-size-fits-all hash table scheme, * but for small table entry size, and assuming entry address stability is not * required, double hashing wins. */struct JSDHashTable { const JSDHashTableOps *ops; /* virtual operations, see below */ void *data; /* ops- and instance-specific data */ int16 hashShift; /* multiplicative hash shift */ uint8 maxAlphaFrac; /* 8-bit fixed point max alpha */ uint8 minAlphaFrac; /* 8-bit fixed point min alpha */ uint32 entrySize; /* number of bytes in an entry */ uint32 entryCount; /* number of entries in table */ uint32 removedCount; /* removed entry sentinels in table */ uint32 generation; /* entry storage generation number */ char *entryStore; /* entry storage */#ifdef JS_DHASHMETER struct JSDHashStats { uint32 searches; /* total number of table searches */ uint32 steps; /* hash chain links traversed */ uint32 hits; /* searches that found key */ uint32 misses; /* searches that didn't find key */ uint32 lookups; /* number of JS_DHASH_LOOKUPs */ uint32 addMisses; /* adds that miss, and do work */ uint32 addOverRemoved; /* adds that recycled a removed entry */ uint32 addHits; /* adds that hit an existing entry */ uint32 addFailures; /* out-of-memory during add growth */ uint32 removeHits; /* removes that hit, and do work */ uint32 removeMisses; /* useless removes that miss */ uint32 removeFrees; /* 
removes that freed entry directly */ uint32 removeEnums; /* removes done by Enumerate */ uint32 grows; /* table expansions */ uint32 shrinks; /* table contractions */ uint32 compresses; /* table compressions */ uint32 enumShrinks; /* contractions after Enumerate */ } stats;#endif};/* * Size in entries (gross, not net of free and removed sentinels) for table. * We store hashShift rather than sizeLog2 to optimize the collision-free case * in SearchTable. */#define JS_DHASH_TABLE_SIZE(table) JS_BIT(JS_DHASH_BITS - (table)->hashShift)/* * Table space at entryStore is allocated and freed using these callbacks. * The allocator should return null on error only (not if called with nbytes * equal to 0; but note that jsdhash.c code will never call with 0 nbytes). */typedef void *(* JS_DLL_CALLBACK JSDHashAllocTable)(JSDHashTable *table, uint32 nbytes);typedef void(* JS_DLL_CALLBACK JSDHashFreeTable) (JSDHashTable *table, void *ptr);/* * When a table grows or shrinks, each entry is queried for its key using this * callback. NB: in that event, entry is not in table any longer; it's in the * old entryStore vector, which is due to be freed once all entries have been * moved via moveEntry callbacks. */typedef const void *(* JS_DLL_CALLBACK JSDHashGetKey) (JSDHashTable *table, JSDHashEntryHdr *entry);/* * Compute the hash code for a given key to be looked up, added, or removed * from table. A hash code may have any JSDHashNumber value. */typedef JSDHashNumber(* JS_DLL_CALLBACK JSDHashHashKey) (JSDHashTable *table, const void *key);/* * Compare the key identifying entry in table with the provided key parameter. * Return JS_TRUE if keys match, JS_FALSE otherwise. */typedef JSBool(* JS_DLL_CALLBACK JSDHashMatchEntry)(JSDHashTable *table, const JSDHashEntryHdr *entry, const void *key);/* * Copy the data starting at from to the new entry storage at to. 
Do not add
 * reference counts for any strong references in the entry, however, as this
 * is a "move" operation: the old entry storage at from will be freed without
 * any reference-decrementing callback shortly.
 */
typedef void
(* JS_DLL_CALLBACK JSDHashMoveEntry)(JSDHashTable *table,
                                     const JSDHashEntryHdr *from,
                                     JSDHashEntryHdr *to);

/*
 * Reset the entry, releasing whatever strong references it owns. Called as
 * part of a JS_DHASH_REMOVE operation (the operation codes are described
 * further below), and only when the requested key was actually found in the
 * table.
 */
typedef void
(* JS_DLL_CALLBACK JSDHashClearEntry)(JSDHashTable *table,
                                      JSDHashEntryHdr *entry);

/*
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -