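/*
 * jsdhash.h
 *
 * [Code-viewer page residue, not part of the original header:]
 * From "a robot (chatbot) developed on top of ALICE" - C header file code -
 * 574 lines in total - page 1 of 2.
 */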
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is Mozilla JavaScript code.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1999-2001
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *   Brendan Eich <brendan@mozilla.org> (Original Author)
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#ifndef jsdhash_h___
#define jsdhash_h___
/*
 * Double hashing, a la Knuth 6.
 */
#include "jstypes.h"

JS_BEGIN_EXTERN_C

/*
 * Opt-in instrumentation switch: when JS_DHASHMETER is defined, struct
 * JSDHashTable gains a nested JSDHashStats member holding per-table counters.
 * This header turns it on by itself only when DEBUG_XXXbrendan is defined.
 */
#ifdef DEBUG_XXXbrendan
#define JS_DHASHMETER 1
#endif

/*
 * Table size limit: a table's size must stay strictly below this value (do
 * not equal or exceed it; see minAlphaFrac and maxAlphaFrac in struct
 * JSDHashTable, below).  The #undef discards any definition made before this
 * header is included, so the limit cannot be preset by the including code.
 * NOTE(review): presumably capped at 24 bits so that products with the 8-bit
 * fixed-point alpha fractions still fit in 32 bits -- confirm in jsdhash.c.
 */
#undef JS_DHASH_SIZE_LIMIT
#define JS_DHASH_SIZE_LIMIT     JS_BIT(24)

/*
 * Minimum table size, or gross entry count (net is at most .75 loaded).
 *
 * Defaults to 16 entries.  A build may predefine JS_DHASH_MIN_SIZE to
 * override the default, but the value must be a positive power of two.  Zero
 * is rejected explicitly: it satisfies the (x & (x - 1)) == 0 bit trick even
 * though it is not a power of two.
 */
#ifndef JS_DHASH_MIN_SIZE
#define JS_DHASH_MIN_SIZE 16
#elif (JS_DHASH_MIN_SIZE) <= 0 || \
      ((JS_DHASH_MIN_SIZE) & ((JS_DHASH_MIN_SIZE) - 1)) != 0
#error "JS_DHASH_MIN_SIZE must be a power of two!"
#endif

/*
 * Multiplicative hash uses an unsigned 32 bit integer and the golden ratio,
 * expressed as a fixed-point 32-bit fraction.
 *
 * JS_DHASH_BITS is the width in bits of a hash number.  JS_DHASH_GOLDEN_RATIO
 * is 0x9E3779B9 == 2654435769, i.e. 2^32 * 0.6180339887... (the fractional
 * part of the golden ratio) truncated to an integer.  Hash codes returned by
 * a hashKey callback are multiplied by it before being stored as an entry's
 * keyHash.
 */
#define JS_DHASH_BITS           32
#define JS_DHASH_GOLDEN_RATIO   0x9E3779B9U

/*
 * Primitive and forward-struct typedefs.
 *
 * The struct typedefs are declared up front so that the callback signatures
 * and structure definitions below can refer to one another by short name.
 */
/* Unsigned 32-bit hash code; also the type of an entry's stored keyHash. */
typedef uint32                  JSDHashNumber;
/* Header that every table entry must begin with. */
typedef struct JSDHashEntryHdr  JSDHashEntryHdr;
/* NOTE(review): struct body is not in this part of the header -- see there. */
typedef struct JSDHashEntryStub JSDHashEntryStub;
/* A double-hashed table instance. */
typedef struct JSDHashTable     JSDHashTable;
/* The callbacks grouped per hash table sub-type ("virtual operations"). */
typedef struct JSDHashTableOps  JSDHashTableOps;

/*
 * Table entry header structure.
 *
 * In order to allow in-line allocation of key and value, we do not declare
 * either here.  Instead, the API uses const void *key as a formal parameter,
 * and asks each entry for its key when necessary via a getKey callback, used
 * when growing or shrinking the table.  Other callback types are defined
 * below and grouped into the JSDHashTableOps structure, for single static
 * initialization per hash table sub-type.
 *
 * Each hash table sub-type should nest the JSDHashEntryHdr structure at the
 * front of its particular entry type.  The keyHash member contains the result
 * of multiplying the hash code returned from the hashKey callback (see below)
 * by JS_DHASH_GOLDEN_RATIO, then constraining the result to avoid the magic 0
 * and 1 values.  The stored keyHash value is table size invariant, and it is
 * maintained automatically by JS_DHashTableOperate -- users should never set
 * it, and its only uses should be via the entry macros below.
 *
 * The JS_DHASH_ENTRY_IS_LIVE macro tests whether entry is neither free nor
 * removed.  An entry may be either busy or free; if busy, it may be live or
 * removed.  Consumers of this API should not access members of entries that
 * are not live.
 *
 * However, use JS_DHASH_ENTRY_IS_BUSY for faster liveness testing of entries
 * returned by JS_DHashTableOperate, as JS_DHashTableOperate never returns a
 * non-live, busy (i.e., removed) entry pointer to its caller.  See below for
 * more details on JS_DHashTableOperate's calling rules.
 */
/*
 * Common header nested at the front of every entry type.  keyHash encodes the
 * entry state as well as the hash: 0 marks a free entry, 1 a removed entry,
 * and any value >= 2 a live entry (see the JS_DHASH_ENTRY_IS_* macros).  It
 * is maintained by the table code; users should never set it themselves.
 */
struct JSDHashEntryHdr {
    JSDHashNumber       keyHash;        /* every entry must begin like this */
};

/*
 * Entry-state predicates.  Each takes a pointer to an entry (anything with a
 * keyHash member), evaluates its argument exactly once, and yields 0 or 1:
 *   FREE -- keyHash is 0: the slot has never held, or no longer holds, data.
 *   BUSY -- keyHash is nonzero: the slot is either live or removed.
 *   LIVE -- keyHash is above the two reserved values 0 and 1.
 */
#define JS_DHASH_ENTRY_IS_FREE(entry)   (0 == (entry)->keyHash)
#define JS_DHASH_ENTRY_IS_BUSY(entry)   ((entry)->keyHash != 0)
#define JS_DHASH_ENTRY_IS_LIVE(entry)   ((entry)->keyHash > 1)

/*
 * A JSDHashTable is currently 8 words (without the JS_DHASHMETER overhead)
 * on most architectures, and may be allocated on the stack or within another
 * structure or class (see below for the Init and Finish functions to use).
 *
 * To decide whether to use double hashing vs. chaining, we need to develop a
 * trade-off relation, as follows:
 *
 * Let alpha be the load factor, esize the entry size in words, count the
 * entry count, and pow2 the power-of-two table size in entries.
 *
 *   (JSDHashTable overhead)    > (JSHashTable overhead)
 *   (unused table entry space) > (malloc and .next overhead per entry) +
 *                                (buckets overhead)
 *   (1 - alpha) * esize * pow2 > 2 * count + pow2
 *
 * Notice that alpha is by definition (count / pow2):
 *
 *   (1 - alpha) * esize * pow2 > 2 * alpha * pow2 + pow2
 *   (1 - alpha) * esize        > 2 * alpha + 1
 *
 *   esize > (1 + 2 * alpha) / (1 - alpha)
 *
 * This assumes both tables must keep keyHash, key, and value for each entry,
 * where key and value point to separately allocated strings or structures.
 * If key and value can be combined into one pointer, then the trade-off is:
 *
 *   esize > (1 + 3 * alpha) / (1 - alpha)
 *
 * In general, writing k for the number of words wasted per entry with
 * chaining, the relation is:
 *
 *   esize > (1 + k * alpha) / (1 - alpha)
 *
 * with k = 2 in the first case above and k = 3 in the second.  If the entry
 * value can be a subtype of JSDHashEntryHdr, rather than a type that must be
 * allocated separately and referenced by an entry.value pointer member, and
 * provided key's allocation can be fused with its entry's, then k is 4.
 *
 * To see these curves, feed gnuplot input like so:
 *
 *   gnuplot> f(x,k) = (1 + k * x) / (1 - x)
 *   gnuplot> plot [0:.75] f(x,2), f(x,3), f(x,4)
 *
 * For k of 2 and a well-loaded table (alpha > .5), esize must be more than 4
 * words for chaining to be more space-efficient than double hashing.
 *
 * Solving for alpha helps us decide when to shrink an underloaded table:
 *
 *   esize                     > (1 + k * alpha) / (1 - alpha)
 *   esize - alpha * esize     > 1 + k * alpha
 *   esize - 1                 > (k + esize) * alpha
 *   (esize - 1) / (k + esize) > alpha
 *
 *   alpha < (esize - 1) / (esize + k)
 *
 * Therefore double hashing should keep alpha >= (esize - 1) / (esize + k),
 * assuming esize is not too large (in which case, chaining should probably be
 * used for any alpha).  For esize=2 and k=3, we want alpha >= .2; for esize=3
 * and k=2, we want alpha >= .4.  For k=4, esize could be 6, and alpha >= .5
 * would still obtain.  See the JS_DHASH_MIN_ALPHA macro further below.
 *
 * The current implementation uses a configurable lower bound on alpha, which
 * defaults to .25, when deciding to shrink the table (while still respecting
 * JS_DHASH_MIN_SIZE).
 *
 * Note a qualitative difference between chaining and double hashing: under
 * chaining, entry addresses are stable across table shrinks and grows.  With
 * double hashing, you can't safely hold an entry pointer and use it after an
 * ADD or REMOVE operation, unless you sample table->generation before adding
 * or removing, and compare the sample after, dereferencing the entry pointer
 * only if table->generation has not changed.
 *
 * The moral of this story: there is no one-size-fits-all hash table scheme,
 * but for small table entry size, and assuming entry address stability is not
 * required, double hashing wins.
 */
/*
 * Per-table state for a double-hashed table.  Member order and types define
 * the structure's layout, so they must not be rearranged casually.
 */
struct JSDHashTable {
    const JSDHashTableOps *ops;         /* virtual operations, see below */
    void                *data;          /* ops- and instance-specific data */
    /*
     * The gross table size in entries is derived from hashShift as
     * JS_BIT(JS_DHASH_BITS - hashShift); see JS_DHASH_TABLE_SIZE.
     */
    int16               hashShift;      /* multiplicative hash shift */
    /* Bounds on alpha, the load factor (entry count / table size). */
    uint8               maxAlphaFrac;   /* 8-bit fixed point max alpha */
    uint8               minAlphaFrac;   /* 8-bit fixed point min alpha */
    uint32              entrySize;      /* number of bytes in an entry */
    uint32              entryCount;     /* number of entries in table */
    uint32              removedCount;   /* removed entry sentinels in table */
    /*
     * Callers that hold an entry pointer across an ADD or REMOVE sample
     * generation beforehand and dereference the pointer afterwards only if
     * generation has not changed.
     */
    uint32              generation;     /* entry storage generation number */
    /* Allocated and freed through the table's alloc/free callbacks. */
    char                *entryStore;    /* entry storage */
#ifdef JS_DHASHMETER
    /* Optional counters, present only in JS_DHASHMETER builds. */
    struct JSDHashStats {
        uint32          searches;       /* total number of table searches */
        uint32          steps;          /* hash chain links traversed */
        uint32          hits;           /* searches that found key */
        uint32          misses;         /* searches that didn't find key */
        uint32          lookups;        /* number of JS_DHASH_LOOKUPs */
        uint32          addMisses;      /* adds that miss, and do work */
        uint32          addOverRemoved; /* adds that recycled a removed entry */
        uint32          addHits;        /* adds that hit an existing entry */
        uint32          addFailures;    /* out-of-memory during add growth */
        uint32          removeHits;     /* removes that hit, and do work */
        uint32          removeMisses;   /* useless removes that miss */
        uint32          removeFrees;    /* removes that freed entry directly */
        uint32          removeEnums;    /* removes done by Enumerate */
        uint32          grows;          /* table expansions */
        uint32          shrinks;        /* table contractions */
        uint32          compresses;     /* table compressions */
        uint32          enumShrinks;    /* contractions after Enumerate */
    } stats;
#endif
};

/*
 * Size in entries (gross, not net of free and removed sentinels) for table.
 * We store hashShift rather than sizeLog2 to optimize the collision-free case
 * in SearchTable.
 *
 * table is a pointer to a JSDHashTable and is evaluated exactly once.
 * Assuming JS_BIT(n) is 1 << n, sizeLog2 == JS_DHASH_BITS - table->hashShift.
 * NOTE(review): the expansion is not wrapped in its own parentheses; it
 * relies on JS_BIT (defined outside this header, presumably in jstypes.h)
 * yielding a parenthesized expression -- confirm before embedding this macro
 * in a larger expression.
 */
#define JS_DHASH_TABLE_SIZE(table)  JS_BIT(JS_DHASH_BITS - (table)->hashShift)

/*
 * Table space at entryStore is allocated and freed using these callbacks.
 * The allocator should return null on error only (not if called with nbytes
 * equal to 0; but note that jsdhash.c code will never call with 0 nbytes).
 *
 * JSDHashAllocTable: table is the table requesting storage and nbytes the
 * number of bytes wanted; returns the new storage, or null on error.
 * JSDHashFreeTable: table is the owning table and ptr the storage to
 * release (presumably obtained from the matching allocator -- confirm in
 * jsdhash.c); returns nothing.
 */
typedef void *
(* JS_DLL_CALLBACK JSDHashAllocTable)(JSDHashTable *table, uint32 nbytes);

typedef void
(* JS_DLL_CALLBACK JSDHashFreeTable) (JSDHashTable *table, void *ptr);

/*
 * When a table grows or shrinks, each entry is queried for its key using this
 * callback.  NB: in that event, entry is not in table any longer; it's in the
 * old entryStore vector, which is due to be freed once all entries have been
 * moved via moveEntry callbacks.
 *
 * table is the table being resized and entry the entry whose key is wanted.
 * Returns a pointer to that entry's key, in the same const void * form that
 * the rest of the API uses for keys.
 */
typedef const void *
(* JS_DLL_CALLBACK JSDHashGetKey)    (JSDHashTable *table,
                                      JSDHashEntryHdr *entry);

/*
 * Compute the hash code for a given key to be looked up, added, or removed
 * from table.  A hash code may have any JSDHashNumber value.
 *
 * table is the table the key is used with and key the key to hash.  The
 * returned code is not stored as-is: it is multiplied by
 * JS_DHASH_GOLDEN_RATIO and constrained to avoid the reserved values 0 and 1
 * before it becomes an entry's keyHash, so implementations need not avoid
 * any particular result.
 */
typedef JSDHashNumber
(* JS_DLL_CALLBACK JSDHashHashKey)   (JSDHashTable *table, const void *key);

/*
 * Compare the key identifying entry in table with the provided key parameter.
 * Return JS_TRUE if keys match, JS_FALSE otherwise.
 *
 * table is the table that holds entry; entry is read-only here (it is passed
 * as a pointer to const); key is the key being looked up, added, or removed.
 */
typedef JSBool
(* JS_DLL_CALLBACK JSDHashMatchEntry)(JSDHashTable *table,
                                      const JSDHashEntryHdr *entry,
                                      const void *key);

/*
 * Copy the data starting at from to the new entry storage at to.  Do not add
 * reference counts for any strong references in the entry, however, as this
 * is a "move" operation: the old entry storage at from will be freed without
 * any reference-decrementing callback shortly.
 *
 * table is the table being resized, from the source entry in the old storage
 * (read-only), and to the destination entry in the new storage.  Returns
 * nothing.
 */
typedef void
(* JS_DLL_CALLBACK JSDHashMoveEntry)(JSDHashTable *table,
                                     const JSDHashEntryHdr *from,
                                     JSDHashEntryHdr *to);

/*
 * Clear the entry and drop any strong references it holds.  This callback is
 * invoked during a JS_DHASH_REMOVE operation (see below for operation codes),
 * but only if the given key is found in the table.
 *
 * table is the table that holds entry, and entry the entry being removed.
 * Returns nothing.
 */
typedef void
(* JS_DLL_CALLBACK JSDHashClearEntry)(JSDHashTable *table,
                                      JSDHashEntryHdr *entry);

/*
 * Called when a table (whether allocated dynamically by itself, or nested in
 * a larger structure, or allocated on the stack) is finished.  This callback
 * allows table->ops-specific code to finalize table->data.
 */

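/*
 * [Code-viewer page residue, not part of jsdhash.h.]  Keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 *   Show shortcuts       ?
 */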