📄 utils.c
字号:
/* *\ * *\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *//* make a hash index from an erlang term *//*** There are three hash functions.** make_broken_hash: the one used for backward compatibility** is called from the bif erlang:hash/2. Should never be used** as it a) hashes only a part of binaries, b) hashes bignums really poorly,** c) hashes bignums differently on different endian processors and d) hashes ** small integers with different weights on different bytes.**** make_hash: A hash function that will give the same values for the same** terms regardless of the internal representation. Small integers are ** hashed using the same algorithm as bignums and bignums are hashed ** independent of the CPU endianess. ** Make_hash also hashes pids, ports and references like 32 bit numbers ** (but with different constants). ** make_hash() is called from the bif erlang:phash/2**** The idea behind the hash algorithm is to produce values suitable for ** linear dynamic hashing. We cannot choose the range at all while hashing ** (it's not even supplied to the hashing functions). The good old algorithm** [H = H*C+X mod M, where H is the hash value, C is a "random" constant(or M),** M is the range, preferably a prime, and X is each byte value] is therefore ** modified to:** H = H*C+X mod 2^32, where C is a large prime. This gives acceptable ** "spreading" of the hashes, so that later modulo calculations also will give** acceptable "spreading" in the range. ** We really need to hash on bytes, otherwise the ** upper bytes of a word will be less significant than the lower ones. That's ** not acceptable at all. For internal use one could maybe optimize by using** another hash function, that is less strict but faster. That is, however, not** implemented.**** Short semi-formal description of make_hash:**** In make_hash, the number N is treated like this:** Abs(N) is hashed bytewise with the least significant byte, B(0), first.** The number of bytes (J) to calculate hash on in N is ** (the number of _32_ bit words needed to store the unsigned ** value of abs(N)) * 4.** X = FUNNY_NUMBER2** If N < 0, Y = FUNNY_NUMBER4 else Y = FUNNY_NUMBER3.** The hash value is Y*h(J) mod 2^32 where h(J) is calculated like** h(0) = <initial hash> ** h(i) = h(i-i)*X + B(i-1)** The above should hold regardless of internal representation.** Pids are hashed like small numbers but with differrent constants, as are** ports.** References are hashed like ports but only on the least significant byte.** Binaries are hashed on all bytes (not on the 15 first as in ** make_broken_hash()).** Bytes in lists (possibly text strings) use a simpler multiplication inlined** in the handling of lists, that is an optimization.** Everything else is like in the old hash (make_broken_hash()).**** make_hash2() is faster than make_hash, in particular for bignums** and binaries, and produces better hash values. *//* some prime numbers just above 2 ^ 28 */#define FUNNY_NUMBER1 268440163#define FUNNY_NUMBER2 268439161#define FUNNY_NUMBER3 268435459#define FUNNY_NUMBER4 268436141#define FUNNY_NUMBER5 268438633#define FUNNY_NUMBER6 268437017#define FUNNY_NUMBER7 268438039#define FUNNY_NUMBER8 268437511#define FUNNY_NUMBER9 268439627#define FUNNY_NUMBER10 268440479#define FUNNY_NUMBER11 268440577static Uint32hash_binary_bytes(Eterm bin, Uint sz, Uint32 hash){ byte* ptr; Uint bitoffs; Uint bitsize; if (sz > 0) { ERTS_GET_BINARY_BYTES(bin, ptr, bitoffs, bitsize); if (bitoffs == 0) { while (sz--) { hash = hash*FUNNY_NUMBER1 + *ptr++; } } else { Uint previous = *ptr++; Uint b; Uint lshift = bitoffs; Uint rshift = 8 - lshift; while (sz--) { b = (previous << lshift) & 0xFF; previous = *ptr++; b |= previous >> rshift; hash = hash*FUNNY_NUMBER1 + b; } } } return hash;}Uint32make_hash(Eterm term, Uint32 hash){ /* ** Convenience macro for calculating a bytewise hash on an unsigned 32 bit ** integer. ** If the endianess is known, we could be smarter here, ** but that gives no significant speedup (on a sparc at least) */#define UINT32_HASH_STEP(Expr, Prime1) \ do { \ Uint32 x = (Uint32) (Expr); \ hash = \ (((((hash)*(Prime1) + (x & 0xFF)) * (Prime1) + \ ((x >> 8) & 0xFF)) * (Prime1) + \ ((x >> 16) & 0xFF)) * (Prime1) + \ (x >> 24)); \ } while(0)#define UINT32_HASH_RET(Expr, Prime1, Prime2) \ do { \ UINT32_HASH_STEP(Expr, Prime1); \ return hash * (Prime2); \ } while(0)#define SINT32_HASH_RET(Expr, Prime1, Prime2, Prime3) \ do { \ Sint32 y = (Sint32) Expr; \ if (y < 0) { \ UINT32_HASH_RET(-y, Prime1, Prime3); \ } \ UINT32_HASH_RET(y, Prime1, Prime2); \ } while(0) /* * Significant additions needed for real 64 bit port with larger fixnums. */ /* * Note, for the simple 64bit port, not utilizing the * larger word size this function will work without modification. */ switch (tag_val_def(term)) { case NIL_DEF: return hash*FUNNY_NUMBER3 + 1; case ATOM_DEF: return hash*FUNNY_NUMBER1 + (atom_tab(atom_val(term))->slot.bucket.hvalue); case SMALL_DEF: { Sint y1 = signed_val(term); Uint y2 = y1 < 0 ? -(Uint)y1 : y1; UINT32_HASH_STEP(y2, FUNNY_NUMBER2);#ifdef ARCH_64 if (y2 >> 32) UINT32_HASH_STEP(y2 >> 32, FUNNY_NUMBER2);#endif return hash * (y1 < 0 ? FUNNY_NUMBER4 : FUNNY_NUMBER3); } case BINARY_DEF: { Uint sz = binary_size(term); hash = hash_binary_bytes(term, sz, hash); return hash*FUNNY_NUMBER4 + sz; } case EXPORT_DEF: { Export* ep = (Export *) (export_val(term))[1]; hash = hash * FUNNY_NUMBER11 + ep->code[2]; hash = hash*FUNNY_NUMBER1 + (atom_tab(atom_val(ep->code[0]))->slot.bucket.hvalue); hash = hash*FUNNY_NUMBER1 + (atom_tab(atom_val(ep->code[1]))->slot.bucket.hvalue); return hash; } case FUN_DEF: { ErlFunThing* funp = (ErlFunThing *) fun_val(term); Uint num_free = funp->num_free; Uint i; hash = hash * FUNNY_NUMBER10 + num_free; hash = hash*FUNNY_NUMBER1 + (atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue); hash = hash*FUNNY_NUMBER2 + funp->fe->old_index; hash = hash*FUNNY_NUMBER2 + funp->fe->old_uniq; for (i = 0; i < num_free; i++) { hash = make_hash(funp->env[i], hash); } return hash; } case PID_DEF: UINT32_HASH_RET(internal_pid_number(term),FUNNY_NUMBER5,FUNNY_NUMBER6); case EXTERNAL_PID_DEF: UINT32_HASH_RET(external_pid_number(term),FUNNY_NUMBER5,FUNNY_NUMBER6); case PORT_DEF: UINT32_HASH_RET(internal_port_number(term),FUNNY_NUMBER9,FUNNY_NUMBER10); case EXTERNAL_PORT_DEF: UINT32_HASH_RET(external_port_number(term),FUNNY_NUMBER9,FUNNY_NUMBER10); case REF_DEF: UINT32_HASH_RET(internal_ref_numbers(term)[0],FUNNY_NUMBER9,FUNNY_NUMBER10); case EXTERNAL_REF_DEF: UINT32_HASH_RET(external_ref_numbers(term)[0],FUNNY_NUMBER9,FUNNY_NUMBER10); case FLOAT_DEF: { FloatDef ff; GET_DOUBLE(term, ff); return hash*FUNNY_NUMBER6 + (ff.fw[0] ^ ff.fw[1]); } break; case LIST_DEF: { Eterm* list = list_val(term); while(1) { if (is_byte(*list)) { /* Optimization for strings. ** Note that this hash is different from a 'small' hash, ** as multiplications on a Sparc is so slow. */ hash = hash*FUNNY_NUMBER2 + unsigned_val(*list); } else { hash = make_hash(*list, hash); } if (is_not_list(CDR(list))) return make_hash(CDR(list),hash)*FUNNY_NUMBER8; list = list_val(CDR(list)); } } break; case BIG_DEF: /* Note that this is the exact same thing as the hashing of smalls.*/ { /* FIXED: __alpha__ this was remade to be backwards + * compaible with 32 bit implmentation */ Eterm* ptr = big_val(term); Uint n = BIG_SIZE(ptr); int is_neg = BIG_SIGN(ptr); Uint i; if (D_EXP < 32 && (n & 1)) /* emulate 32 bit behaviour (add a MSB 0 :-( )*/ n++; for (i = 0; i < n; i++) { digit_t d = BIG_DIGIT(ptr, i); int j; for(j = 0; j < sizeof(digit_t); ++j) { hash = (hash*FUNNY_NUMBER2) + (d & 0xff); d >>= 8; } } if (is_neg) { return hash*FUNNY_NUMBER4; } else { return hash*FUNNY_NUMBER3; } } break; case TUPLE_DEF: { Eterm* ptr = tuple_val(term); Uint arity = arityval(*ptr); int i = arity; ptr++; while(i--) hash = make_hash(*ptr++, hash); return hash*FUNNY_NUMBER9 + arity; } break; default: erl_exit(1, "Invalid tag in make_hash(0x%X)\n", term); return 0; }#undef UINT32_HASH_STEP#undef UINT32_HASH_RET#undef SINT32_HASH_RET}/* Hash function suggested by Bob Jenkins. */#define MIX(a,b,c) \do { \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ c -= a; c -= b; c ^= (b>>13); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<16); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>3); \ b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \} while(0)#define HCONST 0x9e3779b9UL /* the golden ratio; an arbitrary value */Uint32block_hash(byte *k, unsigned length, Uint32 initval){ Uint32 a,b,c; unsigned len; /* Set up the internal state */ len = length; a = b = HCONST; c = initval; /* the previous hash value */ while (len >= 12) { a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24)); b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24)); c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24)); MIX(a,b,c); k += 12; len -= 12; } c += length; switch(len) /* all the case statements fall through */ { case 11: c+=((Uint32)k[10]<<24); case 10: c+=((Uint32)k[9]<<16); case 9 : c+=((Uint32)k[8]<<8); /* the first byte of c is reserved for the length */ case 8 : b+=((Uint32)k[7]<<24); case 7 : b+=((Uint32)k[6]<<16); case 6 : b+=((Uint32)k[5]<<8); case 5 : b+=k[4]; case 4 : a+=((Uint32)k[3]<<24); case 3 : a+=((Uint32)k[2]<<16); case 2 : a+=((Uint32)k[1]<<8); case 1 : a+=k[0]; /* case 0: nothing left to add */ } MIX(a,b,c); return c;}Uint32make_hash2(Eterm term){ Uint32 hash; Eterm tmp_big[2];/* (HCONST * {2, ..., 14}) mod 2^32 */#define HCONST_2 0x3c6ef372UL#define HCONST_3 0xdaa66d2bUL#define HCONST_4 0x78dde6e4UL#define HCONST_5 0x1715609dUL#define HCONST_6 0xb54cda56UL#define HCONST_7 0x5384540fUL#define HCONST_8 0xf1bbcdc8UL#define HCONST_9 0x8ff34781UL#define HCONST_10 0x2e2ac13aUL#define HCONST_11 0xcc623af3UL#define HCONST_12 0x6a99b4acUL#define HCONST_13 0x08d12e65UL#define HCONST_14 0xa708a81eUL#define UINT32_HASH_2(Expr1, Expr2, AConst) \ do { \ Uint32 a,b; \ a = AConst + (Uint32) (Expr1); \ b = AConst + (Uint32) (Expr2); \ MIX(a,b,hash); \ } while(0)#define UINT32_HASH(Expr, AConst) UINT32_HASH_2(Expr, 0, AConst)#define SINT32_HASH(Expr, AConst) \ do { \ Sint32 y = (Sint32) (Expr); \ if (y < 0) { \ UINT32_HASH(-y, AConst); \ /* Negative numbers are unnecessarily mixed twice. */ \ } \ UINT32_HASH(y, AConst); \ } while(0)#define IS_SSMALL28(x) (((Uint) (((x) >> (28-1)) + 1)) < 2) /* Optimization. Simple cases before declaration of estack. */ if (primary_tag(term) == TAG_PRIMARY_IMMED1) { switch (term & _TAG_IMMED1_MASK) { case _TAG_IMMED1_IMMED2: switch (term & _TAG_IMMED2_MASK) { case _TAG_IMMED2_ATOM: return atom_tab(atom_val(term))->slot.bucket.hvalue; } break; case _TAG_IMMED1_SMALL: { Sint x = signed_val(term); if (SMALL_BITS > 28 && !IS_SSMALL28(x)) { term = small_to_big(x, tmp_big); break; } hash = 0; SINT32_HASH(x, HCONST); return hash; } } }; { Eterm tmp; DECLARE_ESTACK(s); hash = 0; for (;;) { switch (primary_tag(term)) { case TAG_PRIMARY_LIST: { int c = 0; Uint32 s = 0; Eterm* ptr = list_val(term); while (is_byte(*ptr)) { /* Optimization for strings. */ s = (s << 8) + unsigned_val(*ptr); if (c == 3) { UINT32_HASH(s, HCONST_4); c = s = 0; } else { c++; } term = CDR(ptr); if (is_not_list(term)) break; ptr = list_val(term); } if (c > 0) UINT32_HASH(s, HCONST_4); if (is_list(term)) { term = *ptr; tmp = *++ptr; ESTACK_PUSH(s, tmp); } } break; case TAG_PRIMARY_BOXED: { Eterm hdr = *boxed_val(term); ASSERT(is_header(hdr)); switch (hdr & _TAG_HEADER_MASK) { case ARITYVAL_SUBTAG: { int i; int arity = header_arity(hdr); Eterm* elem = tuple_val(term); UINT32_HASH(arity, HCONST_9); if (arity == 0) /* Empty tuple */ goto hash2_common; for (i = arity; i >= 2; i--) { tmp = elem[i]; ESTACK_PUSH(s, tmp); } term = elem[1]; } break; case EXPORT_SUBTAG: { Export* ep = (Export *) (export_val(term))[1]; UINT32_HASH_2 (ep->code[2], atom_tab(atom_val(ep->code[0]))->slot.bucket.hvalue, HCONST); UINT32_HASH (atom_tab(atom_val(ep->code[1]))->slot.bucket.hvalue, HCONST_14); goto hash2_common; } case FUN_SUBTAG: { ErlFunThing* funp = (ErlFunThing *) fun_val(term); Uint num_free = funp->num_free; UINT32_HASH_2 (num_free, atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue, HCONST); UINT32_HASH_2 (funp->fe->old_index, funp->fe->old_uniq, HCONST); if (num_free == 0) { goto hash2_common; } else { Eterm* bptr = funp->env + num_free - 1; while (num_free-- > 1) { term = *bptr--; ESTACK_PUSH(s, term); } term = *bptr; } } break; case REFC_BINARY_SUBTAG: case HEAP_BINARY_SUBTAG: case SUB_BINARY_SUBTAG: { byte* bptr; unsigned sz = binary_size(term); Uint32 con = HCONST_13 + hash; if (sz == 0) { hash = con; } else { Uint bitoffs; Uint bitsize; ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize); if (bitoffs == 0) { hash = block_hash(bptr, sz, con); } else { byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, sz); erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8); hash = block_hash(buf, sz, con); erts_free(ERTS_ALC_T_TMP, (void *) buf); } } goto hash2_common; } break; case POS_BIG_SUBTAG: case NEG_BIG_SUBTAG: { Eterm* ptr = big_val(term);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -