⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 utils.c

📁 OTP是开放电信平台的简称
💻 C
📖 第 1 页 / 共 5 页
字号:
/*                                                                           *\ *                                                                           *\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *//* make a hash index from an erlang term *//*** There are three hash functions.** make_broken_hash: the one used for backward compatibility** is called from the bif erlang:hash/2. Should never be used** as it a) hashes only a part of binaries, b) hashes bignums really poorly,** c) hashes bignums differently on different endian processors and d) hashes ** small integers with different weights on different bytes.**** make_hash: A hash function that will give the same values for the same** terms regardless of the internal representation. Small integers are ** hashed using the same algorithm as bignums and bignums are hashed ** independent of the CPU endianess. ** Make_hash also hashes pids, ports and references like 32 bit numbers ** (but with different constants). ** make_hash() is called from the bif erlang:phash/2**** The idea behind the hash algorithm is to produce values suitable for ** linear dynamic hashing. We cannot choose the range at all while hashing ** (it's not even supplied to the hashing functions). The good old algorithm** [H = H*C+X mod M, where H is the hash value, C is a "random" constant(or M),** M is the range, preferably a prime, and X is each byte value] is therefore ** modified to:** H = H*C+X mod 2^32, where C is a large prime. This gives acceptable ** "spreading" of the hashes, so that later modulo calculations also will give** acceptable "spreading" in the range. ** We really need to hash on bytes, otherwise the ** upper bytes of a word will be less significant than the lower ones. That's ** not acceptable at all. For internal use one could maybe optimize by using** another hash function, that is less strict but faster. That is, however, not** implemented.**** Short semi-formal description of make_hash:**** In make_hash, the number N is treated like this:**  Abs(N) is hashed bytewise with the least significant byte, B(0), first.**  The number of bytes (J) to calculate hash on in N is **  (the number of _32_ bit words needed to store the unsigned **   value of abs(N)) * 4.**  X = FUNNY_NUMBER2**  If N < 0, Y = FUNNY_NUMBER4 else Y = FUNNY_NUMBER3.**  The hash value is Y*h(J) mod 2^32 where h(J) is calculated like**  h(0) = <initial hash> **  h(i) = h(i-i)*X + B(i-1)** The above should hold regardless of internal representation.** Pids are hashed like small numbers but with differrent constants, as are** ports.** References are hashed like ports but only on the least significant byte.** Binaries are hashed on all bytes (not on the 15 first as in ** make_broken_hash()).** Bytes in lists (possibly text strings) use a simpler multiplication inlined** in the handling of lists, that is an optimization.** Everything else is like in the old hash (make_broken_hash()).**** make_hash2() is faster than make_hash, in particular for bignums** and binaries, and produces better hash values. *//* some prime numbers just above 2 ^ 28 */#define FUNNY_NUMBER1  268440163#define FUNNY_NUMBER2  268439161#define FUNNY_NUMBER3  268435459#define FUNNY_NUMBER4  268436141#define FUNNY_NUMBER5  268438633#define FUNNY_NUMBER6  268437017#define FUNNY_NUMBER7  268438039#define FUNNY_NUMBER8  268437511#define FUNNY_NUMBER9  268439627#define FUNNY_NUMBER10 268440479#define FUNNY_NUMBER11 268440577static Uint32hash_binary_bytes(Eterm bin, Uint sz, Uint32 hash){    byte* ptr;    Uint bitoffs;    Uint bitsize;    if (sz > 0) {	ERTS_GET_BINARY_BYTES(bin, ptr, bitoffs, bitsize);	if (bitoffs == 0) {	    while (sz--) {		hash = hash*FUNNY_NUMBER1 + *ptr++;	    }	} else {	    Uint previous = *ptr++;	    Uint b;	    Uint lshift = bitoffs;	    Uint rshift = 8 - lshift;	    	    while (sz--) {		b = (previous << lshift) & 0xFF;		previous = *ptr++;		b |= previous >> rshift;		hash = hash*FUNNY_NUMBER1 + b;	    }	}    }    return hash;}Uint32make_hash(Eterm term, Uint32 hash){    /*     ** Convenience macro for calculating a bytewise hash on an unsigned 32 bit     ** integer.    ** If the endianess is known, we could be smarter here,     ** but that gives no significant speedup (on a sparc at least)     */#define UINT32_HASH_STEP(Expr, Prime1)					\	do {								\	    Uint32 x = (Uint32) (Expr);	                                \	    hash =							\		(((((hash)*(Prime1) + (x & 0xFF)) * (Prime1) + 	        \		((x >> 8) & 0xFF)) * (Prime1) + 			\		((x >> 16) & 0xFF)) * (Prime1) + 			\		 (x >> 24));						\	} while(0)#define UINT32_HASH_RET(Expr, Prime1, Prime2)   			\	do {								\	    UINT32_HASH_STEP(Expr, Prime1);				\	    return hash * (Prime2);					\	} while(0)#define SINT32_HASH_RET(Expr, Prime1, Prime2, Prime3)	\	do {						\	    Sint32 y = (Sint32) Expr;			\	    if (y < 0) {				\		UINT32_HASH_RET(-y, Prime1, Prime3);	\	    } 						\	    UINT32_HASH_RET(y, Prime1, Prime2);		\	} while(0)			        /*      * Significant additions needed for real 64 bit port with larger fixnums.     */	        /*      * Note, for the simple 64bit port, not utilizing the      * larger word size this function will work without modification.      */    switch (tag_val_def(term)) {    case NIL_DEF:	return hash*FUNNY_NUMBER3 + 1;    case ATOM_DEF:	return hash*FUNNY_NUMBER1 + 	    (atom_tab(atom_val(term))->slot.bucket.hvalue);    case SMALL_DEF:	{	    Sint y1 = signed_val(term);	    Uint y2 = y1 < 0 ? -(Uint)y1 : y1;	    UINT32_HASH_STEP(y2, FUNNY_NUMBER2);#ifdef ARCH_64	    if (y2 >> 32)		UINT32_HASH_STEP(y2 >> 32, FUNNY_NUMBER2);#endif	    return hash * (y1 < 0 ? FUNNY_NUMBER4 : FUNNY_NUMBER3);	}    case BINARY_DEF:	{	    Uint sz = binary_size(term);	    hash = hash_binary_bytes(term, sz, hash);	    return hash*FUNNY_NUMBER4 + sz;	}    case EXPORT_DEF:	{	    Export* ep = (Export *) (export_val(term))[1];	    hash = hash * FUNNY_NUMBER11 + ep->code[2];	    hash = hash*FUNNY_NUMBER1 + 		(atom_tab(atom_val(ep->code[0]))->slot.bucket.hvalue);	    hash = hash*FUNNY_NUMBER1 + 		(atom_tab(atom_val(ep->code[1]))->slot.bucket.hvalue);	    return hash;	}    case FUN_DEF:	{	    ErlFunThing* funp = (ErlFunThing *) fun_val(term);	    Uint num_free = funp->num_free;	    Uint i;	    hash = hash * FUNNY_NUMBER10 + num_free;	    hash = hash*FUNNY_NUMBER1 + 		(atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue);	    hash = hash*FUNNY_NUMBER2 + funp->fe->old_index;	    hash = hash*FUNNY_NUMBER2 + funp->fe->old_uniq;	    for (i = 0; i < num_free; i++) {		hash = make_hash(funp->env[i], hash);	    }	    return hash;	}    case PID_DEF:	UINT32_HASH_RET(internal_pid_number(term),FUNNY_NUMBER5,FUNNY_NUMBER6);    case EXTERNAL_PID_DEF:	UINT32_HASH_RET(external_pid_number(term),FUNNY_NUMBER5,FUNNY_NUMBER6);    case PORT_DEF:	UINT32_HASH_RET(internal_port_number(term),FUNNY_NUMBER9,FUNNY_NUMBER10);    case EXTERNAL_PORT_DEF:	UINT32_HASH_RET(external_port_number(term),FUNNY_NUMBER9,FUNNY_NUMBER10);    case REF_DEF:	UINT32_HASH_RET(internal_ref_numbers(term)[0],FUNNY_NUMBER9,FUNNY_NUMBER10);    case EXTERNAL_REF_DEF:	UINT32_HASH_RET(external_ref_numbers(term)[0],FUNNY_NUMBER9,FUNNY_NUMBER10);    case FLOAT_DEF: 	{	    FloatDef ff;	    GET_DOUBLE(term, ff);	    return hash*FUNNY_NUMBER6 + (ff.fw[0] ^ ff.fw[1]);	}	break;    case LIST_DEF:	{	    Eterm* list = list_val(term);	    while(1) {		if (is_byte(*list)) {		    /* Optimization for strings. 		    ** Note that this hash is different from a 'small' hash,		    ** as multiplications on a Sparc is so slow.		    */		    hash = hash*FUNNY_NUMBER2 + unsigned_val(*list);		} else {		    hash = make_hash(*list, hash);		}		if (is_not_list(CDR(list)))		    return make_hash(CDR(list),hash)*FUNNY_NUMBER8;		list = list_val(CDR(list));	    }	}	break;    case BIG_DEF:	/* Note that this is the exact same thing as the hashing of smalls.*/	{	  /* FIXED: __alpha__ this was remade to be backwards +	   * compaible with 32 bit implmentation 	   */	    Eterm* ptr  = big_val(term);	    Uint n = BIG_SIZE(ptr);	    int is_neg = BIG_SIGN(ptr);	    Uint i;	    if (D_EXP < 32 && (n & 1)) /* emulate 32 bit behaviour (add a MSB 0 :-( )*/	      n++;	    for (i = 0; i < n; i++)  {	      digit_t d = BIG_DIGIT(ptr, i);	      int j;	      for(j = 0; j < sizeof(digit_t); ++j) {		  hash = (hash*FUNNY_NUMBER2) + (d & 0xff);		  d >>= 8;	      }	    }	    if (is_neg) {		return hash*FUNNY_NUMBER4;	    }	    else {	      return hash*FUNNY_NUMBER3;	    }	}	break;    case TUPLE_DEF: 	{	    Eterm* ptr = tuple_val(term);	    Uint arity = arityval(*ptr);	    int i = arity;	    ptr++;	    while(i--)		hash = make_hash(*ptr++, hash);	    return hash*FUNNY_NUMBER9 + arity;	}	break;    default:	erl_exit(1, "Invalid tag in make_hash(0x%X)\n", term);	return 0;    }#undef UINT32_HASH_STEP#undef UINT32_HASH_RET#undef SINT32_HASH_RET}/* Hash function suggested by Bob Jenkins. */#define MIX(a,b,c)                 \do {                               \  a -= b; a -= c; a ^= (c>>13);    \  b -= c; b -= a; b ^= (a<<8);     \  c -= a; c -= b; c ^= (b>>13);    \  a -= b; a -= c; a ^= (c>>12);    \  b -= c; b -= a; b ^= (a<<16);    \  c -= a; c -= b; c ^= (b>>5);     \  a -= b; a -= c; a ^= (c>>3);     \  b -= c; b -= a; b ^= (a<<10);    \  c -= a; c -= b; c ^= (b>>15);    \} while(0)#define HCONST 0x9e3779b9UL /* the golden ratio; an arbitrary value */Uint32block_hash(byte *k, unsigned length, Uint32 initval){   Uint32 a,b,c;   unsigned len;   /* Set up the internal state */   len = length;   a = b = HCONST;   c = initval;           /* the previous hash value */   while (len >= 12)   {      a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24));      b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24));      c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24));      MIX(a,b,c);      k += 12; len -= 12;   }   c += length;   switch(len)              /* all the case statements fall through */   {   case 11: c+=((Uint32)k[10]<<24);   case 10: c+=((Uint32)k[9]<<16);   case 9 : c+=((Uint32)k[8]<<8);      /* the first byte of c is reserved for the length */   case 8 : b+=((Uint32)k[7]<<24);   case 7 : b+=((Uint32)k[6]<<16);   case 6 : b+=((Uint32)k[5]<<8);   case 5 : b+=k[4];   case 4 : a+=((Uint32)k[3]<<24);   case 3 : a+=((Uint32)k[2]<<16);   case 2 : a+=((Uint32)k[1]<<8);   case 1 : a+=k[0];     /* case 0: nothing left to add */   }   MIX(a,b,c);   return c;}Uint32make_hash2(Eterm term){    Uint32 hash;    Eterm tmp_big[2];/* (HCONST * {2, ..., 14}) mod 2^32 */#define HCONST_2 0x3c6ef372UL#define HCONST_3 0xdaa66d2bUL#define HCONST_4 0x78dde6e4UL#define HCONST_5 0x1715609dUL#define HCONST_6 0xb54cda56UL#define HCONST_7 0x5384540fUL#define HCONST_8 0xf1bbcdc8UL#define HCONST_9 0x8ff34781UL#define HCONST_10 0x2e2ac13aUL#define HCONST_11 0xcc623af3UL#define HCONST_12 0x6a99b4acUL#define HCONST_13 0x08d12e65UL#define HCONST_14 0xa708a81eUL#define UINT32_HASH_2(Expr1, Expr2, AConst)       \         do {                                     \	    Uint32 a,b;                           \	    a = AConst + (Uint32) (Expr1);        \	    b = AConst + (Uint32) (Expr2);        \	    MIX(a,b,hash);                        \	 } while(0)#define UINT32_HASH(Expr, AConst) UINT32_HASH_2(Expr, 0, AConst)#define SINT32_HASH(Expr, AConst)                 \	do {					  \            Sint32 y = (Sint32) (Expr);           \	    if (y < 0) {			  \		UINT32_HASH(-y, AConst);          \                /* Negative numbers are unnecessarily mixed twice. */ \	    } 					  \	    UINT32_HASH(y, AConst);          	  \	} while(0)#define IS_SSMALL28(x) (((Uint) (((x) >> (28-1)) + 1)) < 2)    /* Optimization. Simple cases before declaration of estack. */    if (primary_tag(term) == TAG_PRIMARY_IMMED1) {	switch (term & _TAG_IMMED1_MASK) {	case _TAG_IMMED1_IMMED2:	    switch (term & _TAG_IMMED2_MASK) {	    case _TAG_IMMED2_ATOM:		return atom_tab(atom_val(term))->slot.bucket.hvalue;	    }	    break;	case _TAG_IMMED1_SMALL:	  {	      Sint x = signed_val(term);	      if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {		  term = small_to_big(x, tmp_big);		  break;	      }	      hash = 0;	      SINT32_HASH(x, HCONST);	      return hash;	  }	}    };    {    Eterm tmp;    DECLARE_ESTACK(s);    hash = 0;    for (;;) {	switch (primary_tag(term)) {	case TAG_PRIMARY_LIST:	{	    int c = 0;	    Uint32 s = 0;	    Eterm* ptr = list_val(term);	    while (is_byte(*ptr)) {		/* Optimization for strings. */		s = (s << 8) + unsigned_val(*ptr);		if (c == 3) {		    UINT32_HASH(s, HCONST_4);		    c = s = 0;		} else {		    c++;		}		term = CDR(ptr);		if (is_not_list(term))		    break;		ptr = list_val(term);	    }	    if (c > 0)		UINT32_HASH(s, HCONST_4);	    if (is_list(term)) {		term = *ptr;		tmp = *++ptr;		ESTACK_PUSH(s, tmp);	    	    }	}	break;	case TAG_PRIMARY_BOXED:	{	    Eterm hdr = *boxed_val(term);	    ASSERT(is_header(hdr));	    switch (hdr & _TAG_HEADER_MASK) {	    case ARITYVAL_SUBTAG:	    {		int i;		int arity = header_arity(hdr);		Eterm* elem = tuple_val(term);		UINT32_HASH(arity, HCONST_9);		if (arity == 0) /* Empty tuple */ 		    goto hash2_common;		for (i = arity; i >= 2; i--) {		    tmp = elem[i];		    ESTACK_PUSH(s, tmp);		}		term = elem[1];	    }	    break;	    case EXPORT_SUBTAG:	    {		Export* ep = (Export *) (export_val(term))[1];		UINT32_HASH_2		    (ep->code[2], 		     atom_tab(atom_val(ep->code[0]))->slot.bucket.hvalue,		     HCONST);		UINT32_HASH		    (atom_tab(atom_val(ep->code[1]))->slot.bucket.hvalue,		     HCONST_14);		goto hash2_common;	    }	    case FUN_SUBTAG:	    {		ErlFunThing* funp = (ErlFunThing *) fun_val(term);		Uint num_free = funp->num_free;		UINT32_HASH_2		    (num_free, 		     atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue,		     HCONST);		UINT32_HASH_2		    (funp->fe->old_index, funp->fe->old_uniq, HCONST);		if (num_free == 0) {		    goto hash2_common;		} else {		    Eterm* bptr = funp->env + num_free - 1;		    while (num_free-- > 1) {			term = *bptr--;			ESTACK_PUSH(s, term);		    }		    term = *bptr;		}	    }	    break;	    case REFC_BINARY_SUBTAG:	    case HEAP_BINARY_SUBTAG:	    case SUB_BINARY_SUBTAG:	    {		byte* bptr;		unsigned sz = binary_size(term);		Uint32 con = HCONST_13 + hash;		if (sz == 0) {		    hash = con;		} else {		    Uint bitoffs;		    Uint bitsize;		    ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize);		    if (bitoffs == 0) {			hash = block_hash(bptr, sz, con);		    } else {			byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, sz);			erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8);			hash = block_hash(buf, sz, con);			erts_free(ERTS_ALC_T_TMP, (void *) buf);		    }		}		goto hash2_common;	    }	    break;	    case POS_BIG_SUBTAG:	    case NEG_BIG_SUBTAG:	    {		Eterm* ptr = big_val(term);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -