📄 vmac.cpp
字号:
rl = MUL32(_i1,_i2); \ ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \ ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \ } #define AccumulateNH(a, b, c) {\ word64 p = MUL32(b, c);\ a##1 += word32((p)>>32);\ a##0 += word32(p);\ p = MUL32((b)>>32, c);\ a##2 += word32((p)>>32);\ a##1 += word32(p);\ p = MUL32((b)>>32, (c)>>32);\ a##2 += p;\ p = MUL32(b, (c)>>32);\ a##1 += word32(p);\ a##2 += word32(p>>32);} #endif#endif#ifndef VMAC_BOOL_32BIT #define VMAC_BOOL_32BIT 0#endif#ifndef ADD128 #define ADD128(rh,rl,ih,il) \ { word64 _il = (il); \ (rl) += (_il); \ (rh) += (ih) + ((rl) < (_il)); \ }#endif#if !(defined(_MSC_VER) && _MSC_VER < 1300)template <bool T_128BitTag>#endifvoid VMAC_Base::VHASH_Update_Template(const word64 *data, size_t blocksRemainingInWord64){ #define INNER_LOOP_ITERATION(j) {\ word64 d0 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+0]);\ word64 d1 = ConditionalByteReverse(LITTLE_ENDIAN_ORDER, data[i+2*j+1]);\ AccumulateNH(nhA, d0+nhK[i+2*j+0], d1+nhK[i+2*j+1]);\ if (T_128BitTag)\ AccumulateNH(nhB, d0+nhK[i+2*j+2], d1+nhK[i+2*j+3]);\ }#if (defined(_MSC_VER) && _MSC_VER < 1300) bool T_128BitTag = m_is128;#endif size_t L1KeyLengthInWord64 = m_L1KeyLength / 8; size_t innerLoopEnd = L1KeyLengthInWord64; const word64 *nhK = m_nhKey(); word64 *polyS = m_polyState(); bool isFirstBlock = true; size_t i; #if !VMAC_BOOL_32BIT #if VMAC_BOOL_WORD128 word128 a1, a2; #else word64 ah1, al1, ah2, al2; #endif word64 kh1, kl1, kh2, kl2; kh1=(polyS+0*4+2)[0]; kl1=(polyS+0*4+2)[1]; if (T_128BitTag) { kh2=(polyS+1*4+2)[0]; kl2=(polyS+1*4+2)[1]; } #endif do { DeclareNH(nhA); DeclareNH(nhB); i = 0; if (blocksRemainingInWord64 < L1KeyLengthInWord64) { if (blocksRemainingInWord64 % 8) { innerLoopEnd = blocksRemainingInWord64 % 8; for (; i<innerLoopEnd; i+=2) INNER_LOOP_ITERATION(0); } innerLoopEnd = blocksRemainingInWord64; } for (; i<innerLoopEnd; i+=8) { INNER_LOOP_ITERATION(0); INNER_LOOP_ITERATION(1); INNER_LOOP_ITERATION(2); INNER_LOOP_ITERATION(3); } blocksRemainingInWord64 -= innerLoopEnd; data += innerLoopEnd; #if VMAC_BOOL_32BIT word32 nh0[2], nh1[2]; word64 nh2[2]; nh0[0] = word32(nhA0); nhA1 += (nhA0 >> 32); nh1[0] = word32(nhA1); nh2[0] = (nhA2 + (nhA1 >> 32)) & m62; if (T_128BitTag) { nh0[1] = word32(nhB0); nhB1 += (nhB0 >> 32); nh1[1] = word32(nhB1); nh2[1] = (nhB2 + (nhB1 >> 32)) & m62; } #define a0 (((word32 *)(polyS+i*4))[2+NativeByteOrder::ToEnum()]) #define a1 (*(((word32 *)(polyS+i*4))+3-NativeByteOrder::ToEnum())) // workaround for GCC 3.2 #define a2 (((word32 *)(polyS+i*4))[0+NativeByteOrder::ToEnum()]) #define a3 (*(((word32 *)(polyS+i*4))+1-NativeByteOrder::ToEnum())) #define aHi ((polyS+i*4)[0]) #define k0 (((word32 *)(polyS+i*4+2))[2+NativeByteOrder::ToEnum()]) #define k1 (*(((word32 *)(polyS+i*4+2))+3-NativeByteOrder::ToEnum())) #define k2 (((word32 *)(polyS+i*4+2))[0+NativeByteOrder::ToEnum()]) #define k3 (*(((word32 *)(polyS+i*4+2))+1-NativeByteOrder::ToEnum())) #define kHi ((polyS+i*4+2)[0]) if (isFirstBlock) { isFirstBlock = false; if (m_isFirstBlock) { m_isFirstBlock = false; for (i=0; i<=(size_t)T_128BitTag; i++) { word64 t = (word64)nh0[i] + k0; a0 = (word32)t; t = (t >> 32) + nh1[i] + k1; a1 = (word32)t; aHi = (t >> 32) + nh2[i] + kHi; } continue; } } for (i=0; i<=(size_t)T_128BitTag; i++) { word64 p, t; word32 t2; p = MUL32(a3, 2*k3); p += nh2[i]; p += MUL32(a0, k2); p += MUL32(a1, k1); p += MUL32(a2, k0); t2 = (word32)p; p >>= 32; p += MUL32(a0, k3); p += MUL32(a1, k2); p += MUL32(a2, k1); p += MUL32(a3, k0); t = (word64(word32(p) & 0x7fffffff) << 32) | t2; p >>= 31; p += nh0[i]; p += MUL32(a0, k0); p += MUL32(a1, 2*k3); p += MUL32(a2, 2*k2); p += MUL32(a3, 2*k1); t2 = (word32)p; p >>= 32; p += nh1[i]; p += MUL32(a0, k1); p += MUL32(a1, k0); p += MUL32(a2, 2*k3); p += MUL32(a3, 2*k2); a0 = t2; a1 = (word32)p; aHi = (p >> 32) + t; } #undef a0 #undef a1 #undef a2 #undef a3 #undef aHi #undef k0 #undef k1 #undef k2 #undef k3 #undef kHi #else // #if VMAC_BOOL_32BIT if (isFirstBlock) { isFirstBlock = false; if (m_isFirstBlock) { m_isFirstBlock = false; #if VMAC_BOOL_WORD128 #define first_poly_step(a, kh, kl, m) a = (m & m126) + ((word128(kh) << 64) | kl) first_poly_step(a1, kh1, kl1, nhA); if (T_128BitTag) first_poly_step(a2, kh2, kl2, nhB); #else #define first_poly_step(ah, al, kh, kl, mh, ml) {\ mh &= m62;\ ADD128(mh, ml, kh, kl); \ ah = mh; al = ml;} first_poly_step(ah1, al1, kh1, kl1, nhA1, nhA0); if (T_128BitTag) first_poly_step(ah2, al2, kh2, kl2, nhB1, nhB0); #endif continue; } else { #if VMAC_BOOL_WORD128 a1 = (word128((polyS+0*4)[0]) << 64) | (polyS+0*4)[1]; #else ah1=(polyS+0*4)[0]; al1=(polyS+0*4)[1]; #endif if (T_128BitTag) { #if VMAC_BOOL_WORD128 a2 = (word128((polyS+1*4)[0]) << 64) | (polyS+1*4)[1]; #else ah2=(polyS+1*4)[0]; al2=(polyS+1*4)[1]; #endif } } } #if VMAC_BOOL_WORD128 #define poly_step(a, kh, kl, m) \ { word128 t1, t2, t3, t4;\ Multiply128(t2, a>>64, kl);\ Multiply128(t3, a, kh);\ Multiply128(t1, a, kl);\ Multiply128(t4, a>>64, 2*kh);\ t2 += t3;\ t4 += t1;\ t2 += t4>>64;\ a = (word128(word64(t2)&m63) << 64) | word64(t4);\ t2 *= 2;\ a += m & m126;\ a += t2>>64;} poly_step(a1, kh1, kl1, nhA); if (T_128BitTag) poly_step(a2, kh2, kl2, nhB); #else #define poly_step(ah, al, kh, kl, mh, ml) \ { word64 t1h, t1l, t2h, t2l, t3h, t3l, z=0; \ /* compute ab*cd, put bd into result registers */ \ MUL64(t2h,t2l,ah,kl); \ MUL64(t3h,t3l,al,kh); \ MUL64(t1h,t1l,ah,2*kh); \ MUL64(ah,al,al,kl); \ /* add together ad + bc */ \ ADD128(t2h,t2l,t3h,t3l); \ /* add 2 * ac to result */ \ ADD128(ah,al,t1h,t1l); \ /* now (ah,al), (t2l,2*t2h) need summing */ \ /* first add the high registers, carrying into t2h */ \ ADD128(t2h,ah,z,t2l); \ /* double t2h and add top bit of ah */ \ t2h += t2h + (ah >> 63); \ ah &= m63; \ /* now add the low registers */ \ mh &= m62; \ ADD128(ah,al,mh,ml); \ ADD128(ah,al,z,t2h); \ } poly_step(ah1, al1, kh1, kl1, nhA1, nhA0); if (T_128BitTag) poly_step(ah2, al2, kh2, kl2, nhB1, nhB0); #endif #endif // #if VMAC_BOOL_32BIT } while (blocksRemainingInWord64); #if VMAC_BOOL_WORD128 (polyS+0*4)[0]=word64(a1>>64); (polyS+0*4)[1]=word64(a1); if (T_128BitTag) { (polyS+1*4)[0]=word64(a2>>64); (polyS+1*4)[1]=word64(a2); } #elif !VMAC_BOOL_32BIT (polyS+0*4)[0]=ah1; (polyS+0*4)[1]=al1; if (T_128BitTag) { (polyS+1*4)[0]=ah2; (polyS+1*4)[1]=al2; } #endif}inline void VMAC_Base::VHASH_Update(const word64 *data, size_t blocksRemainingInWord64){#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86 if (HasSSE2()) { VHASH_Update_SSE2(data, blocksRemainingInWord64, 0); if (m_is128) VHASH_Update_SSE2(data, blocksRemainingInWord64, 1); m_isFirstBlock = false; } else#endif {#if defined(_MSC_VER) && _MSC_VER < 1300 VHASH_Update_Template(data, blocksRemainingInWord64);#else if (m_is128) VHASH_Update_Template<true>(data, blocksRemainingInWord64); else VHASH_Update_Template<false>(data, blocksRemainingInWord64);#endif }}size_t VMAC_Base::HashMultipleBlocks(const word64 *data, size_t length){ size_t remaining = ModPowerOf2(length, m_L1KeyLength); VHASH_Update(data, (length-remaining)/8); return remaining;}static word64 L3Hash(const word64 *input, const word64 *l3Key, size_t len){ word64 rh, rl, t, z=0; word64 p1 = input[0], p2 = input[1]; word64 k1 = l3Key[0], k2 = l3Key[1]; /* fully reduce (p1,p2)+(len,0) mod p127 */ t = p1 >> 63; p1 &= m63; ADD128(p1, p2, len, t); /* At this point, (p1,p2) is at most 2^127+(len<<64) */ t = (p1 > m63) + ((p1 == m63) & (p2 == m64)); ADD128(p1, p2, z, t); p1 &= m63; /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */ t = p1 + (p2 >> 32); t += (t >> 32); t += (word32)t > 0xfffffffeU; p1 += (t >> 32); p2 += (p1 << 32); /* compute (p1+k1)%p64 and (p2+k2)%p64 */ p1 += k1; p1 += (0 - (p1 < k1)) & 257; p2 += k2; p2 += (0 - (p2 < k2)) & 257; /* compute (p1+k1)*(p2+k2)%p64 */ MUL64(rh, rl, p1, p2); t = rh >> 56; ADD128(t, rl, z, rh); rh <<= 8; ADD128(t, rl, z, rh); t += t << 8; rl += t; rl += (0 - (rl < t)) & 257; rl += (0 - (rl > p64-1)) & 257; return rl;}void VMAC_Base::TruncatedFinal(byte *mac, size_t size){ size_t len = ModPowerOf2(GetBitCountLo()/8, m_L1KeyLength); if (len) { memset(m_data()+len, 0, (0-len)%16); VHASH_Update(DataBuf(), ((len+15)/16)*2); len *= 8; // convert to bits } else if (m_isFirstBlock) { // special case for empty string m_polyState()[0] = m_polyState()[2]; m_polyState()[1] = m_polyState()[3]; if (m_is128) { m_polyState()[4] = m_polyState()[6]; m_polyState()[5] = m_polyState()[7]; } } if (m_is128) { word64 t[2]; t[0] = L3Hash(m_polyState(), m_l3Key(), len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()); t[1] = L3Hash(m_polyState()+4, m_l3Key()+2, len) + GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad()+8); if (size == 16) { PutWord(false, BIG_ENDIAN_ORDER, mac, t[0]); PutWord(false, BIG_ENDIAN_ORDER, mac+8, t[1]); } else { t[0] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[0]); t[1] = ConditionalByteReverse(BIG_ENDIAN_ORDER, t[1]); memcpy(mac, t, size); } } else { word64 t = L3Hash(m_polyState(), m_l3Key(), len); t += GetWord<word64>(true, BIG_ENDIAN_ORDER, m_pad() + (m_nonce()[IVSize()-1]&1) * 8); if (size == 8) PutWord(false, BIG_ENDIAN_ORDER, mac, t); else { t = ConditionalByteReverse(BIG_ENDIAN_ORDER, t); memcpy(mac, &t, size); } }}NAMESPACE_END
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -