📄 sha.cpp
字号:
// which is (w&x)|(y&(w|x))
// ASMR3(v,w,x,y,z,i,W1,W2,W3,W4): one round that mixes in the constant
// 0x8F1BBCDC; AsmTransform invokes it for rounds 40-59.
//   v,w,x,y,z - registers holding the five working variables for this round
//   i         - round number; not referenced by the macro body
//   W1..W4    - dword indices into the 16-word schedule stored at [esp]
// Work performed (the instructions of the steps are interleaved):
//   edi = rol(W[W1] ^ W[W2] ^ W[W3] ^ W[W4], 1), written back to W[W1]
//   esi = (x & w) | (y & (x | w)); the (x | w) & y half is parked in mm0
//         while esi is reused to build x & w
//   z  += edi + 0x8F1BBCDC + esi + rol(v, 5)
//   w   = rol(w, 30)
// Clobbers edi, esi and mm0.
// Keep comments out of the macro body: every body line is joined to the
// next by a trailing continuation, so a line comment would swallow code.
#define ASMR3(v,w,x,y,z,i,W1,W2,W3,W4) \
AS2( mov edi, [esp + W1 * 4] ) \
AS2( mov esi, x ) \
AS2( xor edi, [esp + W2 * 4] ) \
AS2( or esi, w ) \
AS2( xor edi, [esp + W3 * 4] ) \
AS2( and esi, y ) \
AS2( xor edi, [esp + W4 * 4] ) \
AS2( movd mm0, esi ) \
AS2( rol edi, 1 ) \
AS2( mov esi, x ) \
AS2( mov [esp + W1 * 4], edi ) \
AS2( and esi, w ) \
AS2( lea z, [edi + z + 0x8F1BBCDC] ) \
AS2( movd edi, mm0 ) \
AS2( or esi, edi ) \
AS2( mov edi, v ) \
AS2( rol edi, 5 ) \
AS2( add z, esi ) \
AS2( rol w, 30 ) \
AS2( add z, edi )
// ASMR4 = ASMR2 but different constant
// (ASMR2 is defined outside this part of the file.)
// ASMR4(v,w,x,y,z,i,W1,W2,W3,W4): one round that mixes in the constant
// 0xCA62C1D6; AsmTransform invokes it for rounds 60-79.
//   v,w,x,y,z - registers holding the five working variables for this round
//   i         - round number; not referenced by the macro body
//   W1..W4    - dword indices into the 16-word schedule stored at [esp]
// Work performed (the instructions of the steps are interleaved):
//   edi = rol(W[W1] ^ W[W2] ^ W[W3] ^ W[W4], 1), written back to W[W1]
//   esi = x ^ y ^ w
//   z  += esi + edi + 0xCA62C1D6 + rol(v, 5)
//   w   = rol(w, 30)
// Clobbers edi and esi.
// Keep comments out of the macro body: every body line is joined to the
// next by a trailing continuation, so a line comment would swallow code.
#define ASMR4(v,w,x,y,z,i,W1,W2,W3,W4) \
AS2( mov edi, [esp + W1 * 4] ) \
AS2( mov esi, x ) \
AS2( xor edi, [esp + W2 * 4] ) \
AS2( xor esi, y ) \
AS2( xor edi, [esp + W3 * 4] ) \
AS2( xor esi, w ) \
AS2( xor edi, [esp + W4 * 4] ) \
AS2( rol edi, 1 ) \
AS2( add z, esi ) \
AS2( mov [esp + W1 * 4], edi ) \
AS2( lea z, [edi + z + 0xCA62C1D6] ) \
AS2( mov edi, v ) \
AS2( rol edi, 5 ) \
AS2( rol w, 30 ) \
AS2( add z, edi )
// SHA::AsmTransform - 32-bit x86 inline-assembly SHA-1 block loop.
// Processes `times` consecutive 64-byte blocks starting at `data` and adds
// the result of each block into the five 32-bit digest words.
//
//   data  - input bytes; 64 bytes are consumed per loop iteration
//   times - number of blocks to process. The counter is tested only after a
//           block has been processed (dec/jnz at the bottom of the loop), so
//           callers must pass times >= 1; passing 0 would process one block
//           and then wrap the counter.
//
// Register and stack usage while the loop runs:
//   eax, ebx, ecx, edx, ebp  working variables a..e; their roles rotate by
//                            one position per round and are back in the
//                            starting order after 80 rounds
//   edi, esi                 scratch inside the round macros, which is why
//                            the data and digest pointers are parked elsewhere
//   mm0                      temporary used by ASMR3
//   mm1 / mm2                digest pointer / remaining block count
//   mm3..mm6                 caller's edi, ebx, esi, ebp (restored by EPILOG)
//   [esp] .. [esp + 63]      16-word message schedule W[]
//   [esp + 64]               saved data pointer
//
// With MSVC the function is declared naked, so PROLOG/EPILOG below are
// responsible for the stack frame and for returning.
#ifdef _MSC_VER
__declspec(naked)
#endif
void SHA::AsmTransform(const byte* data, word32 times)
{
#ifdef __GNUC__
// GCC flavour: every instruction is stringified into its own asm statement.
#define AS1(x) asm(#x);
#define AS2(x, y) asm(#x ", " #y);
// PROLOG (GCC): switch the assembler to Intel syntax, save edi/ebx/esi/ebp in
// mm3..mm6, then load three values from the frame into ecx, edi and eax.
// NOTE(review): presumably [ebp + 8] is `this`, [ebp + 12] is `data` and
// [ebp + 16] is `times`, which assumes the compiler has already set up a
// standard ebp frame - confirm against the build flags.
#define PROLOG() \
asm(".intel_syntax noprefix"); \
AS2( movd mm3, edi ) \
AS2( movd mm4, ebx ) \
AS2( movd mm5, esi ) \
AS2( movd mm6, ebp ) \
AS2( mov ecx, DWORD PTR [ebp + 8] ) \
AS2( mov edi, DWORD PTR [ebp + 12] ) \
AS2( mov eax, DWORD PTR [ebp + 16] )
// EPILOG (GCC): restore the saved registers, reset esp from the restored ebp
// (this discards the 68-byte scratch area), clear the MMX state with emms and
// switch the assembler back to AT&T syntax. There is no explicit return here.
#define EPILOG() \
AS2( movd ebp, mm6 ) \
AS2( movd esi, mm5 ) \
AS2( movd ebx, mm4 ) \
AS2( mov esp, ebp ) \
AS2( movd edi, mm3 ) \
AS1( emms ) \
asm(".att_syntax");
#else
// Non-GCC flavour (MSVC-style __asm statements).
#define AS1(x) __asm x
#define AS2(x, y) __asm x, y
// PROLOG: build the ebp frame by hand, save edi/ebx/esi/ebp in mm3..mm6, and
// load `data` into edi and `times` into eax.
// NOTE(review): ecx is not loaded in this variant although the code after
// PROLOG reads it; presumably it still holds `this` on entry (thiscall) -
// confirm.
#define PROLOG() \
AS1( push ebp ) \
AS2( mov ebp, esp ) \
AS2( movd mm3, edi ) \
AS2( movd mm4, ebx ) \
AS2( movd mm5, esi ) \
AS2( movd mm6, ebp ) \
AS2( mov edi, data ) \
AS2( mov eax, times )
// EPILOG: restore the saved registers, reset esp from the restored ebp
// (this discards the 68-byte scratch area), pop the caller's ebp, clear the
// MMX state with emms, and return while popping 8 bytes of stack arguments.
#define EPILOG() \
AS2( movd ebp, mm6 ) \
AS2( movd esi, mm5 ) \
AS2( movd ebx, mm4 ) \
AS2( movd edi, mm3 ) \
AS2( mov esp, ebp ) \
AS1( pop ebp ) \
AS1( emms ) \
AS1( ret 8 )
#endif
PROLOG()
// esi = ecx + offset of the digest words.
// NOTE(review): the 16/20 offsets depend on the class layout, which is not
// visible in this part of the file - confirm before changing the class.
AS2( mov esi, ecx )
#ifdef OLD_GCC_OFFSET
AS2( add esi, 20 ) // digest_[0]
#else
AS2( add esi, 16 ) // digest_[0]
#endif
AS2( movd mm2, eax ) // store times_
AS2( movd mm1, esi ) // store digest_
// 68 bytes = 16 schedule words (64 bytes) + 4 bytes for the data pointer.
AS2( sub esp, 68 ) // make room on stack
// Top of the per-block loop; edi points at the current block and esi at the
// digest words on every entry.
AS1( loopStart: )
// byte reverse 16 words of input, 4 at a time, put on stack for W[]
// part 1
AS2( mov eax, [edi] )
AS2( mov ebx, [edi + 4] )
AS2( mov ecx, [edi + 8] )
AS2( mov edx, [edi + 12] )
AS1( bswap eax )
AS1( bswap ebx )
AS1( bswap ecx )
AS1( bswap edx )
AS2( mov [esp], eax )
AS2( mov [esp + 4], ebx )
AS2( mov [esp + 8], ecx )
AS2( mov [esp + 12], edx )
// part 2
AS2( mov eax, [edi + 16] )
AS2( mov ebx, [edi + 20] )
AS2( mov ecx, [edi + 24] )
AS2( mov edx, [edi + 28] )
AS1( bswap eax )
AS1( bswap ebx )
AS1( bswap ecx )
AS1( bswap edx )
AS2( mov [esp + 16], eax )
AS2( mov [esp + 20], ebx )
AS2( mov [esp + 24], ecx )
AS2( mov [esp + 28], edx )
// part 3
AS2( mov eax, [edi + 32] )
AS2( mov ebx, [edi + 36] )
AS2( mov ecx, [edi + 40] )
AS2( mov edx, [edi + 44] )
AS1( bswap eax )
AS1( bswap ebx )
AS1( bswap ecx )
AS1( bswap edx )
AS2( mov [esp + 32], eax )
AS2( mov [esp + 36], ebx )
AS2( mov [esp + 40], ecx )
AS2( mov [esp + 44], edx )
// part 4
AS2( mov eax, [edi + 48] )
AS2( mov ebx, [edi + 52] )
AS2( mov ecx, [edi + 56] )
AS2( mov edx, [edi + 60] )
AS1( bswap eax )
AS1( bswap ebx )
AS1( bswap ecx )
AS1( bswap edx )
AS2( mov [esp + 48], eax )
AS2( mov [esp + 52], ebx )
AS2( mov [esp + 56], ecx )
AS2( mov [esp + 60], edx )
// edi is used as scratch by the round macros, so park the data pointer.
AS2( mov [esp + 64], edi ) // store edi for end
// read from digest_
AS2( mov eax, [esi] ) // a1
AS2( mov ebx, [esi + 4] ) // b1
AS2( mov ecx, [esi + 8] ) // c1
AS2( mov edx, [esi + 12] ) // d1
AS2( mov ebp, [esi + 16] ) // e1
// 80 rounds. Instead of moving values between registers, each invocation
// passes the five registers rotated by one position. The sixth argument is
// the round number. From round 16 on, the last four arguments are the
// schedule indices i, i+2, i+8 and i+13, all modulo 16.
// Rounds 0-15 (ASMR0 is defined outside this part of the file).
ASMR0(eax, ebx, ecx, edx, ebp, 0)
ASMR0(ebp, eax, ebx, ecx, edx, 1)
ASMR0(edx, ebp, eax, ebx, ecx, 2)
ASMR0(ecx, edx, ebp, eax, ebx, 3)
ASMR0(ebx, ecx, edx, ebp, eax, 4)
ASMR0(eax, ebx, ecx, edx, ebp, 5)
ASMR0(ebp, eax, ebx, ecx, edx, 6)
ASMR0(edx, ebp, eax, ebx, ecx, 7)
ASMR0(ecx, edx, ebp, eax, ebx, 8)
ASMR0(ebx, ecx, edx, ebp, eax, 9)
ASMR0(eax, ebx, ecx, edx, ebp, 10)
ASMR0(ebp, eax, ebx, ecx, edx, 11)
ASMR0(edx, ebp, eax, ebx, ecx, 12)
ASMR0(ecx, edx, ebp, eax, ebx, 13)
ASMR0(ebx, ecx, edx, ebp, eax, 14)
ASMR0(eax, ebx, ecx, edx, ebp, 15)
// Rounds 16-19 (ASMR1 is defined outside this part of the file).
ASMR1(ebp, eax, ebx, ecx, edx, 16, 0, 2, 8, 13)
ASMR1(edx, ebp, eax, ebx, ecx, 17, 1, 3, 9, 14)
ASMR1(ecx, edx, ebp, eax, ebx, 18, 2, 4, 10, 15)
ASMR1(ebx, ecx, edx, ebp, eax, 19, 3, 5, 11, 0)
// Rounds 20-39 (ASMR2 is defined outside this part of the file).
ASMR2(eax, ebx, ecx, edx, ebp, 20, 4, 6, 12, 1)
ASMR2(ebp, eax, ebx, ecx, edx, 21, 5, 7, 13, 2)
ASMR2(edx, ebp, eax, ebx, ecx, 22, 6, 8, 14, 3)
ASMR2(ecx, edx, ebp, eax, ebx, 23, 7, 9, 15, 4)
ASMR2(ebx, ecx, edx, ebp, eax, 24, 8, 10, 0, 5)
ASMR2(eax, ebx, ecx, edx, ebp, 25, 9, 11, 1, 6)
ASMR2(ebp, eax, ebx, ecx, edx, 26, 10, 12, 2, 7)
ASMR2(edx, ebp, eax, ebx, ecx, 27, 11, 13, 3, 8)
ASMR2(ecx, edx, ebp, eax, ebx, 28, 12, 14, 4, 9)
ASMR2(ebx, ecx, edx, ebp, eax, 29, 13, 15, 5, 10)
ASMR2(eax, ebx, ecx, edx, ebp, 30, 14, 0, 6, 11)
ASMR2(ebp, eax, ebx, ecx, edx, 31, 15, 1, 7, 12)
ASMR2(edx, ebp, eax, ebx, ecx, 32, 0, 2, 8, 13)
ASMR2(ecx, edx, ebp, eax, ebx, 33, 1, 3, 9, 14)
ASMR2(ebx, ecx, edx, ebp, eax, 34, 2, 4, 10, 15)
ASMR2(eax, ebx, ecx, edx, ebp, 35, 3, 5, 11, 0)
ASMR2(ebp, eax, ebx, ecx, edx, 36, 4, 6, 12, 1)
ASMR2(edx, ebp, eax, ebx, ecx, 37, 5, 7, 13, 2)
ASMR2(ecx, edx, ebp, eax, ebx, 38, 6, 8, 14, 3)
ASMR2(ebx, ecx, edx, ebp, eax, 39, 7, 9, 15, 4)
// Rounds 40-59: ASMR3 (constant 0x8F1BBCDC).
ASMR3(eax, ebx, ecx, edx, ebp, 40, 8, 10, 0, 5)
ASMR3(ebp, eax, ebx, ecx, edx, 41, 9, 11, 1, 6)
ASMR3(edx, ebp, eax, ebx, ecx, 42, 10, 12, 2, 7)
ASMR3(ecx, edx, ebp, eax, ebx, 43, 11, 13, 3, 8)
ASMR3(ebx, ecx, edx, ebp, eax, 44, 12, 14, 4, 9)
ASMR3(eax, ebx, ecx, edx, ebp, 45, 13, 15, 5, 10)
ASMR3(ebp, eax, ebx, ecx, edx, 46, 14, 0, 6, 11)
ASMR3(edx, ebp, eax, ebx, ecx, 47, 15, 1, 7, 12)
ASMR3(ecx, edx, ebp, eax, ebx, 48, 0, 2, 8, 13)
ASMR3(ebx, ecx, edx, ebp, eax, 49, 1, 3, 9, 14)
ASMR3(eax, ebx, ecx, edx, ebp, 50, 2, 4, 10, 15)
ASMR3(ebp, eax, ebx, ecx, edx, 51, 3, 5, 11, 0)
ASMR3(edx, ebp, eax, ebx, ecx, 52, 4, 6, 12, 1)
ASMR3(ecx, edx, ebp, eax, ebx, 53, 5, 7, 13, 2)
ASMR3(ebx, ecx, edx, ebp, eax, 54, 6, 8, 14, 3)
ASMR3(eax, ebx, ecx, edx, ebp, 55, 7, 9, 15, 4)
ASMR3(ebp, eax, ebx, ecx, edx, 56, 8, 10, 0, 5)
ASMR3(edx, ebp, eax, ebx, ecx, 57, 9, 11, 1, 6)
ASMR3(ecx, edx, ebp, eax, ebx, 58, 10, 12, 2, 7)
ASMR3(ebx, ecx, edx, ebp, eax, 59, 11, 13, 3, 8)
// Rounds 60-79: ASMR4 (constant 0xCA62C1D6).
ASMR4(eax, ebx, ecx, edx, ebp, 60, 12, 14, 4, 9)
ASMR4(ebp, eax, ebx, ecx, edx, 61, 13, 15, 5, 10)
ASMR4(edx, ebp, eax, ebx, ecx, 62, 14, 0, 6, 11)
ASMR4(ecx, edx, ebp, eax, ebx, 63, 15, 1, 7, 12)
ASMR4(ebx, ecx, edx, ebp, eax, 64, 0, 2, 8, 13)
ASMR4(eax, ebx, ecx, edx, ebp, 65, 1, 3, 9, 14)
ASMR4(ebp, eax, ebx, ecx, edx, 66, 2, 4, 10, 15)
ASMR4(edx, ebp, eax, ebx, ecx, 67, 3, 5, 11, 0)
ASMR4(ecx, edx, ebp, eax, ebx, 68, 4, 6, 12, 1)
ASMR4(ebx, ecx, edx, ebp, eax, 69, 5, 7, 13, 2)
ASMR4(eax, ebx, ecx, edx, ebp, 70, 6, 8, 14, 3)
ASMR4(ebp, eax, ebx, ecx, edx, 71, 7, 9, 15, 4)
ASMR4(edx, ebp, eax, ebx, ecx, 72, 8, 10, 0, 5)
ASMR4(ecx, edx, ebp, eax, ebx, 73, 9, 11, 1, 6)
ASMR4(ebx, ecx, edx, ebp, eax, 74, 10, 12, 2, 7)
ASMR4(eax, ebx, ecx, edx, ebp, 75, 11, 13, 3, 8)
ASMR4(ebp, eax, ebx, ecx, edx, 76, 12, 14, 4, 9)
ASMR4(edx, ebp, eax, ebx, ecx, 77, 13, 15, 5, 10)
ASMR4(ecx, edx, ebp, eax, ebx, 78, 14, 0, 6, 11)
ASMR4(ebx, ecx, edx, ebp, eax, 79, 15, 1, 7, 12)
// esi was clobbered by the rounds; reload the digest pointer and add the
// working variables into the stored digest words.
AS2( movd esi, mm1 ) // digest_
AS2( add [esi], eax ) // write out
AS2( add [esi + 4], ebx )
AS2( add [esi + 8], ecx )
AS2( add [esi + 12], edx )
AS2( add [esi + 16], ebp )
// setup next round
// ebp is free again after the write-out, so it carries the block counter.
AS2( movd ebp, mm2 ) // times
AS2( mov edi, DWORD PTR [esp + 64] ) // data
AS2( add edi, 64 ) // next round of data
AS2( mov [esp + 64], edi ) // restore
AS1( dec ebp )
// movd leaves the flags from dec untouched, so jnz still tests the counter.
AS2( movd mm2, ebp )
AS1( jnz loopStart )
EPILOG()
}
#endif // DO_SHA_ASM
} // namespace
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -