📄 rijndael.cpp
字号:
AS2( xor MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\#define MMOV(a,b,c) \ AS2( movzx esi, b)\ AS2( mov MM(a), DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\#endif#define L_SUBKEYS L_INDEX(0)#define L_SAVED_X L_SUBKEYS#define L_KEY12 L_INDEX(16*12)#define L_LASTROUND L_INDEX(16*13)#define L_INBLOCKS L_INDEX(16*14)#define MAP0TO4(i) (ASM_MOD(i+3,4)+1)#define XOR(a,b,c) \ AS2( movzx esi, b)\ AS2( xor a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\#define MOV(a,b,c) \ AS2( movzx esi, b)\ AS2( mov a, DWORD PTR [AS_REG_7+8*WORD_REG(si)+MAP0TO4(c)])\#ifdef CRYPTOPP_GENERATE_X64_MASM ALIGN 8 Rijndael_Enc_AdvancedProcessBlocks PROC FRAME rex_push_reg rsi push_reg rdi push_reg rbx push_reg r12 .endprolog mov L_REG, rcx mov AS_REG_7, ?Te@rdtable@CryptoPP@@3PA_KA mov rdi, QWORD PTR [?g_cacheLineSize@CryptoPP@@3IA]#elif defined(__GNUC__) __asm__ __volatile__ ( ".intel_syntax noprefix;" #if CRYPTOPP_BOOL_X64 AS2( mov L_REG, rcx) #endif AS_PUSH_IF86(bx) AS_PUSH_IF86(bp) AS2( mov AS_REG_7, WORD_REG(si))#else AS_PUSH_IF86(si) AS_PUSH_IF86(di)#if !defined(_MSC_VER) || (_MSC_VER < 1400) AS_PUSH_IF86(bx)#endif AS_PUSH_IF86(bp) AS2( lea AS_REG_7, [Te]) AS2( mov edi, [g_cacheLineSize])#endif#if CRYPTOPP_BOOL_X86 AS2( mov [ecx+16*12+16*4], esp) // save esp to L_SP AS2( lea esp, [ecx-512])#endif // copy subkeys to stack AS2( mov WORD_REG(si), [L_KEYS_BEGIN]) AS2( mov WORD_REG(ax), 16) AS2( and WORD_REG(ax), WORD_REG(si)) AS2( movdqa xmm3, XMMWORD_PTR [WORD_REG(dx)+16+WORD_REG(ax)]) // subkey 1 (non-counter) or 2 (counter) AS2( movdqa [L_KEY12], xmm3) AS2( lea WORD_REG(ax), [WORD_REG(dx)+WORD_REG(ax)+2*16]) AS2( sub WORD_REG(ax), WORD_REG(si)) ASL(0) AS2( movdqa xmm0, [WORD_REG(ax)+WORD_REG(si)]) AS2( movdqa XMMWORD_PTR [L_SUBKEYS+WORD_REG(si)], xmm0) AS2( add WORD_REG(si), 16) AS2( cmp WORD_REG(si), 16*12) ASJ( jl, 0, b) // read subkeys 0, 1 and last AS2( movdqa xmm4, [WORD_REG(ax)+WORD_REG(si)]) // last subkey AS2( movdqa xmm1, [WORD_REG(dx)]) // subkey 0 AS2( MOVD MM(1), [WORD_REG(dx)+4*4]) // 0,1,2,3 AS2( mov ebx, [WORD_REG(dx)+5*4]) // 4,5,6,7 AS2( mov ecx, [WORD_REG(dx)+6*4]) // 8,9,10,11 AS2( mov edx, [WORD_REG(dx)+7*4]) // 12,13,14,15 // load table into cache AS2( xor WORD_REG(ax), WORD_REG(ax)) ASL(9) AS2( mov esi, [AS_REG_7+WORD_REG(ax)]) AS2( add WORD_REG(ax), WORD_REG(di)) AS2( mov esi, [AS_REG_7+WORD_REG(ax)]) AS2( add WORD_REG(ax), WORD_REG(di)) AS2( mov esi, [AS_REG_7+WORD_REG(ax)]) AS2( add WORD_REG(ax), WORD_REG(di)) AS2( mov esi, [AS_REG_7+WORD_REG(ax)]) AS2( add WORD_REG(ax), WORD_REG(di)) AS2( cmp WORD_REG(ax), 2048) ASJ( jl, 9, b) AS1( lfence) AS2( test DWORD PTR [L_LENGTH], 1) ASJ( jz, 8, f) // counter mode one-time setup AS2( mov WORD_REG(si), [L_INBLOCKS]) AS2( movdqu xmm2, [WORD_REG(si)]) // counter AS2( pxor xmm2, xmm1) AS2( psrldq xmm1, 14) AS2( movd eax, xmm1) AS2( mov al, BYTE PTR [WORD_REG(si)+15]) AS2( MOVD MM(2), eax)#if CRYPTOPP_BOOL_X86 AS2( mov eax, 1) AS2( movd mm3, eax)#endif // partial first round, in: xmm2(15,14,13,12;11,10,9,8;7,6,5,4;3,2,1,0), out: mm1, ebx, ecx, edx AS2( movd eax, xmm2) AS2( psrldq xmm2, 4) AS2( movd edi, xmm2) AS2( psrldq xmm2, 4) MXOR( 1, al, 0) // 0 XOR( edx, ah, 1) // 1 AS2( shr eax, 16) XOR( ecx, al, 2) // 2 XOR( ebx, ah, 3) // 3 AS2( mov eax, edi) AS2( movd edi, xmm2) AS2( psrldq xmm2, 4) XOR( ebx, al, 0) // 4 MXOR( 1, ah, 1) // 5 AS2( shr eax, 16) XOR( edx, al, 2) // 6 XOR( ecx, ah, 3) // 7 AS2( mov eax, edi) AS2( movd edi, xmm2) XOR( ecx, al, 0) // 8 XOR( ebx, ah, 1) // 9 AS2( shr eax, 16) MXOR( 1, al, 2) // 10 XOR( edx, ah, 3) // 11 AS2( mov eax, edi) XOR( edx, al, 0) // 12 XOR( ecx, ah, 1) // 13 AS2( shr eax, 16) XOR( ebx, al, 2) // 14 AS2( psrldq xmm2, 3) // partial second round, in: ebx(4,5,6,7), ecx(8,9,10,11), edx(12,13,14,15), out: eax, ebx, edi, mm0 AS2( mov eax, [L_KEY12+0*4]) AS2( mov edi, [L_KEY12+2*4]) AS2( MOVD MM(0), [L_KEY12+3*4]) MXOR( 0, cl, 3) /* 11 */ XOR( edi, bl, 3) /* 7 */ MXOR( 0, bh, 2) /* 6 */ AS2( shr ebx, 16) /* 4,5 */ XOR( eax, bl, 1) /* 5 */ MOV( ebx, bh, 0) /* 4 */ AS2( xor ebx, [L_KEY12+1*4]) XOR( eax, ch, 2) /* 10 */ AS2( shr ecx, 16) /* 8,9 */ XOR( eax, dl, 3) /* 15 */ XOR( ebx, dh, 2) /* 14 */ AS2( shr edx, 16) /* 12,13 */ XOR( edi, ch, 0) /* 8 */ XOR( ebx, cl, 1) /* 9 */ XOR( edi, dl, 1) /* 13 */ MXOR( 0, dh, 0) /* 12 */ AS2( movd ecx, xmm2) AS2( MOVD edx, MM(1)) AS2( MOVD [L_SAVED_X+3*4], MM(0)) AS2( mov [L_SAVED_X+0*4], eax) AS2( mov [L_SAVED_X+1*4], ebx) AS2( mov [L_SAVED_X+2*4], edi) ASJ( jmp, 5, f) ASL(3) // non-counter mode per-block setup AS2( MOVD MM(1), [L_KEY12+0*4]) // 0,1,2,3 AS2( mov ebx, [L_KEY12+1*4]) // 4,5,6,7 AS2( mov ecx, [L_KEY12+2*4]) // 8,9,10,11 AS2( mov edx, [L_KEY12+3*4]) // 12,13,14,15 ASL(8) AS2( mov WORD_REG(ax), [L_INBLOCKS]) AS2( movdqu xmm2, [WORD_REG(ax)]) AS2( mov WORD_REG(si), [L_INXORBLOCKS]) AS2( movdqu xmm5, [WORD_REG(si)]) AS2( pxor xmm2, xmm1) AS2( pxor xmm2, xmm5) // first round, in: xmm2(15,14,13,12;11,10,9,8;7,6,5,4;3,2,1,0), out: eax, ebx, ecx, edx AS2( movd eax, xmm2) AS2( psrldq xmm2, 4) AS2( movd edi, xmm2) AS2( psrldq xmm2, 4) MXOR( 1, al, 0) // 0 XOR( edx, ah, 1) // 1 AS2( shr eax, 16) XOR( ecx, al, 2) // 2 XOR( ebx, ah, 3) // 3 AS2( mov eax, edi) AS2( movd edi, xmm2) AS2( psrldq xmm2, 4) XOR( ebx, al, 0) // 4 MXOR( 1, ah, 1) // 5 AS2( shr eax, 16) XOR( edx, al, 2) // 6 XOR( ecx, ah, 3) // 7 AS2( mov eax, edi) AS2( movd edi, xmm2) XOR( ecx, al, 0) // 8 XOR( ebx, ah, 1) // 9 AS2( shr eax, 16) MXOR( 1, al, 2) // 10 XOR( edx, ah, 3) // 11 AS2( mov eax, edi) XOR( edx, al, 0) // 12 XOR( ecx, ah, 1) // 13 AS2( shr eax, 16) XOR( ebx, al, 2) // 14 MXOR( 1, ah, 3) // 15 AS2( MOVD eax, MM(1)) AS2( add L_REG, [L_KEYS_BEGIN]) AS2( add L_REG, 4*16) ASJ( jmp, 2, f) ASL(1) // counter-mode per-block setup AS2( MOVD ecx, MM(2)) AS2( MOVD edx, MM(1)) AS2( mov eax, [L_SAVED_X+0*4]) AS2( mov ebx, [L_SAVED_X+1*4]) AS2( xor cl, ch) AS2( and WORD_REG(cx), 255) ASL(5)#if CRYPTOPP_BOOL_X86 AS2( paddb MM(2), mm3)#else AS2( add MM(2), 1)#endif // remaining part of second round, in: edx(previous round),esi(keyed counter byte) eax,ebx,[L_SAVED_X+2*4],[L_SAVED_X+3*4], out: eax,ebx,ecx,edx AS2( xor edx, DWORD PTR [AS_REG_7+WORD_REG(cx)*8+3]) XOR( ebx, dl, 3) MOV( ecx, dh, 2) AS2( shr edx, 16) AS2( xor ecx, [L_SAVED_X+2*4]) XOR( eax, dh, 0) MOV( edx, dl, 1) AS2( xor edx, [L_SAVED_X+3*4]) AS2( add L_REG, [L_KEYS_BEGIN]) AS2( add L_REG, 3*16) ASJ( jmp, 4, f)// in: eax(0,1,2,3), ebx(4,5,6,7), ecx(8,9,10,11), edx(12,13,14,15)// out: eax, ebx, edi, mm0#define ROUND() \ MXOR( 0, cl, 3) /* 11 */\ AS2( mov cl, al) /* 8,9,10,3 */\ XOR( edi, ah, 2) /* 2 */\ AS2( shr eax, 16) /* 0,1 */\ XOR( edi, bl, 3) /* 7 */\ MXOR( 0, bh, 2) /* 6 */\ AS2( shr ebx, 16) /* 4,5 */\ MXOR( 0, al, 1) /* 1 */\ MOV( eax, ah, 0) /* 0 */\ XOR( eax, bl, 1) /* 5 */\ MOV( ebx, bh, 0) /* 4 */\ XOR( eax, ch, 2) /* 10 */\ XOR( ebx, cl, 3) /* 3 */\ AS2( shr ecx, 16) /* 8,9 */\ XOR( eax, dl, 3) /* 15 */\ XOR( ebx, dh, 2) /* 14 */\ AS2( shr edx, 16) /* 12,13 */\ XOR( edi, ch, 0) /* 8 */\ XOR( ebx, cl, 1) /* 9 */\ XOR( edi, dl, 1) /* 13 */\ MXOR( 0, dh, 0) /* 12 */\ ASL(2) // 2-round loop AS2( MOVD MM(0), [L_SUBKEYS-4*16+3*4]) AS2( mov edi, [L_SUBKEYS-4*16+2*4]) ROUND() AS2( mov ecx, edi) AS2( xor eax, [L_SUBKEYS-4*16+0*4]) AS2( xor ebx, [L_SUBKEYS-4*16+1*4]) AS2( MOVD edx, MM(0)) ASL(4) AS2( MOVD MM(0), [L_SUBKEYS-4*16+7*4]) AS2( mov edi, [L_SUBKEYS-4*16+6*4]) ROUND() AS2( mov ecx, edi) AS2( xor eax, [L_SUBKEYS-4*16+4*4]) AS2( xor ebx, [L_SUBKEYS-4*16+5*4]) AS2( MOVD edx, MM(0)) AS2( add L_REG, 32) AS2( test L_REG, 255) ASJ( jnz, 2, b) AS2( sub L_REG, 16*16)#define LAST(a, b, c) \ AS2( movzx esi, a )\ AS2( movzx edi, BYTE PTR [AS_REG_7+WORD_REG(si)*8+1] )\ AS2( movzx esi, b )\ AS2( xor edi, DWORD PTR [AS_REG_7+WORD_REG(si)*8+0] )\ AS2( mov WORD PTR [L_LASTROUND+c], di )\ // last round LAST(ch, dl, 2) LAST(dh, al, 6) AS2( shr edx, 16) LAST(ah, bl, 10) AS2( shr eax, 16) LAST(bh, cl, 14) AS2( shr ebx, 16) LAST(dh, al, 12) AS2( shr ecx, 16) LAST(ah, bl, 0) LAST(bh, cl, 4) LAST(ch, dl, 8) AS2( mov WORD_REG(ax), [L_OUTXORBLOCKS]) AS2( mov WORD_REG(bx), [L_OUTBLOCKS]) AS2( mov WORD_REG(cx), [L_LENGTH]) AS2( sub WORD_REG(cx), 16) AS2( movdqu xmm2, [WORD_REG(ax)]) AS2( pxor xmm2, xmm4)#if CRYPTOPP_BOOL_X86 AS2( movdqa xmm0, [L_INCREMENTS]) AS2( paddd xmm0, [L_INBLOCKS]) AS2( movdqa [L_INBLOCKS], xmm0)#else AS2( movdqa xmm0, [L_INCREMENTS+16]) AS2( paddq xmm0, [L_INBLOCKS+16]) AS2( movdqa [L_INBLOCKS+16], xmm0)#endif AS2( pxor xmm2, [L_LASTROUND]) AS2( movdqu [WORD_REG(bx)], xmm2) ASJ( jle, 7, f) AS2( mov [L_LENGTH], WORD_REG(cx)) AS2( test WORD_REG(cx), 1) ASJ( jnz, 1, b)#if CRYPTOPP_BOOL_X64 AS2( movdqa xmm0, [L_INCREMENTS]) AS2( paddq xmm0, [L_INBLOCKS]) AS2( movdqa [L_INBLOCKS], xmm0)#endif ASJ( jmp, 3, b) ASL(7) // erase keys on stack AS2( xorps xmm0, xmm0) AS2( lea WORD_REG(ax), [L_SUBKEYS+7*16]) AS2( movaps [WORD_REG(ax)-7*16], xmm0) AS2( movaps [WORD_REG(ax)-6*16], xmm0) AS2( movaps [WORD_REG(ax)-5*16], xmm0) AS2( movaps [WORD_REG(ax)-4*16], xmm0) AS2( movaps [WORD_REG(ax)-3*16], xmm0) AS2( movaps [WORD_REG(ax)-2*16], xmm0) AS2( movaps [WORD_REG(ax)-1*16], xmm0) AS2( movaps [WORD_REG(ax)+0*16], xmm0) AS2( movaps [WORD_REG(ax)+1*16], xmm0) AS2( movaps [WORD_REG(ax)+2*16], xmm0) AS2( movaps [WORD_REG(ax)+3*16], xmm0) AS2( movaps [WORD_REG(ax)+4*16], xmm0) AS2( movaps [WORD_REG(ax)+5*16], xmm0) AS2( movaps [WORD_REG(ax)+6*16], xmm0)#if CRYPTOPP_BOOL_X86 AS2( mov esp, [L_SP]) AS1( emms)#endif AS_POP_IF86(bp)#if !defined(_MSC_VER) || (_MSC_VER < 1400) AS_POP_IF86(bx)#endif#if defined(_MSC_VER) && CRYPTOPP_BOOL_X86 AS_POP_IF86(di) AS_POP_IF86(si) AS1(ret)#endif#ifdef CRYPTOPP_GENERATE_X64_MASM pop r12 pop rbx pop rdi pop rsi ret Rijndael_Enc_AdvancedProcessBlocks ENDP#endif#ifdef __GNUC__ ".att_syntax prefix;" : : "c" (locals), "d" (k), "S" (Te), "D" (g_cacheLineSize) : "memory", "cc", "%eax" #if CRYPTOPP_BOOL_X64 , "%rbx", "%r8", "%r9", "%r10", "%r11", "%r12" #endif );#endif}#endif#ifndef CRYPTOPP_GENERATE_X64_MASM#ifdef CRYPTOPP_X64_MASM_AVAILABLEextern "C" {void Rijndael_Enc_AdvancedProcessBlocks(void *locals, const word32 *k);}#endif#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)static inline bool AliasedWithTable(const byte *begin, const byte *end){ size_t s0 = size_t(begin)%4096, s1 = size_t(end)%4096; size_t t0 = size_t(Te)%4096, t1 = (size_t(Te)+sizeof(Te))%4096; if (t1 > t0) return (s0 >= t0 && s0 < t1) || (s1 > t0 && s1 <= t1); else return (s0 < t1 || s1 <= t1) || (s0 >= t0 || s1 > t0);}size_t Rijndael::Enc::AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const{ if (length < BLOCKSIZE) return length; if (HasSSE2()) { struct Locals { word32 subkeys[4*12], workspace[8]; const byte *inBlocks, *inXorBlocks, *outXorBlocks; byte *outBlocks; size_t inIncrement, inXorIncrement, outXorIncrement, outIncrement; size_t regSpill, lengthAndCounterFlag, keysBegin; }; size_t increment = BLOCKSIZE; const byte* zeros = (byte *)(Te+256); byte *space; do { space = (byte *)alloca(255+sizeof(Locals)); space += (256-(size_t)space%256)%256; } while (AliasedWithTable(space, space+sizeof(Locals))); if (flags & BT_ReverseDirection) { assert(length % BLOCKSIZE == 0); inBlocks += length - BLOCKSIZE; xorBlocks += length - BLOCKSIZE; outBlocks += length - BLOCKSIZE; increment = 0-increment; } Locals &locals = *(Locals *)space; locals.inBlocks = inBlocks; locals.inXorBlocks = (flags & BT_XorInput) && xorBlocks ? xorBlocks : zeros; locals.outXorBlocks = (flags & BT_XorInput) || !xorBlocks ? zeros : xorBlocks; locals.outBlocks = outBlocks; locals.inIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment; locals.inXorIncrement = (flags & BT_XorInput) && xorBlocks ? increment : 0; locals.outXorIncrement = (flags & BT_XorInput) || !xorBlocks ? 0 : increment; locals.outIncrement = (flags & BT_DontIncrementInOutPointers) ? 0 : increment; locals.lengthAndCounterFlag = length - (length%16) - bool(flags & BT_InBlockIsCounter); int keysToCopy = m_rounds - (flags & BT_InBlockIsCounter ? 3 : 2); locals.keysBegin = (12-keysToCopy)*16; Rijndael_Enc_AdvancedProcessBlocks(&locals, m_key); return length%16; } else return BlockTransformation::AdvancedProcessBlocks(inBlocks, xorBlocks, outBlocks, length, flags);}#endifNAMESPACE_END#endif#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -