📄 salsa.cpp
字号:
L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \ L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \ L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \ L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \ L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \ L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \ L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \ L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \ L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \ L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \ L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \ L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \ L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \ L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \ L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \ L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \ L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \ L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \ L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \ L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \ L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \ L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \ L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i)#if CRYPTOPP_BOOL_X64 SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15)#else SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15) SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13)#endif AS2( mov REG_roundsLeft, REG_rounds) ASJ( jmp, 2, f) ASL(SSE2_Salsa_Output) AS2( movdqa xmm0, xmm4) AS2( punpckldq xmm4, xmm5) AS2( movdqa xmm1, xmm6) AS2( punpckldq xmm6, xmm7) AS2( movdqa xmm2, xmm4) AS2( punpcklqdq xmm4, xmm6) // e AS2( punpckhqdq xmm2, xmm6) // f AS2( punpckhdq xmm0, xmm5) AS2( punpckhdq xmm1, xmm7) AS2( movdqa xmm6, xmm0) AS2( punpcklqdq xmm0, xmm1) // g AS2( punpckhqdq xmm6, xmm1) // h AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1) AS1( ret) ASL(6)#if CRYPTOPP_BOOL_X64 SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) ASL(2) SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6)#else SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15) SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13) ASL(2) SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6) SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4)#endif AS2( sub REG_roundsLeft, 2) ASJ( jnz, 6, b)#define SSE2_OUTPUT_4(a, b, c, d) \ AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\ AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\ AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\ AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\ AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\ AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\ AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\ AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\ ASC( call, SSE2_Salsa_Output) SSE2_OUTPUT_4(0, 13, 10, 7) SSE2_OUTPUT_4(4, 1, 14, 11) SSE2_OUTPUT_4(8, 5, 2, 15) SSE2_OUTPUT_4(12, 9, 6, 3) AS2( test REG_input, REG_input) ASJ( jz, 9, f) AS2( add REG_input, 12*16) ASL(9) AS2( add REG_output, 12*16) AS2( sub REG_iterationCount, 4) AS2( cmp REG_iterationCount, 4) ASJ( jge, 1, b) AS_POP_IF86( sp) ASL(5) AS2( sub REG_iterationCount, 1) ASJ( jl, 4, f) AS2( movdqa xmm0, [REG_state + 0*16]) AS2( movdqa xmm1, [REG_state + 1*16]) AS2( movdqa xmm2, [REG_state + 2*16]) AS2( movdqa xmm3, [REG_state + 3*16]) AS2( mov REG_roundsLeft, REG_rounds) ASL(0) SSE2_QUARTER_ROUND(0, 1, 3, 7) SSE2_QUARTER_ROUND(1, 2, 0, 9) SSE2_QUARTER_ROUND(2, 3, 1, 13) SSE2_QUARTER_ROUND(3, 0, 2, 18) ASS( pshufd xmm1, xmm1, 2, 1, 0, 3) ASS( pshufd xmm2, xmm2, 1, 0, 3, 2) ASS( pshufd xmm3, xmm3, 0, 3, 2, 1) SSE2_QUARTER_ROUND(0, 3, 1, 7) SSE2_QUARTER_ROUND(3, 2, 0, 9) SSE2_QUARTER_ROUND(2, 1, 3, 13) SSE2_QUARTER_ROUND(1, 0, 2, 18) ASS( pshufd xmm1, xmm1, 0, 3, 2, 1) ASS( pshufd xmm2, xmm2, 1, 0, 3, 2) ASS( pshufd xmm3, xmm3, 2, 1, 0, 3) AS2( sub REG_roundsLeft, 2) ASJ( jnz, 0, b) AS2( paddd xmm0, [REG_state + 0*16]) AS2( paddd xmm1, [REG_state + 1*16]) AS2( paddd xmm2, [REG_state + 2*16]) AS2( paddd xmm3, [REG_state + 3*16]) AS2( add dword ptr [REG_state + 8*4], 1) AS2( adc dword ptr [REG_state + 5*4], 0) AS2( pcmpeqb xmm6, xmm6) // all ones AS2( psrlq xmm6, 32) // lo32 mask ASS( pshufd xmm7, xmm6, 0, 1, 2, 3) // hi32 mask AS2( movdqa xmm4, xmm0) AS2( movdqa xmm5, xmm3) AS2( pand xmm0, xmm7) AS2( pand xmm4, xmm6) AS2( pand xmm3, xmm6) AS2( pand xmm5, xmm7) AS2( por xmm4, xmm5) // 0,13,2,15 AS2( movdqa xmm5, xmm1) AS2( pand xmm1, xmm7) AS2( pand xmm5, xmm6) AS2( por xmm0, xmm5) // 4,1,6,3 AS2( pand xmm6, xmm2) AS2( pand xmm2, xmm7) AS2( por xmm1, xmm6) // 8,5,10,7 AS2( por xmm2, xmm3) // 12,9,14,11 AS2( movdqa xmm5, xmm4) AS2( movdqa xmm6, xmm0) AS3( shufpd xmm4, xmm1, 2) // 0,13,10,7 AS3( shufpd xmm0, xmm2, 2) // 4,1,14,11 AS3( shufpd xmm1, xmm5, 2) // 8,5,2,15 AS3( shufpd xmm2, xmm6, 2) // 12,9,6,3 // output keystream AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4) ASJ( jmp, 5, b) ASL(4) AS_POP_IF86( bp)#ifdef __GNUC__ AS_POP_IF86( bx) ".att_syntax prefix;" : #if CRYPTOPP_BOOL_X64 : "r" (m_rounds), "r" (input), "r" (iterationCount), "r" (m_state.data()), "r" (output), "r" (workspace.m_ptr) : "%eax", "%edx", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" #else : "d" (m_rounds), "a" (input), "c" (iterationCount), "S" (m_state.data()), "D" (output) : "memory", "cc" #endif );#endif#ifdef CRYPTOPP_GENERATE_X64_MASM movdqa xmm6, [rsp + 0200h] movdqa xmm7, [rsp + 0210h] movdqa xmm8, [rsp + 0220h] movdqa xmm9, [rsp + 0230h] movdqa xmm10, [rsp + 0240h] movdqa xmm11, [rsp + 0250h] movdqa xmm12, [rsp + 0260h] movdqa xmm13, [rsp + 0270h] movdqa xmm14, [rsp + 0280h] movdqa xmm15, [rsp + 0290h] add rsp, 10*16 + 32*16 + 8 retSalsa20_OperateKeystream ENDP#else } else#endif#endif#ifndef CRYPTOPP_GENERATE_X64_MASM { word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; while (iterationCount--) { x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7]; x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11]; x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15]; for (int i=m_rounds; i>0; i-=2) { #define QUARTER_ROUND(a, b, c, d) \ b = b ^ rotlFixed(a + d, 7); \ c = c ^ rotlFixed(b + a, 9); \ d = d ^ rotlFixed(c + b, 13); \ a = a ^ rotlFixed(d + c, 18); QUARTER_ROUND(x0, x4, x8, x12) QUARTER_ROUND(x1, x5, x9, x13) QUARTER_ROUND(x2, x6, x10, x14) QUARTER_ROUND(x3, x7, x11, x15) QUARTER_ROUND(x0, x13, x10, x7) QUARTER_ROUND(x1, x14, x11, x4) QUARTER_ROUND(x2, x15, x8, x5) QUARTER_ROUND(x3, x12, x9, x6) } #define SALSA_OUTPUT(x) {\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\ CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}#ifndef CRYPTOPP_DOXYGEN_PROCESSING CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION);#endif if (++m_state[8] == 0) ++m_state[5]; } }} // see comment above if an internal compiler error occurs herevoid XSalsa20_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length){ m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20); if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20)) throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds); GetUserKey(LITTLE_ENDIAN_ORDER, m_key.begin(), m_key.size(), key, length); if (length == 16) memcpy(m_key.begin()+4, m_key.begin(), 16); // "expand 32-byte k" m_state[0] = 0x61707865; m_state[1] = 0x3320646e; m_state[2] = 0x79622d32; m_state[3] = 0x6b206574;}void XSalsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length){ assert(length==24); word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; GetBlock<word32, LittleEndian> get(IV); get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]); x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3]; x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7]; x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; for (int i=m_rounds; i>0; i-=2) { QUARTER_ROUND(x0, x4, x8, x12) QUARTER_ROUND(x1, x5, x9, x13) QUARTER_ROUND(x2, x6, x10, x14) QUARTER_ROUND(x3, x7, x11, x15) QUARTER_ROUND(x0, x13, x10, x7) QUARTER_ROUND(x1, x14, x11, x4) QUARTER_ROUND(x2, x15, x8, x5) QUARTER_ROUND(x3, x12, x9, x6) } m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3; m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5; m_state[8] = m_state[5] = 0;}NAMESPACE_END#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -