📄 salsa.cpp
字号:
// salsa.cpp - written and placed in the public domain by Wei Dai// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM salsa.cpp" to generate MASM code#include "pch.h"#ifndef CRYPTOPP_GENERATE_X64_MASM#include "salsa.h"#include "misc.h"#include "argnames.h"#include "cpu.h"NAMESPACE_BEGIN(CryptoPP)void Salsa20_TestInstantiations(){ Salsa20::Encryption x;}void Salsa20_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length){ m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20); if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20)) throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds); // m_state is reordered for SSE2 GetBlock<word32, LittleEndian> get1(key); get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]); GetBlock<word32, LittleEndian> get2(key + length - 16); get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]); // "expand 16-byte k" or "expand 32-byte k" m_state[0] = 0x61707865; m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e; m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32; m_state[3] = 0x6b206574;}void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length){ assert(length==8); GetBlock<word32, LittleEndian> get(IV); get(m_state[14])(m_state[11]); m_state[8] = m_state[5] = 0;}void Salsa20_Policy::SeekToIteration(lword iterationCount){ m_state[8] = (word32)iterationCount; m_state[5] = (word32)SafeRightShift<32>(iterationCount);}#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64unsigned int Salsa20_Policy::GetAlignment() const{#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE if (HasSSE2()) return 16; else#endif return GetAlignmentOf<word32>();}unsigned int Salsa20_Policy::GetOptimalBlockSize() const{#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE if (HasSSE2()) return 4*BYTES_PER_ITERATION; else#endif return BYTES_PER_ITERATION;}#endif#ifdef CRYPTOPP_X64_MASM_AVAILABLEextern "C" {void Salsa20_OperateKeystream(byte *output, const byte *input, size_t iterationCount, int rounds, void *state);}#endif#pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly codevoid Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount){#endif // #ifdef CRYPTOPP_GENERATE_X64_MASM#ifdef CRYPTOPP_X64_MASM_AVAILABLE Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.data()); return;#endif#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE#ifdef CRYPTOPP_GENERATE_X64_MASM ALIGN 8 Salsa20_OperateKeystream PROC FRAME mov r10, [rsp + 5*8] ; state alloc_stack(10*16 + 32*16 + 8) save_xmm128 xmm6, 0200h save_xmm128 xmm7, 0210h save_xmm128 xmm8, 0220h save_xmm128 xmm9, 0230h save_xmm128 xmm10, 0240h save_xmm128 xmm11, 0250h save_xmm128 xmm12, 0260h save_xmm128 xmm13, 0270h save_xmm128 xmm14, 0280h save_xmm128 xmm15, 0290h .endprolog #define REG_output rcx #define REG_input rdx #define REG_iterationCount r8 #define REG_state r10 #define REG_rounds e9d #define REG_roundsLeft eax #define REG_temp32 r11d #define REG_temp r11 #define SSE2_WORKSPACE rsp#else if (HasSSE2()) { #if CRYPTOPP_BOOL_X64 #define REG_output %4 #define REG_input %1 #define REG_iterationCount %2 #define REG_state %3 #define REG_rounds %0 #define REG_roundsLeft eax #define REG_temp32 edx #define REG_temp rdx #define SSE2_WORKSPACE %5 FixedSizeAlignedSecBlock<byte, 32*16> workspace; #else #define REG_output edi #define REG_input eax #define REG_iterationCount ecx #define REG_state esi #define REG_rounds edx #define REG_roundsLeft ebx #define REG_temp32 ebp #define REG_temp ebp #define SSE2_WORKSPACE esp + WORD_SZ #endif #ifdef __GNUC__ __asm__ __volatile__ ( ".intel_syntax noprefix;" AS_PUSH_IF86( bx) #else void *s = m_state.data(); word32 r = m_rounds; AS2( mov REG_iterationCount, iterationCount) AS2( mov REG_input, input) AS2( mov REG_output, output) AS2( mov REG_state, s) AS2( mov REG_rounds, r) #endif#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM AS_PUSH_IF86( bp) AS2( cmp REG_iterationCount, 4) ASJ( jl, 5, f)#if CRYPTOPP_BOOL_X86 AS2( mov ebx, esp) AS2( and esp, -16) AS2( sub esp, 32*16) AS1( push ebx)#endif#define SSE2_EXPAND_S(i, j) \ ASS( pshufd xmm4, xmm##i, j, j, j, j) \ AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4) AS2( movdqa xmm0, [REG_state + 0*16]) AS2( movdqa xmm1, [REG_state + 1*16]) AS2( movdqa xmm2, [REG_state + 2*16]) AS2( movdqa xmm3, [REG_state + 3*16]) SSE2_EXPAND_S(0, 0) SSE2_EXPAND_S(0, 1) SSE2_EXPAND_S(0, 2) SSE2_EXPAND_S(0, 3) SSE2_EXPAND_S(1, 0) SSE2_EXPAND_S(1, 2) SSE2_EXPAND_S(1, 3) SSE2_EXPAND_S(2, 1) SSE2_EXPAND_S(2, 2) SSE2_EXPAND_S(2, 3) SSE2_EXPAND_S(3, 0) SSE2_EXPAND_S(3, 1) SSE2_EXPAND_S(3, 2) SSE2_EXPAND_S(3, 3)#define SSE2_EXPAND_S85(i) \ AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \ AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \ AS2( add REG_roundsLeft, 1) \ AS2( adc REG_temp32, 0) ASL(1) AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4]) AS2( mov REG_temp32, dword ptr [REG_state + 5*4]) SSE2_EXPAND_S85(0) SSE2_EXPAND_S85(1) SSE2_EXPAND_S85(2) SSE2_EXPAND_S85(3) AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft) AS2( mov dword ptr [REG_state + 5*4], REG_temp32)#define SSE2_QUARTER_ROUND(a, b, d, i) \ AS2( movdqa xmm4, xmm##d) \ AS2( paddd xmm4, xmm##a) \ AS2( movdqa xmm5, xmm4) \ AS2( pslld xmm4, i) \ AS2( psrld xmm5, 32-i) \ AS2( pxor xmm##b, xmm4) \ AS2( pxor xmm##b, xmm5)#define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) /* y3 */#define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) /* y0 */ #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) /* y0+y3 */ #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) /* z1 */ #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) /* z1+y0 */ #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) #define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) /* z2 */ #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) /* z2+z1 */ #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) /* z3 */ #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) #define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D) /* z3+z2 */ #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) /* xor y0 */ #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) /* z0 */ #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \ L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \ L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \ L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \ L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \ L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \ L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \ L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \ L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \ L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \ L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \ L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \ L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \ L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \ L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \ L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \ L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \ L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \ L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \ L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \ L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \ L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \ L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \ L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \ L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \ L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \ L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \ L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \ L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \ L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \ L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \ L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \ L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i)#define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \ L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \ L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \ L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \ L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \ L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \ L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \ L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \ L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \ L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -