📄 integer.cpp
字号:
Top_Begin(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)#ifndef __GNUC__ ASJ( jmp, 0, f) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0)#endif Top_Column0(8) Top_Column1(7) Mul_Column0(0, 6) Mul_Column1(1, 5) Mul_Column0(2, 4) Mul_Column1(3, 3) Mul_Column0(4, 2) Top_End(4)}void SSE2_MultiplyTop32(word *C, const word *A, const word *B, word L){ Top_Begin(16) Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)#ifndef __GNUC__ ASJ( jmp, 0, f) Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2) AS1( ret) ASL(0)#endif Top_Column0(16) Top_Column1(15) Mul_Column0(0, 14) Mul_Column1(1, 13) Mul_Column0(2, 12) Mul_Column1(3, 11) Mul_Column0(4, 10) Mul_Column1(5, 9) Mul_Column0(6, 8) Mul_Column1(7, 7) Mul_Column0(8, 6) Mul_Column1(9, 5) Mul_Column0(10, 4) Mul_Column1(11, 3) Mul_Column0(12, 2) Top_End(8)}#endif // #if CRYPTOPP_INTEGER_SSE2// ********************************************************typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word *C, const word *A, const word *B);typedef void (* PMul)(word *C, const word *A, const word *B);typedef void (* PSqu)(word *C, const word *A);typedef void (* PMulTop)(word *C, const word *A, const word *B, word L);#if CRYPTOPP_INTEGER_SSE2static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;static size_t s_recursionLimit = 8;#elsestatic const size_t s_recursionLimit = 16;#endifstatic PMul s_pMul[9], s_pBot[9];static PSqu s_pSqu[9];static PMulTop s_pTop[9];static void SetFunctionPointers(){ s_pMul[0] = &Baseline_Multiply2; s_pBot[0] = &Baseline_MultiplyBottom2; s_pSqu[0] = &Baseline_Square2; s_pTop[0] = &Baseline_MultiplyTop2; s_pTop[1] = &Baseline_MultiplyTop4;#if CRYPTOPP_INTEGER_SSE2 if (HasSSE2()) {#if _MSC_VER != 1200 || defined(NDEBUG) if (IsP4()) { s_pAdd = &SSE2_Add; s_pSub = &SSE2_Sub; }#endif s_recursionLimit = 32; s_pMul[1] = &SSE2_Multiply4; s_pMul[2] = &SSE2_Multiply8; s_pMul[4] = &SSE2_Multiply16; s_pMul[8] = &SSE2_Multiply32; s_pBot[1] = &SSE2_MultiplyBottom4; s_pBot[2] = &SSE2_MultiplyBottom8; s_pBot[4] = &SSE2_MultiplyBottom16; s_pBot[8] = &SSE2_MultiplyBottom32; s_pSqu[1] = &SSE2_Square4; s_pSqu[2] = &SSE2_Square8; s_pSqu[4] = &SSE2_Square16; s_pSqu[8] = &SSE2_Square32; s_pTop[2] = &SSE2_MultiplyTop8; s_pTop[4] = &SSE2_MultiplyTop16; s_pTop[8] = &SSE2_MultiplyTop32; } else#endif { s_pMul[1] = &Baseline_Multiply4; s_pMul[2] = &Baseline_Multiply8; s_pBot[1] = &Baseline_MultiplyBottom4; s_pBot[2] = &Baseline_MultiplyBottom8; s_pSqu[1] = &Baseline_Square4; s_pSqu[2] = &Baseline_Square8; s_pTop[2] = &Baseline_MultiplyTop8;#if !CRYPTOPP_INTEGER_SSE2 s_pMul[4] = &Baseline_Multiply16; s_pBot[4] = &Baseline_MultiplyBottom16; s_pSqu[4] = &Baseline_Square16; s_pTop[4] = &Baseline_MultiplyTop16;#endif }}inline int Add(word *C, const word *A, const word *B, size_t N){#if CRYPTOPP_INTEGER_SSE2 return s_pAdd(N, C, A, B);#else return Baseline_Add(N, C, A, B);#endif}inline int Subtract(word *C, const word *A, const word *B, size_t N){#if CRYPTOPP_INTEGER_SSE2 return s_pSub(N, C, A, B);#else return Baseline_Sub(N, C, A, B);#endif}// ********************************************************#define A0 A#define A1 (A+N2)#define B0 B#define B1 (B+N2)#define T0 T#define T1 (T+N2)#define T2 (T+N)#define T3 (T+N+N2)#define R0 R#define R1 (R+N2)#define R2 (R+N)#define R3 (R+N+N2)// R[2*N] - result = A*B// T[2*N] - temporary work space// A[N] --- multiplier// B[N] --- multiplicantvoid RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N){ assert(N>=2 && N%2==0); if (N <= s_recursionLimit) s_pMul[N/4](R, A, B); else { const size_t N2 = N/2; size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); RecursiveMultiply(R2, T2, A1, B1, N2); RecursiveMultiply(T0, T2, R0, R1, N2); RecursiveMultiply(R0, T2, A0, B0, N2); // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1 int c2 = Add(R2, R2, R1, N2); int c3 = c2; c2 += Add(R1, R2, R0, N2); c3 += Add(R2, R2, R3, N2); if (AN2 == BN2) c3 -= Subtract(R1, R1, T0, N); else c3 += Add(R1, R1, T0, N); c3 += Increment(R2, N2, c2); assert (c3 >= 0 && c3 <= 2); Increment(R3, N2, c3); }}// R[2*N] - result = A*A// T[2*N] - temporary work space// A[N] --- number to be squaredvoid RecursiveSquare(word *R, word *T, const word *A, size_t N){ assert(N && N%2==0); if (N <= s_recursionLimit) s_pSqu[N/4](R, A); else { const size_t N2 = N/2; RecursiveSquare(R0, T2, A0, N2); RecursiveSquare(R2, T2, A1, N2); RecursiveMultiply(T0, T2, A0, A1, N2); int carry = Add(R1, R1, T0, N); carry += Add(R1, R1, T0, N); Increment(R3, N2, carry); }}// R[N] - bottom half of A*B// T[3*N/2] - temporary work space// A[N] - multiplier// B[N] - multiplicantvoid RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N){ assert(N>=2 && N%2==0); if (N <= s_recursionLimit) s_pBot[N/4](R, A, B); else { const size_t N2 = N/2; RecursiveMultiply(R, T, A0, B0, N2); RecursiveMultiplyBottom(T0, T1, A1, B0, N2); Add(R1, R1, T0, N2); RecursiveMultiplyBottom(T0, T1, A0, B1, N2); Add(R1, R1, T0, N2); }}// R[N] --- upper half of A*B// T[2*N] - temporary work space// L[N] --- lower half of A*B// A[N] --- multiplier// B[N] --- multiplicantvoid MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N){ assert(N>=2 && N%2==0); if (N <= s_recursionLimit) s_pTop[N/4](R, A, B, L[N-1]); else { const size_t N2 = N/2; size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2; Subtract(R0, A + AN2, A + (N2 ^ AN2), N2); size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2; Subtract(R1, B + BN2, B + (N2 ^ BN2), N2); RecursiveMultiply(T0, T2, R0, R1, N2); RecursiveMultiply(R0, T2, A1, B1, N2); // now T[01] holds (A1-A0)*(B0-B1) = A1*B0+A0*B1-A1*B1-A0*B0, R[01] holds A1*B1 int t, c3; int c2 = Subtract(T2, L+N2, L, N2); if (AN2 == BN2) { c2 -= Add(T2, T2, T0, N2); t = (Compare(T2, R0, N2) == -1); c3 = t - Subtract(T2, T2, T1, N2); } else { c2 += Subtract(T2, T2, T0, N2); t = (Compare(T2, R0, N2) == -1); c3 = t + Add(T2, T2, T1, N2); } c2 += t; if (c2 >= 0) c3 += Increment(T2, N2, c2); else c3 -= Decrement(T2, N2, -c2); c3 += Add(R0, T2, R1, N2); assert (c3 >= 0 && c3 <= 2); Increment(R1, N2, c3); }}inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N){ RecursiveMultiply(R, T, A, B, N);}inline void Square(word *R, word *T, const word *A, size_t N){ RecursiveSquare(R, T, A, N);}inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N){ RecursiveMultiplyBottom(R, T, A, B, N);}// R[NA+NB] - result = A*B// T[NA+NB] - temporary work space// A[NA] ---- multiplier// B[NB] ---- multiplicantvoid AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB){ if (NA == NB) { if (A == B) Square(R, T, A, NA); else Multiply(R, T, A, B, NA); return; } if (NA > NB) { std::swap(A, B); std::swap(NA, NB); } assert(NB % NA == 0); if (NA==2 && !A[1]) { switch (A[0]) { case 0: SetWords(R, 0, NB+2); return; case 1: CopyWords(R, B, NB); R[NB] = R[NB+1] = 0; return; default: R[NB] = LinearMultiply(R, B, A[0], NB); R[NB+1] = 0; return; } } size_t i; if ((NB/NA)%2 == 0) { Multiply(R, T, A, B, NA); CopyWords(T+2*NA, R+NA, NA); for (i=2*NA; i<NB; i+=2*NA) Multiply(T+NA+i, T, A, B+i, NA); for (i=NA; i<NB; i+=2*NA) Multiply(R+i, T, A, B+i, NA); } else { for (i=0; i<NB; i+=2*NA) Multiply(R+i, T, A, B+i, NA); for (i=NA; i<NB; i+=2*NA) Multiply(T+NA+i, T, A, B+i, NA); } if (Add(R+NA, R+NA, T+2*NA, NB-NA)) Increment(R+NB, NA);}// R[N] ----- result = A inverse mod 2**(WORD_BITS*N)// T[3*N/2] - temporary work space// A[N] ----- an odd number as inputvoid RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N){ if (N==2) { T[0] = AtomicInverseModPower2(A[0]); T[1] = 0; s_pBot[0](T+2, T, A); TwosComplement(T+2, 2); Increment(T+2, 2, 2); s_pBot[0](R, T, T+2); } else { const size_t N2 = N/2; RecursiveInverseModPower2(R0, T0, A0, N2); T0[0] = 1; SetWords(T0+1, 0, N2-1); MultiplyTop(R1, T1, T0, R0, A0, N2); MultiplyBottom(T0, T1, R0, A1, N2); Add(T0, R1, T0, N2); TwosComplement(T0, N2); MultiplyBottom(R1, T1, R0, T0, N2); }}// R[N] --- result = X/(2**(WORD_BITS*N)) mod M// T[3*N] - temporary work space// X[2*N] - number to be reduced// M[N] --- modulus// U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)void MontgomeryReduce(word *R, word *T, word *X, const word *M, const word *U, size_t N){#if 1 MultiplyBottom(R, T, X, U, N); MultiplyTop(T, T+N, X, R, M, N); word borrow = Subtract(T, X+N, T, N); // defend against timing attack by doing this Add even when not needed word carry = Add(T+N, T, M, N); assert(carry | !borrow); CopyWords(R, T + ((0-borrow) & N), N);#elif 0 const word u = 0-U[0]; Declare2Words(p) for (size_t i=0; i<N; i++) { const word t = u * X[i]; word c = 0; for (size_t j=0; j<N; j+=2) { MultiplyWords(p, t, M[j]); Acc2WordsBy1(p, X[i+j]); Acc2WordsBy1(p, c); X[i+j] = LowWord(p); c = HighWord(p); MultiplyWords(p, t, M[j+1]); Acc2WordsBy1(p, X[i+j+1]); Acc2WordsBy1(p, c); X[i+j+1] = LowWord(p); c = HighWord(p); } if (Increment(X+N+i, N-i, c)) while (!Subtract(X+N, X+N, M, N)) {} } memcpy(R, X+N, N*WORD_SIZE);#else __m64 u = _mm_cvtsi32_si64(0-U[0]), p; for (size_t i=0; i<N; i++) { __m64 t = _mm_cvtsi32_si64(X[i]); t = _mm_mul_su32(t, u); __m64 c = _mm_setzero_si64(); for (size_t j=0; j<N; j+=2) { p = _mm_mul_su32(
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -