📄 integer.cpp
字号:
Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \ Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \ Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \ Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \ Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \ Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \ Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \ Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \ Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \ Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \ Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \ Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \ Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \ Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \ Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \ Squ_SaveAcc(28, 14, 15) Squ_NonDiag \ Squ_End(16)/* Bot_N macros: expand Mul_Begin(N), then zero or more Mul_SaveAcc/Mul_Acc columns, then one final Bot_SaveAcc/Bot_Acc column, then Bot_End(N), whose store is R[N-1]. */#define Bot_2 \ Mul_Begin(2) \ Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \ Bot_End(2)#define Bot_4 \ Mul_Begin(4) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \ Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \ Bot_End(4)#define Bot_8 \ Mul_Begin(8) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ Mul_SaveAcc(4, 0, 5) 
Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \ Bot_End(8)#define Bot_16 \ Mul_Begin(16) \ Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \ Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \ Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \ Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \ Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \ Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \ Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \ Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \ Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \ Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \ Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \ Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) 
Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \ Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \ Bot_End(16) #endif/* The helper set below is compiled out by #if 0: it declares both c and d with Declare2Words, and its Bot_* steps accumulate into a separate word e. */#if 0#define Mul_Begin(n) \ Declare2Words(p) \ Declare2Words(c) \ Declare2Words(d) \ MultiplyWords(p, A[0], B[0]) \ AssignWord(c, LowWord(p)) \ AssignWord(d, HighWord(p))#define Mul_Acc(i, j) \ MultiplyWords(p, A[i], B[j]) \ Acc2WordsBy1(c, LowWord(p)) \ Acc2WordsBy1(d, HighWord(p))#define Mul_SaveAcc(k, i, j) \ R[k] = LowWord(c); \ Add2WordsBy1(c, d, HighWord(c)) \ MultiplyWords(p, A[i], B[j]) \ AssignWord(d, HighWord(p)) \ Acc2WordsBy1(c, LowWord(p))#define Mul_End(n) \ R[2*n-3] = LowWord(c); \ Acc2WordsBy1(d, HighWord(c)) \ MultiplyWords(p, A[n-1], B[n-1])\ Acc2WordsBy2(d, p) \ R[2*n-2] = LowWord(d); \ R[2*n-1] = HighWord(d);#define Bot_SaveAcc(k, i, j) \ R[k] = LowWord(c); \ word e = LowWord(d) + HighWord(c); \ e += A[i] * B[j];#define Bot_Acc(i, j) \ e += A[i] * B[j];#define Bot_End(n) \ R[n-1] = e;#else/* Helper set selected by the #else: c is a plain word, d is declared with Declare2Words, and each product is added through MulAcc(c, d, ...). Mul_End takes (k, i) here but (n) in the disabled set above. */#define Mul_Begin(n) \ Declare2Words(p) \ word c; \ Declare2Words(d) \ MultiplyWords(p, A[0], B[0]) \ c = LowWord(p); \ AssignWord(d, HighWord(p))#define Mul_Acc(i, j) \ MulAcc(c, d, A[i], B[j])#define Mul_SaveAcc(k, i, j) \ R[k] = c; \ c = LowWord(d); \ AssignWord(d, HighWord(d)) \ MulAcc(c, d, A[i], B[j])#define Mul_End(k, i) \ R[k] = c; \ MultiplyWords(p, A[i], B[i]) \ Acc2WordsBy2(p, d) \ R[k+1] = LowWord(p); \ R[k+2] = HighWord(p);#define Bot_SaveAcc(k, i, j) \ R[k] = c; \ c = LowWord(d); \ c += A[i] * B[j];#define Bot_Acc(i, j) \ c += A[i] * B[j];#define Bot_End(n) \ R[n-1] = c;#endif/* Squ_* helpers: Squ_Acc adds A[i]*A[j] via MulAcc, Squ_NonDiag expands to Double3Words(c, d), and Squ_Diag expands Squ_NonDiag before adding A[i]*A[i]. */#define Squ_Begin(n) \ Declare2Words(p) \ word c; \ Declare2Words(d) \ Declare2Words(e) \ MultiplyWords(p, A[0], A[0]) \ R[0] = LowWord(p); \ 
AssignWord(e, HighWord(p)) \ MultiplyWords(p, A[0], A[1]) \ c = LowWord(p); \ AssignWord(d, HighWord(p)) \ Squ_NonDiag \#define Squ_NonDiag \ Double3Words(c, d)#define Squ_SaveAcc(k, i, j) \ Acc3WordsBy2(c, d, e) \ R[k] = c; \ MultiplyWords(p, A[i], A[j]) \ c = LowWord(p); \ AssignWord(d, HighWord(p)) \#define Squ_Acc(i, j) \ MulAcc(c, d, A[i], A[j])#define Squ_Diag(i) \ Squ_NonDiag \ MulAcc(c, d, A[i], A[i])#define Squ_End(n) \ Acc3WordsBy2(c, d, e) \ R[2*n-3] = c; \ MultiplyWords(p, A[n-1], A[n-1])\ Acc2WordsBy2(p, e) \ R[2*n-2] = LowWord(p); \ R[2*n-1] = HighWord(p);/* Baseline_* functions: each body is a single macro expansion (Mul_N, Squ_N or Bot_N); the helper macros refer to the parameters by the names R, A and B. */void Baseline_Multiply2(word *R, const word *A, const word *B){ Mul_2}void Baseline_Multiply4(word *R, const word *A, const word *B){ Mul_4}void Baseline_Multiply8(word *R, const word *A, const word *B){ Mul_8}void Baseline_Square2(word *R, const word *A){ Squ_2}void Baseline_Square4(word *R, const word *A){ Squ_4}void Baseline_Square8(word *R, const word *A){ Squ_8}void Baseline_MultiplyBottom2(word *R, const word *A, const word *B){ Bot_2}void Baseline_MultiplyBottom4(word *R, const word *A, const word *B){ Bot_4}void Baseline_MultiplyBottom8(word *R, const word *A, const word *B){ Bot_8}/* Top_* helpers: Top_Begin assigns and Top_Acc adds only HighWord(p) of each product into d; Top_SaveAcc1 first sets c = L<c and adds that value into d before shifting d down into c. */#define Top_Begin(n) \ Declare2Words(p) \ word c; \ Declare2Words(d) \ MultiplyWords(p, A[0], B[n-2]);\ AssignWord(d, HighWord(p));#define Top_Acc(i, j) \ MultiplyWords(p, A[i], B[j]);\ Acc2WordsBy1(d, HighWord(p));#define Top_SaveAcc0(i, j) \ c = LowWord(d); \ AssignWord(d, HighWord(d)) \ MulAcc(c, d, A[i], B[j])#define Top_SaveAcc1(i, j) \ c = L<c; \ Acc2WordsBy1(d, c); \ c = LowWord(d); \ AssignWord(d, HighWord(d)) \ MulAcc(c, d, A[i], B[j])/* Forms the full product in T[4] with Baseline_Multiply2 and copies T[2] and T[3] to R[0] and R[1]; the parameter L is not read in this function. */void Baseline_MultiplyTop2(word *R, const word *A, const word *B, word L){ word T[4]; Baseline_Multiply2(T, A, B); R[0] = T[2]; R[1] = T[3];}void Baseline_MultiplyTop4(word *R, const word *A, const word *B, word L){ Top_Begin(4) Top_Acc(1, 1) Top_Acc(2, 0) \ Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \ Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \ 
Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \ Mul_End(1, 3)}/* Same column scheme as Baseline_MultiplyTop4 but started with Top_Begin(8); with the active Mul_End(k, i) definition the closing Mul_End(5, 7) writes R[5], R[6] and R[7]. */void Baseline_MultiplyTop8(word *R, const word *A, const word *B, word L){ Top_Begin(8) Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \ Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \ Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \ Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \ Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \ Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \ Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \ Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \ Mul_End(5, 7)}#if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is availablevoid Baseline_Multiply16(word *R, const word *A, const word *B){ Mul_16}void Baseline_Square16(word *R, const word *A){ Squ_16}void Baseline_MultiplyBottom16(word *R, const word *A, const word *B){ Bot_16}void Baseline_MultiplyTop16(word *R, const word *A, const word *B, word L){ Top_Begin(16) Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \ Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \ Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \ Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) 
Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \ Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \ Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \ Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \ Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \ Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \ Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \ Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \ Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \ Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \ Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \ Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \ Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \ Mul_End(13, 15)}#endif// ********************************************************#if CRYPTOPP_INTEGER_SSE2CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};#undef Mul_Begin#undef Mul_Acc#undef Top_Begin#undef Top_Acc#undef Squ_Acc#undef Squ_NonDiag#undef Squ_Diag#undef Squ_SaveAcc#undef Squ_Begin#undef Mul_SaveAcc#undef Bot_Acc#undef Bot_SaveAcc#undef 
Bot_End#undef Squ_End#undef Mul_End/* From here on, helpers that were just #undef'd are rebuilt from AS1/AS2 assembler-statement macros. SSE2_FinalSave(k) shifts xmm5 left by 16 bits, adds it into xmm4, and stores the low quadword of xmm4 at [ecx+8*(k)]. */#define SSE2_FinalSave(k) \ AS2( psllq xmm5, 16) \ AS2( paddq xmm4, xmm5) \ AS2( movq QWORD PTR [ecx+8*(k)], xmm4)#define SSE2_SaveShift(k) \ AS2( movq xmm0, xmm6) \ AS2( punpckhqdq xmm6, xmm0) \ AS2( movq xmm1, xmm7) \ AS2( punpckhqdq xmm7, xmm1) \ AS2( paddd xmm6, xmm0) \ AS2( pslldq xmm6, 4) \ AS2( paddd xmm7, xmm1) \ AS2( paddd xmm4, xmm6) \ AS2( pslldq xmm7, 4) \ AS2( movq xmm6, xmm4) \ AS2( paddd xmm5, xmm7) \ AS2( movq xmm7, xmm5) \ AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ AS2( psrlq xmm6, 16) \ AS2( paddq xmm6, xmm7) \ AS2( punpckhqdq xmm4, xmm0) \ AS2( punpckhqdq xmm5, xmm0) \ AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \ AS2( psrlq xmm6, 3*16) \ AS2( paddd xmm4, xmm6) \#define Squ_SSE2_SaveShift(k) \ AS2( movq xmm0, xmm6) \ AS2( punpckhqdq xmm6, xmm0) \ AS2( movq xmm1, xmm7) \ AS2( punpckhqdq xmm7, xmm1) \ AS2( paddd xmm6, xmm0) \ AS2( pslldq xmm6, 4) \ AS2( paddd xmm7, xmm1) \ AS2( paddd xmm4, xmm6) \ AS2( pslldq xmm7, 4) \ AS2( movhlps xmm6, xmm4) \ AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \ AS2( paddd xmm5, xmm7) \ AS2( movhps QWORD PTR [esp+12], xmm5)\ AS2( psrlq xmm4, 16) \ AS2( paddq xmm4, xmm5) \ AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \ AS2( psrlq xmm4, 3*16) \ AS2( paddd xmm4, xmm6) \ AS2( movq QWORD PTR [esp+4], xmm4)\#define SSE2_FirstMultiply(i) \ AS2( movdqa xmm7, [esi+(i)*16])\ AS2( movdqa xmm5, [edi-(i)*16])\ AS2( pmuludq xmm5, xmm7) \ AS2( movdqa xmm4, [ebx])\ AS2( movdqa xmm6, xmm4) \ AS2( pand xmm4, xmm5) \ AS2( psrld xmm5, 16) \ AS2( pmuludq xmm7, [edx-(i)*16])\ AS2( pand xmm6, xmm7) \ AS2( psrld xmm7, 16)#define Squ_Begin(n) \ SquPrologue \ AS2( mov esi, esp)\ AS2( and esp, 0xfffffff0)\ AS2( lea edi, [esp-32*n])\ AS2( sub esp, 32*n+16)\ AS1( push esi)\ AS2( mov esi, edi) \ AS2( xor edx, edx) \ ASL(1) \ ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \ ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \ AS2( movdqa [edi+2*edx], xmm0) \ AS2( psrlq xmm0, 32) \ AS2( movdqa [edi+2*edx+16], xmm0) \ AS2( movdqa [edi+16*n+2*edx], 
xmm1) \ AS2( psrlq xmm1, 32) \ AS2( movdqa [edi+16*n+2*edx+16], xmm1) \ AS2( add edx, 16) \ AS2( cmp edx, 8*(n)) \ ASJ( jne, 1, b) \ AS2( lea edx, [edi+16*n])\ SSE2_FirstMultiply(0) \#define Squ_Acc(i) \ ASL(LSqu##i) \ AS2( movdqa xmm1, [esi+(i)*16]) \ AS2( movdqa xmm0, [edi-(i)*16]) \ AS2( movdqa xmm2, [ebx]) \ AS2( pmuludq xmm0, xmm1) \ AS2( pmuludq xmm1, [edx-(i)*16]) \ AS2( movdqa xmm3, xmm2) \ AS2( pand xmm2, xmm0) \ AS2( psrld xmm0, 16) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm0) \ AS2( pand xmm3, xmm1) \ AS2( psrld xmm1, 16) \ AS2( paddd xmm6, xmm3) \ AS2( paddd xmm7, xmm1) \/* Squ_Acc1 expands to nothing; Squ_Acc2 expands to ASC(call, LSqu##i), and Squ_Acc3 through Squ_Acc8 forward to Squ_Acc2. */#define Squ_Acc1(i) #define Squ_Acc2(i) ASC(call, LSqu##i)#define Squ_Acc3(i) Squ_Acc2(i)#define Squ_Acc4(i) Squ_Acc2(i)#define Squ_Acc5(i) Squ_Acc2(i)#define Squ_Acc6(i) Squ_Acc2(i)#define Squ_Acc7(i) Squ_Acc2(i)#define Squ_Acc8(i) Squ_Acc2(i)/* SSE2_End(E, n): shared tail used by Squ_End, Mul_End and Top_End; it runs SSE2_SaveShift(2*(n)-3), one more multiply step, SSE2_SaveShift(2*(n)-2) and SSE2_FinalSave(2*(n)-1), then AS1(pop esp) followed by the epilogue macro passed as E. */#define SSE2_End(E, n) \ SSE2_SaveShift(2*(n)-3) \ AS2( movdqa xmm7, [esi+16]) \ AS2( movdqa xmm0, [edi]) \ AS2( pmuludq xmm0, xmm7) \ AS2( movdqa xmm2, [ebx]) \ AS2( pmuludq xmm7, [edx]) \ AS2( movdqa xmm6, xmm2) \ AS2( pand xmm2, xmm0) \ AS2( psrld xmm0, 16) \ AS2( paddd xmm4, xmm2) \ AS2( paddd xmm5, xmm0) \ AS2( pand xmm6, xmm7) \ AS2( psrld xmm7, 16) \ SSE2_SaveShift(2*(n)-2) \ SSE2_FinalSave(2*(n)-1) \ AS1( pop esp)\ E#define Squ_End(n) SSE2_End(SquEpilogue, n)#define Mul_End(n) SSE2_End(MulEpilogue, n)#define Top_End(n) SSE2_End(TopEpilogue, n)#define Squ_Column1(k, i) \ Squ_SSE2_SaveShift(k) \
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -