📄 integer.cpp
字号:
s1 = _mm_add_si64(s1, z4);
s1 = _mm_add_si64(s1, s3);
C[9] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(x22, y22);
s4 = _mm_add_si64(x26, y26);
s1 = _mm_add_si64(s1, z6);
s3 = _mm_add_si64(s3, s4);
s1 = _mm_add_si64(s1, z8);
s1 = _mm_add_si64(s1, s3);
C[10] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(x27, y27);
s1 = _mm_add_si64(s1, z10);
s1 = _mm_add_si64(s1, z12);
s1 = _mm_add_si64(s1, s3);
C[11] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(z14, z16);
s1 = _mm_add_si64(s1, s3);
C[12] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(z18, z20);
s1 = _mm_add_si64(s1, s3);
C[13] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(z22, z26);
s1 = _mm_add_si64(s1, s3);
C[14] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
C[15] = z[27] + _mm_cvtsi64_si32(s1);
_mm_empty();
}
void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
{
__m128i temp[21];
const word *w = (word *)temp;
const __m64 *mw = (__m64 *)w;
const word *x = (word *)temp+7*4;
const __m64 *mx = (__m64 *)x;
const word *y = (word *)temp+7*4*2;
const __m64 *my = (__m64 *)y;
P4_Mul(temp, (__m128i *)A, (__m128i *)B);
P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
C[0] = w[0];
__m64 s1, s2, s3, s4;
__m64 w1 = _mm_cvtsi32_si64(w[1]);
__m64 w4 = mw[2];
__m64 w6 = mw[3];
__m64 w8 = mw[4];
__m64 w10 = mw[5];
__m64 w12 = mw[6];
__m64 w14 = mw[7];
__m64 w16 = mw[8];
__m64 w18 = mw[9];
__m64 w20 = mw[10];
__m64 w22 = mw[11];
__m64 w26 = _mm_cvtsi32_si64(w[26]);
__m64 x0 = _mm_cvtsi32_si64(x[0]);
__m64 x1 = _mm_cvtsi32_si64(x[1]);
__m64 x4 = mx[2];
__m64 x6 = mx[3];
__m64 x8 = mx[4];
__m64 y0 = _mm_cvtsi32_si64(y[0]);
__m64 y1 = _mm_cvtsi32_si64(y[1]);
__m64 y4 = my[2];
__m64 y6 = my[3];
__m64 y8 = my[4];
s1 = _mm_add_si64(w1, w4);
C[1] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s2 = _mm_add_si64(w6, w8);
s1 = _mm_add_si64(s1, s2);
C[2] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s2 = _mm_add_si64(w10, w12);
s1 = _mm_add_si64(s1, s2);
C[3] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(x0, y0);
s2 = _mm_add_si64(w14, w16);
s1 = _mm_add_si64(s1, s3);
s1 = _mm_add_si64(s1, s2);
C[4] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(x1, y1);
s4 = _mm_add_si64(x4, y4);
s1 = _mm_add_si64(s1, w18);
s3 = _mm_add_si64(s3, s4);
s1 = _mm_add_si64(s1, w20);
s1 = _mm_add_si64(s1, s3);
C[5] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
s3 = _mm_add_si64(x6, y6);
s4 = _mm_add_si64(x8, y8);
s1 = _mm_add_si64(s1, w22);
s3 = _mm_add_si64(s3, s4);
s1 = _mm_add_si64(s1, w26);
s1 = _mm_add_si64(s1, s3);
C[6] = _mm_cvtsi64_si32(s1);
s1 = _mm_srli_si64(s1, 32);
C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
_mm_empty();
}
#endif // #ifdef SSE2_INTRINSICS_AVAILABLE
// end optimized
// ********************************************************
#define A0 A
#define A1 (A+N2)
#define B0 B
#define B1 (B+N2)
#define T0 T
#define T1 (T+N2)
#define T2 (T+N)
#define T3 (T+N+N2)
#define R0 R
#define R1 (R+N2)
#define R2 (R+N)
#define R3 (R+N+N2)
//VC60 workaround: compiler bug triggered without the extra dummy parameters
// R[2*N] - result = A*B
// T[2*N] - temporary work space
// A[N] --- multiplier
// B[N] --- multiplicant
void RecursiveMultiply(word *R, word *T, const word *A, const word *B,
unsigned int N)
{
assert(N>=2 && N%2==0);
if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
LowLevel::Multiply8(R, A, B);
else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
LowLevel::Multiply4(R, A, B);
else if (N==2)
LowLevel::Multiply2(R, A, B);
else
{
const unsigned int N2 = N/2;
int carry;
int aComp = Compare(A0, A1, N2);
int bComp = Compare(B0, B1, N2);
switch (2*aComp + aComp + bComp)
{
case -4:
LowLevel::Subtract(R0, A1, A0, N2);
LowLevel::Subtract(R1, B0, B1, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
LowLevel::Subtract(T1, T1, R0, N2);
carry = -1;
break;
case -2:
LowLevel::Subtract(R0, A1, A0, N2);
LowLevel::Subtract(R1, B0, B1, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
carry = 0;
break;
case 2:
LowLevel::Subtract(R0, A0, A1, N2);
LowLevel::Subtract(R1, B1, B0, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
carry = 0;
break;
case 4:
LowLevel::Subtract(R0, A1, A0, N2);
LowLevel::Subtract(R1, B0, B1, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
LowLevel::Subtract(T1, T1, R1, N2);
carry = -1;
break;
default:
SetWords(T0, 0, N);
carry = 0;
}
RecursiveMultiply(R0, T2, A0, B0, N2);
RecursiveMultiply(R2, T2, A1, B1, N2);
// now T[01] holds (A1-A0)*(B0-B1),R[01] holds A0*B0, R[23] holds A1*B1
carry += LowLevel::Add(T0, T0, R0, N);
carry += LowLevel::Add(T0, T0, R2, N);
carry += LowLevel::Add(R1, R1, T0, N);
assert (carry >= 0 && carry <= 2);
Increment(R3, N2, carry);
}
}
void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
{
assert(N && N%2==0);
if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
LowLevel::Square8(R, A);
if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
LowLevel::Square4(R, A);
else if (N==2)
LowLevel::Square2(R, A);
else
{
const unsigned int N2 = N/2;
RecursiveSquare(R0, T2, A0, N2);
RecursiveSquare(R2, T2, A1, N2);
RecursiveMultiply(T0, T2, A0, A1, N2);
word carry = LowLevel::Add(R1, R1, T0, N);
carry += LowLevel::Add(R1, R1, T0, N);
Increment(R3, N2, carry);
}
}
// R[N] - bottom half of A*B
// T[N] - temporary work space
// A[N] - multiplier
// B[N] - multiplicant
void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B,
unsigned int N)
{
assert(N>=2 && N%2==0);
if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
LowLevel::Multiply8Bottom(R, A, B);
else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
LowLevel::Multiply4Bottom(R, A, B);
else if (N==2)
LowLevel::Multiply2Bottom(R, A, B);
else
{
const unsigned int N2 = N/2;
RecursiveMultiply(R, T, A0, B0, N2);
RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
LowLevel::Add(R1, R1, T0, N2);
RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
LowLevel::Add(R1, R1, T0, N2);
}
}
void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A,
const word *B, unsigned int N)
{
assert(N>=2 && N%2==0);
if (N==4)
{
LowLevel::Multiply4(T, A, B);
memcpy(R, T+4, 4*WORD_SIZE);
}
else if (N==2)
{
LowLevel::Multiply2(T, A, B);
memcpy(R, T+2, 2*WORD_SIZE);
}
else
{
const unsigned int N2 = N/2;
int carry;
int aComp = Compare(A0, A1, N2);
int bComp = Compare(B0, B1, N2);
switch (2*aComp + aComp + bComp)
{
case -4:
LowLevel::Subtract(R0, A1, A0, N2);
LowLevel::Subtract(R1, B0, B1, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
LowLevel::Subtract(T1, T1, R0, N2);
carry = -1;
break;
case -2:
LowLevel::Subtract(R0, A1, A0, N2);
LowLevel::Subtract(R1, B0, B1, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
carry = 0;
break;
case 2:
LowLevel::Subtract(R0, A0, A1, N2);
LowLevel::Subtract(R1, B1, B0, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
carry = 0;
break;
case 4:
LowLevel::Subtract(R0, A1, A0, N2);
LowLevel::Subtract(R1, B0, B1, N2);
RecursiveMultiply(T0, T2, R0, R1, N2);
LowLevel::Subtract(T1, T1, R1, N2);
carry = -1;
break;
default:
SetWords(T0, 0, N);
carry = 0;
}
RecursiveMultiply(T2, R0, A1, B1, N2);
// now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
c2 += LowLevel::Subtract(R0, R0, T0, N2);
word t = (Compare(R0, T2, N2) == -1);
carry += t;
carry += Increment(R0, N2, c2+t);
carry += LowLevel::Add(R0, R0, T1, N2);
carry += LowLevel::Add(R0, R0, T3, N2);
assert (carry >= 0 && carry <= 2);
CopyWords(R1, T3, N2);
Increment(R1, N2, carry);
}
}
inline word Add(word *C, const word *A, const word *B, unsigned int N)
{
return LowLevel::Add(C, A, B, N);
}
inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
{
return LowLevel::Subtract(C, A, B, N);
}
inline void Multiply(word *R, word *T, const word *A, const word *B,
unsigned int N)
{
RecursiveMultiply(R, T, A, B, N);
}
inline void Square(word *R, word *T, const word *A, unsigned int N)
{
RecursiveSquare(R, T, A, N);
}
void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA,
const word *B, unsigned int NB)
{
if (NA == NB)
{
if (A == B)
Square(R, T, A, NA);
else
Multiply(R, T, A, B, NA);
return;
}
if (NA > NB)
{
mySTL::swap(A, B);
mySTL::swap(NA, NB);
}
assert(NB % NA == 0);
assert((NB/NA)%2 == 0); // NB is an even multiple of NA
if (NA==2 && !A[1])
{
switch (A[0])
{
case 0:
SetWords(R, 0, NB+2);
return;
case 1:
CopyWords(R, B, NB);
R[NB] = R[NB+1] = 0;
return;
default:
R[NB] = LinearMultiply(R, B, A[0], NB);
R[NB+1] = 0;
return;
}
}
Multiply(R, T, A, B, NA);
CopyWords(T+2*NA, R+NA, NA);
unsigned i;
for (i=2*NA; i<NB; i+=2*NA)
Multiply(T+NA+i, T, A, B+i, NA);
for (i=NA; i<NB; i+=2*NA)
Multiply(R+i, T, A, B+i, NA);
if (Add(R+NA, R+NA, T+2*NA, NB-NA))
Increment(R+NB, NA);
}
void PositiveMultiply(Integer& product, const Integer& a, const Integer& b)
{
unsigned int aSize = RoundupSize(a.WordCount());
unsigned int bSize = RoundupSize(b.WordCount());
product.reg_.CleanNew(RoundupSize(aSize + bSize));
product.sign_ = Integer::POSITIVE;
WordBlock workspace(aSize + bSize);
AsymmetricMultiply(product.reg_.get_buffer(), workspace.get_buffer(),
a.reg_.get_buffer(), aSize, b.reg_.get_buffer(), bSize);
}
void Multiply(Integer &product, const Integer &a, const Integer &b)
{
PositiveMultiply(product, a, b);
if (a.NotNegative() != b.NotNegative())
product.Negate();
}
static inline unsigned int EvenWordCount(const word *X, unsigned int N)
{
while (N && X[N-2]==0 && X[N-1]==0)
N-=2;
return N;
}
unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA,
const word *M, unsigned int N)
{
assert(NA<=N && N && N%2==0);
word *b = T;
word *c = T+N;
word *f = T+2*N;
word *g = T+3*N;
unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
unsigned int k=0, s=0;
SetWords(T, 0, 3*N);
b[0]=1;
CopyWords(f, A, NA);
CopyWords(g, M, N);
while (1)
{
word t=f[0];
while (!t)
{
if (EvenWordCount(f, fgLen)==0)
{
SetWords(R, 0, N);
return 0;
}
ShiftWordsRightByWords(f, fgLen, 1);
if (c[bcLen-1]) bcLen+=2;
assert(bcLen <= N);
ShiftWordsLeftByWords(c, bcLen, 1);
k+=WORD_BITS;
t=f[0];
}
unsigned int i=0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -