📄 integer.cpp
字号:
DWord d = DWord::Multiply(D[di], D[di+2]);
D[1] = D[3] = 0;
unsigned int si = ai + !bi;
word s = D[si];
DWord A0B0 = DWord::Multiply(A[0], B[0]);
DWord t = A0B0 + C[0];
C[0] = t.GetLowHalf();
DWord A1B1 = DWord::Multiply(A[1], B[1]);
t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
C[1] = t.GetLowHalf();
t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
C[2] = t.GetLowHalf();
t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
C[3] = t.GetLowHalf();
return t.GetHighHalf();
}
// MulAcc(x, y): add the product A[x]*B[y] into the three-word running sum
// kept in the caller's locals, where c is the lowest word, d the middle word
// and e the highest word.
// Step by step: c is replaced by the low half of MultiplyAndAdd(A[x], B[y], c),
// the high half of that result is added to d, and the carry out of that
// addition is added to e.
// Requires p (DWord), c, d, e (word) and the arrays A and B to be in scope.
// DWord::MultiplyAndAdd is defined outside this view; presumably it returns
// A[x]*B[y] + c as a double word -- confirm against the DWord class.
// The expansion already ends in ';', so "MulAcc(0, 1);" leaves an extra empty
// statement; do not use it as the sole body of an unbraced if/else.
#define MulAcc(x, y) \
p = DWord::MultiplyAndAdd(A[x], B[y], c); \
c = p.GetLowHalf(); \
p = (DWord) d + p.GetHighHalf(); \
d = p.GetLowHalf(); \
e += p.GetHighHalf();
// SaveMulAcc(s, x, y): emit the finished result word and start the next one.
// Stores c into R[s], then shifts the running sum down by one word while
// adding A[x]*B[y] to it: the new c is the low half of
// MultiplyAndAdd(A[x], B[y], d), the new d is the low half of
// (e + high half of that result), and e is overwritten (not accumulated)
// with the high half of that last addition.
// Requires p (DWord), c, d, e (word) and the arrays R, A and B to be in scope.
// The expansion consists of several statements and ends in ';', so it must
// only be used where a statement sequence is valid.
#define SaveMulAcc(s, x, y) \
R[s] = c; \
p = DWord::MultiplyAndAdd(A[x], B[y], d); \
c = p.GetLowHalf(); \
p = (DWord) e + p.GetHighHalf(); \
d = p.GetLowHalf(); \
e = p.GetHighHalf();
// SquAcc(x, y): add 2*A[x]*A[y] into the three-word running sum (c, d, e).
// The product q = A[x]*A[y] is computed once and then folded into the sum
// twice; the five-statement update (add into c, propagate into d, carry into
// e) therefore appears two times below.
// Requires p and q (DWord), c, d, e (word) and the array A to be in scope.
// Note that only A is referenced; B is not used by this macro.
#define SquAcc(x, y) \
q = DWord::Multiply(A[x], A[y]); \
p = q + c; \
c = p.GetLowHalf(); \
p = (DWord) d + p.GetHighHalf(); \
d = p.GetLowHalf(); \
e += p.GetHighHalf(); \
p = q + c; \
c = p.GetLowHalf(); \
p = (DWord) d + p.GetHighHalf(); \
d = p.GetLowHalf(); \
e += p.GetHighHalf();
// SaveSquAcc(s, x, y): emit the finished result word, then add 2*A[x]*A[y]
// to the next column.
// Stores c into R[s] and computes q = A[x]*A[y] once. The first addition of q
// also shifts the running sum down one word (c <- low(q + d),
// d <- low(e + carry), e <- carry out); the second addition of q is the same
// accumulate step that SquAcc performs (e is added to, not overwritten).
// Requires p and q (DWord), c, d, e (word) and the arrays R and A in scope.
#define SaveSquAcc(s, x, y) \
R[s] = c; \
q = DWord::Multiply(A[x], A[y]); \
p = q + d; \
c = p.GetLowHalf(); \
p = (DWord) e + p.GetHighHalf(); \
d = p.GetLowHalf(); \
e = p.GetHighHalf(); \
p = q + c; \
c = p.GetLowHalf(); \
p = (DWord) d + p.GetHighHalf(); \
d = p.GetLowHalf(); \
e += p.GetHighHalf();
// Computes the full 8-word product of two 4-word operands.
//   R - receives the result in R[0..7], least significant word first
//   A - first operand, words A[0..3] are read
//   B - second operand, words B[0..3] are read
// The product is built one result word ("column") at a time: column k sums
// every A[i]*B[j] with i + j == k into the running sum (c, d, e) maintained
// by the MulAcc/SaveMulAcc macros. Result words are stored before all of A
// and B have been read, so R should not overlap A or B.
void Portable::Multiply4(word *R, const word *A, const word *B)
{
// p, c, d and e are the working variables the accumulate macros expect.
DWord p;
word c, d, e;
// Column 0: the low half of A[0]*B[0] is R[0]; the high half seeds the sum.
p = DWord::Multiply(A[0], B[0]);
R[0] = p.GetLowHalf();
c = p.GetHighHalf();
d = e = 0;
// Column 1 (index sum 1).
MulAcc(0, 1);
MulAcc(1, 0);
// Column 2: store R[1], then add the products with index sum 2.
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
// Column 3: store R[2], then add the products with index sum 3.
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
// Column 4: store R[3], then add the products with index sum 4.
SaveMulAcc(3, 3, 1);
MulAcc(2, 2);
MulAcc(1, 3);
// Column 5: store R[4], then add the products with index sum 5.
SaveMulAcc(4, 2, 3);
MulAcc(3, 2);
// Column 6 has the single product A[3]*B[3]; its high half plus the
// remaining carry word e forms the top word R[7].
R[5] = c;
p = DWord::MultiplyAndAdd(A[3], B[3], d);
R[6] = p.GetLowHalf();
R[7] = e + p.GetHighHalf();
}
// Computes the 4-word square of a 2-word operand.
//   R - receives the result in R[0..3], least significant word first
//   A - operand, words A[0..1] are read
// The cross product A[0]*A[1] is multiplied once and added twice by SquAcc.
void Portable::Square2(word *R, const word *A)
{
// p, q, c, d and e are the working variables the SquAcc macro expects.
DWord p, q;
word c, d, e;
// Column 0: the low half of A[0]^2 is R[0]; the high half seeds the sum.
p = DWord::Multiply(A[0], A[0]);
R[0] = p.GetLowHalf();
c = p.GetHighHalf();
d = e = 0;
// Column 1: 2*A[0]*A[1].
SquAcc(0, 1);
// Column 2: store R[1], add A[1]^2; the leftover carry word e tops up R[3].
R[1] = c;
p = DWord::MultiplyAndAdd(A[1], A[1], d);
R[2] = p.GetLowHalf();
R[3] = e + p.GetHighHalf();
}
// Computes the 8-word square of a 4-word operand.
//   R - receives the result in R[0..7], least significant word first
//   A - operand, words A[0..3] are read
// With _MSC_VER defined this simply forwards to Multiply4(R, A, A); otherwise
// each off-diagonal product A[i]*A[j] (i != j) is multiplied once and added
// twice via SquAcc/SaveSquAcc, and the diagonal terms A[i]^2 are added once
// via MulAcc.
void Portable::Square4(word *R, const word *A)
{
#ifdef _MSC_VER
// VC60 workaround: MSVC 6.0 has an optimization bug that makes
// (dword)A*B where either A or B has been cast to a dword before
// very expensive. Revisit this function when this
// bug is fixed.
Multiply4(R, A, A);
#else
// MulAcc refers to an array named B, so alias it to A for the diagonal terms.
const word *B = A;
DWord p, q;
word c, d, e;
// Column 0: the low half of A[0]^2 is R[0]; the high half seeds the sum.
p = DWord::Multiply(A[0], A[0]);
R[0] = p.GetLowHalf();
c = p.GetHighHalf();
d = e = 0;
// Column 1: 2*A[0]*A[1].
SquAcc(0, 1);
// Column 2: store R[1]; 2*A[2]*A[0] + A[1]^2.
SaveSquAcc(1, 2, 0);
MulAcc(1, 1);
// Column 3: store R[2]; 2*A[0]*A[3] + 2*A[1]*A[2].
SaveSquAcc(2, 0, 3);
SquAcc(1, 2);
// Column 4: store R[3]; 2*A[3]*A[1] + A[2]^2.
SaveSquAcc(3, 3, 1);
MulAcc(2, 2);
// Column 5: store R[4]; 2*A[2]*A[3].
SaveSquAcc(4, 2, 3);
// Column 6: store R[5], add A[3]^2; the leftover carry word e tops up R[7].
R[5] = c;
p = DWord::MultiplyAndAdd(A[3], A[3], d);
R[6] = p.GetLowHalf();
R[7] = e + p.GetHighHalf();
#endif
}
// Computes the full 16-word product of two 8-word operands.
//   R - receives the result in R[0..15], least significant word first
//   A - first operand, words A[0..7] are read
//   B - second operand, words B[0..7] are read
// Same column-by-column scheme as Multiply4: column k sums every A[i]*B[j]
// with i + j == k into the running sum (c, d, e), and each SaveMulAcc stores
// the finished word of the previous column. Result words are stored before
// all of A and B have been read, so R should not overlap A or B.
void Portable::Multiply8(word *R, const word *A, const word *B)
{
// p, c, d and e are the working variables the accumulate macros expect.
DWord p;
word c, d, e;
// Column 0: the low half of A[0]*B[0] is R[0]; the high half seeds the sum.
p = DWord::Multiply(A[0], B[0]);
R[0] = p.GetLowHalf();
c = p.GetHighHalf();
d = e = 0;
// Column 1.
MulAcc(0, 1);
MulAcc(1, 0);
// Column 2 (stores R[1]).
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
// Column 3 (stores R[2]).
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
// Column 4 (stores R[3]).
SaveMulAcc(3, 0, 4);
MulAcc(1, 3);
MulAcc(2, 2);
MulAcc(3, 1);
MulAcc(4, 0);
// Column 5 (stores R[4]).
SaveMulAcc(4, 0, 5);
MulAcc(1, 4);
MulAcc(2, 3);
MulAcc(3, 2);
MulAcc(4, 1);
MulAcc(5, 0);
// Column 6 (stores R[5]).
SaveMulAcc(5, 0, 6);
MulAcc(1, 5);
MulAcc(2, 4);
MulAcc(3, 3);
MulAcc(4, 2);
MulAcc(5, 1);
MulAcc(6, 0);
// Column 7 (stores R[6]); the widest column, with eight products.
SaveMulAcc(6, 0, 7);
MulAcc(1, 6);
MulAcc(2, 5);
MulAcc(3, 4);
MulAcc(4, 3);
MulAcc(5, 2);
MulAcc(6, 1);
MulAcc(7, 0);
// Column 8 (stores R[7]); from here on the columns shrink again.
SaveMulAcc(7, 1, 7);
MulAcc(2, 6);
MulAcc(3, 5);
MulAcc(4, 4);
MulAcc(5, 3);
MulAcc(6, 2);
MulAcc(7, 1);
// Column 9 (stores R[8]).
SaveMulAcc(8, 2, 7);
MulAcc(3, 6);
MulAcc(4, 5);
MulAcc(5, 4);
MulAcc(6, 3);
MulAcc(7, 2);
// Column 10 (stores R[9]).
SaveMulAcc(9, 3, 7);
MulAcc(4, 6);
MulAcc(5, 5);
MulAcc(6, 4);
MulAcc(7, 3);
// Column 11 (stores R[10]).
SaveMulAcc(10, 4, 7);
MulAcc(5, 6);
MulAcc(6, 5);
MulAcc(7, 4);
// Column 12 (stores R[11]).
SaveMulAcc(11, 5, 7);
MulAcc(6, 6);
MulAcc(7, 5);
// Column 13 (stores R[12]).
SaveMulAcc(12, 6, 7);
MulAcc(7, 6);
// Column 14 has the single product A[7]*B[7]; its high half plus the
// remaining carry word e forms the top word R[15].
R[13] = c;
p = DWord::MultiplyAndAdd(A[7], B[7], d);
R[14] = p.GetLowHalf();
R[15] = e + p.GetHighHalf();
}
// Computes only the low 4 words of the product of two 4-word operands.
//   R - receives the result in R[0..3], least significant word first
//   A - first operand, words A[0..3] are read
//   B - second operand, words B[0..3] are read
// Columns 0..2 are accumulated exactly as in Multiply4. The last word R[3]
// is formed with plain word-sized multiplications, so only the low word of
// each column-3 product contributes (assuming word is an unsigned type whose
// arithmetic wraps -- its definition is outside this view).
void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
{
// p, c, d and e are the working variables the accumulate macros expect;
// e is still updated by the macros even though this function never reads it.
DWord p;
word c, d, e;
// Column 0: the low half of A[0]*B[0] is R[0]; the high half seeds the sum.
p = DWord::Multiply(A[0], B[0]);
R[0] = p.GetLowHalf();
c = p.GetHighHalf();
d = e = 0;
// Column 1.
MulAcc(0, 1);
MulAcc(1, 0);
// Column 2 (stores R[1]).
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
// Store R[2]; R[3] is the middle sum word d plus the column-3 products.
R[2] = c;
R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
}
// Computes only the low 8 words of the product of two 8-word operands.
//   R - receives the result in R[0..7], least significant word first
//   A - first operand, words A[0..7] are read
//   B - second operand, words B[0..7] are read
// Columns 0..6 are accumulated exactly as in Multiply8. The last word R[7]
// is formed with plain word-sized multiplications, so only the low word of
// each column-7 product contributes (assuming word is an unsigned type whose
// arithmetic wraps -- its definition is outside this view).
void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
{
// p, c, d and e are the working variables the accumulate macros expect;
// e is still updated by the macros even though the final word ignores it.
DWord p;
word c, d, e;
// Column 0: the low half of A[0]*B[0] is R[0]; the high half seeds the sum.
p = DWord::Multiply(A[0], B[0]);
R[0] = p.GetLowHalf();
c = p.GetHighHalf();
d = e = 0;
// Column 1.
MulAcc(0, 1);
MulAcc(1, 0);
// Column 2 (stores R[1]).
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
// Column 3 (stores R[2]).
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
// Column 4 (stores R[3]).
SaveMulAcc(3, 0, 4);
MulAcc(1, 3);
MulAcc(2, 2);
MulAcc(3, 1);
MulAcc(4, 0);
// Column 5 (stores R[4]).
SaveMulAcc(4, 0, 5);
MulAcc(1, 4);
MulAcc(2, 3);
MulAcc(3, 2);
MulAcc(4, 1);
MulAcc(5, 0);
// Column 6 (stores R[5]).
SaveMulAcc(5, 0, 6);
MulAcc(1, 5);
MulAcc(2, 4);
MulAcc(3, 3);
MulAcc(4, 2);
MulAcc(5, 1);
MulAcc(6, 0);
// Store R[6]; R[7] is the middle sum word d plus the column-7 products.
R[6] = c;
R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
}
#undef MulAcc
#undef SaveMulAcc
#undef SquAcc
#undef SaveSquAcc
#ifdef CRYPTOPP_X86ASM_AVAILABLE
// ************** x86 feature detection ***************
// Run-time switch consulted first by HasSSE2(): while false, HasSSE2()
// reports no SSE2 without probing the CPU. Cleared by DisableSSE2().
static bool s_sse2Enabled = true;
// Executes the CPUID instruction with `input` in EAX and stores the
// resulting registers in output[0..3] in the order EAX, EBX, ECX, EDX.
//   input  - value loaded into EAX before CPUID is executed
//   output - array of at least four word32 values that receives the result
static void CpuId(word32 input, word32 *output)
{
#ifdef __GNUC__
// GCC path: EBX is pushed and popped around CPUID by hand, and the EBX
// result is copied to EDI so it can be returned through the "=D" operand
// as output[1].
__asm__
(
// save ebx in case -fPIC is being used
"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
: "a" (input)
);
#else
// Non-GCC path (MSVC-style inline assembly): the four result registers are
// written straight into the output array through EDI.
__asm
{
mov eax, input
cpuid
mov edi, output
mov [edi], eax
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], edx
}
#endif
}
#ifdef SSE2_INTRINSICS_AVAILABLE
#ifndef _MSC_VER
// Jump target shared between HasSSE2() (which calls setjmp on it) and the
// SIGILL handler below. Only compiled for non-MSVC builds.
static jmp_buf s_env;
// Signal handler that HasSSE2() installs for SIGILL while it executes its
// probe instruction: it jumps back to the setjmp point in HasSSE2(), where
// setjmp then appears to return 1. The signal number argument is ignored.
static void SigIllHandler(int)
{
longjmp(s_env, 1);
}
#endif
// Reports whether the SSE2 code paths may be used on this machine.
//
// Returns false when SSE2 has been switched off through s_sse2Enabled (see
// DisableSSE2()), when CPUID leaf 1 does not advertise SSE2 (EDX bit 26), or
// when actually executing an SSE2 instruction faults. Returns true otherwise.
//
// The trial execution is guarded by structured exception handling on MSVC
// and by a temporary SIGILL handler plus setjmp/longjmp elsewhere. Both
// compiler branches probe with the same SSE2 instruction (xorpd), so the
// check exercises SSE2 itself rather than an SSE-only instruction.
static bool HasSSE2()
{
	// Honour the run-time kill switch before touching the CPU at all.
	if (!s_sse2Enabled)
		return false;

	// cpuid[3] receives EDX; bit 26 of EDX is the SSE2 feature flag.
	word32 cpuid[4];
	CpuId(1, cpuid);
	if ((cpuid[3] & (1 << 26)) == 0)
		return false;

#ifdef _MSC_VER
	// Any exception raised by the probe instruction means SSE2 is unusable.
	__try
	{
		__asm xorpd xmm0, xmm0        // executing SSE2 instruction
	}
	__except (1)
	{
		return false;
	}
	return true;
#else
	// Install a temporary SIGILL handler that longjmps back to s_env.
	typedef void (*SigHandler)(int);
	SigHandler oldHandler = signal(SIGILL, SigIllHandler);
	if (oldHandler == SIG_ERR)
		return false;

	// result is assigned before setjmp and not modified again until after the
	// jump, so its value is well defined on both return paths of setjmp.
	bool result = true;
	if (setjmp(s_env))
		result = false;		// arrived here via SigIllHandler: the probe faulted
	else
		__asm __volatile ("xorpd %xmm0, %xmm0");	// executing SSE2 instruction

	// Always put the previous SIGILL disposition back before returning.
	signal(SIGILL, oldHandler);
	return result;
#endif
}
#endif
static bool IsP4()
{
word32 cpuid[4];
CpuId(0, cpuid);
std::swap(cpuid[2], cpuid[3]);
if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
return false;
CpuId(1, cpuid);
return ((cpuid[0] >> 8) & 0xf) == 0xf;
}
// ************** Pentium/P4 optimizations ***************
// Pentium-tuned replacements for a subset of the Portable routines. Only the
// five functions declared here are overridden (their bodies are outside this
// part of the file); everything else is inherited from Portable. All of them
// use the CRYPTOPP_CDECL calling convention so that they can be reached
// through the PAddSub / PMul function pointers.
class PentiumOptimized : public Portable
{
public:
// Add/Subtract take two N-word inputs A and B and an output C and return a
// word (presumably the carry/borrow out -- confirm against the definitions).
static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
// Fixed-size multiplies with the same parameter lists as the Portable versions.
static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
};
// Routines selected for Pentium 4 class processors (bodies are outside this
// part of the file). Unlike PentiumOptimized this class does not derive from
// Portable; it is only used as a source of function pointers in
// SetPentiumFunctionPointers().
class P4Optimized
{
public:
// Chosen for s_pAdd / s_pSub when IsP4() reports true.
static word CRYPTOPP_CDECL Add(word *C, const word *A, const word *B, unsigned int N);
static word CRYPTOPP_CDECL Subtract(word *C, const word *A, const word *B, unsigned int N);
#ifdef SSE2_INTRINSICS_AVAILABLE
// Only declared when SSE2 intrinsics are available; chosen for the multiply
// pointers when HasSSE2() reports true.
static void CRYPTOPP_CDECL Multiply4(word *C, const word *A, const word *B);
static void CRYPTOPP_CDECL Multiply8(word *C, const word *A, const word *B);
static void CRYPTOPP_CDECL Multiply8Bottom(word *C, const word *A, const word *B);
#endif
};
// Function-pointer types for the run-time selected primitives:
// PAddSub matches the Add/Subtract signature, PMul the fixed-size multiplies.
typedef word (CRYPTOPP_CDECL * PAddSub)(word *C, const word *A, const word *B, unsigned int N);
typedef void (CRYPTOPP_CDECL * PMul)(word *C, const word *A, const word *B);
// Currently selected add/subtract implementations; assigned by
// SetPentiumFunctionPointers() and called through LowLevel::Add/Subtract.
static PAddSub s_pAdd, s_pSub;
#ifdef SSE2_INTRINSICS_AVAILABLE
// Currently selected Multiply4 / Multiply8 / Multiply8Bottom implementations;
// only present when SSE2 intrinsics are available.
static PMul s_pMul4, s_pMul8, s_pMul8B;
#endif
// Selects the add/subtract and (when SSE2 intrinsics are compiled in) the
// fixed-size multiply implementations for the current processor and stores
// them in the s_p* dispatch pointers.
//   - Add/Subtract: P4Optimized when IsP4() is true, else PentiumOptimized.
//   - Multiply4/8/8Bottom: P4Optimized when HasSSE2() is true, else
//     PentiumOptimized.
static void SetPentiumFunctionPointers()
{
	// Query the processor once and pick both add/subtract routines from it.
	const bool runningOnP4 = IsP4();
	s_pAdd = runningOnP4 ? &P4Optimized::Add : &PentiumOptimized::Add;
	s_pSub = runningOnP4 ? &P4Optimized::Subtract : &PentiumOptimized::Subtract;

#ifdef SSE2_INTRINSICS_AVAILABLE
	// The multiply routines depend on SSE2 availability, not on the P4 check.
	const bool sse2Usable = HasSSE2();
	s_pMul4 = sse2Usable ? &P4Optimized::Multiply4 : &PentiumOptimized::Multiply4;
	s_pMul8 = sse2Usable ? &P4Optimized::Multiply8 : &PentiumOptimized::Multiply8;
	s_pMul8B = sse2Usable ? &P4Optimized::Multiply8Bottom : &PentiumOptimized::Multiply8Bottom;
#endif
}
// Dummy constant whose initializer calls SetPentiumFunctionPointers() through
// the comma operator, so the dispatch pointers are filled in when this
// translation unit's static objects are initialized. The stored value (0) is
// not used anywhere in this view.
// NOTE(review): code in other translation units that runs during static
// initialization before this one would presumably still see unset pointers
// -- confirm whether any such caller exists.
static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
// Turns the SSE2 code paths off at run time: clears s_sse2Enabled so that
// HasSSE2() returns false, then re-runs the pointer selection so the multiply
// pointers (when SSE2_INTRINSICS_AVAILABLE is defined) switch to the
// PentiumOptimized routines. Nothing in this view turns SSE2 back on.
void DisableSSE2()
{
s_sse2Enabled = false;
SetPentiumFunctionPointers();
}
// Front end used for the low-level word-array primitives on x86 builds.
// Every call is forwarded through the dispatch pointers chosen by
// SetPentiumFunctionPointers(); anything not redefined here is inherited
// from PentiumOptimized (and, through it, from Portable).
class LowLevel : public PentiumOptimized
{
public:
	// Forwards to the add routine currently stored in s_pAdd.
	static inline word Add(word *C, const word *A, const word *B, unsigned int N)
	{
		return s_pAdd(C, A, B, N);
	}

	// Forwards to the subtract routine currently stored in s_pSub.
	static inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
	{
		return s_pSub(C, A, B, N);
	}

	// Squares a 4-word operand by multiplying it with itself.
	static inline void Square4(word *R, const word *A)
	{
		Multiply4(R, A, A);
	}

#ifdef SSE2_INTRINSICS_AVAILABLE
	// With SSE2 intrinsics compiled in, the fixed-size multiplies are also
	// dispatched at run time through s_pMul4 / s_pMul8 / s_pMul8B.
	static inline void Multiply4(word *C, const word *A, const word *B)
	{
		s_pMul4(C, A, B);
	}

	static inline void Multiply8(word *C, const word *A, const word *B)
	{
		s_pMul8(C, A, B);
	}

	static inline void Multiply8Bottom(word *C, const word *A, const word *B)
	{
		s_pMul8B(C, A, B);
	}
#endif
};
// use some tricks to share assembly code between MSVC and GCC
// The same instruction list is written once with AS1/AS2 and wrapped in a
// prologue/epilogue pair; each compiler branch below defines those macros so
// that the list expands to that compiler's inline-assembly form.
#ifdef _MSC_VER
// MSVC: functions are declared naked, so the prologue/epilogue macros must
// save/restore registers, fetch the arguments and emit the final ret.
#define CRYPTOPP_NAKED __declspec(naked)
// AS1/AS2 emit one __asm statement with one or two comma-separated parts.
#define AS1(x) __asm x
#define AS2(x, y) __asm x, y
// Saves ebp/ebx/esi/edi, then loads the four stack arguments into
// ecx, edx, ebx and esi. The offsets 20..32 skip the four pushed registers
// plus, presumably, a 4-byte return address; for the Add/Subtract signature
// (C, A, B, N) that would make ecx = C, edx = A, ebx = B, esi = N, matching
// the GCC operand bindings further down.
#define AddPrologue \
__asm push ebp \
__asm push ebx \
__asm push esi \
__asm push edi \
__asm mov ecx, [esp+20] \
__asm mov edx, [esp+24] \
__asm mov ebx, [esp+28] \
__asm mov esi, [esp+32]
// Restores the registers saved by AddPrologue in reverse order and returns.
#define AddEpilogue \
__asm pop edi \
__asm pop esi \
__asm pop ebx \
__asm pop ebp \
__asm ret
// Saves ebp/ebx/esi/edi, loads the third stack argument into ecx and the
// second into esi, and pushes a copy of the first argument so that it sits
// on top of the stack for the code that follows.
#define MulPrologue \
__asm push ebp \
__asm push ebx \
__asm push esi \
__asm push edi \
__asm mov ecx, [esp+28] \
__asm mov esi, [esp+24] \
__asm push [esp+20]
// Drops the extra copy pushed by MulPrologue, restores the saved registers
// and returns.
#define MulEpilogue \
__asm add esp, 4 \
__asm pop edi \
__asm pop esi \
__asm pop ebx \
__asm pop ebp \
__asm ret
#else
// Other compilers (GCC-style extended asm): no naked attribute is used; the
// prologue opens an __asm__ __volatile__ statement and the epilogue supplies
// its operand lists and closes it, so the two must always be used as a pair.
#define CRYPTOPP_NAKED
// AS1/AS2 stringize their arguments into ';'-terminated assembly text that
// is concatenated with the neighbouring string literals.
#define AS1(x) #x ";"
#define AS2(x, y) #x ", " #y ";"
// Saves ebx by hand, loads operand %2 (B, bound with the "m" constraint in
// AddEpilogue) into ebx, switches the assembler to Intel syntax and saves ebp.
// NOTE(review): %2 is read after the manual "push %%ebx"; if the compiler
// addresses B relative to esp, that offset would presumably be off by one
// slot -- confirm how the build guarantees a frame-pointer-relative operand.
#define AddPrologue \
__asm__ __volatile__ \
( \
"push %%ebx;" /* save this manually, in case of -fPIC */ \
"mov %2, %%ebx;" \
".intel_syntax noprefix;" \
"push ebp;"
// Restores ebp, switches back to AT&T syntax, restores ebx, and closes the
// asm statement: no outputs; inputs C in ecx, A in edx, B in memory, N in
// esi; edi, memory and the condition codes are declared clobbered.
#define AddEpilogue \
"pop ebp;" \
".att_syntax prefix;" \
"pop %%ebx;" \
: \
: "c" (C), "d" (A), "m" (B), "S" (N) \
: "%edi", "memory", "cc" \
);
// Saves ebx and ebp by hand, pushes operand %0 (Z, bound in MulEpilogue) so
// it sits on top of the stack, and switches the assembler to Intel syntax.
// NOTE(review): %0 may be a memory operand ("rm") and is read after two
// manual pushes; the same esp-relative addressing concern as in AddPrologue
// applies -- confirm.
#define MulPrologue \
__asm__ __volatile__ \
( \
"push %%ebx;" /* save this manually, in case of -fPIC */ \
"push %%ebp;" \
"push %0;" \
".intel_syntax noprefix;"
// Drops the pushed copy of Z, restores ebp and ebx, switches back to AT&T
// syntax, and closes the asm statement: no outputs; inputs Z in a register
// or memory, X in esi, Y in ecx; eax, edx, edi, memory and the condition
// codes are declared clobbered.
#define MulEpilogue \
"add esp, 4;" \
"pop ebp;" \
"pop ebx;" \
".att_syntax prefix;" \
: \
: "rm" (Z), "S" (X), "c" (Y) \
: "%eax", "%edx", "%edi", "memory", "cc" \
);
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -