📄 integer.cpp
字号:
// integer.cpp - written and placed in the public domain by Wei Dai
#include "pch.h"
#include "integer.h"
#include "modarith.h"
#include "nbtheory.h"
#include "asn.h"
#include "oids.h"
#include "words.h"
#include <iostream>
#include "algebra.cpp"
#include "eprecomp.cpp"
NAMESPACE_BEGIN(CryptoPP)
template class AbstractGroup<Integer>; // for MacOS X
#define MAKE_DWORD(lowWord, highWord) ((dword(highWord)<<WORD_BITS) | (lowWord))
// Add() and Subtract() are coded in Pentium assembly for a speed increase
// of about 10-20 percent for a RSA signature
// CodeWarrior defines _MSC_VER
#if defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=600)
static __declspec(naked) word __fastcall Add(word *C, const word *A, const word *B, unsigned int N)
{
__asm
{
push ebp
push ebx
push esi
push edi
mov esi, [esp+24] ; N
mov ebx, [esp+20] ; B
// now: ebx = B, ecx = C, edx = A, esi = N
sub ecx, edx // hold the distance between C & A so we can add this to A to get C
xor eax, eax // clear eax
sub eax, esi // eax is a negative index from end of B
lea ebx, [ebx+4*esi] // ebx is end of B
sar eax, 1 // unit of eax is now dwords; this also clears the carry flag
jz loopend // if no dwords then nothing to do
loopstart:
mov esi,[edx] // load lower word of A
mov ebp,[edx+4] // load higher word of A
mov edi,[ebx+8*eax] // load lower word of B
lea edx,[edx+8] // advance A and C
adc esi,edi // add lower words
mov edi,[ebx+8*eax+4] // load higher word of B
adc ebp,edi // add higher words
inc eax // advance B
mov [edx+ecx-8],esi // store lower word result
mov [edx+ecx-4],ebp // store higher word result
jnz loopstart // loop until eax overflows and becomes zero
loopend:
adc eax, 0 // store carry into eax (return result register)
pop edi
pop esi
pop ebx
pop ebp
ret 8
}
}
static __declspec(naked) word __fastcall Subtract(word *C, const word *A, const word *B, unsigned int N)
{
__asm
{
push ebp
push ebx
push esi
push edi
mov esi, [esp+24] ; N
mov ebx, [esp+20] ; B
sub ecx, edx
xor eax, eax
sub eax, esi
lea ebx, [ebx+4*esi]
sar eax, 1
jz loopend
loopstart:
mov esi,[edx]
mov ebp,[edx+4]
mov edi,[ebx+8*eax]
lea edx,[edx+8]
sbb esi,edi
mov edi,[ebx+8*eax+4]
sbb ebp,edi
inc eax
mov [edx+ecx-8],esi
mov [edx+ecx-4],ebp
jnz loopstart
loopend:
adc eax, 0
pop edi
pop esi
pop ebx
pop ebp
ret 8
}
}
#elif defined(__GNUC__) && defined(__i386__)
__attribute__((regparm(4))) static word Add(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
register word carry, temp;
__asm__ __volatile__(
"push %%ebp;"
"sub %3, %2;"
"xor %0, %0;"
"sub %4, %0;"
"lea (%1,%4,4), %1;"
"sar $1, %0;"
"jz 1f;"
"0:;"
"mov 0(%3), %4;"
"mov 4(%3), %%ebp;"
"mov (%1,%0,8), %5;"
"lea 8(%3), %3;"
"adc %5, %4;"
"mov 4(%1,%0,8), %5;"
"adc %5, %%ebp;"
"inc %0;"
"mov %4, -8(%3, %2);"
"mov %%ebp, -4(%3, %2);"
"jnz 0b;"
"1:;"
"adc $0, %0;"
"pop %%ebp;"
: "=aSD" (carry), "+r" (B), "+r" (C), "+r" (A), "+r" (N), "=r" (temp)
: : "cc", "memory");
return carry;
}
__attribute__((regparm(4))) static word Subtract(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
register word carry, temp;
__asm__ __volatile__(
"push %%ebp;"
"sub %3, %2;"
"xor %0, %0;"
"sub %4, %0;"
"lea (%1,%4,4), %1;"
"sar $1, %0;"
"jz 1f;"
"0:;"
"mov 0(%3), %4;"
"mov 4(%3), %%ebp;"
"mov (%1,%0,8), %5;"
"lea 8(%3), %3;"
"sbb %5, %4;"
"mov 4(%1,%0,8), %5;"
"sbb %5, %%ebp;"
"inc %0;"
"mov %4, -8(%3, %2);"
"mov %%ebp, -4(%3, %2);"
"jnz 0b;"
"1:;"
"adc $0, %0;"
"pop %%ebp;"
: "=aSD" (carry), "+r" (B), "+r" (C), "+r" (A), "+r" (N), "=r" (temp)
: : "cc", "memory");
return carry;
}
#else // defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=600)
static word Add(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
#ifdef IS_LITTLE_ENDIAN
if (sizeof(dword) == sizeof(size_t)) // dword is only register size
{
dword carry = 0;
N >>= 1;
for (unsigned int i = 0; i < N; i++)
{
dword a = ((const dword *)A)[i] + carry;
dword c = a + ((const dword *)B)[i];
((dword *)C)[i] = c;
carry = (a < carry) | (c < a);
}
return (word)carry;
}
else
#endif
{
word carry = 0;
for (unsigned int i = 0; i < N; i+=2)
{
dword u = (dword) carry + A[i] + B[i];
C[i] = LOW_WORD(u);
u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];
C[i+1] = LOW_WORD(u);
carry = HIGH_WORD(u);
}
return carry;
}
}
static word Subtract(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
#ifdef IS_LITTLE_ENDIAN
if (sizeof(dword) == sizeof(size_t)) // dword is only register size
{
dword borrow = 0;
N >>= 1;
for (unsigned int i = 0; i < N; i++)
{
dword a = ((const dword *)A)[i];
dword b = a - borrow;
dword c = b - ((const dword *)B)[i];
((dword *)C)[i] = c;
borrow = (b > a) | (c > b);
}
return (word)borrow;
}
else
#endif
{
word borrow=0;
for (unsigned i = 0; i < N; i+=2)
{
dword u = (dword) A[i] - B[i] - borrow;
C[i] = LOW_WORD(u);
u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));
C[i+1] = LOW_WORD(u);
borrow = 0-HIGH_WORD(u);
}
return borrow;
}
}
#endif // defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=600)
static int Compare(const word *A, const word *B, unsigned int N)
{
while (N--)
if (A[N] > B[N])
return 1;
else if (A[N] < B[N])
return -1;
return 0;
}
static word Increment(word *A, unsigned int N, word B=1)
{
assert(N);
word t = A[0];
A[0] = t+B;
if (A[0] >= t)
return 0;
for (unsigned i=1; i<N; i++)
if (++A[i])
return 0;
return 1;
}
static word Decrement(word *A, unsigned int N, word B=1)
{
assert(N);
word t = A[0];
A[0] = t-B;
if (A[0] <= t)
return 0;
for (unsigned i=1; i<N; i++)
if (A[i]--)
return 0;
return 1;
}
static void TwosComplement(word *A, unsigned int N)
{
Decrement(A, N);
for (unsigned i=0; i<N; i++)
A[i] = ~A[i];
}
static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
{
word carry=0;
for(unsigned i=0; i<N; i++)
{
dword p = (dword)A[i] * B + carry;
C[i] = LOW_WORD(p);
carry = HIGH_WORD(p);
}
return carry;
}
#if defined(__GNUC__) && defined(__alpha__)
static inline void AtomicMultiply(word *C, const word *A, const word *B)
{
register dword c, a = *(const dword *)A, b = *(const dword *)B;
((dword *)C)[0] = a*b;
__asm__("umulh %1,%2,%0" : "=r" (c) : "r" (a), "r" (b));
((dword *)C)[1] = c;
}
static inline word AtomicMultiplyAdd(word *C, const word *A, const word *B)
{
register dword c, d, e, a = *(const dword *)A, b = *(const dword *)B;
c = ((dword *)C)[0];
d = a*b + c;
__asm__("umulh %1,%2,%0" : "=r" (e) : "r" (a), "r" (b));
((dword *)C)[0] = d;
d = (d < c);
c = ((dword *)C)[1] + d;
d = (c < d);
c += e;
((dword *)C)[1] = c;
d |= (c < e);
return d;
}
#else // defined(__GNUC__) && defined(__alpha__)
static void AtomicMultiply(word *C, const word *A, const word *B)
{
/*
word s;
dword d;
if (A1 >= A0)
if (B0 >= B1)
{
s = 0;
d = (dword)(A1-A0)*(B0-B1);
}
else
{
s = (A1-A0);
d = (dword)s*(word)(B0-B1);
}
else
if (B0 > B1)
{
s = (B0-B1);
d = (word)(A1-A0)*(dword)s;
}
else
{
s = 0;
d = (dword)(A0-A1)*(B1-B0);
}
*/
// this segment is the branchless equivalent of above
word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
unsigned int ai = A[1] < A[0];
unsigned int bi = B[0] < B[1];
unsigned int di = ai & bi;
dword d = (dword)D[di]*D[di+2];
D[1] = D[3] = 0;
unsigned int si = ai + !bi;
word s = D[si];
dword A0B0 = (dword)A[0]*B[0];
C[0] = LOW_WORD(A0B0);
dword A1B1 = (dword)A[1]*B[1];
dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);
C[1] = LOW_WORD(t);
t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;
C[2] = LOW_WORD(t);
C[3] = HIGH_WORD(t);
}
static word AtomicMultiplyAdd(word *C, const word *A, const word *B)
{
word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
unsigned int ai = A[1] < A[0];
unsigned int bi = B[0] < B[1];
unsigned int di = ai & bi;
dword d = (dword)D[di]*D[di+2];
D[1] = D[3] = 0;
unsigned int si = ai + !bi;
word s = D[si];
dword A0B0 = (dword)A[0]*B[0];
dword t = A0B0 + C[0];
C[0] = LOW_WORD(t);
dword A1B1 = (dword)A[1]*B[1];
t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];
C[1] = LOW_WORD(t);
t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];
C[2] = LOW_WORD(t);
t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];
C[3] = LOW_WORD(t);
return HIGH_WORD(t);
}
#endif // defined(__GNUC__) && defined(__alpha__)
static inline void AtomicMultiplyBottom(word *C, const word *A, const word *B)
{
#ifdef IS_LITTLE_ENDIAN
if (sizeof(dword) == sizeof(size_t))
{
dword a = *(const dword *)A, b = *(const dword *)B;
((dword *)C)[0] = a*b;
}
else
#endif
{
dword t = (dword)A[0]*B[0];
C[0] = LOW_WORD(t);
C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0];
}
}
#define MulAcc(x, y) \
p = (dword)A[x] * B[y] + c; \
c = LOW_WORD(p); \
p = (dword)d + HIGH_WORD(p); \
d = LOW_WORD(p); \
e += HIGH_WORD(p);
#define SaveMulAcc(s, x, y) \
R[s] = c; \
p = (dword)A[x] * B[y] + d; \
c = LOW_WORD(p); \
p = (dword)e + HIGH_WORD(p); \
d = LOW_WORD(p); \
e = HIGH_WORD(p);
#define MulAcc1(x, y) \
p = (dword)A[x] * A[y] + c; \
c = LOW_WORD(p); \
p = (dword)d + HIGH_WORD(p); \
d = LOW_WORD(p); \
e += HIGH_WORD(p);
#define SaveMulAcc1(s, x, y) \
R[s] = c; \
p = (dword)A[x] * A[y] + d; \
c = LOW_WORD(p); \
p = (dword)e + HIGH_WORD(p); \
d = LOW_WORD(p); \
e = HIGH_WORD(p);
#define SquAcc(x, y) \
p = (dword)A[x] * A[y]; \
p = p + p + c; \
c = LOW_WORD(p); \
p = (dword)d + HIGH_WORD(p); \
d = LOW_WORD(p); \
e += HIGH_WORD(p);
#define SaveSquAcc(s, x, y) \
R[s] = c; \
p = (dword)A[x] * A[y]; \
p = p + p + d; \
c = LOW_WORD(p); \
p = (dword)e + HIGH_WORD(p); \
d = LOW_WORD(p); \
e = HIGH_WORD(p);
static void CombaSquare4(word *R, const word *A)
{
dword p;
word c, d, e;
p = (dword)A[0] * A[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
SquAcc(0, 1);
SaveSquAcc(1, 2, 0);
MulAcc1(1, 1);
SaveSquAcc(2, 0, 3);
SquAcc(1, 2);
SaveSquAcc(3, 3, 1);
MulAcc1(2, 2);
SaveSquAcc(4, 2, 3);
R[5] = c;
p = (dword)A[3] * A[3] + d;
R[6] = LOW_WORD(p);
R[7] = e + HIGH_WORD(p);
}
static void CombaMultiply4(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
SaveMulAcc(3, 3, 1);
MulAcc(2, 2);
MulAcc(1, 3);
SaveMulAcc(4, 2, 3);
MulAcc(3, 2);
R[5] = c;
p = (dword)A[3] * B[3] + d;
R[6] = LOW_WORD(p);
R[7] = e + HIGH_WORD(p);
}
static void CombaMultiply8(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
SaveMulAcc(3, 0, 4);
MulAcc(1, 3);
MulAcc(2, 2);
MulAcc(3, 1);
MulAcc(4, 0);
SaveMulAcc(4, 0, 5);
MulAcc(1, 4);
MulAcc(2, 3);
MulAcc(3, 2);
MulAcc(4, 1);
MulAcc(5, 0);
SaveMulAcc(5, 0, 6);
MulAcc(1, 5);
MulAcc(2, 4);
MulAcc(3, 3);
MulAcc(4, 2);
MulAcc(5, 1);
MulAcc(6, 0);
SaveMulAcc(6, 0, 7);
MulAcc(1, 6);
MulAcc(2, 5);
MulAcc(3, 4);
MulAcc(4, 3);
MulAcc(5, 2);
MulAcc(6, 1);
MulAcc(7, 0);
SaveMulAcc(7, 1, 7);
MulAcc(2, 6);
MulAcc(3, 5);
MulAcc(4, 4);
MulAcc(5, 3);
MulAcc(6, 2);
MulAcc(7, 1);
SaveMulAcc(8, 2, 7);
MulAcc(3, 6);
MulAcc(4, 5);
MulAcc(5, 4);
MulAcc(6, 3);
MulAcc(7, 2);
SaveMulAcc(9, 3, 7);
MulAcc(4, 6);
MulAcc(5, 5);
MulAcc(6, 4);
MulAcc(7, 3);
SaveMulAcc(10, 4, 7);
MulAcc(5, 6);
MulAcc(6, 5);
MulAcc(7, 4);
SaveMulAcc(11, 5, 7);
MulAcc(6, 6);
MulAcc(7, 5);
SaveMulAcc(12, 6, 7);
MulAcc(7, 6);
R[13] = c;
p = (dword)A[7] * B[7] + d;
R[14] = LOW_WORD(p);
R[15] = e + HIGH_WORD(p);
}
static void CombaMultiplyBottom4(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
R[2] = c;
R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
}
static void CombaMultiplyBottom8(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -