📄 integer.cpp
字号:
// integer.cpp - written and placed in the public domain by Wei Dai
#include "pch.h"
#include "integer.h"
#include "modarith.h"
#include "nbtheory.h"
#include "asn.h"
#include "oids.h"
#include "words.h"
#include <iostream>
#include "algebra.cpp"
#include "eprecomp.cpp"
NAMESPACE_BEGIN(CryptoPP)
#define MAKE_DWORD(lowWord, highWord) ((dword(highWord)<<WORD_BITS) | (lowWord))
// CodeWarrior defines _MSC_VER
#if defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=600)
// Add() and Subtract() are coded in Pentium assembly for a speed increase
// of about 10-20 percent for a RSA signature
static __declspec(naked) word __fastcall Add(word *C, const word *A, const word *B, unsigned int N)
{
__asm
{
push ebp
push ebx
push esi
push edi
mov esi, [esp+24] ; N
mov ebx, [esp+20] ; B
sub ecx, edx
xor eax, eax
sub eax, esi
lea ebx, [ebx+4*esi]
sar eax, 1 // clears the carry flag
jz loopend
loopstart:
mov esi,[edx]
mov ebp,[edx+4]
mov edi,[ebx+8*eax]
lea edx,[edx+8]
adc esi,edi
mov edi,[ebx+8*eax+4]
adc ebp,edi
inc eax
mov [edx+ecx-8],esi
mov [edx+ecx-4],ebp
jnz loopstart
loopend:
adc eax, 0
pop edi
pop esi
pop ebx
pop ebp
ret 8
}
}
static __declspec(naked) word __fastcall Subtract(word *C, const word *A, const word *B, unsigned int N)
{
__asm
{
push ebp
push ebx
push esi
push edi
mov esi, [esp+24] ; N
mov ebx, [esp+20] ; B
sub ecx, edx
xor eax, eax
sub eax, esi
lea ebx, [ebx+4*esi]
sar eax, 1 // clears the carry flag
jz loopend
loopstart:
mov esi,[edx]
mov ebp,[edx+4]
mov edi,[ebx+8*eax]
lea edx,[edx+8]
sbb esi,edi
mov edi,[ebx+8*eax+4]
sbb ebp,edi
inc eax
mov [edx+ecx-8],esi
mov [edx+ecx-4],ebp
jnz loopstart
loopend:
adc eax, 0
pop edi
pop esi
pop ebx
pop ebp
ret 8
}
}
#elif defined(__GNUC__) && defined(__i386__)
static word Add(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
register word carry;
// Notes and further work (by Alister Lee):
// - get extended asm to accept parameter into ebx. Currently, the parameter
// is accepted into eax and moved to ebx resulting in an extra instruction
// outside the loop. I think this is a bug in gcc.
// - get extended asm to save and restore ebp through the clobbered list.
// I think this is a limitation of gcc.
// on entry esi = N, edx = A, ecx = C, eax = B through extended asm (see below)
__asm__(
"push %%ebp\n\t" // can't automatically save ebp
"mov %%eax, %%ebx\n\t" // ebx is B (can't automatically accept
// parameter into ebx)
"sub %%edx, %%ecx\n\t" // hold the distance between C & A so
// we can add this to A to get C
"xor %%eax, %%eax\n\t"
"sub %%esi, %%eax\n\t" // eax is a negative index from end of B
"lea (%%ebx,%%esi,4), %%ebx\n\t" // ebx is end of B
"sar $1, %%eax\n\t" // eax is number of dwords
// this also clears the carry flag
"jz 1f\n" // to loopend
// if no dwords then nothing to do
"0:\n\t" // loopstart:
"mov 0(%%edx), %%esi\n\t" // load next dword of A into ebp:esi
"mov 4(%%edx), %%ebp\n\t"
"mov (%%ebx,%%eax,8), %%edi\n\t" // load next word of B, using eax as index
"lea 8(%%edx), %%edx\n\t" // advance A
"adc %%edi, %%esi\n\t" // add with carry
"mov 4(%%ebx,%%eax,8), %%edi\n\t" // load next word of B, using eax as index
"adc %%edi, %%ebp\n\t" // add with carry
"inc %%eax\n\t" // advance index into B
// no more words when zero
"mov %%esi, -8(%%edx,%%ecx)\n\t" // store ebp:esi into next dword of C
"mov %%ebp, -4(%%edx,%%ecx)\n\t"
"jnz 0b\n" // to loopstart
// carry flag feeds into next iteration
"1:\n\t" // loopend:
"adc $0, %%eax\n\t" // capture carry flag
"pop %%ebp"
: "=a" (carry)
: "S" (N), "d" (A), "c" (C), "a" (B)
: "%edi", "%ebx"
);
return carry;
}
static word Subtract(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
register word carry;
// Notes: see notes on Add above
__asm__(
"push %%ebp\n\t"
"mov %%eax, %%ebx\n\t"
"sub %%edx, %%ecx\n\t"
"xor %%eax, %%eax\n\t"
"sub %%esi, %%eax\n\t"
"lea (%%ebx,%%esi,4), %%ebx\n\t"
"sar $1, %%eax\n\t"
"jz 1f\n"
"0:\n\t"
"mov 0(%%edx), %%esi\n\t"
"mov 4(%%edx), %%ebp\n\t"
"mov (%%ebx,%%eax,8), %%edi\n\t"
"lea 8(%%edx), %%edx\n\t"
"sbb %%edi, %%esi\n\t"
"mov 4(%%ebx,%%eax,8), %%edi\n\t"
"sbb %%edi, %%ebp\n\t"
"inc %%eax\n\t"
"mov %%esi, -8(%%edx, %%ecx)\n\t"
"mov %%ebp, -4(%%edx, %%ecx)\n\t"
"jnz 0b\n"
"1:\n\t"
"adc $0, %%eax\n\t"
"pop %%ebp"
: "=a" (carry)
: "S" (N), "d" (A), "c" (C), "a" (B)
: "%edi", "%ebx"
);
return carry;
}
#else // defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=600)
static word Add(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
word carry=0;
for (unsigned i = 0; i < N; i+=2)
{
dword u = (dword) carry + A[i] + B[i];
C[i] = LOW_WORD(u);
u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];
C[i+1] = LOW_WORD(u);
carry = HIGH_WORD(u);
}
return carry;
}
static word Subtract(word *C, const word *A, const word *B, unsigned int N)
{
assert (N%2 == 0);
word borrow=0;
for (unsigned i = 0; i < N; i+=2)
{
dword u = (dword) A[i] - B[i] - borrow;
C[i] = LOW_WORD(u);
u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));
C[i+1] = LOW_WORD(u);
borrow = 0-HIGH_WORD(u);
}
return borrow;
}
#endif // defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=600)
static int Compare(const word *A, const word *B, unsigned int N)
{
while (N--)
if (A[N] > B[N])
return 1;
else if (A[N] < B[N])
return -1;
return 0;
}
static word Increment(word *A, unsigned int N, word B=1)
{
assert(N);
word t = A[0];
A[0] = t+B;
if (A[0] >= t)
return 0;
for (unsigned i=1; i<N; i++)
if (++A[i])
return 0;
return 1;
}
static word Decrement(word *A, unsigned int N, word B=1)
{
assert(N);
word t = A[0];
A[0] = t-B;
if (A[0] <= t)
return 0;
for (unsigned i=1; i<N; i++)
if (A[i]--)
return 0;
return 1;
}
static void TwosComplement(word *A, unsigned int N)
{
Decrement(A, N);
for (unsigned i=0; i<N; i++)
A[i] = ~A[i];
}
static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
{
word carry=0;
for(unsigned i=0; i<N; i++)
{
dword p = (dword)A[i] * B + carry;
C[i] = LOW_WORD(p);
carry = HIGH_WORD(p);
}
return carry;
}
static void AtomicMultiply(word *C, word A0, word A1, word B0, word B1)
{
/*
word s;
dword d;
if (A1 >= A0)
if (B0 >= B1)
{
s = 0;
d = (dword)(A1-A0)*(B0-B1);
}
else
{
s = (A1-A0);
d = (dword)s*(word)(B0-B1);
}
else
if (B0 > B1)
{
s = (B0-B1);
d = (word)(A1-A0)*(dword)s;
}
else
{
s = 0;
d = (dword)(A0-A1)*(B1-B0);
}
*/
// this segment is the branchless equivalent of above
word D[4] = {A1-A0, A0-A1, B0-B1, B1-B0};
unsigned int ai = A1 < A0;
unsigned int bi = B0 < B1;
unsigned int di = ai & bi;
dword d = (dword)D[di]*D[di+2];
D[1] = D[3] = 0;
unsigned int si = ai + !bi;
word s = D[si];
dword A0B0 = (dword)A0*B0;
C[0] = LOW_WORD(A0B0);
dword A1B1 = (dword)A1*B1;
dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);
C[1] = LOW_WORD(t);
t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;
C[2] = LOW_WORD(t);
C[3] = HIGH_WORD(t);
}
static word AtomicMultiplyAdd(word *C, word A0, word A1, word B0, word B1)
{
word D[4] = {A1-A0, A0-A1, B0-B1, B1-B0};
unsigned int ai = A1 < A0;
unsigned int bi = B0 < B1;
unsigned int di = ai & bi;
dword d = (dword)D[di]*D[di+2];
D[1] = D[3] = 0;
unsigned int si = ai + !bi;
word s = D[si];
dword A0B0 = (dword)A0*B0;
dword t = A0B0 + C[0];
C[0] = LOW_WORD(t);
dword A1B1 = (dword)A1*B1;
t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];
C[1] = LOW_WORD(t);
t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];
C[2] = LOW_WORD(t);
t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];
C[3] = LOW_WORD(t);
return HIGH_WORD(t);
}
static inline void AtomicSquare(word *C, word A, word B)
{
dword t1 = (dword) A*A;
C[0] = LOW_WORD(t1);
dword t2 = (dword) A*B;
t1 = (dword) HIGH_WORD(t1) + LOW_WORD(t2) + LOW_WORD(t2);
C[1] = LOW_WORD(t1);
t1 = (dword) B*B + HIGH_WORD(t1) + HIGH_WORD(t2) + HIGH_WORD(t2);
C[2] = LOW_WORD(t1);
C[3] = HIGH_WORD(t1);
}
static inline void AtomicMultiplyBottom(word *C, word A0, word A1, word B0, word B1)
{
dword t = (dword)A0*B0;
C[0] = LOW_WORD(t);
C[1] = HIGH_WORD(t) + A0*B1 + A1*B0;
}
#define MulAcc(x, y) \
p = (dword)A[x] * B[y] + c; \
c = LOW_WORD(p); \
p = (dword)d + HIGH_WORD(p); \
d = LOW_WORD(p); \
e += HIGH_WORD(p);
#define SaveMulAcc(s, x, y) \
R[s] = c; \
p = (dword)A[x] * B[y] + d; \
c = LOW_WORD(p); \
p = (dword)e + HIGH_WORD(p); \
d = LOW_WORD(p); \
e = HIGH_WORD(p);
#define MulAcc1(x, y) \
p = (dword)A[x] * A[y] + c; \
c = LOW_WORD(p); \
p = (dword)d + HIGH_WORD(p); \
d = LOW_WORD(p); \
e += HIGH_WORD(p);
#define SaveMulAcc1(s, x, y) \
R[s] = c; \
p = (dword)A[x] * A[y] + d; \
c = LOW_WORD(p); \
p = (dword)e + HIGH_WORD(p); \
d = LOW_WORD(p); \
e = HIGH_WORD(p);
#define SquAcc(x, y) \
p = (dword)A[x] * A[y]; \
p = p + p + c; \
c = LOW_WORD(p); \
p = (dword)d + HIGH_WORD(p); \
d = LOW_WORD(p); \
e += HIGH_WORD(p);
#define SaveSquAcc(s, x, y) \
R[s] = c; \
p = (dword)A[x] * A[y]; \
p = p + p + d; \
c = LOW_WORD(p); \
p = (dword)e + HIGH_WORD(p); \
d = LOW_WORD(p); \
e = HIGH_WORD(p);
static void CombaSquare4(word *R, const word *A)
{
dword p;
word c, d, e;
p = (dword)A[0] * A[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
SquAcc(0, 1);
SaveSquAcc(1, 2, 0);
MulAcc1(1, 1);
SaveSquAcc(2, 0, 3);
SquAcc(1, 2);
SaveSquAcc(3, 3, 1);
MulAcc1(2, 2);
SaveSquAcc(4, 2, 3);
R[5] = c;
p = (dword)A[3] * A[3] + d;
R[6] = LOW_WORD(p);
R[7] = e + HIGH_WORD(p);
}
static void CombaMultiply4(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
SaveMulAcc(3, 3, 1);
MulAcc(2, 2);
MulAcc(1, 3);
SaveMulAcc(4, 2, 3);
MulAcc(3, 2);
R[5] = c;
p = (dword)A[3] * B[3] + d;
R[6] = LOW_WORD(p);
R[7] = e + HIGH_WORD(p);
}
static void CombaMultiply8(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
SaveMulAcc(3, 0, 4);
MulAcc(1, 3);
MulAcc(2, 2);
MulAcc(3, 1);
MulAcc(4, 0);
SaveMulAcc(4, 0, 5);
MulAcc(1, 4);
MulAcc(2, 3);
MulAcc(3, 2);
MulAcc(4, 1);
MulAcc(5, 0);
SaveMulAcc(5, 0, 6);
MulAcc(1, 5);
MulAcc(2, 4);
MulAcc(3, 3);
MulAcc(4, 2);
MulAcc(5, 1);
MulAcc(6, 0);
SaveMulAcc(6, 0, 7);
MulAcc(1, 6);
MulAcc(2, 5);
MulAcc(3, 4);
MulAcc(4, 3);
MulAcc(5, 2);
MulAcc(6, 1);
MulAcc(7, 0);
SaveMulAcc(7, 1, 7);
MulAcc(2, 6);
MulAcc(3, 5);
MulAcc(4, 4);
MulAcc(5, 3);
MulAcc(6, 2);
MulAcc(7, 1);
SaveMulAcc(8, 2, 7);
MulAcc(3, 6);
MulAcc(4, 5);
MulAcc(5, 4);
MulAcc(6, 3);
MulAcc(7, 2);
SaveMulAcc(9, 3, 7);
MulAcc(4, 6);
MulAcc(5, 5);
MulAcc(6, 4);
MulAcc(7, 3);
SaveMulAcc(10, 4, 7);
MulAcc(5, 6);
MulAcc(6, 5);
MulAcc(7, 4);
SaveMulAcc(11, 5, 7);
MulAcc(6, 6);
MulAcc(7, 5);
SaveMulAcc(12, 6, 7);
MulAcc(7, 6);
R[13] = c;
p = (dword)A[7] * B[7] + d;
R[14] = LOW_WORD(p);
R[15] = e + HIGH_WORD(p);
}
static void CombaMultiplyBottom4(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
R[2] = c;
R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
}
static void CombaMultiplyBottom8(word *R, const word *A, const word *B)
{
dword p;
word c, d, e;
p = (dword)A[0] * B[0];
R[0] = LOW_WORD(p);
c = HIGH_WORD(p);
d = e = 0;
MulAcc(0, 1);
MulAcc(1, 0);
SaveMulAcc(1, 2, 0);
MulAcc(1, 1);
MulAcc(0, 2);
SaveMulAcc(2, 0, 3);
MulAcc(1, 2);
MulAcc(2, 1);
MulAcc(3, 0);
SaveMulAcc(3, 0, 4);
MulAcc(1, 3);
MulAcc(2, 2);
MulAcc(3, 1);
MulAcc(4, 0);
SaveMulAcc(4, 0, 5);
MulAcc(1, 4);
MulAcc(2, 3);
MulAcc(3, 2);
MulAcc(4, 1);
MulAcc(5, 0);
SaveMulAcc(5, 0, 6);
MulAcc(1, 5);
MulAcc(2, 4);
MulAcc(3, 3);
MulAcc(4, 2);
MulAcc(5, 1);
MulAcc(6, 0);
R[6] = c;
R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
}
#undef MulAcc
#undef SaveMulAcc
static void AtomicInverseModPower2(word *C, word A0, word A1)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -