📄 mrgf2mnew.c
字号:
add2(z[nd2+m],y[nd2+m],z[nd2+m]);
}
karmul2_poly(_MIPP_ nd2,&t[nd],z,&z[nd2],t);
karmul2_poly(_MIPP_ nd2,&t[nd],x,y,z);
for (m=0;m<nd;m++) add2(t[m],z[m],t[m]);
karmul2_poly(_MIPP_ md2,&t[nd],&x[nd2],&y[nd2],&z[nd]);
for (m=0;m<md;m++) add2(t[m],z[nd+m],t[m]);
for (m=0;m<nd;m++) add2(z[nd2+m],t[m],z[nd2+m]);
}
void karmul2_poly_upper(_MIPD_ int n,big *t,big *x,big *y,big *z)
{ /* n is large and even, and upper half of z is known already */
int m,nd2,nd;
nd2=n/2; nd=n;
for (m=0;m<nd2;m++)
{
add2(x[m],x[nd2+m],z[m]);
add2(y[m],y[nd2+m],z[nd2+m]);
}
karmul2_poly(_MIPP_ nd2,&t[nd],z,&z[nd2],t);
karmul2_poly(_MIPP_ nd2,&t[nd],x,y,z); /* only 2 karmuls needed! */
for (m=0;m<nd;m++) add2(t[m],z[m],t[m]);
for (m=0;m<nd2;m++)
{
add2(z[nd+m],z[nd+nd2+m],z[nd+m]);
add2(z[nd+m],t[nd2+m],z[nd+m]);
}
for (m=0;m<nd;m++)
{
add2(t[m],z[nd+m],t[m]);
add2(z[nd2+m],t[m],z[nd2+m]);
}
}
#endif
/* Some in-line karatsuba down at the bottom... */
#ifndef MR_COMBA2
static inline void mr_bottom2(mr_small *x,mr_small *y,mr_small *z)
{
mr_small q0,r0,q1,r1,q2,r2;
q0=mr_mul2(x[0],y[0],&r0);
q1=mr_mul2(x[1],y[1],&r1);
q2=mr_mul2((mr_small)(x[0]^x[1]),(mr_small)(y[0]^y[1]),&r2);
z[0]=r0;
z[1]=q0^r1^r0^r2;
z[2]=q0^r1^q1^q2;
z[3]=q1;
}
static inline void mr_bottom3(mr_small *x,mr_small *y,mr_small *z)
{ /* just 6 mr_muls... */
mr_small q0,r0,q1,r1,q2,r2;
mr_small a0,b0,a1,b1,a2,b2;
q0=mr_mul2(x[0],y[0],&r0);
q1=mr_mul2(x[1],y[1],&r1);
q2=mr_mul2(x[2],y[2],&r2);
a0=mr_mul2((mr_small)(x[0]^x[1]),(mr_small)(y[0]^y[1]),&b0);
a1=mr_mul2((mr_small)(x[1]^x[2]),(mr_small)(y[1]^y[2]),&b1);
a2=mr_mul2((mr_small)(x[0]^x[2]),(mr_small)(y[0]^y[2]),&b2);
b0^=r0^r1;
a0^=q0^q1;
b1^=r1^r2;
a1^=q1^q2;
b2^=r0^r2;
a2^=q0^q2;
z[0]=r0;
z[1]=q0^b0;
z[2]=r1^a0^b2;
z[3]=q1^b1^a2; z[4]=r2^a1;
z[5]=q2;
}
static inline void mr_bottom5(mr_small *x,mr_small *y,mr_small *z)
{ /* Montgomery's 5x5 formula. Requires 13 mr_muls.... */
mr_small u0,v0,u1,v1,u2,v2,s0,t0,q,r,t;
mr_small z1,z2,z3,z4,z5,z6,z7,z8;
q=mr_mul2(x[0],y[0],&r);
t=q^r;
z[0]=r;
z1=t;
z2=t;
z3=q;
z4=r;
z5=t;
z6=t;
z7=q;
q=mr_mul2(x[1],y[1],&r);
z1^=r;
z2^=q;
z4^=r;
z5^=q;
q=mr_mul2(x[3],y[3],&r);
z4^=r;
z5^=q;
z7^=r;
z8=q;
q=mr_mul2(x[4],y[4],&r);
t=q^r;
z2^=r;
z3^=t;
z4^=t;
z5^=q;
z6^=r;
z7^=t;
z8^=t;
z[9]=q;
u0=x[0]^x[4]; /* u0=a0+a4 */
v0=y[0]^y[4]; /* v0=b0+b4 */
q=mr_mul2(u0,v0,&r);
t=q^r;
z2^=r;
z3^=t;
z4^=q;
z5^=r;
z6^=t;
z7^=q;
u1=x[0]^x[1]; /* u1=a0+a1 */
v1=y[0]^y[1]; /* v1=b0+b1 */
q=mr_mul2(u1,v1,&r);
t=q^r;
z1^=r;
z2^=t;
z3^=q;
z4^=r;
z5^=t;
z6^=q;
u2=x[3]^x[4]; /* u2=a3+a4 */
v2=y[3]^y[4]; /* v2=b3+b4 */
q=mr_mul2(u2,v2,&r);
t=q^r;
z3^=r;
z4^=t;
z5^=q;
z6^=r;
z7^=t;
z8^=q;
/*u=u1^u2; u=a0+a1+a3+a4 */
/*v=v1^v2; v=b0+b1+b3+b4 */
q=mr_mul2(u1^u2,v1^v2,&r);
z3^=r;
z4^=q;
z5^=r;
z6^=q;
s0=u1^x[2]; /* s0=a0+a1+a2 */
t0=v1^y[2]; /* t0=b0+b1+b2 */
u2^=x[2]; /* u2=a2+a3+a4 */
v2^=y[2]; /* v2=b2+b3+b4 */
u1^=u2; /* u1=a0+a1+a2+a3+a4 */
v1^=v2; /* v1=b0+b1+b2+b3+b4 */
q=mr_mul2(u1,v1,&r);
t=q^r;
z3^=r;
z4^=t;
z5^=t;
z6^=q;
u2^=x[0]; /* s1=a0+a2+a3+a4 */
v2^=y[0]; /* t1=b0+b2+b3+b4 */
q=mr_mul2(u2,v2,&r);
z3^=r;
z4^=q;
z6^=r;
z7^=q;
s0^=x[4]; /* s0=a0+a1+a2+a4 */
t0^=y[4]; /* t0=b0+b1+b2+b4 */
q=mr_mul2(s0,t0,&r);
z2^=r;
z3^=q;
z5^=r;
z6^=q;
u2^=x[4]; /* u2=a0+a2+a3 */
v2^=y[4]; /* v2=b0+b2+b3 */
q=mr_mul2(u2,v2,&r);
z4^=r;
z5^=q;
z6^=r;
z7^=q;
s0^=x[0]; /* s0=a1+a2+a4 */
t0^=y[0]; /* t0=b1+b2+b4 */
q=mr_mul2(s0,t0,&r);
z2^=r;
z3^=q;
z4^=r;
z5^=q;
z[1]=z1;
z[2]=z2;
z[3]=z3;
z[4]=z4;
z[5]=z5;
z[6]=z6;
z[7]=z7;
z[8]=z8;
}
static inline void mr_bottom4(mr_small *x,mr_small *y,mr_small *z)
{ /* unwound 4x4 karatsuba multiplication - only 9 muls */
mr_small q0,r0,q1,r1,q2,r2,tx,ty;
mr_small xx0,yy0,xx1,yy1,t0,t1,t2,t3;
q0=mr_mul2(x[0],y[0],&r0);
q1=mr_mul2(x[1],y[1],&r1);
tx=x[0]^x[1];
ty=y[0]^y[1];
q2=mr_mul2(tx,ty,&r2);
z[0]=r0;
z[1]=q0^r1^r0^r2;
z[2]=q0^r1^q1^q2;
z[3]=q1;
q0=mr_mul2(x[2],y[2],&r0);
q1=mr_mul2(x[3],y[3],&r1);
tx=x[2]^x[3];
ty=y[2]^y[3];
q2=mr_mul2(tx,ty,&r2);
z[4]=r0;
z[5]=q0^r1^r0^r2;
z[6]=q0^r1^q1^q2;
z[7]=q1;
xx0=x[2]^x[0];
yy0=y[2]^y[0];
q0=mr_mul2(xx0,yy0,&r0);
xx1=x[3]^x[1];
yy1=y[3]^y[1];
q1=mr_mul2(xx1,yy1,&r1);
tx=xx0^xx1;
ty=yy0^yy1;
q2=mr_mul2(tx,ty,&r2);
t0=z[0]^z[4]^r0;
t1=z[1]^z[5]^q0^r1^r0^r2;
t2=z[2]^z[6]^q0^r1^q1^q2;
t3=z[3]^z[7]^q1;
z[2]^=t0;
z[3]^=t1;
z[4]^=t2;
z[5]^=t3;
}
void karmul2(int n,mr_small *t,mr_small *x,mr_small *y,mr_small *z)
{ /* Karatsuba multiplication - note that n can be odd or even */
int m,nd2,nd,md,md2;
if (n<=5)
{
if (n==1)
{
z[1]=mr_mul2(x[0],y[0],&z[0]);
return;
}
if (n==2)
{
mr_bottom2(x,y,z);
return;
}
if (n==3)
{
mr_bottom3(x,y,z);
return;
}
if (n==4)
{
mr_bottom4(x,y,z);
return;
}
if (n==5)
{
mr_bottom5(x,y,z);
return;
}
}
if (n%2==0)
{
md=nd=n;
md2=nd2=n/2;
}
else
{
nd=n+1;
md=n-1;
nd2=nd/2; md2=md/2;
}
for (m=0;m<nd2;m++)
{
z[m]=x[m];
z[nd2+m]=y[m];
}
for (m=0;m<md2;m++)
{
z[m]^=x[nd2+m];
z[nd2+m]^=y[nd2+m];
}
karmul2(nd2,&t[nd],z,&z[nd2],t);
karmul2(nd2,&t[nd],x,y,z);
for (m=0;m<nd;m++) t[m]^=z[m];
karmul2(md2,&t[nd],&x[nd2],&y[nd2],&z[nd]);
for (m=0;m<md;m++) t[m]^=z[nd+m];
for (m=0;m<nd;m++) z[nd2+m]^=t[m];
}
#endif
/* this is time-critical, so use karatsuba here, since addition is cheap *
* and easy (no carries to worry about...) */
/* #define CLAIRE */
void multiply2(_MIPD_ big x,big y,big w)
{
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_COMBA2
comba_mult2(_MIPP_ x,y,w);
return;
#else
int i,j,xl,yl,ml;
#ifdef CLAIRE
int d;
mr_small hi,lo;
#endif
mr_small p,q;
big w0=mr_mip->w0;
if (x==NULL || y==NULL)
{
zero(w);
return;
}
if (x->len==0 || y->len==0)
{
zero(w);
return;
}
xl=x->len;
yl=y->len;
zero(w0);
#ifdef CLAIRE
/* Comba method */
w0->len=xl+yl;
d=1+mr_mip->M/MIRACL;
hi=lo=0;
for (i=0;i<d;i++)
{
for (j=0;j<=i;j++)
{
q=mr_mul2(x->w[j],y->w[i-j],&p);
hi^=q; lo^=p;
}
w0->w[i]=lo; lo=hi; hi=0;
}
for (i=d;i<2*d-1;i++)
{
for (j=i-d+1;j<d;j++)
{
q=mr_mul2(x->w[j],y->w[i-j],&p);
hi^=q; lo^=p;
}
w0->w[i]=lo; lo=hi; hi=0;
}
w0->w[2*d-1]=lo;
mr_lzero(w0);
copy(w0,w);
#else
/* recommended method as mr_mul2 is so slow... */
if (xl>=MR_KARATSUBA && yl>=MR_KARATSUBA)
{
if (xl>yl) ml=xl;
else ml=yl;
karmul2(ml,mr_mip->w7->w,x->w,y->w,w0->w);
mr_mip->w7->len=w0->len=2*ml+1;
mr_lzero(w0);
mr_lzero(mr_mip->w7);
copy(w0,w);
return;
}
w0->len=xl+yl;
for (i=0;i<xl;i++)
{
for (j=0;j<yl;j++)
{
q=mr_mul2(x->w[i],y->w[j],&p);
w0->w[i+j]^=p;
w0->w[i+j+1]^=q;
}
}
mr_lzero(w0);
copy(w0,w);
#endif
#endif
}
void add2(big x,big y,big z)
{ /* XOR x and y */
int i,lx,ly,lz,lm;
mr_small *gx,*gy,*gz;
if (x==y)
{
zero(z);
return;
}
if (y==NULL)
{
copy(x,z);
return;
}
else if (x==NULL)
{
copy(y,z);
return;
}
if (x==z)
{
gy=y->w; gz=z->w;
ly=y->len; lz=z->len;
lm=lz; if (ly>lz) lm=ly;
for (i=0;i<lm;i++)
gz[i]^=gy[i];
z->len=lm;
if (gz[lm-1]==0) mr_lzero(z);
}
else
{
gx=x->w; gy=y->w; gz=z->w;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -