📄 mrgf2m.c
字号:
n=0;
lsb=gx[m];
while (!(bit&lsb))
{
n++;
bit<<=1;
}
break;
}
return (MIRACL*m+n);
}
static void shiftrightbits(big x,int m)
{
int i,k=x->len;
int w=m/MIRACL;
int b=m%MIRACL;
mr_small *gx=x->w;
if (k==0 || m==0) return;
if (w>0)
{
for (i=0;i<k-w;i++)
gx[i]=gx[i+w];
for (i=k-w;i<k;i++) gx[i]=0;
x->len-=w;
}
if (b!=0)
{
for (i=0;i<k-w-1;i++)
gx[i]=(gx[i]>>b)|(gx[i+1]<<(MIRACL-b));
gx[k-w-1]>>=b;
if (gx[k-w-1]==0) x->len--;
}
}
*/
static void shiftleftbits(big x,int m)
{
int i,k=x->len;
mr_small j;
int w=m/MIRACL; /* words */
int b=m%MIRACL; /* bits */
mr_small *gx=x->w;
if (k==0 || m==0) return;
if (w>0)
{
for (i=k+w-1;i>=w;i--)
gx[i]=gx[i-w];
for (i=w-1;i>=0;i--) gx[i]=0;
x->len+=w;
}
/* time critical */
if (b!=0)
{
j=gx[k+w-1]>>(MIRACL-b);
if (j!=0)
{
x->len++;
gx[k+w]=j;
}
for (i=k+w-1;i>w;i--)
{
gx[i]=(gx[i]<<b)|(gx[i-1]>>(MIRACL-b));
}
gx[w]<<=b;
}
}
static void square2(big x,big w)
{ /* w=x*x where x can be NULL so be careful */
int i,j,n,m;
mr_small a,t,r,*gw;
static const mr_small look[16]=
{0,(mr_small)1<<M8,(mr_small)4<<M8,(mr_small)5<<M8,(mr_small)16<<M8,
(mr_small)17<<M8,(mr_small)20<<M8,(mr_small)21<<M8,(mr_small)64<<M8,
(mr_small)65<<M8,(mr_small)68<<M8,(mr_small)69<<M8,(mr_small)80<<M8,
(mr_small)81<<M8,(mr_small)84<<M8,(mr_small)85<<M8};
if (x!=w) copy(x,w);
n=w->len;
if (n==0) return;
m=n+n;
w->len=m;
gw=w->w;
for (i=n-1;i>=0;i--)
{
a=gw[i];
#if MIRACL == 8
#define UNWOUNDS
gw[i+i]=look[a&0xF];
gw[i+i+1]=look[(a>>4)];
#endif
#if MIRACL == 16
#define UNWOUNDS
gw[i+i]=(look[a&0xF]>>8)|look[(a>>4)&0xF];
gw[i+i+1]=(look[(a>>8)&0xF]>>8)|look[(a>>12)];
#endif
#if MIRACL == 32
#define UNWOUNDS
gw[i+i]=(look[a&0xF]>>24)|(look[(a>>4)&0xF]>>16)|(look[(a>>8)&0xF]>>8)|look[(a>>12)&0xF];
gw[i+i+1]=(look[(a>>16)&0xF]>>24)|(look[(a>>20)&0xF]>>16)|(look[(a>>24)&0xF]>>8)|look[(a>>28)];
#endif
#ifndef UNWOUNDS
r=0;
for (j=0;j<MIRACL/2;j+=4)
{
t=look[a&0xF];
a>>=4;
r>>=8;
r|=t;
}
gw[i+i]=r; r=0;
for (j=0;j<MIRACL/2;j+=4)
{
t=look[a&0xF];
a>>=4;
r>>=8;
r|=t;
}
gw[i+i+1]=r;
#endif
}
if (gw[m-1]==0)
{
w->len--;
if (gw[m-2]==0)
mr_lzero(w);
}
}
/* Use karatsuba to multiply two polynomials with coefficients in GF(2^m) */
#ifndef MR_STATIC
void karmul2_poly(_MIPD_ int n,big *t,big *x,big *y,big *z)
{
int m,nd2,nd,md,md2;
if (n==1)
{ /* finished */
modmult2(_MIPP_ *x,*y,*z);
zero(z[1]);
return;
}
if (n==2)
{ /* in-line 2x2 */
modmult2(_MIPP_ x[0],y[0],z[0]);
modmult2(_MIPP_ x[1],y[1],z[2]);
add2(x[0],x[1],t[0]);
add2(y[0],y[1],t[1]);
modmult2(_MIPP_ t[0],t[1],z[1]);
add2(z[1],z[0],z[1]);
add2(z[1],z[2],z[1]);
zero(z[3]);
return;
}
if (n==3)
{
modmult2(_MIPP_ x[0],y[0],z[0]);
modmult2(_MIPP_ x[1],y[1],z[2]);
modmult2(_MIPP_ x[2],y[2],z[4]);
add2(x[0],x[1],t[0]);
add2(y[0],y[1],t[1]);
modmult2(_MIPP_ t[0],t[1],z[1]);
add2(z[1],z[0],z[1]);
add2(z[1],z[2],z[1]);
add2(x[1],x[2],t[0]);
add2(y[1],y[2],t[1]);
modmult2(_MIPP_ t[0],t[1],z[3]);
add2(z[3],z[2],z[3]);
add2(z[3],z[4],z[3]);
add2(x[0],x[2],t[0]);
add2(y[0],y[2],t[1]);
modmult2(_MIPP_ t[0],t[1],t[0]);
add2(z[2],t[0],z[2]);
add2(z[2],z[0],z[2]);
add2(z[2],z[4],z[2]);
zero(z[5]);
return;
}
if (n%2==0)
{
md=nd=n;
md2=nd2=n/2;
}
else
{
nd=n+1;
md=n-1;
nd2=nd/2; md2=md/2;
}
for (m=0;m<nd2;m++)
{
copy(x[m],z[m]);
copy(y[m],z[nd2+m]);
}
for (m=0;m<md2;m++)
{
add2(z[m],x[nd2+m],z[m]);
add2(z[nd2+m],y[nd2+m],z[nd2+m]);
}
karmul2_poly(_MIPP_ nd2,&t[nd],z,&z[nd2],t);
karmul2_poly(_MIPP_ nd2,&t[nd],x,y,z);
for (m=0;m<nd;m++) add2(t[m],z[m],t[m]);
karmul2_poly(_MIPP_ md2,&t[nd],&x[nd2],&y[nd2],&z[nd]);
for (m=0;m<md;m++) add2(t[m],z[nd+m],t[m]);
for (m=0;m<nd;m++) add2(z[nd2+m],t[m],z[nd2+m]);
}
void karmul2_poly_upper(_MIPD_ int n,big *t,big *x,big *y,big *z)
{ /* n is large and even, and upper half of z is known already */
int m,nd2,nd;
nd2=n/2; nd=n;
for (m=0;m<nd2;m++)
{
add2(x[m],x[nd2+m],z[m]);
add2(y[m],y[nd2+m],z[nd2+m]);
}
karmul2_poly(_MIPP_ nd2,&t[nd],z,&z[nd2],t);
karmul2_poly(_MIPP_ nd2,&t[nd],x,y,z); /* only 2 karmuls needed! */
for (m=0;m<nd;m++) add2(t[m],z[m],t[m]);
for (m=0;m<nd2;m++)
{
add2(z[nd+m],z[nd+nd2+m],z[nd+m]);
add2(z[nd+m],t[nd2+m],z[nd+m]);
}
for (m=0;m<nd;m++)
{
add2(t[m],z[nd+m],t[m]);
add2(z[nd2+m],t[m],z[nd2+m]);
}
}
#endif
/* Some in-line karatsuba down at the bottom... */
#ifndef MR_COMBA2
static void mr_bottom2(mr_small *x,mr_small *y,mr_small *z)
{
mr_small q0,r0,q1,r1,q2,r2;
q0=mr_mul2(x[0],y[0],&r0);
q1=mr_mul2(x[1],y[1],&r1);
q2=mr_mul2((mr_small)(x[0]^x[1]),(mr_small)(y[0]^y[1]),&r2);
z[0]=r0;
z[1]=q0^r1^r0^r2;
z[2]=q0^r1^q1^q2;
z[3]=q1;
}
static void mr_bottom3(mr_small *x,mr_small *y,mr_small *z)
{ /* just 6 mr_muls... */
mr_small q0,r0,q1,r1,q2,r2;
mr_small a0,b0,a1,b1,a2,b2;
q0=mr_mul2(x[0],y[0],&r0);
q1=mr_mul2(x[1],y[1],&r1);
q2=mr_mul2(x[2],y[2],&r2);
a0=mr_mul2((mr_small)(x[0]^x[1]),(mr_small)(y[0]^y[1]),&b0);
a1=mr_mul2((mr_small)(x[1]^x[2]),(mr_small)(y[1]^y[2]),&b1);
a2=mr_mul2((mr_small)(x[0]^x[2]),(mr_small)(y[0]^y[2]),&b2);
b0^=r0^r1;
a0^=q0^q1;
b1^=r1^r2;
a1^=q1^q2;
b2^=r0^r2;
a2^=q0^q2;
z[0]=r0;
z[1]=q0^b0;
z[2]=r1^a0^b2;
z[3]=q1^b1^a2;
z[4]=r2^a1;
z[5]=q2;
}
static void mr_bottom4(mr_small *x,mr_small *y,mr_small *z)
{ /* unwound 4x4 karatsuba multiplication - only 9 muls */
mr_small q0,r0,q1,r1,q2,r2,tx,ty;
mr_small t0,t1,t2,t3;
mr_small z0,z1,z2,z3,z4,z5,z6,z7;
mr_small x0,x1,x2,x3,y0,y1,y2,y3;
x0=x[0]; x1=x[1]; x2=x[2]; x3=x[3];
y0=y[0]; y1=y[1]; y2=y[2]; y3=y[3];
q0=mr_mul2(x0,y0,&r0);
q1=mr_mul2(x1,y1,&r1);
tx=x0^x1;
ty=y0^y1;
q2=mr_mul2(tx,ty,&r2);
z0=r0;
q0^=r1;
z1=q0^r0^r2;
z2=q0^q1^q2;
z3=q1;
q0=mr_mul2(x2,y2,&r0);
q1=mr_mul2(x3,y3,&r1);
tx=x2^x3;
ty=y2^y3;
q2=mr_mul2(tx,ty,&r2);
z4=r0;
q0^=r1;
z5=q0^r0^r2;
z6=q0^q1^q2;
z7=q1;
x2^=x0;
y2^=y0;
q0=mr_mul2(x2,y2,&r0);
x3^=x1;
y3^=y1;
q1=mr_mul2(x3,y3,&r1);
x2^=x3;
y2^=y3;
q2=mr_mul2(x2,y2,&r2);
q0^=r1;
t0=z0^z4^r0;
t1=z1^z5^q0^r0^r2;
t2=z2^z6^q0^q1^q2;
t3=z3^z7^q1;
z2^=t0;
z3^=t1;
z4^=t2;
z5^=t3;
z[0]=z0;
z[1]=z1;
z[2]=z2;
z[3]=z3;
z[4]=z4;
z[5]=z5;
z[6]=z6;
z[7]=z7;
}
static void mr_bottom5(mr_small *x,mr_small *y,mr_small *z)
{ /* Montgomery's 5x5 formula. Requires 13 mr_muls.... */
mr_small u0,v0,u1,v1,u2,v2,s0,t0,q,r,t;
mr_small z1,z2,z3,z4,z5,z6,z7,z8;
mr_small x0,x1,x2,x3,x4,y0,y1,y2,y3,y4;
x0=x[0]; x1=x[1]; x2=x[2]; x3=x[3]; x4=x[4];
y0=y[0]; y1=y[1]; y2=y[2]; y3=y[3]; y4=y[4];
q=mr_mul2(x0,y0,&r);
t=q^r;
z[0]=r;
z1=t;
z2=t;
z3=q;
z4=r;
z5=t;
z6=t;
z7=q;
q=mr_mul2(x1,y1,&r);
z1^=r;
z2^=q;
z4^=r;
z5^=q;
q=mr_mul2(x3,y3,&r);
z4^=r;
z5^=q;
z7^=r;
z8=q;
q=mr_mul2(x4,y4,&r);
t=q^r;
z2^=r;
z3^=t;
z4^=t;
z5^=q;
z6^=r;
z7^=t;
z8^=t;
z[9]=q;
u0=x0^x4; /* u0=a0+a4 */
v0=y0^y4; /* v0=b0+b4 */
q=mr_mul2(u0,v0,&r);
t=q^r;
z2^=r;
z3^=t;
z4^=q;
z5^=r;
z6^=t;
z7^=q;
u1=x0^x1; /* u1=a0+a1 */
v1=y0^y1; /* v1=b0+b1 */
q=mr_mul2(u1,v1,&r);
t=q^r;
z1^=r;
z2^=t;
z3^=q;
z4^=r;
z5^=t;
z6^=q;
u2=x3^x4; /* u2=a3+a4 */
v2=y3^y4; /* v2=b3+b4 */
q=mr_mul2(u2,v2,&r);
t=q^r;
z3^=r;
z4^=t;
z5^=q;
z6^=r;
z7^=t;
z8^=q;
/*u=u1^u2; u=a0+a1+a3+a4 */
/*v=v1^v2; v=b0+b1+b3+b4 */
q=mr_mul2(u1^u2,v1^v2,&r);
z3^=r;
z4^=q;
z5^=r;
z6^=q;
s0=u1^x2; /* s0=a0+a1+a2 */
t0=v1^y2; /* t0=b0+b1+b2 */
u2^=x2; /* u2=a2+a3+a4 */
v2^=y2; /* v2=b2+b3+b4 */
u1^=u2; /* u1=a0+a1+a2+a3+a4 */
v1^=v2; /* v1=b0+b1+b2+b3+b4 */
q=mr_mul2(u1,v1,&r);
t=q^r;
z3^=r;
z4^=t;
z5^=t;
z6^=q;
u2^=x0; /* s1=a0+a2+a3+a4 */
v2^=y0; /* t1=b0+b2+b3+b4 */
q=mr_mul2(u2,v2,&r);
z3^=r;
z4^=q;
z6^=r;
z7^=q;
s0^=x4; /* s0=a0+a1+a2+a4 */
t0^=y4; /* t0=b0+b1+b2+b4 */
q=mr_mul2(s0,t0,&r);
z2^=r;
z3^=q;
z5^=r;
z6^=q;
u2^=x4; /* u2=a0+a2+a3 */
v2^=y4; /* v2=b0+b2+b3 */
q=mr_mul2(u2,v2,&r);
z4^=r;
z5^=q;
z6^=r;
z7^=q;
s0^=x0; /* s0=a1+a2+a4 */
t0^=y0; /* t0=b1+b2+b4 */
q=mr_mul2(s0,t0,&r);
z2^=r;
z3^=q;
z4^=r;
z5^=q;
z[1]=z1;
z[2]=z2;
z[3]=z3;
z[4]=z4;
z[5]=z5;
z[6]=z6;
z[7]=z7;
z[8]=z8;
}
void karmul2(int n,mr_small *t,mr_small *x,mr_small *y,mr_small *z)
{ /* Karatsuba multiplication - note that n can be odd or even */
int m,nd2,nd,md,md2;
if (n<=5)
{
if (n==1)
{
z[1]=mr_mul2(x[0],y[0],&z[0]);
return;
}
if (n==2)
{
mr_bottom2(x,y,z);
return;
}
if (n==3)
{
mr_bottom3(x,y,z);
return;
}
if (n==4)
{
mr_bottom4(x,y,z);
return;
}
if (n==5)
{
mr_bottom5(x,y,z);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -