📄 mrecgf2m.c
字号:
{
w=gx[i]; gx[i]=0;
gx[i-8]^=(w>>27);
gx[i-9]^=(w<<5);
gx[i-5]^=(w>>4)^(w>>26)^(w>>30);
gx[i-6]^=(w<<28)^(w<<6)^(w<<2);
}
top=gx[8]>>27;
gx[0]^=top;
top<<=27;
gx[3]^=(top>>4)^(top>>26)^(top>>30);
gx[2]^=(top<<28)^(top<<6)^(top<<2);
gx[8]^=top;
x->len=9;
if (gx[8]==0) mr_lzero(x);
return;
}
if (M==379 && A==315)
{
for (i=xl-1;i>=12;i--)
{
w=gx[i]; gx[i]=0;
gx[i-11]^=(w>>27);
gx[i-12]^=(w<<5);
gx[i-2]^=((w)^(w>>14)^(w>>28));
gx[i-3]^=(w<<18)^(w<<4);
}
top=gx[11]>>27;
gx[0]^=top;
top<<=27;
gx[9]^=((top)^(top>>14)^(top>>28));
gx[8]^=(top<<18)^(top<<4);
gx[11]^=top;
x->len=12;
if (gx[11]==0) mr_lzero(x);
return;
}
#endif
k1=1+M/MIRACL; /* words from MSB to LSB */
if (xl<=k1)
{
if (numbits(x)<=M) return;
}
rs1=M%MIRACL;
ls1=MIRACL-rs1;
if (M-A < MIRACL)
{ /* slow way */
while (numbits(x)>=M+1)
{
copy(mr_mip->modulus,mr_mip->w7);
shiftleftbits(mr_mip->w7,numbits(x)-M-1);
add2(x,mr_mip->w7,x);
}
return;
}
k2=1+(M-A)/MIRACL; /* words from MSB to bit */
rs2=(M-A)%MIRACL;
ls2=MIRACL-rs2;
if (B)
{ /* Pentanomial */
k3=1+(M-B)/MIRACL;
rs3=(M-B)%MIRACL;
ls3=MIRACL-rs3;
k4=1+(M-C)/MIRACL;
rs4=(M-C)%MIRACL;
ls4=MIRACL-rs4;
}
for (i=xl-1;i>=k1;i--)
{
w=gx[i]; gx[i]=0;
if (rs1==0) gx[i-k1+1]^=w;
else
{
gx[i-k1+1]^=(w>>rs1);
gx[i-k1]^=(w<<ls1);
}
if (rs2==0) gx[i-k2+1]^=w;
else
{
gx[i-k2+1]^=(w>>rs2);
gx[i-k2]^=(w<<ls2);
}
if (B)
{
if (rs3==0) gx[i-k3+1]^=w;
else
{
gx[i-k3+1]^=(w>>rs3);
gx[i-k3]^=(w<<ls3);
}
if (rs4==0) gx[i-k4+1]^=w;
else
{
gx[i-k4+1]^=(w>>rs4);
gx[i-k4]^=(w<<ls4);
}
}
}
top=gx[k1-1]>>rs1;
if (top!=0)
{
gx[0]^=top;
top<<=rs1;
if (rs2==0) gx[k1-k2]^=top;
else
{
gx[k1-k2]^=(top>>rs2);
if (k1>k2) gx[k1-k2-1]^=(top<<ls2);
}
if (B)
{
if (rs3==0) gx[k1-k3]^=top;
else
{
gx[k1-k3]^=(top>>rs3);
if (k1>k3) gx[k1-k3-1]^=(top<<ls3);
}
if (rs4==0) gx[k1-k4]^=top;
else
{
gx[k1-k4]^=(top>>rs4);
if (k1>k4) gx[k1-k4-1]^=(top<<ls4);
}
}
gx[k1-1]^=top;
}
x->len=k1;
if (gx[k1-1]==0) mr_lzero(x);
}
void incr2(big x,int n,big w)
{ /* increment x by small amount */
if (x!=w) copy(x,w);
if (n==0) return;
if (w->len==0)
{
w->len=1;
w->w[0]=n;
}
else
{
w->w[0]^=(mr_small)n;
if (w->len==1 && w->w[0]==0) w->len=0;
}
}
void modsquare2(_MIPD_ big x,big w)
{ /* w=x*x mod f */
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
square2(x,mr_mip->w0);
reduce2(_MIPP_ mr_mip->w0,mr_mip->w0);
copy(mr_mip->w0,w);
}
/* Experimental code for GF(2^103) modular multiplication *
* Inspired by Robert Harley's ECDL code */
#ifdef SP103
#ifdef __GNUC__
#include <xmmintrin.h>
#else
#include <emmintrin.h>
#endif
void modmult2(_MIPD_ big x,big y,big w)
{
int i,j;
mr_small b;
__m128i t[16];
__m128i m,r,s,p,q,xe,xo;
__m64 a3,a2,a1,a0,top;
if (x==y)
{
modsquare2(_MIPP_ x,w);
return;
}
if (x->len==0 || y->len==0)
{
zero(w);
return;
}
m=_mm_set_epi32(0,0,0xff<<24,0); /* shifting mask */
/* precompute a small table */
t[0]=_mm_set1_epi32(0);
xe=_mm_set_epi32(0,x->w[2],0,x->w[0]);
xo=_mm_set_epi32(0,x->w[3],0,x->w[1]);
t[1]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
xe=_mm_slli_epi64(xe,1);
xo=_mm_slli_epi64(xo,1);
t[2]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
t[3]=_mm_xor_si128(t[2],t[1]);
xe=_mm_slli_epi64(xe,1);
xo=_mm_slli_epi64(xo,1);
t[4]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
t[5]=_mm_xor_si128(t[4],t[1]);
t[6]=_mm_xor_si128(t[4],t[2]);
t[7]=_mm_xor_si128(t[4],t[3]);
xe=_mm_slli_epi64(xe,1);
xo=_mm_slli_epi64(xo,1);
t[8]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
t[9]=_mm_xor_si128(t[8],t[1]);
t[10]=_mm_xor_si128(t[8],t[2]);
t[11]=_mm_xor_si128(t[8],t[3]);
t[12]=_mm_xor_si128(t[8],t[4]);
t[13]=_mm_xor_si128(t[8],t[5]);
t[14]=_mm_xor_si128(t[8],t[6]);
t[15]=_mm_xor_si128(t[8],t[7]);
b=y->w[0];
i=b&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4); /* net shift left 4 */
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
p=q=r; q=_mm_srli_si128(q,1);
i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,1);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,2);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>24)&0xf; j=(b>>28); r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,3);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
b=y->w[1];
i=(b)&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,4);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,5);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,6);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>24)&0xf; j=(b>>28); r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,7);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
b=y->w[2];
i=(b)&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,8);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,9);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,10);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>24)&0xf; j=(b>>28); r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,11);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
b=y->w[3];
i=(b)&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,12);
p=_mm_xor_si128(p,r);
q=_mm_srli_si128(q,4); /* only 103 bits, so we are done */
/* modular reduction - x^103+x^9+1 */
a0=_mm_movepi64_pi64(p);
a1=_mm_movepi64_pi64(_mm_srli_si128(p,8));
a2=_mm_movepi64_pi64(q);
a3=_mm_movepi64_pi64(_mm_srli_si128(q,8));
a2=_m_pxor(a2,_m_psrlqi(a3,39));
a2=_m_pxor(a2,_m_psrlqi(a3,30));
a1=_m_pxor(a1,_m_psllqi(a3,25));
a1=_m_pxor(a1,_m_psllqi(a3,34));
a1=_m_pxor(a1,_m_psrlqi(a2,39));
a1=_m_pxor(a1,_m_psrlqi(a2,30));
a0=_m_pxor(a0,_m_psllqi(a2,25));
a0=_m_pxor(a0,_m_psllqi(a2,34));
top=_m_psrlqi(a1,39);
a0=_m_pxor(a0,top);
top=_m_psllqi(top,39);
a0=_m_pxor(a0,_m_psrlqi(top,30));
a1=_m_pxor(a1,top);
if (w->len>4) zero(w);
w->w[0]=_m_to_int(a0);
a0=_m_psrlqi(a0,32);
w->w[1]=_m_to_int(a0);
w->w[2]=_m_to_int(a1);
a1=_m_psrlqi(a1,32);
w->w[3]=_m_to_int(a1);
w->len=4;
if (w->w[3]==0) mr_lzero(w);
_m_empty();
}
#endif
#ifdef SP79
#ifdef __GNUC__
#include <xmmintrin.h>
#else
#include <emmintrin.h>
#endif
void modmult2(_MIPD_ big x,big y,big w)
{
int i,j;
mr_small b;
__m128i t[16];
__m128i m,r,s,p,q,xe,xo;
__m64 a2,a1,a0,top;
if (x==y)
{
modsquare2(_MIPP_ x,w);
return;
}
if (x->len==0 || y->len==0)
{
zero(w);
return;
}
m=_mm_set_epi32(0,0,0xff<<24,0); /* shifting mask */
/* precompute a small table */
t[0]=_mm_set1_epi32(0);
xe=_mm_set_epi32(0,x->w[2],0,x->w[0]);
xo=_mm_set_epi32(0,0,0,x->w[1]);
t[1]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
xe=_mm_slli_epi64(xe,1);
xo=_mm_slli_epi64(xo,1);
t[2]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
t[3]=_mm_xor_si128(t[2],t[1]);
xe=_mm_slli_epi64(xe,1);
xo=_mm_slli_epi64(xo,1);
t[4]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
t[5]=_mm_xor_si128(t[4],t[1]);
t[6]=_mm_xor_si128(t[4],t[2]);
t[7]=_mm_xor_si128(t[4],t[3]);
xe=_mm_slli_epi64(xe,1);
xo=_mm_slli_epi64(xo,1);
t[8]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
t[9]=_mm_xor_si128(t[8],t[1]);
t[10]=_mm_xor_si128(t[8],t[2]);
t[11]=_mm_xor_si128(t[8],t[3]);
t[12]=_mm_xor_si128(t[8],t[4]);
t[13]=_mm_xor_si128(t[8],t[5]);
t[14]=_mm_xor_si128(t[8],t[6]);
t[15]=_mm_xor_si128(t[8],t[7]);
b=y->w[0];
i=b&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4); /* net shift left 4 */
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
p=q=r; q=_mm_srli_si128(q,1);
i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,1);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,2);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>24)&0xf; j=(b>>28); r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,3);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
b=y->w[1];
i=(b)&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,4);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,5);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,6);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>24)&0xf; j=(b>>28); r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,7);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
b=y->w[2];
i=(b)&0xf; j=(b>>4)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,8);
p=_mm_xor_si128(p,r); q=_mm_srli_si128(q,1);
i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j];
s=_mm_and_si128(r,m); r=_mm_slli_epi64(r,4);
s=_mm_slli_si128(s,1); s=_mm_srli_epi64(s,4);
r=_mm_xor_si128(r,s); r=_mm_xor_si128(r,t[i]);
q=_mm_xor_si128(q,r); r=_mm_slli_si128(r,9);
p=_mm_xor_si128(p,r);
q=_mm_srli_si128(q,7); /* only 79 bits, so we are done */
/* modular reduction - x^79+x^9+1 */
a0=_mm_movepi64_pi64(p);
a1=_mm_movepi64_pi64(_mm_srli_si128(p,8));
a2=_mm_movepi64_pi64(q);
a1=_m_pxor(a1,_m_psrlqi(a2,15));
a1=_m_pxor(a1,_m_psrlqi(a2,6));
a0=_m_pxor(a0,_m_psllqi(a2,49));
a0=_m_pxor(a0,_m_psllqi(a2,58));
top=_m_psrlqi(a1,15);
a0=_m_pxor(a0,top);
top=_m_psllqi(top,15);
a0=_m_pxor(a0,_m_psrlqi(top,6));
a1=_m_pxor(a1,top);
w->w[2]=_m_to_int(a1);
if (w->len>3)
{ /* Yes I know its crazy, but its needed to fix the broken /O2 optimizer */
for (i=3;i<w->len;i++) w->w[i]=0;
}
w->w[0]=_m_to_int(a0);
a0=_m_psrlqi(a0,32);
w->w[1]=_m_to_int(a0);
w->len=3;
if (w->w[2]==0) mr_lzero(w);
_m_empty();
}
#endif
#ifndef SP103
#ifndef SP79
void modmult2(_MIPD_ big x,big y,big w)
{ /* w=x*y mod f */
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -