⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mrecgf2m.c

📁 miracl-大数运算库,大家使用有什么问题请多多提意见
💻 C
📖 第 1 页 / 共 5 页
字号:
        {
            w=gx[i]; gx[i]=0;
            gx[i-8]^=(w>>27);
            gx[i-9]^=(w<<5);
            gx[i-5]^=(w>>4)^(w>>26)^(w>>30);
            gx[i-6]^=(w<<28)^(w<<6)^(w<<2);
        }

        top=gx[8]>>27;
        gx[0]^=top;
        top<<=27;
        gx[3]^=(top>>4)^(top>>26)^(top>>30);
        gx[2]^=(top<<28)^(top<<6)^(top<<2);
        gx[8]^=top;

        x->len=9;
        if (gx[8]==0) mr_lzero(x);

        return;
    }

    if (M==379 && A==315)
    {
        for (i=xl-1;i>=12;i--)
        {
            w=gx[i]; gx[i]=0;
            gx[i-11]^=(w>>27);
            gx[i-12]^=(w<<5);
            gx[i-2]^=((w)^(w>>14)^(w>>28));
            gx[i-3]^=(w<<18)^(w<<4);
        }
        top=gx[11]>>27;
        gx[0]^=top;
        top<<=27;
        gx[9]^=((top)^(top>>14)^(top>>28));
        gx[8]^=(top<<18)^(top<<4);
        gx[11]^=top;

        x->len=12;
        if (gx[11]==0) mr_lzero(x);

        return;
    }

#endif

    k1=1+M/MIRACL;       /* words from MSB to LSB */

    if (xl<=k1)
    {
      if (numbits(x)<=M) return;
    }

    rs1=M%MIRACL;
    ls1=MIRACL-rs1;

    if (M-A < MIRACL)
    { /* slow way */
        while (numbits(x)>=M+1)
        {
            copy(mr_mip->modulus,mr_mip->w7);
            shiftleftbits(mr_mip->w7,numbits(x)-M-1);
            add2(x,mr_mip->w7,x);    
        }
        return;
    }

    k2=1+(M-A)/MIRACL;   /* words from MSB to bit */
    rs2=(M-A)%MIRACL;
    ls2=MIRACL-rs2;

    if (B)
    { /* Pentanomial */
        k3=1+(M-B)/MIRACL;
        rs3=(M-B)%MIRACL;
        ls3=MIRACL-rs3;

        k4=1+(M-C)/MIRACL;
        rs4=(M-C)%MIRACL;
        ls4=MIRACL-rs4;
    }
    
    for (i=xl-1;i>=k1;i--)
    {
        w=gx[i]; gx[i]=0;
        if (rs1==0) gx[i-k1+1]^=w;
        else
        {
            gx[i-k1+1]^=(w>>rs1);
            gx[i-k1]^=(w<<ls1);
        }
        if (rs2==0) gx[i-k2+1]^=w;
        else
        {
            gx[i-k2+1]^=(w>>rs2);
            gx[i-k2]^=(w<<ls2);
        }
        if (B)
        {
            if (rs3==0) gx[i-k3+1]^=w;
            else
            {
                gx[i-k3+1]^=(w>>rs3);
                gx[i-k3]^=(w<<ls3);
            }
            if (rs4==0) gx[i-k4+1]^=w;
            else
            {
                gx[i-k4+1]^=(w>>rs4);
                gx[i-k4]^=(w<<ls4);
            }
        }
    }

    top=gx[k1-1]>>rs1;

    if (top!=0)
    {  
        gx[0]^=top;
        top<<=rs1;

        if (rs2==0) gx[k1-k2]^=top;
        else
        {
            gx[k1-k2]^=(top>>rs2);
            if (k1>k2) gx[k1-k2-1]^=(top<<ls2);
        }
        if (B)
        {
            if (rs3==0) gx[k1-k3]^=top;
            else
            {
                gx[k1-k3]^=(top>>rs3);
                if (k1>k3) gx[k1-k3-1]^=(top<<ls3);
            }
            if (rs4==0) gx[k1-k4]^=top;
            else
            {
                gx[k1-k4]^=(top>>rs4);
                if (k1>k4) gx[k1-k4-1]^=(top<<ls4);
            }
        }
        gx[k1-1]^=top;
    }
    x->len=k1;
    if (gx[k1-1]==0) mr_lzero(x);
}

void incr2(big x,int n,big w)
{ /* increment x by small amount */
    if (x!=w) copy(x,w);
    if (n==0) return;
    if (w->len==0)
    {
        w->len=1;
        w->w[0]=n;
    }
    else
    {
        w->w[0]^=(mr_small)n;
        if (w->len==1 && w->w[0]==0) w->len=0;
    }
}

void modsquare2(_MIPD_ big x,big w)
{ /* w=x*x mod f */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    square2(x,mr_mip->w0);
    reduce2(_MIPP_ mr_mip->w0,mr_mip->w0);
    copy(mr_mip->w0,w);
}

/* Experimental code for GF(2^103) modular multiplication *
 * Inspired by Robert Harley's ECDL code                  */

#ifdef SP103

#ifdef __GNUC__
#include <xmmintrin.h>
#else
#include <emmintrin.h>
#endif

void modmult2(_MIPD_ big x,big y,big w)
{
    int i,j;
    mr_small b;

    __m128i t[16];
    __m128i m,r,s,p,q,xe,xo;
    __m64 a3,a2,a1,a0,top;

    if (x==y)
    {
        modsquare2(_MIPP_ x,w);
        return;
    }
    
    if (x->len==0 || y->len==0)
    {
        zero(w);
        return;
    }

    m=_mm_set_epi32(0,0,0xff<<24,0);    /* shifting mask */

/* precompute a small table */

    t[0]=_mm_set1_epi32(0);
    xe=_mm_set_epi32(0,x->w[2],0,x->w[0]);
    xo=_mm_set_epi32(0,x->w[3],0,x->w[1]);
    t[1]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[2]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[3]=_mm_xor_si128(t[2],t[1]);
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[4]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[5]=_mm_xor_si128(t[4],t[1]);
    t[6]=_mm_xor_si128(t[4],t[2]);
    t[7]=_mm_xor_si128(t[4],t[3]);
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[8]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[9]=_mm_xor_si128(t[8],t[1]);
    t[10]=_mm_xor_si128(t[8],t[2]);
    t[11]=_mm_xor_si128(t[8],t[3]);
    t[12]=_mm_xor_si128(t[8],t[4]);
    t[13]=_mm_xor_si128(t[8],t[5]);
    t[14]=_mm_xor_si128(t[8],t[6]);
    t[15]=_mm_xor_si128(t[8],t[7]);

    b=y->w[0];

    i=b&0xf; j=(b>>4)&0xf;    r=t[j]; 
    s=_mm_and_si128(r,m);     r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);    s=_mm_srli_epi64(s,4);  /* net shift left 4 */
    r=_mm_xor_si128(r,s);     r=_mm_xor_si128(r,t[i]);    
    p=q=r;                    q=_mm_srli_si128(q,1); 

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,1); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,2); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,3); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[1];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,4); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,5); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,6); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,7); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[2];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,8); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,9); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,10); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,11); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[3];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,12); 
    p=_mm_xor_si128(p,r);

    q=_mm_srli_si128(q,4);   /* only 103 bits, so we are done */

/* modular reduction - x^103+x^9+1 */

    a0=_mm_movepi64_pi64(p);
    a1=_mm_movepi64_pi64(_mm_srli_si128(p,8));
    a2=_mm_movepi64_pi64(q);
    a3=_mm_movepi64_pi64(_mm_srli_si128(q,8));

    a2=_m_pxor(a2,_m_psrlqi(a3,39));
    a2=_m_pxor(a2,_m_psrlqi(a3,30));
    a1=_m_pxor(a1,_m_psllqi(a3,25));
    a1=_m_pxor(a1,_m_psllqi(a3,34));

    a1=_m_pxor(a1,_m_psrlqi(a2,39));
    a1=_m_pxor(a1,_m_psrlqi(a2,30));
    a0=_m_pxor(a0,_m_psllqi(a2,25));
    a0=_m_pxor(a0,_m_psllqi(a2,34));

    top=_m_psrlqi(a1,39);
    a0=_m_pxor(a0,top);
    top=_m_psllqi(top,39);
    a0=_m_pxor(a0,_m_psrlqi(top,30));
    a1=_m_pxor(a1,top);

    if (w->len>4) zero(w);
    
    w->w[0]=_m_to_int(a0);
    a0=_m_psrlqi(a0,32);
    w->w[1]=_m_to_int(a0);
    w->w[2]=_m_to_int(a1);
    a1=_m_psrlqi(a1,32);
    w->w[3]=_m_to_int(a1);

    w->len=4;
    if (w->w[3]==0) mr_lzero(w);
    _m_empty();
}

#endif

#ifdef SP79

#ifdef __GNUC__
#include <xmmintrin.h>
#else
#include <emmintrin.h>
#endif

void modmult2(_MIPD_ big x,big y,big w)
{
    int i,j;
    mr_small b;

    __m128i t[16];
    __m128i m,r,s,p,q,xe,xo;
    __m64 a2,a1,a0,top;

    if (x==y)
    {
        modsquare2(_MIPP_ x,w);
        return;
    }
    
    if (x->len==0 || y->len==0)
    {
        zero(w);
        return;
    }

    m=_mm_set_epi32(0,0,0xff<<24,0);    /* shifting mask */

/* precompute a small table */

    t[0]=_mm_set1_epi32(0);
    xe=_mm_set_epi32(0,x->w[2],0,x->w[0]);
    xo=_mm_set_epi32(0,0,0,x->w[1]);
    t[1]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[2]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[3]=_mm_xor_si128(t[2],t[1]);
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[4]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[5]=_mm_xor_si128(t[4],t[1]);
    t[6]=_mm_xor_si128(t[4],t[2]);
    t[7]=_mm_xor_si128(t[4],t[3]);
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[8]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[9]=_mm_xor_si128(t[8],t[1]);
    t[10]=_mm_xor_si128(t[8],t[2]);
    t[11]=_mm_xor_si128(t[8],t[3]);
    t[12]=_mm_xor_si128(t[8],t[4]);
    t[13]=_mm_xor_si128(t[8],t[5]);
    t[14]=_mm_xor_si128(t[8],t[6]);
    t[15]=_mm_xor_si128(t[8],t[7]);

    b=y->w[0];

    i=b&0xf; j=(b>>4)&0xf;    r=t[j]; 
    s=_mm_and_si128(r,m);     r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);    s=_mm_srli_epi64(s,4);  /* net shift left 4 */
    r=_mm_xor_si128(r,s);     r=_mm_xor_si128(r,t[i]);    
    p=q=r;                    q=_mm_srli_si128(q,1); 

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,1); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,2); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,3); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[1];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,4); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,5); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,6); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,7); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[2];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,8); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,9); 
    p=_mm_xor_si128(p,r); 

    q=_mm_srli_si128(q,7);    /* only 79 bits, so we are done */
    
/* modular reduction - x^79+x^9+1 */

    a0=_mm_movepi64_pi64(p);
    a1=_mm_movepi64_pi64(_mm_srli_si128(p,8));
    a2=_mm_movepi64_pi64(q);

    a1=_m_pxor(a1,_m_psrlqi(a2,15));
    a1=_m_pxor(a1,_m_psrlqi(a2,6));
    a0=_m_pxor(a0,_m_psllqi(a2,49));
    a0=_m_pxor(a0,_m_psllqi(a2,58));

    top=_m_psrlqi(a1,15);
    a0=_m_pxor(a0,top);
    top=_m_psllqi(top,15);
    a0=_m_pxor(a0,_m_psrlqi(top,6));
    a1=_m_pxor(a1,top);

    w->w[2]=_m_to_int(a1);

    if (w->len>3)
    { /* Yes I know its crazy, but its needed to fix the broken /O2 optimizer */
        for (i=3;i<w->len;i++) w->w[i]=0;
    }

    w->w[0]=_m_to_int(a0);
    a0=_m_psrlqi(a0,32);
    w->w[1]=_m_to_int(a0);
    
    w->len=3;
    if (w->w[2]==0) mr_lzero(w);
    _m_empty();
}

#endif

#ifndef SP103
#ifndef SP79

void modmult2(_MIPD_ big x,big y,big w)
{ /* w=x*y mod f */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -