⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mrgf2m.c

📁 miracl大数库 miracl大数库 miracl大数库
💻 C
📖 第 1 页 / 共 5 页
字号:
    __m128i m,r,s,p,q,xe,xo;
    __m64 a2,a1,a0,top;

    if (x==y)
    {
        modsquare2(_MIPP_ x,w);
        return;
    }
#ifdef MR_COUNT_OPS
fpm2++; 
#endif    
    if (x->len==0 || y->len==0)
    {
        zero(w);
        return;
    }

    m=_mm_set_epi32(0,0,0xff<<24,0);    /* shifting mask */

/* precompute a small table */

    t[0]=_mm_set1_epi32(0);
    xe=_mm_set_epi32(0,x->w[2],0,x->w[0]);
    xo=_mm_set_epi32(0,0,0,x->w[1]);
    t[1]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[2]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[3]=_mm_xor_si128(t[2],t[1]);
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[4]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[5]=_mm_xor_si128(t[4],t[1]);
    t[6]=_mm_xor_si128(t[4],t[2]);
    t[7]=_mm_xor_si128(t[4],t[3]);
    xe=_mm_slli_epi64(xe,1);
    xo=_mm_slli_epi64(xo,1);
    t[8]=_mm_xor_si128(xe,_mm_slli_si128(xo,4));
    t[9]=_mm_xor_si128(t[8],t[1]);
    t[10]=_mm_xor_si128(t[8],t[2]);
    t[11]=_mm_xor_si128(t[8],t[3]);
    t[12]=_mm_xor_si128(t[8],t[4]);
    t[13]=_mm_xor_si128(t[8],t[5]);
    t[14]=_mm_xor_si128(t[8],t[6]);
    t[15]=_mm_xor_si128(t[8],t[7]);

    b=y->w[0];

    i=b&0xf; j=(b>>4)&0xf;    r=t[j]; 
    s=_mm_and_si128(r,m);     r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);    s=_mm_srli_epi64(s,4);  /* net shift left 4 */
    r=_mm_xor_si128(r,s);     r=_mm_xor_si128(r,t[i]);    
    p=q=r;                    q=_mm_srli_si128(q,1); 

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,1); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,2); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,3); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[1];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,4); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,5); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>16)&0xf; j=(b>>20)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,6); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>24)&0xf; j=(b>>28); r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,7); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    b=y->w[2];

    i=(b)&0xf; j=(b>>4)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,8); 
    p=_mm_xor_si128(p,r);    q=_mm_srli_si128(q,1);

    i=(b>>8)&0xf; j=(b>>12)&0xf; r=t[j]; 
    s=_mm_and_si128(r,m);    r=_mm_slli_epi64(r,4);
    s=_mm_slli_si128(s,1);   s=_mm_srli_epi64(s,4);
    r=_mm_xor_si128(r,s);    r=_mm_xor_si128(r,t[i]);
    q=_mm_xor_si128(q,r);    r=_mm_slli_si128(r,9); 
    p=_mm_xor_si128(p,r); 

    q=_mm_srli_si128(q,7);    /* only 79 bits, so we are done */
    
/* modular reduction - x^79+x^9+1 */

    a0=_mm_movepi64_pi64(p);
    a1=_mm_movepi64_pi64(_mm_srli_si128(p,8));
    a2=_mm_movepi64_pi64(q);

    a1=_m_pxor(a1,_m_psrlqi(a2,15));
    a1=_m_pxor(a1,_m_psrlqi(a2,6));
    a0=_m_pxor(a0,_m_psllqi(a2,49));
    a0=_m_pxor(a0,_m_psllqi(a2,58));

    top=_m_psrlqi(a1,15);
    a0=_m_pxor(a0,top);
    top=_m_psllqi(top,15);
    a0=_m_pxor(a0,_m_psrlqi(top,6));
    a1=_m_pxor(a1,top);

    w->w[2]=_m_to_int(a1);

    if (w->len>3)
    { /* Yes I know its crazy, but its needed to fix the broken /O2 optimizer */
        for (i=3;i<w->len;i++) w->w[i]=0;
    }

    w->w[0]=_m_to_int(a0);
    a0=_m_psrlqi(a0,32);
    w->w[1]=_m_to_int(a0);
    
    w->len=3;
    if (w->w[2]==0) mr_lzero(w);
    _m_empty();
}

#endif


#ifndef SP103
#ifndef SP79
/*#ifndef SP271 */

void modmult2(_MIPD_ big x,big y,big w)
{ /* w=x*y mod f, with shortcuts for the trivial operand cases */
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    /* either operand missing - result is cleared */
    if (!x || !y) { zero(w); return; }

    /* same operand passed twice - delegate to modsquare2 */
    if (y==x) { modsquare2(_MIPP_ x,w); return; }

    /* y has zero length - result is cleared */
    if (y->len==0) { zero(w); return; }

    /* y is the single word 1 - result is a copy of x */
    if (y->len==1 && y->w[0]==1) { copy(x,w); return; }

#ifdef MR_COUNT_OPS
fpm2++; 
#endif

    /* general case: form the product in workspace w0, run reduce2 on it
       (w0 is both source and destination), then copy the result into w */
    multiply2(_MIPP_ x,y,mr_mip->w0);
    reduce2(_MIPP_ mr_mip->w0,mr_mip->w0);
    copy(mr_mip->w0,w);
}

#endif
#endif
/*#endif*/

/* Will be *much* faster if M,A,(B and C) are all odd */
/* This could/should be optimized for a particular irreducible polynomial and fixed A, B and C */

void sqroot2(_MIPD_ big x,big y)
{ 
    int i,M,A,B,C;
    int k,n,h,s,a,aw,ab,bw,bb,cw,cb;
 #if MIRACL != 32
    int mm,j;
 #endif
    mr_small *wk,w,we,wo;
    BOOL slow;
/* Using Harley's trick */
    static const mr_small evens[16]=
    {0,1,4,5,2,3,6,7,8,9,12,13,10,11,14,15};
    static const mr_small odds[16]=
    {0,4,1,5,8,12,9,13,2,6,3,7,10,14,11,15};

#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
    M=mr_mip->M;
    A=mr_mip->AA;
    if (A==0)
    {
        mr_berror(_MIPP_ MR_ERR_NO_BASIS);
        return;
    }
    B=mr_mip->BB;
    C=mr_mip->CC;

    slow=FALSE;
    if (B)
    {
        if (M%2!=1 || A%2!=1 || B%2!=1 || C%2!=1) slow=TRUE;
    }
    else
    {
        if (M%2!=1 || A%2!=1) slow=TRUE;
    }

    if (slow)
    {
        copy(x,y);
        for (i=1;i<mr_mip->M;i++)
            modsquare2(_MIPP_ y,y);
        return;
    }

    bb=cb=cw=bw=0;
/* M, A (B and C) are all odd - so use fast
   Fong, Hankerson, Lopez and Menezes method */

    if (x==y)
    {
        copy (x,mr_mip->w0);
        wk=mr_mip->w0->w;
    }
    else
    {
        wk=x->w;
    }
    zero(y);

#if MIRACL==8
    if (M==271 && A==207 && B==175 && C==111)
    {
        y->len=34;
        for (i=0;i<34;i++)
        {
            n=i/2;
            w=wk[i];

            we=evens[((w&0x5)+((w&0x50)>>3))];
            wo=odds[((w&0xA)+((w&0xA0)>>5))];

            i++;
            w=wk[i];

            we|=evens[((w&0x5)+((w&0x50)>>3))]<<4;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<4;
      
            y->w[n]^=we;
            y->w[n+17]=wo;

            y->w[n+13]^=wo;
            y->w[n+11]^=wo;
            y->w[n+7]^=wo;
        }
        if (y->w[33]==0) mr_lzero(y);
        return;
    }
#endif    

#if MIRACL==32
    if (M==1223 && A==255)
    {
        y->len=39;
        for (i=0;i<39;i++)
        {
            n=i/2;
            w=wk[i];

            we=evens[((w&0x5)+((w&0x50)>>3))];
            wo=odds[((w&0xA)+((w&0xA0)>>5))];
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<4;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<4;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<8;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<8;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<12;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<12;

            i++;
            if (i<39)
            {
            w=wk[i];

            we|=evens[((w&0x5)+((w&0x50)>>3))]<<16;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<16;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<20;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<20;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<24;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<24;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<28;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<28;
            }
            y->w[n]^=we;

            y->w[20+n-1]^=wo<<4; 
            y->w[20+n]^=wo>>28; 

            y->w[n+4]^=wo;
        }
        if (y->w[38]==0) mr_lzero(y);
        return;
    }

#endif

#if MIRACL==64
    if (M==1223 && A==255)
    {
        y->len=20;
        for (i=0;i<20;i++)
        {
            n=i/2;
            w=wk[i];

            we=evens[((w&0x5)+((w&0x50)>>3))];
            wo=odds[((w&0xA)+((w&0xA0)>>5))];
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<4;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<4;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<8;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<8;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<12;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<12;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<16;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<16;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<20;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<20;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<24;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<24;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<28;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<28;

            i++;          
            w=wk[i];

            we|=evens[((w&0x5)+((w&0x50)>>3))]<<32;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<32;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<36;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<36;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<40;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<40;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<44;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<44;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<48;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<48;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<52;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<52;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<56;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<56;
            w>>=8;
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<60;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<60;

            y->w[n]^=we;

            y->w[10+n-1]^=wo<<36; 
            y->w[10+n]^=wo>>28; 

            y->w[n+2]^=wo;
        }
        if (y->w[19]==0) mr_lzero(y);
        return;
    }

#endif

    k=1+(M/MIRACL);
    h=(k+1)/2;

    a=(A+1)/2;
    aw=a/MIRACL;
    ab=a%MIRACL;

    if (B)
    {
        a=(B+1)/2;
        bw=a/MIRACL;
        bb=a%MIRACL;

        a=(C+1)/2;
        cw=a/MIRACL;
        cb=a%MIRACL;
    }
    s=h*MIRACL-1-(M-1)/2;
    
    y->len=k;
    for (i=0;i<k;i++)
    {
        n=i/2;
        w=wk[i];
       
#if MIRACL == 32

        we=evens[((w&0x5)+((w&0x50)>>3))];
        wo=odds[((w&0xA)+((w&0xA0)>>5))];
        w>>=8;

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<4;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<4;
        w>>=8;

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<8;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<8;
        w>>=8;

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<12;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<12;

#else
        mm=0;
        we=wo=0;
        for (j=0;j<MIRACL/8;j++)
        {
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<mm;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<mm;
            mm+=4; w>>=8;
        }

#endif
        i++;
        if (i<k)
        {
            w=wk[i];
#if MIRACL == 32

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<16;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<16;
        w>>=8;

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<20;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<20;
        w>>=8;

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<24;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<24;
        w>>=8;

        we|=evens[((w&0x5)+((w&0x50)>>3))]<<28;
        wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<28;


#else 
        for (j=0;j<MIRACL/8;j++)
        {
            we|=evens[((w&0x5)+((w&0x50)>>3))]<<mm;
            wo|=odds[((w&0xA)+((w&0xA0)>>5))]<<mm;
            mm+=4; w>>=8;
        }

#endif
        }
        y->w[n]^=we; 

        if (s==0) y->w[h+n]=wo;
        else
        {
            y->w[h+n-1]^=wo<<(MIRACL-s); 
            y->w[h+n]^=wo>>s;     /* abutt odd bits to even */
        }
        if (ab==0) y->w[n+aw]^=wo;
        else
        {
            y->w[n+aw]^=wo<<ab; 
            y->w[n+aw+1]^=wo>>(MIRACL-ab);
        }
        if (B)
        {
            if (bb==0) y->w[n+bw]^=wo;
            else
            {
                y->w[n+bw]^=wo<<bb; 
                y->w[n+bw+1]^=wo>>(MIRACL-bb);
            }
            if (cb==0) y->w[n+cw]^=wo;
            else
            {
                y->w[n+cw]^=wo

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -