📄 mrfast.c

📁 miracl-大数运算库,大家使用有什么问题请多多提意见
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 *   MIRACL fast fourier multiplication routine, using 3 prime method.
 *   mrfast.c  - only faster for very high precision multiplication
 *               of numbers > about 4096 bits  (see below)
 *   See "The Fast Fourier Transform in a Finite Field"  by J.M. Pollard,
 *   Mathematics of Computation, Vol. 25, No. 114, April 1971, pp365-374
 *   Also Knuth Vol. 2, Chapt. 4.3.3
 *
 *   Takes time preportional to 9+15N+9N.lg(N) to multiply two different 
 *   N digit numbers. This reduces to 6+18N+6N.lg(N) when squaring.
 *   The classic method takes N.N and N(N+1)/2 respectively
 *
 *   Fast Polynomial arithmetic 
 * 
 *   See "A New Polynomial Factorisation Algorithm and its Implementation"
 *   by Victor Shoup, Jl. Symbolic Computation 1996
 *   Uses FFT method for fast arithmetic of large degree polynomials
 *
 *   Copyright (c) 1988-2003 Shamus Software Ltd.
 */

#include <stdlib.h>
#include "miracl.h"

#ifndef MR_STATIC

static mr_utype twop(int n)
{ /* 2^n */
#ifdef MR_FP
    int i;
#endif 
    mr_utype r=1;
    if (n==0) return r;
#ifdef MR_FP
    for (i=0;i<n;i++) r*=2.0;
    return r;
#else
    return  (r<<n);
#endif 
}

int mr_fft_init(_MIPD_ int logn,big m1,big m2,BOOL cr)
{ /* logn is number of bits, m1 and m2 are the largest elements 
     that will arise in each number to be multiplied */
    mr_small kk;
    int i,j,newn,pr;
    mr_utype p,proot;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    newn=(1<<logn);
    mr_mip->check=OFF;
    multiply(_MIPP_ m1,m2,mr_mip->w5);
    premult(_MIPP_ mr_mip->w5,2*newn+1,mr_mip->w5);
    kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);  
    if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);

    pr=0;
    while (size(mr_mip->w5)>0)
    { /* find out how many primes will be needed */
        do
        { 
            kk--;
            p=kk*newn+1;
        } while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
#ifdef MR_FP_ROUNDING
        mr_sdiv(_MIPP_ mr_mip->w5,p,mr_invert(p),mr_mip->w5);
#else
        mr_sdiv(_MIPP_ mr_mip->w5,p,mr_mip->w5);
#endif
        pr++;
    }
   mr_mip->check=ON;
/* if nothing has changed, don't recalculate */
    if (logn<=mr_mip->logN && pr==mr_mip->nprimes) return pr;
    fft_reset(_MIPPO_ );


    mr_mip->prime=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
    mr_mip->inverse=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
    mr_mip->roots=(mr_utype**)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
    mr_mip->t= (mr_utype **)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
    mr_mip->cr=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
    mr_mip->wa=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
    mr_mip->wb=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
    mr_mip->wc=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));

    kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
    if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
    for (i=0;i<pr;i++)
    { /* find the primes again */
        mr_mip->roots[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
        mr_mip->t[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));

        do
        { 
            kk--;
            p=kk*newn+1;
        } while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);

        proot=p-1;
        for (j=1;j<logn;j++) proot=sqrmp(proot,p); 
        mr_mip->roots[i][0]=proot;   /* build residue table */
        for (j=1;j<newn;j++) mr_mip->roots[i][j]=smul(mr_mip->roots[i][j-1],proot,p);
        mr_mip->inverse[i]=invers((mr_small)newn,p);
        mr_mip->prime[i]=p;
    }

    mr_mip->logN=logn;

    mr_mip->nprimes=pr;
/* set up chinese remainder structure */
    if (cr)
        if (!scrt_init(_MIPP_ &mr_mip->chin,pr,mr_mip->prime)) fft_reset(_MIPPO_ );
    return pr;
}

void fft_reset(_MIPDO_ )
{ /* reclaim any space used by FFT */
    int i;
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif

    if (mr_mip->degree!=0)
    { /* clear any precomputed tables */
        for (i=0;i<mr_mip->nprimes;i++)
        {
            mr_free(mr_mip->s1[i]);
            mr_free(mr_mip->s2[i]);
        }
        mr_free(mr_mip->s1);
        mr_free(mr_mip->s2);
        mr_mip->degree=0;
    }

    if (mr_mip->logN!=0)
    { /* clear away old stuff */
        for (i=0;i<mr_mip->nprimes;i++) 
        {
            mr_free(mr_mip->roots[i]);
            mr_free(mr_mip->t[i]);
        }
        mr_free(mr_mip->wa);
        mr_free(mr_mip->wb);
        mr_free(mr_mip->wc);
        mr_free(mr_mip->cr);
        mr_free(mr_mip->t);
        mr_free(mr_mip->roots);
        mr_free(mr_mip->inverse);
        mr_free(mr_mip->prime);
        mr_mip->nprimes=0;
        mr_mip->logN=0;
        mr_mip->same=FALSE;
    }
/* clear CRT structure */
    if (mr_mip->chin.NP!=0) scrt_end(&mr_mip->chin);
}

void mr_dif_fft(_MIPD_ int logn,int pr,mr_utype *data)
{ /* decimate-in-frequency fourier transform */
    int mmax,m,j,k,istep,i,ii,jj,newn,offset;
    mr_utype w,temp,prime,*roots;
#ifdef MR_NOASM
    mr_large dble,ldres;
#endif
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
    mr_large iprime;
#endif
    prime=mr_mip->prime[pr];
    roots=mr_mip->roots[pr];
#ifdef MR_FP_ROUNDING
    iprime=mr_invert(prime);
#endif

    newn=(1<<logn);
    offset=(mr_mip->logN-logn);
    mmax=newn;
    for (k=0;k<logn;k++) {
        istep=mmax;
        mmax>>=1; 
        ii=newn;
        jj=newn/istep;
        ii-=jj;
        for (i=0;i<newn;i+=istep)
        { /* special case root=1 */ 
            j=i+mmax;
            temp=data[i]-data[j];
            if (temp<0) temp+=prime;
            data[i]+=data[j];
            if (data[i]>=prime) data[i]-=prime;
            data[j]=temp;
        }
        for (m=1;m<mmax;m++) {

            w=roots[(ii<<offset)-1];
            ii-=jj;

#ifndef MR_FP
#if INLINE_ASM == 3

#define MR_IMPASM

/*  esi = i
    edi = j
    ebx = data
    edx = temp
    ecx = prime
*/    

        ASM mov   ecx,DWORD PTR prime
        ASM mov   ebx,DWORD PTR data
        ASM mov   esi,DWORD PTR m
    hop1:
        ASM cmp   esi,DWORD PTR newn
        ASM jge   hop0
        ASM mov   edi,esi
        ASM add   edi,DWORD PTR mmax

        ASM mov   edx,[ebx+4*esi]
        ASM add   edx,ecx
        ASM sub   edx,[ebx+4*edi]
        ASM mov   eax,[ebx+4*esi]
        ASM add   eax,[ebx+4*edi]
        ASM cmp   eax,ecx
        ASM jl    hop2
        ASM sub   eax,ecx
    hop2:
        ASM mov   [ebx+4*esi],eax 

        ASM mov   eax,DWORD PTR w      
        ASM mul   edx                  
        ASM div   ecx
        ASM mov   [ebx+4*edi],edx 

        ASM add   esi,DWORD PTR istep
        ASM jmp   hop1
    hop0:
        ASM nop
#endif
#if INLINE_ASM == 4

#define MR_IMPASM
    ASM (
           "movl %0,%%ecx\n"
           "movl %1,%%ebx\n"
           "movl %2,%%esi\n"
         "hop1:\n"
           "cmpl %3,%%esi\n"
           "jge hop0\n"
           "movl %%esi,%%edi\n"
           "addl %4,%%edi\n"

           "movl (%%ebx,%%esi,4),%%edx\n"
           "addl %%ecx,%%edx\n"
           "subl (%%ebx,%%edi,4),%%edx\n"
           "movl (%%ebx,%%esi,4),%%eax\n"
           "addl (%%ebx,%%edi,4),%%eax\n"
           "cmpl %%ecx,%%eax\n"
           "jl hop2\n"
           "subl %%ecx,%%eax\n"
         "hop2:\n"
           "movl %%eax,(%%ebx,%%esi,4)\n"
           "movl %5,%%eax\n"
           "mull %%edx\n"
           "divl %%ecx\n"
           "movl %%edx,(%%ebx,%%edi,4)\n"
           "addl %6,%%esi\n"
           "jmp hop1\n"
         "hop0:\n"
           "nop\n"
         :
         : "m"(prime),"m"(data),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
         : "eax","edi","esi","edi","ebx","ecx","edx","memory"
    );
#endif
#endif
#ifndef MR_IMPASM
            for (i=m;i<newn;i+=istep) {
                j=i+mmax;
                temp=prime+data[i]-data[j];
                data[i]+=data[j];
                if (data[i]>=prime) data[i]-=prime;

#ifdef MR_NOASM
                dble=(mr_large)w*temp;
#ifdef MR_FP_ROUNDING
                data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
#else
                data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
#endif
#else
#ifdef MR_FP_ROUNDING
                imuldiv(w,temp,(mr_small)0,prime,iprime,(mr_small *)&data[j]);
#else
                muldiv(w,temp,(mr_small)0,prime,(mr_small *)&data[j]);
#endif
#endif
            }
#endif
        }
    }
}

void mr_dit_fft(_MIPD_ int logn,int pr,mr_utype *data)
{ /* decimate-in-time inverse fourier transform */
    int mmax,m,j,k,i,istep,ii,jj,newn,offset;
    mr_utype w,temp,prime,*roots;
#ifdef MR_NOASM
    mr_large dble,ldres;
#endif
#ifdef MR_OS_THREADS
    miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
    mr_large iprime;
#endif
    prime=mr_mip->prime[pr];
    roots=mr_mip->roots[pr];
    offset=(mr_mip->logN-logn);
#ifdef MR_FP_ROUNDING
    iprime=mr_invert(prime);
#endif
    newn=(1<<logn);
    mmax=1;
    for (k=0;k<logn;k++) {
        istep=(mmax<<1); 
        ii=0;
        jj=newn/istep;
        ii+=jj;
        for (i=0;i<newn;i+=istep)
        { /* special case root=1 */
            j=i+mmax;
            temp=data[j];
            data[j]=data[i]-temp;
            if (data[j]<0) data[j]+=prime;
            data[i]+=temp;
            if (data[i]>=prime) data[i]-=prime;
        }
        for (m=1;m<mmax;m++) {
            w=roots[(ii<<offset)-1];
            ii+=jj;

#ifndef MR_FP
#if INLINE_ASM == 3

#define MR_IMPASM
/*  esi = i
    edi = j
    ebx = data
    edx = temp
    ecx = prime
*/    


        ASM mov   ebx,DWORD PTR data
        ASM mov   ecx,DWORD PTR prime
        ASM mov   esi,DWORD PTR m
hop4:
        ASM cmp   esi,DWORD PTR newn
        ASM jge   hop5
        ASM mov   edi,esi
        ASM add   edi,DWORD PTR mmax

        ASM mov   eax,[ebx+4*edi]
        ASM mul   DWORD PTR w
        ASM div   ecx

        ASM cmp   [ebx+4*esi],ecx
        ASM jl    hop3
        ASM sub   [ebx+4*esi],ecx
hop3:   
        ASM mov   eax,[ebx+4*esi]
        ASM add   [ebx+4*esi],edx
        ASM add   eax,ecx
        ASM sub   eax,edx
        ASM mov   [ebx+4*edi],eax

        ASM add   esi,DWORD PTR istep
        ASM jmp   hop4
hop5:
        ASM nop
#endif
#if INLINE_ASM == 4

#define MR_IMPASM
    ASM (
           "movl %0,%%ebx\n"
           "movl %1,%%ecx\n"
           "movl %2,%%esi\n"
         "hop4:\n"
           "cmpl %3,%%esi\n"
           "jge hop5\n"
           "movl %%esi,%%edi\n"
           "addl %4,%%edi\n"

           "movl (%%ebx,%%edi,4),%%eax\n"
           "mull %5\n"
           "divl %%ecx\n"

           "cmpl %%ecx,(%%ebx,%%esi,4)\n"
           "jl hop3\n"
           "subl %%ecx,(%%ebx,%%esi,4)\n"
         "hop3:\n"
           "movl (%%ebx,%%esi,4),%%eax\n"
           "addl %%edx,(%%ebx,%%esi,4)\n"
           "addl %%ecx,%%eax\n"
           "subl %%edx,%%eax\n"
           "movl %%eax,(%%ebx,%%edi,4)\n"

           "addl %6,%%esi\n"
           "jmp hop4\n"
         "hop5:\n"
            "nop\n"
         :
         : "m"(data),"m"(prime),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
         : "eax","edi","esi","edi","ebx","ecx","edx","memory"
        );
#endif
#endif
#ifndef MR_IMPASM
            for (i=m;i<newn;i+=istep) {
                j=i+mmax;
#ifdef MR_NOASM
                dble=(mr_large)w*data[j];
#ifdef MR_FP_ROUNDING
                temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
#else
                temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
#endif
12 3 下一页
💿 文件大小 1432 K
👤 上传用户 foxfsk
📂 所属分类数据结构
🏷️ 相关标签

#miracl #运算 #家
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -