📄 mrfast.c
字号:
/*
* MIRACL fast fourier multiplication routine, using 3 prime method.
* mrfast.c - only faster for very high precision multiplication
* of numbers > about 4096 bits (see below)
* See "The Fast Fourier Transform in a Finite Field" by J.M. Pollard,
* Mathematics of Computation, Vol. 25, No. 114, April 1971, pp365-374
* Also Knuth Vol. 2, Chapt. 4.3.3
*
* Takes time preportional to 9+15N+9N.lg(N) to multiply two different
* N digit numbers. This reduces to 6+18N+6N.lg(N) when squaring.
* The classic method takes N.N and N(N+1)/2 respectively
*
* Fast Polynomial arithmetic
*
* See "A New Polynomial Factorisation Algorithm and its Implementation"
* by Victor Shoup, Jl. Symbolic Computation 1996
* Uses FFT method for fast arithmetic of large degree polynomials
*
* Copyright (c) 1988-2003 Shamus Software Ltd.
*/
#include <stdlib.h>
#include "miracl.h"
#ifndef MR_STATIC
static mr_utype twop(int n)
{ /* 2^n */
#ifdef MR_FP
int i;
#endif
mr_utype r=1;
if (n==0) return r;
#ifdef MR_FP
for (i=0;i<n;i++) r*=2.0;
return r;
#else
return (r<<n);
#endif
}
int mr_fft_init(_MIPD_ int logn,big m1,big m2,BOOL cr)
{ /* logn is number of bits, m1 and m2 are the largest elements
that will arise in each number to be multiplied */
mr_small kk;
int i,j,newn,pr;
mr_utype p,proot;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
newn=(1<<logn);
mr_mip->check=OFF;
multiply(_MIPP_ m1,m2,mr_mip->w5);
premult(_MIPP_ mr_mip->w5,2*newn+1,mr_mip->w5);
kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
pr=0;
while (size(mr_mip->w5)>0)
{ /* find out how many primes will be needed */
do
{
kk--;
p=kk*newn+1;
} while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
#ifdef MR_FP_ROUNDING
mr_sdiv(_MIPP_ mr_mip->w5,p,mr_invert(p),mr_mip->w5);
#else
mr_sdiv(_MIPP_ mr_mip->w5,p,mr_mip->w5);
#endif
pr++;
}
mr_mip->check=ON;
/* if nothing has changed, don't recalculate */
if (logn<=mr_mip->logN && pr==mr_mip->nprimes) return pr;
fft_reset(_MIPPO_ );
mr_mip->prime=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
mr_mip->inverse=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
mr_mip->roots=(mr_utype**)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
mr_mip->t= (mr_utype **)mr_alloc(_MIPP_ pr,sizeof(mr_utype *));
mr_mip->cr=(mr_utype *)mr_alloc(_MIPP_ pr,sizeof(mr_utype));
mr_mip->wa=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->wb=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->wc=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
kk=mr_shiftbits((mr_small)1,MIRACL-2-logn);
if (mr_mip->base!=0) while (4*kk*newn>mr_mip->base) kk=mr_shiftbits(kk,-1);
for (i=0;i<pr;i++)
{ /* find the primes again */
mr_mip->roots[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
mr_mip->t[i]=(mr_utype *)mr_alloc(_MIPP_ newn,sizeof(mr_utype));
do
{
kk--;
p=kk*newn+1;
} while(spmd((mr_small)2,(mr_small)(p-1),p)!=1);
proot=p-1;
for (j=1;j<logn;j++) proot=sqrmp(proot,p);
mr_mip->roots[i][0]=proot; /* build residue table */
for (j=1;j<newn;j++) mr_mip->roots[i][j]=smul(mr_mip->roots[i][j-1],proot,p);
mr_mip->inverse[i]=invers((mr_small)newn,p);
mr_mip->prime[i]=p;
}
mr_mip->logN=logn;
mr_mip->nprimes=pr;
/* set up chinese remainder structure */
if (cr)
if (!scrt_init(_MIPP_ &mr_mip->chin,pr,mr_mip->prime)) fft_reset(_MIPPO_ );
return pr;
}
void fft_reset(_MIPDO_ )
{ /* reclaim any space used by FFT */
int i;
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
if (mr_mip->degree!=0)
{ /* clear any precomputed tables */
for (i=0;i<mr_mip->nprimes;i++)
{
mr_free(mr_mip->s1[i]);
mr_free(mr_mip->s2[i]);
}
mr_free(mr_mip->s1);
mr_free(mr_mip->s2);
mr_mip->degree=0;
}
if (mr_mip->logN!=0)
{ /* clear away old stuff */
for (i=0;i<mr_mip->nprimes;i++)
{
mr_free(mr_mip->roots[i]);
mr_free(mr_mip->t[i]);
}
mr_free(mr_mip->wa);
mr_free(mr_mip->wb);
mr_free(mr_mip->wc);
mr_free(mr_mip->cr);
mr_free(mr_mip->t);
mr_free(mr_mip->roots);
mr_free(mr_mip->inverse);
mr_free(mr_mip->prime);
mr_mip->nprimes=0;
mr_mip->logN=0;
mr_mip->same=FALSE;
}
/* clear CRT structure */
if (mr_mip->chin.NP!=0) scrt_end(&mr_mip->chin);
}
void mr_dif_fft(_MIPD_ int logn,int pr,mr_utype *data)
{ /* decimate-in-frequency fourier transform */
int mmax,m,j,k,istep,i,ii,jj,newn,offset;
mr_utype w,temp,prime,*roots;
#ifdef MR_NOASM
mr_large dble,ldres;
#endif
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large iprime;
#endif
prime=mr_mip->prime[pr];
roots=mr_mip->roots[pr];
#ifdef MR_FP_ROUNDING
iprime=mr_invert(prime);
#endif
newn=(1<<logn);
offset=(mr_mip->logN-logn);
mmax=newn;
for (k=0;k<logn;k++) {
istep=mmax;
mmax>>=1;
ii=newn;
jj=newn/istep;
ii-=jj;
for (i=0;i<newn;i+=istep)
{ /* special case root=1 */
j=i+mmax;
temp=data[i]-data[j];
if (temp<0) temp+=prime;
data[i]+=data[j];
if (data[i]>=prime) data[i]-=prime;
data[j]=temp;
}
for (m=1;m<mmax;m++) {
w=roots[(ii<<offset)-1];
ii-=jj;
#ifndef MR_FP
#if INLINE_ASM == 3
#define MR_IMPASM
/* esi = i
edi = j
ebx = data
edx = temp
ecx = prime
*/
ASM mov ecx,DWORD PTR prime
ASM mov ebx,DWORD PTR data
ASM mov esi,DWORD PTR m
hop1:
ASM cmp esi,DWORD PTR newn
ASM jge hop0
ASM mov edi,esi
ASM add edi,DWORD PTR mmax
ASM mov edx,[ebx+4*esi]
ASM add edx,ecx
ASM sub edx,[ebx+4*edi]
ASM mov eax,[ebx+4*esi]
ASM add eax,[ebx+4*edi]
ASM cmp eax,ecx
ASM jl hop2
ASM sub eax,ecx
hop2:
ASM mov [ebx+4*esi],eax
ASM mov eax,DWORD PTR w
ASM mul edx
ASM div ecx
ASM mov [ebx+4*edi],edx
ASM add esi,DWORD PTR istep
ASM jmp hop1
hop0:
ASM nop
#endif
#if INLINE_ASM == 4
#define MR_IMPASM
ASM (
"movl %0,%%ecx\n"
"movl %1,%%ebx\n"
"movl %2,%%esi\n"
"hop1:\n"
"cmpl %3,%%esi\n"
"jge hop0\n"
"movl %%esi,%%edi\n"
"addl %4,%%edi\n"
"movl (%%ebx,%%esi,4),%%edx\n"
"addl %%ecx,%%edx\n"
"subl (%%ebx,%%edi,4),%%edx\n"
"movl (%%ebx,%%esi,4),%%eax\n"
"addl (%%ebx,%%edi,4),%%eax\n"
"cmpl %%ecx,%%eax\n"
"jl hop2\n"
"subl %%ecx,%%eax\n"
"hop2:\n"
"movl %%eax,(%%ebx,%%esi,4)\n"
"movl %5,%%eax\n"
"mull %%edx\n"
"divl %%ecx\n"
"movl %%edx,(%%ebx,%%edi,4)\n"
"addl %6,%%esi\n"
"jmp hop1\n"
"hop0:\n"
"nop\n"
:
: "m"(prime),"m"(data),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
: "eax","edi","esi","edi","ebx","ecx","edx","memory"
);
#endif
#endif
#ifndef MR_IMPASM
for (i=m;i<newn;i+=istep) {
j=i+mmax;
temp=prime+data[i]-data[j];
data[i]+=data[j];
if (data[i]>=prime) data[i]-=prime;
#ifdef MR_NOASM
dble=(mr_large)w*temp;
#ifdef MR_FP_ROUNDING
data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
#else
data[j]=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
#endif
#else
#ifdef MR_FP_ROUNDING
imuldiv(w,temp,(mr_small)0,prime,iprime,(mr_small *)&data[j]);
#else
muldiv(w,temp,(mr_small)0,prime,(mr_small *)&data[j]);
#endif
#endif
}
#endif
}
}
}
void mr_dit_fft(_MIPD_ int logn,int pr,mr_utype *data)
{ /* decimate-in-time inverse fourier transform */
int mmax,m,j,k,i,istep,ii,jj,newn,offset;
mr_utype w,temp,prime,*roots;
#ifdef MR_NOASM
mr_large dble,ldres;
#endif
#ifdef MR_OS_THREADS
miracl *mr_mip=get_mip();
#endif
#ifdef MR_FP_ROUNDING
mr_large iprime;
#endif
prime=mr_mip->prime[pr];
roots=mr_mip->roots[pr];
offset=(mr_mip->logN-logn);
#ifdef MR_FP_ROUNDING
iprime=mr_invert(prime);
#endif
newn=(1<<logn);
mmax=1;
for (k=0;k<logn;k++) {
istep=(mmax<<1);
ii=0;
jj=newn/istep;
ii+=jj;
for (i=0;i<newn;i+=istep)
{ /* special case root=1 */
j=i+mmax;
temp=data[j];
data[j]=data[i]-temp;
if (data[j]<0) data[j]+=prime;
data[i]+=temp;
if (data[i]>=prime) data[i]-=prime;
}
for (m=1;m<mmax;m++) {
w=roots[(ii<<offset)-1];
ii+=jj;
#ifndef MR_FP
#if INLINE_ASM == 3
#define MR_IMPASM
/* esi = i
edi = j
ebx = data
edx = temp
ecx = prime
*/
ASM mov ebx,DWORD PTR data
ASM mov ecx,DWORD PTR prime
ASM mov esi,DWORD PTR m
hop4:
ASM cmp esi,DWORD PTR newn
ASM jge hop5
ASM mov edi,esi
ASM add edi,DWORD PTR mmax
ASM mov eax,[ebx+4*edi]
ASM mul DWORD PTR w
ASM div ecx
ASM cmp [ebx+4*esi],ecx
ASM jl hop3
ASM sub [ebx+4*esi],ecx
hop3:
ASM mov eax,[ebx+4*esi]
ASM add [ebx+4*esi],edx
ASM add eax,ecx
ASM sub eax,edx
ASM mov [ebx+4*edi],eax
ASM add esi,DWORD PTR istep
ASM jmp hop4
hop5:
ASM nop
#endif
#if INLINE_ASM == 4
#define MR_IMPASM
ASM (
"movl %0,%%ebx\n"
"movl %1,%%ecx\n"
"movl %2,%%esi\n"
"hop4:\n"
"cmpl %3,%%esi\n"
"jge hop5\n"
"movl %%esi,%%edi\n"
"addl %4,%%edi\n"
"movl (%%ebx,%%edi,4),%%eax\n"
"mull %5\n"
"divl %%ecx\n"
"cmpl %%ecx,(%%ebx,%%esi,4)\n"
"jl hop3\n"
"subl %%ecx,(%%ebx,%%esi,4)\n"
"hop3:\n"
"movl (%%ebx,%%esi,4),%%eax\n"
"addl %%edx,(%%ebx,%%esi,4)\n"
"addl %%ecx,%%eax\n"
"subl %%edx,%%eax\n"
"movl %%eax,(%%ebx,%%edi,4)\n"
"addl %6,%%esi\n"
"jmp hop4\n"
"hop5:\n"
"nop\n"
:
: "m"(data),"m"(prime),"m"(m),"m"(newn),"m"(mmax),"m"(w),"m"(istep)
: "eax","edi","esi","edi","ebx","ecx","edx","memory"
);
#endif
#endif
#ifndef MR_IMPASM
for (i=m;i<newn;i+=istep) {
j=i+mmax;
#ifdef MR_NOASM
dble=(mr_large)w*data[j];
#ifdef MR_FP_ROUNDING
temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble*iprime));
#else
temp=(mr_utype)(dble-(mr_large)prime*MR_LROUND(dble/prime));
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -