// fft_flp32.asm
q[j2+=4]=r27:26; k5=k5+k9; fr6=r22*r4; fr18=r6-r7;; // S2-, K3++, M1++, A1+
yr31:28=q[j0+=4]; k3=k5 and k4; fr15=r11*r12; fr24=r0+r16, fr26=r0-r16;; // F1+++, K1+++, M4+, A3
xr31:28=q[j0+=4]; r13:12=l[k7+k3]; fr7=r23*r5; fr25=r1+r17, fr27=r1-r17;; // F2+++, K2+++, M2++, A4
// ---- End of _BflyLoop, pipeline drain, and _StageLoop loop-back ----
// NOTE: the conditional jump below ends in a single ';', so the slots on the
// line that follows it (up to the ';;') are issued in the same instruction
// line as the jump.
// Butterfly arithmetic visible in this section:
//   (r16|r18, r17|r19) = complex product of the second input point with the
//                        twiddle pair (r5:4 or r13:12)
//   r24/r25 = first point + product, r26/r27 = first point - product
// The trailing tags (S=store, M=multiply, A=add, each followed by '+'/'-'
// marks) are the original author's pipeline bookkeeping -- presumably the
// number of marks identifies which in-flight butterfly a slot belongs to.
.align_code 4;
if NLC0E, jump _BflyLoop;
q[j1+=4]=r25:24; fr14=r22*r5; fr19=r14+r15;; // S1, M3++, A2+
// Drain: no further q[j0+=4] fetches are issued from here on; only the
// multiplies, add/sub pairs and stores still outstanding are completed.
// Sums go out through j1, differences through j2.
// Interleaved with the drain, j0/j5/j6 are rotated via j31+Jn moves
// (j31 presumably reads as zero here -- TODO confirm): net effect is that
// j5 and j6 exchange values and j0 ends up equal to the old j5.
q[j2+=4]=r27:26; fr6=r30*r12; fr16=r6-r7;; // S2----, M1-, A1--
j0=j31+j5; fr15=r23*r4; fr24=r8+r18, fr26=r8-r18;; // M4--, A3---, swap ping-pong pointers
j5=j31+j6; fr7=r31*r13; fr25=r9+r19, fr27=r9-r19;; // M2-, A4---
q[j1+=4]=r25:24; fr14=r30*r13; fr17=r14+r15;; // S1---, M3-, A2--
q[j2+=4]=r27:26; fr18=r6-r7;; // S2---, A1-
j6=j31+j0; fr15=r31*r12; fr24=r20+r16, fr26=r20-r16;; // M4-, A3--
fr25=r21+r17, fr27=r21-r17;; // A4--
q[j1+=4]=r25:24; fr19=r14+r15;; // S1--, A2-
// The final difference pair is formed in r23:22 (not r27:26); it is stored
// by the loop-back line at the end of this section.
q[j2+=4]=r27:26; fr24=r28+r18, fr22=r28-r18;; // S2-- A3-
j0=j31+j6; fr25=r29+r19, fr23=r29-r19;; // A4-
q[j1+=4]=r25:24; k5=k31+0;; // S1-, k5 = k31+0 (presumably resets the twiddle index to 0)
// Stage loop-back: as above, the jump shares its instruction line with the
// last store and the mask shift, so both are issued together with the jump.
.align_code 4;
if NLC1E, jump _StageLoop;
q[j2+=4]=r23:22; k4=ashiftr k4;; // S2-, shift the mask
//******************************* Last stage *********************************
// Pipeline prime for the last butterfly stage.
// Differences from the drain code of the earlier stages visible above:
//   - every twiddle pair is fetched with a post-modified pointer
//     (l[k7+=k9]), alternating between r5:4 and r13:12, separately for the
//     y and x compute blocks;
//   - k9 is shifted right by one before use.
// Set up here: j1 = j7 (first output stream), j2 = j1+j11 (second output
// stream), LC0 = k10 (loop counter for _BflyLastLoop).
// This section issues the first eight quad fetches, the first multiplies and
// add/sub pairs, and the first store, so that the loop starts with a full
// pipeline.
k9 = ashiftr k9;;//in this manner any MAX_FFT_SIZE can be used
yr3:0=q[j0+=4]; yr5:4 = l[k7+=k9];; // F1,
xr3:0=q[j0+=4]; xr5:4=l[k7+=k9];; // F2, K2
j1=j31+j7; fr6=r2*r4; LC0=k10;; // M1
yr11:8=q[j0+=4]; yr13:12=l[k7+=k9];; // F1+
xr11:8=q[j0+=4]; xr13:12=l[k7+=k9]; fr7=r3*r5;; // F2+, K2+, M2
j2=j1+j11; fr14=r2*r5;; // M3
fr6=r10*r12; fr16=r6-r7;; // M1+, A1
yr23:20=q[j0+=4]; yr5:4=l[k7+=k9]; fr15=r3*r4;; // F1++, M4
xr23:20=q[j0+=4]; xr5:4=l[k7+=k9]; fr7=r11*r13;; // F2++, K2++, M2+
fr14=r10*r13; fr17=r14+r15;; // M3+, A2
fr6=r22*r4; fr18=r6-r7;; // M1++, A1+
yr31:28=q[j0+=4]; yr13:12=l[k7+=k9];fr15=r11*r12; fr24=r0+r16, fr26=r0-r16;; // F1+++, M4+, A3
xr31:28=q[j0+=4]; xr13:12=l[k7+=k9]; fr7=r23*r5; fr25=r1+r17, fr27=r1-r17;; // F2+++, K2+++, M2++, A4
q[j1+=4]=r25:24; fr14=r22*r5; fr19=r14+r15;; // S1, M3++, A2+
// ---- Last-stage butterfly loop (software pipelined) and its drain ----
// One pass through the loop issues eight quad fetches from j0 (four register
// sets r3:0, r11:8, r23:20, r31:28, each loaded for the y and then the x
// compute block), eight twiddle fetches through k7, four stores through j1
// (sums, r25:24) and four stores through j2 (differences, r27:26).
// For each register set the code forms the complex product of the upper
// pair with the twiddle pair and adds/subtracts it to/from the lower pair,
// e.g. r16=r2*r4-r3*r5, r17=r2*r5+r3*r4, r24/r26=r0+/-r16, r25/r27=r1+/-r17.
.align_code 4;
_BflyLastLoop:
q[j2+=4]=r27:26; fr6=r30*r12; fr16=r6-r7;; // S2----, M1-, A1--
yr3:0=q[j0+=4]; yr5:4=l[k7+=k9]; fr15=r23*r4; fr24=r8+r18, fr26=r8-r18;; // F1, M4--, A3---
xr3:0=q[j0+=4]; xr5:4=l[k7+=k9]; fr7=r31*r13; fr25=r9+r19, fr27=r9-r19;; // F2, K2, M2-, A4---
q[j1+=4]=r25:24; fr14=r30*r13; fr17=r14+r15;; // S1---, M3-, A2--
q[j2+=4]=r27:26; fr6=r2*r4; fr18=r6-r7;; // S2---, M1, A1-
yr11:8=q[j0+=4]; yr13:12=l[k7+=k9];fr15=r31*r12; fr24=r20+r16, fr26=r20-r16;; // F1+, M4-, A3--
xr11:8=q[j0+=4]; xr13:12=l[k7+=k9];fr7=r3*r5; fr25=r21+r17, fr27=r21-r17;; // F2+, K2+, M2, A4--
q[j1+=4]=r25:24; fr14=r2*r5; fr19=r14+r15;; // S1--, M3, A2-
q[j2+=4]=r27:26; fr6=r10*r12; fr16=r6-r7;; // S2--, M1+, A1
yr23:20=q[j0+=4]; yr5:4=l[k7+=k9]; fr15=r3*r4; fr24=r28+r18, fr26=r28-r18;; // F1++, M4, A3-
xr23:20=q[j0+=4]; xr5:4=l[k7+=k9]; fr7=r11*r13; fr25=r29+r19, fr27=r29-r19;; // F2++, K2++, M2+, A4-
q[j1+=4]=r25:24; fr14=r10*r13; fr17=r14+r15;; // S1-, M3+, A2
q[j2+=4]=r27:26; fr6=r22*r4; fr18=r6-r7;; // S2-, M1++, A1+
yr31:28=q[j0+=4];yr13:12=l[k7+=k9]; fr15=r11*r12; fr24=r0+r16, fr26=r0-r16;; // F1+++, M4+, A3
xr31:28=q[j0+=4];xr13:12=l[k7+=k9]; fr7=r23*r5; fr25=r1+r17, fr27=r1-r17;; // F2+++, K2+++, M2++, A4
// NOTE: the jump ends in a single ';', so the store/multiply/add slots on the
// next line are issued in the same instruction line as the jump.
.align_code 4;
if NLC0E, jump _BflyLastLoop;
q[j1+=4]=r25:24; fr14=r22*r5; fr19=r14+r15;; // S1, M3++, A2+
// Drain: no more fetches; finish the outstanding multiplies and add/sub
// pairs and write the remaining sum (j1) and difference (j2) quads.
q[j2+=4]=r27:26; fr6=r30*r12; fr16=r6-r7;; // S2----, M1-, A1--
fr15=r23*r4; fr24=r8+r18, fr26=r8-r18;; // M4--, A3---
fr7=r31*r13; fr25=r9+r19, fr27=r9-r19;; // M2-, A4---
q[j1+=4]=r25:24; fr14=r30*r13; fr17=r14+r15;; // S1---, M3-, A2--
q[j2+=4]=r27:26; fr18=r6-r7;; // S2---, A1-
fr15=r31*r12; fr24=r20+r16, fr26=r20-r16;; // M4-, A3--
fr25=r21+r17, fr27=r21-r17;; // A4--
q[j1+=4]=r25:24; fr19=r14+r15;; // S1--, A2-
q[j2+=4]=r27:26;; // S2--
fr24=r28+r18, fr26=r28-r18;; // A3-
fr25=r29+r19, fr27=r29-r19;; // A4-
q[j1+=4]=r25:24;; // S1-
q[j2+=4]=r27:26;; // S2-
// Decide whether the real re-combine pass is needed: reload the
// COMPLEX/REAL selector from the stack frame (offset 0x19 from j27) and
// skip straight to the epilogue when it equals COMPLEX.
j11=[j27+0x19];; // j11=COMPLEX or REAL, off the stack
comp(j11,COMPLEX);; // Complex or Real?
.align_code 4;
if jeq, jump _FFTEpilogue;; // If Complex, done
//******************************* Real re-combine ********************************
// Pointer setup and pipeline prime for the real re-combine pass.
// Per the comments below, G() is the half-length complex FFT result held in
// the internal buffer at j7 and F() is the re-combined output written back
// to the same buffer.
// Pointer roles established here (all taken from the code/comments below):
//   j0 / j2   forward read pointers (DAB quad loads), y block at n,
//             x block at n+N/8
//   j1 / j3   backward read pointers (post-modify -4) for G(N/2-n) and
//             G(N/2-(n+N/8))
//   j10 / j12 forward store pointers for F(n), F(n+N/8)
//   j14 / j13 backward store pointers for F(N/2-n), F(N/2-(n+N/8))
//   k8 / k12  twiddle pointers for the y and x blocks, stepped by k9
//   k11:10    holds F(N/4), stored after the loop
// The two bare "if LC0E;" lines carry no dependent action; per the existing
// comments they are used only to decrement LC0 to account for the iterations
// peeled into the prime and the drain -- TODO confirm against the ISA manual.
//j17=N/2, j7=output
k8=k31+_twiddles; j0=j31+j7;; // k8->twiddles, j0->internal buffer
k9=ashiftr k9; j10=j31+j7;; // k9=twiddle stride, j10->internal buffer
j14=j17+j17;; // j14=N (N/2 complex values)
j14=j14-4;; // j14=N-4 real=N/2-2 complex
j1=j0+j14;; // j1->internal buffer+(N/2-2)
j14=j10+j14;; // j14->internal buffer+(N/2-2)
j29 = ashiftr j17;; // j29=N/4
k15=k31+MAX_FFT_SIZE/4; j30=ashiftr j29;; // k15=N/4*twiddle_stride, j30=N/8
j30 = ashiftr j30;; // N/16
k8=k8+k9; r0=l[j7+j17];; // k8->twiddles+1, get G(N/4)
j0=j0+2; k12=k8+k15;; // j0->internal buffer+1, k12->twiddles+N/8+1
LC0=j30; fr0=r0+r0; j2=j0+j29;; // LC0=N/16, compute F(N/4)=2*conj(G(N/4)), j2->internal buffer+1+N/8
j3=j1-j29;; // j3->internal buffer+3N/8-2
xfr0=-r0; j10=j10+2; k10=yr0;; // j10->internal buffer+1, k10=Im(F(N/4))
j12=j10+j29;; // j12->internal buffer+N/8+1
if LC0E; j13=j14-j29; k11=xr0;; // LC0=N/16-1, j13->internal buffer+3N/8-2, k11=Re(F(N/4))
// j0/j2 start one complex value (two words) past the buffer start, so the
// forward quad reads go through the DAB; the first DAB access of each
// compute block only primes the buffer and its result is overwritten.
yr3:0=DAB q[j0+=4];; // Prime the DAB
xr3:0=DAB q[j2+=4];; // Prime the DAB
yr3:0=DAB q[j0+=4];; // yr0=Re(G(n)), yr1=Im(G(n)), yr2=Re(G(n+1)), yr3=Im(G(n+1))
xr3:0=DAB q[j2+=4];; // xr0=Re(G(n+N/8)), xr1=Im(G(n+N/8))
// xr2=Re(G(n+1+N/8)), xr3=Im(G(n+1+N/8))
yr7:4=q[j1+=-4]; xr9:8=l[k12+=k9];; // yr4=Re(G(N/2-(n+1))), yr5=Im(G(N/2-(n+1)))
// yr6=Re(G(N/2-n)), yr7=Im(G(N/2-n))
// twiddles(n+N/8) - want to mult by sin(x)-icos(x)
xr7:4=q[j3+=-4]; xr11:10=l[k12+=k9];; // xr4=Re(G(N/2-(n+1+N/8))), xr5=Im(G(N/2-(n+1+N/8)))
// xr6=Re(G(N/2-(n+N/8))), xr7=Im(G(N/2-(n+N/8)))
// twiddles(n+1+N/8)
if LC0E; fr16=r0+r6, fr20=r0-r6; yr9:8=l[k8+=k9];; // LC0=N/16-2, r16=Re(G(n)+conj(G(N/2-n))), r20=Re(G(n)-conj(G(N/2-n)))
// twiddles(n)
fr18=r2+r4, fr22=r2-r4; yr11:10=l[k8+=k9];; // r18=Re(G(n+1)+conj(G(N/2-(n+1)))), r22=Re(G(n+1)-conj(G(N/2-(n+1))))
// twiddles(n+1)
fr24=r20*r9; fr21=r1+r7, fr17=r1-r7;; // r24=s(n)*Re(G(n)-conj(G(N/2-n)))
// r17=Im(G(n)+conj(G(N/2-n))), r21=Im(G(n)-conj(G(N/2-n)))
fr26=r22*r11; fr23=r3+r5, fr19=r3-r5; xr3:0=DAB q[j2+=4];; // r26=s(n+1)*Re(G(n+1)-conj(G(N/2-(n+1))))
// r19=Im(G(n+1)+conj(G(N/2-(n+1)))), r23=Im(G(n+1)-conj(G(N/2-(n+1))))
// xr3:0=next G(n+2+N/8), G(n+3+N/8)
fr25=r21*r8; yr3:0=DAB q[j0+=4];; // r25=c(n)*Im(G(n)-conj(G(N/2-n))), yr3:0=next G(n+2), G(n+3)
fr27=r23*r10;; // r27=c(n+1)*Im(G(n+1)-conj(G(N/2-(n+1))))
fr24=r24+r25; fr25=r21*r9; yr7:4=q[j1+=-4];; // r24=Re(-i*exp(2*pi*i*n)(G(n)-conj(G(N/2-n))))
// r25=s(n)*Im(G(n)-conj(G(N/2-n)))
// yr7:4=next G(N/2-(n+2)), G(N/2-(n+3))
fr26=r26+r27; fr27=r23*r11; xr7:4=q[j3+=-4];; // r26=Re(-i*exp(2*pi*i*(n+1))(G(n+1)-conj(G(N/2-(n+1)))))
// r27=s(n+1)*Im(G(n+1)-conj(G(N/2-(n+1))))
// xr7:4=next G(N/2-(n+2+N/8)), G(N/2-(n+3+N/8))
fr13=r20*r8; fr12=r16+r24, fr30=r16-r24;; // r13=c(n)*Re(G(n)-conj(G(N/2-n))), r12=Re(F(n)), r30=Re(F(N/2-n))
fr15=r22*r10; fr14=r18+r26, fr28=r18-r26;; // r15=c(n+1)*Re(G(n+1)-conj(G(N/2-(n+1))))
// r14=Re(F(n+1)), r28=Re(F(N/2-(n+1)))
fr13=r25-r13; xr9:8=l[k12+=k9];; // r13=Im(-i*exp(2*pi*i*x)(G(n)-conj(G(N/2-n)))), next twiddles(n+2+N/8)
fr15=r27-r15; xr11:10=l[k12+=k9];; // r15=Im(-i*exp(2*pi*i*x)(G(n+1)-conj(G(N/2-(n+1)))))
// next twiddles(n+3+N/8)
// ---- Real re-combine loop (software pipelined) ----
// Each pass finishes and stores the two output pairs started in the previous
// pass (F(n), F(n+1) forward through j10/j12 and F(N/2-n), F(N/2-(n+1))
// backward through j14/j13) while starting the next two (n+2, n+3).
// The y compute block works on index n, the x compute block on n+N/8.
// Register roles inside one pass, as established by the add/sub pairs below:
//   r16/r18 = Re(G+conj(G'))   r20/r22 = Re(G-conj(G'))
//   r17/r19 = Im(G+conj(G'))   r21/r23 = Im(G-conj(G'))
// where G' is the mirrored element G(N/2-index).
.align_code 4;
_combine_stage:
fr16=r0+r6, fr20=r0-r6; yr9:8=l[k8+=k9];; // r16=Re(G(n+2)+conj(G(N/2-(n+2)))), r20=Re(G(n+2)-conj(G(N/2-(n+2))))
// next twiddles(n+2)
fr18=r2+r4, fr22=r2-r4; yr11:10=l[k8+=k9];; // r18=Re(G(n+3)+conj(G(N/2-(n+3)))), r22=Re(G(n+3)-conj(G(N/2-(n+3))))
// next twiddles(n+3)
fr13=r13+r17, fr31=r13-r17;; // r13=Im(F(n)), r31=Im(F(N/2-n))
fr15=r15+r19, fr29=r15-r19; l[j12+=2]=xr13:12;; // r15=Im(F(n+1)), r29=Im(F(N/2-(n+1))), store F(n+N/8)
fr24=r20*r9; fr21=r1+r7, fr17=r1-r7; q[j14+=-4]=yr31:28;; // r24=s(n+2)*Re(G(n+2)-conj(G(N/2-(n+2))))
// r21=Im(G(n+2)-conj(G(N/2-(n+2)))), r17=Im(G(n+2)+conj(G(N/2-(n+2))))
// store F(N/2-n), F(N/2-(n+1))
fr26=r22*r11; fr23=r3+r5, fr19=r3-r5; xr3:0=DAB q[j2+=4];; // r26=s(n+3)*Re(G(n+3)-conj(G(N/2-(n+3))))
// r23=Im(G(n+3)-conj(G(N/2-(n+3)))), r19=Im(G(n+3)+conj(G(N/2-(n+3))))
// xr3:0=next G(n+4+N/8), G(n+5+N/8)
fr25=r21*r8; yr3:0=DAB q[j0+=4];; // r25=c(n+2)*Im(G(n+2)-conj(G(N/2-(n+2))))
// yr3:0=next G(n+4), G(n+5)
fr27=r23*r10; q[j13+=-4]=xr31:28;; // r27=c(n+3)*Im(G(n+3)-conj(G(N/2-(n+3))))
// store F(N/2-(n+N/8)), F(N/2-(n+1+N/8))
fr24=r24+r25; fr25=r21*r9; l[j10+=2]=yr13:12;; // r24=Re(-i*exp(2*pi*i*x)(G(n+2)-conj(G(N/2-(n+2)))))
// r25=s(n+2)*Im(G(n+2)-conj(G(N/2-(n+2)))), store F(n)
fr26=r26+r27; fr27=r23*r11; l[j10+=2]=yr15:14;; // r26=Re(-i*exp(2*pi*i*x)(G(n+3)-conj(G(N/2-(n+3)))))
// r27=s(n+3)*Im(G(n+3)-conj(G(N/2-(n+3)))), store F(n+1)
fr13=r20*r8; fr12=r16+r24, fr30=r16-r24; l[j12+=2]=xr15:14;;// r13=cos(n+2)*Re(G(n+2)-conj(G(N/2-(n+2))))
// r12=Re(F(n+2)), r30=Re(F(N/2-(n+2))), store F(n+1+N/8)
fr15=r22*r10; fr14=r18+r26, fr28=r18-r26; xr7:4=q[j3+=-4];; // r15=cos(n+3)*Re(G(n+3)-conj(G(N/2-(n+3))))
// r14=Re(F(n+3)), r28=Re(F(N/2-(n+3)))
// xr7:4=next G(N/2-(n+4+N/8)), G(N/2-(n+5+N/8))
fr13=r25-r13; xr9:8=l[k12+=k9]; yr7:4=q[j1+=-4];; // r13=Im(-i*exp(2*pi*i*x)(G(n+2)-conj(G(N/2-(n+2)))))
// next twiddles(n+4+N/8)
// yr7:4=next G(N/2-(n+4)), G(N/2-(n+5))
// The loop-back jump shares its instruction line with the last subtract and
// the last x-block twiddle fetch of the pass.
.align_code 4;
if NLC0E, jump _combine_stage(P); fr15=r27-r15; xr11:10=l[k12+=k9];;// r15=Im(-i*exp(2*pi*i*x)(G(n+3)-conj(G(N/2-(n+3)))))
// next twiddles(n+5+N/8)
// ---- Real re-combine drain ----
// Completes the two output pairs left in flight by _combine_stage (n+2, n+3)
// and one further pair (n+4, n+5) without issuing new G() quad fetches, and
// interleaves the two special bins:
//   F(0):   G(0) is reloaded from the buffer start (j7), yr0 becomes
//           2*(Re(G(0))+Im(G(0))), yr1 is cleared, and the pair is written
//           back to j7.
//   F(N/4): the value kept in k11:10 is written to j7+j17.
fr16=r0+r6, fr20=r0-r6; yr9:8=l[k8+=k9];; // r16=Re(G(n+4)+conj(G(N/2-(n+4)))), r20=Re(G(n+4)-conj(G(N/2-(n+4))))
// next twiddles(n+4)
fr18=r2+r4, fr22=r2-r4; yr11:10=l[k8+=k9];; // r18=Re(G(n+5)+conj(G(N/2-(n+5)))), r22=Re(G(n+5)-conj(G(N/2-(n+5))))
// next twiddles(n+5)
fr13=r13+r17, fr31=r13-r17;; // r13=Im(F(n+2)), r31=Im(F(N/2-(n+2)))
fr15=r15+r19, fr29=r15-r19;; // r15=Im(F(n+3)), r29=Im(F(N/2-(n+3)))
fr24=r20*r9; fr21=r1+r7, fr17=r1-r7; yr1:0=l[j31+j7];; // r24=s(n+4)*Re(G(n+4)-conj(G(N/2-(n+4))))
// r21=Im(G(n+4)-conj(G(N/2-(n+4)))), r17=Im(G(n+4)+conj(G(N/2-(n+4))))
// yr0=Re(G(0)), yr1=Im(G(0))
fr26=r22*r11; fr23=r3+r5, fr19=r3-r5;; // r26=s(n+5)*Re(G(n+5)-conj(G(N/2-(n+5))))
// r23=Im(G(n+5)-conj(G(N/2-(n+5)))), r19=Im(G(n+5)+conj(G(N/2-(n+5))))
fr25=r21*r8;; // r25=c(n+4)*Im(G(n+4)-conj(G(N/2-(n+4))))
fr27=r23*r10; l[j12+=2]=xr13:12;; // r27=c(n+5)*Im(G(n+5)-conj(G(N/2-(n+5))))
// store F(n+2+N/8)
yfr0=r1+r0; yr1=lshift r1 by -32; q[j14+=-4]=yr31:28;; // yr0=Re(G(0))+Im(G(0)), yr1=0=Im(F(0))
// store F(N/2-(n+2)), F(N/2-(n+3))
yfr0=r0+r0; q[j13+=-4]=xr31:28;; // yr0=Re(F(0))
// store F(N/2-(n+2+N/8)), F(N/2-(n+3+N/8))
fr24=r24+r25; fr25=r21*r9; l[j10+=2]=yr13:12;; // r24=Re(-i*exp(2*pi*i*x)(G(n+4)-conj(G(N/2-(n+4)))))
// r25=s(n+4)*Im(G(n+4)-conj(G(N/2-(n+4)))), store F(n+2)
fr26=r26+r27; fr27=r23*r11; l[j10+=2]=yr15:14;; // r26=Re(-i*exp(2*pi*i*x)(G(n+5)-conj(G(N/2-(n+5)))))
// r27=s(n+5)*Im(G(n+5)-conj(G(N/2-(n+5)))), store F(n+3)
fr13=r20*r8; fr12=r16+r24, fr30=r16-r24; l[j12+=2]=xr15:14;; // r13=c(n+4)*Re(G(n+4)-conj(G(N/2-(n+4))))
// r12=Re(F(n+4)), r30=Re(F(N/2-(n+4))), store F(n+3+N/8)
fr15=r22*r10; fr14=r18+r26, fr28=r18-r26; l[j31+j7]=yr1:0;; // r15=c(n+5)*Re(G(n+5)-conj(G(N/2-(n+5))))
// r14=Re(F(n+5)), r28=Re(F(N/2-(n+5)))
// store F(0)
fr13=r25-r13; l[j7+j17]=k11:10;; // r13=Im(-i*exp(2*pi*i*x)(G(n+4)-conj(G(N/2-(n+4)))))
// store F(N/4)
fr15=r27-r15;; // r15=Im(-i*exp(2*pi*i*x)(G(n+5)-conj(G(N/2-(n+5)))))
fr13=r13+r17, fr31=r13-r17;; // r13=Im(F(n+4)), r31=Im(F(N/2-(n+4)))
fr15=r15+r19, fr29=r15-r19; l[j12+=2]=xr13:12;; // r15=Im(F(n+5)), r29=Im(F(N/2-(n+5))), store F(n+4+N/8)
q[j14+=-4]=yr31:28;; // store F(N/2-(n+4)), F(N/2-(n+5))
q[j13+=-4]=xr31:28;; // store F(N/2-(n+4+N/8)), F(N/2-(n+5+N/8))
l[j10+=2]=yr13:12;; // store F(n+4)
l[j10+=2]=yr15:14;; // store F(n+5)
l[j12+=2]=xr15:14;; // store F(n+5+N/8)
//******************************** Epilogue **********************************
// Common exit, reached by falling through from the real re-combine pass or
// by the direct jump taken for complex input. Pops four register quads
// (yR27:24, yR31:28, xR27:24, xR31:28) and returns via mRETURN.
// mPOPQ/mRETURN are macros defined outside this excerpt; the matching
// pushes are presumably in the routine's prologue -- the pop order here must
// mirror them.
_FFTEpilogue:
mPOPQ(yR27:24)
mPOPQ(yR31:28)
mPOPQ(xR27:24)
mPOPQ(xR31:28)
mRETURN
//********************* End Label For Statistical Profiling ******************
// Marks the end address of the routine for the profiler.
_FFT32.end:
// (Web code-viewer keyboard-shortcut help text followed the listing here; it is not part of fft_flp32.asm.)