📄 fft16_emac.s
字号:
ext.l d5 ;d5 = ai1 move.l (4,a1),d6 ;d6 = bi0,bi1 move.l d6,d7 ;d7 = bi0,bi1 swap d6 ;d6 = bi1,bi0 ext.l d6 ;d6 = bi0 ext.l d7 ;d7 = bi1 movea.l d0,a2 ;a2 = ar0 movea.l d1,a3 ;a3 = ar1 movea.l d4,a4 ;a4 = ai0 movea.l d5,a5 ;a5 = ai1 add.l d2,d0 ;xr0 = ar0 + br0 move.w d0,(a0)+ add.l d7,d1 ;xr1 = ar1 + bi1 move.w d1,(a0)+ suba.l d2,a2 ;yr0 = ar0 - br0 move.w a2,(a0)+ suba.l d7,a3 ;yr1 = ar1 - bi1 move.w a3,(a0)+ add.l d6,d4 ;xi0 = ai0 + bi0 move.w d4,(a1)+ sub.l d3,d5 ;xi1 = ai1 - br1 move.w d5,(a1)+ suba.l d6,a4 ;yi0 = ai0 - bi0 move.w a4,(a1)+ adda.l d3,a5 ;yi1 = ai1 + br1 move.w a5,(a1)+ addq.l #1,a6; cmpa.l #128,a6 bcs.b second_stage move.l #64,d0 ;FFT for complex values move.l d0,(64,a7) ;starts from 3-rd stage moveq.l #2,d0 move.l d0,(68,a7) ;stage loop counter (starts from 3rd stage) movea.l #8,a5 ;a5 contains the number of butterflies ;per one sub DFT multiplied ;by 2 (the size of values) move.l #512,d6 ;step in the table of twiddle factors ;(multiplied by 2 because of the size ;of coefficients) movea.l #0,a6 ;counter for butterfly loop ;from MSW to LSW to store it correctly ;into the memory) move.l #0x00000070,MACSR clr.l d2 clr.l d7 move.l #0,ACC0 move.l #0,ACC1next_stage: ;start of stages loop moveq.l #0,d0 move.l d0,(60,a7) ;sub DFT loop counter movem.l (76,a7),a0-a1 ;a0 points to ar0, a1 points to ai0 movea.l a0,a2 movea.l a1,a3 adda.l a5,a2 ;a2 points to br0 adda.l a5,a3 ;a3 points to bi0next_subDFT: ;start of sub DFTs loop movea.l #TF_table,a4 ;a4 points to the beginning of the table //*************************************** NEXT_BF ******************next_bf: ;start of butterflies loop move.l (a4),d0 ;wr -> MSW of d0 ;wi -> LSW of d0 move.l (a2),d4 move.w (a0),d2 move.w (a1),d7 msacl.w d0.u,d4.u,<<,(a3),d5,ACC0 ;ar-br*wr -> ACC, bi -> MSW of d5 msac.w d0.l,d5.u,<<,ACC0 ;ar-br*wr-bi*wi = xr -> ACC, ai -> MSW of d7 mac.w d0.l,d4.u,<<,ACC1 ;ai+br*wi -> ACC, ar -> MSW of d2 msac.w d0.u,d5.u,<<,ACC1 ;ai+br*wi-bi*wr = xi -> ACC, br -> MSW of d4 movclr.l ACC0,d3 movclr.l ACC1,d1 add.l d2,d3 move.w d3,(a0)+ ;xr -> memory add.l d2,d2 ;2*ar -> d2 sub.l d3,d2 ;2*ar-xr = yr -> d2 move.w d2,(a2)+ ;yr -> memory add.l d7,d1 move.w d1,(a1)+ ;xi -> memory add.l d7,d7 ;2*ai -> d7 sub.l d1,d7 ;2*ai-xi = yi -> d7 move.w d7,(a3)+ ;yi -> memory adda.l d6,a4 ;modify pointer to the twiddle factor ;for the next butterfly addq.l #2,a6 cmpa.l a5,a6 bcs.b next_bf ;end of butterflies loop ;of the current sub DFT//*************************************** END OF NEXT_BF ****************** move.l #0,a6 adda.l a5,a0 ;a0 - a3 point to the input values adda.l a5,a1 ;for the first butterfly adda.l a5,a2 ;of the next sub DFT adda.l a5,a3 move.l (60,a7),d0 addq.l #1,d0 move.l d0,(60,a7) ;increment sub DFT loop counter cmp.l (64,a7),d0 ;compare sub DFT loop counter with ;the number of sub DFTs on this stage bcs.w next_subDFT ;end of sub DFTs loop moveq.l #0,d0 move.l d0,(60,a7) ;store 0 to the sub DFT loop counter adda.l a5,a5 ;multiply contents of a5 (the number of ;butterflies per one sub DFT) by 2 for the ;next stage lsr.l #1,d6 ;divide step in the table of twiddle ;factors by 2 move.l (64,a7),d0 ;divide the number of sub DFTs for the lsr.l #1,d0 ;next stage by 2 move.l d0,(64,a7) move.l (68,a7),d0 ;increment stage loop counter addq.l #1,d0 move.l d0,(68,a7) cmpi.l #9,d0 bcs.w next_stage ;end of stage loop;even/odd frequency domain decomposition ;Corresponding C code: ;nm1=smpl_num-1; ;nd2=smpl_num>>1; ;n4=(smpl_num>>2); ;for (i=1;i<n4;i++){ ; im=nd2-i; ; ip2=i+nd2; ; ipm=im+nd2; ; ; ReX[ip2]=(ImX[i]+ImX[im])/2; ; ReX[ipm]=ReX[ip2]; ; ; ImX[ip2]=(ReX[im]-ReX[i])/2; ; ImX[ipm]=-ImX[ip2]; ; ; ReX[i]=(ReX[i]+ReX[im])/2; ; ReX[im]=ReX[i]; ; ; ImX[i]=(ImX[i]-ImX[im])/2; ; ImX[im]=-ImX[i]; ;} ;n34=(smpl_num*3)>>2; ;ReX[n34]=ImX[n4]; ;ReX[nd2]=ImX[0]; ;ImX[n34]=0; ;ImX[nd2]=0; ;ImX[n4]=0; ;ImX[0]=0; movem.l (76,a7),a0/a1 ;even/odd frequency domain decomposition lea (2,a0),a2 lea (2,a1),a3 lea (1024,a0),a4 lea (1024,a1),a5 lea (1026,a0),a6 move.l #1026,d0 move.l #2046,d6adjust: move.w (a3),d3 ;d3 = ImX[i] ext.l d3 move.l d3,d1 ;d1 = ImX[i] move.w -(a5),d5 ;d5 = ImX[im] ext.l d5 add.l d5,d3 ;d3 = ImX[i] + ImX[im] asr.l #1,d3 ;d3 = (ImX[i] + ImX[im]) / 2 move.w d3,(a6)+ ;ReX[ip2] = d3 move.w d3,(a0,d6.l) ;ReX[ipm] = ReX[ip2] move.w -(a4),d4 ;d4 = ReX[im] ext.l d4 move.l d4,d7 ;d7 = ReX[im] move.w (a2),d2 ;d2 = ReX[i] ext.l d2 sub.l d2,d4 ;d4 = ReX[im] - ReX[i] asr.l #1,d4 ;d4 = (ReX[im] - ReX[i]) / 2 move.w d4,(a1,d0.l) ;ImX[ip2] = d4 neg.l d4 ;d4 = -d4 move.w d4,(a1,d6.l) ;ImX[ipm] = -ImX[ip2] add.l d7,d2 ;d2 = ReX[i] + ReX[im] asr.l #1,d2 ;d2 = (ReX[i] + ReX[im]) / 2 move.w d2,(a2)+ ;ReX[i] = d2 move.w d2,(a4) ;ReX[im] = ReX[i] sub.l d5,d1 ;d1 = ImX[i] - ImX[im] asr.l #1,d1 ;d1 = (ImX[i] - ImX[im]) / 2 move.w d1,(a3)+ ;ImX[i] = d1 neg.l d1 ;d1 = -d1 move.w d1,(a5) ;ImX[im]=-ImX[i]; addq.l #2,d0 ;loop processing subq.l #2,d6 cmpi.l #1536,d0 bcs.b adjust moveq.l #0,d0 move.w (512,a1),(1536,a0) ;ReX[n34]=ImX[n4]; move.w (a1),(1024,a0) ;ReX[nd2]=ImX[0]; move.w d0,(1536,a1) ;ImX[n34]=0; move.w d0,(1024,a1) ;ImX[nd2]=0; move.w d0,(512,a1) ;ImX[n4]=0; move.w d0,(a1) ;ImX[0]=0; movea.l #1024,a5 ;the last stage of FFT movea.l #TF_table,a4 ;a4 points to the first twiddle factor movea.l a0,a2 ;a0 points to ar0 movea.l a1,a3 ;a1 points to ai0 adda.l a5,a2 ;a2 points to br0 adda.l a5,a3 ;a3 points to bi0 move.l #0,a6 ;counter for butterfly loop//******************************************** FIN_STAGE ********** move.l #0,ACC0 move.l #0,ACC1 //****************;turn on fractional modefin_stage: move.l (a4)+,d0 ;wr -> MSW of d0 ;wi -> LSW of d0 move.l (a2),d4 move.w (a0),d2 move.w (a1),d7 ext.l d2 ext.l d7 msacl.w d0.u,d4.u,(a3),d5,ACC0 ;ar-br*wr -> ACC, bi -> MSW of d5 msac.w d0.l,d5.u,ACC0 ;ar-br*wr-bi*wi = xr -> ACC, ai -> MSW of d7 mac.w d0.l,d4.u,ACC1 ;ai+br*wi -> ACC, ar -> MSW of d2 msac.w d0.u,d5.u,ACC1 ;ai+br*wi-bi*wr = xi -> ACC, br -> MSW of d4 movclr.l ACC0,d3 movclr.l ACC1,d1 add.l d2,d3 ext.l d3 asr.l #1,d3 move.w d3,(a0)+ ;xr -> memory sub.l d3,d2 ;2*ar-xr = yr -> d2 move.w d2,(a2)+ ;yr -> memory add.l d7,d1 ext.l d1 asr.l #1,d1 move.w d1,(a1)+ ;xi -> memory sub.l d1,d7 ;2*ai-xi = yi -> d7 move.w d7,(a3)+ ;yi -> memory adda.l #2,a6 cmpa.l a5,a6 bcs.b fin_stage ;end of butterfly loop ;turn off fractional mode move.l MACSR,d0 and.l #0xffffff8f,d0 move.l d0,MACSR movem.l (a7),d0-d7/a0-a6 lea +72(a7),a7 rts;******************************************************************************;Inversed Real FFT;******************************************************************************;Upon entry, REX[ ] and IMX[ ] contain the real and imaginary parts of the;frequency domain running from index 0 to 512. The remaining samples in;REX[ ] and IMX[ ] are ignored.;Upon return, REX[ ] contains the real time domain.;******************************************************************************_inv_fft16_emac:;make frequency domain symmetrical;Corresponding C code: ;n=smpl_num; ;for (i=((n>>1)+1);i<n;i++){ ; ReX[i]=ReX[n-i]; ; ImX[i]=-ImX[n-i]; ;} moveq.l #0,d1 ;make frequency domain movem.l (4,a7),a0-a1 ;symmetrical lea (1024,a0),a2 lea (1024,a1),a3 lea (1026,a0),a0 lea (1026,a1),a1movneg: move.w -(a2),(a0)+ ;ReX[i]=ReX[n-i]; move.w -(a3),d0 neg.l d0 move.w d0,(a1)+ ;ImX[i]=-ImX[n-i]; addq.l #1,d1 ;loop processing cmp.l #511,d1 bcs.b movneg;add real and imaginary parts together;Corresponding C code: ;for (i=0;i<n;i++){ ; ReX[i]=ReX[i]+ImX[i]; ;} moveq.l #0,d0 ;add real and imaginary parts movem.l (4,a7),a0-a1 ;togethersum: move.w (a0),d1 ;d1 = ReX[i] ext.l d1 move.w (a1)+,d2 ;d2 = ImX[i] ext.l d2 add.l d2,d1 ;d1 = ReX[i] + ImX[i] move.w d1,(a0)+ ;ReX[i] = d1 addq.l #1,d0 ;loop processing cmp.l #1024,d0 bcs.b sum move.l (12,a7),-(a7) ;calculate forward real FFT move.l (12,a7),-(a7) move.l (12,a7),-(a7) jsr _fft16_emac add.l #12,a7;add real and imaginary parts together;Corresponding C code:;for (i=0;i<n;i++){; ReX[i]=(ReX[i]+ImX[i])/n;;} moveq.l #0,d0 ;add real and imaginary parts movem.l (4,a7),a0-a1 ;togethernorm: move.w (a0),d1 ;d1 = ReX[i] move.w (a1)+,d2 ;d2 = ImX[i] add.l d2,d1 ;d1 = ReX[i] + ImX[i] move.w d1,(a0)+ ;ReX[i] = d1 addq.l #1,d0 ;loop processing cmp.l #1024,d0 bcs.b norm rts
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -