📄 fft32_emac.s
字号:
addq.l #1,d6 cmpi.l #256,d6 bcs.b first_stage movea.l #0,a6 ;second stage of FFT lea (-2048,a0),a0 ;a0 points to the beginning of ReX buffer lea (-2048,a1),a1 ;a1 points to the beginning of ImX buffer ;on the second stage we will calculate ;two butterflies, first of which looks like ;butterfly on the first stage ;xr0 = ar0 + br0 ;xi0 = ai0 + bi0 ;yr0 = ar0 - br0 ;yi0 = ai0 - bi0, ;and second looks like ;xr1 = ar1 + bi1 ;xi1 = ai1 - br1 ;yr1 = ar1 - bi1 ;yi1 = ai1 + br1second_stage: movem.l (a0),d0-d3 ;d0 = ar0, d1 = ar1, d2 = br0, d3 = br1 movem.l (a1),d4-d7 ;d4 = ai0, d5 = ai1, d6 = bi0, d7 = bi1 movea.l d0,a2 ;a2 = ar0 movea.l d1,a3 ;a3 = ar1 movea.l d4,a4 ;a4 = ai0 movea.l d5,a5 ;a5 = ai1 add.l d2,d0 ;xr0 = ar0 + br0 move.l d0,(a0)+ add.l d7,d1 ;xr1 = ar1 + bi1 move.l d1,(a0)+ suba.l d2,a2 ;yr0 = ar0 - br0 move.l a2,(a0)+ suba.l d7,a3 ;yr1 = ar1 - bi1 move.l a3,(a0)+ add.l d6,d4 ;xi0 = ai0 + bi0 move.l d4,(a1)+ sub.l d3,d5 ;xi1 = ai1 - br1 move.l d5,(a1)+ suba.l d6,a4 ;yi0 = ai0 - bi0 move.l a4,(a1)+ adda.l d3,a5 ;yi1 = ai1 + br1 move.l a5,(a1)+ addq.l #1,a6; cmpa.l #128,a6 bcs.b second_stage move.l #64,d0 ;fft for complex values move.l d0,(64,a7) ;starts from 3-rd stage moveq.l #2,d0 move.l d0,(68,a7) ;stage loop counter (starts from 3rd stage) movea.l #16,a5 ;a5 contains the number of butterflies ;per one sub DFT multiplied ;by 4 (the size of values) move.l #1024,d6 ;step in the table of twiddle factors ;(multiplied by 4 because of the size ;of coefficients) movea.l #0,a6 ;counter for butterfly loop ;from MSW to LSW to store it correctly ;into the memory) move.l #0x00000030,MACSR clr.l d2 clr.l d7 move.l #0,ACC0 move.l #0,ACC1next_stage: ;start of stages loop moveq.l #0,d0 move.l d0,(60,a7) ;sub DFT loop counter movem.l (76,a7),a0-a1 ;a0 points to ar0, a1 points to ai0 movea.l a0,a2 movea.l a1,a3 adda.l a5,a2 ;a2 points to br0 adda.l a5,a3 ;a3 points to bi0next_subDFT: ;start of sub DFTs loop movea.l #TF_table,a4 ;a4 points to the first twiddle factor//********************** NEXT_BF ********************************* movem.l (a4),d0-d1 ;wr -> d0, wi -> d1next_bf: ;start of butterflies loop adda.l d6,a4 move.l (a0),d2 ;ar -> d2 move.l (a2),d4 ;br -> d4 move.l d2,ACC0 ;ar -> ACC msacl.l d0,d4,(a3),d5,ACC0 ;ar-br*wr -> ACC, bi -> d5 msacl.l d1,d5,(a1),a6,ACC0 ;ar-br*wr-bi*wi = xr -> ACC, ai -> a6 macl.l d1,d4,4(a4),d1,ACC1 ;ai+br*wi -> ACC, ar -> d2 msacl.l d0,d5,(a4),d0,ACC1 ;ai+br*wi-bi*wr = xi -> ACC, br -> d4 movclr.l ACC0,d3 move.l d3,(a0)+ ;xr -> memory add.l d2,d2 ;2*ar -> d2 sub.l d3,d2 ;2*ar-xr = yr -> d2 movclr.l ACC1,d3 add.l a6,d3 move.l d2,(a2)+ ;yr -> memory move.l d3,(a1)+ ;xi -> memory adda.l a6,a6 ;2*ai -> a6 suba.l d3,a6 ;2*ai-xi = yi -> a6 move.l a6,(a3)+ ;yi -> memory adda.l d6,a4 move.l (a0),d2 ;ar -> d2 move.l (a2),d4 ;br -> d4 move.l d2,ACC0 ;ar -> ACC msacl.l d0,d4,(a3),d5,ACC0 ;ar-br*wr -> ACC, bi -> d5 msacl.l d1,d5,(a1),a6,ACC0 ;ar-br*wr-bi*wi = xr -> ACC, ai -> a6 macl.l d1,d4,4(a4),d1,ACC1 ;ai+br*wi -> ACC, ar -> d2 msacl.l d0,d5,(a4),d0,ACC1 ;ai+br*wi-bi*wr = xi -> ACC, br -> d4 movclr.l ACC0,d3 move.l d3,(a0)+ ;xr -> memory add.l d2,d2 ;2*ar -> d2 sub.l d3,d2 ;2*ar-xr = yr -> d2 movclr.l ACC1,d3 add.l a6,d3 move.l d2,(a2)+ ;yr -> memory move.l d3,(a1)+ ;xi -> memory adda.l a6,a6 ;2*ai -> a6 suba.l d3,a6 ;2*ai-xi = yi -> a6 move.l a6,(a3)+ ;yi -> memory addq.l #8,d7 cmp.l a5,d7 bcs next_bf ;end of butterflies loop ;of the current sub DFT//********************** END OF NEXT_BF ********************************* moveq.l #0,d7 adda.l a5,a0 ;a0 - a3 point to the input values adda.l a5,a1 ;for the first butterfly adda.l a5,a2 ;of the next sub DFT adda.l a5,a3 move.l (60,a7),d0 addq.l #1,d0 move.l d0,(60,a7) ;increment sub DFT loop counter cmp.l (64,a7),d0 ;compare sub DFT loop counter with ;the number of sub DFTs on this stage bcs.w next_subDFT ;end of sub DFTs loop moveq.l #0,d0 move.l d0,(60,a7) ;store 0 to the sub DFT loop counter adda.l a5,a5 ;multiply contents of a5 (the number of ;butterflies per one sub DFT) by 2 for the ;next stage lsr.l #1,d6 ;divide step in the table of twiddle ;factors by 2 move.l (64,a7),d0 ;divide the number of sub DFTs for the lsr.l #1,d0 ;next stage by 2 move.l d0,(64,a7) move.l (68,a7),d0 ;increment stage loop counter addq.l #1,d0 move.l d0,(68,a7) cmpi.l #9,d0 bcs.w next_stage ;end of stage loop;even/odd frequency domain decomposition ;Corresponding C code: ;nm1=smpl_num-1; ;nd2=smpl_num>>1; ;n4=(smpl_num>>2); ;for (i=1;i<n4;i++){ ; im=nd2-i; ; ip2=i+nd2; ; ipm=im+nd2; ; ; ReX[ip2]=(ImX[i]+ImX[im])/2; ; ReX[ipm]=ReX[ip2]; ; ; ImX[ip2]=(ReX[im]-ReX[i])/2; ; ImX[ipm]=-ImX[ip2]; ; ; ReX[i]=(ReX[i]+ReX[im])/2; ; ReX[im]=ReX[i]; ; ; ImX[i]=(ImX[i]-ImX[im])/2; ; ImX[im]=-ImX[i]; ;} ;n34=(smpl_num*3)>>2; ;ReX[n34]=ImX[n4]; ;ReX[nd2]=ImX[0]; ;ImX[n34]=0; ;ImX[nd2]=0; ;ImX[n4]=0; ;ImX[0]=0; movem.l (76,a7),a0/a1 ;even/odd frequency domain decomposition lea (4,a0),a2 lea (4,a1),a3 lea (2048,a0),a4 lea (2048,a1),a5 lea (2052,a0),a6 move.l #2052,d0 move.l #4092,d6adjust: move.l (a3),d3 ;d3 = ImX[i] move.l d3,d1 ;d1 = ImX[i] move.l -(a5),d5 ;d5 = ImX[im] add.l d5,d3 ;d3 = ImX[i] + ImX[im] asr.l #1,d3 ;d3 = (ImX[i] + ImX[im]) / 2 move.l d3,(a6)+ ;ReX[ip2] = d3 move.l d3,(a0,d6.l) ;ReX[ipm] = ReX[ip2] move.l -(a4),d4 ;d4 = ReX[im] move.l d4,d7 ;d7 = ReX[im] move.l (a2),d2 ;d2 = ReX[i] sub.l d2,d4 ;d4 = ReX[im] - ReX[i] asr.l #1,d4 ;d4 = (ReX[im] - ReX[i]) / 2 move.l d4,(a1,d0.l) ;ImX[ip2] = d4 neg.l d4 ;d4 = -d4 move.l d4,(a1,d6.l) ;ImX[ipm] = -ImX[ip2] add.l d7,d2 ;d2 = ReX[i] + ReX[im] asr.l #1,d2 ;d2 = (ReX[i] + ReX[im]) / 2 move.l d2,(a2)+ ;ReX[i] = d2 move.l d2,(a4) ;ReX[im] = ReX[i] sub.l d5,d1 ;d1 = ImX[i] - ImX[im] asr.l #1,d1 ;d1 = (ImX[i] - ImX[im]) / 2 move.l d1,(a3)+ ;ImX[i] = d1 neg.l d1 ;d1 = -d1 move.l d1,(a5) ;ImX[im]=-ImX[i]; addq.l #4,d0 ;loop processing subq.l #4,d6 cmpi.l #3072,d0 bcs.b adjust moveq.l #0,d0 move.l (1024,a1),(3072,a0) ;ReX[n34]=ImX[n4]; move.l (a1),(2048,a0) ;ReX[nd2]=ImX[0]; move.l d0,(3072,a1) ;ImX[n34]=0; move.l d0,(2048,a1) ;ImX[nd2]=0; move.l d0,(1024,a1) ;ImX[n4]=0; move.l d0,(a1) ;ImX[0]=0; movea.l #2048,a5 ;the 10th stage of FFT movea.l #TF_table,a4 ;a4 points to the first twiddle factor movea.l a0,a2 ;a0 points to ar0 movea.l a1,a3 ;a1 points to ai0 adda.l a5,a2 ;a2 points to br0 adda.l a5,a3 ;a3 points to bi0 move.l (a0),d2 ;ar -> d2 move.l (a2),d4 ;br -> d4 moveq.l #0,d7 ;counter for butterfly loop movem.l (a4),d0-d1 ;wr -> d0, wi -> d1/* movem.l (a7),d0-d7/a0-a6 lea +72(a7),a7 rts*/fin_stage: adda.l #8,a4 move.l (a0),d2 ;ar -> d2 move.l (a2),d4 ;br -> d4 move.l d2,ACC0 ;ar -> ACC msacl.l d0,d4,(a3),d5,ACC0 ;ar-br*wr -> ACC, bi -> d5 msacl.l d1,d5,(a1),a6,ACC0 ;ar-br*wr-bi*wi = xr -> ACC, ai -> a6 macl.l d1,d4,4(a4),d1,ACC1 ;ai+br*wi -> ACC, ar -> d2 msacl.l d0,d5,(a4),d0,ACC1 ;ai+br*wi-bi*wr = xi -> ACC, br -> d4 movclr.l ACC0,d3 move.l d3,(a0)+ ;xr -> memory add.l d2,d2 ;2*ar -> d2 sub.l d3,d2 ;2*ar-xr = yr -> d2 movclr.l ACC1,d3 add.l a6,d3 move.l d2,(a2)+ ;yr -> memory move.l d3,(a1)+ ;xi -> memory adda.l a6,a6 ;2*ai -> a6 suba.l d3,a6 ;2*ai-xi = yi -> a6 move.l a6,(a3)+ ;yi -> memory adda.l #8,a4 move.l (a0),d2 ;ar -> d2 move.l (a2),d4 ;br -> d4 move.l d2,ACC0 ;ar -> ACC msacl.l d0,d4,(a3),d5,ACC0 ;ar-br*wr -> ACC, bi -> d5 msacl.l d1,d5,(a1),a6,ACC0 ;ar-br*wr-bi*wi = xr -> ACC, ai -> a6 macl.l d1,d4,4(a4),d1,ACC1 ;ai+br*wi -> ACC, ar -> d2 msacl.l d0,d5,(a4),d0,ACC1 ;ai+br*wi-bi*wr = xi -> ACC, br -> d4 movclr.l ACC0,d3 move.l d3,(a0)+ ;xr -> memory add.l d2,d2 ;2*ar -> d2 sub.l d3,d2 ;2*ar-xr = yr -> d2 movclr.l ACC1,d3 add.l a6,d3 move.l d2,(a2)+ ;yr -> memory move.l d3,(a1)+ ;xi -> memory adda.l a6,a6 ;2*ai -> a6 suba.l d3,a6 ;2*ai-xi = yi -> a6 move.l a6,(a3)+ ;yi -> memory addq.l #8,d7 cmp.l a5,d7 bcs.b fin_stage ;end of butterfly loop movem.l (a7),d0-d7/a0-a6 lea +72(a7),a7 rts;******************************************************************************;Inversed Real FFT;******************************************************************************;Upon entry, ReX[ ] and ImX[ ] contain the real and imaginary parts of the;frequency domain running from index 0 to 512. The remaining samples in;ReX[ ] and ImX[ ] are ignored.;Upon return, ReX[ ] contains the real time domain.;******************************************************************************_inv_fft32_emac:;make frequency domain symmetrical;Corresponding C code: ;n=smpl_num; ;for (i=((n>>1)+1);i<n;i++){ ; ReX[i]=ReX[n-i]; ; ImX[i]=-ImX[n-i]; ;} moveq.l #0,d1 ;make frequency domain movem.l (4,a7),a0-a1 ;symmetrical lea (2048,a0),a2 lea (2048,a1),a3 lea (2052,a0),a0 lea (2052,a1),a1movneg: move.l -(a2),(a0)+ ;ReX[i]=ReX[n-i]; move.l -(a3),d0 neg.l d0 move.l d0,(a1)+ ;ImX[i]=-ImX[n-i]; addq.l #1,d1 ;loop processing cmp.l #511,d1 bcs.b movneg;add real and imaginary parts together;Corresponding C code: ;for (i=0;i<n;i++){ ; ReX[i]=ReX[i]+ImX[i]; ;} moveq.l #0,d0 ;add real and imaginary parts movem.l (4,a7),a0-a1 ;togethersum: move.l (a0),d1 ;d1 = ReX[i] add.l (a1)+,d1 ;d1 = d1 + ImX[i] move.l d1,(a0)+ ;ReX[i] = d1 addq.l #1,d0 ;loop processing cmp.l #1024,d0 bcs.b sum move.l (8,a7),-(a7) ;calculate forward real FFT move.l (8,a7),-(a7) jsr _fft32_emac addq.l #8,a7;add real and imaginary parts together;Corresponding C code:;for (i=0;i<n;i++){; ReX[i]=(ReX[i]+ImX[i])/n;;} moveq.l #0,d0 ;add real and imaginary parts movem.l (4,a7),a0-a1 ;togethernorm: move.l (a0),d1 ;d1 = ReX[i] add.l (a1)+,d1 ;d1 = d1 + ImX[i]; asr.l #8,d1 asr.l #1,d1 ;d1 = d1 / 2 move.l d1,(a0)+ ;ReX[i] = d1 addq.l #1,d0 ;loop processing cmp.l #1024,d0 bcs.b norm rts
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -