📄 fft32_mac.s.list
字号:
;--- Phase: separate even and odd points ---
;The first buffer holds the data interleaved: longword 2*i stays in ReX[i],
;longword 2*i+1 is moved to ImX[i].
;  a0 = ReX base (read side, not modified by the loop)
;  a2 = ReX write pointer, a1 = ImX write pointer (both post-incremented)
;  d0 = byte offset of the source pair, stepped by 8 (two longwords)
;The loop runs while d0 < 4096 (unsigned compare), i.e. 512 passes.
0x00000078: 4cef0300004c movem.l (76,a7),a0/a1 ;load the two buffer pointers kept at 76(a7) and 80(a7)
;point a0 and a1 to ReX and ImX buffers
0x0000007e: 7000 moveq.l #0,d0 ;d0 = source byte offset, starts at 0
0x00000080: 2448 movea.l a0,a2 ;a2 = write pointer into ReX (a0 stays at the base)
reorder
0x00000082: 24f00800 move.l (0,a0,d0.l),(a2)+ ;ReX[i]=ReX[2*i];
0x00000086: 22f00804 move.l (4,a0,d0.l),(a1)+ ;ImX[i]=ReX[2*i+1];
0x0000008a: 5080 addq.l #8,d0 ;advance to the next even/odd pair (8 bytes)
0x0000008c: 0c8000001000 cmpi.l #4096,d0 ;4096 bytes = 1024 source longwords
0x00000092: 65ee bcs.b reorder ;loop while d0 < 4096 (unsigned)
;--- Phase: bit reversal sorting ---
;Calls _rev_addr_sort with the two buffer addresses passed on the stack.
;NOTE(review): the code that follows uses a0 and a1 as the buffer bases
;without reloading them, so it relies on _rev_addr_sort leaving a0/a1
;unchanged - confirm against the subroutine.
0x00000094: 43e9f800 lea -2048(a1),a1 ;rewind a1 to the start of ImX (the reorder loop advanced it 512*4 = 2048 bytes)
0x00000098: 2f09 move.l a1,-(a7) ;push address of ImX[] buffer into the stack
0x0000009a: 2f08 move.l a0,-(a7) ;push address of ReX[] buffer into the stack
0x0000009c: 4eb900000022 jsr _rev_addr_sort ;jump to subroutine
0x000000a2: 4fef0008 lea (8,a7),a7 ;drop the two pushed longwords (8 bytes) from the stack
;--- Phase: first stage of FFT ---
;Each pass handles one butterfly on two consecutive longwords of each
;buffer and writes the results back in place.
;  a0 / a1 = read/write pointers into ReX / ImX (advance 8 bytes per pass)
;  d6      = pass counter, 0..255 (256 passes)
0x000000a6: 7c00 moveq.l #0,d6 ;d6 = pass counter
first_stage
0x000000a8: 4cd00005 movem.l (a0),d0/d2 ;d0 = ar, d2 = br
0x000000ac: 4cd1000a movem.l (a1),d1/d3 ;d1 = ai, d3 = bi
;on the first stage the butterfly operation
;looks like:
;xr = ar + br
;xi = ai + bi
;yr = ar - br
;yi = ai - bi
0x000000b0: 2802 move.l d2,d4 ;keep a copy of br, d2 is overwritten by the sum
0x000000b2: 2a03 move.l d3,d5 ;keep a copy of bi, d3 is overwritten by the sum
0x000000b4: d480 add.l d0,d2 ;xr = ar + br
0x000000b6: 20c2 move.l d2,(a0)+ ;xr -> memory (replaces ar)
0x000000b8: d681 add.l d1,d3 ;xi = ai + bi
0x000000ba: 22c3 move.l d3,(a1)+ ;xi -> memory (replaces ai)
0x000000bc: 9084 sub.l d4,d0 ;yr = ar - br
0x000000be: 20c0 move.l d0,(a0)+ ;yr -> memory (replaces br)
0x000000c0: 9285 sub.l d5,d1 ;yi = ai - bi
0x000000c2: 22c1 move.l d1,(a1)+ ;yi -> memory (replaces bi)
0x000000c4: 5286 addq.l #1,d6 ;one more butterfly done
0x000000c6: 0c8600000100 cmpi.l #256,d6 ;256 butterflies cover 512 points
0x000000cc: 65da bcs.b first_stage ;loop while d6 < 256 (unsigned)
;--- Phase: second stage of FFT ---
;Each pass handles two butterflies on four consecutive longwords of each
;buffer and writes the results back in place.
;  a0 / a1 = read/write pointers into ReX / ImX (advance 16 bytes per pass)
;  a6      = pass counter, 0..127 (d0-d7 are all occupied by data)
;  a2-a5   = copies of ar0, ar1, ai0, ai1 used for the subtract/add results
0x000000ce: 2c7c00000000 movea.l #0,a6 ;a6 = pass counter
0x000000d4: 41e8f800 lea (-2048,a0),a0 ;a0 points to the beginning of ReX buffer
0x000000d8: 43e9f800 lea (-2048,a1),a1 ;a1 points to the beginning of ImX buffer
;(the first stage advanced both pointers by 256*8 = 2048 bytes)
;on the second stage we will calculate
;two butterflies, first of which looks like
;butterfly on the first stage
;xr0 = ar0 + br0
;xi0 = ai0 + bi0
;yr0 = ar0 - br0
;yi0 = ai0 - bi0,
;and second looks like
;xr1 = ar1 + bi1
;xi1 = ai1 - br1
;yr1 = ar1 - bi1
;yi1 = ai1 + br1
second_stage
0x000000dc: 4cd0000f movem.l (a0),d0-d3 ;d0 = ar0, d1 = ar1, d2 = br0, d3 = br1
0x000000e0: 4cd100f0 movem.l (a1),d4-d7 ;d4 = ai0, d5 = ai1, d6 = bi0, d7 = bi1
0x000000e4: 2440 movea.l d0,a2 ;a2 = ar0
0x000000e6: 2641 movea.l d1,a3 ;a3 = ar1
0x000000e8: 2844 movea.l d4,a4 ;a4 = ai0
0x000000ea: 2a45 movea.l d5,a5 ;a5 = ai1
0x000000ec: d082 add.l d2,d0 ;xr0 = ar0 + br0
0x000000ee: 20c0 move.l d0,(a0)+ ;xr0 -> memory
0x000000f0: d287 add.l d7,d1 ;xr1 = ar1 + bi1
0x000000f2: 20c1 move.l d1,(a0)+ ;xr1 -> memory
0x000000f4: 95c2 suba.l d2,a2 ;yr0 = ar0 - br0
0x000000f6: 20ca move.l a2,(a0)+ ;yr0 -> memory
0x000000f8: 97c7 suba.l d7,a3 ;yr1 = ar1 - bi1
0x000000fa: 20cb move.l a3,(a0)+ ;yr1 -> memory
0x000000fc: d886 add.l d6,d4 ;xi0 = ai0 + bi0
0x000000fe: 22c4 move.l d4,(a1)+ ;xi0 -> memory
0x00000100: 9a83 sub.l d3,d5 ;xi1 = ai1 - br1
0x00000102: 22c5 move.l d5,(a1)+ ;xi1 -> memory
0x00000104: 99c6 suba.l d6,a4 ;yi0 = ai0 - bi0
0x00000106: 22cc move.l a4,(a1)+ ;yi0 -> memory
0x00000108: dbc3 adda.l d3,a5 ;yi1 = ai1 + br1
0x0000010a: 22cd move.l a5,(a1)+ ;yi1 -> memory
0x0000010c: 528e addq.l #1,a6 ;one more pass (four points) done
0x0000010e: bdfc00000080 cmpa.l #128,a6 ;128 passes cover 512 points
0x00000114: 65c6 bcs.b second_stage ;loop while a6 < 128 (unsigned)
;--- Setup for the remaining stages (3rd stage onwards) ---
;Stack-frame variables initialised here:
;  (64,a7) = number of sub DFTs on the current stage (starts at 64)
;  (68,a7) = stage loop counter (starts at 2, the stage loop ends at 9)
;Registers initialised here:
;  a5 = butterflies per sub DFT * 4 (starts at 16, i.e. 4 butterflies)
;  d6 = byte step through the twiddle factor table (starts at 1024)
;  d7 = butterfly counter of the current sub DFT (starts at 0)
0x00000116: 203c00000040 move.l #64,d0 ;fft for complex values
0x0000011c: 2f400040 move.l d0,(64,a7) ;starts from 3-rd stage: 64 sub DFTs
0x00000120: 7002 moveq.l #2,d0
0x00000122: 2f400044 move.l d0,(68,a7) ;stage loop counter (starts from 3rd stage)
0x00000126: 2a7c00000010 movea.l #16,a5 ;a5 contains the number of butterflies
;per one sub DFT multiplied
;by 4 (the size of values)
0x0000012c: 2c3c00000400 move.l #1024,d6 ;step in the table of twiddle factors
;(multiplied by 4 because of the size
;of coefficients)
0x00000132: 2c7c00000000 movea.l #0,a6 ;clear a6; NOTE: in the butterfly loop a6 is loaded with ai and the loop is counted in d7, so a6 is not the counter
0x00000138: a93c00000030 move.l #0x00000030,MACSR ;configure the MAC unit; 0x30 presumably selects fractional mode with rounding - confirm against the MACSR bit definitions
0x0000013e: 4282 clr.l d2 ;d2 = 0 (d2 is reloaded with ar at the top of every butterfly)
0x00000140: 4287 clr.l d7 ;d7 = 0, butterfly counter (in bytes, stepped by 4)
0x00000142: a13c00000000 move.l #0,ACC ;clear the MAC accumulator
;--- Stages 3..9: three nested loops (stage / sub DFT / butterfly) ---
;Register roles inside the butterfly loop:
;  a0 -> ar, a1 -> ai, a2 -> br, a3 -> bi (each post-incremented on store)
;  a4 -> twiddle table; advanced by d6 at the top of every butterfly, so it
;        points at the NEXT factor while d0/d1 still hold the current one
;  d0 = wr, d1 = wi (current twiddle factor)
;  d2 = ar, later yr;  d3 = xr, later xi;  d4 = br;  d5 = bi
;  a6 = ai, later yi
;  d7 = butterfly counter in bytes (step 4), compared against a5
;  a5 = butterflies per sub DFT * 4;  d6 = twiddle table step in bytes
;Stack-frame variables:
;  (60,a7) sub DFT loop counter, (64,a7) number of sub DFTs on this stage,
;  (68,a7) stage loop counter, (76,a7)/(80,a7) ReX/ImX buffer pointers
;Butterfly computed (in place):
;  xr = ar - br*wr - bi*wi      yr = 2*ar - xr
;  xi = ai + br*wi - bi*wr      yi = 2*ai - xi
next_stage ;start of the stage loop
0x00000148: 7000 moveq.l #0,d0
0x0000014a: 2f40003c move.l d0,(60,a7) ;sub DFT loop counter
0x0000014e: 4cef0300004c movem.l (76,a7),a0-a1 ;a0 points to ar0, a1 points to ai0
0x00000154: 2448 movea.l a0,a2
0x00000156: 2649 movea.l a1,a3
0x00000158: d5cd adda.l a5,a2 ;a2 points to br0
0x0000015a: d7cd adda.l a5,a3 ;a3 points to bi0
next_subDFT ;start of sub DFTs loop
0x0000015c: 287c00000000 movea.l #TF_table,a4 ;a4 points to the first twiddle factor
0x00000162: 4cd40003 movem.l (a4),d0-d1 ;wr -> d0, wi -> d1
next_bf ;start of butterflies loop
0x00000166: d9c6 adda.l d6,a4 ;a4 -> twiddle factor of the next butterfly
0x00000168: 2410 move.l (a0),d2 ;ar -> d2
0x0000016a: 2812 move.l (a2),d4 ;br -> d4
0x0000016c: a102 move.l d2,ACC ;ar -> ACC
0x0000016e: aa9349c0 msacl.l d0,d4,(a3),d5 ;ar-br*wr -> ACC, bi -> d5
0x00000172: acd159c1 msacl.l d1,d5,(a1),a6 ;ar-br*wr-bi*wi = xr -> ACC, ai -> a6
0x00000176: a183 move.l ACC,d3 ;xr -> d3
0x00000178: 20c3 move.l d3,(a0)+ ;xr -> memory
0x0000017a: d482 add.l d2,d2 ;2*ar -> d2
0x0000017c: 9483 sub.l d3,d2 ;2*ar-xr = yr -> d2
0x0000017e: 24c2 move.l d2,(a2)+ ;yr -> memory
0x00000180: a13c00000000 move.l #0,ACC ;restart the accumulation from 0 for the imaginary part
0x00000186: a2ac48c10004 macl.l d1,d4,4(a4),d1 ;br*wi -> ACC, next wi -> d1 (the product is expected to use the old d1 - confirm in the MAC-with-load description)
0x0000018c: a09459c0 msacl.l d0,d5,(a4),d0 ;br*wi-bi*wr -> ACC, next wr -> d0
0x00000190: a183 move.l ACC,d3 ;br*wi-bi*wr -> d3
0x00000192: d68e add.l a6,d3 ;ai+br*wi-bi*wr = xi -> d3
0x00000194: 22c3 move.l d3,(a1)+ ;xi -> memory
0x00000196: ddce adda.l a6,a6 ;2*ai -> a6
0x00000198: 9dc3 suba.l d3,a6 ;2*ai-xi = yi -> a6
0x0000019a: 26ce move.l a6,(a3)+ ;yi -> memory
;d0/d1 already hold the twiddle factor for the calculation of the next butterfly
;NOTE(review): on the last butterfly of a sub DFT the two loads above read
;from TF_table+4096 and TF_table+4100 (a5/4 steps of d6 bytes; a5 doubles
;while d6 halves, so the product stays 4096); the values are discarded,
;but the table must be readable there - confirm the table size
0x0000019c: 5887 addq.l #4,d7 ;one more butterfly done (counter kept in bytes)
0x0000019e: be8d cmp.l a5,d7
0x000001a0: 65c4 bcs.b next_bf ;end of butterflies loop
;of the current sub DFT
0x000001a2: 7e00 moveq.l #0,d7 ;reset the butterfly counter for the next sub DFT
0x000001a4: d1cd adda.l a5,a0 ;a0 - a3 point to the input values
0x000001a6: d3cd adda.l a5,a1 ;for the first butterfly
0x000001a8: d5cd adda.l a5,a2 ;of the next sub DFT
0x000001aa: d7cd adda.l a5,a3 ;(the butterfly loop already advanced each pointer by a5 bytes)
0x000001ac: 202f003c move.l (60,a7),d0
0x000001b0: 5280 addq.l #1,d0
0x000001b2: 2f40003c move.l d0,(60,a7) ;increment sub DFT loop counter
0x000001b6: b0af0040 cmp.l (64,a7),d0 ;compare sub DFT loop counter with
;the number of sub DFTs on this stage
0x000001ba: 6500ffa0 bcs.w next_subDFT ;end of sub DFTs loop
0x000001be: 7000 moveq.l #0,d0
0x000001c0: 2f40003c move.l d0,(60,a7) ;store 0 to the sub DFT loop counter (next_stage zeroes it again on entry)
0x000001c4: dbcd adda.l a5,a5 ;multiply contents of a5 (the number of
;butterflies per one sub DFT) by 2 for the
;next stage
0x000001c6: e28e lsr.l #1,d6 ;divide step in the table of twiddle
;factors by 2
0x000001c8: 202f0040 move.l (64,a7),d0 ;divide the number of sub DFTs for the
0x000001cc: e288 lsr.l #1,d0 ;next stage by 2
0x000001ce: 2f400040 move.l d0,(64,a7)
0x000001d2: 202f0044 move.l (68,a7),d0 ;increment stage loop counter
0x000001d6: 5280 addq.l #1,d0
0x000001d8: 2f400044 move.l d0,(68,a7)
0x000001dc: 0c8000000009 cmpi.l #9,d0 ;counter runs 2..8, i.e. 7 passes (stages 3 to 9)
0x000001e2: 6500ff64 bcs.w next_stage ;end of stage loop
;even/odd frequency domain decomposition
;Corresponding C code:
;nm1=smpl_num-1;
;nd2=smpl_num>>1;
;n4=(smpl_num>>2);
;for (i=1;i<n4;i++){
; im=nd2-i;
; ip2=i+nd2;
; ipm=im+nd2;
;
; ReX[ip2]=(ImX[i]+ImX[im])/2;
; ReX[ipm]=ReX[ip2];
;
; ImX[ip2]=(ReX[im]-ReX[i])/2;
; ImX[ipm]=-ImX[ip2];
;
; ReX[i]=(ReX[i]+ReX[im])/2;
; ReX[im]=ReX[i];
;
; ImX[i]=(ImX[i]-ImX[im])/2;
; ImX[im]=-ImX[i];
;}
;n34=(smpl_num*3)>>2;
;ReX[n34]=ImX[n4];
;ReX[nd2]=ImX[0];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -