📄 macros.asm
字号:
;***********************************************************
; Version 2.20.01
;***********************************************************
;*****************************************************************************
; Filename: macros.asm
; Description: collections of macros for cfft
;-----------------------------------------------------------------------------
; Description: Contains the following macros
;-----------------------------------------------------------------------------
; Revision History:
;
; 0.00 M. Christ/M. Chishtie. Original code
; 1.00 R. Piedra, 8/31/98
; - Modified stage3 macro to correct functional problem
; - Modified order of xmem, ymem operands in butterfly code
; to reduce number of cycles from 10 to 8
; 1.00 A. Aboagye, 10/14/98
; - Introduced scaling option ... *sp(scale) = 0 => no scaling
; *sp(scale) != 0 => scaling
;
;*****************************************************************************
.mmregs
;****************************************************************************
; macro : combo5xx
;
; COMBO5xx macro implements a bit reversal stage and the first two FFT
; stages (radix-4 implementation). Bit reversal is now done in the same loop
; thereby saving cycles. Circular addressing is used to access INPUT buffer and
; bit-reversed addressing is used to implement the DATA buffer. Therefore INPUT
; buffer must now be aligned at 4*N and DATA buffer at 2*N boundary. (MCHI)
;
; NOTE(review): the body below never references an INPUT buffer and uses no
; bit-reversed post-modify; it works in place on the buffer addressed by
; *sp(DATA). The paragraph above may describe a different revision - confirm
; where bit reversal is actually performed before relying on it.
;
; What the visible code does:
;   - Points AR2/AR3/AR4/AR5 at DATA+0/+2/+4/+6, i.e. at four consecutive
;     complex points stored as interleaved (real, imag) words.
;   - Performs one radix-4 butterfly on those four points, in place, and
;     advances every pointer by 8 words (one +1 step plus one +AR0 step with
;     AR0 = 7) so the next pass handles the next group of four points.
;   - Repeats the butterfly with a block repeat when N > 4; when N <= 4 the
;     loop setup is assembled out and the butterfly is executed once.
;
; Inputs:
;   *sp(DATA)   address of the complex data buffer
;   *sp(scale)  0      -> COMBO2 path, results stored unscaled (ASM = 0)
;               not 0  -> COMBO1 path, results stored divided by 4 (ASM = -2)
;   N           assembly-time symbol (used in .if N>4 and #N/4-1)
;
; Registers written by this macro: A, B, ASM, AR0, AR2-AR5, BK, BRC
; (plus whatever block-repeat state rptb itself sets up).
;
; NOTE(review): STAGE1, COMBO1, COMBO2, end1 and end2 are plain labels; only
; 'end?' carries the '?' suffix. Presumably the macro is expanded only once
; per source file - confirm, otherwise these labels would be redefined.
;---------------------------------------------------------------------------
combo5xx .macro ; REPEAT MACRO 'combo5xx': N/4 times
; .global STAGE1,COMBO1,COMBO2,end1,end2,end?
* *
* R1 := [(R1+R2)+(R3+R4)]/4 INPUT OUTPUT *
* R2 := [(R1-R2)+(I3-I4)]/4 ------------------ ------------------ *
* R3 := [(R1+R2)-(R3+R4)]/4 AR0 = 7 *
* R4 := [(R1-R2)-(I3-I4)]/4 AR1 -> R1,I1 AR1 - > R5,I5 *
* I1 := [(I1+I2)+(I3+I4)]/4 AR2 -> R2,I2 AR2 - > R6,I6 *
* I2 := [(I1-I2)-(R3-R4)]/4 ARP-> AR3 -> R3,I3 ARP - > AR3 - > R7,I7 *
* I3 := [(I1+I2)-(I3+I4)]/4 AR4 -> R4,I4 AR4 - > R8,I8 *
* I4 := [(I1-I2)+(R3-R4)]/4 *
* *
; Notes on the table above:
;   - The code below uses AR2,AR3,AR4,AR5 for points 1..4 (not AR1..AR4).
;   - The "/4" applies only to the scaled path (COMBO1); the COMBO2 path
;     stores the same sums and differences without the division.
;
STAGE1:
; --- pointer setup: AR2..AR5 -> the four complex points of the first group
mvdk *sp(DATA),ar2 ; (RMP) pointer to DATA r1,i1
mvdk *sp(DATA),ar3
mvmm ar3,ar4 ; AR4 := AR3 (= DATA)
mvmm ar3,ar5 ; AR5 := AR3 (= DATA)
mar *+ar3(2) ; pointer to DATA + 2 r2,i2
mar *+ar4(4) ; pointer to DATA + 4 r3,i3
mar *+ar5(6) ; pointer to DATA + 6 r4,i4
; --- select scaled / unscaled variant
; bcd is a delayed branch: the two instructions that follow it (ld #0,ASM
; and nop) execute whether or not the branch is taken. COMBO2 is therefore
; entered with ASM = 0; on fall-through ASM is overwritten with -2 below.
ld *sp(scale), a ; A := scale flag
bcd COMBO2, AEQ ; scale == 0 -> unscaled variant
ld #0,ASM ; ASM=0 (delay slot)
nop ; (delay slot)
ld #-2, ASM ; scaled variant: stores through ASM shift right by 2 (/4)
.if N>4
stm #7,ar0 ; index: +0 post-modify steps a pointer by 7 words
stm #0,BK ; blocksize to zero!
stm #N/4-1,BRC ; BRC = N/4-1 -> the block below runs N/4 times
rptb end1 ; repeat from COMBO1 through the instruction at end1
.endif
; ---------------------------------------------------------------------------
; Scaled butterfly (ASM = -2). The trailing columns show what AR2..AR5
; point at after each instruction. Intermediate results are parked in the
; data buffer itself (R2, R4 and I3 slots) and reloaded later, so the
; instruction order must not be changed.
; ---------------------------------------------------------------------------
; AR2 AR3 AR4 AR5
; ; --- --- --- ---
COMBO1 sub *ar2,*ar3,B ; B := (R1-R2) R1 R2 R3 R4
add *ar2,*ar3,A ; A := (R1+R2) R1 R2 R3 R4
sth B,ASM,*ar3 ; R2 slot := (R1-R2)/4 (temporary) R1 R2 R3 R4
add *ar4,*ar5,B ; B := (R3+R4) R1 R2 R3 R4
add B,A ; A := (R1+R2) + (R3+R4) R1 R2 R3 R4
sth A,ASM,*ar2+ ; R1':=((R1+R2) + (R3+R4))/4 I1 R2 R3 R4
sub B,1,A ; A := A - 2*B = ((R1+R2) - (R3+R4)) I1 R2 R3 R4
sub *ar4,*ar5,B ; B := (R3-R4) I1 R2 R3 R4
st A,*ar4+ ;ASM ; R3':=((R1+R2) - (R3+R4))/4 (stored through ASM) I1 R2 I3 R4
|| ld *ar3,A ; 16 ; A := (R1-R2)/4, reloaded from the R2 slot into the high word I1 R2 I3 R4
sth B,ASM,*ar5+ ; R4 slot := (R3-R4)/4 (temporary) I1 R2 I3 I4
sub *ar4,*ar5-,B ; B := (I3-I4) I1 R2 I3 R4
add B,ASM,A ; A := (R1-R2)/4 + (I3-I4)/4 I1 R2 I3 R4
sth A,*ar3+ ; R2':= ((R1-R2) + (I3-I4))/4 (A is already scaled, no shift) I1 I2 I3 R4
sub B,-1,A ; A := A - B/2 = ((R1-R2) - (I3-I4))/4 I1 I2 I3 R4
ld *ar5,16,B ; B := (R3-R4)/4, reloaded from the R4 slot before it is overwritten
sth A,*ar5+ ; R4':=((R1-R2) - (I3-I4))/4 I1 I2 I3 I4
add *ar4,*ar5,A ; A := (I3+I4) I1 I2 I3 I4
sth A,ASM,*ar4 ; I3 slot := (I3+I4)/4 (temporary) I1 I2 I3 I4
sub *ar2,*ar3,A ; A := (I1-I2) I1 I2 I3 I4
add B,2,A ; A := (I1-I2) + 4*B = (I1-I2)+(r3-r4); <<2 undoes the /4 held in B I1 I2 I3 I4
sth A,ASM,*ar5+0 ; I4':= ((I1-I2)+(r3-r4))/4; AR5 += AR0 -> next group I1 I2 I3 R4'
sub B,3,A ; A := A - 8*B = (I1-I2)-(r3-r4) I1 I2 I3 R4'
add *ar2,*ar3,B ; B := (I1+I2) I1 I2 I3 R4'
st A,*ar3+0% ;asm; I2':= ((I1-I2)-(R3-R4))/4; AR3 += AR0 (circular form, BK = 0) I1 R2' I3 R4'
|| ld *ar4,A ;16 ; A := (I3+I4)/4, reloaded from the I3 slot I1 R2' I3 R4'
add A,2,B ; B := (I1+I2) + 4*A = (I1+I2)+(I3+I4) I1 R2' I3 R4'
sth B,ASM,*ar2+0 ; I1':= ((I1+I2)+(I3+I4))/4; AR2 += AR0 R1' R2' I3 R4'
sub A,3,B ; B := B - 8*A = (I1+I2)-(I3+I4) R1' R2' I3 R4'
end1 sth B,ASM,*ar4+0 ; I3':= ((I1+I2)-(I3+I4))/4; AR4 += AR0 R1' R2' R3' R4'
b end? ; scaled path done: skip the unscaled variant
; ---------------------------------------------------------------------------
; Unscaled butterfly (entered with ASM = 0 from the bcd above). Same
; instruction sequence as COMBO1, with the explicit shift counts adjusted
; because the temporaries parked in the buffer are not divided by 4.
; ---------------------------------------------------------------------------
COMBO2
.if N>4
stm #7,ar0 ; index: +0 post-modify steps a pointer by 7 words
stm #0,BK ; blocksize to zero!
stm #N/4-1,BRC ; BRC = N/4-1 -> the block below runs N/4 times
rptb end2 ; repeat from the next instruction through end2
.endif
; AR2 AR3 AR4 AR5
; ; --- --- --- ---
sub *ar2,*ar3,B ; B := (R1-R2) R1 R2 R3 R4
add *ar2,*ar3,A ; A := (R1+R2) R1 R2 R3 R4
sth B,ASM,*ar3 ; R2 slot := (R1-R2) (temporary) R1 R2 R3 R4
add *ar4,*ar5,B ; B := (R3+R4) R1 R2 R3 R4
add B,A ; A := (R1+R2) + (R3+R4) R1 R2 R3 R4
sth A,ASM,*ar2+ ; R1':= (R1+R2) + (R3+R4) I1 R2 R3 R4
sub B,1,A ; A := A - 2*B = (R1+R2) - (R3+R4) I1 R2 R3 R4
sub *ar4,*ar5,B ; B := (R3-R4) I1 R2 R3 R4
st A,*ar4+ ;ASM ; R3':= (R1+R2) - (R3+R4) I1 R2 I3 R4
|| ld *ar3,A ; 16 ; A := (R1-R2), reloaded from the R2 slot I1 R2 I3 R4
sth B,ASM,*ar5+ ; R4 slot := (R3-R4) (temporary) I1 R2 I3 I4
sub *ar4,*ar5-,B ; B := (I3-I4) I1 R2 I3 R4
add B,ASM,A ; A := (R1-R2) + (I3-I4) I1 R2 I3 R4
sth A,*ar3+ ; R2':= (R1-R2) + (I3-I4) I1 I2 I3 R4
sub B,1,A ; A := A - 2*B = (R1-R2) - (I3-I4) I1 I2 I3 R4
ld *ar5,16,B ; B := (R3-R4), reloaded from the R4 slot before it is overwritten
sth A,*ar5+ ; R4':= (R1-R2) - (I3-I4) I1 I2 I3 I4
add *ar4,*ar5,A ; A := (I3+I4) I1 I2 I3 I4
sth A,ASM,*ar4 ; I3 slot := (I3+I4) (temporary) I1 I2 I3 I4
sub *ar2,*ar3,A ; A := (I1-I2) I1 I2 I3 I4
add B,A ; A := (I1-I2)+ (r3-r4) I1 I2 I3 I4
sth A,ASM,*ar5+0 ; I4':= (I1-I2)+ (r3-r4); AR5 += AR0 -> next group I1 I2 I3 R4'
sub B,1,A ; A := A - 2*B = (I1-I2)- (r3-r4) I1 I2 I3 R4'
add *ar2,*ar3,B ; B := (I1+I2) I1 I2 I3 R4'
st A,*ar3+0% ;asm; I2':= (I1-I2)-(R3-R4); AR3 += AR0 (circular form, BK = 0) I1 R2' I3 R4'
|| ld *ar4,A ;16 ; A := (I3+I4), reloaded from the I3 slot I1 R2' I3 R4'
add A,B ; B := (I1+I2)+(I3+I4) I1 R2' I3 R4'
sth B,ASM,*ar2+0 ; I1':= (I1+I2)+(I3+I4); AR2 += AR0 R1' R2' I3 R4'
sub A,1,B ; B := B - 2*A = (I1+I2)-(I3+I4) R1' R2' I3 R4'
end2 sth B,ASM,*ar4+0 ; I3':= (I1+I2)-(I3+I4); AR4 += AR0 R1' R2' R3' R4'
end? .endm
;*****************************************************************************
; macro: stage3
;
; STAGE3 macro is improved such that it now takes only 31 cycles per iteration.
; It uses two additional auxiliary registers(AR1,AR4) to support indexing.(MCHI)
;------------------------------------------------------------------------------
stage3 .macro
; .global STAGE3,MCR3,end?
.asg AR2,P
.asg AR3,Q
STAGE3:
ld *sp(DATA),a ; a = DATA
stlm a, P ; pointer to DATA pr,pi
add #8,a ; a = DATA + #8
stlm a, Q ; pointer to DATA + 8 qr,qi
ld *sp(scale),a
STM #9,AR1
STM #2,AR4
xc 1,ANEQ
ld #-1,ASM
.if N>8
STM #N/8-1,BRC ; execute N/8-1 times '4 macros'
RPTBD end? ;
.endif ;
LD *sp(SIN45),T ; load to sin(45)
nop
***************************************************************************
* *
* MACRO requires number of words/number of cycles: 6.5 *
* *
* PR'=(PR+QR)/2 PI'=(PI+QI)/2 *
* QR'=(PR-QR)/2 QI'=(PI-QI)/2 *
* *
* version 0.99 from Manfred Christ update: 2. May. 94 *
***************************************************************************
; (contents of register after exec.)
; AR2 AR3
; --- ---
MCR3 LD *P,16,A ; A := PR PR QR
SUB *Q,16,A,B ; B : PR-QR PR QR
ST B,*Q ; QR:= (1/2)(PR-QR)
|| ADD *Q+,B ; B := (PR+QR) PR QI
ST B,*P+ ; PR:= (1/2)(PR+QR)
|| LD *Q,A ; A := QI PI QI
ST A,*Q ; Dummy write
|| SUB *P,B ; B := (PI-QI) PI QI
ST B,*Q+ ; QI:= (1/2)(PI-QI) PI QR+1
|| ADD *P,B ; B := (PI+QI)
ST B,*P+ ; PI:= (1/2)(PI+QI) PR+1 QR+1
***************************************************************************
* *
* MACRO requires number of words/number of cycles: 9 *
* *
* T=SIN(45)=COS(45)=W45 *
* *
* *
* PR'= PR + (W*QI + W*QR) = PR + W * QI + W * QR (<- AR2) *
* QR'= PR - (W*QI + W*QR) = PR - W * QI - W * QR (<- AR3) *
* PI'= PI + (W*QI - W*QR) = PI + W * QI - W * QR (<- AR2+1) *
* QI'= PI - (W*QI - W*QR) = PI - W * QI + W * QR (<- AR3+2) *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -