* File: example 3-48.asm
* (code-viewer residue, not part of the source: "Font size:")
* CODESIZE *
* *
* 1248 bytes *
* *
* ----------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ======================================================================= *
; Export the entry label of the routine defined below.
.global _DSPF_sp_cfftr2_dit
; ---------------------------------------------------------------------------
; Symbolic register names (.asg substitution symbols) used by the routine.
; Roles below are taken from how the code uses each name:
;   B_x                load pointer for the x[2m+1]:x[2m] pairs
;   A_w                load pointer for the si:co pairs
;   B_x2mp1:B_x2m      register pair receiving x[2m+1]:x[2m]
;   A_si:A_co          register pair receiving si:co
;   B_lx2mc            down-counter for x[2m] loads
;   B_lx2iac           down-counter for x[2ia] loads
;   A_stcnt            down-counter for stores
;   A_nby2             loop trip counter
;   B_n2               A6 >> 1 ("n2");  B_hafn2 = B_n2 >> 1
;   A_8n2 / B_8n2      A6 << 2 ("8n2"), the pointer step, one copy per side
;   A_p1,B_p2,A_p3,B_p4  the four MPYSP products
;   A_rtemp, B_itemp   p1+p3 and p2-p4
;   A_x                load pointer for the x[2ia+1]:x[2ia] pairs
;   A_x2iap1:A_x2ia    register pair receiving x[2ia+1]:x[2ia]
;   A_wbase            A_w - 8, taken after the first si:co load
;
; Deliberate register sharing (several names map to the same register):
;   A15 = A_p3 = A_x2ias = A_x2ms
;   B3  = B_p4 = B_x2mp1s = B_x2iap1s
; A4, A6 and B4 are read as inputs at entry and are also aliased here
; (A_wbase, A_co, B_8n2); the code reads the input value before the alias
; is written.
; The four counters sit in A1, A2, B0 and B2 and are all used as instruction
; predicates ([reg] / [!reg]) in the loop.
; ---------------------------------------------------------------------------
.asg B8, B_x
.asg A5, A_w
.asg B7, B_x2mp1
.asg B6, B_x2m
.asg A7, A_si
.asg A6, A_co
.asg B0, B_lx2mc
.asg B4, B_8n2
.asg B13, B_n2
.asg A13, A_p1
.asg B11, B_p2
.asg B3, B_p4
.asg A15, A_p3
.asg B2, B_lx2iac
.asg A3, A_x
.asg A10, A_8n2
.asg A12, A_rtemp
.asg B10, B_itemp
.asg A1, A_stcnt
.asg A15, A_x2ias
.asg B3, B_x2mp1s
.asg A15, A_x2ms
.asg B3, B_x2iap1s
.asg A9, A_x2iap1
.asg A8, A_x2ia
.asg B1, B_hafn2
.asg A2, A_nby2
.asg A4, A_wbase
; ---------------------------------------------------------------------------
; _DSPF_sp_cfftr2_dit -- entry: save registers, mask interrupts, set up the
; first pointers and counters.
;
; Inputs as read by this code: A4 (base of x), B4 (pointer to w) and A6 (a
; size value: n2 = A6 >> 1, pointer step = A6 << 2).
; NOTE(review): presumably the C arguments (x, w, n) -- confirm against the
; library prototype.
;
; Stack frame (16 words reserved, word index relative to the new B15):
;   [0] A10   [1] A11   [2] CSR (entry value)   [3] A12
;   [4] B11   [5] A13   [6] IRP (entry value)   [7] A14
;   [8] B13   [9] A15   [10] B14   [11] B12   [12] B3   [13] B10
; Words [14] and [15] are not written here.
;
; After the saves, the stack pointer is copied into IRP and B15 is then
; overwritten with n2 - 6, so from that point on B15 is NOT the stack pointer.
; ---------------------------------------------------------------------------
_DSPF_sp_cfftr2_dit:
; push all the registers (also CSR, IRP )
SUBAW .D2 B15, 16, B15 ; reserve 16 words on the stack
|| B .S1 no_int ; the five packets up to no_int sit in this branch's delay slots
|| MVC .S2 CSR, B1 ; B1 = CSR on entry
; NOTE(review): presumably the branch is used because the C6000 does not
; service interrupts during branch delay slots, which keeps the CSR
; read-modify-write below uninterrupted -- confirm against the CPU guide.
MV .S1X B15, A1 ; A-side copy of the frame pointer so .D1 can store in parallel
|| STW .D2T1 A10, *B15[0]
STW .D2T2 B1, *B15[2] ; save entry CSR
|| STW .D1T1 A11, *A1[1]
STW .D2T2 B11, *B15[4]
|| STW .D1T1 A12, *A1[3]
|| AND .L2 B1, -2, B1 ; disable interrupts
|| MVC .S2 IRP, B2 ; B2 = IRP on entry
STW .D2T2 B2, *B15[6] ; save entry IRP
|| STW .D1T1 A13, *A1[5]
|| MVC .S2 B1, CSR ; write back CSR with bit 0 cleared
STW .D2T2 B13, *B15[8]
|| STW .D1T1 A14, *A1[7]
no_int:
STW .D2T2 B14, *B15[10]
|| STW .D1T1 A15, *A1[9]
STW .D2T2 B3, *B15[12]
|| MVC .S2 B15, IRP ; keep the stack pointer in IRP; B15 is reused as scratch below
; A3 = A4 + 4*A6 (ADDAW scales the index by the word size).
ADDAW .D1 A4, A6, A3 ; init x[2m] ptr
|| SHR .S2X A6, 1, B_n2 ; init n2
|| MV .L1X B4, A_w ; init w ptr
|| STW .D2T2 B10, *B15[13] ; push b10
; The STW in this packet still addresses through the old B15: the SUB result
; is written at the end of the cycle.
MV .S2X A3, B_x ; transfer x[2m] ptr
|| SHL .S1 A6, 2, A_8n2 ; keep 8n2 for addr incr
|| SUB .L2 B_n2, 6, B15 ; for inner loop cntr
|| STW .D2T2 B12, *B15[11] ; push b12
* ====================== PIPED LOOP PROLOG ======================================= *
; Fills the software pipeline before the kernel at 'loop'. The packets follow
; the same four-packet rotation as the kernel (x[2ia] load / x[2m] load /
; co products / counter update), with each stage switched on as soon as its
; inputs exist. The remaining pointers and counters are also initialised here.
;
; Timing the code relies on (LDDW results arrive after its delay slots):
;  - A6 is the destination A_co of the first si:co load, yet A6 is still read
;    as the size input one packet later (SHL A6, 2): the loaded value has not
;    landed yet.
;  - A4 is read by four instructions in the same packet that overwrites it
;    with A_wbase; all of them see the old A4 (base of x).
LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
|| LDDW .D1 *A_w++, A_si:A_co ; load si:co
|| MV .S2 B_x, B5 ; init x[2m] store ptr
MV .L1 A4, A11 ; init x[2ia] store ptr
|| SHL .S2X A6, 2, B_8n2 ; copy of 8n2 on b-side
|| MV B_n2, B_lx2mc ; load cntr for x[2m] loads
[B_lx2mc]SUB .L2 B_lx2mc, 1, B_lx2mc ; decr x[2m] load cntr
|| MV .S1 A4, A_x ; init x[2ia] load ptr to the base of x
|| MV .S2X A4, B14 ; save base x ptr
|| MV .D1 A4, A0 ; save base x ptr
|| SUB .L1 A_w, 8, A_wbase ; save w base ptr (A_w was already post-incremented by 8)
MV .D2 B_n2, B_lx2iac ; init x[2ia] load cntr
|| MV .L1X B_n2, A_stcnt ; init store cntr
|| SHR .S2 B_n2, 1, B_hafn2 ; init half of n2
LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
|| MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
[B_lx2mc]SUB .S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
|| MV .L1X B15, A_nby2 ; init loop cntr (B15 = n2 - 6 from the setup code)
MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
|| MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
|| MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
[B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; load x[2ia+1]:x[2ia]
|| MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
|| MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
|| ADDSP .L1 A_p1, A_p3, A_rtemp ; rtemp=p1+p3
|| SUBSP .L2 B_p2, B_p4, B_itemp ; itemp=p2-p4
LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
||[B_lx2iac]SUB.S2 B_lx2iac, 1, B_lx2iac ; decr x[2ia] load cntr
MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
|| MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
[B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; load x[2ia+1]:x[2ia]
|| MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
|| MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
|| ADDSP .L1 A_p1, A_p3, A_rtemp ; rtemp=p1+p3
|| SUBSP .L2 B_p2, B_p4, B_itemp ; itemp=p2-p4
LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
||[B_lx2iac]SUB.S2 B_lx2iac, 1, B_lx2iac ; decr x[2ia] load cntr
|| ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; x[2ia]=x[2ia]+rtemp
|| SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; x[2m+1]=x[2ia+1]-itemp
; The unconditional branch below has its delay slots run through the first
; (fall-through) pass of the kernel, so it is what sends execution back to
; 'loop' after that first pass.
MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
|| MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
|| B .S2 loop
[B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
|| SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; x[2m]=x[2ia]-rtemp
|| ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
* ================== PIPED LOOP KERNEL ======================================== *
; Steady-state software-pipelined loop: four execute packets per pass.
; One pass issues one x[2ia] pair load, one x[2m] pair load, four MPYSP,
; six ADDSP/SUBSP and four STW. Because the loop is pipelined, the
; instructions of a single pass operate on values produced in earlier passes.
;
; Counters (all used as predicates):
;   B_lx2mc   x[2m] loads left before B_x is stepped by B_8n2 and the next
;             si:co pair is fetched
;   B_lx2iac  x[2ia] loads left before A_x is stepped by A_8n2
;   A_stcnt   stores left before both store pointers are stepped by 8n2
;   A_nby2    loop trip counter
; "MPY 1, B_n2, dst" is used as a register copy executed on the .M unit.
; NOTE(review): MPY is a 16x16-bit multiply, so the copy is exact only while
; n2 fits in a signed 16-bit value -- confirm the supported size range.
loop:
LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; load x[2ia+1]:x[2ia]
|| MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
|| MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
||[!B_lx2mc]ADD.D2 B_x, B_8n2, B_x ; incr load ptr if required
|| ADDSP .L1 A_p1, A_p3, A_rtemp ; rtemp=p1+p3
|| SUBSP .L2 B_p2, B_p4, B_itemp ; itemp=p2-p4
||[!A_stcnt]ADD.S2 B5, B_8n2, B5 ; incr store ptr if required
||[!A_stcnt]ADD.S1 A11, A_8n2, A11 ; incr store ptr if required
LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
||[!B_lx2mc]LDDW.D1 *A_w++, A_si:A_co ; load si:co
||[!B_lx2mc]MPY .M2 1, B_n2, B_lx2mc ; reset load cntr
||[B_lx2iac]SUB .S2 B_lx2iac, 1, B_lx2iac ; decr x[2ia] load cntr
|| ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; x[2ia]=x[2ia]+rtemp
|| SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; x[2m+1]=x[2ia+1]-itemp
||[A_nby2]SUB .S1 A_nby2, 1, A_nby2 ; decr loop cntr
||[!A_stcnt]MPY.M1X 1, B_n2, A_stcnt ; reset store cntr
; The branch below takes effect only after its delay slots, i.e. after the
; last packet of the following pass.
MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
|| MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
||[!B_lx2iac]ADD.S1 A_x, A_8n2, A_x ; incr load ptr if required
||[A_nby2]B .S2 loop ; Branch loop
|| STW .D1T1 A_x2ias, *A11++ ; store x[2ia]
|| STW .D2T2 B_x2mp1s, *B5[1] ; store x[2m+1]
[B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
||[!B_lx2iac]MPY.M2 1, B_n2, B_lx2iac ; reset x[2ia] load cntr to n2
|| SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; x[2m]=x[2ia]-rtemp
|| ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
|| STW .D1 B_x2iap1s, *A11++ ; store x[2ia+1]
|| STW .D2 A_x2ms, *B5++[2] ; store x[2m]
||[A_stcnt]SUB.S1 A_stcnt, 1, A_stcnt ; decr store cntr
* ======================= END OF PIPED LOOP KERNEL ========================== *
; First execute packet after the kernel falls through. It repeats the first
; kernel packet, except that the conditional B_x update now reloads B_x as
; B14 + 4*B_n2 (B14 holds the base of x saved in the prolog) instead of
; stepping it by B_8n2.
; NOTE(review): the (e)/(p) tags presumably mark instructions draining this
; pass (epilog) versus instructions priming the next outer-loop pass
; (prolog) -- confirm against the complete source.
LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; (e) load x[2ia+1]:x[2ia]
|| MPYSP .M1X A_si, B_x2mp1, A_p3 ; (e) p3=si*x[2m+1]
|| MPYSP .M2X A_si, B_x2m, B_p4 ; (e) p4=si*x[2m]
||[!B_lx2mc]ADDAW.D2 B14, B_n2, B_x ; (p) init B_x for outer loop
|| ADDSP .L1 A_p1, A_p3, A_rtemp ; (e) rtemp=p1+p3
|| SUBSP .L2 B_p2, B_p4, B_itemp ; (e) itemp=p2-p4
||[!A_stcnt]ADD.S2 B5, B_8n2, B5 ; (e) incr store ptr if required
||[!A_stcnt]ADD.S1 A11, A_8n2, A11 ; (e) incr store ptr if required
; loads are predicated with B_hafn2 so that the last prolog does not
; perform invalid loads
; NOTE(review): the instructions this comment introduces are not present in
; this listing; the routine appears to be cut off here (no register restore
; or return is visible).
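; ---------------------------------------------------------------------------
; Code-viewer residue (not part of the assembly source): keyboard shortcuts
;   Copy code           Ctrl + C
;   Search code         Ctrl + F
;   Full-screen mode    F11
;   Toggle theme        Ctrl + Shift + D
;   Show shortcuts      ?
;   Increase font size  Ctrl + =
;   Decrease font size  Ctrl + -
; ---------------------------------------------------------------------------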