📄 example 5-10.sa
字号:
; Closing part of the loop body that the BDEC on the last line sends back to
; LOOP. The LOOP label itself is not defined in this part of the file.
; Operand order is "src..., dst": the last operand is the register written.
; A "[!reg]" prefix means the instruction only takes effect when reg == 0.
[!A_il]MV.1 A_nh_l, A_il ; if il == 0: il = nh_l (reload)
[!B_fl]MV.2 B_h32_n, B_h32 ; if fl == 0: h32 = h32_n
[!B_fl]MV.2 B_h10_n, B_h10 ; if fl == 0: h10 = h10_n
SUB.1 A_il, 1, A_il ; il--
MV.1x B_h32, A_h32 ; A_h32 = B_h32 (taps available under both name sets)
MV.1x B_h10, A_h10 ; A_h10 = B_h10
; Build the odd-aligned data pairs x21, x43, x65 from x10, x32, x54, x76.
PACKLH2.1 A_x32, A_x10, A_x21 ; x21 from x32 and x10
PACKLH2.2x B_x54, A_x32, B_x43 ; x43 from x54 and x32
PACKLH2.2 B_x76, B_x54, B_x65 ; x65 from x76 and x54
; Eight two-term dot products, two per accumulator sum0..sum3.
DOTP2.1 A_x10, A_h10, A_prod0_10 ; x1h1 + x0h0
DOTP2.1 A_x32, A_h32, A_prod0_32 ; x3h3 + x2h2
DOTP2.1 A_x32, A_h10, A_prod2_32 ; x3h1 + x2h0
DOTP2.2 B_x54, B_h32, B_prod2_54 ; x5h3 + x4h2
DOTP2.1 A_x21, A_h10, A_prod1_21 ; x2h1 + x1h0
DOTP2.2 B_x43, B_h32, B_prod1_43 ; x4h3 + x3h2
DOTP2.2 B_x43, B_h10, B_prod3_43 ; x4h1 + x3h0
DOTP2.2 B_x65, B_h32, B_prod3_65 ; x6h3 + x5h2
; Each accumulator is updated in two steps through a *_0 temporary.
ADD.1 A_sum0, A_prod0_10, A_sum0_0 ; sum0_0 = sum0 + prod0_10
ADD.1 A_sum0_0, A_prod0_32, A_sum0 ; sum0 = sum0_0 + prod0_32
ADD.1 A_sum1, A_prod1_21, A_sum1_0 ; sum1_0 = sum1 + prod1_21
ADD.1x A_sum1_0, B_prod1_43, A_sum1 ; sum1 = sum1_0 + prod1_43
ADD.2x B_sum2, A_prod2_32, B_sum2_0 ; sum2_0 = sum2 + prod2_32
ADD.2 B_sum2_0, B_prod2_54, B_sum2 ; sum2 = sum2_0 + prod2_54
ADD.2 B_sum3, B_prod3_43, B_sum3_0 ; sum3_0 = sum3 + prod3_43
ADD.2 B_sum3_0, B_prod3_65, B_sum3 ; sum3 = sum3_0 + prod3_65
; Scale and pack. sum0/sum2 are shifted right by 15; sum1/sum3 are doubled
; instead. NOTE(review): the doubling presumably pairs with PACKHL2 taking the
; upper half of its first operand, giving the same 16 bits as ">> 15" --
; confirm against the PACKHL2 definition.
SHR.2x A_sum0, 15, B_sum0_s ; sum0_s = sum0 >> 15
ADD.1 A_sum1, A_sum1, A_sum1_s ; sum1_s = sum1 + sum1 (not a shift by 15)
SHR.2 B_sum2, 15, B_sum2_s ; sum2_s = sum2 >> 15
ADD.2 B_sum3, B_sum3, B_sum3_s ; sum3_s = sum3 + sum3 (not a shift by 15)
MV.2x A_sum1_s, B_sum1_s ; B_sum1_s = A_sum1_s, so both pack inputs are B_ names
PACKHL2.2 B_sum1_s, B_sum0_s, B_r10 ; r10 from sum1_s and sum0_s
PACKHL2.2 B_sum3_s, B_sum2_s, B_r32 ; r32 from sum3_s and sum2_s
; Data reuse for the next pass.
MV.1 A_x76, A_x32 ; x32 = x76
MV.1 A_x54, A_x10 ; x10 = x54 (A_x10 is the register written)
; Every time ic counts down to 0: store the packed pair, clear the
; accumulators and reload ic from nh_l.
SUB.2 B_ic, 1, B_ic ; ic--
[!B_ic]STDW.D1T2 B_r32:B_r10, *A_rptr++ ; if ic == 0: store r32:r10, advance rptr
[!B_ic]ZERO.1 A_sum1:A_sum0 ; if ic == 0: sum1 = sum0 = 0
[!B_ic]ZERO.2 B_sum3:B_sum2 ; if ic == 0: sum3 = sum2 = 0
[!B_ic]MV.2 B_nh_l, B_ic ; if ic == 0: ic = nh_l (must follow the three lines above)
BDEC.2 LOOP, B_ij ; branch back to LOOP, counting on ij
.endif
.if 0
;----------------------------------------------------;
; This whole region, up to the matching .endif, is   ;
; guarded by ".if 0" and therefore not assembled.    ;
; Outline of what the code below does:               ;
;  1. m = nh & 3 (4 when that is 0), ofs = nh - m.   ;
;  2. Load one double word of taps at &h[ofs] and    ;
;     zero the taps selected by m.                   ;
;  3. LOOPJ: set up pointers, clear sum0..sum3.      ;
;  4. LOOPI: eight DOTP2 products per pass are added ;
;     into sum0..sum3; data and taps are reloaded.   ;
;  5. Shift, pack and store r32:r10 through A_rptr.  ;
; B_nh, B_hptr, A_nr, A_xptr and A_rptr are used     ;
; here but are not declared inside this region.      ;
;----------------------------------------------------;
.reg B_m, B_3, B_2
.reg B_1, B_ofs, A_ofs
.reg B_ptr, B_h32_n, B_h10_n
.reg B_h32:B_h10, A_h32:A_h10, A_nr_l
.reg B_nh_l, B_j, A_hptr
.reg A_ptr_x, A_it_i, A_sum1:A_sum0
.reg B_sum3:B_sum2, A_x32:A_x10
;-- data pairs, products, temporaries and results
.reg B_x76:B_x54, A_x21, B_x43
.reg B_x65, A_prod0_10, A_prod0_32
.reg A_prod2_10, A_prod2_32, B_prod1_10
.reg B_prod1_32, B_prod3_10, B_prod3_32
.reg A_sum0_0, A_sum2_0, B_sum1_0
.reg B_sum3_0, B_sum0_s, B_sum1_s
.reg B_sum2_s, B_sum3_s, B_r32:B_r10
;----------------------------------------------------;
; Detect if mask is a multiple of 4 or not ;
; If mask is of the form 4n + 3, then h3 = 0 ;
; If mask is of the form 4n + 2, then h3h2 = 0 ;
; If mask is of the form 4n + 1, then h3,h2,h1 = 0 ;
;----------------------------------------------------;
AND 3, B_nh, B_m ; m = nh & 3
[!B_m] MVK 4, B_m ; if m == 0: m = 4
CMPEQ 3, B_m, B_3 ; B_3 = (m == 3)
CMPEQ 2, B_m, B_2 ; B_2 = (m == 2)
CMPEQ 1, B_m, B_1 ; B_1 = (m == 1)
SUB B_nh, B_m, B_ofs ; ofs = nh - m
;----------------------------------------------------;
; Read the filter taps speculatively. If the filter ;
; tap is not valid it is replaced with zero. At most ;
; 3 values past the end of the array may be read. ;
;----------------------------------------------------;
ADDAH B_hptr, B_ofs, B_ptr ; ptr = &h[ofs]
LDNDW *B_ptr--, B_h32:B_h10 ; Load h32:h10, then step ptr back
[B_3] CLR B_h32, 16, 31, B_h32 ; m == 3: h32 = 00XX (h3 cleared)
[B_2] ZERO B_h32 ; m == 2: h32 = 0
[B_1] ZERO B_h32 ; m == 1: h32 = 0
[B_1] CLR B_h10, 16, 31, B_h10 ; m == 1: h10 = 00XX (h1 cleared)
;----------------------------------------------------;
; Keep the (possibly masked) h3,h2,h1,h0 in          ;
; h32_n:h10_n. LOOPJ reloads h32/h10 from this copy  ;
; at the start of every pass.                        ;
;----------------------------------------------------;
MV B_h32, B_h32_n ; h32_n = h32
MV B_h10, B_h10_n ; h10_n = h10
;----------------------------------------------------;
; Number of groups of four, rounded up:              ;
;   nr_l = (nr + 3) >> 2,  nh_l = (nh + 3) >> 2      ;
;----------------------------------------------------;
ADD A_nr, 3, A_nr_l ; nr + 3
ADD B_nh, 3, B_nh_l ; nh + 3
SHRU A_nr_l, 2, A_nr_l ; nr_l >> 2
SHRU B_nh_l, 2, B_nh_l ; nh_l >> 2
SUB A_nr_l, 0, B_j ; j = nr_l (subtracting 0 just copies)
ADD B_ofs, 4, A_ofs ; A_ofs = B_ofs + 4. NOTE(review): comment used to read "ofs -= 4"; confirm intended sign
LOOPJ:
; Outer loop: one pass stores one r32:r10 double word.
MV B_h32_n, B_h32 ; h32 = h32_n
MV B_h10_n, B_h10 ; h10 = h10_n
MV B_h32_n, A_h32 ; Copy to
MV B_h10_n, A_h10 ; A side.
MV B_ptr, A_hptr ; Filter ptr. (already stepped back by the load above)
ADDAH A_xptr, A_ofs, A_ptr_x ; Data ptr. = &x[A_ofs]
ADD A_ofs, 4, A_ofs ; A_ofs += 4 for the next LOOPJ pass
LDNDW *A_ptr_x--, B_x76:B_x54 ; Load x76:x54, then step ptr_x back
MV B_nh_l, A_it_i ; loop cnt. = nh_l
SUB A_it_i, 2, A_it_i ; it_i -= 2 (bias for the BDEC below -- TODO confirm against BDEC semantics)
ZERO A_sum1:A_sum0 ; Zero accum.
ZERO B_sum3:B_sum2 ; Zero accum.
.trip 3 ; nh >= 12
LOOPI: ; LOOPI
LDNDW *A_ptr_x--, A_x32:A_x10 ; Load x32:x10, then step ptr_x back
PACKLH2 A_x32, A_x10, A_x21 ; Prepare x21
PACKLH2 B_x54, A_x32, B_x43 ; Prepare x43
PACKLH2 B_x76, B_x54, B_x65 ; Prepare x65
DOTP2 A_x10, A_h10, A_prod0_10 ; x1h1 + x0h0
DOTP2 A_x32, A_h32, A_prod0_32 ; x3h3 + x2h2
DOTP2 A_x32, A_h10, A_prod2_10 ; x3h1 + x2h0
DOTP2 B_x54, A_h32, A_prod2_32 ; x5h3 + x4h2
DOTP2 A_x21, B_h10, B_prod1_10 ; x2h1 + x1h0
DOTP2 B_x43, B_h32, B_prod1_32 ; x4h3 + x3h2
DOTP2 B_x43, B_h10, B_prod3_10 ; x4h1 + x3h0
DOTP2 B_x65, B_h32, B_prod3_32 ; x6h3 + x5h2
;---------------------------------------------------;
; Redundant loading of filter coefficients is ;
; done to get partitioning of filter coefficients ;
; on both A and B data paths. ;
; Both loads read the same address; only the second ;
; one moves A_hptr.                                 ;
;---------------------------------------------------;
LDNDW *A_hptr, A_h32:A_h10 ; Load h32:h10 (pointer unchanged)
LDNDW *A_hptr--, B_h32:B_h10 ; Load h32:h10 again, then step hptr back
ADD A_sum0, A_prod0_10, A_sum0_0 ; sum0_0 = sum0 + prod0_10
ADD A_sum0_0, A_prod0_32, A_sum0 ; sum0 = sum0_0 + prod0_32
ADD A_prod2_10, A_prod2_32, A_sum2_0 ; sum2_0 = prod2_10 + prod2_32
ADD B_sum2, A_sum2_0, B_sum2 ; sum2 += sum2_0
ADD B_prod1_10, B_prod1_32, B_sum1_0 ; sum1_0 = prod1_10 + prod1_32
ADD A_sum1, B_sum1_0, A_sum1 ; sum1 += sum1_0
ADD B_sum3, B_prod3_10, B_sum3_0 ; sum3_0 = sum3 + prod3_10
ADD B_sum3_0, B_prod3_32, B_sum3 ; sum3 = sum3_0 + prod3_32
MV A_x10, B_x54 ; x54 = x10
MV A_x32, B_x76 ; x76 = x32
; The four shifts below sit inside LOOPI, so they run on every pass; only the
; values from the last pass are consumed by the PACK2/STDW after the loop.
SHR A_sum0, 15, B_sum0_s ; sum0 >> 15
SHR B_sum2, 15, B_sum2_s ; sum2 >> 15
SHR A_sum1, 15, B_sum1_s ; sum1 >> 15
SHR B_sum3, 15, B_sum3_s ; sum3 >> 15
BDEC LOOPI, A_it_i ; Branch back to LOOPI, counting on it_i
PACK2 B_sum1_s, B_sum0_s, B_r10 ; Packed
PACK2 B_sum3_s, B_sum2_s, B_r32 ; output samples.
STDW B_r32:B_r10, *A_rptr++ ; Store r32:r10, advance rptr
[B_j] SUB B_j, 1, B_j ; if j != 0: j--
[B_j] B LOOPJ ; if j is still != 0: next LOOPJ pass
.endif
.endproc ; End
* ========================================================================= *
* End of file: dsp_fir_gen.sa *
* ------------------------------------------------------------------------- *
* Copyright (c) 2003 Texas Instruments, Incorporated. *
* All Rights Reserved. *
* ========================================================================= *
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -