example 5-10.sa

来自「《基于TI DSP的通用算法实现》程序代码」· SA 代码 · 共 431 行 · 第 1/2 页
431 行
[!A_il]MV.1         A_nh_l,       A_il                          ; il = nh_l
[!B_fl]MV.2         B_h32_n,      B_h32                         ; h32 = h32_n
[!B_fl]MV.2         B_h10_n,      B_h10                         ; h10 = h10_n
       SUB.1        A_il,         1,               A_il         ; il --

       MV.1x        B_h32,        A_h32                         ; Partitioning
       MV.1x        B_h10,        A_h10                         ; copy

       PACKLH2.1    A_x32,        A_x10,           A_x21        ; Prepare
       PACKLH2.2x   B_x54,        A_x32,           B_x43        ; x21, x43
       PACKLH2.2    B_x76,        B_x54,           B_x65        ; x65

       DOTP2.1      A_x10,        A_h10,           A_prod0_10   ; x1h1 + x0h0
       DOTP2.1      A_x32,        A_h32,           A_prod0_32   ; x3h3 + x2h2
       DOTP2.1      A_x32,        A_h10,           A_prod2_32   ; x3h1 + x2h0
       DOTP2.2      B_x54,        B_h32,           B_prod2_54   ; x5h3 + x4h2

       DOTP2.1      A_x21,        A_h10,           A_prod1_21   ; x2h1 + x1h0
       DOTP2.2      B_x43,        B_h32,           B_prod1_43   ; x4h3 + x3h2
       DOTP2.2      B_x43,        B_h10,           B_prod3_43   ; x4h1 + x3h0
       DOTP2.2      B_x65,        B_h32,           B_prod3_65   ; x6h3 + x5h2

       ADD.1        A_sum0,       A_prod0_10,      A_sum0_0     ; += prod0_10
       ADD.1        A_sum0_0,     A_prod0_32,      A_sum0       ; += prod0_32
       ADD.1        A_sum1,       A_prod1_21,      A_sum1_0     ; += prod1_21
       ADD.1x       A_sum1_0,     B_prod1_43,      A_sum1       ; += prod1_43

       ADD.2x       B_sum2,       A_prod2_32,      B_sum2_0     ; += prod2_32
       ADD.2        B_sum2_0,     B_prod2_54,      B_sum2       ; += prod2_54
       ADD.2        B_sum3,       B_prod3_43,      B_sum3_0     ; += prod3_43
       ADD.2        B_sum3_0,     B_prod3_65,      B_sum3       ; += prod3_65

       SHR.2x       A_sum0,       15,              B_sum0_s     ; sum0 >> 15
       ADD.1        A_sum1,       A_sum1,          A_sum1_s     ; sum1 >> 15
       SHR.2        B_sum2,       15,              B_sum2_s     ; sum2 >> 15
       ADD.2        B_sum3,       B_sum3,          B_sum3_s     ; sum3 >> 15

       MV.2x        A_sum1_s,     B_sum1_s                      ; Pack res.
       PACKHL2.2    B_sum1_s,     B_sum0_s,        B_r10        ; into
       PACKHL2.2    B_sum3_s,     B_sum2_s,        B_r32        ; doubleword

       MV.1         A_x76,        A_x32                         ; x32 = x76
       MV.1         A_x54,        A_x10                         ; x54 = x10

       SUB.2        B_ic,         1,               B_ic         ; ic --
[!B_ic]STDW.D1T2    B_r32:B_r10,  *A_rptr++                     ; Store
[!B_ic]ZERO.1       A_sum1:A_sum0                               ; Zero
[!B_ic]ZERO.2       B_sum3:B_sum2                               ; accum.
[!B_ic]MV.2         B_nh_l,         B_ic                        ; Reset

       BDEC.2       LOOP,          B_ij                         ; Branch

       .endif

       .if 0
       ;----------------------------------------------------;
       ; Constant-false conditional: nothing from here to   ;
       ; the matching .endif is assembled. The enclosed     ;
       ; code is an un-partitioned (no unit/cross-path      ;
       ; suffixes) version of a 4-output-per-pass FIR loop. ;
       ;----------------------------------------------------;

       ;----------------------------------------------------;
       ; Symbolic registers for the setup code and loops.   ;
       ; NOTE(review): A_nr, B_nh, B_hptr, A_xptr and       ;
       ; A_rptr are used below but not declared here;       ;
       ; presumably they are declared earlier in the        ;
       ; procedure -- confirm.                              ;
       ;----------------------------------------------------;
       .reg    B_m,            B_3,            B_2
       .reg    B_1,            B_ofs,          A_ofs
       .reg    B_ptr,          B_h32_n,        B_h10_n
       .reg    B_h32:B_h10,    A_h32:A_h10,    A_nr_l
       .reg    B_nh_l,         B_j,            A_hptr
       .reg    A_ptr_x,        A_it_i,         A_sum1:A_sum0
       .reg    B_sum3:B_sum2,  A_x32:A_x10

;-- Data words, products, partial sums and packed results

       .reg    B_x76:B_x54,    A_x21,          B_x43
       .reg    B_x65,          A_prod0_10,     A_prod0_32
       .reg    A_prod2_10,     A_prod2_32,     B_prod1_10
       .reg    B_prod1_32,     B_prod3_10,     B_prod3_32
       .reg    A_sum0_0,       A_sum2_0,       B_sum1_0
       .reg    B_sum3_0,       B_sum0_s,       B_sum1_s
       .reg    B_sum2_s,       B_sum3_s,       B_r32:B_r10

       ;----------------------------------------------------;
       ; Detect if mask is a multiple of 4 or not           ;
       ; If mask is of the form 4n + 3, then h3 = 0         ;
       ; If mask is of the form 4n + 2, then h3h2 = 0       ;
       ; If mask is of the form 4n + 1, then h3,h2,h1 = 0   ;
       ; A remainder of 0 is treated as 4 (no tap zeroed).  ;
       ;----------------------------------------------------;

       AND      3,            B_nh,            B_m          ; m = nh & 3
[!B_m] MVK      4,            B_m                           ; m == 0 -> m = 4

       CMPEQ    3,            B_m,             B_3          ; B_3 = (m == 3)
       CMPEQ    2,            B_m,             B_2          ; B_2 = (m == 2)
       CMPEQ    1,            B_m,             B_1          ; B_1 = (m == 1)

       SUB      B_nh,         B_m,             B_ofs        ; ofs = nh - m

       ;----------------------------------------------------;
       ; Read the filter taps speculatively. If the filter  ;
       ; tap is not valid it is replaced with zero. At most ;
       ; 3 values past the end of the array may be read.    ;
       ;----------------------------------------------------;

       ADDAH    B_hptr,       B_ofs,           B_ptr        ; ptr = &h[ofs]
       LDNDW    *B_ptr--,     B_h32:B_h10                   ; Load h32:h10, ptr post-decremented


[B_3]  CLR      B_h32,        16, 31,          B_h32        ; m == 3: clear bits 16..31 (h3 = 0)
[B_2]  ZERO     B_h32                                       ; m == 2: h3 = h2 = 0
[B_1]  ZERO     B_h32                                       ; m == 1: h3 = h2 = 0
[B_1]  CLR      B_h10,        16, 31,          B_h10        ; m == 1: clear bits 16..31 (h1 = 0)

       ;----------------------------------------------------;
       ; Keep the (possibly masked) h3,h2,h1,h0 doubleword  ;
       ; in h32_n:h10_n. LOOPJ reloads the working          ;
       ; coefficient registers from this copy at the start  ;
       ; of every outer pass.                               ;
       ;----------------------------------------------------;

       MV       B_h32,        B_h32_n                       ; h32_n = h32
       MV       B_h10,        B_h10_n                       ; h10_n = h10

       ;----------------------------------------------------;
       ; Convert the output-sample count and the tap count  ;
       ; into counts of groups of four, rounding up:        ;
       ; n_l = (n + 3) >> 2.                                ;
       ;----------------------------------------------------;

       ADD      A_nr,         3,               A_nr_l       ; nr + 3
       ADD      B_nh,         3,               B_nh_l       ; nh + 3
       SHRU     A_nr_l,       2,               A_nr_l       ; nr_l >> 2
       SHRU     B_nh_l,       2,               B_nh_l       ; nh_l >> 2

       SUB      A_nr_l,       0,               B_j          ; j = nr_l (subtracting 0 acts as a copy)
       ADD      B_ofs,        4,               A_ofs        ; A_ofs = ofs + 4 (initial data offset)

LOOPJ:
       ;----------------------------------------------------;
       ; Outer loop: each pass produces four output samples ;
       ; (sum0..sum3) and stores them as one doubleword.    ;
       ;----------------------------------------------------;

       MV       B_h32_n,      B_h32                         ; h32 = h32_n
       MV       B_h10_n,      B_h10                         ; h10 = h10_n
       MV       B_h32_n,      A_h32                         ; Same taps into
       MV       B_h10_n,      A_h10                         ; the A_ copies.

       MV       B_ptr,        A_hptr                        ; hptr = ptr (restart tap walk)

       ADDAH    A_xptr,       A_ofs,           A_ptr_x      ; ptr_x = &x[ofs]
       ADD      A_ofs,        4,               A_ofs        ; ofs += 4 for the next pass

       LDNDW    *A_ptr_x--,   B_x76:B_x54                   ; Load x76:x54, ptr_x post-decremented

       ; BDEC branches while its counter is >= 0 and then
       ; decrements it, so a start value of nh_l - 2 is
       ; presumably meant to give nh_l inner passes --
       ; TODO confirm against the instruction reference.
       MV       B_nh_l,       A_it_i                        ; it_i = nh_l
       SUB      A_it_i,       2,                A_it_i      ; it_i -= 2

       ZERO     A_sum1:A_sum0                               ; sum1 = sum0 = 0
       ZERO     B_sum3:B_sum2                               ; sum3 = sum2 = 0

       .trip    3                                           ; min. 3 passes (nh >= 12)
LOOPI:                                                      ; Inner loop: 4 taps per pass

        LDNDW    *A_ptr_x--,   A_x32:A_x10                  ; Load x32:x10, ptr_x post-decremented

        ; Build the odd-aligned sample pairs from the
        ; halves of neighbouring words.
        PACKLH2  A_x32,        A_x10,          A_x21        ; Prepare x21
        PACKLH2  B_x54,        A_x32,          B_x43        ; Prepare x43
        PACKLH2  B_x76,        B_x54,          B_x65        ; Prepare x65

        ; Two dot products per output; prodN_* feeds sumN.
        DOTP2    A_x10,        A_h10,          A_prod0_10   ; x1h1 + x0h0
        DOTP2    A_x32,        A_h32,          A_prod0_32   ; x3h3 + x2h2
        DOTP2    A_x32,        A_h10,          A_prod2_10   ; x3h1 + x2h0
        DOTP2    B_x54,        A_h32,          A_prod2_32   ; x5h3 + x4h2

        DOTP2    A_x21,        B_h10,          B_prod1_10   ; x2h1 + x1h0
        DOTP2    B_x43,        B_h32,          B_prod1_32   ; x4h3 + x3h2
        DOTP2    B_x43,        B_h10,          B_prod3_10   ; x4h1 + x3h0
        DOTP2    B_x65,        B_h32,          B_prod3_32   ; x6h3 + x5h2

        ;---------------------------------------------------;
        ;  Redundant loading of filter coefficients is      ;
        ; done to get partitioning of filter coefficients   ;
        ; on both A and B data paths. Both loads read the   ;
        ; same address; only the second one moves hptr.     ;
        ;---------------------------------------------------;

        LDNDW    *A_hptr,      A_h32:A_h10                  ; Next h32:h10 (A_ copy), hptr unchanged
        LDNDW    *A_hptr--,    B_h32:B_h10                  ; Next h32:h10 (B_ copy), hptr post-decremented

        ADD      A_sum0,       A_prod0_10,     A_sum0_0     ; sum0_0 = sum0 + prod0_10
        ADD      A_sum0_0,     A_prod0_32,     A_sum0       ; sum0 = sum0_0 + prod0_32
        ADD      A_prod2_10,   A_prod2_32,     A_sum2_0     ; sum2_0 = prod2_10 + prod2_32
        ADD      B_sum2,       A_sum2_0,       B_sum2       ; sum2 += sum2_0

        ADD      B_prod1_10,   B_prod1_32,     B_sum1_0     ; sum1_0 = prod1_10 + prod1_32
        ADD      A_sum1,       B_sum1_0,       A_sum1       ; sum1 += sum1_0
        ADD      B_sum3,       B_prod3_10,     B_sum3_0     ; sum3_0 = sum3 + prod3_10
        ADD      B_sum3_0,     B_prod3_32,     B_sum3       ; sum3 = sum3_0 + prod3_32

        ; The samples just loaded become the upper pair
        ; for the next pass.
        MV       A_x10,        B_x54                        ; x54 = x10
        MV       A_x32,        B_x76                        ; x76 = x32

        ; Recomputed on every pass; only the values from
        ; the final pass are read (by PACK2 after the loop).
        SHR      A_sum0,       15,             B_sum0_s     ; sum0 >> 15
        SHR      B_sum2,       15,             B_sum2_s     ; sum2 >> 15
        SHR      A_sum1,       15,             B_sum1_s     ; sum1 >> 15
        SHR      B_sum3,       15,             B_sum3_s     ; sum3 >> 15

        BDEC     LOOPI,        A_it_i                       ; Loop while it_i >= 0, then it_i--


        PACK2    B_sum1_s,     B_sum0_s,       B_r10        ; r10 = sum1_s : sum0_s
        PACK2    B_sum3_s,     B_sum2_s,       B_r32        ; r32 = sum3_s : sum2_s

        STDW     B_r32:B_r10,  *A_rptr++                    ; Store 4 results, rptr post-incremented

[B_j]   SUB      B_j,          1,              B_j          ; if (j) j--
[B_j]   B        LOOPJ                                      ; Repeat while j != 0

       .endif


       .endproc                                             ; End

* ========================================================================= *
*   End of file:  dsp_fir_gen.sa                                            *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2003 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *
example 5-10.sa - 源码说明

本页面展示了「《基于TI DSP的通用算法实现》程序代码」中的 example 5-10.sa 源码文件，采用 SA 编程语言编写，共 431 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与DSP相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?