⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fir_r8.asm

📁 davinci技术 源码 视频监控汇编源码
💻 ASM
📖 第 1 页 / 共 2 页
字号:
* ========================================================================= *
*  TEXAS INSTRUMENTS, INC.                                                  *
*                                                                           *
*  NAME                                                                     *
*      fir_r8 -- Block FIR                                                  *
*                                                                           *
*                                                                           *
*  REVISION DATE                                                            *
*      09-May-2005                                                          *
*                                                                           *
*     USAGE                                                                 *
*        This routine has the following C prototype:                        *
*        void fir_r8                                                        *
*        (                                                                  *
*            const short *restrict x,  /* Input array [nr+nh-1 elements] */ *
*            const short *restrict h,  /* Coeff array [nh elements]      */ *
*                                      /* Reversed order                 */ *
*            short       *restrict r,  /* Output array [nr elements]     */ *
*            int nh,                   /* Number of coefficients.        */ *
*            int nr                    /* Number of output samples.      */ *
*        )                                                                  *
*                                                                           *
*                                                                           *
*                                                                           *
*      DESCRIPTION                                                          *
*         Computes a real FIR filter (direct-form) using coefficients       *
*         stored in vector h.  The real data input is stored in vector x.   *
*         The filter output result is stored in vector r.  Input data and   *
*         filter taps are 16-bit, with intermediate values kept at 32-bit   *
*         precision.  Filter taps are expected in Q15 format.               *
*                                                                           *
*                                                                           *
*     TECHNIQUES                                                            *
*         1.  The inner loop is unrolled eight times and the outer loop is  *
*             unrolled 8 times, computing 8 outputs.                        *
*                                                                           *
*         2.  The inner and outer loops are completely overlapped with      *
*             SPLOOP reload; thus no overhead exists for outer loop         *
*             execution.                                                    *
*                                                                           *
*         3.  For a case where this function is used with circular          *
*             addressing, A4 and B7 are allocated to A_X_addr and B_X_addr, *
*             respectively.                                                 *
*                                                                           *
*     ASSUMPTIONS                                                           *
*        Number of taps:    'nh' >= 8, multiple of 8.                       *
*        Number of samples: 'nr' >= 8, multiple of 8.                       *
*        Array 'r' is double-word aligned.                                  *
*                                                                           *
*                                                                           *
*     C CODE                                                                *
*        void fir_r8                                                        *
*        (                                                                  *
*            const short *restrict x,                                       *
*            const short *restrict h,                                       *
*            short       *restrict r,                                       *
*            int nh,                                                        *
*            int nr                                                         *
*        )                                                                  *
*        {                                                                  *
*            int i, j, sum;                                                 *
*                                                                           *
*            for (j = 0; j < nr; j++)                                       *
*            {                                                              *
*                sum = 0;                                                   *
*                for (i = 0; i < nh; i++)                                   *
*                    sum += x[i + j] * h[i];                                *
*                r[j] = sum >> 15;                                          *
*            }                                                              *
*        }                                                                  *
*                                                                           *
*                                                                           *
*   NOTES                                                                   *
*      This function is fully interruptible.                                *
*                                                                           *
*                                                                           *
*   CYCLES                                                                  *
*       When nh>=32, nh * nr / 8 + 22                                        *
*       Otherwise,   32 * nr / 8 + 22                                        *
*                                                                           *
*       For nh = 32 and nr = 256, cycles = 1046                             *
*                                                                           *
*   CODESIZE                                                                *
*       640 bytes                                                           *
* ------------------------------------------------------------------------- *
*             Copyright (c) 2005 Texas Instruments, Incorporated.           *
*                            All Rights Reserved.                           *
* ========================================================================= *


        .text
        .global _fir_r8

; Entry point of the block FIR routine described in the header comment.
; The label has to sit in column 1 and must spell exactly the name given
; to .global; otherwise the exported symbol is never defined.
_fir_r8:

; Aliases for the registers that carry the incoming arguments.  Read
; against the C prototype in the header (x, h, r, nh, nr), this assumes
; the usual C6000 convention of passing the first five arguments in
; A4, B4, A6, B6, A8 -- TODO confirm against the ABI used by the build:
;   A_X_addr = x,  B_H_addr = h,  A_R_addr = r,  B_T = nh,  A_N = nr
        .asg            A4,         A_X_addr
        .asg            B4,         B_H_addr
        .asg            A6,         A_R_addr
        .asg            B6,         B_T
        .asg            A8,         A_N


* ===================== SYMBOLIC REGISTER ASSIGNMENTS ===================== *
        ; Symbolic names for the physical registers used by the kernel.
        ; Many names below share one physical register (for example B11 is
        ; B_dfde, B_s7c and B_s5ad; A15 is A_d3d2, A_s7ab, A_s5c and
        ; A_s0ad).  Two such names can only hold different values if they
        ; are never live at the same time.
        ; NOTE(review): live ranges were not checked here -- the kernel
        ; continues beyond the part of the file visible in this view.

        ; Pointers.  A_X_addr/B_X_addr and A_COEFaddr/B_COEFaddr are the
        ; base registers of the LDDW loads in the loop body.  A8 is also
        ; assigned to A_N earlier in the file.
        .asg            A4,         A_X_addr
        .asg            B7,         B_X_addr
        .asg            A8,         A_COEFaddr
        ; Presumably the eight running sums, one per output computed in a
        ; pass -- TODO confirm; not referenced in the visible code.
        .asg            B29,        B_sum3
        .asg            B28,        B_sum2
        .asg            B31,        B_sum1
        .asg            B30,        B_sum0
        .asg            A27,        A_sum5
        .asg            A26,        A_sum4
        .asg            A29,        A_sum7
        .asg            A28,        A_sum6
        ; Register pairs that receive the LDDW loads made through
        ; A_X_addr / B_X_addr.
        .asg            A15,        A_d3d2
        .asg            A14,        A_d1d0
        .asg            B27,        B_d7d6
        .asg            B26,        B_d5d4
        .asg            A23,        A_dbda
        .asg            A22,        A_d9d8
        .asg            B11,        B_dfde
        .asg            B10,        B_dddc
        ; Register pairs that receive the LDDW loads made through
        ; A_COEFaddr / B_COEFaddr (each address is loaded into both the
        ; A-side and the B-side register file).
        .asg            A25,        A_c3c2
        .asg            A24,        A_c1c0
        .asg            B9,         B_COEFaddr
        .asg            B21,        B_c7c6
        .asg            B20,        B_c5c4
        .asg            B23,        B_c3c2
        .asg            B22,        B_c1c0
        .asg            A21,        A_c7c6
        .asg            A20,        A_c5c4
        ; Names with a trailing underscore are not used in the visible
        ; code; presumably derived/realigned copies of the data pairs
        ; above -- TODO confirm.
        .asg            B25,        B_d5d4_
        .asg            B24,        B_d3d2_
        .asg            A17,        A_d9d8_
        .asg            A16,        A_d7d6_
        .asg            B13,        B_dddc_
        .asg            B12,        B_dbda_
        ; Temporaries named s<output><part>, with combined names such as
        ; s6ab, s6cd, s6abcd.  Presumably partial sums that are reduced
        ; pairwise into one value per output -- TODO confirm; none are
        ; referenced in the visible code.
        .asg            B17,        B_s7d
        .asg            B16,        B_s6d
        .asg            B11,        B_s7c
        .asg            B10,        B_s6c
        .asg            A19,        A_s7b
        .asg            A18,        A_s6b
        .asg            A21,        A_s7a
        .asg            A20,        A_s6a
        .asg            A5,         A_s6ab
        .asg            A15,        A_s7ab
        .asg            B2,         B_s6cd
        .asg            B8,         B_s7cd
        .asg            A5,         A_s6abcd
        .asg            A7,         A_s7abcd
        .asg            B19,        B_s5d
        .asg            B18,        B_s4d
        .asg            A15,        A_s5c
        .asg            A14,        A_s4c
        .asg            A13,        A_s5b
        .asg            A12,        A_s4b
        .asg            B13,        B_s5a
        .asg            B12,        B_s4a
        .asg            B10,        B_s4ad
        .asg            B11,        B_s5ad
        .asg            A24,        A_s4bc
        .asg            A13,        A_s5bc
        .asg            A7,         A_s4abcd
        .asg            A25,        A_s5abcd
        .asg            A13,        A_s3d
        .asg            A12,        A_s2d
        .asg            A17,        A_s3c
        .asg            A16,        A_s2c
        .asg            B17,        B_s3b
        .asg            B16,        B_s2b
        .asg            B19,        B_s3a
        .asg            B18,        B_s2a
        .asg            B20,        B_s2ab
        .asg            B8,         B_s3ab
        .asg            A9,         A_s2cd
        .asg            A11,        A_s3cd
        .asg            B21,        B_s2abcd
        .asg            B25,        B_s3abcd
        .asg            A19,        A_s1d
        .asg            A18,        A_s0d
        .asg            B19,        B_s1c
        .asg            B18,        B_s0c
        .asg            B13,        B_s1b
        .asg            B12,        B_s0b
        .asg            A31,        A_s1a
        .asg            A30,        A_s0a
        .asg            A15,        A_s0ad
        .asg            A0,         A_s1ad
        .asg            B0,         B_s0bc
        .asg            B14,        B_s1bc
        .asg            B0,         B_s0abcd
        .asg            B23,        B_s1abcd

        ; Control and bookkeeping registers.
        ;  - B3 is named both B_ret and B_TC; the entry code stores B3 to
        ;    the stack before B_TC is written.
        ;  - B_i is copied from A_N on entry and is the predicate on the
        ;    SPLOOPD instruction.
        ;  - B6 is named B_TI here and B_T earlier in the file.
        ;  - B_SP (B15) is the base register of the STDW/STW saves made on
        ;    entry.
        ;  - B_sum32/B_sum10/A_sum76/A_sum54 are not referenced in the
        ;    visible code; presumably packed pairs of results -- TODO
        ;    confirm.
        .asg            B3,         B_ret
        .asg            B21,        B_R_addr
        .asg            B1,         B_i
        .asg            B5,         B_sum32
        .asg            B4,         B_sum10
        .asg            A3,         A_sum76
        .asg            A2,         A_sum54
        .asg            B6,         B_TI
        .asg            B3,         B_TC
        .asg            A1,         A_TI
        .asg            A10,        A_TC
        .asg            B15,        B_SP

* ========================================================================= *



        SHRU    .S2     B_T,        3,          B_T
||      SHRU    .S1     B_T,        2,          A_TC
||      MV      .L2X    A_N,        B_i
||      STDW    .D2     A11:A10,    *--B_SP[10]

        MVC     .S2     B_T,        RILC
||      SUB     .L2     B_T,        1,          B_T
||      STW     .D2     B3,         *+B_SP[6*2]

  [B_i] SPLOOPD         8
||      SUB     .D1     A_X_addr,   16,         A_X_addr
||      MVC     .S2     B_T,        ILC
||      SUB     .L2     A_TC,       2,          B_TC
||      SUB     .S1     B_H_addr,   16,         A_COEFaddr
||      STDW    .D2T1   A15:A14,    *+B_SP[2]

*- Stage 0 -----------------------------------------------------------------*
        SPMASK
||      LDDW    .D1T1   *++A_X_addr[2], A_d3d2:A_d1d0           ;[ 1,1]
||^     SUB     .S2     B_H_addr,   8,          B_COEFaddr      ;
||^     SUB     .L1     A_TC,       2,          A_TC
||^     STW     .D2T2   B14,        *+B_SP[3*2]

        SPMASK
||      LDDW    .D2T2   *++B_COEFaddr[2], B_c7c6:B_c5c4         ;[ 2,1]
||      LDDW    .D1T1   *++A_COEFaddr[2], A_c3c2:A_c1c0         ;[ 2,1]
||^     SUB     .S2     A_X_addr,   8,          B_X_addr        ;

        SPMASK
||      LDDW    .D2T2   *++B_X_addr[2], B_d7d6:B_d5d4           ;[ 3,1]
||^     SUB     .S2     B_TC,       2,          B_TI
||^     SUB     .S1     A_TC,       2,          A_TI

        LDDW    .D2T2   *B_X_addr[2], B_dfde:B_dddc             ;[ 4,1]
||      LDDW    .D1T1   *A_X_addr[2], A_dbda:A_d9d8             ;[ 4,1]

        LDDW    .D2T1   *B_COEFaddr, A_c7c6:A_c5c4              ;[ 5,1]
||      LDDW    .D1T2   *A_COEFaddr, B_c3c2:B_c1c0              ;[ 5,1]

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -