example 5-12.sa

来自「《基于TI DSP的通用算法实现》程序代码」· SA 代码 · 共 187 行
187 行

; Example 5 - 12. Single-Precision Float-Point FIR Filter Implementation SA Listing for the TMS320C67x DSP


* ====================================================================== *
*  TEXAS INSTRUMENTS, INC.                                               *
*                                                                        *
*  NAME                                                                  *
*      DSPF_sp_fir_gen -- Single Precision Generic FIR filter            *
*                                                                        *
*    USAGE                                                               *  
*                                                                        *  
*    This routine is C Callable and can be called as:                    *  
*                                                                        *  
*        void DSPF_sp_fir_gen(const float *x, const float *h,            *  
*                    float * restrict r,int nh, int nr);                 *  
*                                                                        *  
*        x : Pointer to array holding the input floating point array     *  
*        h : Pointer to array holding the coefficient floating point     *  
*            array                                                       *  
*        r : Pointer to output array                                     *  
*        nh: Number of coefficients                                      *  
*        nr: Number of output values                                     *  
*                                                                        *  
*    DESCRIPTION                                                         *  
*                                                                        *  
*        This routine implements a block FIR filter.  There are "nh"     *  
*        filter coefficients, "nr" output samples, and "nh+nr-1"         *  
*        input samples.    The coefficients need to be placed in the "h" *  
*        array in reverse order {h(nh-1), ... , h(1), h(0)} and the      *  
*        array "x" starts at x(-nh+1) and ends at x(nr-1).  The          *  
*        routine calculates y(0) through y(nr-1) using the following     *  
*        formula:                                                        *  
*                                                                        *  
*        r(n) = h(0)*x(n) + h(1)*x(n-1) + ... + h(nh-1)*x(n-nh+1)        *  
*                                                                        *  
*        where n = {0, 1, ... , nr-1}.                                   *  
*                                                                        *  
*    C CODE                                                              *  
*        This is the C equivalent for the assembly code.  Note that      *  
*        the assembly code is hand optimized and restrictions may        *  
*        apply.                                                          *  
*                                                                        *  
*        void DSPF_sp_fir_gen(const float *x, const float *h,            *  
*                    float * restrict r, int nh, int nr)                 *  
*        {                                                               *  
*           int i, j;                                                    *  
*           float sum;                                                   *  
*                                                                        *  
*           for(j=0; j < nr; j++)                                        *  
*           {                                                            *  
*              sum = 0;                                                  *  
*              for(i=0; i < nh; i++)                                     *  
*              {                                                         *  
*                  sum += x[i+j] * h[i];                                 *  
*              }                                                         *  
*              r[j] = sum;                                               *  
*           }                                                            *  
*        }                                                               *  
*   TECHNIQUES                                                           *  
*                                                                        *  
*           1. The inner loop is unrolled twice and the outer loop is    *  
*          unrolled four times.                                          *  
*                                                                        *  
*   ASSUMPTIONS                                                          *  
*                                                                        *  
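*           1. x and h are assumed to be aligned on a                    *  
*          double word boundary.                                         *  
*           2. nh >= 4: the inner loop is declared with a minimum        *
*          trip count of 2 (.trip 2) and consumes two taps per pass.     *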
*                                                                        *
* ---------------------------------------------------------------------- *
*             Copyright (c) 2003 Texas Instruments, Incorporated.        *
*                            All Rights Reserved.                        *
* ====================================================================== *

                .global _DSPF_sp_fir_gen

_DSPF_sp_fir_gen   .cproc  x, h, r, nh, nr

; ------------------------------------------------------------------------
; void DSPF_sp_fir_gen(const float *x, const float *h,
;                      float * restrict r, int nh, int nr)
;
; Block FIR: for j = 0 .. nr-1,  r[j] = sum over i = 0 .. nh-1 of
; x[i+j] * h[i].  Four outputs are produced per outer-loop pass and two
; taps are consumed per inner-loop pass.
;
; Arguments
;   x  : input samples; read with LDDW, so it is expected to be
;        double-word aligned
;   h  : coefficients; read with LDDW, so it is expected to be
;        double-word aligned
;   r  : output array, written one word at a time
;   nh : number of coefficients.  The inner loop is declared .trip 2,
;        i.e. the caller must guarantee nh >= 4.
;   nr : number of outputs.  nr <= 0 returns without touching r.
;
; Notes
;   * All four outputs of a block are always computed; only the stores
;     are predicated.  When nr is not a multiple of 4 the last block
;     therefore still loads the x words that the unused outputs would
;     have needed.
;   * .no_mdep asserts that no memory accesses in this procedure depend
;     on each other (r must not overlap x or h).
;
; Symbolic registers
;   x_blk       : start of the x window for the current block of outputs
;   x_ptr/h_ptr : running pointers inside the inner loop
;   acc1..acc8  : even/odd-tap partial sums; output k of the block is
;                 acc(2k+1) + acc(2k+2)
;   ocntr       : outputs still to be stored
;   icntr       : tap pairs still to be processed
;   nh_is_odd   : 1 when one extra tap remains after the paired loop
; ------------------------------------------------------------------------

              .reg      h1:h0,     x1:x0,     x3:x2,     x4
              .reg      acc1,      acc2,      acc3,      acc4
              .reg      acc5,      acc6,      acc7,      acc8
              .reg      prod,      x_ptr,     h_ptr,     x_blk
              .reg      ocntr,     icntr
              .reg      nh_is_odd, nr_valid
              .no_mdep

              ; Nothing to do (and nothing may be stored) when nr <= 0.
              CMPLT     0,         nr,        nr_valid  ;nr_valid = (0 < nr)
 [!nr_valid]  B         sp_fir_gen_exit

              MV        nr,        ocntr              ;outputs left to store
              MV        x,         x_blk              ;x window of first block
              AND       nh,        1,         nh_is_odd
              SUB       nh,        nh_is_odd, nh      ;nh = even part of nh

              ;Outer loop: four outputs per pass
oloop:        .trip 1

              ZERO      acc1                          ;output 0, even taps
              ZERO      acc2                          ;output 0, odd taps
              ZERO      acc3                          ;output 1, even taps
              ZERO      acc4                          ;output 1, odd taps
              ZERO      acc5                          ;output 2, even taps
              ZERO      acc6                          ;output 2, odd taps
              ZERO      acc7                          ;output 3, even taps
              ZERO      acc8                          ;output 3, odd taps

              SHR       nh,        1,         icntr   ;tap pairs to process
              MV        x_blk,     x_ptr              ;restart x at this block
              MV        h,         h_ptr              ;restart h at h[0]

              ;Inner loop: two taps (h0, h1) per pass
iloop:        .trip 2

              LDDW      *h_ptr++,  h1:h0              ;h0, h1; advance 2 taps
              LDDW      *x_ptr,    x1:x0              ;x0, x1
              LDDW      *++x_ptr,  x3:x2              ;advance 2 samples; x2, x3
              LDW       *+x_ptr(8),x4                 ;x4 = word after x3
              MPYSP     x0,        h0,        prod
              ADDSP     acc1,      prod,      acc1    ;acc1 += x0*h0
              MPYSP     x1,        h1,        prod
              ADDSP     acc2,      prod,      acc2    ;acc2 += x1*h1
              MPYSP     x1,        h0,        prod
              ADDSP     acc3,      prod,      acc3    ;acc3 += x1*h0
              MPYSP     x2,        h1,        prod
              ADDSP     acc4,      prod,      acc4    ;acc4 += x2*h1
              MPYSP     x2,        h0,        prod
              ADDSP     acc5,      prod,      acc5    ;acc5 += x2*h0
              MPYSP     x3,        h1,        prod
              ADDSP     acc6,      prod,      acc6    ;acc6 += x3*h1
              MPYSP     x3,        h0,        prod
              ADDSP     acc7,      prod,      acc7    ;acc7 += x3*h0
              MPYSP     x4,        h1,        prod
              ADDSP     acc8,      prod,      acc8    ;acc8 += x4*h1
              SUB       icntr,     1,         icntr
  [icntr]     B         iloop
              ;BRANCH TO INNER LOOP OCCURS HERE

              ;Odd nh: fold the one remaining tap into the even-tap sums
  [nh_is_odd] LDW       *h_ptr,    h0                 ;last coefficient only
  [nh_is_odd] LDDW      *x_ptr,    x1:x0              ;samples for outputs 0,1
  [nh_is_odd] LDDW      *++x_ptr,  x3:x2              ;samples for outputs 2,3

  [nh_is_odd] MPYSP     h0,        x0,        prod
  [nh_is_odd] ADDSP     acc1,      prod,      acc1

  [nh_is_odd] MPYSP     h0,        x1,        prod
  [nh_is_odd] ADDSP     acc3,      prod,      acc3

  [nh_is_odd] MPYSP     h0,        x2,        prod
  [nh_is_odd] ADDSP     acc5,      prod,      acc5

  [nh_is_odd] MPYSP     h0,        x3,        prod
  [nh_is_odd] ADDSP     acc7,      prod,      acc7

              ;Combine partial sums and store.  ocntr >= 1 on entry to each
              ;block, so the first store is unconditional; the other three
              ;are skipped once ocntr reaches zero (final partial block).
              ADDSP     acc1,      acc2,      acc1
              STW       acc1,      *r++               ;store acc1+acc2
              SUB       ocntr,     1,         ocntr

              ADDSP     acc3,      acc4,      acc3
  [ocntr]     STW       acc3,      *r++               ;store acc3+acc4
  [ocntr]     SUB       ocntr,     1,         ocntr

              ADDSP     acc5,      acc6,      acc5
  [ocntr]     STW       acc5,      *r++               ;store acc5+acc6
  [ocntr]     SUB       ocntr,     1,         ocntr

              ADDSP     acc7,      acc8,      acc7
  [ocntr]     STW       acc7,      *r++               ;store acc7+acc8
  [ocntr]     SUB       ocntr,     1,         ocntr

              ADDAW     x_blk,     4,         x_blk   ;next block: x += 4 words

  [ocntr]     B         oloop
              ;BRANCH TO OUTER LOOP OCCURS HERE

sp_fir_gen_exit:
              .return


                .endproc

* ======================================================================== *
*  End of file: sp_fir_gen.sa                                              *
* ------------------------------------------------------------------------ *
*          Copyright (C) 2003 Texas Instruments, Incorporated.             *
*                          All Rights Reserved.                            *
* ======================================================================== *
example 5-12.sa - 源码说明

本页面展示了「《基于TI DSP的通用算法实现》程序代码」中的 example 5-12.sa 源码文件，采用 SA 编程语言编写，共 187 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与DSP相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?