⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 amf_biquadcascade_render.asm

📁 ADI SHARC DSP 音频算法标准模块库
💻 ASM
字号:
// Copyright(c) 2005 Analog Devices, Inc. All Rights Reserved.
// This software is proprietary and confidential to Analog Devices, Inc. and its licensors.

// File    : $Id: //depot/development/visualaudio/modules/2.5.0/SHARC/Source/AMF_BiquadCascade_Render.asm#3 $ 
// Part of : VisualAudio V2.5.0 
// Updated : $Date: 2006/10/12 $ by $Author: Fernando $




//    Module Name     : AMF_BiquadCascade_Render.asm 
//    DSP Processor   : ADSP21161
//    Original Author : Richard Grafton, et al.  Ported to VA by Tim Stilson    
//    Date               : 6/18/03
//====================================================================================
// Processor resources used:
//  91 words pmem INTERNAL
//  1273 cycles for tickSize=128 (1 stage of 2 biquads)
//     cycles = 116 + TickSize*(5 + 2*numSections)
//  (SIMD used)
//====================================================================================

#if 1

    /**************************************************************
    File Name:    iir_hh_matt.asm
            
    Revision history:

        09/03/98  Richard Grafton, ADI  Initial version
        09/28/98  OW                    Bug fixed, verified, timed
        10/06/99  Boris Lerner            SIMD (requires data to be interlaced w/ zeros)
        12/13/99  Matt Walsh            Optimized SIMD     - Added second set of filters into loop 
                                                        - non interlaced data required
        12/16/99  Matt Walsh            Made callable
        06/05/03  Tim Stilson            Debugged register initialization, verified coef layout
        06/18/03  Tim Stilson            Ported to VA Module (changed input arguments, etc)
        08/15/03  Tim Stilson            Some code-size optimizations



    Purpose:    Subroutine that implements a Biquad IIR Filter given
                coefficients and samples.


    Equation:    
                ,---------------.                            ,---------------.
    x(n)------->| compblk X (f8)|--> I(n)        x(n+1)----->| compblk X (f9)|---> I(n+1)
                `---------------'                            `---------------'
                ,---------------.                            ,---------------.
    I(n-1)----->| compblk Y (s8)|---> Y(n-1)    I(n)------->| compblk Y (s9)|---> Y(n)
                `---------------'                            `---------------'

    (The two "filter-blocks" on the left are computed simultaneously using SIMD,
    and then, in a second loop, the two "filter-blocks" on the right are computed
    simultaneously via SIMD.)

    The resulting output is delayed by one sample, so the total block transfer 
    function effectively has an additional z^-1.




    Calling Parameters:     See C function declaration in header file
                
    Assumptions:
        
                - This algorithm always performs an even number of filters, so
                if an odd number is required, simply use zeros for one set of
                filter coefficients (a unity filter).
                - Coefficients must also be interlaced to accommodate the SIMD
                fetches. Here is an example coef. buffer: (in this case BIQUADS = 4)

    ***********************************************************************/



#include "processor.h"
#include "AMF_BiquadCascade.h"
#include <asm_sprt.h>

// global routines
.global    _AMF_BiquadCascade_Render;            ;

.segment /pm SEG_MOD_FAST_CODE;

//------------------------------------------------------------------------------------
// _AMF_BiquadCascade_Render
//
// Scales one tick of input samples by the instance's TotalAmp (writing the result
// into the output buffer), then filters that buffer in place through the biquad
// cascade, two samples per pass of the outer loop, with SIMD (PEYEN) enabled.
//
// Register inputs as read by this code (the C prototype lives in the header file):
//   r4  - pointer to the instance structure (AMF_BiquadCascade_* field offsets)
//   r8  - pointer to the buffer-pointer array: [0] = input, [1] = output
//   r12 - number of points; it is halved for the loop counts, so an odd count
//         appears to leave the final sample unprocessed -- TODO confirm callers
//         always pass an even count
//
// Instance fields read   : State, Coefs, NumSections, TotalAmp, LastIn
// Instance fields written: LastIn; the State buffer is also rewritten through i1
//
// Saved on entry and restored before exit: mode1, r14, r15, r9, i0, i1, i2, i3,
// i5, b0, b1.  l0, l1 and l13 are set to 0 before exit.
// Written without being saved: r0, f1, f2, f4, f8, f12, s4, s8, s9, i4, b13/i13,
// m4, m12.  NOTE(review): presumably these are scratch under the C run-time
// model in use -- confirm, in particular for the PEy-side registers.
//------------------------------------------------------------------------------------
_AMF_BiquadCascade_Render:

    i4 = r4;                /* Read structure pointer */
    
    // Push the registers this routine modifies.  Eleven words are pushed in total
    // (mode1, r14, r15, r9, i0, i1, i2, i3, i5, b0, b1); the gets(11)..gets(1)
    // reads in the epilogue pick them back up in the matching positions.
                puts = mode1;
                puts = r14;
    r14 = r12;              /* Read number of points */
    R14 = LSHIFT R14 by -1;                     /* r14=N/2, because all uses are in a SIMD context */
    
                puts = r15;
                puts = r9;
    // Index and base registers are copied through r0 before being pushed.
    r0 = i0;    puts = r0;
    r0 = i1;    puts = r0;
    r0 = i2;    puts = r0;
    r0 = i3;    puts = r0;
    r0 = i5;    puts = r0;
    r0 = b0;    puts = r0;
    r0 = b1;    puts = r0;

    // Both b0 and b1 are based at the State buffer: the filter loop reads the
    // state through i0 and writes it back through i1.
    r0 = dm(AMF_BiquadCascade_State,i4);    
    b0 = r0;
    b1 = r0;

    // b13 is based at the coefficient buffer, which the filter reads with pm(i13,m12).
    r0 = dm(AMF_BiquadCascade_Coefs,i4);
    b13 = r0;

    r15 = dm(AMF_BiquadCascade_NumSections,i4);

    // Gain for the scaling loop; copied into s4 as well so that the PEy side
    // multiplies by the same value once PEYEN is set.
    f4 = dm(AMF_BiquadCascade_TotalAmp,i4);
    s4=f4;

    s8 = dm(AMF_BiquadCascade_LastIn,i4);        // get last input of previous tick


    i2=r8;                    // i2->*buffers (pointer to the buffer-pointer array)
    
 // initialize input and output samples pointers
    i3=dm(0,i2);            // i3->buffers[0], input
    i5=dm(1,i2);            // i5->buffers[1], output


    
// do initial scaling loop
// The first load primes f1; each pass then multiplies the previously loaded value
// by f4 while fetching the next one, and stores the product to the output buffer.
// r14 passes are made with a stride of 2.  The load in the last pass fetches one
// stride beyond the samples that get written; that value is never used.
    bit set MODE1 RND32 | PEYEN;         /* alu, multiplier precision -> 1 cycle of latency before PEYEN */
    m4 = 2;                                        /* stride = 2 for SIMD            */
    f1 = dm(i3,m4);
    lcntr = r14, do scaleLoop until lce;
        f2 = f1*f4, f1 = dm(i3,m4);
scaleLoop:        dm(i5,m4) = f2;

    bit clr MODE1 PEYEN;     
    
    // reset i/o pointers for filter
    // Both i3 and i5 point at buffers[1], so the filter below runs in place.
    i3=dm(1,i2);            // scale loop scaled into output buffer, so that is now the input
    i5=i3;

    // Set the circular-buffer lengths from NumSections (called "biquads" in the
    // comments below): 2 words per biquad for the state buffer (l0/l1) and
    // 4 words per biquad for the coefficient buffer (l13).
    r15=r15+r15, m12 = m4;                        /* r15 = biquads*2,  stride = 2 for SIMD */
    l0 = r15;                                    /* L0 = biquads*2                */
    l1 = r15;                                    /* L1 = biquads*2                */
       
       R15=r15+r15, b1 = b0;                         /* r15=biquads*4, B0/B1 used for writing/reading DELAY LINE,  */
    l13 = r15;                                    /* L13=biquads*4 (coefficient buffer length) */    
    

    bit set MODE1 PEYEN;         /* alu, multiplier precision -> 1 cycle of latency before PEYEN */
    R15 = LSHIFT R15 by -3;                        /* R15=r15/8 -> r15=(Biquads/2) */
    
/******************************************BEGIN IIR FILTER******************************************/
// Loop structure: the outer loop runs r14 (= N/2) times and consumes two samples
// per pass.  Each pass runs two inner loops of r15 (= biquads/2) iterations; one
// inner iteration is four multifunction instructions that together read two state
// words through i0, write two state words through i1, and fetch four coefficients
// through i13.  Every instruction also fetches the operand used by a later
// instruction, so the statement order inside the loops must not be changed.
    f12 = 0;                                    /* in case f12 contains NaN or Inf or something, because f12-f12 != 0.0 in that case */
    f2=dm(i0,m4), f4=pm(i13,m12);                 /* prime cache and data before loop*/;   
    lcntr = r14, do iir until lce;                /* BEGIN FILTER - 2 samples at a time*/
        f12=f12-f12, f8 = dm(i3,m4) (LW);         /* clear f12 and fetch next 2 samples: f8 = x(n), f9 = x(n+1) */

        lcntr=r15, do biq1 until lce;            /* IIR loop 1 - Filter sample x(n) and i(n-1) using SIMD  (both comp blocks)*/
            f12=f2*f4, f8=f8+f12, f1=dm(i0,m4), f4=pm(i13,m12); /*simultaneously filter in X and Y using SIMD */
            f12=f1*f4, f8=f8+f12, dm(i1,m4)=f1, f4=pm(i13,m12);
            f12=f2*f4, f8=f8+f12, f2=dm(i0,m4), f4=pm(i13,m12);
biq1:        f12=f1*f4, f8=f8+f12, dm(i1,m4)=f8, f4=pm(i13,m12);
        f8 = f8 + f12 ;                            /* f8 = I(n), s8 = y(n-1) */
        f12=f12-f12,      f8 <-> s9;                 /* update I(n) (s9) for use in filter below */

        lcntr=r15, do biq2 until lce;            /* IIR loop 2 - Filter sample x(n+1) and i(n) using SIMD  (both comp blocks)*/
            f12=f2*f4, f9=f9+f12, f1=dm(i0,m4), f4=pm(i13,m12);
            f12=f1*f4, f9=f9+f12, dm(i1,m4)=f1, f4=pm(i13,m12);
            f12=f2*f4, f9=f9+f12, f2=dm(i0,m4), f4=pm(i13,m12);
biq2:        f12=f1*f4, f9=f9+f12, dm(i1,m4)=f9, f4=pm(i13,m12);
        f9 = f9 + f12;                            /* f9 = I(n+1), s9 = y(n) */
        dm(i5,m4) = s8 (LW);                    /* write results ->    OUTPUT[i] = s8 = y(n-1) */
                                                /*                    OUTPUT[i+1] = s9 = y(n)   */
iir:    f9 <-> s8;                                /* move I(n+1) into f9 for use on next sample */
/******************************************* END IIR FILTER******************************************/
    

        bit clr MODE1 PEYEN;
        
    // reset used L regs
    l13 = 0;
    // NOTE(review): the load of LastIn at the top calls this value the "last input",
    // while the comment on the store below calls it the "last output".  What is
    // stored is whatever s8 holds after the final `f9 <-> s8` swap above -- confirm
    // which name is intended.
    dm(AMF_BiquadCascade_LastIn,i4)=s8;        // store last output of this tick for next tick

    l1 = 0;
    l0 = 0;

    // Restore the saved registers; gets(1) is the most recently pushed word (b1)
    // and gets(11) the first one pushed (mode1).  b1/b0 are restored before i1/i0.
    // alter(11) then presumably releases the eleven stack words (macro from asm_sprt.h).
    b1 = gets(1);
    b0 = gets(2);
    i5 = gets(3);
    i3 = gets(4);
    i2 = gets(5);
    i1 = gets(6);
    i0 = gets(7);
    r9 = gets(8);
    r15 = gets(9);
    r14 = gets(10);
    mode1=gets(11);
    alter(11);

//------------------------------------------------------------------------------------
_AMF_BiquadCascade_Render.END:
    leaf_exit; // C-rth requires this instead of rts
//------------------------------------------------------------------------------------
    
.endseg;
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -