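// amf_agclimitercore_render.asm
//
// ADI SHARC DSP audio algorithm standard module library (SHARC assembly)
// (The lines above were page-header text from a web code viewer; kept here as comments.)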
// Copyright(c) 2005 Analog Devices, Inc. All Rights Reserved.
// This software is proprietary and confidential to Analog Devices, Inc. and its licensors.

// File    : $Id: //depot/development/visualaudio/modules/2.5.0/SHARC/Source/AMF_AGCLimiterCore_Render.asm#3 $ 
// Part of : VisualAudio V2.5.0 
// Updated : $Date: 2006/10/12 $ by $Author: Fernando $




//    Module Name     : AMF_AGCLimiterCore_Render.asm 
//    DSP Processor   : ADSP21161
//    Original Author : Tim Stilson 
//    Date            : 7/7/03
//====================================================================================
// Processor resources used:
// 103 words pmem INTERNAL
// 1962 cycles, tickSize=128, (106 + 14.5*tickSize)
// (SIMD used)
//====================================================================================

#if 1

    ////////////////////////////////////////////////////////////////////////////////
    //
    // History:
    //
    // 7/7/03 Tim Stilson: created from AMF_AGCLimiterCore.c
    //
    ////////////////////////////////////////////////////////////////////////////////

#include "processor.h"

#include "AMF_AGCLimiterCore.h"
#include <asm_sprt.h>

// global routines
.global _AMF_AGCLimiterCore_Render;

.segment /pm SEG_MOD_FAST_CODE;

////////////////////////////////////////////////////////////////////////////////
// AGCLimiterCore filter
//
// Runs six passes over one block of samples:
//   1. envelope follower (scalar)                       input   -> scratch
//   2. linear -> approximate dB (SIMD)                  scratch -> scratch
//   3. subtract threshold, multiply by sharpnessFactor  (SIMD, in place)
//   4. knee polynomial, multiply by slope               (SIMD, in place)
//   5. gain - kneeDepth*x, both pre-scaled by 0.166096  (SIMD, in place)
//   6. approximate 2^x (SIMD)                           scratch -> output
//
// Inputs, as read by the code below:
//   r4  -> module instance; fields are read at the AMF_AGCLimiterCore_* offsets
//          (EnvState is also written back after pass 1)
//   r8  -> buffers[]: [0]=input, [1]=output, [2]=scratch
//   r12 =  number of samples (presumably the tickSize of the cycle estimate
//          in the file header -- confirm against the caller)
//
// Saved and restored: r3, r5, r9, r13, r14, r15, i0, i1, mode1.
// Overwritten and not restored: r0-r2, r4, r8, r12, i4, i12, m4, m12
//   (presumably scratch under the C run-time model -- confirm).
// m6 is used as the post-modify of the scalar pass but is never loaded here;
//   presumably the C run-time keeps it at 1 -- confirm.
//
// NOTE(review): every loop is software pipelined and prefetches the next
//   element on its last iteration, so pass 1 reads one sample past the r12
//   samples it consumes and each SIMD pass reads one extra pair past the
//   r12/2 pairs it writes. The values read are never used.
// NOTE(review): the SIMD passes run r12>>1 iterations of two samples each, so
//   an odd r12 would leave the last sample unprocessed by passes 2-6.
//
_AMF_AGCLimiterCore_Render:
    
 // push context on stack (nine words; popped in mirror order at the end)
 	puts = r14;
    puts=mode1;             // mode1 is saved because PEYEN and TRUNCATE are toggled below
    r0=i0; puts=r0;         // index registers are pushed via r0
    r0=i1; puts=r0;
    puts=r3;
    puts=r5;
    puts=r9;
    puts=r13;
    puts=r15;

    i1=r8;                  // i1->*buffers
    
 // initialize input and output samples pointers
    i0=dm(0,i1);            // i0->buffers[0], input
    i12=dm(1,i1);           // i12->buffers[1], output
    i4=dm(2,i1);            // i4->buffers[2], scratch
    m12=i4;                 // remember scratch base; every later pass reloads its pointers from m12

    i1=r4;                  // i1->module instance (i1 is reused now that the buffer pointers are loaded)
    

 /******************** Envelope Follower ************************/
 // state += coef*(in - state), coef = AttackCoef when in >= state, DecayCoef when in < state.
 // The first iteration is peeled off ahead of the loop so that the loop body can
 // overlap "write previous state" and "read next input" with the arithmetic.
    r0=dm(AMF_AGCLimiterCore_EnvState,i1);          // f0 = envelope state carried over from the previous call
    r8=dm(AMF_AGCLimiterCore_AttackCoef,i1);        // f8 = attack coefficient
    r9=dm(AMF_AGCLimiterCore_DecayCoef,i1);         // f9 = decay coefficient

        r13=r12-1, f1=dm(i0,m6);            // r1=in[i], adjust loop count because we do one iteration before entering loop
        f2=f1-f0;                           // r2=in-state (also does the comparison)
        if LT f2=f9*f2;                     // if in<state, r2=decaycoef*(in-state); relies on the multiply leaving the ALU flags of the subtract intact
        if GE f2=f8*f2;                     // "else" r2=attackcoef*(in-state)
        f0=f2+f0,     f1=dm(i0,m6);         // state = r0 = state+coef*(in-state), get next in
    lcntr=r13, do AMF_AGCLimiterCore_Loop1 until lce;
        f2=f1-f0,     dm(i4,m6)=f0;         // r2=in-state (also does the comparison), write previous output
        if LT f2=f9*f2;                     // if in<state, r2=decaycoef*(in-state); relies on the multiply leaving the ALU flags of the subtract intact
        if GE f2=f8*f2;                     // "else" r2=attackcoef*(in-state)
AMF_AGCLimiterCore_Loop1:
        f0=f2+f0,     f1=dm(i0,m6);         // state = r0 = state+coef*(in-state), get next in (last pass reads one past the block)

        dm(i4,m6)=f0;                       // write last state to scratch

    dm(AMF_AGCLimiterCore_EnvState,i1)=f0;  // save the final envelope state for the next call

 /******************** Convert to dB ************************/
        // using lower-accuracy approximation to dB... using straight-line interp instead of table lookup to handle mantissa part:
        //      6.0206*(exponent + mantissa - 1) rather than 6.0206*(exponent + log2table(mantissa)) 
        //  this is quicker and didn't seem to have terrible effect on quality
 
    m4=2;                                   // stride of two words: each SIMD access handles tmp[i] and tmp[i+1]
    i0=m12;                                 // restore scratch pointer

    // basic assumption: float mantissa = scalb x by (-logb x) gives a value in the range 1 -> 2 (verified)
    // instead of doing out=6.0206*(exp + (mant-1)), we're doing 6.0206*exp - 6.0206 + 6.0206*mant
    // (this, by giving us an extra independent multiply, allows us to overlap the conversion of the exp into float)

    r13=lshift r12 by -1;                   // r13 = r12>>1 = SIMD iteration count, reused by all remaining loops

    bit set mode1 PEYEN;                    // enter SIMD
    i4=i0;                                  // the write ptr
    f4=6.0206;                              // loaded while PEYEN is set (presumably so s4 gets the constant too)

    
        r14=r14-r14, f0=dm(i0,m4);          // clear r14/s14, f0/s0=tmp[i/i+1];
        r8=logb f0,  f15=f4;                // r8/s8 = exponent(tmp), copy f4 into higher reg for parallel op below
    lcntr=r13, do AMF_AGCLimiterCore_Loop2 until lce;
        r3=-r8;                             // negated exponent, used to scale tmp down to its mantissa
        f2=scalb f0 by r3,  f0=dm(i0,m4);   // f2/s2 = mantissa(tmp) (in range 1-2), read next inputs
        f8=f2*f4, f1=float r8 by r14;       // f8/s8=6.*mant, convert exponent to float (scale r14 is zero)
        f1=f1*f4, f8=f8-f15;                // f1/s1=6.*exp, f8/s8=6.*mant - 6.
        f8=f1+f8;                           // f8/s8=6.*exp + 6.*mant - 6. 
AMF_AGCLimiterCore_Loop2:
        r8=logb f0, dm(i4,m4)=f8;           // r8/s8= exponent(tmp), tmp[i]=f8/s8

    bit clr mode1 PEYEN;                    // leave SIMD
    i0=m12;                                 // restore scratch pointer

 /******************** Adjust for threshold and prescale for the knee ************************/
 // tmp = (tmp - threshold) * sharpnessFactor, in place on the scratch buffer.
 // The parameters are fetched with SIMD off and then copied to the s-registers by hand.
 
    r4=dm(AMF_AGCLimiterCore_Threshold,i1); // can't do broadcast because the struct may be off-chip         
    s4=r4;
    r3=dm(AMF_AGCLimiterCore_SharpnessFactor,i1);  // can't do broadcast because the struct may be off-chip        
    s3=r3;
    
    bit set mode1 PEYEN;                    // enter SIMD
    i4=i0;                                  // write pointer starts at the scratch base too

        r0=dm(i0,m4);                       // f0/s0=tmp[i/i+1]
        f0=f0-f4;                           // f0-=threshold
    lcntr=r13, do AMF_AGCLimiterCore_Loop3 until lce;
        f1=f0*f3, f0=dm(i0,m4);             // f1*=sharpnessFactor, read next in
AMF_AGCLimiterCore_Loop3:
        f0=f0-f4, dm(i4,m4)=f1;             // next f0-=threshold, writeback tmp (f1)


 /******************** Do the knee ************************/
 // With x = tmp:  y = 0            for x <= -1
 //                y = 0.25*(x+1)^2 for -1 < x < 1
 //                y = x            for x >= 1
 // and tmp = y * slope.

    f4=0.25;                                // these two constants are loaded before PEYEN is cleared
    f9=1.0;                                 // (presumably so s4/s9 receive them as well)

    bit clr mode1 PEYEN;                    // leave SIMD
    i0=m12;                                 // restore scratch pointer

    r5=dm(AMF_AGCLimiterCore_Slope,i1);     // can't use broadcast because struct may be off-chip   
    s5=r5;

    bit set mode1 PEYEN;                    // enter SIMD
    i4=i0;                                  // write pointer starts at the scratch base too

    f15=-f9;    // f15/s15 = -1.0

    // for consistency in cost, the polynomial is computed every iteration, no matter if used or not
    //   this also allows us to do it in SIMD

        f8=dm(i0,m4);                       // f8=tmp[i/i+1]=x
        f1=f8+f9;                           // f1=(x+1)
        f1=f1*f1;                           // f1=(1+2x+x^2)
    lcntr=r13, do AMF_AGCLimiterCore_Loop4 until lce;
        f1=f1*f4,  f3=f8-f15;               // f1=0.25*(1+2x+x^2), dummy subtract to test x vs -1.0
        if LE f1=f1-f1;                     // if x<=-1.0, f1=0.0
        comp(f8,f9), f12=dm(i0,m4);         // test x vs 1.0, read next x   
        if GE f1=f8;                        // if x>=1.0, f1=f8 (skip polynomial result)
        f3=f1*f5, f1=f9+f12, f8=f12;        // f3=f1*slope, (next iter: f8=x, f1=(x+1))
AMF_AGCLimiterCore_Loop4:
        f1=f1*f1, dm(i4,m4)=f3;             // (next iter) f1=(1+2x+x^2), write this iter's out


 /******************** Unscale from the knee and add in any extra gain ************************/
 // tmp = 0.166096*gain - (0.166096*kneeDepth)*tmp, in place on the scratch buffer.
 
    r0=0.166096;  // this is the input scale of the dB->lin conversion, we're combining it w/ this calculation

    bit clr mode1 PEYEN;                    // leave SIMD
    i0=m12;                                 // restore scratch pointer

    r4=dm(AMF_AGCLimiterCore_Gain,i1);      // can't use broadcast because struct may be off-chip         
    s4=r4;
    r3=dm(AMF_AGCLimiterCore_KneeDepth,i1); // fetched with SIMD off, copied to s3 by hand like the others
    s3=r3;

    bit set mode1 PEYEN;                    // enter SIMD
    i4=i0;                                  // write pointer starts at the scratch base too

    f4=f4*f0;                               // scale gain and kneedepth by the input scale of the db->lin conversion
    f3=f3*f0;

        r0=dm(i0,m4);                       // f0/s0=tmp[i/i+1]
        f1=f0*f3;                           // f1=tmp*kneeDepth
    lcntr=r13, do AMF_AGCLimiterCore_Loop5 until lce;
        f1=f4-f1,     r0=dm(i0,m4);         // f1=gain-f1, read next in
AMF_AGCLimiterCore_Loop5:
        f1=f0*f3,     dm(i4,m4)=f1;         // next tmp*kneeDepth, writeback tmp (f1)

    //bit clr mode1 PEYEN;                  // leave SIMD
    // (PEYEN is deliberately left set here: the next pass also runs in SIMD)


 /******************** Convert from dB back to lin and write to output ************************/
 // out = (frac + 1) * 2^floor(tmp), with frac = tmp - floor(tmp): the fractional
 // part of the power of two is approximated by a straight line.

    //bit set mode1 PEYEN | TRUNCATE;       // enter SIMD, and set rounding mode to -inf for fp->int conversion
    bit set mode1 TRUNCATE;                 // round to -inf for the fp->int conversion (the fix operation)
    i0=m12;         // restore scratch pointer
    i4=i12;         // i4 = i12 (output ptr)

    //f9=1.0;       // value should still be here from earlier loop
    //s9=f9;

        f0=dm(i0,m4);                       // f0/s0=tmp[i/i+1], already scaled by 0.166096 = log2(10)/20 in the previous pass
        r1=fix f0;                          // r1=(int)floor(tmp);
    lcntr=r13, do AMF_AGCLimiterCore_Loop6 until lce;
        f2=float r1;                        // back to float to get fractional part
        f2=f0-f2,     f0=dm(i0,m4);         // f2=fractional part, read next in 
        f2=f2+f9;                           // f2 = frac + 1;
        f2=scalb f2 by r1;                  // f2 = (frac + 1)*2^floor(tmp);
AMF_AGCLimiterCore_Loop6:
        r1=fix f0,    dm(i4,m4)=f2;         // write out, (next iter: r1=(int)floor(tmp) )

    bit clr mode1 PEYEN | TRUNCATE;         // leave SIMD and clear TRUNCATE (the saved mode1 is restored below)
    // nop; //(don't need a nop, because at worst s15 will get clobbered, but s15 is a scratch reg)

 // pop context off stack (mirror of the nine pushes at entry: gets(1) is the last word pushed)
    r15=gets(1);
    r13=gets(2);
    r9=gets(3);
    r5=gets(4);
    r3=gets(5);
    i1=gets(6);
    i0=gets(7);
    mode1=gets(8);
    r14 = gets(9);
    alter(9);               // release the nine saved words
    
//------------------------------------------------------------------------------------
_AMF_AGCLimiterCore_Render.END:
    leaf_exit; // C-rth requires this instead of rts
//------------------------------------------------------------------------------------
    
.endseg;
#endif
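// ---------------------------------------------------------------------------
// The text below is keyboard-shortcut help left over from a web code viewer;
// it is not part of the original source file.
//
//   Copy code        Ctrl + C
//   Search code      Ctrl + F
//   Full screen      F11
//   Toggle theme     Ctrl + Shift + D
//   Show shortcuts   ?
//   Larger font      Ctrl + =
//   Smaller font     Ctrl + -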