📄 amf_agccompressorcore_render.asm

📁 ADI SHARC DSP 音频算法标准模块库
💻 ASM
字号:
// Copyright(c) 2005 Analog Devices, Inc. All Rights Reserved.
// This software is proprietary and confidential to Analog Devices, Inc. and its licensors.

// File    : $Id: //depot/development/visualaudio/modules/2.5.0/SHARC/Source/AMF_AGCCompressorCore_Render.asm#3 $ 
// Part of : VisualAudio V2.5.0 
// Updated : $Date: 2006/10/12 $ by $Author: Fernando $




//    Module Name     : AMF_AGCCompressorCore_Render.asm 
//    DSP Processor   : ADSP21161
//    Original Author : Tim Stilson 
//    Date            : 8/28/03
//====================================================================================
// Processor resources used:
//  79 words pmem 
// 273 cycles, tickSize=128, (145 + 2*(tickSize/2))
// (SIMD used)
//====================================================================================

#if 1

    ////////////////////////////////////////////////////////////////////////////////
    //
    // History:
    //
    // 8/28/03 Tim Stilson: created from AMF_AGCLimiterCore_Render.asm
    //
    ////////////////////////////////////////////////////////////////////////////////

#include "processor.h"
#include "AMF_AGCCompressorCore.h"
#include <asm_sprt.h>

// global routines
.global _AMF_AGCCompressorCore_Render;

.segment /pm SEG_MOD_FAST_CODE;

////////////////////////////////////////////////////////////////////////////////
// AGCCompressorCore filter
//
// Computes one tick of compressor gain and writes it to the output buffer.
// Per call the routine:
//   1. finds the maximum input sample over the tick (SIMD, two samples per pass),
//   2. runs a one-pole attack/decay envelope follower on that single maximum,
//   3. converts the envelope to dB with a straight-line log2 approximation,
//   4. computes gain_dB = Gain - max(env_dB - Threshold, 0) * Slope,
//   5. converts gain_dB back to a linear factor (straight-line 2^x approximation),
//   6. smooths the linear factor toward that target and writes the smoothed
//      value to every output sample (identical value for each sample pair).
//
// Registers read on entry (as used by the code below):
//   r4  -> module instance (copied to i1, indexed with the AMF_AGCCompressorCore_* offsets)
//   r8  -> array of buffer pointers: [0] = input, [1] = output
//   r12 =  loop count before halving -- presumably the tick size in samples;
//          it is shifted right by one and used as the count of both SIMD loops
//
// Saved and restored here: mode1, i0, i1, r3, r5, r9, r13, r14, r15.
// Overwritten without saving: r0, r1, r2, r4, r8, r12, i4, m4, plus the PEy (s)
// registers written explicitly or implicitly while PEYEN is set.
//
// NOTE(review): the max is taken on the signed samples (no abs), so the input is
// presumably already a non-negative level signal -- confirm against the caller.
//
_AMF_AGCCompressorCore_Render:
    
 // push context on stack (9 words; popped in reverse order at the end)
    puts = mode1;
    r0=i0; puts=r0;
    r0=i1; puts=r0;
    puts=r3;
    puts=r5;
    puts=r9;
    puts=r13;
    puts=r14;
    puts=r15;

    i1=r8;                  // i1->*buffers
    
 // initialize input and output samples pointers
    i0=dm(0,i1);            // i0->buffers[0], input
    i4=dm(1,i1);            // i4->buffers[1], output

    i1=r4;                  // i1->module instance (base for all AMF_AGCCompressorCore_* offsets below)
    
 /******************** Get max over tick ************************/

    bit set mode1 PEYEN;                    // enter SIMD
    m4=2;                                   // SIMD accesses consume two words per pass

    r12=lshift r12 by -1;                   // r12 = count/2 = number of sample pairs
    f0=-1e10;                               // running max starts far below any expected input

        f1=dm(i0,m4);                       // prefetch the first pair
    lcntr=r12, do (pc,1) until lce;         // single-instruction loop, one pass per sample pair
        f0=max(f0,f1),   f1=dm(i0,m4);      // accumulate the max, get the next two inputs
                                            // (the final pass fetches one pair beyond the tick; that value is never used)

    // NOTE(review): unlike the two later mode1 PEYEN changes, no nop separates this
    // bit clr from the next instruction -- confirm the swap below is unaffected.
    bit clr mode1 PEYEN;                    // exit SIMD
    f1<->s0;                                // get the other side of the SIMD max
        // now max over the two sides of the SIMD
    f1=max(f0,f1), r0=dm(AMF_AGCCompressorCore_EnvState,i1);  // f1 = the max (computed from the old f0); the load then makes f0 = env follower state
        
 /******************** Envelope Follower ************************/
    r8=dm(AMF_AGCCompressorCore_AttackCoef,i1);             // f8 = attack coef
    f5=dm(AMF_AGCCompressorCore_DecayCoefSmoothing,i1);     // the smoothing coefs will be used by the smoother at the end,
    f4=dm(AMF_AGCCompressorCore_AttackCoefSmoothing,i1);    //   we decide here which one to use

    f2=f1-f0,  r9=dm(AMF_AGCCompressorCore_DecayCoef,i1); // f2=in-state (also sets the ALU flags tested below), f9 = decay coef
    if LT f2=f9*f2;                     // if in<state, f2=decaycoef*(in-state); relies on the multiply leaving the
                                        //   ALU flags from the subtract intact so the GE test below still sees them
    if GE f2=f8*f2, f5=f4;              // "else" f2=attackcoef*(in-state), and choose/remember the attack smoothing coef
    f0=f2+f0;                           // state = f0 = state+coef*(in-state)

    dm(AMF_AGCCompressorCore_EnvState,i1)=f0;           // save the updated envelope state

 /******************** Convert to dB ************************/
        // using lower-accuracy approximation to dB... using straight-line interp instead of table lookup to handle mantissa part:
        //      6.0206*(exponent + mantissa - 1) rather than 6.0206*(exponent + log2table(mantissa)) 
        //  this is quicker and didn't seem to have terrible effect on quality
 

    // basic assumption: float mantissa = scalb x by (-logb x) gives a value in the range 1 -> 2 (verified)
    // instead of doing out=6.0206*(exp + (mant-1)), we're doing 6.0206*exp - 6.0206 + 6.0206*mant
    // (this, by giving us an extra independent multiply, allows us to overlap the conversion of the exp into float)

    f4=6.0206;                          // dB per factor of two (per unit of exponent)
    
    r14=r14-r14, s5=f5;                 // r14 = 0 (zero scale for the float below, and 0.0 for the max-vs-zero later);
                                        //   copy the chosen smoothing coef to s5 for the SIMD smoother
    r8=logb f0,  f15=f4;                // r8 = exponent(env), copy f4 into higher reg for parallel op below
    r3=-r8;
    f2=scalb f0 by r3;                  // f2 = mantissa(env) (in range 1-2)
    f8=f2*f4, f1=float r8 by r14;       // f8=6.*mant, f1 = exponent converted to float (scaled by r14 = 0)
    f1=f1*f4, f8=f8-f15;                // f1=6.*exp, f8=6.*mant - 6.
    f0=f1+f8, r4=dm(AMF_AGCCompressorCore_Threshold,i1);  // f0=6.*exp + 6.*mant - 6. , this is the dB result; f4 = threshold

 /******************** Adjust for threshold, do the knee, add any extra gain ************************/
    
    f0=f0-f4;                           // f0-=threshold
    f0=max(f0,f14), r4=dm(AMF_AGCCompressorCore_Slope,i1);  // max vs zero (f14 = 0), read slope
 
    f0=f0*f4, r4=dm(AMF_AGCCompressorCore_Gain,i1);         // f0 = amount over threshold * slope, read gain
    f0=f4-f0;                           // f0 = -maxzero(f0)*slope + gain
    
 /******************** Convert from dB back to lin and write to output ************************/

    f1=0.166096;                            // this is the input scale of the dB->lin conversion
    f0=f0*f1;                               // f0 = gain_dB*0.166096 = log2 of the linear gain, 0.166096 = log2(10)/20;

    bit set mode1 TRUNCATE;                 // round to -inf for the fp->int conversion (the fix operation)
    f9=1.0;                                 // also sits between the mode1 change and the fix

    r1=fix f0;                          // r1=(int)floor(tmp);
    f2=float r1;                        // back to float to get fractional part
    f2=f0-f2;                           // f2=fractional part
    f2=f2+f9;                           // f2 = frac + 1;
    f8=scalb f2 by r1;                  // f8 = (frac + 1)*2^floor(tmp) (this will be the target for the smoother below)

    bit clr mode1 TRUNCATE;                 // reset rounding mode

 /******************** Smooth the gain factor and write to output ************************/
        // since the input to the smoother is constant over the tick, we can do the smoother in SIMD
        // Note that this implementation will give sample pairs that are equal (ala updating every other sample).
        //   We could add a few instructions to compute one step of smoothing and put it in s1, which would give 
        //   the equivalent of updating every sample (ref OnComputingSmoothingInSIMD.doc), but it was decided
        //   that updating every other sample is "good enough" for most cases.

    s8=f8;                                          // copy the target gain to the PEy side
    f1=dm(AMF_AGCCompressorCore_SmoothingState,i1); // f1 = current smoothed gain from the instance
    s1=f1;                                          // can't use broadcast because this may be an off-chip access

    bit set mode1 PEYEN;                            // enter SIMD
    nop;                                            // let the mode1 change take effect

    f0=1.0;
    f2=f0-f5;                                       // f2/s2 = (1-r) (r was chosen up in the envelope follower)

    // fixup (1-r) and r for updating every other sample...  (1-r) -> (1-r)^2, r -> 1-((1-r)^2) (i.e. new(r) = 1 - new(1-r))

    f2=f2*f2;                                       // f2/s2 = new(1-r) = (1-r)^2
    f4=f0-f2;                                       // f4/s4 = new(r) = (1-new(1-r))

    f13=f8*f4;                                      // f13= target*r (constant over the loop)
        
// Optimization note: The dependence of the multiply on the result of the previous add makes it so we can't get down
//    to a single-instruction loop (if only the sharc had a floating-point MAC instruction...)
// One could probably pipeline 2 iterations together to get down to 1 instr per sample-pair, with enough work.
// Pipelining would be possible since the input is constant over the loop

        f8=f1*f2;                                   // f8=state*(1-r), primes the loop
    lcntr=r12, do AMF_AGCCompressorCore_Loop2 until lce;    // one pass per output sample pair
        f1=f8+f13;                                  // f1=state=state*(1-r)+t*r
AMF_AGCCompressorCore_Loop2:
        f8=f1*f2,          dm(i4,m4)=f1;            // f8=next[state*(1-r)], write out

    bit clr mode1 PEYEN;                            // leave SIMD
    nop;                                            // let the mode1 change take effect

    dm(AMF_AGCCompressorCore_SmoothingState,i1)=f1; // save the final smoothed gain for the next call
            

 /******************** We're done ************************/

 // pop context off stack (gets(1) is the most recent push, gets(9) the first)
    r15=gets(1);
    r14=gets(2);
    r13=gets(3);
    r9=gets(4);
    r5=gets(5);
    r3=gets(6);
    i1=gets(7);
    i0=gets(8);
    mode1=gets(9);
    alter(9);                                       // release the 9 saved words

//------------------------------------------------------------------------------------
_AMF_AGCCompressorCore_Render.END:
    leaf_exit; // C-rth requires this instead of rts
//------------------------------------------------------------------------------------
    
.endseg;
#endif
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -