📄 amf_agccompressorcore_render.asm
字号:
// Copyright(c) 2005 Analog Devices, Inc. All Rights Reserved.
// This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
// File : $Id: //depot/development/visualaudio/modules/2.5.0/SHARC/Source/AMF_AGCCompressorCore_Render.asm#3 $
// Part of : VisualAudio V2.5.0
// Updated : $Date: 2006/10/12 $ by $Author: Fernando $
// Module Name : AMF_AGCCompressorCore_Render.asm
// DSP Processor : ADSP21161
// Original Author : Tim Stilson
// Date : 8/28/03
//====================================================================================
// Processor resources used:
// 79 words pmem
// 273 cycles, tickSize=128, (145 + 2*(tickSize/2))
// (SIMD used)
//====================================================================================
#if 1
////////////////////////////////////////////////////////////////////////////////
//
// History:
//
// 8/28/03 Tim Stilson: created from AMF_AGCLimiterCore_Render.asm
//
////////////////////////////////////////////////////////////////////////////////
#include "processor.h"
#include "AMF_AGCCompressorCore.h"
#include <asm_sprt.h>
// global routines
.global _AMF_AGCCompressorCore_Render;
.segment /pm SEG_MOD_FAST_CODE;
////////////////////////////////////////////////////////////////////////////////
// AGCCompressorCore filter
//
// Derives one target gain per call from the input block and writes a smoothed
// version of that gain to every sample of the output block.
//
// Register inputs, as consumed by the code below:
//   r4  -> module instance; state and coefficients are accessed through the
//          AMF_AGCCompressorCore_* offsets
//   r8  -> array of buffer pointers: entry 0 = input, entry 1 = output
//   r12  = sample count; it is halved below and used as the trip count of
//          both SIMD loops (presumably tickSize, and presumably even --
//          TODO confirm against the caller)
//
// Stages, in order:
//   block max -> envelope follower -> linear-to-dB (straight-line approx) ->
//   threshold / slope / gain -> dB-to-linear (straight-line approx) ->
//   one-pole smoother written to the output buffer
//
// Saved and restored here: mode1, i0, i1, r3, r5, r9, r13, r14, r15.
// Written without being saved: r0, r1, r2, r4, r8, r12, i4, m4, plus the PEy
// registers touched by the SIMD sections and by the explicit s5/s8/s1 moves.
//   NOTE(review): these are assumed to be scratch under the C runtime
//   convention -- confirm.
//
_AMF_AGCCompressorCore_Render:
// push context on stack (nine words; popped in reverse order at the end)
puts = mode1;
r0=i0; puts=r0;
r0=i1; puts=r0;
puts=r3;
puts=r5;
puts=r9;
puts=r13;
puts=r14;
puts=r15;
i1=r8; // i1->*buffers
// initialize input and output samples pointers
i0=dm(0,i1); // i0->buffers[0], input
i4=dm(1,i1); // i4->buffers[1], output
i1=r4; // i1->module instance; i1 is the base for every instance access below
/******************** Get max over tick ************************/
// The max is accumulated separately on each SIMD side, one sample pair per
// load, and the two partial results are combined after leaving SIMD.
// NOTE(review): this is a plain max, not a max of absolute values --
// presumably the input is already non-negative; confirm.
bit set mode1 PEYEN; // enter SIMD
m4=2; // post-modify step: two samples per SIMD access (also used for the output writes)
r12=lshift r12 by -1; // r12 = count/2 = number of sample pairs = loop count
f0=-1e10; // initial value of the running max
f1=dm(i0,m4); // preload the first pair
// Single-instruction loop. One pair is loaded above and every iteration loads
// another, so loop count + 1 loads are issued in total; the value fetched by
// the final load does not contribute to the max.
// NOTE(review): that final load presumably reads one pair beyond the input
// block -- harmless only if that address is readable; confirm.
// NOTE(review): behaviour for a loop count of zero is not handled here.
lcntr=r12, do (pc,1) until lce;
f0=max(f0,f1), f1=dm(i0,m4); // accumulate the max, get the next two inputs
bit clr mode1 PEYEN; // exit SIMD
// NOTE(review): no nop follows this mode change, unlike the two SIMD
// transitions in the smoother section -- confirm the swap below is not
// affected by the latency of the mode1 write.
f1<->s0; // get the other side of the SIMD max
// now max over the two sides of the SIMD
f1=max(f0,f1), r0=dm(AMF_AGCCompressorCore_EnvState,i1); // f1 = the max, f0 = env follower state
/******************** Envelope Follower ************************/
// state += coef*(in - state), with the attack coef when in >= state and the
// decay coef otherwise. The same decision selects which smoothing coef
// (left in f5) the gain smoother at the end will use.
r8=dm(AMF_AGCCompressorCore_AttackCoef,i1);
f5=dm(AMF_AGCCompressorCore_DecayCoefSmoothing,i1); // the smoothing coefs will be used by the smoother at the end,
f4=dm(AMF_AGCCompressorCore_AttackCoefSmoothing,i1); // we decide here which one to use
f2=f1-f0, r9=dm(AMF_AGCCompressorCore_DecayCoef,i1); // f2=in-state (this subtract also sets the condition tested below), f9 = decay coef
// The original author assumed the multiply on the next line leaves the ALU
// condition from the subtract intact, so the GE test after it still refers to
// (in-state). NOTE(review): confirm against the instruction set reference.
if LT f2=f9*f2; // if in<state, f2=decaycoef*(in-state)
if GE f2=f8*f2, f5=f4; // "else" f2=attackcoef*(in-state), and choose/remember the corresponding smoothing coef
f0=f2+f0; // state = f0 = state+coef*(in-state)
dm(AMF_AGCCompressorCore_EnvState,i1)=f0; // store the updated envelope state
/******************** Convert to dB ************************/
// using lower-accuracy approximation to dB... using straight-line interp instead of table lookup to handle mantissa part:
// 6.0206*(exponent + mantissa - 1) rather than 6.0206*(exponent + log2table(mantissa))
// this is quicker and didn't seem to have terrible effect on quality
// basic assumption: float mantissa = scalb x by (-logb x) gives a value in the range 1 -> 2 (verified)
// instead of doing out=6.0206*(exp + (mant-1)), we're doing 6.0206*exp - 6.0206 + 6.0206*mant
// (this, by giving us an extra independent multiply, allows us to overlap the conversion of the exp into float)
f4=6.0206; // dB per factor of two
r14=r14-r14, s5=f5; // clear r14 (zero scale for the float conversion, 0.0 for the max below); copy the chosen smoother coef to s5 for the SIMD smoother
r8=logb f0, f15=f4; // r8 = exponent(env), copy f4 into higher reg for parallel op below
r3=-r8; // negated exponent, used to scale the mantissa back into range
f2=scalb f0 by r3; // f2 = mantissa(env) (in range 1-2)
f8=f2*f4, f1=float r8 by r14; // f8=6.*mant, convert exponent to float
f1=f1*f4, f8=f8-f15; // f1=6.*exp, f8=6.*mant - 6.
f0=f1+f8, r4=dm(AMF_AGCCompressorCore_Threshold,i1); // f0=6.*exp + 6.*mant - 6., this is the dB result; f4 now holds the threshold
/******************** Adjust for threshold, do the knee, add any extra gain ************************/
// gain_dB = Gain - max(0, level_dB - Threshold) * Slope
// f4 is reloaded with each parameter one instruction before it is used.
f0=f0-f4; // f0-=threshold
f0=max(f0,f14), r4=dm(AMF_AGCCompressorCore_Slope,i1); // max vs zero, read slope
f0=f0*f4, r4=dm(AMF_AGCCompressorCore_Gain,i1); // f0 = overshoot*slope, read gain
f0=f4-f0; // f0 = -maxzero(f0)*slope + gain
/******************** Convert from dB back to lin and write to output ************************/
// 2^x is approximated as (1 + frac(x)) * 2^floor(x), with x = gain_dB * log2(10)/20.
// The result is the target handed to the smoother; the output buffer itself
// is written in the next section.
f1=0.166096; // this is the input scale of the dB->lin conversion
f0=f0*f1; // f0=gain_dB*0.166096, 0.166096 = log2(10)/20;
bit set mode1 TRUNCATE; // round to -inf for the fp->int conversion (the fix operation)
f9=1.0; // constant for the (frac + 1) term
r1=fix f0; // r1=(int)floor(tmp);
f2=float r1; // back to float to get fractional part
f2=f0-f2; // f2=fractional part
f2=f2+f9; // f2 = frac + 1;
f8=scalb f2 by r1; // f8 = (frac + 1)*2^floor(tmp) (this will be the target for the smoother below)
bit clr mode1 TRUNCATE; // reset rounding mode
/******************** Smooth the gain factor and write to output ************************/
// since the input to the smoother is constant over the tick, we can do the smoother in SIMD
// Note that this implementation will give sample pairs that are equal (ala updating every other sample).
// We could add a few instructions to compute one step of smoothing and put it in s1, which would give
// the equivalent of updating every sample (ref OnComputingSmoothingInSIMD.doc), but it was decided
// that updating every other sample is "good enough" for most cases.
s8=f8; // copy the target to the PEy side
f1=dm(AMF_AGCCompressorCore_SmoothingState,i1); // f1 = current smoothed gain, loaded from the instance
s1=f1; // can't use broadcast because this may be an off-chip access
bit set mode1 PEYEN; // enter SIMD
nop; // presumably covers the latency of the mode1 write before SIMD-dependent work
f0=1.0;
f2=f0-f5; // f2/s2 = (1-r) (r was chosen up in the envelope follower)
// fixup (1-r) and r for updating every other sample... (1-r) -> (1-r)^2, r -> 1-((1-r)^2) (i.e. new(r) = 1 - new(1-r))
f2=f2*f2; // f2/s2 = new(1-r) = (1-r)^2
f4=f0-f2; // f4/s4 = new(r) = (1-new(1-r))
f13=f8*f4; // f13= target*r
// Optimization note: The dependence of the multiply on the result of the previous add makes it so we can't get down
// to a single-instruction loop (if only the sharc had a floating-point MAC instruction...)
// One could probably pipeline 2 iterations together to get down to 1 instr per sample-pair, with enough work.
// Pipelining would be possible since the input is constant over the loop
// The first state*(1-r) product is computed ahead of the loop; each iteration
// then finishes the current state and starts the product for the next one.
f8=f1*f2; // f8=state*(1-r)
lcntr=r12, do AMF_AGCCompressorCore_Loop2 until lce;
f1=f8+f13; // f1=state=state*(1-r)+t*r
AMF_AGCCompressorCore_Loop2:
f8=f1*f2, dm(i4,m4)=f1; // f8=next[state*(1-r)], write out
bit clr mode1 PEYEN; // leave SIMD
nop; // presumably covers the latency of the mode1 write before the store below
dm(AMF_AGCCompressorCore_SmoothingState,i1)=f1; // save the final smoothed gain for the next call
/******************** We're done ************************/
// pop context off stack (reverse of the push order: gets(1) is the last word pushed)
r15=gets(1);
r14=gets(2);
r13=gets(3);
r9=gets(4);
r5=gets(5);
r3=gets(6);
i1=gets(7);
i0=gets(8);
mode1=gets(9);
alter(9); // release the nine saved words
//------------------------------------------------------------------------------------
_AMF_AGCCompressorCore_Render.END:
leaf_exit; // C-rth requires this instead of rts
//------------------------------------------------------------------------------------
.endseg;
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -