// amf_agclimitercore_render.asm
// [code-viewer page residue] Font size:
// Copyright(c) 2005 Analog Devices, Inc. All Rights Reserved.
// This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
// File : $Id: //depot/development/visualaudio/modules/2.5.0/SHARC/Source/AMF_AGCLimiterCore_Render.asm#3 $
// Part of : VisualAudio V2.5.0
// Updated : $Date: 2006/10/12 $ by $Author: Fernando $
// Module Name : AMF_AGCLimiterCore_Render.asm
// DSP Processor : ADSP21161
// Original Author : Tim Stilson
// Date : 7/7/03
//====================================================================================
// Processor resources used:
// 103 words pmem INTERNAL
// 1962 cycles, tickSize=128, (106 + 14.5*tickSize)
// (SIMD used)
//====================================================================================
#if 1
////////////////////////////////////////////////////////////////////////////////
//
// History:
//
// 7/7/03 Tim Stilson: created from AMF_AGCLimiterCore.c
//
////////////////////////////////////////////////////////////////////////////////
#include "processor.h"
#include "AMF_AGCLimiterCore.h"
#include <asm_sprt.h>
// global routines
.global _AMF_AGCLimiterCore_Render;
.segment /pm SEG_MOD_FAST_CODE;
////////////////////////////////////////////////////////////////////////////////
// _AMF_AGCLimiterCore_Render
//
// Computes a per-sample linear gain from the input signal and writes it to the
// output buffer. Processing is done in six passes over a scratch buffer:
//
//   1. Envelope follower (SISD):  state += coef*(in - state), where coef is
//      DecayCoef when in < state and AttackCoef otherwise. Every state value
//      is written to scratch; the final state is stored back to EnvState.
//   2. Approximate dB conversion (SIMD):  6.0206*(exponent + mantissa - 1).
//   3. tmp = (tmp - Threshold) * SharpnessFactor.
//   4. Knee:  y = 0 for x <= -1, y = x for x >= 1, else 0.25*(x+1)^2;
//      then tmp = y * Slope.
//   5. tmp = 0.166096 * (Gain - KneeDepth*tmp).
//   6. Approximate 2^tmp as (frac + 1) * 2^floor(tmp), written to the output.
//
// Inputs as used by this code:
//   r4  -> module instance struct (accessed with the AMF_AGCLimiterCore_* offsets)
//   r8  -> pointer table: [0] = input, [1] = output, [2] = scratch
//   r12  = number of samples (tickSize)
//   m6   = post-modify step for the SISD pass; presumably 1 as set up by the
//          C run-time environment -- TODO confirm
//
// Saved and restored here: r3, r5, r9, r13, r14, r15, i0, i1, mode1.
// Modified and not restored: r0, r1, r2, r4, r8, r12, i4, i12, m4, m12 and the
// PEy (s*) counterparts of the data registers touched in SIMD mode.
//
// NOTE(review): every loop is software-pipelined and fetches the "next" input
//   inside the loop body, so each pass reads one element (SISD) or one pair
//   (SIMD) beyond the range it actually processes. The over-read value is
//   computed on but never stored. Confirm the buffers tolerate this.
// NOTE(review): the SIMD passes run tickSize>>1 iterations of two samples each,
//   so an odd tickSize would leave the last sample unprocessed in passes 2-6.
//   Presumably tickSize is always even -- confirm against the caller.
// NOTE(review): tickSize of 0 or 1 yields a loop count <= 0 (r12-1 / r12>>1);
//   presumably never used -- confirm.
//
_AMF_AGCLimiterCore_Render:
// push context on stack (9 words; popped in reverse order at the end)
puts = r14;
puts=mode1;
r0=i0; puts=r0;
r0=i1; puts=r0;
puts=r3;
puts=r5;
puts=r9;
puts=r13;
puts=r15;
i1=r8; // i1->*buffers
// initialize input, output and scratch pointers
i0=dm(0,i1); // i0->buffers[0], input
i12=dm(1,i1); // i12->buffers[1], output
i4=dm(2,i1); // i4->buffers[2], scratch
m12=i4; // keep the scratch base in m12 so each later pass can reload it
i1=r4; // i1->module instance struct
/******************** Envelope Follower ************************/
r0=dm(AMF_AGCLimiterCore_EnvState,i1); // f0 = envelope state carried over from the previous call
r8=dm(AMF_AGCLimiterCore_AttackCoef,i1); // f8 = attack coefficient
r9=dm(AMF_AGCLimiterCore_DecayCoef,i1); // f9 = decay coefficient
// First iteration is peeled out of the loop so the loop body can store the
// previous state while computing the next one.
r13=r12-1, f1=dm(i0,m6); // r13 = tickSize-1 (one iteration is done here), f1 = in[0]
f2=f1-f0; // f2 = in-state; this subtract also sets the ALU flags tested below
if LT f2=f9*f2; // in<state: f2 = decaycoef*(in-state). Relies on the multiply leaving the subtract's ALU flags intact
if GE f2=f8*f2; // otherwise: f2 = attackcoef*(in-state), still testing the flags from the subtract
f0=f2+f0, f1=dm(i0,m6); // state = state + coef*(in-state), fetch next input
lcntr=r13, do AMF_AGCLimiterCore_Loop1 until lce;
f2=f1-f0, dm(i4,m6)=f0; // f2 = in-state (sets the flags), store the previous state to scratch
if LT f2=f9*f2; // in<state: f2 = decaycoef*(in-state)
if GE f2=f8*f2; // otherwise: f2 = attackcoef*(in-state)
AMF_AGCLimiterCore_Loop1:
f0=f2+f0, f1=dm(i0,m6); // state = state + coef*(in-state), fetch next input
dm(i4,m6)=f0; // store the last state to scratch
dm(AMF_AGCLimiterCore_EnvState,i1)=f0; // persist the envelope state for the next call
/******************** Convert to dB ************************/
// using lower-accuracy approximation to dB... using straight-line interp instead of table lookup to handle mantissa part:
// 6.0206*(exponent + mantissa - 1) rather than 6.0206*(exponent + log2table(mantissa))
// this is quicker and didn't seem to have terrible effect on quality
m4=2; // step of 2: each SIMD access below handles a pair of samples
i0=m12; // restore scratch pointer (read side)
// basic assumption: float mantissa = scalb x by (-logb x) gives a value in the range 1 -> 2 (verified)
// instead of doing out=6.0206*(exp + (mant-1)), we're doing 6.0206*exp - 6.0206 + 6.0206*mant
// (this, by giving us an extra independent multiply, allows us to overlap the conversion of the exp into float)
r13=lshift r12 by -1; // r13 = tickSize/2, the iteration count for every SIMD pass below
bit set mode1 PEYEN; // enter SIMD
i4=i0; // the write ptr (scratch is processed in place)
f4=6.0206;
r14=r14-r14, f0=dm(i0,m4); // clear r14/s14 (zero scale for the float conversion below), f0/s0=tmp[i/i+1]
r8=logb f0, f15=f4; // r8/s8 = exponent(tmp), copy f4 into higher reg for parallel op below
lcntr=r13, do AMF_AGCLimiterCore_Loop2 until lce;
r3=-r8; // negated exponent, used to scale the value down to its mantissa
f2=scalb f0 by r3, f0=dm(i0,m4); // f2/s2 = mantissa(tmp) (in range 1-2), read next inputs
f8=f2*f4, f1=float r8 by r14; // f8/s8=6.0206*mant, convert exponent to float
f1=f1*f4, f8=f8-f15; // f1/s1=6.0206*exp, f8/s8=6.0206*mant - 6.0206
f8=f1+f8; // f8/s8=6.0206*exp + 6.0206*mant - 6.0206
AMF_AGCLimiterCore_Loop2:
r8=logb f0, dm(i4,m4)=f8; // r8/s8 = exponent of the next input, tmp[i/i+1]=f8/s8
bit clr mode1 PEYEN; // leave SIMD
i0=m12; // restore scratch pointer
/******************** Adjust for threshold and prescale for the knee ************************/
// Parameters are read in SISD mode and then copied to the PEy register by hand.
r4=dm(AMF_AGCLimiterCore_Threshold,i1); // can't do broadcast because the struct may be off-chip
s4=r4;
r3=dm(AMF_AGCLimiterCore_SharpnessFactor,i1); // can't do broadcast because the struct may be off-chip
s3=r3;
bit set mode1 PEYEN; // enter SIMD
i4=i0; // write ptr = scratch base
r0=dm(i0,m4); // f0/s0=tmp[i/i+1]
f0=f0-f4; // f0-=threshold
lcntr=r13, do AMF_AGCLimiterCore_Loop3 until lce;
f1=f0*f3, f0=dm(i0,m4); // f1=(tmp-threshold)*sharpnessFactor, read next in
AMF_AGCLimiterCore_Loop3:
f0=f0-f4, dm(i4,m4)=f1; // next f0-=threshold, writeback tmp (f1)
/******************** Do the knee ************************/
f4=0.25; // polynomial scale factor
f9=1.0; // upper knee bound; also reused as the constant 1.0 in the final pass
bit clr mode1 PEYEN; // leave SIMD
i0=m12; // restore scratch pointer
r5=dm(AMF_AGCLimiterCore_Slope,i1); // can't use broadcast because struct may be off-chip
s5=r5;
bit set mode1 PEYEN; // enter SIMD
i4=i0; // write ptr = scratch base
f15=-f9; // f15/s15 = -1.0
// for consistency in cost, the polynomial is computed every iteration, no matter if used or not
// this also allows us to do it in SIMD
f8=dm(i0,m4); // f8=tmp[i/i+1]=x
f1=f8+f9; // f1=(x+1)
f1=f1*f1; // f1=(1+2x+x^2)
lcntr=r13, do AMF_AGCLimiterCore_Loop4 until lce;
f1=f1*f4, f3=f8-f15; // f1=0.25*(1+2x+x^2); f3=x+1 is a dummy subtract whose flags compare x against -1.0
if LE f1=f1-f1; // if x<=-1.0, f1=0.0
comp(f8,f9), f12=dm(i0,m4); // test x vs 1.0, read next x (overwrites r12, which is no longer needed)
if GE f1=f8; // if x>=1.0, f1=x (skip polynomial result)
f3=f1*f5, f1=f9+f12, f8=f12; // f3=f1*slope, (next iter: f8=x, f1=(x+1))
AMF_AGCLimiterCore_Loop4:
f1=f1*f1, dm(i4,m4)=f3; // (next iter) f1=(1+2x+x^2), write this iter's out
/******************** Unscale from the knee and add in any extra gain ************************/
r0=0.166096; // this is the input scale of the dB->lin conversion, we're combining it w/ this calculation
bit clr mode1 PEYEN; // leave SIMD
i0=m12; // restore scratch pointer
r4=dm(AMF_AGCLimiterCore_Gain,i1); // can't use broadcast because struct may be off-chip
s4=r4;
r3=dm(AMF_AGCLimiterCore_KneeDepth,i1); // read in SISD and copied to PEy by hand, like the other parameters
s3=r3;
bit set mode1 PEYEN; // enter SIMD
i4=i0; // write ptr = scratch base
f4=f4*f0; // f4 = gain*0.166096 (pre-scaled for the dB->lin conversion)
f3=f3*f0; // f3 = kneeDepth*0.166096
r0=dm(i0,m4); // f0/s0=tmp[i/i+1]
f1=f0*f3; // f1=tmp*kneeDepth (scaled)
lcntr=r13, do AMF_AGCLimiterCore_Loop5 until lce;
f1=f4-f1, r0=dm(i0,m4); // f1=gain-f1, read next in
AMF_AGCLimiterCore_Loop5:
f1=f0*f3, dm(i4,m4)=f1; // next tmp*kneeDepth, writeback tmp (f1)
// SIMD is deliberately left enabled here; the final pass below runs in SIMD as well.
//bit clr mode1 PEYEN; // leave SIMD
/******************** Convert from dB back to lin and write to output ************************/
//bit set mode1 PEYEN | TRUNCATE; // enter SIMD, and set rounding mode to -inf for fp->int conversion
bit set mode1 TRUNCATE; // round to -inf for the fp->int conversion (the fix operation)
i0=m12; // restore scratch pointer
i4=i12; // i4 = i12 (output ptr)
// f9/s9 are expected to still hold 1.0 from the knee pass, so they are not reloaded:
//f9=1.0; // value should still be here from earlier loop
//s9=f9;
f0=dm(i0,m4); // f0/s0=tmp[i/i+1], already scaled by 0.166096 = log2(10)/20 in the previous pass
r1=fix f0; // r1=(int)floor(tmp);
lcntr=r13, do AMF_AGCLimiterCore_Loop6 until lce;
f2=float r1; // back to float to get fractional part
f2=f0-f2, f0=dm(i0,m4); // f2=fractional part, read next in
f2=f2+f9; // f2 = frac + 1;
f2=scalb f2 by r1; // f2 = (frac + 1)*2^floor(tmp);
AMF_AGCLimiterCore_Loop6:
r1=fix f0, dm(i4,m4)=f2; // write out, (next iter: r1=(int)floor(tmp) )
bit clr mode1 PEYEN | TRUNCATE; // leave SIMD and turn truncation back off
// nop; //(don't need a nop, because at worst s15 will get clobbered, but s15 is a scratch reg)
// pop context off stack: gets(1) is the most recently pushed word, gets(9) the first
r15=gets(1);
r13=gets(2);
r9=gets(3);
r5=gets(4);
r3=gets(5);
i1=gets(6);
i0=gets(7);
mode1=gets(8); // restores the caller's mode1 as saved on entry
r14 = gets(9);
alter(9); // release the 9 saved words
//------------------------------------------------------------------------------------
_AMF_AGCLimiterCore_Render.END:
leaf_exit; // C-rth requires this instead of rts
//------------------------------------------------------------------------------------
.endseg;
#endif
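// [code-viewer page residue] Keyboard shortcuts:
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -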