📄 amf_biquadcascade_render.asm
字号:
// Copyright(c) 2005 Analog Devices, Inc. All Rights Reserved.
// This software is proprietary and confidential to Analog Devices, Inc. and its licensors.
// File : $Id: //depot/development/visualaudio/modules/2.5.0/SHARC/Source/AMF_BiquadCascade_Render.asm#3 $
// Part of : VisualAudio V2.5.0
// Updated : $Date: 2006/10/12 $ by $Author: Fernando $
// Module Name : AMF_BiquadCascade_Render.asm
// DSP Processor : ADSP21161
// Original Author : Richard Grafton, et al. Ported to VA by Tim Stilson
// Date : 6/18/03
//====================================================================================
// Processor resources used:
// 91 words pmem INTERNAL
// 1273 cycles for tickSize=128 (1 stage of 2 biquads)
// cycles = 116 + TickSize*(5 + 2*numSections)
// (SIMD used)
//====================================================================================
#if 1
/**************************************************************
File Name: iir_hh_matt.asm
Revision history:
09/03/98 Richard Grafton, ADI Initial version
09/28/98 OW Bug fixed, verified, timed
10/06/99 Boris Lerner SIMD (requires data to be interlaced w/ zeros)
12/13/99 Matt Walsh Optimized SIMD - Added second set of filters into loop
- non interlaced data required
12/16/99 Matt Walsh Made callable
06/05/03 Tim Stilson Debugged register initialization, verified coef layout
06/18/03 Tim Stilson Ported to VA Module (changed input arguments, etc)
08/15/03 Tim Stilson Some code-size optimizations
Purpose: Subroutine that implements a Biquad IIR Filter given
coefficients and samples.
Equation:
            ,---------------.                           ,---------------.
x(n)------->| compblk X (f8)|--> I(n)       x(n+1)----->| compblk X (f9)|---> I(n+1)
            `---------------'                           `---------------'
            ,---------------.                           ,---------------.
I(n-1)----->| compblk Y (s8)|---> Y(n-1)    I(n)------->| compblk Y (s9)|---> Y(n)
            `---------------'                           `---------------'
(The two "filter-blocks" on the left are computed simultaneously using SIMD,
and then in a second loop, the two "filter-blocks" on the right simultaneously
computed via SIMD)
The resulting output is delayed by one sample, so the total block transfer
function effectively has an additional z^-1.
Calling Parameters: See C function declaration in header file
Assumptions:
- This algorithm always performs an even number of filters, so
if an odd number is required, simply use zeros for one set of
filter coefficients (a unity filter).
- Coefficients must also be interlaced to accommodate the SIMD
fetches. Here is an example coef. buffer: (in this case BIQUADS = 4)
***********************************************************************/
#include "processor.h"
#include "AMF_BiquadCascade.h"
#include <asm_sprt.h>
// global routines
.global _AMF_BiquadCascade_Render; ;
.segment /pm SEG_MOD_FAST_CODE;
//------------------------------------------------------------------------------------
// _AMF_BiquadCascade_Render
//
// Scales one block of samples by the instance's TotalAmp gain and then runs it
// through the biquad cascade, two samples per outer-loop pass, using SIMD (PEYEN).
//
// Inputs, as read by the code below:
//   r4  = pointer to the module instance structure (fields are addressed with
//         the AMF_BiquadCascade_* offsets)
//   r8  = pointer to an array of buffer pointers: [0] = input, [1] = output
//   r12 = number of points (N) to process
//
// Instance fields touched:
//   State, Coefs, NumSections, TotalAmp are read; LastIn is read on entry and
//   written on exit so that the intermediate value carries across calls.
//
// Register preservation:
//   mode1, r14, r15, r9, i0, i1, i2, i3, i5, b0 and b1 are pushed on entry
//   (11 words) and restored before leaf_exit. l0, l1 and l13 are set to 0 on
//   exit. Everything else this routine writes (r0, f1, f2, f4, f8, f12, i4,
//   i13/b13, m4, m12 and the PEy counterparts) is left modified.
//
// NOTE(review): there is no guard for N < 2, an odd N, or an odd NumSections;
// the loop counters are simply N/2 and NumSections/2. Confirm the callers
// guarantee these preconditions.
// NOTE(review): i0, i1 and i13 are never written explicitly after the save;
// the code relies on a write to Bx also loading Ix. Confirm against the
// processor hardware reference.
//------------------------------------------------------------------------------------
_AMF_BiquadCascade_Render:
i4 = r4; /* Read structure pointer */
// Save the registers this routine modifies (puts/gets/alter are presumably the
// C run-time stack macros from asm_sprt.h). Push order matters: the restore
// sequence at the end indexes the saved words with gets(1)..gets(11).
puts = mode1;
puts = r14;
r14 = r12; /* Read number of points */
R14 = LSHIFT R14 by -1; /* r14=N/2, because all uses are in a SIMD context */
puts = r15;
puts = r9;
// I and B registers are copied through r0 before being pushed.
r0 = i0; puts = r0;
r0 = i1; puts = r0;
r0 = i2; puts = r0;
r0 = i3; puts = r0;
r0 = i5; puts = r0;
r0 = b0; puts = r0;
r0 = b1; puts = r0;
// Delay line (state): B0 and B1 both start at the state buffer; I0 is used for
// reading and I1 for writing inside the filter loops.
r0 = dm(AMF_BiquadCascade_State,i4);
b0 = r0;
b1 = r0;
// Coefficients: B13 is the base of the coefficient buffer, read with pm(i13,m12).
r0 = dm(AMF_BiquadCascade_Coefs,i4);
b13 = r0;
r15 = dm(AMF_BiquadCascade_NumSections,i4);
f4 = dm(AMF_BiquadCascade_TotalAmp,i4);
// Copy the gain into s4 as well, so the f1*f4 multiply in the scaling loop has
// the same gain in the PEy register when PEYEN is set.
s4=f4;
s8 = dm(AMF_BiquadCascade_LastIn,i4); // get last input of previous tick
i2=r8; // i2->*buffers
// initialize input and output samples pointers
i3=dm(0,i2); // i3->buffers[0], input
i5=dm(1,i2); // i5->buffers[1], output
// do initial scaling loop
// The loop is software pipelined: the first pair is fetched before the loop and
// each iteration fetches the next pair while multiplying the current one, so
// N/2 + 1 fetches are issued in total (the final fetch reads one pair beyond
// the N input points and its value is not used by the scaling loop).
bit set MODE1 RND32 | PEYEN; /* alu, multiplier precision -> 1 cycle of latency before PEYEN */
m4 = 2; /* stride = 2 for SIMD */
f1 = dm(i3,m4);
lcntr = r14, do scaleLoop until lce;
f2 = f1*f4, f1 = dm(i3,m4);
scaleLoop: dm(i5,m4) = f2;
bit clr MODE1 PEYEN;
// reset i/o pointers for filter
i3=dm(1,i2); // scale loop scaled into output buffer, so that is now the input
i5=i3; // the filter reads and writes the same (output) buffer, i.e. runs in place
// Set up circular-buffer lengths and the inner-loop count from NumSections.
r15=r15+r15, m12 = m4; /* r15 = biquads*2, stride = 2 for SIMD */
l0 = r15; /* L0 = biquads*2 */
l1 = r15; /* L1 = biquads*2 */
R15=r15+r15, b1 = b0; /* r15=biquads*4, B0/B1 used for writing/reading DELAY LINE, */
l13 = r15; /* L13=biquads*4 (coefficient buffer length) */
bit set MODE1 PEYEN; /* alu, multiplier precision -> 1 cycle of latency before PEYEN */
R15 = LSHIFT R15 by -3; /* R15=r15/8 -> r15=(Biquads/2) */
/******************************************BEGIN IIR FILTER******************************************/
// Outer loop (iir): r14 = N/2 passes, each consuming two input samples and
// writing two output words. Inner loops (biq1, biq2): r15 = NumSections/2
// passes of four multiply/accumulate instructions each. Every inner-loop
// instruction also fetches the next coefficient from pm(i13,m12); state is read
// through i0 and written back through i1.
f12 = 0; /* in case f12 contains NaN or Inf or something, because f12-f12 != 0.0 in that case */
f2=dm(i0,m4), f4=pm(i13,m12); /* prime cache and data before loop*/;
lcntr = r14, do iir until lce; /* BEGIN FILTER - 2 samples at a time*/
f12=f12-f12, f8 = dm(i3,m4) (LW); /* clear f12 and fetch next 2 samples: f8 = x(n), f9 = x(n+1) */
lcntr=r15, do biq1 until lce; /* IIR loop 1 - Filter sample x(n) and i(n-1) using SIMD (both comp blocks)*/
f12=f2*f4, f8=f8+f12, f1=dm(i0,m4), f4=pm(i13,m12); /*simultaneously filter in X and Y using SIMD */
f12=f1*f4, f8=f8+f12, dm(i1,m4)=f1, f4=pm(i13,m12);
f12=f2*f4, f8=f8+f12, f2=dm(i0,m4), f4=pm(i13,m12);
biq1: f12=f1*f4, f8=f8+f12, dm(i1,m4)=f8, f4=pm(i13,m12);
f8 = f8 + f12 ; /* f8 = I(n), s8 = y(n-1) */
f12=f12-f12, f8 <-> s9; /* update I(n) (s9) for use in filter below */
lcntr=r15, do biq2 until lce; /* IIR loop 2 - Filter sample x(n+1) and i(n) using SIMD (both comp blocks)*/
f12=f2*f4, f9=f9+f12, f1=dm(i0,m4), f4=pm(i13,m12);
f12=f1*f4, f9=f9+f12, dm(i1,m4)=f1, f4=pm(i13,m12);
f12=f2*f4, f9=f9+f12, f2=dm(i0,m4), f4=pm(i13,m12);
biq2: f12=f1*f4, f9=f9+f12, dm(i1,m4)=f9, f4=pm(i13,m12);
f9 = f9 + f12; /* f9 = I(n+1), s9 = y(n) */
dm(i5,m4) = s8 (LW); /* write results -> OUTPUT[i] = s8 = y(n-1) */
/* OUTPUT[i+1] = s9 = y(n) */
iir: f9 <-> s8; /* swap: s8 = I(n+1), the PEy input for the next pair; f9 gets the old s8 and is reloaded by the LW fetch at the top of the loop */
/******************************************* END IIR FILTER******************************************/
bit clr MODE1 PEYEN;
// reset used L regs
l13 = 0;
dm(AMF_BiquadCascade_LastIn,i4)=s8; // store s8 (I(n+1) after the final swap) so the next tick can reload it into s8
l1 = 0;
l0 = 0;
// Restore saved registers. gets(1) is the most recently pushed word (b1) and
// gets(11) the first (mode1). b1/b0 are restored before i1/i0, so the I
// registers end up with their own saved values.
b1 = gets(1);
b0 = gets(2);
i5 = gets(3);
i3 = gets(4);
i2 = gets(5);
i1 = gets(6);
i0 = gets(7);
r9 = gets(8);
r15 = gets(9);
r14 = gets(10);
mode1=gets(11);
alter(11); // release the 11 words pushed on entry
//------------------------------------------------------------------------------------
_AMF_BiquadCascade_Render.END:
leaf_exit; // C-rth requires this instead of rts
//------------------------------------------------------------------------------------
.endseg;
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -