⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 finite impulse response filter.txt

📁 c6000的应用程序比较常用比如说fft ifft等一些原文件
💻 TXT
字号:
*===============================================================================
*
*	TEXAS INSTRUMENTS, INC.
*
*	FIRCIRC
*
*	Revision Date: 03/27/97
*
*	USAGE 	This routine is C Callable and can be called as:
*
*		void fircirc(short y[], short x[], int n, short h[], int s, 
*			     int m, int size, int index)
*
*		y 	= output array
*		x 	= input array
*		n 	= number of coefficients (MULTIPLE of 4 >= 4)
*		h 	= coefficient array
*		s 	= output scaling factor
*		m 	= number of inputs (MULTIPLE of 2 >= 2)
*		size	= Block Size Factor for Circular Addressing (Block Size
*			  = 2^(size + 1))
*		index	= Initial Index
*
*		If routine is not to be used as a C callable function
*		then all instructions relating to stack should be removed.
*		Refer to comments of individual instructions.  You will also
*		need to initialize values for all of the values passed as these
*		are assumed to be in registers as defined by the calling 
*		convention of the compiler, (refer to the C compiler reference
*		guide).
*
*	C CODE 	This is the C equivalent of the assembly code without
*		restrictions.  Note that the assembly code is hand optimized and
*		restrictions may apply.
*
*		void fircirc(short y[], short x[], int n, short h[], int s,
*			     int m, int size, int index)
*			{
*			int		i, j;
*			Long40		y0;
*			Long40		round = (Long40) 1 << (s - 1);
*			for (j = 0; j < m; j++) {
*			    y0 = round; 
*			    for (i = 0; i < n; i++)
*			        y0 += x[(i + j + index) % (1 << size)] * h[i];
*			    y[j] = y0 >> s;
*			    }
*			}
*
*	DESCRIPTION
*		The fircirc performs a Finite Impulse Response filter using
*		circular addressing w/ initial index and output scaling.  It
*		operates on 16-bit data with a 40-bit accumulate.  The final
*		output is scaled down by the scaling factor s.  The scaling
*		factor s is normally set to 24 to give a 16 bit output.  The FIR
*		assumes	the number of filter coefficients is a multiple of 4 and
*		the number of output samples is a multiple of 2.  This routine
*		has no memory hits regardless of where x, h, and y arrays are
*		located in memory.  The filter has M input samples and N
*		coefficients.  The assembly routine performs 2 output samples at
*		a time.	The Block Size of the Circular Buffer given in Bytes is 
*		2^(SIZE + 1).
*
*	TECHNIQUES
*		The inner loop is unrolled four times thus the number of 
*		filter coefficients must be a multiple of four.  The outer
*		loop is unrolled twice so the number of output samples must
*		be a multiple of 2.
*
*		If an odd number of output samples is needed or possible, the
*		final store can either be removed or conditionally executed 
*		depending on whether M is even or odd.  This code would have to 
*		be added to the existing code.
*
*		The outer loop, like the inner loop, is software pipelined as
*		well.  e, o, and p in the comments of the individual 
*		instructions correspond to the epilogue, outer loop, and
*		prologue respectively.
*
*		Refer to FIR example in the optimizing assembly chapter of
*		the programmer's guide for more information.
*
*	ASSUMPTIONS
*		N MULTIPLE of 4 >= 4
*		M EVEN >= 2
*
*	MEMORY NOTE
*		This code has no memory hits regardless of where x and h are
*		located in memory.
*
*	CYCLES	M*(N + 11)/2 + 13
*
*===============================================================================
	.global _fircirc
	.text

*-------------------------------------------------------------------------------
* _fircirc - FIR filter over a circular input buffer, two outputs per outer pass
*
* Arguments as used by the code below:
*	A4  = y (output array)		B4  = x (input array)
*	A6  = n (number of coefficients)	B6  = h (coefficient array)
*	A8  = s (output scaling shift)	B8  = m (number of outputs)
*	A10 = size (circular block size)	B10 = index (initial index into x)
*
* Saves and restores A10-A13, A15 and B10-B14 on the stack.  The return
* address (B3) is parked in A0 because B3 is reused as a data register.
*
* AMR is programmed so that B4 and A7 address x circularly; it is written
* back to zero (linear addressing) in the epilogue so the caller does not
* inherit circular addressing on those registers.
*-------------------------------------------------------------------------------
_fircirc:
	; ---- Prologue: push ten registers.  A1 shadows the stack pointer so
	; ---- that one store can issue on each D unit per cycle.
	STW	.D2	B10,	*B15--		; push B10 on the stack
||	MV	.L1X	B15,	A1		; copy stack pointer

	STW	.D2	A10,	*B15--[2]	; push A10 on the stack
||	STW	.D1	B11,	*--A1[2]	; push B11 on the stack

	STW	.D2	A11,	*B15--[2]	; push A11 on the stack
||	STW	.D1	B12,	*--A1[2]	; push B12 on the stack

*** BEGIN Benchmark Timing ***
B_START
	; ---- Setup: the branch to OUTLOOP is issued first; the execute
	; ---- packets that follow run in its delay slots and finish the
	; ---- pointer/AMR initialisation and the remaining pushes.
	B	.S1	OUTLOOP
||	ADD	.D1	6,	A6,	A10	; n + 6 half array reset
||	SHL	.S2X	A10,	16,	B0	; set circular block size 
||	ADDAH	.D2	B4,	B10,	B4	; x += index
||	MV	.L1X	B3,	A0		; copy return address
||	MV	.L2	B8,	B1		; move m

	SHR	.S1	A6,	2,	A3	; n / 4
||	MV	.L2X	A10,	B10		; copy array reset
||	SET	.S2	B0,	8, 8,	B0	; set B4 (x) in circular mode
||	ADD	.L1X	2,	B6,	A5	; copy h
||	STW	.D2	A12,	*B15--[2]	; push A12 on the stack
||	STW	.D1	B13,	*--A1[2]	; push B13 on the stack

	ADD	.L1X	2,	B4,	A7	; copy x
||	ADD	.L2	B10,	2,	B14	; array reset
||	SET	.S2	B0,	6, 6,	B0	; set A7 (x) in circular mode
||	STW	.D2	A13,	*B15--[2]	; push A13 on the stack
||	STW	.D1	B14,	*--A1[2]	; push B14 on the stack

	ADDAH	.D1	A5,	A10,	A5	; compensate for first pass
||	ADDAH	.D2	B6,	B10,	B5	; compensate for first pass
||	MVC	.S2	B0,	AMR		; setup AMR

	ADDAH	.D1	A7,	A10,	A7	; compensate for first pass
||	ADDAH	.D2	B4,	B14,	B4	; compensate for first pass
||	MVK	.S2	1,	B2		; setup j loop priming

	ADD	.L2X	2,	A4,	B11	; copy y
||	STW	.D2	A15,	*B15--		; push A15 on the stack

	; ---- Inner loop kernel (four execute packets).  Accumulates into the
	; ---- 40-bit pairs A13:A12 (y0) and B13:B12 (y1).  A1 is the priming
	; ---- flag that suppresses accumulation on the first pass, A2 is the
	; ---- inner loop counter and B0 is the flushing count.
	; ---- Instruction order here is timing-critical; do not reorder.
LOOP:	; LOOP BEGINS HERE
  [!A1]	ADD	.L2X	A9,	B13:B12,B13:B12	; y1 += p00,		i=0
||[!A1]	ADD	.L1X	B9,	A13:A12,A13:A12	; y0 += p01,		i=0
||	MPY	.M2	B3,	B7,	B6	; p11 = x1 * h1,	i=1
||	MPY	.M1	A6,	A11,	A11	; p00 = x0 * h0,	i=1
||	LDH	.D1	*++A5[2],	B9	;** h1 = *h++,		i=0
||	LDH	.D2	*++B5[2],	A9	;** h0 = *h++,		i=0

  [!A1]	ADD	.L2	B6,	B13:B12,B13:B12	; y1 += p11,		i=0
||[!A1]	ADD	.L1	A6,	A13:A12,A13:A12	; y0 += p10,		i=1
||	MPY	.M1X	B3,	A9,	A15	;* p10 = x1 * h0,	i=0
||	MPY	.M2X	A15,	B9,	B9	;* p01 = x0 * h1,	i=0
||	LDH	.D2	*++B4[2],	B3	;** x1 = *x++,		i=1
||	LDH	.D1	*++A7[2],	A6	;** x0 = *x++,		i=1
||[A2]	SUB	.S1	A2,	1,	A2	; count down inner loop counter (i++)

  [A2]	B	.S1	LOOP			;* for i
||[!A1]	ADD	.L2X	A11,	B13:B12,B13:B12	; y1 += p00,		i=1
||[!A1]	ADD	.L1X	B7,	A13:A12,A13:A12	; y0 += p01,		i=1
||	MPY	.M2	B8,	B9,	B6	;* p11 = x1 * h1,	i=0
||	MPY	.M1	A15,	A9,	A9	;* p00 = x0 * h0,	i=0
||	LDH	.D1	*++A5[2],	B7	;** h1 = *h++,		i=1
||	LDH	.D2	*++B5[2],	A11	;** h0 = *h++		i=1
||[B0]	SUB	.S2	B0,	1,	B0	; decrement flushing count

  [!A1]	ADD	.L2	B6,	B13:B12,B13:B12	; y1 += p11,		i=1
||[B0]	ADD	.L1	A15,	A13:A12,A13:A12	;* y0 += p10,		i=0
||	MPY	.M2X	A6,	B7,	B7	;* p01 = x0 * h1,	i=1
||	MPY	.M1X	B8,	A11,	A6	;* p10 = x1 * h0,	i=1
||	LDH	.D2	*++B4[2],	B8	;*** x1 = *x++,		i=0
||	LDH	.D1	*++A7[2],	A15	;*** x0 = *x++,		i=0
||[A1]	SUB	.S1	A1,	1,	A1	; decrement priming

	; inner loop branch occurs here
	; ---- Outer loop.  Tags in the comments: e = epilogue of the previous
	; ---- output pair (scale and store), o = outer loop bookkeeping,
	; ---- p = prologue of the next inner loop.  B1 holds the remaining
	; ---- output count; B2 is non-zero only on the first entry so that no
	; ---- store is made before any output has been computed.
OUTLOOP:
	LDH	.D2	*--B4[B14],	B3	;p x1 = *x++,		i=1

  [B1]	B	.S2	LOOP			;p for i
||	LDH	.D2	*++B4[2],	B8	;p x1 = *x++,		i=0
||	LDH	.D1	*--A7[A10],	A15	;p x0 = *x++,		i=0
||	MV	.L2X	A8,	B6		;o copy s
||	SUB	.L1	A8,	1,	A1	;o s - 1
||	SHR	.S1	A13:A12,A8,	A13:A12	;e y0 >>= s

	SHR	.S2	B13:B12,B6,	B13:B12	;e y1 >>= s
||	LDH	.D1	*--A5[A10],	B9	;p h1 = *h++,		i=0
||	LDH	.D2	*--B5[B10],	A9	;p h0 = *h++,		i=0

  [!B2]	STH	.D1	A12,	*A4++[2]	;e y[0] = y0
||[!B2]	STH	.D2	B12,	*B11++[2]	;e y[1] = y1
||	MVK	.S1	1,	A12		;o \ round = (Long40) 1
||	ZERO	.L1	A13			;o /
||	ZERO	.L2	B2			;o clear j loop priming

	LDH	.D2	*++B4[2],	B3	;p x1 = *x++,		i=1
||	LDH	.D1	*++A7[2],	A6	;p x0 = *x++,		i=1
||	SHL	.S1	A13:A12,A1,	A13:A12	;o y0 = round = (Long40) 1<<(s-1)
||	ADD	.L2X	1,	A3,	B0	;p setup flushing count

  [B1]	B	.S2	LOOP			;p for i
||	LDH	.D1	*++A5[2],	B7	;p h1 = *h++,		i=1
||	LDH	.D2	*++B5[2],	A11	;p h0 = *h++		i=1
||	MV	.L2X	A13,	B13		;o y1 = round (upper word)

	LDH	.D2	*++B4[2],	B8	;p* x1 = *x++,		i=0
||	LDH	.D1	*++A7[2],	A15	;p* x0 = *x++,		i=0
||	MV	.L2X	A12,	B12		;o y1 = round (lower word)
||[B1]	SUB	.S2	B1,	2,	B1	;p j++
||	MV	.L1	A3,	A2		;p i < n
||	MVK	.S1	1,	A1		;p i loop priming

B_END:
*** END Benchmark Timing ***

	; ---- Epilogue: put AMR back to linear addressing, pop the saved
	; ---- registers and return.  The two AMR instructions use units that
	; ---- were idle in these packets, so no cycles are added.  B15 and A1
	; ---- are never placed in circular mode by this routine, so the pops
	; ---- are unaffected by the AMR write.
END:	LDW	.D2	*++B15,		A15	; pop A15 off the stack
||	MV	.L1X	B15,	A1		; copy stack pointer
||	ZERO	.L2	B0			; B0 = 0: linear mode for all registers

	LDW	.D1	*++A1[3],	A13	; pop A13 off the stack
||	LDW	.D2	*++B15,		B14	; pop B14 off the stack
||	MVC	.S2	B0,	AMR		; restore AMR to linear addressing

	LDW	.D1	*++A1[2],	A12	; pop A12 off the stack
||	LDW	.D2	*++B15[2],	B13	; pop B13 off the stack

	LDW	.D1	*++A1[2],	A11	; pop A11 off the stack
||	LDW	.D2	*++B15[2],	B12	; pop B12 off the stack
||	MV	.L2X	A0,	B3		; move return address

	LDW	.D1	*++A1[2],	A10	; pop A10 off the stack
||	LDW	.D2	*++B15[2],	B11	; pop B11 off the stack
||	B	.S2	B3			; return to caller

	LDW	.D2	*++B15[2],	B10	; pop B10 off the stack

	NOP 	4				; fill remaining branch delay slots

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -