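// File: myprogram.asm
//
// Project: based on the VisualDSP++ development environment
// Language: ASM
// Font size: (control from the hosting web page)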
/*																			**
**																			**
**	 Name: Cache_Example.dpj							        			**	
**																			**
******************************************************************************

(C) Copyright 2003 - Analog Devices, Inc.  All rights reserved.

File Name:		myprogram.asm

Date Modified:	11/21/05		MKB		Rev 2.0

Software:       VisualDSP++ 4.0 (September 2005 Update)

Hardware:		ADSP-BF537 EZ-KIT Board

Special Connections:  None

Purpose:		To demonstrate configuring and enabling cache features.
				You can insert your own program and data here or run the example
				2D Convolution (3x3 mask).
				
Program Parameters:

******************************************************************************/


#include <defBF537.h>
#define M 20	// number of rows
#define N 20	// number of columns


.global my_program;

/*
** my_program
**
** Purpose:  1. Unpacks the M*N byte-packed pixels at `input` into one 16-bit
**              word per pixel at `buffer`.
**           2. Runs the 3x3 mask stored at `coefs` over `buffer` and writes
**              M*N 16-bit results to `output`.
**
** Inputs:   none in registers; reads the symbols `input` and `coefs`.
** Outputs:  none in registers; writes the symbols `buffer` and `output`.
**
** Clobbers: r0-r3, r6, p0, p1, p4, i0-i3, b3, m0, m1, a0, a1 and hardware
**           loop 0 (lc0). l0-l3 are all zero on return.
**           NOTE(review): r6 and p4 are not saved/restored here - confirm the
**           caller does not expect them to be preserved.
**
** Notes:    - The routine does not touch the stack.
**           - Image edges are not treated specially: the filter loop produces
**             M*N results, so for the last rows the i1/i2 row pointers read
**             beyond the M*N words written by the unpack step.
*/
.section sdram0_bank2;
my_program:
	// Put all four DAG index registers into linear (non-circular) mode.
	l0 = 0;
	l1 = 0;
	l2 = 0;
	l3 = 0;
	i0.l = input;	// i0 -> byte-packed input data
	i0.h = input;
	i1.l = buffer; 	// i1 -> where the input data will be unpacked to 16 bits
	i1.h = buffer;

	p0 = M*N (x);	// initialize to number of data points
	p0 = p0 >> 2;	// each 32-bit load below consumes four packed pixels
	p0 += -1; 		// the first load/unpack is issued once, ahead of the loop body

	// Unpack the input data.
	// The two instructions between lsetup and l$1 run once and prime r0/r2/r3.
	// Inside the loop each unpack result pair is stored as two 32-bit words
	// (r3 first, then r2) while the next input word is fetched.
	lsetup(l$1, l$1e) lc0 = p0;
	r0 = [i0++];
	(r2, r3) = byteunpack r1:0;
	l$1:  mnop || [i1++] = r3 || r0 = [i0++];
	l$1e: (r2, r3) = byteunpack r1:0 || [i1++] = r2 || nop;
	// The store of r2 issued with the final unpack still wrote the previous
	// value, so the last unpack result is written out here.
	[i1++] = r3;
	[i1++] = r2;

	i0.h = buffer;	// this buffer will be used for the filtering process
	i0.l = buffer;

	p1.h = output;	// this is where the filtered image will be stored
	p1.l = output;

//    p1 += 2;
	// for a 3x3 mask, the three rows of the mask must be separated by the number of columns (m0 bytes)

	p4 = M*N (x);	// number of results to be produced
	p0 = N;			// number of columns	
  	p0 = p0 << 1;	// data is 16-bit, but Blackfin is byte-addressable (thus the shift)
 	m0 = p0;		// m0 is the number of bytes by which each row is separated
  	m1 = 4 (x);		// m1 is loaded here but not referenced again in this routine

	a1 = a0 = 0;	// initialize the two accumulators used for the filtering

	// i0 points to the image pixel corresponding to the 1st row in the mask (see above: i0 = buffer)
	// i1 points to the image pixel corresponding to the 2nd row in the mask
	i1 = i0;
	i1 += m0;  

	// i2 points to the image pixel corresponding to the 3rd row in the mask
	i2 = i1;
	i2 += m0;  
                   
	i3.h = coefs;
	i3.l = coefs;
	b3 = i3; // the mask resides in a circular buffer, so we need to use b and l DAG registers
	l3 = 2*10;		// (9 plus a dummy for alignment) * 2 for byte addressing

	// Filter loop.
	// Each pass produces two horizontally adjacent results: a0 accumulates the
	// result for the current column and a1 the result for the next column, so
	// the loop runs M*N/2 times.
	// For each mask row four consecutive pixels are fetched: one 32-bit load
	// followed by two 16-bit loads (post-increment, then post-decrement), which
	// leaves every row pointer advanced by 4 bytes (two pixels) per pass.
	// The five 32-bit coefficient loads in one pass step i3 through the whole
	// 20-byte circular buffer; the last of them re-fetches the first two
	// coefficients for the next pass.
	// this loop assumes that the i0 pointer is initialized to a 32-bit boundary
	lsetup(strt, end) lc0 = p4 >> 1;
	mnop || r0 = [i0++] || r1 = [i3++];			// prefill the data, prefill the coefficients
	strt: a1 = r0.h * r1.l,  a0 = r0.l * r1.l (is)  || r0.l = w[i0++] || r2 = [i3++];
          a1 += r0.l * r1.h, a0 += r0.h * r1.h (is) || r0.h = w[i0--];
          a1 += r0.h * r2.l, a0 += r0.l * r2.l (is) || r0 = [i1++]    || r3 = [i3++];
          a1 += r0.h * r2.h, a0 += r0.l * r2.h (is) || r0.l = w[i1++];
          a1 += r0.l * r3.l, a0 += r0.h * r3.l (is) || r0.h = w[i1--] || r1 = [i3++];
          a1 += r0.h * r3.h, a0 += r0.l * r3.h (is) || r0 = [i2++];
          a1 += r0.h * r1.l, a0 += r0.l * r1.l (is) || r0.l = w[i2++] || r2 = [i3++];
          a1 += r0.l * r1.h, a0 += r0.h * r1.h (is) || r0.h = w[i2--] || r1 = [i3++];
          r6.h = (a1 += r0.h * r2.l), r6.l = (a0 += r0.l * r2.l) (is); 
	end: mnop || [p1++] = r6 || r0 = [i0++]; 	// store both results, fetch the next 1st-row pixels

	// Switch I3 back to linear addressing before returning; otherwise the
	// 20-byte circular window set up for the mask stays active for whatever
	// code uses I3 next.
	l3 = 0;
	
rts;

my_program.end:



.section sdram0_bank3;
my_data:
	.align 4;

	// 3x3 low-pass mask, stored row by row as 16-bit words.
	// A tenth word (zero) is appended as a dummy for alignment, giving the
	// 20-byte table that my_program addresses as a circular buffer.
	.byte2 coefs[10] =	1, 2, 1,	// mask row 1
						2, 4, 2,	// mask row 2
						1, 2, 1,	// mask row 3
						0;			// dummy word

	.align 4;
	// Test image: byte-packed, i.e. two byte-sized pixels in each 16-bit word.
	// Every line below holds 10 words = 20 pixels, which is one image row for
	// N = 20; there are 20 such lines (M = 20).
	// The first three pixels are 0a, 0a, 0b.
	.byte2 input[] =
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c,
		0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c, 0x0a0a, 0x0b0b, 0xa0a0, 0xb0b0, 0x0c0c;

	// Working storage written by my_program.
	// NOTE(review): .var elements are presumably 32 bits wide with this
	// assembler, which would make each array larger than the N*M 16-bit words
	// the code stores - confirm before changing the element type or count.
	.align 4;
	.var buffer[N*M];		// receives the unpacked input, one 16-bit word per pixel

	.align 4;
	.var output[N*M];		// receives the filter results, one 16-bit word per pixel

.section sdram0_bank1;
	.align 4;
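/*
** File size:    93 K
** Uploaded by:  tzxiaojian
** Category:     DSP programming
** Tags:         #visual #dsp #development-environment
**
** Keyboard shortcuts (from the hosting web page):
**   Copy code            Ctrl + C
**   Search code          Ctrl + F
**   Full-screen mode     F11
**   Switch theme         Ctrl + Shift + D
**   Show shortcuts       ?
**   Increase font size   Ctrl + =
**   Decrease font size   Ctrl + -
*/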