// myprogram.asm  (this line replaces web code-viewer residue: the file title and a "font size" control)
/* **
** **
** Name: Cache_Example.dpj **
** **
******************************************************************************
(C) Copyright 2003 - Analog Devices, Inc. All rights reserved.
File Name: myprogram.asm
Date Modified: 11/21/05 MKB Rev 2.0
Software: VisualDSP++ 4.0 (September 2005 Update)
Hardware: ADSP-BF537 EZ-KIT Board
Special Connections: None
Purpose: To demonstrate configuring and enabling cache features.
You can insert your own program and data here or run the example
2D Convolution (3x3 mask).
Program Parameters:
******************************************************************************/
#include <defBF537.h>
#define M 20 // number of rows
#define N 20 // number of columns
.global my_program;
.section sdram0_bank2;

/*
 * my_program
 *
 * Purpose:
 *   1. Unpack the M*N byte-sized pixels stored at `input` into 16-bit values
 *      written to `buffer`.
 *   2. Run the 3x3 mask stored at `coefs` over `buffer`, producing two 16-bit
 *      results per loop pass, and store them at `output`.
 *
 * Inputs:   none in registers; reads the symbols input, buffer and coefs.
 * Outputs:  writes buffer and output.
 * Clobbers: r0-r3, r6, p0, p1, p4, i0-i3, m0, m1, b3, a0, a1, lc0.
 *           l0-l3 are all zero on return.
 *
 * NOTE(review): r6 and p4 are overwritten without being saved. If this routine
 * is ever called from compiled C code they would presumably need to be
 * preserved - confirm against the compiler's register conventions.
 *
 * NOTE(review): the image is filtered as one flat stream of M*N values. The
 * 2nd-row and 3rd-row pointers start N and 2*N values into `buffer`, and all
 * three pointers advance 4 bytes per pass for (M*N)/2 passes, so near the end
 * they read beyond the M*N values produced by the unpack step. No special
 * handling of row ends or of the last rows is visible here.
 */
my_program:
    // Select linear (non-circular) addressing for all four index registers.
    l0 = 0;
    l1 = 0;
    l2 = 0;
    l3 = 0;

    i0.l = input;               // i0 -> byte-packed input data
    i0.h = input;
    i1.l = buffer;              // i1 -> where the input data will be unpacked to 16 bits
    i1.h = buffer;

    // Each 32-bit load carries four pixels. One word is loaded and unpacked
    // ahead of the loop, so the loop runs (M*N)/4 - 1 times.
    p0 = M*N (x);               // number of data points
    p0 = p0 >> 2;
    p0 += -1;

    // ---- unpack the input data ----
    // Software-pipelined: every pass stores the pair produced by the previous
    // unpack while fetching and unpacking the next word.
    // NOTE(review): byteunpack names the pair r1:0 but r1 is never loaded in
    // this part of the routine; presumably only r0 contributes while i0 stays
    // 4-byte aligned - confirm against the BYTEUNPACK description.
    lsetup(l$1, l$1e) lc0 = p0;
    r0 = [i0++];
    (r2, r3) = byteunpack r1:0;
l$1: mnop || [i1++] = r3 || r0 = [i0++];
l$1e: (r2, r3) = byteunpack r1:0 || [i1++] = r2 || nop;   // the store is meant to write the r2 of the previous unpack
    // Drain the pipeline: store the pair produced by the final unpack.
    [i1++] = r3;
    [i1++] = r2;

    // ---- 3x3 filter set-up ----
    i0.h = buffer;              // this buffer will be used for the filtering process
    i0.l = buffer;
    p1.h = output;              // this is where the filtered image will be stored
    p1.l = output;
    // p1 += 2;

    // For a 3x3 mask, the three rows of the mask must be separated by the
    // number of columns (m0 bytes).
    p4 = M*N (x);               // number of results to be produced
    p0 = N;                     // number of columns
    p0 = p0 << 1;               // data is 16-bit, but Blackfin is byte-addressable (thus the shift)
    m0 = p0;                    // m0 = number of bytes separating consecutive rows
    m1 = 4 (x);                 // NOTE(review): m1 is not referenced again in this routine
    a1 = a0 = 0;                // clear both accumulators (the first MAC of each pass also reloads them with '=')

    // i0 points to the image pixel corresponding to the 1st row of the mask (i0 = buffer)
    // i1 points to the image pixel corresponding to the 2nd row of the mask
    i1 = i0;
    i1 += m0;
    // i2 points to the image pixel corresponding to the 3rd row of the mask
    i2 = i1;
    i2 += m0;

    i3.h = coefs;
    i3.l = coefs;
    b3 = i3;                    // the mask resides in a circular buffer, so the b and l DAG registers are needed
    l3 = 2*10;                  // (9 coefficients plus a dummy for alignment) * 2 for byte addressing

    // ---- filter loop ----
    // This loop assumes that the i0 pointer is initialized to a 32-bit boundary.
    // Each pass produces two adjacent results: a0 -> r6.l, a1 -> r6.h.
    // Five 32-bit coefficient loads per pass consume exactly l3 bytes, so i3
    // wraps to the start of the mask and the last load of a pass prefetches
    // the first coefficient pair for the next pass.
    lsetup(strt, end) lc0 = p4 >> 1;
    mnop || r0 = [i0++] || r1 = [i3++];    // prefill the data, prefill the coefficients
    // mask row 1 (data through i0)
strt: a1 = r0.h * r1.l, a0 = r0.l * r1.l (is) || r0.l = w[i0++] || r2 = [i3++];
    a1 += r0.l * r1.h, a0 += r0.h * r1.h (is) || r0.h = w[i0--];
    a1 += r0.h * r2.l, a0 += r0.l * r2.l (is) || r0 = [i1++] || r3 = [i3++];
    // mask row 2 (data through i1)
    a1 += r0.h * r2.h, a0 += r0.l * r2.h (is) || r0.l = w[i1++];
    a1 += r0.l * r3.l, a0 += r0.h * r3.l (is) || r0.h = w[i1--] || r1 = [i3++];
    a1 += r0.h * r3.h, a0 += r0.l * r3.h (is) || r0 = [i2++];
    // mask row 3 (data through i2)
    a1 += r0.h * r1.l, a0 += r0.l * r1.l (is) || r0.l = w[i2++] || r2 = [i3++];
    a1 += r0.l * r1.h, a0 += r0.h * r1.h (is) || r0.h = w[i2--] || r1 = [i3++];
    r6.h = (a1 += r0.h * r2.l), r6.l = (a0 += r0.l * r2.l) (is);
end: mnop || [p1++] = r6 || r0 = [i0++];   // store both results, fetch row-1 data for the next pass

    // Put i3 back into linear addressing before returning. Leaving l3 non-zero
    // would make any later use of i3 wrap inside the coefficient table, and
    // compiled code expects L0-L3 to be zero.
    l3 = 0;
    rts;
my_program.end:
.section sdram0_bank3;
my_data:
.align 4;
// 3x3 low-pass filter
// Nine 16-bit mask coefficients, listed one mask row per line, plus one
// padding word (10 words = 20 bytes in total). my_program reads this table
// through i3 as a circular buffer with l3 = 2*10 bytes.
// The nine coefficients sum to 16.
// NOTE(review): no scaling of the accumulated result by that sum is visible
// in my_program - confirm whether the un-normalised output is intended.
.byte2 coefs[10] = 0x0001, 0x0002, 0x0001,
0x0002, 0x0004, 0x0002,
0x0001, 0x0002, 0x0001,
0x0000; // the last one is a dummy inserted for alignment issues
.align 4;
// Test image for my_program: 200 16-bit words, i.e. 400 byte-sized pixels,
// which matches M*N for M = N = 20. The same five words repeat throughout.
// my_program consumes this table 32 bits (four pixels) at a time.
// The "image row" markers below assume N = 20 pixels (10 packed words) per row.
.byte2 input[] = // the input data is byte-packed (i.e. two byte-sized pixels stuffed into each 16-bit word)
// the first three pixels are 0a, 0a, 0b
// image row 0
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 1
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 2
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 3
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 4
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 5
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 6
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 7
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 8
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 9
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 10
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 11
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 12
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 13
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 14
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 15
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 16
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 17
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 18
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
// image row 19
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c,
0x0a0a,
0x0b0b,
0xa0a0,
0xb0b0,
0x0c0c;
// Working storage for my_program. Both arrays are 4-byte aligned because
// my_program accesses them with 32-bit loads and stores.
// NOTE(review): my_program writes N*M 16-bit values into `buffer` and N*M
// 16-bit results into `output`. .var presumably reserves 32-bit elements on
// this target, which would make each array twice that size - confirm.
// NOTE(review): the filter loop multiplies with the (is) option, so treat the
// "1.15 format" labels below with caution - confirm the intended number format.
.align 4;
.var buffer[N*M]; // unpacked input data will reside here (1.15 format)
.align 4;
.var output[N*M]; // the output will be calculated to this buffer (1.15 format)
// A further section is opened here; nothing is placed in it within the
// visible part of the file.
.section sdram0_bank1;
.align 4;
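/*
 * The text below is residue from the web code viewer this file was copied
 * from (keyboard-shortcut help); it is not part of the program.
 *
 *   Keyboard shortcuts
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */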