// corr_3x3.asm
// (web code-viewer residue: "Font size:" control label -- not part of the source)
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : corr_3x3.asm
Label name : __corr_3x3
Version : 1.3
Change History :
    Version  Date        Author       Comments
    1.3      11/18/2002  Swarnalatha  Tested with VDSP++ 3.0 compiler 6.2.2
                                      on ADSP-21535 Rev. 0.2
    1.2      11/13/2002  Swarnalatha  Tested with VDSP++ 3.0
                                      on ADSP-21535 Rev. 0.2
    1.1      01/28/2002  Raghavendra  Modified to match silicon cycle count
    1.0      07/10/2001  Raghavendra  Original
Description : This function performs correlation between the input image and a
              3x3 mask. The mask elements are multiplied with the
              corresponding input elements and then summed together.
              If input == | x00 x01 x02 |
                          | x10 x11 x12 |
                          | x20 x21 x22 |
              and mask == | h00 h01 h02 |
                          | h10 h11 h12 |
                          | h20 h21 h22 |
              then output y11 = ( x00*h00 + x01*h01 + x02*h02 +
                                  x10*h10 + x11*h11 + x12*h12 +
                                  x20*h20 + x21*h21 + x22*h22 ) >> 12;
Prototype   : void _corr_3x3(unsigned char *in, int row, int col,
                             short *mask, unsigned char *out);
              in   -> Pointer to the input image.
              row  -> Number of rows in the input image.
              col  -> Number of columns in the input image.
              mask -> Pointer to the 3x3 mask.
              out  -> Pointer to the output buffer.
Assumption : 1. Number of columns in input image is even AND aligned to
word or half word boundary
2. Input image and mask elements are in different Banks
Registers used : A0, A1, R0-R3, I1, B1, M0-M3, L1, P0-P5, LC0, LC1.
Performance :
Code Size : 690 bytes.
Cycle count : 657 cycles for 8x8 image
first and last row : 10*(col-2)
Middle rows
Inner loop : 9*(col-2)
Outer loop :23*(row-2)
four corner elements : 8*4
*******************************************************************************/
.section L1_code;
.global __corr_3x3;
.align 8;
__corr_3x3:
// ---------------------------------------------------------------------------
// void _corr_3x3(unsigned char *in, int row, int col, short *mask,
//                unsigned char *out);
//
// In  : R0 = in, R1 = row (M), R2 = col (N); mask and out are read from the
//       stack at [SP+24] and [SP+28] once P5:3 have been pushed.
// Out : M*N bytes written through `out`, one per input pixel, row by row.
//
// Notation used below: x(r,c) is the input pixel at row r / column c,
// h00..h22 are the nine mask halfwords in the order they sit in memory.
//
// Register roles
//   P5        input read pointer            M3 = copy of `in`
//   P4        output write pointer
//   I1/B1/L1  circular pointer over the 9 mask halfwords (18 bytes), so
//             post-increments past h22 wrap back to h00
//   P1 = N-2, P0 = N-4
//   P2 = M-2 (row loop count), later 2N+2, later N+2
//   P3 = N+2 (first row), later 2N
//   M0 = 8, later 2N;  M1 = 6 (three mask halfwords);  M2 = 2N+2
//   R0        current pixel, zero-extended byte
//   R1.L/R1.H current mask coefficient(s)
//   R2, R3    results (R3.L also absorbs dummy mask loads)
//
// Border handling: for pixels on the image border only the products whose
// input pixel lies inside the image are accumulated (4 terms for a corner,
// 6 terms for an edge).
// Every sum is shifted right by 12 and only its low byte is stored.
//
// The "dummy" word loads (R2=W[P5++Px]) exist only for their pointer
// post-modify; they are why the header requires an even column count and an
// aligned input buffer.
//
// NOTE(review): row == 2 or col == 2 makes the LSETUP counts zero and
// col < 4 makes P0 non-positive -- behaviour for such small images has not
// been verified here; confirm against the hardware reference before relying
// on it.
// ---------------------------------------------------------------------------
[--SP]=(P5:3);
// Save the preserved pointer registers P3-P5 (12 bytes)
R3=[SP+24]; // Address of the mask (4th argument)
I1=R3; // I1 walks the mask ...
B1=R3; // ... as a circular buffer based at the mask
L1=18; // ... of 9 halfwords (18 bytes)
M0=8; // Byte offset of h11 from the mask base
M1=6; // Modifier that skips 3 mask halfwords
P5=R0; // Input read pointer
M3=R0; // Keep the input base for later re-initialisation
P0=R2; // N
P1=R2; // N
I1+=M0; // I1 -> h11
P4=[SP+28]; // Output write pointer (5th argument)
P2=R1; // M
P3=R2; // N
P0+=-4; // P0 = N-4
R3=R2<<1 ||R0=B[P5++](Z)||R1.L=W[I1++];
// R3 = 2*N, R0 = x(0,0), R1.L = h11
M0=R3; // M0 = 2*N
R3+=2;
M2=R3; // M2 = 2*N+2
P3+=2; // P3 = N+2
P1+=-2; // P1 = N-2
P2+=-2; // P2 = M-2
/////////////////////// FIRST ROW ///////////////////////////////////
// The first and last rows are computed separately from the middle rows, and
// in every row the first and last pixels are computed separately.
// --- y(0,0) = x00*h11 + x01*h12 + x10*h21 + x11*h22 ---
A0=R0.L*R1.L(IS)||R0=B[P5++](z)||R1.L=W[I1++];
// A0 = x(0,0)*h11; fetch x(0,1) and h12
R2=W[P5++P1](Z)||R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(1,0)), I1 skips h20
A0+=R0.L *R1.L(IS)||R0=B[P5++](Z) || R1.L=W[I1++];
// A0 += x(0,1)*h12; fetch x(1,0) and h21
A0+=R0.L*R1.L(IS)||R0=B[P5](Z) || R1.L=W[I1++];
// A0 += x(1,0)*h21; fetch x(1,1) and h22 (I1 wraps to h00)
R2=(A0+=R0.L * R1.L)(IS)|| I1+=M1;
// R2 = A0 + x(1,1)*h22; I1 -> h10
P5=M3; // P5 back to the start of the input
R2=R2>>12||R0=B[P5++](z) || R1.L=W[I1++];
// Scale; fetch x(0,0) and h10 for the first loop pass
B[P4++]=R2; // Store y(0,0)
MNOP;
LSETUP(FIRST_ROW_ST,FIRST_ROW_END)LC0=P1;
// N-2 passes: y(0,j) for j = 1 .. N-2, six products each
// Entry state per pass: R0 = x(0,j-1), R1.L = h10, P5 -> x(0,j)
FIRST_ROW_ST:
A0=R0.L *R1.L(IS)||R0=B[P5++](z) ||R1.L=W[I1++];
// A0 = x(0,j-1)*h10; fetch x(0,j) and h11
A0+=R0.L*R1.L(IS)||R0=B[P5] (z)||R1.L=W[I1++];
// A0 += x(0,j)*h11; fetch x(0,j+1) (no post-increment) and h12
P5=P5+P1; // P5 += N-2 -> x(1,j-1)
A0+=R0.L *R1.L (IS)||R0=B[P5++](Z) || R1.L=W[I1++];
// A0 += x(0,j+1)*h12; fetch x(1,j-1) and h20
A0+=R0.L *R1.L (IS)||R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(1,j-1)*h20; fetch x(1,j) and h21
A0+=R0.L *R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(1,j)*h21; fetch x(1,j+1) and h22 (I1 wraps to h00)
R2=(A0+=R0.L *R1.L)(IS) || I1+=M1;
// R2 = A0 + x(1,j+1)*h22; I1 -> h10
P5-=P3; // P5 -= N+2 -> x(0,j)
R2=R2>>12||R0=B[p5++](z);
// Scale; fetch x(0,j) for the next pass
FIRST_ROW_END:
B[P4++]=R2|| R1.L=W[I1++];
// Store y(0,j); fetch h10 for the next pass
// --- y(0,N-1) = x(0,N-2)*h10 + x(0,N-1)*h11 + x(1,N-2)*h20 + x(1,N-1)*h21 ---
A0=R0.L *R1.L (IS) ||R0=B[P5++](z) || R1.L=W[I1++];
// A0 = x(0,N-2)*h10; fetch x(0,N-1) and h11
NOP;
R2=W[P5++P1](Z) || R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(1,N-2)), I1 skips h12
A0+=R0.L * R1.L (IS)|| R0=B[P5++](z)|| R1.L=W[I1++];
// A0 += x(0,N-1)*h11; fetch x(1,N-2) and h20
A0+=R0.L * R1.L (IS)|| R0=B[P5++](z)|| R1.L=W[I1++];
// A0 += x(1,N-2)*h20; fetch x(1,N-1) and h21
R2=(A0+=R0.L *R1.L) (IS)|| I1+=4;
// R2 = A0 + x(1,N-1)*h21; I1 += 2 halfwords, wrapping to h01
P5=M3; // P5 back to the start of the input
R2=R2>>12||R0=B[P5++](z)||R1.L =W[I1++] ;
// Scale; fetch x(0,0) and h01 for the first middle row
B[P4++]=R2; // Store y(0,N-1)
///////////////////// MIDDLE LOOP ///////////////////////////////////////
LSETUP(ROW_ST,ROW_END)LC0=P2;
// M-2 passes: output rows r = 1 .. M-2
P2=M2; // P2 = 2*N+2
P3=M0; // P3 = 2*N
// Entry state per row: R0 = x(r-1,0), R1.L = h01, P5 -> x(r-1,1)
ROW_ST:
// --- y(r,0): h01, h02, h11, h12, h21, h22 terms ---
A0=R0.L *R1.L(IS) || R0=B[P5++](z) || R1.L=W[I1++];
// A0 = x(r-1,0)*h01; fetch x(r-1,1) and h02
R2=W[P5++P1](Z) ||R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(r,0)), I1 skips h10
A0+=R0.L*R1.L(IS) || R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(r-1,1)*h02; fetch x(r,0) and h11
A0+=R0.L* R1.L(IS) || R0=B[P5++](z)|| R1.L=W[I1++];
// A0 += x(r,0)*h11; fetch x(r,1) and h12
R2=W[P5++P1](Z)|| R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(r+1,0)), I1 skips h20
A0+=R0.L*R1.L(IS) || R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(r,1)*h12; fetch x(r+1,0) and h21
A0+=R0.L* R1.L(IS) || R0=B[P5++](z)|| R1.L=W[I1++];
// A0 += x(r+1,0)*h21; fetch x(r+1,1) and h22 (I1 wraps to h00)
R2=(A0+=R0.L*R1.L )(IS)|| R1.L=W[I1++];
// R2 = A0 + x(r+1,1)*h22; fetch h00 for the core loop
P5-=P2; // P5 -= 2*N+2 -> x(r-1,0)
R2=R2>>12||R0=B[P5++](z);
// Scale; fetch x(r-1,0) for the core loop
B[P4++]=R2; // Store y(r,0)
/////////////////////////// CORE LOOP /////////////////////////////////////
// Two output pixels per pass: A0 accumulates y(r,j), A1 accumulates y(r,j+1).
// R1.L and R1.H alternately hold the current and the previous coefficient so
// that each fetched pixel feeds both accumulators.
LSETUP(COL_ST,COL_END)LC1=P1>>1;
// (N-2)/2 passes, j = 1, 3, ..., N-3
// Entry state per pass: R0 = x(r-1,j-1), R1.L = h00, P5 -> x(r-1,j)
COL_ST: A0=R0.L*R1.L(IS) ||R0=B[P5++](z) || R1.H=W[I1++];
// A0 = x(r-1,j-1)*h00; fetch x(r-1,j) and h01
A1=R0.L * R1.L ,A0+=R0.L*R1.H(IS) || R0=B[P5++](z) || R1.L=W[I1++];
// A1 = x(r-1,j)*h00, A0 += x(r-1,j)*h01; fetch x(r-1,j+1) and h02
A1+=R0.L *R1.H ,A0+=R0.L *R1.L(IS) || R0=B[P5++](z) ||R1.H=W[I1++];
// A1 += x(r-1,j+1)*h01, A0 += x(r-1,j+1)*h02; fetch x(r-1,j+2) and h10
R2=W[P5++P0](Z) ||NOP; // Dummy load: P5 += N-4 -> x(r,j-1)
A1+= R0.L *R1.L (IS) ||R0=B[P5++](z)||R1.L=W[I1++];
// A1 += x(r-1,j+2)*h02; fetch x(r,j-1) and h11
A0+=R0.L *R1.H(IS)|| R0=B[P5++](z) ;
// A0 += x(r,j-1)*h10; fetch x(r,j)
A1+=R0.L * R1.H,A0+=R0.L *R1.L(IS) || R0=B[P5++](z)|| R1.H=W[I1++];
// A1 += x(r,j)*h10, A0 += x(r,j)*h11; fetch x(r,j+1) and h12
A1+= R0.L *R1.L ,A0+=R0.L *R1.H(IS)|| R0=B[P5++](z)||R1.L=W[I1++];
// A1 += x(r,j+1)*h11, A0 += x(r,j+1)*h12; fetch x(r,j+2) and h20
P5=P5+P0; // P5 += N-4 -> x(r+1,j-1)
A1+= R0.L *R1.H(IS)||R0=B[P5++](z) || R1.H=W[I1++];
// A1 += x(r,j+2)*h12; fetch x(r+1,j-1) and h21
A0+=R0.L*R1.L(IS) ||R0=B[P5++](z);
// A0 += x(r+1,j-1)*h20; fetch x(r+1,j)
A1+=R0.L * R1.L ,A0+=R0.L *R1.H(IS)|| R0=B[P5++] (z)||R1.L=W[I1++];
// A1 += x(r+1,j)*h20, A0 += x(r+1,j)*h21; fetch x(r+1,j+1) and h22
// (I1 wraps to h00)
A1+=R0.L *R1.H,R2=(A0+=R0.L *R1.L)(IS)||R0=B[P5++](z);
// A1 += x(r+1,j+1)*h21, R2 = A0 + x(r+1,j+1)*h22; fetch x(r+1,j+2)
P5-=P2; // P5 -= 2*N+2 -> x(r-1,j+1)
R3=(A1+=R0.L *R1.L)(IS)||R0=B[P5++](z) || R1.L=W[I1++];
// R3 = A1 + x(r+1,j+2)*h22; fetch x(r-1,j+1) and h00 for the next pass
R2>>=12;
R3=R3>>12||B[P4++]=R2;
// Scale both results; store y(r,j)
COL_END: B[P4++]=R3;
// Store y(r,j+1)
// --- y(r,N-1): h00, h01, h10, h11, h20, h21 terms ---
A0=R0.L *R1.L (IS)||R0=B[P5++](z) || R1.L=W[I1++];
// A0 = x(r-1,N-2)*h00; fetch x(r-1,N-1) and h01
R2=W[P5++P1](Z) ||R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(r,N-2)), I1 skips h02
A0+=R0.L *R1.L (IS)||R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(r-1,N-1)*h01; fetch x(r,N-2) and h10
A0+=R0.L*R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(r,N-2)*h10; fetch x(r,N-1) and h11
R2=W[P5++P1](Z) || R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(r+1,N-2)), I1 skips h12
A0+=R0.L *R1.L (IS)||R0=B[P5++] (z)|| R1.L=W[I1++];
// A0 += x(r,N-1)*h11; fetch x(r+1,N-2) and h20
A0+=R0.L*R1.L(IS) ||R0=B[P5++] (z)|| R1.L=W[I1++];
// A0 += x(r+1,N-2)*h20; fetch x(r+1,N-1) and h21
R2=(A0+=R0.L *R1.L)(IS)|| I1+=4;
// R2 = A0 + x(r+1,N-1)*h21; I1 += 2 halfwords, wrapping to h01
P5-=P3; // P5 -= 2*N -> x(r,0), the top row of the next 3x3 window
R2=R2>>12||R0=B[P5++](z)||R1.L =W[I1++] ;
// Scale; fetch x(r,0) and h01 for the next row
ROW_END:B[P4++]=R2;
// Store y(r,N-1)
////////////////////// LAST ROW ////////////////////////////////////////////
// Entry state: R0 = x(M-2,0), R1.L = h01, P5 -> x(M-2,1)
P2=P1;
P2+=4; // P2 = N+2
// --- y(M-1,0) = x(M-2,0)*h01 + x(M-2,1)*h02 + x(M-1,0)*h11 + x(M-1,1)*h12 ---
A0=R0.L *R1.L(IS) || R0=B[P5++](z)||R1.L=W[I1++];
// A0 = x(M-2,0)*h01; fetch x(M-2,1) and h02
R2=W[P5++P1](Z)|| R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(M-1,0)), I1 skips h10
A0+=R0.L*R1.L(IS)||R0=B[P5++](z)||R1.L=W[I1++];
// A0 += x(M-2,1)*h02; fetch x(M-1,0) and h11
A0+=R0.L *R1.L (IS)||R0=B[P5++](z)||R1.L=W[I1++];
// A0 += x(M-1,0)*h11; fetch x(M-1,1) and h12
P5-=P2; // P5 -= N+2 -> x(M-2,0)
I1+=M1; // I1 += 3 halfwords, wrapping to h00
R2=(A0+=R0.L *R1.L) (IS)||R0=B[P5++](z) || R1.L=W[I1++];
// R2 = A0 + x(M-1,1)*h12; fetch x(M-2,0) and h00
NOP;
R2>>=12;
B[P4++]=R2; // Store y(M-1,0)
LSETUP(LAST_ROW_ST,LAST_ROW_END)LC0=P1;
// N-2 passes: y(M-1,j) for j = 1 .. N-2, six products each
// Entry state per pass: R0 = x(M-2,j-1), R1.L = h00, P5 -> x(M-2,j)
LAST_ROW_ST:
A0=R0.L*R1.L (IS) ||R0=B[P5++](z) || R1.L=W[I1++];
// A0 = x(M-2,j-1)*h00; fetch x(M-2,j) and h01
A0+=R0.L *R1.L(IS) ||R0=B[P5](z) || R1.L=W[I1++];
// A0 += x(M-2,j)*h01; fetch x(M-2,j+1) (no post-increment) and h02
P5=P5+P1; // P5 += N-2 -> x(M-1,j-1)
A0+=R0.L *R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(M-2,j+1)*h02; fetch x(M-1,j-1) and h10
A0+=R0.L *R1.L(IS) ||R0=B[P5++](Z) || R1.L=W[I1++];
// A0 += x(M-1,j-1)*h10; fetch x(M-1,j) and h11
A0+=R0.L *R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
// A0 += x(M-1,j)*h11; fetch x(M-1,j+1) and h12
R2=(A0+=R0.L *R1.L)(IS)||I1+=M1;
// R2 = A0 + x(M-1,j+1)*h12; I1 += 3 halfwords, wrapping to h00
P5-=P2; // P5 -= N+2 -> x(M-2,j)
R2=R2>>12||R0=B[P5++] (z)|| R1.L=W[I1++];
// Scale; fetch x(M-2,j) and h00 for the next pass
LAST_ROW_END:
B[P4++]=R2; // Store y(M-1,j)
// --- y(M-1,N-1) = x(M-2,N-2)*h00 + x(M-2,N-1)*h01
//                + x(M-1,N-2)*h10 + x(M-1,N-1)*h11 ---
A0=R0.L *R1.L(IS) || R0=B[P5++](z)||R1.L=W[I1++];
// A0 = x(M-2,N-2)*h00; fetch x(M-2,N-1) and h01
R2=W[P5++P1](Z) || R3.L=W[I1++];
// Dummy loads: P5 += N-2 (-> x(M-1,N-2)), I1 skips h02
A0+=R0.L*R1.L(IS)||R0=B[P5++](z)||R1.L=W[I1++];
// A0 += x(M-2,N-1)*h01; fetch x(M-1,N-2) and h10
A0+=R0.L *R1.L (IS)||R0=B[P5++](z)||R1.L=W[I1++];
// A0 += x(M-1,N-2)*h10; fetch x(M-1,N-1) and h11
R2=(A0+=R0.L *R1.L )(IS);
// R2 = A0 + x(M-1,N-1)*h11
R2>>=12;
B[P4++]=R2 ; // Store the final output pixel
L1=0; // Turn circular addressing on I1 off again: the C run-time
// model expects L0-L3 to be zero when a function returns, and
// a non-zero L1 would make later linear uses of I1 wrap.
// (This is one instruction more than the code-size and
// cycle figures quoted in the file header.)
(P5:3)=[SP++]; // Restore P3-P5
RTS;
NOP; // To avoid one stall if LINK or UNLINK happens to
// be the next instruction in the memory.
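/* Web code-viewer residue (not part of the original source).
   Keyboard shortcuts:
     Copy code            Ctrl + C
     Search code          Ctrl + F
     Full-screen mode     F11
     Toggle theme         Ctrl + Shift + D
     Show shortcuts       ?
     Increase font size   Ctrl + =
     Decrease font size   Ctrl + -
*/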