📄 image_scale_up_by2.asm
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs ( Micro Signal Architecture 1.0 specification).
By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software.
********************************************************************************
Module Name : image_scale_up_by2.asm
Label Name : __image_scale_up_by2
Version : 1.3
Change History :
Version Date Author Comments
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev. 0.2
1.1 02/28/2002 Raghavendra Modified to match
silicon cycle count
1.0 06/29/2001 Raghavendra Original
Description : This function performs scaling of an image by factor 2 in both
horizontal and vertical direction.
The input data is unsigned char and the mask is in fractional
format; the mask size is fixed to 3x3. The input and output data
are in the range 0 to 255. Mask values are fetched and stored
on the stack, starting at an address on a 16-bit boundary.
Mask elements are stored as below
| h00 h01 h02 |
| h10 h11 h12 |
| h20 h21 h22 |
Prototype : void _image_scale_up_by2(unsigned char* in, int row, int col,
short *mask, unsigned char *out );
in -> pointer to the input image.
row -> number of rows of input image.
col -> number of columns of input image.
mask -> pointer to the 3x3 mask.
out -> pointer to the output buffer.
Registers used : A0, A1, R0-R7, I1, I2, B0, B2, M0-M2, L1, L2, P0-P5, LC0, LC1.
Performance :
Code size : 498 Bytes
Cycle count : 342 cycles (for 4x4 input)
Even rows output : Inner loop = 6*( COL-1)
Outer loop = 10* ROW
Odd rows output : Inner loop = 9*( COL-1)
Outer loop = 16* (ROW -1)
Last row output : 7* (COL-1)
For storing the coefficients on stack : 9 cycles
(4.85 cycles/output pel in core)
******************************************************************************/
.section L1_code;
.global __image_scale_up_by2;
.align 8;
__image_scale_up_by2:
/*------------------------------------------------------------------------------
 Scales an 8-bit image up by two in both directions using a 3x3 mask of
 16-bit coefficients.

 Inputs as read by this code:
   R0 = in, R1 = row, R2 = col
   [SP+40] = mask pointer, [SP+44] = out pointer (offsets are taken after the
             28-byte register push below)

 Register roles for the whole routine:
   P5 = input read pointer            B0 = saved start of the input image
   P4 = output pointer, first pass    P3 = output pointer, second pass
   P2 = 2*col (output row length)     P1 = col-1 (inner loop count)
   P0 = row, later row-1 (outer loop counts)
   I2/B2/L2 = 18-byte circular buffer on the stack holding the nine mask
              words in the order h00 h01 h02 h10 h11 h12 h20 h21 h22
   R5 = 0x00FF00FF (upper clamp for both 16-bit halves)
   R7 = 0 (lower clamp; reused for data in the final output pair)

 Output layout: each output row is 2*col bytes. The first pass writes through
 P4 and skips one output row after each row it writes; the second pass and the
 last-row code write through P3, which starts at out + 2*col.

 NOTE(review): the hardware loops are set up with counts row, row-1 and col-1
 without any guard, so row == 1 or col == 1 gives a zero loop count - confirm
 the intended minimum image size against the hardware loop documentation.
------------------------------------------------------------------------------*/
    [--SP] = (R7:4,P5:3);
                    // Save R7-R4 and P5-P3 (7 registers, 28 bytes)
    L1 = 0;
    L2 = 0;         // I2 must address linearly while the mask is copied
    P1 = R2;        // Number of COLUMNS in input (N)
    R7 = [SP+40];   // Address of MASK
    I1 = R7;
    P4 = [SP+44];   // Address of output array
    P0 = 9;         // Nine mask words are copied to the stack
    SP += -18;      // Reserve 9 * 2 bytes on the stack for the mask copy
    I2 = SP;        // I2 walks the stack copy ...
    B2 = SP;        // ... and B2 remembers where it starts
    MNOP || R6.L = W[I1++];
                    // Fetch first coefficient (original note: MNOP avoids an
                    // IAU stall)
                    // NOTE(review): together with the loop below this issues
                    // ten 16-bit reads from the mask but stores only nine; the
                    // tenth value is never used - confirm that reading two
                    // bytes past the mask is acceptable.
    LSETUP(CP,CP)LC0 = P0;
CP:
    W[I2++] = R6.L || R6.L = W[I1++];
                    // Store the current word on the stack and fetch the next
    P0 = R1;        // Number of ROWS in input array (M)
    I2 = B2;        // Rewind I2 to the start of the mask copy (h00)
    L2 = 18;        // From here on I2 wraps inside the 18-byte mask copy
    M1 = 6;         // Byte offset of h10 inside the mask copy
    R2 = R2 << 1 || I2+=M1;
                    // R2 = 2*COLUMNS; I2 now points at h10
    R5 = 255;       // Upper saturation limit
    R5 = PACK(R5.L,R5.L) || R4.L = W[I2++];
                    // R5 = 255 in both halves; R4.L = h10
    P5 = R0;        // Address of input array
    P2 = R2;        // P2 = 2*COLUMNS, the distance between output rows
    B0 = R0;        // Keep the input start address for the later passes
    P1+=-1;         // P1 = COLUMNS-1, inner loop counter
    R7 = R1-R1(NS) || R0 = B[P5++](z) || R3.l = w[I2++];
                    // R7 = 0 (lower clamp), R0 = first input byte, R3.L = h11
    P3 = P4+P2;     // P3 = out + 2*COLUMNS: where the second pass (odd output
                    // rows) starts writing
    R3.H = W[I2++]; // R3.H = h12; I2 now points at h20
/****************************** FOR EVEN ROW OUTPUT ***************************
Y(0) = h11* x(0);
Y(1) = h12 * x(0) + h10 * x(1);
Y(2*N-2)= h11* x(N-1);
Y(2*N-1)= h12 * x(N-1) + h10 * x(0);
*******************************************************************************/
    LSETUP(EVEN_ROW_ST,EVEN_ROW_END)LC1 = P0;
                    // Outer loop (LC1) runs once per input row
EVEN_ROW_ST:
    R6 = R0;        // Keep the first sample of the row for the wrap-around
                    // term of the row's last output
    A1 = R0.L*R3.H, R1.L = (A0 = R0.L *R3.L) || R2 = B[P5++](z);
                    // A1 = h12*x(0), R1.L = h11*x(0); fetch x(1)
    LSETUP(EVEN_COL_ST,EVEN_COL_END)LC0 = P1;
                    // Inner loop (LC0) runs COLUMNS-1 times
EVEN_COL_ST:
    R1.H = (A1+=R2.L *R4.L);
                    // R1.H = h12*x(k) + h10*x(k+1)
    R1 = MIN(R1,R5)(V);
                    // Clamp both halves to at most 255 ...
    R1 = MAX(R1,R7)(V);
                    // ... and to at least 0
    R0 = R2 << 0 || B[P4++] = R1;
                    // Next sample becomes the current one; store first pel
    R1 = R1 >> 16 || R2 = B[P5++](z);
                    // Bring the second pel down to the low byte; fetch ahead
EVEN_COL_END:
    A1 = R0.L*R3.H, R1.L = (A0 = R0.L *R3.L) || B[P4++] = R1;
                    // Start the next pair (h12 and h11 products) and store the
                    // second pel of the current pair
    R1.H = (A1+=R6.L *R4.L) || R0 = B[P5--](z);
                    // Last pel of the row wraps around: A1 += h10 * first
                    // sample of the row (saved in R6)
    R1 = MIN(R1,R5)(V) ;
    R1 = MAX(R1,R7)(V);
                    // Clamp both halves to 0..255
    R0 = R2 << 0 || B[P4++] = R1;
                    // Store Y(2*N-2)
    R1 = R1 >> 16 || R0 = B[P5++](z);
                    // Fetch the first sample for the next row
    B[P4++] = R1;   // Store Y(2*N-1)
EVEN_ROW_END:
    P4 = P4+P2;     // Skip one output row (2*COLUMNS bytes); it is filled by
                    // the odd-row pass
/****************************** FOR ODD ROW OUTPUT **************************
Y(0) = h21*x(0) + h01 * x(0+COLUMN)
Y(1) = h22*x(0) + h20 *x(1)+ h02 *x(0+COLUMN) + h00* x(1+COLUMN)
Y(2*N-2)= h21*x(N-1) + h01 * x(N-1+ COLUMN)
Y(2*N-1)= h22*x(N-1) + h20 *x(0)+ h02 *x(N-1+COLUMN) + h00* x(0+COLUMN)
******************************************************************************/
    P0+=-1;         // Outer loop count for this pass is ROWS-1
    M0 = 10;        // Steps I2 from h01 to h20 inside the mask copy
    MNOP || R4.L = W[I2++];
                    // R4.L = h20
    P5 = B0;        // Restart at the beginning of the input image
    R2 = B[P5++](z) || R3 = [I2++];
                    // R2 = first input byte; R3.L = h21, R3.H = h22
                    // (I2 wraps back to h00)
    LSETUP(ODD_ROW_ST,ODD_ROW_END)LC1 = P0;
                    // LC1 = ROWS-1
ODD_ROW_ST:
    A1 = R2.L * R3.H, A0 = R2.L * R3.L || R0 = B[P5](z) || R1.L = W[I2++];
                    // A1 = h22*x(0), A0 = h21*x(0); fetch x(1) and h00
    LSETUP(ODD_COL_ST,ODD_COL_END)LC0 = P1;
ODD_COL_ST:
    P5 = P5+P1;     // Move to the same column position in the next input row
    A1+= R4.L *R0.L || R0 = B[P5++] (z) || R3 = [I2];
                    // A1 += h20*x(1); fetch x(0+COLUMN); R3.L = h01, R3.H = h02
    A1+=R3.H*R0.L,R2.L = (A0+=R3.L * R0.L) || R0 = B[P5--](z);
                    // A1 += h02*x(0+COLUMN); Y(0) = A0 += h01*x(0+COLUMN);
                    // fetch x(1+COLUMN)
    P5-= P1;        // Back to the current row, at the next element
    R2.H=(A1+= R1.L*R0.L) || I2+=M0;
                    // Y(1) = A1 += h00*x(1+COLUMN); I2 now points at h20
    R1 = MIN(R2,R5)(V) || R4.L = W[I2++] || R6 = B[P5++](z);
                    // Clamp to at most 255; fetch h20 and the next sample
    R1 = MAX(R1,R7)(V) || R0 = B[P5](z);
                    // Clamp to at least 0; fetch the sample after that
    R1 = R1 >> 16 || B[P3++] = R1 || R3 = [I2++];
                    // Store Y(0); R3.L = h21, R3.H = h22
ODD_COL_END:
    A1 = R6.L * R3.H, A0 = R6.L * R3.L || B[P3++] = R1
    || R1.L = W[I2++];
                    // Start the next pair (h22 and h21 products), store Y(1),
                    // fetch h00
    P5+=-1;
    P5-=P1;         // Step back to the first element of the current row
    R0 = B[P5++](z) ;
                    // Fetch x(0) for the wrap-around term
    P5 = P5+P1;     // Advance to the start of the next row
    A1+= R4.L *R0.L || R0 = B[P5](z) || R3 = [I2];
                    // A1 += h20*x(0); fetch the next row's first sample;
                    // R3.L = h01, R3.H = h02
    P5 = P5+P1;     // Advance COLUMNS-1 bytes to that row's last sample
    A1+=R1.L*R0.L || R0 = B[P5](z);
                    // A1 += h00 * first sample of the next row; fetch its last
                    // sample
    R2.H = (A1+=R3.H*R0.L) ,R2.L = (A0+=R3.L * R0.L) || I2+= M0;
                    // Y(2*N-1) = A1 += h02*sample; Y(2*N-2) = A0 += h01*sample;
                    // I2 now points at h20
    R0 = MIN(R2,R5)(V) || R4.L = W[I2++];
                    // Clamp to at most 255; fetch h20
    P5-= P1;        // Back to the start of the next row
    R0 = MAX(R0,R7)(V) || R2 = B[P5++](z);
                    // Clamp to at least 0; fetch the first sample of the row
                    // handled by the next outer iteration
    R0 = R0 >> 16 || B[P3++] = R0 || R3 = [I2++];
                    // Store Y(2*N-2); R3.L = h21, R3.H = h22
    B[P3++] = R0;   // Store Y(2*N-1)
ODD_ROW_END:
    P3 = P3+P2;     // Skip the output row already written by the first pass
/******************************LAST ROW *************************************
Y(0) = h21*x(M-1,0) + h01 * x(0)
Y(1) = h22*x(M-1,0) + h20 *x(M-1,1)+ h02 *x(0) + h00* x(1)
Y(2*N-2)= h21*x(M-1,N-1) + h01 * x(M-1,0)
Y(2*N-1)= h22*x(M-1,N-1) + h20 *x(M-1,1)+ h02 *x(M-1,0) + h00* x(0)
*******************************************************************************/
    P4 = B0;        // P4 is reused as a read pointer into the first input row
    M1 = R2;        // Park the first sample of the last row in M1
    R0 = B[P5](z);  // Fetch the second sample of the last row
    A1 = R4.L*R0.L || R1 = B[P4++](z) || R4.L = W[I2++];
                    // A1 = h20 * that sample; R1 = x(0) of the first row;
                    // R4.L = h00
    M2 = R1;        // Park x(0) of the first row in M2
    MNOP;
    LSETUP(LAST_ROW_ST,LAST_ROW_END)LC0 = P1;
LAST_ROW_ST:
    A1+= R2.L *R3.H, A0 = R2.L *R3.L || R3 = [I2] || I2+= M0;
                    // A1 += h22*sample, A0 = h21*sample (last row);
                    // R3.L = h01, R3.H = h02; I2 now points at h20
    A1+= R3.H*R1.L , R3.L = (A0+= R3.L *R1.L) || R0 = B[P4](z);
                    // A1 += h02*sample, first pel = A0 += h01*sample (first
                    // row); fetch the next first-row sample
    R3.H = (A1+= R4.L *R0.L) || R1 = B[P4++](z);
                    // Second pel = A1 += h00 * next first-row sample
    R6 = MAX(R7,R3)(V) || R2 = B[P5++](z) || R4.L = W[I2++];
                    // Clamp to at least 0; fetch next last-row sample and h20
    R6 = MIN(R6,R5)(V) || R0 = B[P5](z);
                    // Clamp to at most 255; fetch the sample after that
    R6 = R6 >> 16 || B[P3++] = R6 || R3 = [I2++];
                    // Store first pel; R3.L = h21, R3.H = h22
LAST_ROW_END:
    A1 = R4.L*R0.L || R4.L = W[I2++] || B[P3++] = R6 ;
                    // A1 = h20 * sample for the next pair; store second pel;
                    // R4.L = h00
    M0 = 6;         // New I2 step: from h10 to h20
    R6 = M1;        // Sample parked in M1 before the loop
    R7 = M2;        // x(0) of the first row (R7 no longer holds 0)
    A1 = R4.L*R7.L; // A1 = h00 * x(0) of the first row
    A1+= R2.L *R3.H, A0 = R2.L *R3.L || R3 = [I2++];
                    // A1 += h22*sample, A0 = h21*sample;
                    // R3.L = h01, R3.H = h02
    R2 = R1-R1(NS) || I2+= m0 ;
                    // R2 = 0, the lower clamp now that R7 holds data;
                    // I2 now points at h20
    A1+= R3.H*R1.L , R3.L = (A0+= R3.L *R1.L) || R4.L = W[I2++] ;
                    // A1 += h02*sample, first pel = A0 += h01*sample;
                    // R4.L = h20
    R3.H = (A1+= R4.L *R6.L);
                    // Second pel = A1 += h20 * sample parked in M1
    R2 = MAX(R2,R3)(V);
    R2 = MIN(R2,R5)(V);
                    // Clamp both halves to 0..255
    R2 = R2 >> 16||B[P3++] = R2;
    B[P3++] = R2;   // Store the final two output pels
    L2 = 0;         // Leave circular mode on I2 before returning: the
                    // VisualDSP++ C run-time model expects L0-L3 to be zero
                    // on return to compiled code
    SP+= 18;        // Release the mask copy
    (R7:4,P5:3) = [SP++];
                    // Restore R7-R4 and P5-P3
    RTS;
    NOP;            // Original note: avoids one stall if LINK or UNLINK
                    // happens to be the next instruction after RTS in memory
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -