📄 sobel_fast.asm
字号:
/*******************************************************************************
sobel_fast.asm
********************************************************************************
Module name : sobel_fast.asm
Label name : _sobel_fast
Version : 1.5
Change History :
Version Date Author Comments
1.5 25/07/2006 bmk Fixed circular buf issue
1.4 03/08/2004 TL Eliminated thinning step
1.3 11/18/2002 Swarnalatha Tested with VDSP++ 3.0
compiler 6.2.2 on
ADSP-21535 Rev.0.2
1.2 11/13/2002 Swarnalatha Tested with VDSP++ 3.0
on ADSP-21535 Rev. 0.2
1.1 01/28/2002 Raghavendra Modified to match
silicon cycle count
1.0 07/16/2001 Raghavendra Original
Description : This function performs edge detection using the Sobel method.
The input is processed with a horizontal and a vertical mask.
Both results are squared and added.
This value is compared with the square of the threshold value; if the
result is greater than the threshold value, the output is written as
one, otherwise zero. The resultant image is a binary image.
Since the first and last rows do not contain valid information,
the output values in these rows are zero. Similarly, in each row the
first and last columns are zero, i.e. all boundary
elements are zero. The user has to pass a proper threshold value.
Horizontal mask = | -1 -2 -1 |
| 0 0 0 |
| 1 2 1 |
Vertical mask = | -1 0 1 |
| -2 0 2 |
| -1 0 1 |
In output image first row,last row, first column and last
columns are zero.
Prototype : void _sobel_fast(unsigned char* in, int row, int col,
unsigned char *out,int threshold );
in -> It is pointer to the input image.
row -> It is number of rows of input image.
col -> It is number of columns of input image.
out -> It is a pointer to the output buffer.
threshold -> Threshold value to compare.
Registers used : A0, A1, R0-R7, I1, I3, B0-B3, M0-M3, L1, P0-P5, LC0, LC1.
Performance :
In condition check Branch prediction is assumed because most of the values will be below threshold.
Image chosen for cycle count : 8x8 image with central 6x6 pixels with
255 value and rest of the pixels with value zero. Threshold value used is 966.
Code Size : xxx bytes
Cycle Count : xxx cycles for 8x8 image
First row and last row = 2*Column
Loop for applying Horizontal and vertical mask
Inner loop : 23 * (column-2)
outer loop : 5 * (row-2)
Loop for conditional check:
Inner loop : 25 * (column-2)
outer loop : 4 * (row-2)
*******************************************************************************/
.section L1_code;
.global _sobel_fast;
.align 8;
/*
 * _sobel_fast(in, row, col, out, threshold)
 *
 * Entry state as used by this routine:
 *   R0 = in, R1 = row, R2 = col,
 *   out and threshold are read from [SP+40] and [SP+44] after the
 *   register push below.
 *
 * Output layout (row*col bytes written through P4):
 *   - first row, last row, first column and last column are written as 0;
 *   - every interior pixel gets
 *         s = |H| + |V|
 *         out = (s <= threshold*threshold) ? 0 : (s >> 2)   (low byte stored)
 *     where H / V are the 3x3 horizontal / vertical Sobel sums.
 *
 * NOTE(review): the file header describes "squares added, output 0/1".
 * The code below instead sums absolute values, still compares against the
 * SQUARED threshold, and stores s>>2. That behaviour is preserved here;
 * confirm which one is intended before changing it.
 *
 * NOTE(review): there is no guard for row < 3 or col < 3; the loop counts
 * row-2 / col-2 are used as-is.
 *
 * Register roles inside the loops:
 *   P5 = running input pointer        P4 = running output pointer
 *   P1 = col-2 (row stride helper)    P2 = 2*col+2 (rewind distance)
 *   I1/B1/L1 = 8-entry (16-byte) circular coefficient table on the stack
 *   R0 = current pixel, R1.L/R1.H = current coefficients
 *   A1 = horizontal-mask accumulator, A0 = vertical-mask accumulator
 *   R6 = threshold*threshold, R5 = byte to store
 *
 * Fix in this revision: the "clear last row" hardware loop used to have its
 * END_ROW label on a nop, so the loop body was the nop and only ONE zero
 * byte was stored for the last row. The label now sits on the store, so
 * col bytes are written, matching FIRST_ROW and the documented output.
 */
_sobel_fast:
    [--SP]=(R7:4,P5:3);                 // save R7-R4 and P5-P3; restored before RTS
    L1 = 0;                             // linear addressing while the table is built
    P5=R0;                              // P5 = address of input image
    P0=R1;                              // P0 = number of rows
    P1=R2;                              // P1 = number of columns
    R3=R2<<1 ||P4=[SP+40];              // R3 = 2*col; P4 = output address
    M0=R2;                              // M0 = number of columns (reloaded into P0 for the last row)
    nop;
    nop;
    nop;
    nop;
    P0+=-2;                             // P0 = row-2 (outer loop count)
    M3=P0;                              // NOTE(review): M3 is not read again in this routine
    R3+=2;                              // R3 = 2*col + 2
    R4=R1.L*R2.L(is)||R6=[SP+44];       // R4 = row*col (16-bit operands); R6 = threshold
    P2=R3;                              // P2 = 2*col + 2, used to rewind P5 after each window
    R7=1;                               // coefficient  1
    R0=-1;                              // coefficient -1
    R1=-2;                              // coefficient -2
    R2=2;                               // coefficient  2
    SP+=-16;                            // reserve 16 bytes (8 halfwords) for the coefficient table
    I1=SP;
    B1=SP;                              // I1 and B1 both point at the table
    B2=R4;                              // B2 = row*col
    R4=R4<<2||W[I1++]=R0.L;             // R4 = 4*row*col; table[0] = -1
    B3=R4;
    P3=R4;
    W[I1++]=R1.L;                       // table[1] = -2
    W[I1++]=R7.L;                       // table[2] =  1
    W[I1++]=R1.L;                       // table[3] = -2
    W[I1++]=R2.L;                       // table[4] =  2
    W[I1++]=R7.L;                       // table[5] =  1
    R6=R6.L*R6.L(IS)||W[I1++]=R0.L;     // R6 = threshold^2; table[6] = -1
    R5=R7-R7(NS)|| W[I1++]=R2.L;        // R5 = 0; table[7] = 2
    I1=B1;                              // rewind to table[0]
    L1=16;                              // make I1 circular over the 16-byte table
    R0=B[P5++] (Z)||R1.L=W[I1++];       // prime the pipeline: R0 = x00, R1.L = table[0] (-1)
    I3=P4;                              // NOTE(review): I3 is not read again in this routine
    P3=B2;                              // P3 = row*col
    B0=SP;
    MNOP;
    R4 = 254;                           // "edge pixel" - NOTE(review): R4 is not read again in this routine

    // ---- first output row: col zero bytes ----
    LSETUP(FIRST_ROW,FIRST_ROW)LC0=P1;  // loop count = col
FIRST_ROW:
    B[P4++]=R5;

    P1+=-2;                             // P1 = col-2 (inner loop count and row-stride helper)
    LSETUP(ROW_ST,ROW_END)LC0=P0;       // outer loop count = row-2
    P0=SP;                              // executed once, before the first ROW_ST iteration
    P3=B0;
ROW_ST:
    B[P4++]=R5;                         // first column of this output row = 0
    LSETUP(COL_ST,COL_END)LC1=P1;       // inner loop count = col-2
/****************************************************************************
The coefficient table on the stack is -1, -2, 1, -2, 2, 1, -1, 2.
Eight halfwords are fetched per pixel (the last one wraps back to table[0]),
so I1 is at the same table position at every COL_ST.
MAC1 (A1) accumulates the horizontal-mask sum:
    A1 = -x00 - 2*x01 - x02 + x20 + 2*x21 + x22
MAC0 (A0) accumulates the vertical-mask sum:
    A0 = -x00 + x02 - 2*x10 + 2*x12 - x20 + x22
where xRC is the pixel at row R, column C of the current 3x3 window.
****************************************************************************/
COL_ST:
    // Maybe the P5=P5+P1 steps can be eliminated with proper usage of an M register???
    A1=R0.L*R1.L, A0=R0.L *R1.L(IS) ||R0=B[P5++](Z)||R1.H=W[I1++];
                                        // A1 = -x00, A0 = -x00; fetch x01 and -2
    A1+=R0.L*R1.H(IS) ||R0=B[P5](Z)||R1.H=W[I1++];
                                        // A1 += -2*x01; fetch x02 (no post-increment) and 1
    P5=P5+P1;                           // P5: x02 -> x10 (same column as x00, next row)
    A1+=R0.L*R1.L ,A0+=R0.L *R1.H(IS)||R0=B[P5++](Z)||R1.L=W[I1++];
                                        // A1 += -x02, A0 += x02; fetch x10 and -2
    A0+=R0.L*R1.L(IS)||R0=B[P5++](Z)|| R1.H=W[I1++];
                                        // A0 += -2*x10; fetch x11 (unused, centre weight is 0) and 2
    R5=R1-R1(NS)||R0=B[P5](Z)||R1.L=W[I1++];
                                        // R5 = 0 (default output); fetch x12 and 1
    P5=P5+P1;                           // P5: x12 -> x20
    A0+=R0.L *R1.H(IS)||R0=B[P5++](Z)||R1.H=W[I1++];
                                        // A0 += 2*x12; fetch x20 and -1
    A1+=R0.L*R1.L,A0+=R0.L*R1.H(IS)|| R0=B[P5++](Z)||R1.H=W[I1++];
                                        // A1 += x20, A0 += -x20; fetch x21 and 2
    A1+=R0.L *R1.H(IS)||R0=B[P5++](Z);
                                        // A1 += 2*x21; fetch x22
    R3=(A1+=R0.L*R1.L),R2=(A0+=R0.L*R1.L)(IS);
                                        // A1 += x22, A0 += x22;
                                        // R3 = horizontal-mask result, R2 = vertical-mask result
    // Need to choose b/w ABS and mult.
    R3=ABS R3;
    R2=ABS R2;
    //R3=R3.L*R3.L(IS);                 // disabled alternative: squared magnitudes
    //R2=R2.L*R2.L(IS);
    P5-=P2;                             // rewind by 2*col+2: P5 now points at x01 of this window
    R3=R3+R2(ns)||R0=B[P5++](z)||R1.L=W[I1++];
                                        // R3 = |H| + |V|; fetch next window's x00 and table[0] (-1, wrapped)
    CC=R3<=R6;                          // CC set when the sum does not exceed threshold^2
    R3=R3>>2;                           // scale the sum down by 4 (does not alter CC)
    IF !CC R5 = R3;                     // above threshold: output the scaled sum, else keep 0
COL_END: B[P4++]=R5;                    // store the result byte

    P5+=1;                              // skip to column 0 of the next input row
    R5=R1-R1(ns)||R0=B[P5++](Z);        // R5 = 0; prefetch x00 for the next row
ROW_END:B[P4++]=R5;                     // last column of this output row = 0

    // ---- last output row: col zero bytes ----
    P0=M0;                              // P0 = number of columns
    LSETUP(END_ROW,END_ROW)LC0=P0;      // loop count = col
    nop;                                // kept from the original, but now OUTSIDE the loop body
END_ROW:
    B[P4++]=R5;                         // loop body is the store (was the nop before the fix)

    // NOTE(review): P3/P1 computed below are never used - P3 is overwritten
    // by the register pop. Kept to leave everything but the fix unchanged.
    P3=B3;
    P1=B2;
    L1=0;                               // clear L1 so I1 is no longer circular on return
    SP+=16;                             // release the coefficient table
    P3=P3+(P1<<1);
    (R7:4,P5:3)=[SP++];                 // restore R7-R4 and P5-P3
    RTS;
_sobel_fast.end:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -