⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 corr_3x3.asm

📁 ADI BF DSP的几种常用的图象滤波汇编优化后的代码
💻 ASM
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification).

By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software. 
********************************************************************************
Module Name     : corr_3x3.asm
Label name      : __corr_3x3
Version         :   1.3
Change History  :

                Version     Date          Author        Comments
                1.3         11/18/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        compiler 6.2.2 on 
                                                        ADSP-21535 Rev.0.2
                1.2         11/13/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        on ADSP-21535 Rev. 0.2
                1.1         01/28/2002    Raghavendra   Modified to match 
                                                        silicon cycle count
                1.0         07/10/2001    Raghavendra   Original 

Description     : This function performs correlation between input image and 3x3 
                  mask. The mask elements are multiplied with corresponding input
                  elements and then summed together.

                 if input == | x00 x01 x02 |
                             | x10 x11 x12 |
                             | x20 x21 x22 |

                 and mask == | h00 h01 h02 |
                             | h10 h11 h12 |
                             | h20 h21 h22 |
 
                then output y11=( x00*h00 + x01*h01 + x02*h02 + x10*h10 + 
                                  x11*h11 + x12*h12 + x20*h20 + x21*h21 + 
                                  x22*h22 )>>12;
                 
Prototype       : void _corr_3x3(unsigned char* in, int row, int col, 
                                 short * mask, unsigned char *out );
                 
                   in   ->  Pointer to the input image.
                   row  ->  Number of rows in the input image.
                   col  ->  Number of columns in the input image.
                   mask ->  Pointer to the 3x3 mask.
                   out  ->  Pointer to the output buffer.

Assumption      : 1. Number of columns in input image is even  AND  aligned to 
                     word or half word boundary 
                  2. Input image and mask elements are in different Banks

Registers used  : A0, A1, R0-R3, I1, B1, M0-M3, L1, P0-P5, LC0, LC1.

Performance     :  
                  Code Size      : 690 bytes.
                  Cycle count    : 657 cycles for 8x8 image
       first and last row        : 10*(col-2)
       Middle rows    
           Inner loop            : 9*(col-2)
           Outer loop            :23*(row-2) 
      four corner elements       : 8*4 
*******************************************************************************/
.section  L1_code;
.global   __corr_3x3;
.align    8;

// -----------------------------------------------------------------------------
// __corr_3x3 : 3x3 correlation of an 8-bit image with a mask of nine 16-bit
//              coefficients (h00..h22, stored row by row).
//
// Inputs, as read by the code below:
//   R0      = in    byte pointer to the input image
//   R1      = row   number of image rows
//   R2      = col   number of image columns
//   [SP+24] = mask  (offset is after the 12-byte push of P5:3)
//   [SP+28] = out   byte pointer to the output image
//
// Register roles after setup:
//   P5       running input pointer        M3 = copy of 'in'
//   P4       running output pointer
//   I1/B1/L1 circular buffer over the mask (L1 = 18 bytes = 9 halfwords)
//   P0 = col-4    P1 = col-2    M0 = 2*col    M1 = 6    M2 = 2*col+2
//   P2 = row-2 (loop count), then 2*col+2 in the middle rows, then col+2
//   P3 = col+2 in the first row, then 2*col in the middle rows
//   A0/A1    accumulators; each sum is shifted right by 12 and its low byte
//            is stored to the output.
//
// Pixel names in the comments (x(r,c)) are relative to the top-left corner of
// the window currently being processed.  Border pixels use only the part of
// the 3x3 window that lies inside the image (4 taps at the corners, 6 taps
// along the edges).
//
// The word-sized "dummy" loads (R2=W[P5++Pn]) exist only to add Pn to P5 in
// parallel with a mask fetch; their data is discarded.
//
// NOTE(review): there is no guard for small images.  The loop counts col-2,
// row-2 and (col-2)/2 are used as-is, so callers are presumably expected to
// pass row >= 3 and an even col >= 4 -- confirm against the callers.
// -----------------------------------------------------------------------------
__corr_3x3:
    [--SP]=(P5:3);          // Save P3-P5 (restored before RTS)
    R3=[SP+24];             // Address of the 3x3 mask
    I1=R3;                  // I1 walks the mask ...
    B1=R3;                  // ... as a circular buffer based at the mask
    L1=18;                  // Buffer length: 9 coefficients * 2 bytes
    M0=8;                   // Byte offset of h11 (4 halfwords into the mask)
    M1=6;                   // Byte offset that skips 3 mask elements
    P5=R0;                  // Address of input array
    M3=R0;                  // Keep a copy of the input address
    P0=R2;                  // Number of columns  (N)
    P1=R2;                  // Duplicate the number of columns
    I1+=M0;                 // I1 -> h11
    P4=[SP+28];             // Address of output array
    P2=R1;                  // Number of rows    (M)
    P3=R2;                  // Number of columns
    P0+=-4;                 // P0 = col-4
    R3=R2<<1 ||R0=B[P5++](Z)||R1.L=W[I1++];
                            // R3 = 2*col, fetch x(0,0) and h11
    M0=R3;                  // M0 = 2*col
    R3+=2;
    M2=R3;                  // M2 = 2*col+2
    P3+=2;                  // P3 = col+2
    P1+=-2;                 // P1 = col-2
    P2+=-2;                 // P2 = row-2
    
/////////////////////// FIRST ROW ///////////////////////////////////
// First row and last row are computed separately.
// In each row the first and last elements are computed separately.
// Top-left corner: x00*h11 + x01*h12 + x10*h21 + x11*h22
    
    A0=R0.L*R1.L(IS)||R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(0,1) and h12 
    R2=W[P5++P1](Z)||R3.L=W[I1++];
                            // Dummy fetches: P5 -> x(1,0), I1 skips h20
    A0+=R0.L *R1.L(IS)||R0=B[P5++](Z) || R1.L=W[I1++];
                            // Fetch x(1,0) and h21 
    A0+=R0.L*R1.L(IS)||R0=B[P5](Z) || R1.L=W[I1++];
                            // Fetch x(1,1) and h22 (I1 wraps to h00)
    R2=(A0+=R0.L * R1.L)(IS)|| I1+=M1;
                            // Final tap; advance I1 by 3 elements to h10
    P5=M3;                  // Reset P5 to the start of the input array
    R2=R2>>12||R0=B[P5++](z) || R1.L=W[I1++];
                            // Scale; fetch x(0,0) and h10 for the loop below
    B[P4++]=R2;             // store the result
    MNOP;                   // kept from the original (no data effect)

    LSETUP(FIRST_ROW_ST,FIRST_ROW_END)LC0=P1;
                            // Loop counter == col-2 (interior pixels of row 0)
    
// Each iteration: 6 taps, rows 0-1 of the window against h10..h22.
// On entry R0 = x(0,0), R1.L = h10, I1 -> h11.
FIRST_ROW_ST:
        A0=R0.L *R1.L(IS)||R0=B[P5++](z) ||R1.L=W[I1++];
                            // Fetch x(0,1) and h11 
        A0+=R0.L*R1.L(IS)||R0=B[P5] (z)||R1.L=W[I1++];
                            // Fetch x(0,2) and h12 (P5 not advanced)
        P5=P5+P1;               // P5 += col-2 -> x(1,0)
        A0+=R0.L *R1.L (IS)||R0=B[P5++](Z) || R1.L=W[I1++];
                            // Fetch x(1,0) and h20 
        A0+=R0.L *R1.L (IS)||R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(1,1) and h21 
        A0+=R0.L *R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(1,2) and h22 (I1 wraps to h00)
        R2=(A0+=R0.L *R1.L)(IS) || I1+=M1;
                            // Final tap; advance I1 by 3 elements to h10
        P5-=P3;                 // P5 -= col+2 -> next window's x(0,0)
        R2=R2>>12||R0=B[p5++](z);
                            // Scale; fetch next window's x(0,0)
FIRST_ROW_END:
        B[P4++]=R2|| R1.L=W[I1++];
                            // Store the result, fetch h10
    
// Top-right corner: x(0,N-2)*h10 + x(0,N-1)*h11 + x(1,N-2)*h20 + x(1,N-1)*h21
    A0=R0.L *R1.L (IS) ||R0=B[P5++](z)  || R1.L=W[I1++];
                            // fetch x(0,N-1) and h11 
    NOP;
    R2=W[P5++P1](Z) || R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(1,N-2), I1 skips h12
    A0+=R0.L * R1.L (IS)|| R0=B[P5++](z)|| R1.L=W[I1++];
                            // Fetch x(1,N-2) and h20 
    A0+=R0.L * R1.L (IS)|| R0=B[P5++](z)|| R1.L=W[I1++];
                            // Fetch x(1,N-1) and h21 
    R2=(A0+=R0.L *R1.L) (IS)|| I1+=4;
                            // Final tap; I1 (at h22) + 2 elements wraps to h01
    P5=M3;                  // Reset P5 to the start of the input array
    R2=R2>>12||R0=B[P5++](z)||R1.L =W[I1++] ;
                            // Scale; fetch x(0,0) and h01 for the middle rows
    B[P4++]=R2;             // store output of last element in first row
/////////////////////  MIDDLE   LOOP   ///////////////////////////////////////
    LSETUP(ROW_ST,ROW_END)LC0=P2;
                            // loop counter == row-2 (M-2) 
    P2=M2;                  // P2 = 2*col+2 (count already latched in LC0)
    P3=M0;                  // P3 = 2*col
// Left edge pixel of a middle row: 6 taps against h01,h02,h11,h12,h21,h22.
// On entry R0 = x(0,0), R1.L = h01, I1 -> h02.
ROW_ST:
        A0=R0.L *R1.L(IS) || R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(0,1) and h02 
        R2=W[P5++P1](Z) ||R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(1,0), I1 skips h10
        A0+=R0.L*R1.L(IS) || R0=B[P5++](z) || R1.L=W[I1++];
                            // fetch x(1,0) and h11 
        A0+=R0.L* R1.L(IS) || R0=B[P5++](z)|| R1.L=W[I1++];
                            // Fetch x(1,1) and h12 
        R2=W[P5++P1](Z)|| R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(2,0), I1 skips h20
        A0+=R0.L*R1.L(IS) || R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(2,0) and h21 
        A0+=R0.L* R1.L(IS) || R0=B[P5++](z)|| R1.L=W[I1++];
                            // Fetch x(2,1) and h22 (I1 wraps to h00)
        R2=(A0+=R0.L*R1.L )(IS)|| R1.L=W[I1++];
                            // Final tap; fetch h00 for the core loop
        P5-=P2;             // P5 -= 2*col+2 -> start of the window's top row
        R2=R2>>12||R0=B[P5++](z);
                            // Scale; fetch x(0,0)
        B[P4++]=R2;         // store the output
    
/////////////////////////// CORE LOOP      /////////////////////////////////////
// Two output pixels per iteration: A0 accumulates the window at columns 0..2,
// A1 the window at columns 1..3.  Coefficients alternate between R1.L and
// R1.H so that both accumulators can use the same pixel in one instruction.
// On entry R0 = x(0,0), R1.L = h00, I1 -> h01.
        LSETUP(COL_ST,COL_END)LC1=P1>>1;
                            // Loop counter ==(col-2)/2 
COL_ST:     A0=R0.L*R1.L(IS) ||R0=B[P5++](z)   || R1.H=W[I1++];
                            // Fetch x(0,1) and h01 
            A1=R0.L * R1.L ,A0+=R0.L*R1.H(IS)  || R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(0,2) and h02 
            A1+=R0.L *R1.H ,A0+=R0.L *R1.L(IS) || R0=B[P5++](z) ||R1.H=W[I1++];
                            // Fetch x(0,3) and h10 
            R2=W[P5++P0](Z) ||NOP;     // Dummy fetch: P5 += col-4 -> x(1,0)
            A1+= R0.L *R1.L (IS) ||R0=B[P5++](z)||R1.L=W[I1++];
                            // Fetch x(1,0) and h11 
            A0+=R0.L *R1.H(IS)|| R0=B[P5++](z) ;
                            // Fetch x(1,1) 
            A1+=R0.L * R1.H,A0+=R0.L *R1.L(IS) || R0=B[P5++](z)|| R1.H=W[I1++];
                            // Fetch x(1,2) and h12 
            A1+= R0.L *R1.L ,A0+=R0.L *R1.H(IS)|| R0=B[P5++](z)||R1.L=W[I1++];
                            // Fetch x(1,3) and h20 
            P5=P5+P0;               // P5 += col-4 -> x(2,0)
            A1+= R0.L *R1.H(IS)||R0=B[P5++](z) || R1.H=W[I1++];
                            // Fetch x(2,0) and h21 
            A0+=R0.L*R1.L(IS)  ||R0=B[P5++](z);
                            // Fetch x(2,1) 
            A1+=R0.L * R1.L ,A0+=R0.L *R1.H(IS)|| R0=B[P5++] (z)||R1.L=W[I1++];
                            // Fetch x(2,2) and h22 (I1 wraps to h00)
            A1+=R0.L *R1.H,R2=(A0+=R0.L *R1.L)(IS)||R0=B[P5++](z);
                            // First sum complete in R2; fetch x(2,3) 
            P5-=P2;                 // P5 -= 2*col+2 -> x(0,2), next pair's x(0,0)
            R3=(A1+=R0.L *R1.L)(IS)||R0=B[P5++](z) || R1.L=W[I1++];
                            // Second sum complete in R3; fetch x(0,2) and h00 
            R2>>=12;
            R3=R3>>12||B[P4++]=R2;
                            // Store first output
COL_END:    B[P4++]=R3;
                            // Store next output 
    
// Right edge pixel of a middle row: 6 taps against h00,h01,h10,h11,h20,h21.
// On entry R0 = x(0,N-2), R1.L = h00, I1 -> h01.
        A0=R0.L *R1.L (IS)||R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(0,N-1) and h01 
        R2=W[P5++P1](Z) ||R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(1,N-2), I1 skips h02
        A0+=R0.L *R1.L (IS)||R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(1,N-2) and h10 
        A0+=R0.L*R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
                            // Fetch x(1,N-1) and h11 
        R2=W[P5++P1](Z) || R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(2,N-2), I1 skips h12
        A0+=R0.L *R1.L (IS)||R0=B[P5++] (z)|| R1.L=W[I1++];
                            // Fetch x(2,N-2) and h20 
        A0+=R0.L*R1.L(IS) ||R0=B[P5++] (z)|| R1.L=W[I1++];
                            // Fetch x(2,N-1) and h21 
        R2=(A0+=R0.L *R1.L)(IS)|| I1+=4;
                            // Final tap; I1 (at h22) + 2 elements wraps to h01
        P5-=P3;                 // P5 -= 2*col -> start of the next window's top row
        R2=R2>>12||R0=B[P5++](z)||R1.L =W[I1++];
                            // Scale; fetch x(0,0) and h01 for the next row
ROW_END:B[P4++]=R2;   
    
//////////////////////  LAST ROW   ////////////////////////////////////////////
// Bottom-left corner: x(M-2,0)*h01 + x(M-2,1)*h02 + x(M-1,0)*h11 + x(M-1,1)*h12
// On entry R0 = x(M-2,0), R1.L = h01, I1 -> h02.
    P2=P1;
    P2+=4;                  // P2 = col+2
    A0=R0.L *R1.L(IS) || R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(M-2,1) and h02 
    R2=W[P5++P1](Z)|| R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(M-1,0), I1 skips h10
    A0+=R0.L*R1.L(IS)||R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(M-1,0) and h11 
    A0+=R0.L *R1.L (IS)||R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(M-1,1) and h12 
    P5-=P2;                 // P5 -= col+2 -> x(M-2,0)
    I1+=M1;                 // I1 (at h20) + 3 elements wraps to h00
    R2=(A0+=R0.L *R1.L) (IS)||R0=B[P5++](z) || R1.L=W[I1++];
                            // Final tap; fetch x(M-2,0) and h00 
    NOP;
    R2>>=12;
    B[P4++]=R2;             // store the result
    
// Interior pixels of the last row: 6 taps, rows M-2..M-1 against h00..h12.
// On entry R0 = x(M-2,0), R1.L = h00, I1 -> h01.
    LSETUP(LAST_ROW_ST,LAST_ROW_END)LC0=P1;
                            // Loop counter == col-2
LAST_ROW_ST:
        A0=R0.L*R1.L (IS) ||R0=B[P5++](z) || R1.L=W[I1++];
                            // fetch x(M-2,1) and h01 
        A0+=R0.L *R1.L(IS) ||R0=B[P5](z) || R1.L=W[I1++];
                            // fetch x(M-2,2) and h02 (P5 not advanced)
        P5=P5+P1;               // P5 += col-2 -> x(M-1,0)
        A0+=R0.L *R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
                            // fetch x(M-1,0) and h10 
        A0+=R0.L *R1.L(IS) ||R0=B[P5++](Z) || R1.L=W[I1++];
                            // fetch x(M-1,1) and h11 
        A0+=R0.L *R1.L(IS) ||R0=B[P5++](z) || R1.L=W[I1++];
                            // fetch x(M-1,2) and h12 
        R2=(A0+=R0.L *R1.L)(IS)||I1+=M1;
                            // Final tap; I1 (at h20) + 3 elements wraps to h00
        P5-=P2;                 // P5 -= col+2 -> next window's x(M-2,0)
        R2=R2>>12||R0=B[P5++] (z)|| R1.L=W[I1++];
                            // Scale; fetch next window's x(M-2,0) and h00
LAST_ROW_END:
        B[P4++]=R2;         // store the result
    
// Bottom-right corner:
//   x(M-2,N-2)*h00 + x(M-2,N-1)*h01 + x(M-1,N-2)*h10 + x(M-1,N-1)*h11
    A0=R0.L *R1.L(IS) || R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(M-2,N-1) and h01 
    R2=W[P5++P1](Z) || R3.L=W[I1++];
                            // Dummy fetch: P5 -> x(M-1,N-2), I1 skips h02
    A0+=R0.L*R1.L(IS)||R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(M-1,N-2) and h10 
    A0+=R0.L *R1.L (IS)||R0=B[P5++](z)||R1.L=W[I1++];
                            // fetch x(M-1,N-1) and h11 
    R2=(A0+=R0.L *R1.L )(IS);
    R2>>=12;
    B[P4++]=R2 ;            // store the final output pixel
    L1=0;                   // Disable circular addressing on I1 again: the
                            // C run-time model expects the L registers to be
                            // zero when control returns to compiled code.
    (P5:3)=[SP++];          // POP  P5-3
    RTS;
    NOP;                    // To avoid one stall if LINK or UNLINK happens to 
                            // be the  next instruction in the memory.
                            // be the  next instruction in the memory.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -