⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 image_scale_up_by2.asm

📁 ADI BF DSP 图象缩放的汇编优化源码
💻 ASM
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification).

By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software. 
********************************************************************************
Module Name     : image_scale_up_by2.asm
Label Name      : __image_scale_up_by2
Version         :   1.3
Change History  :

                Version     Date          Author        Comments
                1.3         11/18/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        compiler 6.2.2 on 
                                                        ADSP-21535 Rev.0.2
                1.2         11/13/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        on ADSP-21535 Rev. 0.2
                1.1         02/28/2002    Raghavendra   Modified to match
                                                        silicon cycle count
                1.0         06/29/2001    Raghavendra   Original 

Description     : This function scales an image up by a factor of 2 in both the
                  horizontal and vertical directions.
                 The input data is unsigned char, the mask is in fractional 
                 format, and the mask size is fixed at 3x3. The input and output
                 data are in the range 0 to 255. Mask values are fetched and 
                 stored on the stack, starting at a 16-bit-aligned address.

                 Mask elements are stored as below
                                       | h00  h01  h02 |
                                       | h10  h11  h12 |
                                       | h20  h21  h22 |
                

Prototype       : void _image_scale_up_by2(unsigned char* in, int row, int col, 
                                    short *mask, unsigned char *out );

                   in   ->  pointer to the input image.
                   row  ->  number of rows of input image.
                   col  ->  number of columns of input image.
                   mask ->  pointer to the 3x3 mask.
                   out  ->  pointer to the output buffer.

Registers used  : A0, A1, R0-R7, I1, I2, B0, B2, M0-M2, L1, L2, P0-P5, LC0, LC1.

Performance     :
            Code size       : 498 Bytes
            Cycle count     : 342 cycles (for 4x4 input)
      
        Even rows output    : Inner loop   = 6*( COL-1)    
                              Outer loop  =  10* ROW       
        Odd  rows output    : Inner loop   = 9*( COL-1)    
                              Outer loop  =  16* (ROW -1)
        Last row output     : 7* (COL-1)
    For storing the coefficients on stack : 9 cycles
                          (4.85 cycles/output pel in core) 
******************************************************************************/

.section    L1_code;
.global     __image_scale_up_by2;
.align      8;
    
/*******************************************************************************
    __image_scale_up_by2

    Scales an unsigned-char image up by 2 in both directions using a 3x3 mask
    of 16-bit coefficients.

    Inputs as read by the code below:
        R0          address of the input image
        R1          number of input rows    (M)
        R2          number of input columns (N)
        [SP+40]     address of the mask   (offset taken after the register push)
        [SP+44]     address of the output (offset taken after the register push)

    Structure:
        1. Copy the 9 mask coefficients to an 18-byte buffer on the stack and
           address it through I2 as a circular buffer (L2 = 18).
        2. Even output rows  : written through P4, one per input row.
        3. Odd output rows   : written through P3, one per input row except the
                               last; each uses an input row and the row below.
        4. Last odd row      : pairs input row M-1 with input row 0.

    Long-lived registers:
        R5 = 255 in both halves (upper clamp), R7 = 0 (lower clamp),
        P1 = N-1 (inner loop count), P2 = 2*N (bytes per output row),
        B0 = address of the input image.

    R7:4 and P5:3 are saved and restored; the other registers that are written
    (R0-R3, P0-P2, I1, I2, B0, B2, M0-M2, L1, L2, A0, A1) are not restored.

    NOTE(review): M-1 and N-1 are used directly as hardware loop counts, so
    ROW = 1 or COL = 1 gives a loop count of zero; that case is not handled
    here. Confirm the hardware loop behaviour before calling with such sizes.
*******************************************************************************/
__image_scale_up_by2:

    [--SP] = (R7:4,P5:3);
                            // Save R7-R4 and P5-P3 (7 registers) on the stack
    L1 = 0;                 // Clear L1 (length register paired with I1)
    L2 = 0;                 // Clear L2 while the mask is being copied
    P1 = R2;                // Number of COLUMNS in input    (N)
    R7 = [SP+40];           // Address of MASK
    I1 = R7;                // I1 reads the caller's mask
    P4 = [SP+44];           // Address of output array
    P0 = 9;                 // Initialize to copy 9 elements of mask to stack
    SP += -18;              // Reserve 18 bytes (9 coefficients * 2 bytes).
                            // NOTE(review): this leaves SP only 2-byte aligned
                            // until the matching SP += 18 at the end. The
                            // 32-bit coefficient-pair loads at buffer offsets
                            // 2 and 14 appear to rely on that alignment;
                            // confirm before changing the size, and confirm
                            // that nothing else uses the stack meanwhile.
    I2 = SP;                // I2 and B2 both point at the stack copy of the mask
    B2 = SP;
    MNOP || R6.L = W[I1++]; // fetch first coefficient, MNOP to avoid IAU stall

    LSETUP(CP,CP)LC0 = P0;  // Single-instruction loop, executed 9 times
CP:     W[I2++] = R6.L || R6.L = W[I1++];
                            // Store the current coefficient on the stack and
                            // fetch the next one. Together with the priming
                            // fetch above this reads ten halfwords from the
                            // mask; the tenth is never stored.
        P0 = R1;            // Number of  ROWS in input array (M)
        I2 = B2;            // Rewind I2 to the first coefficient (h00)
        L2 = 18;            // length of circular buffer 2*No. of elements in 
                            // mask
        M1 = 6;             // modifier to fetch h10 value
        R2 = R2  <<  1 || I2+=M1;
                            // 2*COLUMNS value and modify I2 to fetch h10 
        R5 = 255;           // Initialize to saturate the result
        R5 = PACK(R5.L,R5.L) || R4.L = W[I2++];
                            // R5 = 255 in both halves; fetch h10 into R4.L
        P5 = R0;            // Address of input array
        P2 = R2;            // P2 = 2*COLUMNS, the length of one output row;
                            // used to step over the interleaved rows
        B0 = R0;            // Keep a copy of the input address for later passes
        P1+=-1;             // P1==Col-1 to set inner loop counter
        R7 = R1-R1(NS) || R0 = B[P5++](z) ||  R3.l = w[I2++];
                            // Clear r7, fetch first input and h11 
        P3 = P4+P2;         // P3 = out + 2*COLUMNS: address of the first odd
                            // output row (even rows are written through P4)
        R3.H = W[I2++];     // fetch h12
    
/****************************** FOR EVEN ROW OUTPUT ***************************
    Indices are relative to the current input row (N = COLUMNS).
    Y(0)  = h11* x(0);
    Y(1)  = h12 * x(0) + h10 * x(1);
    Y(2*N-2)= h11* x(N-1);
    Y(2*N-1)= h12 * x(N-1) + h10 * x(0);
*******************************************************************************/
    
    LSETUP(EVEN_ROW_ST,EVEN_ROW_END)LC1 = P0;
                            // Loop counter ==No. of ROWS 
    
EVEN_ROW_ST:
        R6 = R0;            // Keep the first input of the row for the
                            // wrap-around term of the last output pixel
        A1 = R0.L*R3.H, R1.L = (A0 = R0.L *R3.L) ||  R2 = B[P5++](z);
                            // A1= h12 *x(0)   y(0)=A0=h11*x(0) and fetch x(1) 

        LSETUP(EVEN_COL_ST,EVEN_COL_END)LC0 = P1;
                            // Loop counter == No. COLUMNS -1 
EVEN_COL_ST:
            R1.H = (A1+=R2.L *R4.L);
                            // Y(1) = A1= h12 *x(0) + h10 *x(1) 
            R1 = MIN(R1,R5)(V);
                            // Clamp both halves to at most 255
            R1 = MAX(R1,R7)(V);                               
                            // Clamp both halves to at least 0
            R0 = R2 << 0  || B[P4++] = R1;
                            // Copy x(1) to r0 for next iteration and store Y(0)
            R1 = R1 >> 16 || R2 = B[P5++](z);
                            // Move Y(1) into the low byte and fetch the next
                            // input
EVEN_COL_END:
            A1 = R0.L*R3.H, R1.L = (A0 = R0.L *R3.L) || B[P4++] = R1;
                            // A1= h12 *x(0)   y(0)=A0=h11*x(0) for the next
                            // pixel pair 
                            // Store the result  Y(1)
        R1.H = (A1+=R6.L *R4.L) || R0 = B[P5--](z);
                            // Y(2*N-1)=A1= h12 *x(N-1) + h10 * x(0); the load
                            // result is overwritten below and the access
                            // steps P5 back by one
        R1 = MIN(R1,R5)(V) ;
        R1 = MAX(R1,R7)(V); // check if result is within 0 to 255
        R0 = R2 << 0  || B[P4++] = R1;
                            // store Y(2*N-2) 
        R1 = R1  >>  16 ||  R0 = B[P5++](z);
                            // Fetch the first input of the next row.
                            // NOTE(review): on the final row this load and the
                            // B[P5--] load above appear to read just past the
                            // end of the input image - confirm.
        B[P4++] = R1;       // store Y(2*N-1) result
EVEN_ROW_END:
        P4 = P4+P2;         // increment output address by 2* Column to skip
                            // the odd row that follows
    
/****************************** FOR ODD ROW OUTPUT   **************************
    Indices are relative to the current input row; x(i+COLUMN) is the pixel
    directly below x(i).
    Y(0)  = h21*x(0) + h01 * x(0+COLUMN)
    Y(1)  = h22*x(0) + h20 *x(1)+ h02 *x(0+COLUMN) + h00* x(1+COLUMN)
    Y(2*N-2)= h21*x(N-1) + h01 * x(N-1+ COLUMN)
    Y(2*N-1)= h22*x(N-1) + h20 *x(0)+ h02 *x(N-1+COLUMN) + h00* x(0+COLUMN)
******************************************************************************/
    
    
    P0+=-1;                 // decrement to set loop counter to ROWS-1
    M0 = 10;                // modifier to fetch coeff. properly (steps I2 from
                            // the h01/h02 pair to h20)
    MNOP || R4.L = W[I2++]; // fetch h20
    P5 = B0;                // Address of input array
    R2 = B[P5++](z)  || R3 = [I2++];
                            // Fetch first input and h21, h22 

    LSETUP(ODD_ROW_ST,ODD_ROW_END)LC1 = P0;
                            // LC1 = ROWS-1 
ODD_ROW_ST:
        A1 = R2.L * R3.H, A0 = R2.L * R3.L  || R0 = B[P5](z)  || R1.L = W[I2++];
                            // A1=h22 *x(0),A0=h21*x(0) and fetch x(1),h00 
        LSETUP(ODD_COL_ST,ODD_COL_END)LC0 = P1;
                            // Loop counter == No. COLUMNS -1 
ODD_COL_ST: P5 = P5+P1;     // Advance by COL-1: from x(1) to x(0+COLUMN)
            A1+= R4.L *R0.L || R0 = B[P5++] (z) ||  R3 = [I2];
                            // A1+=h20 * x(1), fetch x(0+COLUMN) and h01and h02 
            A1+=R3.H*R0.L,R2.L = (A0+=R3.L * R0.L) || R0 = B[P5--](z);
                            //a1+=h02 * x(0+COLUMN),y(0)=a0+=h01*x(0+COLUMN),
                            // Fetch x(1+COLUMN) 
            P5-= P1;        // modify the input address to next element
            R2.H=(A1+= R1.L*R0.L) || I2+=M0;
                            // Y(1)=a1+=h00*x(1+COLUMN), modify I2 to fetch h20
            R1 = MIN(R2,R5)(V) || R4.L = W[I2++] || R6 = B[P5++](z);
                            // Check the result within 0 to 255,fetch h20,
                            // next input 
            R1 = MAX(R1,R7)(V) || R0 = B[P5](z);
                            // Fetch next input 
            R1 = R1  >>  16  || B[P3++] = R1 || R3 = [I2++];
                            // store y(0) and fetch h21, h22 
ODD_COL_END:A1 = R6.L * R3.H, A0 = R6.L * R3.L  || B[P3++] = R1 
            || R1.L = W[I2++];
                            // A1=h22 *x(0),A0=h21*x(0) for the next pixel
                            // pair, fetch h00,
                            // Store Y(1) 
    
        P5+=-1;             // This and the next line rewind P5 to the first
                            // input of the current row
        P5-=P1;             // (P5 = P5 - 1 - (COL-1))
        R0 = B[P5++](z) ;   // fetch x(0)
        P5 = P5+P1;         // Increase the input address to fetch x(1,0)
        A1+= R4.L *R0.L  || R0 = B[P5](z) ||  R3 = [I2];
                            // A1+=h20* x(0) and fetch h01, h02,x(0+COLUMN) 
        P5 = P5+P1;         // modify address to fetch x(N-1 +COLUMN)
        A1+=R1.L*R0.L ||   R0 = B[P5](z);
                            // A1+=h00*x(0+COLUMN) and fetch x(N-1+COLUMN) 
        R2.H = (A1+=R3.H*R0.L) ,R2.L = (A0+=R3.L * R0.L) ||  I2+= M0;
                            // Y(2*N-1)=A1+=h02*x(N-1+COLUMN),
                            // Y(2*N-2)=A0+=h01*x(N-1+COLUMN),
                            // modify I2 to fetch h20 
        R0 = MIN(R2,R5)(V) ||  R4.L = W[I2++];
                            // Check if output is within the limit 0 to 255, 
                            // fetch h20 
        P5-= P1;            // modify address to start of next row
        R0 = MAX(R0,R7)(V) || R2 = B[P5++](z);
                            // Fetch the first input of the next row 
        R0 = R0 >> 16 || B[P3++] = R0  || R3 = [I2++];
                            // Store Y(2*N-2), fetch h21, h22 
        B[P3++] = R0;       // store Y(2*N-1)
ODD_ROW_END:
        P3 = P3+P2;         // modify out address by 2*COLUMN to skip the even
                            // row that follows
    
    
/******************************LAST ROW  *************************************
    The last odd output row pairs input row M-1 with input row 0.
    x(M-1,i) is column i of the last input row, x(0,i) column i of the first.
    Y(0)  = h21*x(M-1,0) + h01 * x(0,0)
    Y(1)  = h22*x(M-1,0) + h20 *x(M-1,1)+ h02 *x(0,0) + h00* x(0,1)
    Y(2*N-2)= h21*x(M-1,N-1) + h01 * x(0,N-1)
    Y(2*N-1)= h22*x(M-1,N-1) + h20 *x(M-1,0)+ h02 *x(0,N-1) + h00* x(0,0)
*******************************************************************************/
    
    P4 = B0;                // starting address of input buffer (row 0); P4 is
                            // reused here as an input pointer
    M1 = R2;                // save x(M-1,0), fetched at the end of the odd-row
                            // loop, for the wrap-around term
    R0 = B[P5](z);          // fetch x(M-1,1)
    A1 = R4.L*R0.L ||  R1 = B[P4++](z) ||  R4.L = W[I2++];
                            // A1= h20 *x(M-1,1), fetch h00, x(0,0) 
    M2 = R1;                // store x(0,0)
    MNOP;

    LSETUP(LAST_ROW_ST,LAST_ROW_END)LC0 = P1;
                            // Loop counter == No. COLUMNS -1 
LAST_ROW_ST:
        A1+= R2.L *R3.H, A0 = R2.L *R3.L || R3 = [I2] || I2+= M0;
                            // A1+=h22* x(M-1,0) ,A0=h21*x(M-1,0), fetch h01,
                            // h02 and modify I2 to fetch h20 
        A1+= R3.H*R1.L , R3.L = (A0+= R3.L *R1.L)  || R0 = B[P4](z);
                            // A1+=h02 *x(0,0),y(0)=A0+=h01*x(0,0) ,fetch x(0,1) 
        R3.H = (A1+= R4.L *R0.L) || R1 = B[P4++](z);
                            // Y(1)=A1+=h00 * x(0,1); keep x(0,1) in R1 for the
                            // next pixel pair 
        R6 = MAX(R7,R3)(V) || R2 = B[P5++](z)  || R4.L = W[I2++];
                            // Clamp to at least 0, fetch next last-row input
                            // and h20 
        R6 = MIN(R6,R5)(V) || R0 = B[P5](z);
                            // Check if output within 0 to 255, fetch next input
        R6 = R6 >> 16 || B[P3++] = R6 || R3 = [I2++];
                            // Store Y(0),fetch h21,h22 
LAST_ROW_END:
        A1 = R4.L*R0.L ||  R4.L = W[I2++] || B[P3++] = R6 ;
                            // A1= h20 *x(M-1,1),store y(1), fetch h00 
    M0 = 6;                 // modifier to step I2 to h20 after the h01/h02 load
    R6 = M1;                // x(M-1,0)
    R7 = M2;                // x(0,0); R7 no longer holds the zero clamp value
    A1 = R4.L*R7.L;         // A1=h00* x(0,0)
    A1+= R2.L *R3.H, A0 = R2.L *R3.L || R3 = [I2++];
                            // A1+=h22 *x(M-1,N-1), A0=h21*x(M-1,N-1), fetch 
                            // h01,02 
    R2 = R1-R1(NS) ||  I2+= m0 ;
                            // Clear r2 (lower clamp value), step I2 to h20 
    A1+= R3.H*R1.L , R3.L = (A0+= R3.L *R1.L) ||  R4.L = W[I2++] ;
                            // A1+=h02 *x(0,N-1),Y(2*N-2)=A0+=h01*x(0,N-1),
                            // fetch h20 
    R3.H = (A1+= R4.L *R6.L);
                            // Y(2*N-1)=A1+=h20 *x(M-1,0) 
    R2 = MAX(R2,R3)(V); 
    R2 = MIN(R2,R5)(V);     // check if output within 0 to 255
    R2 = R2 >> 16||B[P3++] = R2;
                            // store Y(2*N-2) 
    B[P3++] = R2;           // store Y(2*N-1)
    
    SP+= 18;                // Release the 18-byte mask buffer
    (R7:4,P5:3) = [SP++];   // pop R7-4, P5-3 from stack
    RTS;
    NOP;                    //to avoid one stall if LINK or UNLINK happens to be
                            //the next instruction after RTS in the memory.    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -