📄 err_diffusion.asm

📁 ADI BF DSP的几种常用的图象滤波汇编优化后的代码
💻 ASM
字号:
/*******************************************************************************
Copyright(c) 2000 - 2002 Analog Devices. All Rights Reserved.
Developed by Joint Development Software Application Team, IPDC, Bangalore, India
for Blackfin DSPs  ( Micro Signal Architecture 1.0 specification).

By using this module you agree to the terms of the Analog Devices License
Agreement for DSP Software. 
********************************************************************************
Module Name     : err_diffusion.asm
Label name      : __err_diffusion
Version         :   1.3
Change History  :

                Version     Date          Author        Comments
                1.3         11/18/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        compiler 6.2.2 on 
                                                        ADSP-21535 Rev.0.2
                1.2         11/13/2002    Swarnalatha   Tested with VDSP++ 3.0
                                                        on ADSP-21535 Rev. 0.2
                1.1         01/28/2002    Raghavendra   Modified to match 
                                                        silicon cycle count
                1.0         07/11/2001    Raghavendra   Original 

Description     : This routine implements the Floyd-Steinberg error diffusion 
                  filter with binary output. Each pixel value, after the error
                  diffused from its neighbors has been added, is compared with
                  the threshold value defined by the user. Values that are
                  larger than the threshold are set to 255 and values that are
                  less than or equal to the threshold are set to zero. The
                  error value is the difference between the error-adjusted
                  pixel value and the value written to the output (255 or 0).
                  The error is propagated to neighboring pixels using the
                  Floyd-Steinberg filter.
                       The Floyd-Steinberg filter propagates fractions of the 
                  error value at pixel location A to four of its neighboring
                  pixels as shown below.
                     ----------------------
                     |      |   A |  7/16 |
                     ----------------------
                     | 3/16 | 5/16| 1/16  |
                     ----------------------
                  When a pixel is processed, it has already received error terms
                  from four neighboring pixels.

                  In this implementation an error buffer is used to accumulate 
                  errors that are propagated from the previous row. The routine
                  does not clear the error buffer itself, so the buffer is
                  expected to be cleared before the first call; the values left
                  in it by one call can then be passed to the next call. This
                  process produces bilevel images whose visual appearance
                  captures the full range of detail of the original image. This
                  is particularly effective when the original image has a lot
                  of detail.
               
Algorithm       : Equivalent C program:

            void err_diffusion(unsigned char *in, int row ,int col,short * temp,
                        unsigned char *output,int thresh)
            {
                 int x,i,y,F;
                 int errA,errB,errC,errE,errF;

            for(y=0,i=0;y<row;y++)
                {
            errA=0; errE=0; errB=temp[0];
                                        // Initialize errors set to zero 
            for(x=0;x<col;x++,i++)
                    {
            errC=temp[x+1];
            F=in[i];                    // pixel value
            errF=F+((errE*7+errA +errB*5+errC*3)>>4);
            if(errF >thresh)            // check if error > threshold value
                output[i]=255;
            else
                output[i]=0;

            if(errF >thresh)
                temp[x]=errF=errF-255;
                            //  if pixel is > threshold subtract 255 from error
            else
                temp[x]=errF;

            errE=errF;      // propagate error for next pixel
            errA=errB;
            errB=errC;
                    }
                }
            }

Assumption      : 1. Input and  error_buffer are in different banks.
                  2. The size of error_buffer is COL+1

Prototype       : void _err_diffusion(unsigned char* in, int row, int col,
                               short * err_buf, unsigned char *out,int thresh );
               
                   in   ->  Pointer to the input image.
                   row  ->  Number of rows of the input image.
                   col  ->  Number of columns of the input image.
              err_buf   ->  Buffer where one row of error values is stored.
                   out  ->  Pointer to the output buffer.
               thresh   ->  User-specified threshold value.

Registers used  : A0, A1, R0-R7, P0, P1, P4, P5, I1, I2, B1, B2, L1, L2,
                  LC0, LC1.

Performance     : 
                Code Size    : 170 bytes

                Cycle count  : 834 cycles for 8x8 image
                Inner loop   : 12 * COL
                 Outer loop  : 3* row
*******************************************************************************/
.section         L1_code;
.global          __err_diffusion;
.align                8;

/*
 * __err_diffusion - Floyd-Steinberg error diffusion with binary output.
 *
 * C prototype:
 *     void _err_diffusion(unsigned char *in, int row, int col,
 *                         short *err_buf, unsigned char *out, int thresh);
 *
 * Arguments on entry:
 *     R0        = in       (input image, one byte per pixel)
 *     R1        = row      (number of rows)
 *     R2        = col      (number of columns)
 *     [SP+12]   = err_buf  (col+1 halfwords, addressed as a circular buffer)
 *     [SP+16]   = out      (output image, one byte per pixel)
 *     [SP+20]   = thresh   (threshold)
 *   (the stack offsets used below are 24 bytes larger because six registers
 *    are pushed first)
 *
 * Register roles inside the loops:
 *     R0.L = errA   R0.H = errB   R1.L = errC   R1.H = errE
 *     R2   = coefficients (R2.H = 7, R2.L = 3)
 *     R3   = coefficients (R3.H = 1, R3.L = 5), then errF
 *     R4   = current pixel          R5 = output value (0 or 255)
 *     R6   = 255                    R7 = thresh
 *     P5   = input pointer          P4 = output pointer
 *     I1/B1/L1 = circular view of err_buf (length 2*col+2 bytes)
 *     I2/B2/L2 = circular 8-byte coefficient table {3,7,5,1} on the stack
 *
 * Saved/restored : R7-R4, P5-P4.
 * Clobbered      : R0-R3, P0, P1, A0, A1, I1, I2, B1, B2, LC0, LC1, CC.
 * Stack usage    : 24 bytes of saved registers + 8 bytes of coefficients.
 *
 * Changes relative to the previous revision of this routine:
 *   - L1 and L2 are cleared before returning. The Blackfin C run-time model
 *     expects the length registers to be zero on function return; leaving
 *     them set would make later I1/I2 accesses in compiled code circular.
 *   - The routine returns immediately when row <= 0 or col <= 0.
 *     NOTE(review): a hardware loop set up with a zero count is not expected
 *     to skip its body - confirm against the Programming Reference.
 *   These additions are outside the loops; the code size and total cycle
 *   count quoted in the file header are therefore no longer exact.
 */
__err_diffusion:

    CC=R1<=0;               // no rows: nothing to do
    IF CC JUMP ERR_DIFF_RET;
    CC=R2<=0;               // no columns: nothing to do
    IF CC JUMP ERR_DIFF_RET;

    [--SP]=(R7:4,P5:4);
                            // Push R7-4, P5-4 (24 bytes)
    R3=R2<<1||P4=[SP+40];   // R3 = 2*col, P4 = address of output array
    R3+=2;                  // R3 = 2*col+2 = size of error_buffer in bytes
    P5=R0;                  // Address of input array
    R7=[SP+44];             // Threshold value
    R0=[SP+36];             // Address of error_buffer
    SP+=-8;                 // Reserve 8 bytes on the stack for coefficients
    I2=SP;  
    B2=SP;
    L2=8;                   // coefficient table is an 8-byte circular buffer
    R6.L=3;                 // store 3,7,5,1 as coeff.
    W[I2++]=R6.L;
    R6.L=7;
    W[I2++]=R6.L;           
    I1=R0;                  // Address of error_buff
    B1=R0;
    L1=R3;                  // circular length = 2*col+2 bytes
    R6.L=5;
    W[I2++]=R6.L;
    R6.L=1;
    R0=R1-R1(NS)||W[I2++]=R6.L;
                            // clear R0 (errA = 0); I2 wraps back to the 
                            // start of the coefficient table
    P0=R1;                  // Number of rows
    P1=R2;                  // Number of columns
    R6=255;                 // value written for pixels above the threshold
    R1=R2-R2(NS)|| R0.H=W[I1++]|| R2=[I2++];
                            // errE = 0, errB = error_buff[0],
                            // R2.H = 7, R2.L = 3
    LSETUP(ROW_ST,ROW_END)LC0=P0;
                            // loop count == ROW 
ROW_ST: LSETUP(COL_ST,COL_END)LC1=P1;
                            // loop count == COL 
COL_ST:     R5=R1-R1(NS)||R4=B[P5++](Z)||R1.L=W[I1--];
                            // output = 0, fetch pixel, errC = err_buff[x+1];
                            // I1 steps back to err_buff[x]
            A1=R1.H*R2.H,A0=R1.L*R2.L(IS)||R3=[I2++];
                            // A1 = errE*7, A0 = errC*3;
                            // fetch R3.H = 1, R3.L = 5 
            R3=(A1+=R0.L*R3.H),R2=(A0+=R0.H*R3.L)(IS);
                            // A1 += errA*1, A0 += errB*5 
            R3=R3+R2(NS);   // add both partial sums
            R3=R3>>>4;      // arithmetic shift: divide by 16
            R3=R3+R4(NS);   // errF = pixel + weighted error
            CC=R3<=R7;      // CC set when errF <= threshold
            IF !CC R5=R6;   // errF > threshold: output = 255
            R2=R3-R6(NS)||B[P4++]=R5;
                            // R2 = errF-255, store the output pixel 
            IF !CC R3=R2;   // errF > threshold: errF = errF-255
            R0=PACK(R1.L,R0.H)||W[I1]=R3.L||I1+=4;
                            // errA=errB, errB=errC; err_buff[x] = errF;
                            // I1 moves on to err_buff[x+2]
COL_END:    R1=PACK(R3.L,R0.H)||R2=[I2++];
                            // errE=errF and fetch coeff. 7 & 3 
        R0.L=0;             // clear errA for the next row
ROW_END:
        R1.H=R2.L-R2.L(NS)||R0.H=W[I1++];
                            // clear errE and fetch err_buff[0] as errB 
    SP+=8;                  // release the coefficient table
    L1=0;                   // length registers back to zero (linear
    L2=0;                   // addressing) before returning to the caller
    (R7:4,P5:4)=[SP++];     // Pop R7-4, P5-4
ERR_DIFF_RET:
    RTS;
    NOP;                    // To avoid one stall if LINK or UNLINK happens to 
                            //be the  next instruction in the memory.
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -