⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 r8x8dct.s

📁 这是linux环境下的DCT变换的程序
💻 S
字号:
/**************************************
Prototype       : void _r8x8dct(fract16 *in, fract16 *coeff, fract16 *temp, fract16 *coeff1);

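                 *in -> Pointer to the 8x8 input matrix; it is overwritten and
                        holds the final output.
                 *coeff -> Pointer to the coefficients used by the first (row) pass.
                 *temp -> Pointer to temporary data (receives the row-pass output).
                 *coeff1 -> Pointer to the coefficients used by the second
                            (column) pass.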

Registers Used  : A0, A1, R0-R7, I0-I3, B0-B3, M0, M1, L0-L3, P0-P5, LC0, LC1.

Performance     :Cycle Count : 508 Cycles.

             
*******************************************************************************/
/*.section    L1_code;
.global     __r8x8dct;
.align      8;


__r8x8dct:*/

#include "config_bfin.h"
//#include "tr8x8dct.h"
//#include "mds_def.h"
/*
 * r8x8dct - two-pass transform of an 8x8 block of 16-bit values.
 *
 *   s      (R0)      input block; also receives the final result.
 *   coeff  (R1)      coefficient table read by the first (row) pass.
 *   temp   (R2)      scratch block; receives the output of the first pass.
 *   coeff1 ([SP+12]) coefficient table read by the second (column) pass.
 *
 * Each pass runs 8 loop iterations. One iteration consumes 8 consecutive
 * input values and stores 8 results with a stride of one row (16 bytes),
 * i.e. the output of a pass is written transposed. The first pass reads
 * "s" and writes "temp"; the second reads "temp" and writes "s".
 *
 * Both passes use already-consumed slots of their input buffer as scratch
 * storage, so the contents of "temp" are not meaningful on return.
 *
 * The coefficient tables are read through I3 as a 24-byte circular buffer
 * (six 32-bit words per loop iteration).
 *
 * L3 is cleared again before returning: the Blackfin C run-time model
 * expects the L registers to be zero on return to compiled code.
 */
DEFUN(r8x8dct,mL1,
        (fract16 *s, fract16 *coeff, fract16 *temp, fract16 *coeff1)):

/******************************* Function Prologue ***************************/
  
    B0 = R0;                //Pointer to Input matrix.
    B3 = R1;                //Pointer to Coefficients.
    B2 = R2;                //Pointer to Temporary matrix.
    R0=[sp+12];             //Fourth argument (coeff1); read before the push
                            //below so the offset is relative to the entry SP.
    [--SP] = (R7:4, P5:3);  //Pushing the Registers on stack.
    B1=R0;                  //B1 keeps coeff1 until the second pass.
    L0 = 0;                 //L registers are initialized to 0
    L1 = 0;                 //-------- do --------
    L2 = 0;                 //-------- do --------
    L3 = 24;                //L3 is set to 24 bytes to make the coefficients
                            //array Circular: each loop iteration performs
                            //six 32-bit reads through I3.
/*
 I0, I1, and I2 registers are used to read the input data. I3 register is used
 to read the coefficients. P0 and P1 registers are used for writing the output
 data.  
*/  
    
   M0 = 12 (X);            // All these initialization are used in the
                            // modification of address offsets.
    M1 = 16 (X);            // One row (8 x 2 bytes).
    P2 = 16;                // One row; P2 >> 1 = 8 is also the loop count.
    P3 = 32 (X);            // Two rows.
    P4 = -110 (X);          // P0 per iteration: 32+32+32+16-110 = +2 bytes.
    P5 = -62 (X);           // P1 per iteration: 32+32-62       = +2 bytes.
    P0 = 2;                 // NOTE(review): dead store, P0 is reloaded from
                            // B2 below before its first use.
/*
   First pass: B0 points to the "in" buffer and B2 points to the "temp"
   buffer. The input is read from "in" and the output is written to "temp".
   Before the second pass the two pointers are exchanged, so the input is
   read from "temp" and the output is written to "in". The "in" buffer holds
   the final output.
*/
    
        I0 = B0;            //I0 points to Input Element (0, 0)
        I1 = B0;            //Element 1 and 0 is read in R0.
        I1 += M0  || R0 = [I0++];                                       
                            //I1 points to Input Element (0, 6) 
        I2 = I1;            //Element 6 is read in R3.H
        I2 -= 4   || R3.H = W[I1++];                                     
                            //I2 points to Input Element (0, 4) 
    
        I3 = B3;            //I3 points to Coefficients
        P0 = B2;            //P0 points to temporary array Element (0, 0)
        P1 = B2;            //P1 points to temporary array
        R7 = [P1++P2] ||R2 = [I2++];                                                     
                            //P1 points to temporary array Element (1, 0) 
                            //R7 is a dummy read. Element 4 and 5 are read in R2
        R3.L = W[I1--];     //Element 7 is read in R3.L                 
        R1.H = W[I0++];     //Element 2 is read in R1.H                
    
//******************************* Implementation of Row ********************

    
        R0 = R0 +|+ R3, R3 = R0 -|- R3  || R1.L = W[I0++] || NOP;  //R0(1+6,0+7);R3(1-6,0-7)
        R1 = R1 +|+ R2, R2 = R1 -|- R2(CO)|| NOP ||  R7 = [I3++];//R1(2+5,3+4);R2(3-4,2-5)
                                                            
        R0 = R0 +|+R1, R1 = R0 -|- R1;    
       
        LSETUP (ROW_START, ROW_END) LC1 = P2 >> 1;
        
                            //The loop is set for 8 rows. 
                            //The butterflies for the next row are computed at
                            //the bottom of the loop (software pipelining), so
                            //the loop is entered with R0-R3 already prepared.
ROW_START:
                   
                            //R2.L and R3.L are exchanged through memory
                            //(W[I0] / W[I1]), using input slots that have
                            //already been read as scratch.
                            //R6 is the second word of the coefficient table;
                            //it is added to the results before they are
                            //stored (looks like a rounding term - confirm
                            //against the table contents).
            R5 = (A1 = R0.L * R7.L), R4 = (A0 = R0.H * R7.L)(IS)|| W[I1]=R3.L||R6= [I3++];
            R0.L = R5+R4(RND12)|| R7 = [I3++]   || W[I0]=R2.L;
            R5.L = R5-R4(RND12)||W[P0++P3] = R0.L || R3.L=W[I0];
                            //Store to output row 0 of this column.

            A1 = R1.H * R7.L, A0 = R1.L * R7.L(IS) ||I0+=4         || R2.L=W[I1];
            R1 = (A1 += R1.L * R7.H), R0 = (A0 -= R1.H * R7.H)(IS) ||  I0 += 4
            || R7 = [I3++]; //I0 now points to the start of the next input row.
            R1.L=R1+R6(RND12)|| I1+=M1;
                            //I1 moves to element 6 of the next input row.
            R0.L=R0+R6(RND12)||W[P0++P3] = R1.L;
                            //Store to output row 2.
                                                                           // R7=(C1,C7)
           A1 =R7.H*R2.L,       A0 =R7.L*R2.L(IS) || W[P0++P3] = R5.L;
                            //Store to output row 4.
           A1+=R7.L*R2.H,       A0-=R7.H*R2.H (IS)|| W[P0++P2] = R0.L || R7=[I3++];
                            //Store to output row 6.
           A1+=R7.H*R3.H,       A0-=R7.L*R3.H (IS)|| R0 = [I0++];                     // R7=(C3,C5)
                            //Elements 0 and 1 of the next row are read in R0.
           R5 =(A1+=R7.L*R3.L), R4 =(A0+=R7.H*R3.L)(IS) || R1.H = W[I0++];
           R4.L=R4+R6(RND12)|| R1.L = W[I0++] ;
           R5.L=R5+R6(RND12)|| W[P0++P4] = R4.L;
                            //Store to output row 7; P0 then steps back to
                            //row 0 of the next output column.

           A1 =R7.L*R2.L,       A0 =R7.H*R2.L(IS) || W[P1++P3] = R5.L;
                            //Store to output row 1.
           A1+=R7.H*R2.H,       A0-=R7.L*R2.H(IS) || R7=[I3++] ;     // R7=(C1,C7)
           A1-=R7.H*R3.H,       A0-=R7.L*R3.H(IS) || R3.H = W[I1++];
                            //Element 6 of the next row is read in R3.H.
           R5 =(A1+=R7.L*R3.L), R4 =(A0-=R7.H*R3.L)(IS) || R3.L = W[I1--];
                            //Element 7 of the next row is read in R3.L.
           R4.L=R4+R6(RND12)||I2+=M0 ;
                            //I2 moves to element 4 of the next input row.
           R5.L=R5+R6(RND12)|| W[P1++P3] = R4.L;
                            //Store to output row 3.
          
                            //Store to output row 5; P1 then steps back to
                            //row 1 of the next output column. The butterflies
                            //for the next row follow.
            R0 = R0 +|+ R3, R3 = R0 -|- R3     || W[P1++P5] = R5.L || R2 = [I2++];     
            R1 = R1 +|+ R2, R2 = R1 -|- R2 (CO)|| R7= [I3++];
ROW_END:    R0 = R0 +|+ R1, R1 = R0 -|- R1;
        B3 = B1;            //The second pass uses the coeff1 table.
        B1 = B0;            //Swapping of Input and output address pointers
        B0 = B2;            //B0 points to input buffer.
        B2 = B1;            //B2 points to output buffer.
        I0 = B0;            //I0 points to Input Element (0, 0)
        I1 = B0;            //Element 1 and 0 is read in R0.
        I1 += M0  || R0 = [I0++];                                       
                            //I1 points to Input Element (0, 6) 
        I2 = I1;            //Element 6 is read in R3.H
        I2 -= 4   || R3.H = W[I1++];                                     
                            //I2 points to Input Element (0, 4) 
    
        I3 = B3;            //I3 points to Coefficients
        P0 = B2;            //P0 points to output buffer Element (0, 0)
        P1 = B2;            //P1 points to output buffer
        R7 = [P1++P2] ||R2 = [I2++];                                                     
                            //P1 points to output buffer Element (1, 0) 
                            //R7 is a dummy read. Element 4 and 5 are read in R2
        R3.L = W[I1--];     //Element 7 is read in R3.L                 
        R1.H = W[I0++];     //Element 2 is read in R1.H                
    
//******************************* Implementation of Column ********************

    
        R0 = R0 +|+ R3, R3 = R0 -|- R3  || R1.L = W[I0++] || NOP;  //R0(1+6,0+7);R3(1-6,0-7)
        R1 = R1 +|+ R2, R2 = R1 -|- R2(CO)|| NOP ||  R7 = [I3++];//R1(2+5,3+4);R2(3-4,2-5)
                                                                   
        LSETUP (COLUMN_START, COLUMN_END) LC1 = P2 >> 1;
        
                            //The loop is set for 8 columns. 
                            //Same structure as the row loop, but results are
                            //accumulated in 32 bits, R6 is added with
                            //saturation and the high half-word is stored.
COLUMN_START:
                   
            A1 = R0.L * R7.L, A0 = R0.H * R7.L(IS)|| W[I1]=R3.L||R6= [I3++];
            A1 += R1.L * R7.L, A0 += R1.H * R7.L(IS)|| R7 = [I3++] || W[I0]=R2.L;
            R5=A1+A0,R4=A1-A0(S);

            R5=R5+R6(S)||I2+=M0;
                            //I2 moves to element 4 of the next input row.
            R4=R4+R6(S)||W[P0++P3] = R5.H|| R3.L=W[I0];
                            //Store to output row 0 of this column.

            A1 = R0.L * R7.H, A0 = R0.L * R7.L(IS)||I0+=4 || R2.L=W[I1];
            A1 -= R1.L * R7.H, A0 -= R1.L * R7.L(IS);
            A1 += R0.H * R7.L, A0 -= R0.H * R7.H(IS);
            R1=(A1 -= R1.H * R7.L), R0=(A0 += R1.H * R7.H)(IS)||  I0 += 4
            || R7 = [I3++]; //I0 now points to the start of the next input row.
            R1=R1+R6(S)||I1+=M1;
                            //I1 moves to element 6 of the next input row.
            R0=R0+R6(S)||W[P0++P3] = R1.H;
                            //Store to output row 2.
                                                                     // R7=(C1,C7)
           A1 =R7.H*R2.L,       A0 =R7.L*R2.L(IS) || W[P0++P3] = R4.H;
                            //Store to output row 4.
           A1+=R7.L*R2.H,       A0-=R7.H*R2.H (IS)|| W[P0++P2] = R0.H || R7=[I3++];
                            //Store to output row 6.
           A1+=R7.H*R3.H,       A0-=R7.L*R3.H (IS)|| R0 = [I0++];                     // R7=(C3,C5)
                            //Elements 0 and 1 of the next row are read in R0.
           R5 =(A1+=R7.L*R3.L), R4 =(A0+=R7.H*R3.L)(IS) || R1.H = W[I0++];
           R4=R4+R6(S)||R1.L = W[I0++] ;
           R5=R5+R6(S)||W[P0++P4] = R4.H;
                            //Store to output row 7; P0 then steps back to
                            //row 0 of the next output column.

           A1 =R7.L*R2.L,       A0 =R7.H*R2.L(IS) || W[P1++P3] = R5.H;
                            //Store to output row 1.
           A1+=R7.H*R2.H,       A0-=R7.L*R2.H (IS)|| R7=[I3++] ;     // R7=(C1,C7)
           A1-=R7.H*R3.H,       A0-=R7.L*R3.H (IS)|| R3.H = W[I1++];
                            //Element 6 of the next row is read in R3.H.
           R5 =(A1+=R7.L*R3.L), R4 =(A0-=R7.H*R3.L)(IS) || R3.L = W[I1--];
                            //Element 7 of the next row is read in R3.L.
           R4=R4+R6(S);
           R5=R5+R6(S)||W[P1++P3] = R4.H;
                            //Store to output row 3.

                            //Store to output row 5; P1 then steps back to
                            //row 1 of the next output column. The butterflies
                            //for the next row follow.
            R0 = R0 +|+ R3, R3 = R0 -|- R3     || W[P1++P5] = R5.H || R2 = [I2++];     
COLUMN_END: R1 = R1 +|+ R2, R2 = R1 -|- R2 (CO)|| R7= [I3++];
           

/******************************* Function Epilogue ***************************/

        L3 = 0;             //Disable circular addressing on I3 again; the
                            //L registers must be zero when returning to
                            //compiled code.
        (R7:4,P5:3) = [SP++];   //Pop the registers before returning.
        RTS;
DEFUN_END(r8x8dct)        

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -