⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hpel_8x8_16.asm

📁 adi bf533视频编码程序
💻 ASM
📖 第 1 页 / 共 2 页
字号:
    [--SP] = RETS;
    L0 = 0;
    L1 = 0;
    L2 = 0;
    L3 = 0;
    P5 = R0;                // Address of the best matching block
    R5 = R1;                // SAD corresponding to the best match
    P4 = R2;                // Address of the target block
    P3 = [SP + 44];         // Width of the reference window
    
    
//********************store the best matching block**********************

    I0 = R0;                // Address of best match   
    I1 = R0;                // Address of best match
    P0 = [SP + 64];         //address of MBy_match
    I3 = P0;                //address of MBy_match 
    M3 = 12(Z);
    P2 = 8 (Z);             //16
    R1 = P3;                // WINWIDTH
    R1 += -8;              //-20
    M1 = R1;               //M1的值再定一下

    LSETUP(STORE_BEST_T_ST,STORE_BEST_T_END) LC0 = P2;
    DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
STORE_BEST_T_ST:
        DISALGNEXCPT || R1 = [I0++] || R3  =[I1++]; 
        R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M1] || R2  =[I1++M1]; 
        R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
STORE_BEST_T_END:       
		DISALGNEXCPT  || [I3++M3] = R6 || R2  =[I1++]; 
                                      
//************************************************************************
    
    
    P0 = 344;
    SP -= P0;
    I3 = SP;                // Temporary buffer in stack
    R4 = 0;
//******************** INTERPOLATE DIAGONAL BLOCKS ********************
    M0 = 7;
    M3 = -3 (X);
    R0 = P5;                // Address of the best match
    R0 += -1;
    I0 = R0;
    I1 = R0;
    R2 = P3;                // WINWIDTH
    R1 = R0 - R2 (S);
    I2 = R1;                // Address of best match - (WINWIDTH+1)
    R2 += -9;        //-17
    M1 = R2;
    P2 = 9;   //17

    LSETUP(AVG4_T_ST, AVG4_T_END) LC0 = P2;
    DISALGNEXCPT || R0 = [I1++] || R2 = [I2++];
    DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
    R7 = BYTEOP2P(R1:0,R3:2) (TL) || R0 = [I1++M0];
AVG4_T_ST:    
        DISALGNEXCPT || I0 += M3 || R2 = [I2++M0];
        R6 = BYTEOP2P(R1:0,R3:2) (TH) || R0 = [I1++M3] || R2 = [I2++M3];
        R7 = R6 + R7 (NS) || I0 -= M3;
    
        R7 = BYTEOP2P(R1:0,R3:2) (TL, R) || [I3++] = R7 || R1 = [I1++M0];
        DISALGNEXCPT || I0 += M3 || R3 = [I2++M0];
        R6 = BYTEOP2P(R1:0,R3:2) (TH, R) || R1 = [I1++M3] || R3 = [I2++M3];
        R7 = R6 + R7 (NS) || I0 -= M3;
    
        R7 = BYTEOP2P(R1:0,R3:2) (TL) || [I3++] = R7 || R0 = [I1++M1];
        DISALGNEXCPT || I0 += M3 || R2 = [I2++M1];
        R6 = BYTEOP2P(R1:0,R3:2) (TH) || R0 = [I1++] || R2 = [I2++];
        R7 = R6 + R7 (NS) || I0 -= M3;
                   
        DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
AVG4_T_END:
        R7 = BYTEOP2P(R1:0,R3:2) (TL) || [I3++] = R7 || R0 = [I1++M0];       
    
//**************** CALCULATE SAD FOR DIAGONAL BLOCKS *******************
	R6=0(Z);                   //the offset from SP
    R7 = -1;                // (V,H) -> R7.H = -1, R7.L = -1
    R0 = P4;
    R1 = SP;
    CALL _compute_sad_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    
    R7.L = 1;               // (V,H) -> R7.H = -1, R7.L = 1
    R0 = P4;
    R1 = SP;
    R1 += 1;
    CALL _compute_sad_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    R1 = 1(Z);
    IF CC R6 = R1;
    
    R7.H = 1;               // (V,H) -> R7.H = 1, R7.L = 1
    R0 = P4;
    R1 = SP;
    R1 += 13;           //21
    CALL _compute_sad_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    R1 = 13(Z);         //13
    IF CC R6 = R1;
    
    R7.L = -1;              // (V,H) -> R7.H = 1, R7.L = -1
    R0 = P4;
    R1 = SP;
    R1 += 12;           //20
    CALL _compute_sad_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    R1 = 12(Z);         //12
    IF CC R6 = R1;
    
    CC = R4==0;
    IF CC JUMP STORE_BYPASS0_T; 
    //if half pel match better, store the interpolated block
 //********************store the best matching block**********************
	R0 = SP;
	R0 = R0 + R6;
    I0 = R0;                // Address of half pel best match   
    I1 = R0;                
    P0 = [SP + 408];         //address of MBy_match :SP + 344 + 64
    I3 = P0;                 //address of the 8x8 block in MBy_match 
    P2 = 8 (Z);      //16
    M3 = 12(Z);
    
    LSETUP(STORE_BEST_T_ST0,STORE_BEST_T_END0) LC0 = P2;
    DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
STORE_BEST_T_ST0:
        DISALGNEXCPT || R1 = [I0++] || R3  =[I1++]; 
        R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2  =[I1++]; 
        R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
        //DISALGNEXCPT  || [I3++] = R6 || R3  =[I1++];            
        //R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2  =[I1++];
        //R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++]; 
STORE_BEST_T_END0:       
		DISALGNEXCPT || [I3++M3] = R6 || R2  =[I1++];               

//************************************************************************
   
STORE_BYPASS0_T:    
//******************** INTERPOLATE LEFT/RIGHT BLOCKS ********************
    R0 = P5;                // Address of the best match
    I0 = R0;                // Address of best match
    R0 += -1;
    I1 = R0;                // Address of best match - 1
    I3 = SP;                // Output buffer
    P2 = 8 (Z);             //16
    R1 = P3;                // WINWIDTH
    R1 += -12;                //20
    M1 = R1;
    MNOP;

    LSETUP(AVG2_LR_T_ST, AVG2_LR_T_END) LC0 = P2;
    DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
AVG2_LR_T_ST:
        DISALGNEXCPT || R1 = [I0++] || R3  =[I1++]; 
        R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++] || R2  =[I1++]; 
        R6 = BYTEOP1P(R1:0,R3:2)(T,R) || [I3++] = R6 || R1 = [I0++M1];
        DISALGNEXCPT  || [I3++] = R6 || R3  =[I1++M1];            
        //R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2  =[I1++];
        //R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R1 = [I0++M1]; 
        //DISALGNEXCPT || [I3++] = R6 || R3  =[I1++M1];               
        R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++];
AVG2_LR_T_END:
        DISALGNEXCPT || [I3++] = R6 || R2  =[I1++];
    
//**************** CALCULATE SAD FOR LEFT/RIGHT BLOCKS *******************
    P2 = R4;                //backup R4
	R7 = 1;                 // (V,H) -> R7.H = 0, R7.L = 1
    R0 = P4;
    R1 = SP;
    R1 += 1;
    CALL _compute_sad_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    
    R7.L = -1;              // (V,H) -> R7.H = 0, R7.L = -1
    R0 = P4;
    R1 = SP;
    CALL _compute_sad_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    
    R3 = P2;
    CC = R4==R3;         //R4 has been modified?
    IF CC JUMP STORE_BYPASS1_T;
 
    //if half pel match better, store the 
//********************store the best matching block**********************
	R0 = SP;
    I0 = R0;                // Address of half pel interpolated block                
    P0 = [SP + 408];         //address of MBy_match :SP + 344 + 64
    I3 = P0;                //address of MBy_match 
    P2 = 8 (Z);          //16
    CC = R4==R7;
    M3 = 12(Z);
    
    IF !CC JUMP STORE_DISALIGN_T;

    LSETUP(STORE_BEST_ST1_ALIGN_T,STORE_BEST_END1_ALIGN_T) LC0 = P2;
    M1 = 8(Z);
    R0 = [I0++];
STORE_BEST_ST1_ALIGN_T: // (V,H)=(0,-1)
    //[I3++]=R0||R0 = [I0++];
    //[I3++]=R0||R0 = [I0++];
    [I3++]=R0||R0 = [I0 ++ M1];
STORE_BEST_END1_ALIGN_T:    
	[I3++M3]=R0||R0 = [I0++];
	
	JUMP STORE_BYPASS1_T;
	
STORE_DISALIGN_T:	   // (V,H)=(0,1)
    LSETUP(STORE_BEST_ST1_T,STORE_BEST_END1_T) LC0 = P2;
    DISALGNEXCPT || R0 = [I0++];
    DISALGNEXCPT || R1 = [I0++];
STORE_BEST_ST1_T:
		R6 = ALIGN8(R1,R0) || R0 = [I0++];
		R6 = ALIGN8(R0,R1) || [I3++] = R6 || R0 = [I0++];		
        //R6 = ALIGN8(R1,R0) || [I3++] = R6 || R0  =[I0++]; 
        //R6 = ALIGN8(R0,R1) || [I3++] = R6 || R0 = [I0++];                
		
STORE_BEST_END1_T:		
		DISALGNEXCPT || [I3++M3] = R6 || R1 = [I0++];
							
    
STORE_BYPASS1_T:   
//******************** INTERPOLATE TOP/BOTTOM BLOCKS ********************
    R1 = P3;                // WINWIDTH
    R0 = P5;                // Address of the best match
    I0 = R0;                // Address of best match
    R0 = R0 - R1(S) || NOP;
    I1 = R0;                // Address of best match - WINWIDTH
    I3 = SP;                // Output buffer
    P2 = 9;                 //17
    R1 += -8;               //16
    M1 = R1;

    LSETUP(AVG2_TB_T_ST, AVG2_TB_T_END) LC0 = P2;
    DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
AVG2_TB_T_ST:
        DISALGNEXCPT || R1 = [I0++] || R3  =[I1++]; 
        R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M1] || R2  =[I1++M1]; 
        R6 = BYTEOP1P(R1:0,R3:2)(T,R) || [I3++] = R6 || R0 = [I0++];
        //DISALGNEXCPT  || [I3++] = R6 || R3  =[I1++];            
        //R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M1] || R2  =[I1++M1];
        //R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++]; 
AVG2_TB_T_END:
        DISALGNEXCPT || [I3++] = R6 || R2  = [I1++];             
    
//**************** CALCULATE SAD FOR TOP/BOTTOM BLOCKS *******************
    
	P2 = R4;                //backup R4
	R7.H = -1;              // (V,H) -> R7.H = -1, R7.L = 0
    R7.L = 0;
    R0 = P4;
    R1 = SP;
    CALL _compute_sad_aligned_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
    
    R7.H = 1;               // (V,H) -> R7.H = 1, R7.L = 0
    R0 = P4;
    R1 = SP;
    R1 += 8;      //16
    CALL _compute_sad_aligned_8x8_16;
    CC = R0 < R5;
    IF CC R5 = R0;
    IF CC R4 = R7;
 
//********************store the best matching block**********************  
    R3 = P2;
    CC = R4==R3;
    IF CC JUMP STORE_BYPASS2_T;
    
    R0 = 0(Z);
    R1 = 8(Z);          //16
    CC = R4==R7; 
    IF !CC R1=R0;
    R0 = SP;
    R0 = R0+R1;          // Address of half pel best match
    I0 = R0;
    P0 = [SP + 408];         //address of MBy_match :SP + 344 + 64
    I3 = P0;                //address of MBy_match     
    P2 = 8(Z);       //16 
    M3 = 12(Z);
    
    LSETUP(STORE_BEST_T_ST2,STORE_BEST_T_END2) LC0 = P2;
    R0 = [I0++];
STORE_BEST_T_ST2:
    [I3++]=R0||R0 = [I0++];
    
STORE_BEST_T_END2:    
	[I3++M3]=R0||R0 = [I0++];
		 
    
STORE_BYPASS2_T:    
//********************************************************************
    R0 = R4;                // Return horizontal and vertical half pel
    P0 = 344;
    SP = SP + P0;
    [SP + 48] = R5;        //store SAD corresponding to the best match
    RETS = [SP++];
    (R7:4, P5:3) = [SP++];
__hpel_8x8_16_T.end:
    RTS;
    

*/
////////////////////////////////////////////////////////////////////////////    
.align 8;
//--------------------------------------------------------------------------
// _compute_sad_8x8_16
//   Sum of absolute differences (SAD) between an 8x8 block of bytes in the
//   target and an 8x8 block of bytes in an interpolated buffer.
//
//   In:   R0 = address of the target block. Rows are read 16 bytes apart
//              (one 4-byte post-increment plus M0 = 12 per row).
//         R1 = address of the interpolated block. Rows are read 12 bytes
//              apart (three 4-byte post-increments per row).
//   Out:  R0 = saturating sum of the four partial sums accumulated in
//              A0.L, A0.H, A1.L and A1.H.
//   Clobbers: R1-R3, P0, I0, I1, M0, A0, A1, LC0/LT0/LB0, ASTAT.
//
//   Notes:
//   - Leaf routine: it issues no CALL and never writes R4-R7 or P3-P5, so
//     nothing needs to be saved on the stack.
//   - The interpolated-block loads that are not paired with SAA are issued
//     with DISALGNEXCPT.
//   - NOTE(review): the first SAA of the first row runs before R1 (second
//     target word) has been loaded, so R0 is presumably expected to be
//     32-bit aligned -- confirm against callers.
//   - NOTE(review): L0/L1 are not written here; callers presumably leave
//     them at 0 so that I0/I1 address linearly -- confirm.
//   - On the last pass the trailing loads at MAD_END1 fetch one word beyond
//     the final row of each block; the loaded values are never used.
//--------------------------------------------------------------------------
_compute_sad_8x8_16:
    M0 = 12;                // Target row step: 4 (post-inc) + 12 = 16 bytes

    I0 = R0;                // Address of the target
    I1 = R1;                // Address of the interpolated block
    P0 = 8 (Z);             // Row count
    A1 = A0 = 0;            // Clear both accumulators before the first SAA

    LSETUP (MAD_START1, MAD_END1) LC0=P0;
    // Preload word 0 of the first row of each block.
    DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
MAD_START1:
        // Word 1 of the interpolated row, needed to form the R3:2 pair.
        DISALGNEXCPT || R3 = [I1++];
        // Bytes 0-3 of the row; fetch target word 1 and interpolated word 2.
        SAA (R1:0,R3:2) || R1 = [I0++M0]  || R2 = [I1++];

MAD_END1:
        // Bytes 4-7 of the row (reversed pair order); preload the next row.
        SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
    R3=A1.L+A1.H,R2=A0.L+A0.H;
    R0 = R2 + R3 (S);       // Add the accumulated values in both MACs

_compute_sad_8x8_16.end:
    RTS;
    
.align 8;
//--------------------------------------------------------------------------
// _compute_sad_aligned_8x8_16
//   Sum of absolute differences (SAD) between an 8x8 block of bytes in the
//   target and an 8x8 block of bytes in an interpolated buffer.
//
//   In:   R0 = address of the target block. Rows are read 16 bytes apart
//              (one 4-byte post-increment plus M0 = 12 per row).
//         R1 = address of the interpolated block. Rows are read 8 bytes
//              apart (two 4-byte post-increments per row).
//   Out:  R0 = saturating sum of the four partial sums accumulated in
//              A0.L, A0.H, A1.L and A1.H.
//   Clobbers: R1-R3, P0, I0, I1, M0, A0, A1, LC0/LT0/LB0, ASTAT.
//
//   Notes:
//   - Leaf routine: it issues no CALL and never writes R4-R7 or P3-P5, so
//     nothing needs to be saved on the stack.
//   - The preload below is not covered by DISALGNEXCPT, and each SAA uses
//     only one freshly loaded word per operand.
//     NOTE(review): both pointers are therefore presumably required to be
//     32-bit aligned, as the routine name suggests -- confirm.
//   - NOTE(review): L0/L1 are not written here; callers presumably leave
//     them at 0 so that I0/I1 address linearly -- confirm.
//   - On the last pass the trailing loads at MAD_END fetch one word beyond
//     the final row of each block; the loaded values are never used.
//--------------------------------------------------------------------------
_compute_sad_aligned_8x8_16:
    M0 = 12;                // Target row step: 4 (post-inc) + 12 = 16 bytes

    I0 = R0;                // Address of the target
    I1 = R1;                // Address of the interpolated block
    P0 = 8;                 // Row count

    LSETUP (MAD_START, MAD_END) LC0=P0;
    // Initialize accumulators and preload word 0 of the first row of each
    // block.
    A1=A0=0 || R0 = [I0++] || R2 = [I1++];
MAD_START:
        // Bytes 0-3 of the row; fetch word 1 of both rows.
        SAA (R1:0,R3:2) || R1 = [I0++M0]  || R3 = [I1++];

        // Bytes 4-7 of the row (reversed pair order); preload the next row.
MAD_END:SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
    R3=A1.L+A1.H,R2=A0.L+A0.H;
    R0 = R2 + R3 (S);       // Add the accumulated values in both MACs

_compute_sad_aligned_8x8_16.end:
    RTS;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -