📄 hpel_8x8_16.asm
字号:
[--SP] = RETS;
L0 = 0;
L1 = 0;
L2 = 0;
L3 = 0;
P5 = R0; // Address of the best matching block
R5 = R1; // SAD corresponding to the best match
P4 = R2; // Address of the target block
P3 = [SP + 44]; // Width of the reference window
//********************store the best matching block**********************
I0 = R0; // Address of best match
I1 = R0; // Address of best match
P0 = [SP + 64]; //address of MBy_match
I3 = P0; //address of MBy_match
M3 = 12(Z);
P2 = 8 (Z); //16
R1 = P3; // WINWIDTH
R1 += -8; //-20
M1 = R1; //M1的值再定一下
LSETUP(STORE_BEST_T_ST,STORE_BEST_T_END) LC0 = P2;
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
STORE_BEST_T_ST:
DISALGNEXCPT || R1 = [I0++] || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M1] || R2 =[I1++M1];
R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
STORE_BEST_T_END:
DISALGNEXCPT || [I3++M3] = R6 || R2 =[I1++];
//************************************************************************
P0 = 344;
SP -= P0;
I3 = SP; // Temporary buffer in stack
R4 = 0;
//******************** INTERPOLATE DIAGONAL BLOCKS ********************
M0 = 7;
M3 = -3 (X);
R0 = P5; // Address of the best match
R0 += -1;
I0 = R0;
I1 = R0;
R2 = P3; // WINWIDTH
R1 = R0 - R2 (S);
I2 = R1; // Address of best match - (WINWIDTH+1)
R2 += -9; //-17
M1 = R2;
P2 = 9; //17
LSETUP(AVG4_T_ST, AVG4_T_END) LC0 = P2;
DISALGNEXCPT || R0 = [I1++] || R2 = [I2++];
DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
R7 = BYTEOP2P(R1:0,R3:2) (TL) || R0 = [I1++M0];
AVG4_T_ST:
DISALGNEXCPT || I0 += M3 || R2 = [I2++M0];
R6 = BYTEOP2P(R1:0,R3:2) (TH) || R0 = [I1++M3] || R2 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3;
R7 = BYTEOP2P(R1:0,R3:2) (TL, R) || [I3++] = R7 || R1 = [I1++M0];
DISALGNEXCPT || I0 += M3 || R3 = [I2++M0];
R6 = BYTEOP2P(R1:0,R3:2) (TH, R) || R1 = [I1++M3] || R3 = [I2++M3];
R7 = R6 + R7 (NS) || I0 -= M3;
R7 = BYTEOP2P(R1:0,R3:2) (TL) || [I3++] = R7 || R0 = [I1++M1];
DISALGNEXCPT || I0 += M3 || R2 = [I2++M1];
R6 = BYTEOP2P(R1:0,R3:2) (TH) || R0 = [I1++] || R2 = [I2++];
R7 = R6 + R7 (NS) || I0 -= M3;
DISALGNEXCPT || R1 = [I1++M3] || R3 = [I2++M3];
AVG4_T_END:
R7 = BYTEOP2P(R1:0,R3:2) (TL) || [I3++] = R7 || R0 = [I1++M0];
//**************** CALCULATE SAD FOR DIAGONAL BLOCKS *******************
R6=0(Z); //the offset from SP
R7 = -1; // (V,H) -> R7.H = -1, R7.L = -1
R0 = P4;
R1 = SP;
CALL _compute_sad_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.L = 1; // (V,H) -> R7.H = -1, R7.L = 1
R0 = P4;
R1 = SP;
R1 += 1;
CALL _compute_sad_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R1 = 1(Z);
IF CC R6 = R1;
R7.H = 1; // (V,H) -> R7.H = 1, R7.L = 1
R0 = P4;
R1 = SP;
R1 += 13; //21
CALL _compute_sad_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R1 = 13(Z); //13
IF CC R6 = R1;
R7.L = -1; // (V,H) -> R7.H = 1, R7.L = -1
R0 = P4;
R1 = SP;
R1 += 12; //20
CALL _compute_sad_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R1 = 12(Z); //12
IF CC R6 = R1;
CC = R4==0;
IF CC JUMP STORE_BYPASS0_T;
//if half pel match better, store the interpolated block
//********************store the best matching block**********************
R0 = SP;
R0 = R0 + R6;
I0 = R0; // Address of half pel best match
I1 = R0;
P0 = [SP + 408]; //address of MBy_match :SP + 344 + 64
I3 = P0; //address of the 8x8 block in MBy_match
P2 = 8 (Z); //16
M3 = 12(Z);
LSETUP(STORE_BEST_T_ST0,STORE_BEST_T_END0) LC0 = P2;
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
STORE_BEST_T_ST0:
DISALGNEXCPT || R1 = [I0++] || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
//DISALGNEXCPT || [I3++] = R6 || R3 =[I1++];
//R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++];
//R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
STORE_BEST_T_END0:
DISALGNEXCPT || [I3++M3] = R6 || R2 =[I1++];
//************************************************************************
STORE_BYPASS0_T:
//******************** INTERPOLATE LEFT/RIGHT BLOCKS ********************
R0 = P5; // Address of the best match
I0 = R0; // Address of best match
R0 += -1;
I1 = R0; // Address of best match - 1
I3 = SP; // Output buffer
P2 = 8 (Z); //16
R1 = P3; // WINWIDTH
R1 += -12; //20
M1 = R1;
MNOP;
LSETUP(AVG2_LR_T_ST, AVG2_LR_T_END) LC0 = P2;
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
AVG2_LR_T_ST:
DISALGNEXCPT || R1 = [I0++] || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++] || R2 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2)(T,R) || [I3++] = R6 || R1 = [I0++M1];
DISALGNEXCPT || [I3++] = R6 || R3 =[I1++M1];
//R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++];
//R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R1 = [I0++M1];
//DISALGNEXCPT || [I3++] = R6 || R3 =[I1++M1];
R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++];
AVG2_LR_T_END:
DISALGNEXCPT || [I3++] = R6 || R2 =[I1++];
//**************** CALCULATE SAD FOR LEFT/RIGHT BLOCKS *******************
P2 = R4; //backup R4
R7 = 1; // (V,H) -> R7.H = 0, R7.L = 1
R0 = P4;
R1 = SP;
R1 += 1;
CALL _compute_sad_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.L = -1; // (V,H) -> R7.H = 0, R7.L = -1
R0 = P4;
R1 = SP;
CALL _compute_sad_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R3 = P2;
CC = R4==R3; //R4 has been modified?
IF CC JUMP STORE_BYPASS1_T;
//if half pel match better, store the
//********************store the best matching block**********************
R0 = SP;
I0 = R0; // Address of half pel interpolated block
P0 = [SP + 408]; //address of MBy_match :SP + 344 + 64
I3 = P0; //address of MBy_match
P2 = 8 (Z); //16
CC = R4==R7;
M3 = 12(Z);
IF !CC JUMP STORE_DISALIGN_T;
LSETUP(STORE_BEST_ST1_ALIGN_T,STORE_BEST_END1_ALIGN_T) LC0 = P2;
M1 = 8(Z);
R0 = [I0++];
STORE_BEST_ST1_ALIGN_T: // (V,H)=(0,-1)
//[I3++]=R0||R0 = [I0++];
//[I3++]=R0||R0 = [I0++];
[I3++]=R0||R0 = [I0 ++ M1];
STORE_BEST_END1_ALIGN_T:
[I3++M3]=R0||R0 = [I0++];
JUMP STORE_BYPASS1_T;
STORE_DISALIGN_T: // (V,H)=(0,1)
LSETUP(STORE_BEST_ST1_T,STORE_BEST_END1_T) LC0 = P2;
DISALGNEXCPT || R0 = [I0++];
DISALGNEXCPT || R1 = [I0++];
STORE_BEST_ST1_T:
R6 = ALIGN8(R1,R0) || R0 = [I0++];
R6 = ALIGN8(R0,R1) || [I3++] = R6 || R0 = [I0++];
//R6 = ALIGN8(R1,R0) || [I3++] = R6 || R0 =[I0++];
//R6 = ALIGN8(R0,R1) || [I3++] = R6 || R0 = [I0++];
STORE_BEST_END1_T:
DISALGNEXCPT || [I3++M3] = R6 || R1 = [I0++];
STORE_BYPASS1_T:
//******************** INTERPOLATE TOP/BOTTOM BLOCKS ********************
R1 = P3; // WINWIDTH
R0 = P5; // Address of the best match
I0 = R0; // Address of best match
R0 = R0 - R1(S) || NOP;
I1 = R0; // Address of best match - WINWIDTH
I3 = SP; // Output buffer
P2 = 9; //17
R1 += -8; //16
M1 = R1;
LSETUP(AVG2_TB_T_ST, AVG2_TB_T_END) LC0 = P2;
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
AVG2_TB_T_ST:
DISALGNEXCPT || R1 = [I0++] || R3 =[I1++];
R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M1] || R2 =[I1++M1];
R6 = BYTEOP1P(R1:0,R3:2)(T,R) || [I3++] = R6 || R0 = [I0++];
//DISALGNEXCPT || [I3++] = R6 || R3 =[I1++];
//R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M1] || R2 =[I1++M1];
//R6 = BYTEOP1P(R1:0,R3:2)(R) || [I3++] = R6 || R0 = [I0++];
AVG2_TB_T_END:
DISALGNEXCPT || [I3++] = R6 || R2 = [I1++];
//**************** CALCULATE SAD FOR TOP/BOTTOM BLOCKS *******************
P2 = R4; //backup R4
R7.H = -1; // (V,H) -> R7.H = -1, R7.L = 0
R7.L = 0;
R0 = P4;
R1 = SP;
CALL _compute_sad_aligned_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
R7.H = 1; // (V,H) -> R7.H = 1, R7.L = 0
R0 = P4;
R1 = SP;
R1 += 8; //16
CALL _compute_sad_aligned_8x8_16;
CC = R0 < R5;
IF CC R5 = R0;
IF CC R4 = R7;
//********************store the best matching block**********************
R3 = P2;
CC = R4==R3;
IF CC JUMP STORE_BYPASS2_T;
R0 = 0(Z);
R1 = 8(Z); //16
CC = R4==R7;
IF !CC R1=R0;
R0 = SP;
R0 = R0+R1; // Address of half pel best match
I0 = R0;
P0 = [SP + 408]; //address of MBy_match :SP + 344 + 64
I3 = P0; //address of MBy_match
P2 = 8(Z); //16
M3 = 12(Z);
LSETUP(STORE_BEST_T_ST2,STORE_BEST_T_END2) LC0 = P2;
R0 = [I0++];
STORE_BEST_T_ST2:
[I3++]=R0||R0 = [I0++];
STORE_BEST_T_END2:
[I3++M3]=R0||R0 = [I0++];
STORE_BYPASS2_T:
//********************************************************************
R0 = R4; // Return horizontal and vertical half pel
P0 = 344;
SP = SP + P0;
[SP + 48] = R5; //store SAD corresponding to the best match
RETS = [SP++];
(R7:4, P5:3) = [SP++];
__hpel_8x8_16_T.end:
RTS;
*/
////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////
// _compute_sad_8x8_16
//
// Sum of absolute differences (SAD) between an 8x8 block of target pixels
// and an 8x8 candidate block that may start on any byte boundary.
//
// In:   R0 = address of the target block. One 32-bit word is fetched per
//            half row and consecutive rows are read 16 bytes apart.
//            NOTE(review): presumably 32-bit aligned, since only a single
//            word per half row is loaded through I0 - confirm with callers.
//       R1 = address of the candidate block. Three 32-bit words are
//            fetched per row (rows are read 12 bytes apart), so the block
//            may begin at any byte offset.
// Out:  R0 = SAD, combined with a saturating add.
// Uses: R1-R3, P0, I0, I1, M0, A0, A1, hardware loop 0 and ASTAT are
//       overwritten. R4-R7, P1-P5 and RETS are never written.
// Note: L0 and L1 are not initialised here; the I0/I1 post-increments are
//       linear only if the caller left them at zero.
//
// This is a leaf routine (it issues no CALL) and it writes none of
// R7:4, P5:3 or RETS, so it needs no stack frame and none is created.
////////////////////////////////////////////////////////////////////////////
.align 8;
_compute_sad_8x8_16:
    M0 = 12;                    // 4 + 12 = 16-byte step between target rows
    I0 = R0;                    // Address of the target
    I1 = R1;                    // Address of the interpolated block
    P0 = 8 (Z);                 // One loop iteration per row
    A1 = A0 = 0;                // Clear the SAD accumulators
    LSETUP (MAD_START1, MAD_END1) LC0 = P0;
    // Prime the pipeline with the first word of each block.
    DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
MAD_START1:
    // Second candidate word, so R3:2 spans the (possibly unaligned) left half.
    DISALGNEXCPT || R3 = [I1++];
    // Left 4 pixels; fetch the right-half target word and third candidate word.
    SAA (R1:0,R3:2) || R1 = [I0++M0] || R2 = [I1++];
MAD_END1:
    // Right 4 pixels (register pairs reversed); prefetch the next row.
    SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
    R3 = A1.L + A1.H, R2 = A0.L + A0.H;   // Fold the four 16-bit partial sums
    R0 = R2 + R3 (S);           // Add the accumulated values in both MACs
_compute_sad_8x8_16.end:
    RTS;
////////////////////////////////////////////////////////////////////////////
// _compute_sad_aligned_8x8_16
//
// Sum of absolute differences (SAD) between an 8x8 block of target pixels
// and an 8x8 candidate block stored as two 32-bit words per row.
//
// In:   R0 = address of the target block. One 32-bit word is fetched per
//            half row and consecutive rows are read 16 bytes apart.
//       R1 = address of the candidate block. Two 32-bit words are fetched
//            per row, so rows are read 8 bytes apart.
//       NOTE(review): both addresses are presumably 32-bit aligned - only
//       one word per half row is loaded from each and the priming load is
//       not issued with DISALGNEXCPT. Confirm with callers.
// Out:  R0 = SAD, combined with a saturating add.
// Uses: R1-R3, P0, I0, I1, M0, A0, A1, hardware loop 0 and ASTAT are
//       overwritten. R4-R7, P1-P5 and RETS are never written.
// Note: L0 and L1 are not initialised here; the I0/I1 post-increments are
//       linear only if the caller left them at zero.
//
// This is a leaf routine (it issues no CALL) and it writes none of
// R7:4, P5:3 or RETS, so it needs no stack frame and none is created.
////////////////////////////////////////////////////////////////////////////
.align 8;
_compute_sad_aligned_8x8_16:
    M0 = 12;                    // 4 + 12 = 16-byte step between target rows
    I0 = R0;                    // Address of the target
    I1 = R1;                    // Address of the interpolated block
    P0 = 8;                     // One loop iteration per row
    LSETUP (MAD_START, MAD_END) LC0 = P0;
    // Initialize accumulators and prime the first word of each block.
    A1 = A0 = 0 || R0 = [I0++] || R2 = [I1++];
MAD_START:
    // Left 4 pixels; fetch the right-half words of both blocks.
    SAA (R1:0,R3:2) || R1 = [I0++M0] || R3 = [I1++];
MAD_END:
    // Right 4 pixels (register pairs reversed); prefetch the next row.
    SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
    R3 = A1.L + A1.H, R2 = A0.L + A0.H;   // Fold the four 16-bit partial sums
    R0 = R2 + R3 (S);           // Add the accumulated values in both MACs
_compute_sad_aligned_8x8_16.end:
    RTS;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -