📄 sad_asm.asm
字号:
/* sad_asm.asm*/
/*
function _sad16x16_asm
parameters:
pMBy_cur: address of target macroblock
ptr_ref : address of reference block
stride: stride of reference window
iMinSAD: last MinSAD
*/
/*
R0 : pMBy_cur
R1: ptr_ref
R2: stride
[sp + 44]: iMinSAD
*/
/*
 * __sad16x16_asm
 * Accumulates the sum of absolute byte differences (SAD) between the current
 * block and the reference block over up to 16 rows of 16 bytes, leaving the
 * loop early once the running sum exceeds iMinSAD.
 *
 * In:   R0         = pMBy_cur (copied to I0)
 *       R1         = ptr_ref  (copied to I1)
 *       R2         = stride of the reference window, in bytes
 *       [SP + 12]  = iMinSAD at entry (read as [SP + 44] after the pushes)
 * Out:  R0 = running SAD at the point the loop was left (full SAD if all
 *            16 rows were processed, otherwise the partial sum that
 *            exceeded iMinSAD)
 * Uses: R0-R7, P2, I0, I1, M1, A0, A1, CC, loop-0 registers.
 *       R7:4 and P5:3 are saved and restored here.
 *
 * Pointer stepping per loop iteration:
 *   I0 advances 4 * 4 = 16 bytes, i.e. the current block is read as rows
 *   that are 16 bytes apart.
 *   I1 advances 4 + 4 + 4 + (stride - 16) + 4 = stride bytes.
 * NOTE(review): I0/I1 post-modify is assumed to be linear (L0 = L1 = 0);
 * nothing in this routine sets the L registers - confirm against the ABI.
 */
.section L1_code;
.align 8;
.global __sad16x16_asm;
__sad16x16_asm:
// Save the preserved registers that are overwritten below (R4-R7), plus
// P5:3 and RETS. This pushes 8 words = 32 bytes.
[--SP] = (R7:4, P5:3);
[--SP] = RETS;
I0 = R0;
I1 = R1;
// R2 = stride - 16: what remains of a reference row after its 16 bytes
R2 += -16;
// Entry SP + 12 once the 32 bytes pushed above are accounted for.
// Presumably the first stack-passed argument under the Blackfin C ABI
// (12 bytes reserved by the caller for R0-R2) - confirm against callers.
R6 = [SP + 44]; //iMinSAD
//R7=[SP+48]; //stepLuma
//R7+=-12;
//M0=R7;
M1 = R2;// Modifier that moves I1 from the end of one reference row to the next
// Loop count: one iteration per row
P2 = 16(Z);
// Clear both accumulators before the first SAA
A1 = A0 = 0 ;
/******************** ZERO MOTION VECTOR POSITION *****************************/
// Prime the pipeline: first word of the current row into R0 and first word
// of the reference row into R2. DISALGNEXCPT is issued alongside the loads
// so that a non-word-aligned I0/I1 does not raise an alignment exception.
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
LSETUP (MAD_START, MAD_END) LC0=P2;
MAD_START:
// Second reference word, so that R3:2 holds 8 consecutive reference bytes
DISALGNEXCPT || R3 = [I1++];
// Each SAA accumulates four absolute byte differences into A0/A1 while the
// parallel loads fetch the operands for the next SAA. R0/R1 alternate as
// the current-block word and R2/R3 alternate as the newest reference word;
// the (R) forms take the R3:2 pair in reversed register order to match.
// See the Blackfin SAA instruction description for the byte-lane selection.
SAA (R1:0,R3:2) || R1 = [I0++] || R2 = [I1++];
SAA (R1:0,R3:2) (R) || R0 = [I0++] || R3 = [I1++];
// The reference load below post-modifies I1 by M1 (stride - 16), which
// places I1 at the start of the next reference row.
SAA (R1:0,R3:2) || R1 = [I0 ++ ] || R2 = [I1 ++ M1];
// The loads below prefetch the first words of the NEXT row (current and
// reference), mirroring the priming loads before the loop.
// NOTE(review): on the 16th iteration these loads read one word past the
// last row of each block; confirm that both buffers tolerate this.
SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
// Fold the four 16-bit partial sums held in A1.L/A1.H and A0.L/A0.H into
// the running total R5 (saturating add).
R4=A1.L+A1.H,R7=A0.L+A0.H ;
R5 = R4 + R7 (S) ;
// Early termination: stop as soon as iMinSAD < running SAD (signed compare)
CC = R6 < R5;
MAD_END:
IF CC JUMP SAD_END;
// MNOP;
// Reached both by the early exit and by falling out of the loop after the
// 16th row; R5 holds the running SAD in either case.
SAD_END:
R0 = R5;
RETS = [SP++];
(R7:4, P5:3) = [SP++];
__sad16x16_asm.end:
RTS;
/*
function _sad8x8_asm
parameters:
pMBy_cur: address of target macroblock
ptr_ref : address of reference block
stride: stride of reference window
*/
/*
 * __sad8x8_asm
 * Accumulates the sum of absolute byte differences (SAD) between the current
 * block and the reference block over 8 rows of 8 bytes. Unlike
 * __sad16x16_asm there is no iMinSAD argument and no early exit.
 *
 * In:   R0 = pMBy_cur (copied to I0)
 *       R1 = ptr_ref  (copied to I1)
 *       R2 = stride of the reference window, in bytes
 * Out:  R0 = SAD over all 8 rows
 * Uses: R0-R3, P2, I0, I1, M0, M1, A0, A1, loop-0 registers.
 *
 * Pointer stepping per loop iteration:
 *   I0 advances 12 (M0) + 4 = 16 bytes, i.e. the current block is read as
 *   rows that are 16 bytes apart (hard-coded through M0 = 12).
 *   I1 advances 4 + (stride - 8) + 4 = stride bytes.
 * NOTE(review): I0/I1 post-modify is assumed to be linear (L0 = L1 = 0);
 * nothing in this routine sets the L registers - confirm against the ABI.
 */
.section L1_code;
.align 8;
.global __sad8x8_asm;
__sad8x8_asm:
// NOTE(review): R7:4, P5:3 and RETS are saved and restored, but none of
// them is written in this routine as shown.
[--SP] = (R7:4, P5:3);
[--SP] = RETS;
I0 = R0;
I1 = R1;
// R2 = stride - 8: what remains of a reference row after its 8 bytes
R2 += -8;
M1 = R2; // Modifier that moves I1 from the end of one reference row to the next
//R7=[SP+44];
//R7+=-4;
//M0 = R7; // Modifier for current MB
// Fixed current-block step: 4 (first word) + 12 = 16 bytes per row
m0=12;
// Clear both accumulators before the first SAA
A1 = A0 = 0 ;
// Loop count: one iteration per row
P2 = 8 (Z);
// Prime the pipeline: first word of the current row into R0 and first word
// of the reference row into R2. DISALGNEXCPT is issued alongside the loads
// so that a non-word-aligned I0/I1 does not raise an alignment exception.
DISALGNEXCPT || R0 = [I0++] || R2 = [I1++];
LSETUP (MAD8_START, MAD8_END) LC0=P2;
MAD8_START:
// Second reference word, so that R3:2 holds 8 consecutive reference bytes
DISALGNEXCPT || R3 = [I1++];
// First four bytes of the row. The parallel loads fetch the second current
// word (I0 then steps by M0) and the third reference word (I1 then steps
// by M1 to the start of the next reference row).
SAA (R1:0,R3:2) || R1 = [I0 ++ M0] || R2 = [I1 ++ M1];
// Last four bytes of the row, with the register pairs taken in reversed
// order. The parallel loads prefetch the first words of the NEXT row.
// NOTE(review): on the 8th iteration these loads read one word past the
// last row of each block; confirm that both buffers tolerate this.
MAD8_END: SAA (R1:0,R3:2) (R) || R0 = [I0++] || R2 = [I1++];
// Fold the four 16-bit partial sums held in A1.L/A1.H and A0.L/A0.H into
// the result (saturating add), which is left in R0.
R3=A1.L+A1.H,R2=A0.L+A0.H; //|| R1 = W[P4++] (X)
R0 = R2 + R3 (S);
RETS = [SP++];
(R7:4, P5:3) = [SP++];
__sad8x8_asm.end:
RTS;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -