📄 mdma_get_mb_refwin.asm
字号:
R4 += 2;
I0 = R4;
R5 += 2;
I1 = R5;
R0 = [SP+56];
R3 = [SP+68];
R1 = R0;
R2 = R3;
R1 += 32;
R2 += 32;
CC = BITTST(R7, 2);
IF CC R0=R1;
IF CC R3=R2;
W[I0++] = R0.L;
W[I0] = R0.H;
R4 += 16;
I0 = R4;
W[I1++] = R3.L;
W[I1] = R3.H;
R5 += 16;
I1 = R5;
R0 = [SP+60];
R3 = [SP+72];
R1 = R0;
R2 = R3;
R1 += 16;
R2 += 16;
CC = BITTST(R7, 2);
IF CC R0=R1;
IF CC R3=R2;
W[I0++] = R0.L;
W[I0] = R0.H;
R4 += 16;
I0 = R4;
W[I1++] = R3.L;
W[I1] = R3.H;
R5 += 16;
I1 = R5;
R0 = [SP+64];
R3 = [SP+76];
R1 = R0;
R2 = R3;
R1 += 16;
R2 += 16;
CC = BITTST(R7, 2);
IF CC R0=R1;
IF CC R3=R2;
W[I0++] = R0.L;
W[I0] = R0.H;
W[I1++] = R3.L;
W[I1++] = R3.H;
no_more_dma1:
R0 = 0x6819;
W[P0+MDMA_S0_CONFIG-MDMA_S0_NEXT_DESC_PTR] = R0;
SSYNC;
R0 = 0x681B;
W[P1+MDMA_D0_CONFIG-MDMA_D0_NEXT_DESC_PTR] = R0;
SSYNC;
_restore_MB_get_MB_refwin_no_pre.end:
RETS = [SP++];
(R7:4, P5:3) = [SP++];
RTS;
NOP;
//-----------------------------------------------------------------------
// _restore_MB_get_MB_refwin_prepro
//
// Patches the start-address fields of several memory-DMA descriptor queues
// and then enables the MDMA source/destination channel 0.
//
// Inputs visible in this routine:
//   R0, R1, R2      first three arguments (source base, two destinations)
//   [SP+44]..[SP+80] values read from the stack after the prologue.  The
//                   prologue pushes 8 words (R7:4, P5:3, RETS = 32 bytes),
//                   so these are entry-relative offsets 12..48.
//                   NOTE(review): presumably the stack-passed C arguments
//                   4..13 -- the C prototype is not visible here, confirm.
//   [SP+80]         flag word: bit 1 = fetch MB, bit 0 = fetch reference
//                   window, bit 2 = select the alternate "...1" queues and
//                   biased addresses.
//
// Side effects: clears _dma_done, stores the flag word to _get_MB_refwin,
// writes the queue head pointers (_MBY_queue_addr, _Y656_queue_addr,
// _ref_src_queue_addr, _refwin_queue_addr), writes MDMA_S0/D0 registers.
// Clobbers R0-R3, P0-P2, I0, I1, L0, L1; R4-R7 and P3-P5 are saved/restored.
//
// NOTE(review): each queue entry is patched at byte offset +2 and entries
// are stepped by 16 bytes.  That matches a 16-byte descriptor whose 32-bit
// start address sits at +2, but the queue layout is defined elsewhere --
// confirm against the queue definitions.
//-----------------------------------------------------------------------
_restore_MB_get_MB_refwin_prepro:
[--SP] = (R7:4, P5:3);
[--SP] = RETS;
// Zero the length registers so I0/I1 post-increments below are linear
// (no circular buffering).
L0 = 0;
L1 = 0;
P0.L = _dma_done; //clear flag
P0.H = _dma_done;
R7 = 0;
[P0] = R7;
// R6 = base of _MB_restore_queue (later written to the source NEXT_DESC_PTR).
// I0 -> entry 0 at +2, R4 -> entry 1 at +2.
R4.L = _MB_restore_queue;
R4.H = _MB_restore_queue;
R6 = R4;
R4 += 2;
I0 = R4;
R4 += 16;
// R7 = base of _refvop_queue (later written to the destination NEXT_DESC_PTR).
// I1 -> entry 0 at +2, R5 -> entry 1 at +2.
R5.L = _refvop_queue;
R5.H = _refvop_queue;
R7 = R5;
R5 += 2;
I1 = R5;
R5 += 16;
// Entry 0: source = R0, destination = R1 (each stored as two 16-bit halves).
W[I0++] = R0.L;
W[I0] = R0.H;
I0 = R4;
W[I1++] = R1.L;
W[I1] = R1.H;
I1 = R5;
// Entry 1: source = R0 + 432, destination = R2.
R3 = 432; //24*18
R0 = R0 + R3;
W[I0++] = R0.L;
W[I0] = R0.H;
R4 += 16;
I0 = R4;
W[I1++] = R2.L;
W[I1] = R2.H;
R5 += 16;
I1 = R5;
// Entry 2: source = R0 + 432 + 64, destination = value at [SP+44].
R3 = 64;
R0 = R0 + R3;
W[I0++] = R0.L;
W[I0] = R0.H;
R3 = [SP+44];
W[I1++] = R3.L;
W[I1] = R3.H;
// Point the MDMA channel-0 source/destination next-descriptor registers at
// the two queues.  P0/P1 stay live until the CONFIG writes at no_more_dma2.
P0.L = LO(MDMA_S0_NEXT_DESC_PTR);
P0.H = HI(MDMA_S0_NEXT_DESC_PTR);
[P0] = R6;
P1.L = LO(MDMA_D0_NEXT_DESC_PTR);
P1.H = HI(MDMA_D0_NEXT_DESC_PTR);
[P1] = R7;
// R7 = flag word; publish it in _get_MB_refwin.
R7 = [SP+80];
P2.L = _get_MB_refwin;
P2.H = _get_MB_refwin;
[P2] = R7;
// Bit 1 clear: skip all further queue setup.  Note that this also skips the
// reference-window setup below, regardless of bit 0.
CC = BITTST(R7, 1); //get MB flag;
IF !CC JUMP no_more_dma2;
// Publish the "prepro" Y queues as the active Y queue heads.
P3.L = _MBY_queue_addr;
P3.H = _MBY_queue_addr;
P4.L = _Y656_queue_addr;
P4.H = _Y656_queue_addr;
R4.L = _Y656_queue_prepro;
R4.H = _Y656_queue_prepro;
[P4] = R4;
R4 += 2;
R5.L = _MBY_queue_prepro;
R5.H = _MBY_queue_prepro;
[P3] = R5;
R5 += 2;
I0 = R4;
I1 = R5;
// R0 = value at [SP+48], R1 = value at [SP+52] (per the comments below: the
// 656-format source address and the MB destination address).
R0 = [SP+48];
R1 = [SP+52];
R2 = R0;
R3 = R1;
R3 += 3; //prepro MB Y addr;
P3.L = _prepro_back_offset;
P3.H = _prepro_back_offset;
R6 = [P3];
R2 = R2 - R6; //prepro UYVY656 addr;
R2 += 1;
// Y entry: source = [SP+48] - _prepro_back_offset + 1, destination = [SP+52] + 3.
W[I0++] = R2.L;
W[I0] = R2.H;
R4.L = _UV656_queue;
R4.H = _UV656_queue;
R4 += 2;
I0 = R4;
W[I1++] = R3.L;
W[I1] = R3.H;
R5.L = _MBUV_queue;
R5.H = _MBUV_queue;
R5 += 2;
I1 = R5;
// Chroma entry 0: source = [SP+48], destination = [SP+52] + 432.
W[I0++] = R0.L; //656 U addr;
W[I0] = R0.H;
R4 += 16;
I0 = R4;
R6 = 432;
R1 = R1 + R6;
W[I1++] = R1.L; //MB U addr;
W[I1] = R1.H;
R5 += 16;
I1 = R5;
// Chroma entry 1: source = [SP+48] + 2, destination = [SP+52] + 432 + 64.
R0 += 2;
W[I0++] = R0.L;
W[I0] = R0.H;
R6 = 64;
R1 = R1 + R6;
W[I1++] = R1.L;
W[I1] = R1.H;
CC = BITTST(R7, 0); //get refwin flag;
IF !CC JUMP no_more_dma2;
// Reference-window queues: flag bit 2 selects _ref_src_queue1/_refwin_queue1
// instead of _ref_src_queue/_refwin_queue.  The chosen heads are published
// in _ref_src_queue_addr / _refwin_queue_addr.
P2.L = _ref_src_queue_addr;
P2.H = _ref_src_queue_addr;
P3.L = _refwin_queue_addr;
P3.H = _refwin_queue_addr;
R4.L = _ref_src_queue;
R4.H = _ref_src_queue;
R5.L = _refwin_queue;
R5.H = _refwin_queue;
R0.L = _ref_src_queue1;
R0.H = _ref_src_queue1;
R1.L = _refwin_queue1;
R1.H = _refwin_queue1;
CC = BITTST(R7, 2);
IF CC R4=R0;
IF CC R5=R1;
[P2] = R4;
[P3] = R5;
R4 += 2;
I0 = R4;
R5 += 2;
I1 = R5;
// Entry 0: source = [SP+56], destination = [SP+68]; both biased by +32 when
// flag bit 2 is set.
R0 = [SP+56];
R3 = [SP+68];
R1 = R0;
R2 = R3;
R1 += 32;
R2 += 32;
CC = BITTST(R7, 2);
IF CC R0 = R1;
IF CC R3 = R2;
W[I0++] = R0.L;
W[I0] = R0.H;
R4 += 16;
I0 = R4;
W[I1++] = R3.L;
W[I1] = R3.H;
R5 += 16;
I1 = R5;
// Entry 1: source = [SP+60], destination = [SP+72]; both biased by +16 when
// flag bit 2 is set.
R0 = [SP+60];
R3 = [SP+72];
R1 = R0;
R2 = R3;
R1 += 16;
R2 += 16;
CC = BITTST(R7, 2);
IF CC R0 = R1;
IF CC R3 = R2;
W[I0++] = R0.L;
W[I0] = R0.H;
R4 += 16;
I0 = R4;
W[I1++] = R3.L;
W[I1] = R3.H;
R5 += 16;
I1 = R5;
// Entry 2: source = [SP+64], destination = [SP+76]; both biased by +16 when
// flag bit 2 is set.
R0 = [SP+64];
R3 = [SP+76];
R1 = R0;
R2 = R3;
R1 += 16;
R2 += 16;
CC = BITTST(R7, 2);
IF CC R0 = R1;
IF CC R3 = R2;
W[I0++] = R0.L;
W[I0] = R0.H;
W[I1++] = R3.L;
// The post-increment on the next store differs from the earlier entries;
// I1 is not used again before return, so it has no visible effect here.
W[I1++] = R3.H;
no_more_dma2:
// Write the channel CONFIG registers (addressed relative to P0/P1, which
// still hold the NEXT_DESC_PTR register addresses).  The source is written
// first, then the destination, each followed by SSYNC.
// NOTE(review): 0x681B differs from 0x6819 only in bit 1; the bit-field
// meaning is defined by the DMA CONFIG register layout in the processor's
// hardware reference -- confirm there before changing either value.
R0 = 0x6819;
W[P0+MDMA_S0_CONFIG-MDMA_S0_NEXT_DESC_PTR] = R0;
SSYNC;
R0 = 0x681B;
W[P1+MDMA_D0_CONFIG-MDMA_D0_NEXT_DESC_PTR] = R0;
SSYNC;
_restore_MB_get_MB_refwin_prepro.end:
// Epilogue: restore in the reverse order of the prologue pushes.
RETS = [SP++];
(R7:4, P5:3) = [SP++];
RTS;
NOP;
//-----------------------------------------------------------------------
//void MB_Gaus_filter(Ipp8u *MB, int ext_width);
//
// In:  R0 = MB (byte pointer, used for 32-bit loads so it must be
//           word-aligned), R1 = ext_width (byte distance between rows).
// Out: 16 loop iterations, each storing four 32-bit words (16 result bytes)
//      through P4, which starts at MB itself.  Results are therefore written
//      contiguously over the start of the input buffer while later input
//      rows are still being read at stride ext_width.
//
// Per iteration the code forms, for each of 18 columns, the vertical sum
//      top + 2*middle + bottom
// and then, for each of the 16 output pixels, the horizontal sum
//      left + 2*centre + right
// of those column sums, adds 8 and shifts right by 4 (i.e. a 3x3 kernel
// 1-2-1 / 2-4-2 / 1-2-1 with rounding, divided by 16).
//
// Each row is read as six words: the top byte of word 0 is the left
// neighbour (byte offset 3 on little-endian Blackfin), words 1..4 are the
// 16 pixels, and the low byte of word 5 (byte offset 20) is the right
// neighbour.
//
// Row history is kept in registers rather than re-read from memory:
//   "top" row    : I2 (left byte), I3, P0, P1, P2 (pixel words), P3 (right byte)
//   "middle" row : M2 (left byte), M3, B0, B1, B2 (pixel words), B3 (right byte)
// Each iteration loads a new bottom row, then shifts middle -> top and
// bottom -> middle.
//
// Clobbers R0-R3, P0-P2, I0-I3, M0-M3, B0-B3, L0-L3 (left at 0), LC0 and
// the other loop-0 registers, ASTAT.  R4-R7, P3-P5, RETS are saved/restored.
//
// NOTE(review): BYTEUNPACK selects its source bytes using the low bits of
// I0 (I1 for the (R) form).  Both stay word-aligned here as long as MB and
// ext_width are multiples of 4 -- confirm callers guarantee that.
//-----------------------------------------------------------------------
_MB_Gaus_filter: //Gaussian filter preprocessing;
[--SP] = (R7:4, P5:3);
[--SP] = RETS;
P4 = R0;//the Curr_MB addr
// I1 -> row 0, I0 -> row 1.
I1 = R0;
R0 = R0 + R1;
I0 = R0;
// Linear addressing for all four index registers.
R0 = 0;
L0 = R0;
L1 = R0;
L2 = R0;
L3 = R0;
// M1 = ext_width - 20: after five word loads (20 bytes) the sixth load's
// post-modify lands I0 on the start of the following row.
R1 += -20;
M1 = R1;
R7 = R1;
// Use I2, I3, P0, P1, P2, P3 to hold the data of the first row;
// use M2, M3, B0, B1, B2, B3 to hold the data of the second row.
R0 = [I1++] || R1 = [I0++];
// Word 0: keep only the top byte (left neighbour).
R0 >>= 24;
I2 = R0;
R1 >>= 24;
M2 = R1;
R0 = [I1++] || R1 = [I0++];
I3 = R0;
M3 = R1;
R0 = [I1++] || R1 = [I0++];
P0 = R0;
B0 = R1;
R0 = [I1++] || R1 = [I0++];
P1 = R0;
B1 = R1;
R0 = [I1++] || R1 = [I0++];
P2 = R0;
B2 = R1;
// Word 5: keep only the low byte (right neighbour).  I0 now -> row 2.
R0 = [I1++] || R1 = [I0++M1];
R0 = R0.B(Z);
P3 = R0;
R1 = R1.B(Z);
B3 = R1;
// Inside the loop I0 and I1 each advance 8 + 8 + M1 bytes per row, so
// M1 becomes ext_width - 16.  I0 reads words 0, 2, 4 and I1 (= I0 + 4)
// reads words 1, 3, 5 of the new bottom row.
R7 += 4;
M1 = R7;
M0 = 8;
R0 = I0;
R0 += 4;
I1 = R0;
P5 = 16;
LSETUP(prepro_s, prepro_e) LC0=P5;
prepro_s:
// --- left-neighbour column: R7 = top + 2*middle + bottom --------------
R7 = I2;
I2 = M2;
R0 = M2;
R0 <<= 1; // *2
R7 = R7 + R0;
// Load bottom-row word 0 (R0) and word 1 (R1).
R0 = [I0++M0] || R1 = [I1++M0];
R0 >>= 24;
M2 = R0; //store r0;
R7 = R7 + R0;
// --- pixels 0..3: column sums as 16-bit lanes in R5 (0,1) and R6 (2,3) -
R0 = I3;
(R6, R5) = BYTEUNPACK R1:0;
I3 = M3;
R0 = M3;
(R4, R0) = BYTEUNPACK R1:0;
R4 <<= 1;
R0 <<= 1;
R5 = R5 + R0;
R6 = R6 + R4;
M3 = R1; //store R1;
// (R) form: unpack the bottom-row word held in R1 instead of R0.
(R4, R0) = BYTEUNPACK R1:0(R);
R5 = R5 + R0;
R6 = R6 + R4;
// --- outputs 0 and 1 -> R2.L / R2.H -----------------------------------
// The centre column is added once as a lane and once zero-extended, then
// the two lanes are summed: low = left + 2*centre, high = right.
R4 = R5.L(Z);
R0 = R7 + R5;
R0 = R0 + R4;
R2.L = R0.L + R0.H(S);
R4 = R6.L(Z);
R0 = R5 + R4;
R5 >>= 16;
R0 = R0 + R5;
R2.H = R0.L + R0.H(S);
// Round both lanes (+8) and divide by 16.  The 32-bit shift leaks the low
// nibble of the upper lane into bits 15:12 of the lower lane; presumably
// harmless because BYTEPACK keeps only the low byte of each half --
// confirm against the instruction reference.
R0.L = 0x0008;
R0.H = 0x0008;
R2 = R2 + R0;
R2 >>= 4;
// --- output 2 -> R3.L --------------------------------------------------
R0 = R5 + R6;
R0 = R0 + R4;
R3.L = R0.L + R0.H(S);
// --- pixels 4..7: column sums in R5 (4,5) and R7 (6,7) -----------------
R0 = P0;
(R7, R5) = BYTEUNPACK R1:0;
P0 = B0;
R0 = B0;
(R4, R0) = BYTEUNPACK R1:0;
R4 <<= 1;
R0 <<= 1;
R5 = R5 + R0;
R7 = R7 + R4;
// Load bottom-row word 2 (R0) and word 3 (R1).
R0 = [I0++M0] || R1 = [I1++M0];
B0 = R0;
(R4, R0) = BYTEUNPACK R1:0;
R5 = R5 + R0;
R7 = R7 + R4;
// --- output 3 -> R3.H --------------------------------------------------
R4 = R5.L(Z);
R0 = R6 + R4;
R6 >>=16;
R0 = R0 + R6;
R3.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R3 = R3 + R0;
R3 >>= 4;
//R0 = BYTEPACK(R2, R3); //to avoid "read after write" delay;
//[P4++] = R3; //to avoid "read after write" delay;
// --- output 4 -> R2.L (outputs 0..3 are packed into R3 in between) -----
R0 = R6 + R5;
R0 = R0 + R4;
R3 = BYTEPACK(R2, R3); //to avoid "read after write" delay;
R2.L = R0.L + R0.H(S);
// --- output 5 -> R2.H --------------------------------------------------
R4 = R7.L(Z);
R0 = R5 + R4;
R5 >>= 16;
R0 = R0 + R5;
R2.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R2 = R2 + R0;
R2 >>= 4;
// --- output 6 -> R3.L (after the packed outputs 0..3 are stored) -------
R0 = R5 + R7;
R0 = R0 + R4;
[P4++] = R3; //to avoid "read after write" delay;
R3.L = R0.L + R0.H(S);
// --- pixels 8..11: column sums in R5 (8,9) and R6 (10,11) --------------
R0 = P1;
(R6, R5) = BYTEUNPACK R1:0;
P1 = B1;
R0 = B1;
(R4, R0) = BYTEUNPACK R1:0;
R4 <<= 1;
R0 <<= 1;
R5 = R5 + R0;
R6 = R6 + R4;
B1 = R1;
// (R) form: unpack bottom-row word 3, still held in R1.
(R4, R0) = BYTEUNPACK R1:0(R);
R5 = R5 + R0;
R6 = R6 + R4;
// --- output 7 -> R3.H --------------------------------------------------
R4 = R5.L(Z);
R0 = R7 + R4;
R7 >>= 16;
R0 = R0 + R7;
R3.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R3 = R3 + R0;
R3 >>= 4;
//R0 = BYTEPACK(R2, R3); //to avoid "read after write" delay;
//[P4++] = R0;
// --- output 8 -> R2.L (outputs 4..7 are packed into R3 in between) -----
R0 = R7 + R5;
R0 = R0 + R4;
R3 = BYTEPACK(R2, R3); //to avoid "read after write" delay;
R2.L = R0.L + R0.H(S);
// --- output 9 -> R2.H --------------------------------------------------
R4 = R6.L(Z);
R0 = R5 + R4;
R5 >>= 16;
R0 = R0 + R5;
R2.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R2 = R2 + R0;
R2 >>= 4;
// --- output 10 -> R3.L (after the packed outputs 4..7 are stored) ------
R0 = R5 + R6;
R0 = R0 + R4;
[P4++] = R3; //to avoid "read after write" delay;
R3.L = R0.L + R0.H(S);
// --- pixels 12..15: column sums in R5 (12,13) and R7 (14,15) -----------
R0 = P2;
(R7, R5) = BYTEUNPACK R1:0;
P2 = B2;
R0 = B2;
(R4, R0) = BYTEUNPACK R1:0;
R4 <<= 1;
R0 <<= 1;
R5 = R5 + R0;
R7 = R7 + R4;
// Load bottom-row word 4 (R0) and word 5 (R1); the M1 post-modify moves
// I0/I1 to words 0/1 of the next row for the following iteration.
R0 = [I0++M1] || R1 = [I1++M1];
B2 = R0;
(R4, R0) = BYTEUNPACK R1:0;
R5 = R5 + R0;
R7 = R7 + R4;
// --- output 11 -> R3.H -------------------------------------------------
R4 = R5.L(Z);
R0 = R6 + R4;
R6 >>= 16;
R0 = R0 + R6;
R3.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R3 = R3 + R0;
R3 >>= 4;
//R0 = BYTEPACK(R2, R3); //to avoid "read after write" delay;
//[P4++] = R0;
// --- output 12 -> R2.L (outputs 8..11 are packed into R3 in between) ---
R0 = R6 + R5;
R0 = R0 + R4;
R3 = BYTEPACK(R2, R3); //to avoid "read after write" delay;
R2.L = R0.L + R0.H(S);
// --- output 13 -> R2.H -------------------------------------------------
R4 = R7.L(Z);
R0 = R5 + R4;
R5 >>= 16;
R0 = R0 + R5;
R2.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R2 = R2 + R0;
R2 >>= 4;
// --- output 14 -> R3.L (after the packed outputs 8..11 are stored) -----
R0 = R5 + R7;
R0 = R0 + R4;
[P4++] = R3; //to avoid "read after write" delay;
R3.L = R0.L + R0.H(S);
// --- right-neighbour column: R6 = top + 2*middle + bottom --------------
R6 = P3;
P3 = B3;
R0 = B3;
R0 <<= 1;
R6 = R6 + R0;
R1 = R1.B(Z);
B3 = R1;
R6 = R6 + R1;
// --- output 15 -> R3.H -------------------------------------------------
R0 = R7 + R6;
R7 >>= 16;
R0 = R0 + R7;
R3.H = R0.L + R0.H(S);
R0.L = 0x0008;
R0.H = 0x0008;
R3 = R3 + R0;
R3 >>= 4;
R2 = BYTEPACK(R2, R3);
// Last instruction of the hardware loop: store packed outputs 12..15.
prepro_e:
[P4++] = R2;
RETS = [SP++];
(R7:4, P5:3) = [SP++];
_MB_Gaus_filter.end:
RTS;
NOP;
//void MB_no_pre(Ipp8u *MB, int width);
// No-op routine with the same prototype shape as MB_Gaus_filter: it returns
// immediately and touches neither its arguments nor memory.
// NOTE(review): presumably called in place of MB_Gaus_filter when
// pre-filtering is disabled -- the call sites are not visible here.
_MB_no_pre: //empty function;
_MB_no_pre.end:
RTS;
NOP;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -