⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mdma_get_mb_refwin.asm

📁 adi bf533视频编码程序
💻 ASM
📖 第 1 页 / 共 3 页
字号:
	
	R4 += 2;
	I0 = R4;
	R5 += 2;
	I1 = R5;
	R0 = [SP+56];
	R3 = [SP+68];
	R1 = R0;
	R2 = R3;
	R1 += 32;
	R2 += 32;
	CC = BITTST(R7, 2);
	IF CC R0=R1;
	IF CC R3=R2;
	W[I0++] = R0.L;
	W[I0] = R0.H;
	R4 += 16;
	I0 = R4;
	W[I1++] = R3.L;
	W[I1] = R3.H;
	R5 += 16;
	I1 = R5;
	R0 = [SP+60];
	R3 = [SP+72];
	R1 = R0;
	R2 = R3;
	R1 += 16;
	R2 += 16;
	CC = BITTST(R7, 2);
	IF CC R0=R1;
	IF CC R3=R2;
	
	W[I0++] = R0.L;
	W[I0] = R0.H;
	R4 += 16;
	I0 = R4;
	W[I1++] = R3.L;
	W[I1] = R3.H;
	R5 += 16;
	I1 = R5;
	R0 = [SP+64];
	R3 = [SP+76];
	R1 = R0;
	R2 = R3;
	R1 += 16;
	R2 += 16;
	CC = BITTST(R7, 2);
	IF CC R0=R1;
	IF CC R3=R2;
	W[I0++] = R0.L;
	W[I0] = R0.H;
	W[I1++] = R3.L;
	W[I1++] = R3.H;
	

no_more_dma1:		
	R0 = 0x6819;
	W[P0+MDMA_S0_CONFIG-MDMA_S0_NEXT_DESC_PTR] = R0;
	SSYNC;
	R0 = 0x681B;
	W[P1+MDMA_D0_CONFIG-MDMA_D0_NEXT_DESC_PTR] = R0;
	SSYNC;
				
_restore_MB_get_MB_refwin_no_pre.end:
	RETS = [SP++];
	(R7:4, P5:3) = [SP++];
	RTS;
	NOP;	


// _restore_MB_get_MB_refwin_prepro
//
// Patches the 32-bit address word of entries in several DMA descriptor
// queues and then enables memory-DMA stream 0 (source + destination).
// Every queue entry is treated as 16 bytes long with its address word at
// byte offset 2 (the code steps the queue pointer by 2 once, then by 16).
//
// Inputs read by this routine:
//   R0        -> written to _MB_restore_queue entry 0; entries 1 and 2
//                receive R0+432 and R0+432+64.
//   R1, R2    -> written to _refvop_queue entries 0 and 1.
//   [SP+44]   -> written to _refvop_queue entry 2.
//   [SP+48], [SP+52]
//             -> base addresses for the Y656/MBY and UV656/MBUV queues
//                (only used when flag bit 1 is set).
//   [SP+56..64], [SP+68..76]
//             -> three source / three destination addresses for the
//                reference-window queues (only used when bits 1 and 0
//                are both set).
//   [SP+80]   -> flag word: bit 1 = "get MB", bit 0 = "get refwin",
//                bit 2 selects the alternate (…queue1) descriptor lists
//                and adds a fixed offset to the refwin addresses.
//
// The prologue pushes 7 registers plus RETS (32 bytes), so [SP+44] is the
// word 12 bytes above the SP value at entry.
// NOTE(review): presumably these stack slots are C arguments 4..13 under the
// Blackfin C calling convention (R0-R2 carry the first three) -- confirm
// against the C prototype, which is not visible in this file section.
//
// Side effects: clears _dma_done, stores the flag word in _get_MB_refwin,
// updates the *_queue_addr variables, writes MDMA_S0/D0 NEXT_DESC_PTR and
// CONFIG. Uses R0-R3, P0-P2, I0, I1, L0, L1 without saving them.
_restore_MB_get_MB_refwin_prepro:
	[--SP] = (R7:4, P5:3);
	[--SP] = RETS;
	
	// Zero lengths so I0/I1 are used as plain linear pointers.
	L0 = 0;
	L1 = 0;
	
	P0.L = _dma_done;			//clear flag
	P0.H = _dma_done;
	R7 = 0;
	[P0] = R7;

	// ---- Unconditional part: _MB_restore_queue (source side, via I0)
	// ---- and _refvop_queue (destination side, via I1), three entries each.
	R4.L = _MB_restore_queue;
	R4.H = _MB_restore_queue;
	R6 = R4;					// keep queue base for MDMA_S0_NEXT_DESC_PTR
	R4 += 2;					// address word of entry 0
	I0 = R4;
	R4 += 16;					// address word of entry 1
	R5.L = _refvop_queue;
	R5.H = _refvop_queue;
	R7 = R5;					// keep queue base for MDMA_D0_NEXT_DESC_PTR
	R5 += 2;
	I1 = R5;
	R5 += 16;
	// Addresses are stored as two 16-bit halves because the address word
	// sits at offset 2 and is therefore not 32-bit aligned.
	W[I0++] = R0.L;				// source entry 0 = R0
	W[I0] = R0.H;
	I0 = R4;
	W[I1++] = R1.L;				// destination entry 0 = R1
	W[I1] = R1.H;
	I1 = R5;
	R3 = 432;			//24*18
	R0 = R0 + R3;
	W[I0++] = R0.L;				// source entry 1 = R0 + 432
	W[I0] = R0.H;
	R4 += 16;
	I0 = R4;
	W[I1++] = R2.L;				// destination entry 1 = R2
	W[I1] = R2.H;
	R5 += 16;
	I1 = R5;
	R3 = 64;
	R0 = R0 + R3;
	W[I0++] = R0.L;				// source entry 2 = R0 + 432 + 64
	W[I0] = R0.H;
	R3 = [SP+44];
	W[I1++] = R3.L;				// destination entry 2 = [SP+44]
	W[I1] = R3.H;
	
	// Point both MDMA stream-0 channels at the heads of the two queues.
	// P0/P1 keep these MMR addresses until the CONFIG writes at no_more_dma2.
	P0.L = LO(MDMA_S0_NEXT_DESC_PTR);
	P0.H = HI(MDMA_S0_NEXT_DESC_PTR);
	[P0] = R6;

	P1.L = LO(MDMA_D0_NEXT_DESC_PTR);
	P1.H = HI(MDMA_D0_NEXT_DESC_PTR);
	[P1] = R7;
	
	
	// Load the flag word, publish it in _get_MB_refwin, and test bit 1.
	R7 = [SP+80];
	P2.L = _get_MB_refwin;
	P2.H = _get_MB_refwin;
	[P2] = R7;
	CC = BITTST(R7, 1);		//get MB flag;
	IF !CC JUMP no_more_dma2;

	// ---- "get MB" part: select the *_prepro Y queues and patch the
	// ---- Y and UV descriptor addresses.
	P3.L = _MBY_queue_addr;
	P3.H = _MBY_queue_addr;
	P4.L = _Y656_queue_addr;
	P4.H = _Y656_queue_addr;
	
	R4.L = _Y656_queue_prepro;
	R4.H = _Y656_queue_prepro;
	[P4] = R4;					// _Y656_queue_addr = &_Y656_queue_prepro
	R4 += 2;
	R5.L = _MBY_queue_prepro;
	R5.H = _MBY_queue_prepro;
	[P3] = R5;					// _MBY_queue_addr = &_MBY_queue_prepro
	R5 += 2;
	I0 = R4;
	I1 = R5;
	R0 = [SP+48];
	R1 = [SP+52];
	R2 = R0;
	R3 = R1;
	R3 += 3;					//prepro MB Y addr;
	P3.L = _prepro_back_offset;
	P3.H = _prepro_back_offset;
	R6 = [P3];
	R2 = R2 - R6;				//prepro UYVY656 addr;
	R2 += 1;
	W[I0++] = R2.L;				// _Y656_queue_prepro entry 0 = [SP+48] - _prepro_back_offset + 1
	W[I0] = R2.H;
	R4.L = _UV656_queue;
	R4.H = _UV656_queue;
	R4 += 2;
	I0 = R4;
	W[I1++] = R3.L;				// _MBY_queue_prepro entry 0 = [SP+52] + 3
	W[I1] = R3.H;
	R5.L = _MBUV_queue;
	R5.H = _MBUV_queue;
	R5 += 2;
	I1 = R5;
	W[I0++] = R0.L;				//656 U addr;
	W[I0] = R0.H;
	R4 += 16;
	I0 = R4;
	R6 = 432;
	R1 = R1 + R6;
	W[I1++] = R1.L;				//MB U addr;
	W[I1] = R1.H;
	R5 += 16;
	I1 = R5;
	R0 += 2;
	W[I0++] = R0.L;				// _UV656_queue entry 1 = [SP+48] + 2
	W[I0] = R0.H;
	R6 = 64;
	R1 = R1 + R6;
	W[I1++] = R1.L;				// _MBUV_queue entry 1 = [SP+52] + 432 + 64
	W[I1] = R1.H;
	
	CC = BITTST(R7, 0);			//get refwin flag;
	IF !CC JUMP no_more_dma2;
	
	// ---- "get refwin" part: pick the primary or alternate queue pair
	// ---- (flag bit 2) and patch three source/destination addresses.
	P2.L = _ref_src_queue_addr;
	P2.H = _ref_src_queue_addr;
	P3.L = _refwin_queue_addr;
	P3.H = _refwin_queue_addr;
	
	R4.L = _ref_src_queue;
	R4.H = _ref_src_queue;
	R5.L = _refwin_queue;
	R5.H = _refwin_queue;
	R0.L = _ref_src_queue1;
	R0.H = _ref_src_queue1;
	R1.L = _refwin_queue1;
	R1.H = _refwin_queue1;
	
	CC = BITTST(R7, 2);
	IF CC R4=R0;
	IF CC R5=R1;
	[P2] = R4;
	[P3] = R5;
	
	
	R4 += 2;
	I0 = R4;
	R5 += 2;
	I1 = R5;
	// Entry 0: [SP+56] -> [SP+68], both advanced by 32 when bit 2 is set.
	R0 = [SP+56];
	R3 = [SP+68];
	R1 = R0;
	R2 = R3;
	R1 += 32;
	R2 += 32;
	CC = BITTST(R7, 2);
	IF CC R0 = R1;
	IF CC R3 = R2;
	
	W[I0++] = R0.L;
	W[I0] = R0.H;
	R4 += 16;
	I0 = R4;
	W[I1++] = R3.L;
	W[I1] = R3.H;
	R5 += 16;
	I1 = R5;
	// Entry 1: [SP+60] -> [SP+72], both advanced by 16 when bit 2 is set.
	R0 = [SP+60];
	R3 = [SP+72];
	R1 = R0;
	R2 = R3;
	R1 += 16;
	R2 += 16;
	CC = BITTST(R7, 2);
	IF CC R0 = R1;
	IF CC R3 = R2;
	
	W[I0++] = R0.L;
	W[I0] = R0.H;
	R4 += 16;
	I0 = R4;
	W[I1++] = R3.L;
	W[I1] = R3.H;
	R5 += 16;
	I1 = R5;
	// Entry 2: [SP+64] -> [SP+76], both advanced by 16 when bit 2 is set.
	R0 = [SP+64];
	R3 = [SP+76];
	R1 = R0;
	R2 = R3;
	R1 += 16;
	R2 += 16;
	CC = BITTST(R7, 2);
	IF CC R0 = R1;
	IF CC R3 = R2;
	
	W[I0++] = R0.L;
	W[I0] = R0.H;
	W[I1++] = R3.L;
	// The post-increment on the next store differs from the earlier
	// "W[I1] = ..." stores; I1 is not read again in this routine.
	W[I1++] = R3.H;
	

// Common exit: start the transfer by writing the CONFIG registers, source
// channel first, each followed by SSYNC. The CONFIG MMRs are addressed
// relative to the NEXT_DESC_PTR addresses already held in P0/P1.
// NOTE(review): 0x6819 / 0x681B look like DMAEN | 32-bit words | 2D |
// NDSIZE=8 | small descriptor-list flow, with the write-to-memory bit set
// only in the destination value -- confirm against the BF533 DMA_CONFIG
// register description.
no_more_dma2:		
	R0 = 0x6819;
	W[P0+MDMA_S0_CONFIG-MDMA_S0_NEXT_DESC_PTR] = R0;
	SSYNC;
	R0 = 0x681B;
	W[P1+MDMA_D0_CONFIG-MDMA_D0_NEXT_DESC_PTR] = R0;
	SSYNC;
				
_restore_MB_get_MB_refwin_prepro.end:
	// Epilogue: restore RETS and the registers pushed in the prologue.
	RETS = [SP++];
	(R7:4, P5:3) = [SP++];
	RTS;
	NOP;	
	
	
	
//void MB_Gaus_filter(Ipp8u *MB, int ext_width);
//
// 3x3 smoothing filter with weights 1-2-1 / 2-4-2 / 1-2-1, rounded
// (+8) and divided by 16, producing 16 rows of 16 output bytes.
//
//   R0 = MB         start of the source area; also where output is written
//   R1 = ext_width  distance in bytes between consecutive source rows
//
// Source layout as read by the code: each row is fetched as six 32-bit
// words (24 bytes). Only byte 3 of word 0, all of words 1..4 and byte 0 of
// word 5 are used, i.e. source columns 3..20. Output pixel k of a row is
// centred on source column k+4 of the middle of three consecutive rows.
// 18 source rows are consumed (2 preloaded + 1 per loop pass).
//
// Output: four 32-bit stores through P4 per loop pass, so results are packed
// with a 16-byte row pitch starting at MB (the source area is overwritten
// from its start).
//
// NOTE(review): BYTEUNPACK selects bytes using the alignment bits of I0, and
// all loads are 32-bit; this appears to assume MB and ext_width are
// multiples of 4 -- confirm with the callers.
//
// Pointer, base and modify registers are used as plain 32-bit storage for
// the two previous rows. Uses R0-R3, P0-P2, I0-I3, M0-M3, B0-B3, L0-L3 and
// hardware loop 0 without saving them; R4-R7 and P3-P5 are saved/restored.
_MB_Gaus_filter:	//Gaussian filter preprocessing;
	[--SP] = (R7:4, P5:3);
	[--SP] = RETS;
	
    P4 = R0;//the Curr_MB addr
	I1 = R0;				// I1 -> source row 0
	R0 = R0 + R1;
	I0 = R0;				// I0 -> source row 1
	

    // Zero all length registers so I0-I3 behave as linear pointers/storage.
    R0 = 0;
    L0 = R0;
	L1 = R0;
	L2 = R0;
	L3 = R0;
        
    // M1 = ext_width - 20: after five +4 post-increments this moves I0 from
    // word 5 of one row to word 0 of the next row.
    R1 += -20;
    M1 = R1;
    R7 = R1;

//I2,I3,P0,P1,P2,P3 hold the data of the first (older) row;
//M2,M3,B0,B1,B2,B3 hold the data of the second (newer) row;
    R0 = [I1++] || R1 = [I0++];
    R0 >>= 24;				// keep only byte 3 of word 0 (column 3)
    I2 = R0;
    R1 >>= 24;
    M2 = R1;
    R0 = [I1++] || R1 = [I0++];
    I3 = R0;				// word 1 (columns 4..7)
    M3 = R1;
    R0 = [I1++] || R1 = [I0++];
    P0 = R0;				// word 2 (columns 8..11)
    B0 = R1;
    R0 = [I1++] || R1 = [I0++];
    P1 = R0;				// word 3 (columns 12..15)
    B1 = R1;
    R0 = [I1++] || R1 = [I0++];
    P2 = R0;				// word 4 (columns 16..19)
    B2 = R1;
    R0 = [I1++] || R1 = [I0++M1];	// I0 now -> source row 2
    R0 = R0.B(Z);			// keep only byte 0 of word 5 (column 20)
    P3 = R0;
    R1 = R1.B(Z);
    B3 = R1;
    
    // Inside the loop each row is read with two +8 steps and one +M1 step,
    // so M1 becomes ext_width - 16 to land on the next row again.
    R7 += 4;
    M1 = R7;
    
    M0 = 8;
    R0 = I0;
    R0 += 4;
    I1 = R0;				// I1 -> word 1 of source row 2 (I0 -> word 0)
    
    P5 = 16;				// 16 output rows
    
    // Per pass: "old" = row held in I2/I3/P0-P3, "mid" = row held in
    // M2/M3/B0-B3, "new" = row being loaded. Column sums are
    // old + 2*mid + new, kept as two 16-bit lanes per register. Each output
    // is left + 2*centre + right of those column sums, +8, >>4.
    // The 32-bit ">>= 4" lets bits of the high lane spill into the top of
    // the low lane; BYTEPACK only takes the low byte of each 16-bit half,
    // so the spilled bits are discarded.
    LSETUP(prepro_s, prepro_e) LC0=P5;
    prepro_s:
    	// Column 3 vertical sum -> R7, and rotate the stored rows.
    	R7 = I2;
    	I2 = M2;
    	R0 = M2;
    	R0 <<= 1;		// *2
    	R7 = R7 + R0;

    	R0 = [I0++M0] || R1 = [I1++M0];	// new row: R0 = word 0, R1 = word 1
    	R0 >>= 24;
    	M2 = R0;		//store r0;
    	R7 = R7 + R0;
    	
    	// Columns 4..7 vertical sums -> R5 (cols 4,5) and R6 (cols 6,7).
    	R0 = I3;
    	(R6, R5) = BYTEUNPACK R1:0;
    	I3 = M3;
    	R0 = M3;
    	(R4, R0) = BYTEUNPACK R1:0;
    	R4 <<= 1;
    	R0 <<= 1;
    	R5 = R5 + R0;
    	R6 = R6 + R4;
    	M3 = R1;		//store R1;
    	(R4, R0) = BYTEUNPACK R1:0(R);	// (R): unpack the new word held in R1
    	R5 = R5 + R0;
    	R6 = R6 + R4;
    	
    	R4 = R5.L(Z);
    	
    	// Output 0 (centre column 4) -> R2.L
    	R0 = R7 + R5;
    	R0 = R0 + R4;
    	R2.L = R0.L + R0.H(S);
    	
    	// Output 1 (centre column 5) -> R2.H
    	R4 = R6.L(Z);
    	R0 = R5 + R4;
    	R5 >>= 16;
    	R0 = R0 + R5;
    	
    	R2.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
    	R2 = R2 + R0;		// rounding term for both lanes
    	R2 >>= 4;
    	
    	// Output 2 (centre column 6) -> R3.L
    	R0 = R5 + R6;
    	R0 = R0 + R4;
    	R3.L = R0.L + R0.H(S);
    	
    	// Columns 8..11 vertical sums -> R5 (cols 8,9) and R7 (cols 10,11).
    	R0 = P0;
    	(R7, R5) = BYTEUNPACK R1:0;
    	P0 = B0;
    	R0 = B0;
    	(R4, R0) = BYTEUNPACK R1:0;
    	R4 <<= 1;
    	R0 <<= 1;
    	R5 = R5 + R0;
    	R7 = R7 + R4;
    	R0 = [I0++M0] || R1 = [I1++M0];	// new row: R0 = word 2, R1 = word 3
    	B0 = R0;
    	(R4, R0) = BYTEUNPACK R1:0;
    	R5 = R5 + R0;
    	R7 = R7 + R4;
    	
    	// Output 3 (centre column 7) -> R3.H
    	R4 = R5.L(Z);
    	R0 = R6 + R4;
    	R6 >>=16;
    	R0 = R0 + R6;
    	
    	R3.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R3 = R3 + R0;    	
    	R3 >>= 4;
    	//R0 = BYTEPACK(R2, R3);	//to avoid "read after write" delay;
    	//[P4++] = R3;    	    	//to avoid "read after write" delay;
    	
    	// Output 4 (centre column 8) -> R2.L; outputs 0..3 are packed in
    	// between and stored a few instructions later.
    	R0 = R6 + R5;
    	R0 = R0 + R4;
    	R3 = BYTEPACK(R2, R3);    	//to avoid "read after write" delay;
    	R2.L = R0.L + R0.H(S);
    	
    	// Output 5 (centre column 9) -> R2.H
    	R4 = R7.L(Z);
    	R0 = R5 + R4;
    	R5 >>= 16;
    	R0 = R0 + R5;
    	R2.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R2 = R2 + R0;    	
    	R2 >>= 4;
    	
    	// Output 6 (centre column 10) -> R3.L
    	R0 = R5 + R7;
    	R0 = R0 + R4;
    	[P4++] = R3;    	    	//to avoid "read after write" delay;
    	R3.L = R0.L + R0.H(S);
    	
    	// Columns 12..15 vertical sums -> R5 (cols 12,13) and R6 (cols 14,15).
    	R0 = P1;
    	(R6, R5) = BYTEUNPACK R1:0;
    	P1 = B1;
    	R0 = B1;
    	(R4, R0) = BYTEUNPACK R1:0;
    	R4 <<= 1;
    	R0 <<= 1;
    	R5 = R5 + R0;
    	R6 = R6 + R4;
    	B1 = R1;
    	(R4, R0) = BYTEUNPACK R1:0(R);	// (R): unpack the new word held in R1
    	R5 = R5 + R0;
    	R6 = R6 + R4;
    	
    	// Output 7 (centre column 11) -> R3.H
    	R4 = R5.L(Z);
    	R0 = R7 + R4;
    	R7 >>= 16;
    	R0 = R0 + R7;
    	R3.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R3 = R3 + R0;    	
    	R3 >>= 4;
    	
    	//R0 = BYTEPACK(R2, R3);	//to avoid "read after write" delay;
    	//[P4++] = R0;
    	
    	// Output 8 (centre column 12) -> R2.L
    	R0 = R7 + R5;
    	R0 = R0 + R4;
    	
    	R3 = BYTEPACK(R2, R3);	//to avoid "read after write" delay;
    	
    	R2.L = R0.L + R0.H(S);
    	
    	// Output 9 (centre column 13) -> R2.H
    	R4 = R6.L(Z);
    	R0 = R5 + R4;
    	R5 >>= 16;
    	R0 = R0 + R5;
    	R2.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R2 = R2 + R0;
    	R2 >>= 4;
		    	
    	// Output 10 (centre column 14) -> R3.L
    	R0 = R5 + R6;
    	R0 = R0 + R4;
    	
    	[P4++] = R3;			//to avoid "read after write" delay;
    	
    	R3.L = R0.L + R0.H(S);
    	
    	// Columns 16..19 vertical sums -> R5 (cols 16,17) and R7 (cols 18,19).
    	R0 = P2;
    	(R7, R5) = BYTEUNPACK R1:0;
    	P2 = B2;
    	R0 = B2;
    	(R4, R0) = BYTEUNPACK R1:0;
    	R4 <<= 1;
    	R0 <<= 1;
    	R5 = R5 + R0;
    	R7 = R7 + R4;
    	// new row: R0 = word 4, R1 = word 5; I0/I1 advance to the next row.
    	R0 = [I0++M1] || R1 = [I1++M1];
    	B2 = R0;
    	(R4, R0) = BYTEUNPACK R1:0;
    	R5 = R5 + R0;
    	R7 = R7 + R4;
    	
    	// Output 11 (centre column 15) -> R3.H
    	R4 = R5.L(Z);
    	R0 = R6 + R4;
    	R6 >>= 16;
    	R0 = R0 + R6;
    	R3.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R3 = R3 + R0;    	
    	R3 >>= 4;
    	//R0 = BYTEPACK(R2, R3);		//to avoid "read after write" delay;
    	//[P4++] = R0;
    	
    	// Output 12 (centre column 16) -> R2.L
    	R0 = R6 + R5;
    	R0 = R0 + R4;
    	
    	R3 = BYTEPACK(R2, R3);			//to avoid "read after write" delay;
    	
    	R2.L = R0.L + R0.H(S);
    	
    	// Output 13 (centre column 17) -> R2.H
    	R4 = R7.L(Z);
    	R0 = R5 + R4;
    	R5 >>= 16;
    	R0 = R0 + R5;
    	R2.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R2 = R2 + R0;    	
    	R2 >>= 4;
    	
    	// Output 14 (centre column 18) -> R3.L
    	R0 = R5 + R7;
    	R0 = R0 + R4;
    	
    	[P4++] = R3;					//to avoid "read after write" delay;
    	
    	R3.L = R0.L + R0.H(S);
    	
    	// Column 20 vertical sum -> R6 (single byte from word 5).
    	R6 = P3;
    	P3 = B3;
    	R0 = B3;
    	R0 <<= 1;
    	R6 = R6 + R0;
    	R1 = R1.B(Z);
    	B3 = R1;
    	R6 = R6 + R1;
    	
    	// Output 15 (centre column 19) -> R3.H
    	R0 = R7 + R6;
    	R7 >>= 16;
    	R0 = R0 + R7;
    	R3.H = R0.L + R0.H(S);
    	R0.L = 0x0008;
    	R0.H = 0x0008;
		R3 = R3 + R0;    	
    	R3 >>= 4;
    	R2 = BYTEPACK(R2, R3);
    	
    // Last instruction of the hardware loop: store outputs 12..15.
    prepro_e:
    	[P4++] = R2;
	
	// Epilogue: restore RETS and the registers pushed in the prologue.
	RETS = [SP++];
	(R7:4, P5:3) = [SP++];
_MB_Gaus_filter.end:
	RTS;
	NOP;
	

//void MB_no_pre(Ipp8u *MB, int width);	
// Stub with the same prototype as MB_Gaus_filter that performs no work:
// it reads neither argument, touches no memory and returns immediately.
_MB_no_pre:			//empty function;

_MB_no_pre.end:
	RTS;
	NOP;
	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -