⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blit_wmmx_fix.c

📁 大名鼎鼎的CE下播放软件TCPMP的源代码!在2410下可以流畅地解码QVGA的H264、MPEG4等格式。
💻 C
📖 第 1 页 / 共 2 页
字号:
			I2(TBCSTH,WR6,R0);
			I3(WAND,WR5,WR5,WR6);
		}

		I2(WUNPCKEHUB,WR6,WR3);
		I2(WUNPCKEHUB,WR7,WR4);
		I2(WUNPCKEHUB,WR8,WR5);

		if (p->DstPos[0]+p->DstSize[0]!=8) I3(p->DstPos[0]+p->DstSize[0]>8?WSLLWG:WSRLWG,WR6,WR6,WCGR0);
		if (p->DstPos[1]+p->DstSize[1]!=8) I3(p->DstPos[1]+p->DstSize[1]>8?WSLLWG:WSRLWG,WR7,WR7,WCGR1);
		if (p->DstPos[2]+p->DstSize[2]!=8) I3(p->DstPos[2]+p->DstSize[2]>8?WSLLWG:WSRLWG,WR8,WR8,WCGR2);

		I2(WUNPCKELUB,WR3,WR3);
		I2(WUNPCKELUB,WR4,WR4);
		I2(WUNPCKELUB,WR5,WR5);

		if (p->DstPos[0]+p->DstSize[0]!=8) I3(p->DstPos[0]+p->DstSize[0]>8?WSLLWG:WSRLWG,WR3,WR3,WCGR0);
		if (p->DstPos[1]+p->DstSize[1]!=8) I3(p->DstPos[1]+p->DstSize[1]>8?WSLLWG:WSRLWG,WR4,WR4,WCGR1);
		if (p->DstPos[2]+p->DstSize[2]!=8) I3(p->DstPos[2]+p->DstSize[2]>8?WSLLWG:WSRLWG,WR5,WR5,WCGR2);

		I3(WOR,WR6,WR6,WR7);
		I3(WOR,WR3,WR3,WR4);
		I3(WOR,WR6,WR6,WR8);
		I3(WOR,WR3,WR3,WR5);
	}

	if (p->DstDoubleX)
	{
		I2C(WSHUFH,WR7,WR6,0xFA); // 7 7 6 6
		I2C(WSHUFH,WR4,WR3,0xFA); // 3 3 2 2
		I2C(WSHUFH,WR6,WR6,0x50); // 5 5 4 4
		I2C(WSHUFH,WR3,WR3,0x50); // 1 1 0 0
	}

	if (p->SwapXY)
	{
		reg Dst = (reg)(Row?R2:R9);

		MB(); I3S(ADD,R1,Dst,R10,LSL,p->DstDoubleY);

		MB(); I2C(WSTRD,WR3,Dst,0);
		if (p->DstDoubleX)
		{
			MB(); I2C(WSTRD,WR4,Dst,8);
		}

		if (p->DstDoubleY)
		{
			MB(); I3(ADD,R5,Dst,R10);

			MB(); I2C(WSTRD,WR3,R5,0);
			if (p->DstDoubleX)
			{
				MB(); I2C(WSTRD,WR4,R5,8);
			}
		}

		MB(); I2C(WSTRD,WR6,R1,0);
		if (p->DstDoubleX)
		{
			MB(); I2C(WSTRD,WR7,R1,8);
		}

		if (p->DstDoubleY)
		{
			MB(); I3(ADD,R6,R1,R10);

			MB(); I2C(WSTRD,WR6,R6,0);
			if (p->DstDoubleX)
			{
				MB(); I2C(WSTRD,WR7,R6,8);
			}
		}

		MB(); I3S(ADD,Dst,Dst,R10,LSL,1+p->DstDoubleY);
	}
	else
	{
		if (p->DstDoubleY)
		{
			MB(); I3(ADD,R1,R2,R10);
			MB(); I3(ADD,R5,R9,R10);
		}

		if (p->DstDoubleX)
		{
			MB(); I2C(WSTRD_POST,WR6,R2,8);
			MB(); I2C(WSTRD_POST,WR3,R9,8);
		}
		else
		{
			MB(); I2C(WSTRD_POST,WR6,R2,8*p->DirX);
			MB(); I2C(WSTRD_POST,WR3,R9,8*p->DirX);
		}

		if (p->DstDoubleY)
		{
			MB(); I2C(WSTRD,WR6,R1,0);
			MB(); I2C(WSTRD,WR3,R5,0);
		}

		if (p->DstDoubleX)
		{
			MB(); I2C(WSTRD_POST,WR7,R2,p->DirX>0?8:-24);
			MB(); I2C(WSTRD_POST,WR4,R9,p->DirX>0?8:-24);

			if (p->DstDoubleY)
			{
				MB(); I2C(WSTRD,WR7,R1,8);
				MB(); I2C(WSTRD,WR4,R5,8);
			}
		}
	}
}

// Builds the WMMX blit routine that reads planar Y/U/V source data and writes
// packed RGB destination pixels. No pixels are converted here: every
// I*/MB/Label/InstPost call below appends an instruction, label or data block
// to the routine being generated between CodeBegin() and CodeEnd().
//
// p: blit description. This function reads its format/scaling fields
//    (DstPos/DstSize, DstBPP, DirX, SwapXY, SrcHalfX/Y, DstDoubleX/Y, ...)
//    and writes SrcAlignPos, DstAlignPos, DstAlignSize, DstStepX, DitherSize
//    and the constant-pool handles YMul..BAdd.
//
// Register usage of the generated code, as set up below:
//   R9  destination pointer (Dst[0])      R2  second destination pointer
//   R12 Y source pointer (Src[0])         R14 second Y source pointer
//   R3  U source pointer (Src[1])         R4  V source pointer (Src[2])
//   R10 DstPitch                          R11 SrcPitch
//   R8  end-of-line, later EndOfRect      R5  Width at the top of each row pass
//   WR9/WR10 channel masks, WR11..WR15 constants (or dither rows in WR12/WR13)
//   WCGR0..WCGR2 per-channel shift counts, WCGR3 = 8 in half mode
void WMMXFix_RGB_UV(blit_soft* p)
{
	// Source is being downscaled by two in at least one direction.
	bool_t HalfMode = p->SrcHalfX || p->SrcHalfY;
	dyninst* LoopY;
	dyninst* LoopX;
	dyninst* EndLine;
	dyninst* Dither = NULL;

	// Alignment requirements reported back through p; the destination
	// alignment is doubled when RScaleX==8.
	p->SrcAlignPos = p->DstAlignPos = p->DstAlignSize = 8;
	if (p->RScaleX==8) p->DstAlignSize = 16;

	// Signed destination step per block: DstBPP*8 >> 3, doubled with DstDoubleX.
	// (presumably DstBPP is bits per pixel and a block is 8 pixels, giving bytes - TODO confirm)
	p->DstStepX = p->DirX * ((p->DstBPP*8) >> 3) << p->DstDoubleX;

	// 16-bit constants for the generated code: multipliers are stored as
	// abs(value)>>8, adders as value>>16.
	p->YMul = InstCreate16(abs(p->_YMul) >> 8,NONE,NONE,NONE,0,0);
	p->RVMul = InstCreate16(abs(p->_RVMul) >> 8,NONE,NONE,NONE,0,0);
	p->RAdd = InstCreate16((p->_RAdd) >> 16,NONE,NONE,NONE,0,0);
	p->GUMul = InstCreate16(abs(p->_GUMul) >> 8,NONE,NONE,NONE,0,0);
	p->GVMul = InstCreate16(abs(p->_GVMul) >> 8,NONE,NONE,NONE,0,0);
	p->GAdd = InstCreate16((p->_GAdd) >> 16,NONE,NONE,NONE,0,0);
	p->BUMul = InstCreate16(abs(p->_BUMul) >> 8,NONE,NONE,NONE,0,0);
	p->BAdd = InstCreate16((p->_BAdd) >> 16,NONE,NONE,NONE,0,0);

	CodeBegin();
	// Reserve the local stack frame.
	I2C(SUB,SP,SP,OFS(stack,StackFrame));

	// 64-bit loads starting at RAdd and at RVMul. The constants are posted
	// back to back further down (RVMul,GUMul,GVMul,BUMul then RAdd,GAdd,BAdd,YMul),
	// so each load apparently picks up four neighbouring 16-bit values.
	I1P(WLDRD,WR11,p->RAdd,0);
	I1P(WLDRD,WR14,p->RVMul,0);

	I3(MOV,R10,NONE,R3); //DstPitch
	I2C(LDR,R9,R1,0); //Dst[0] RGB
	I2C(LDR,R3,R2,4); //Src[1] U
	I2C(LDR,R4,R2,8); //Src[2] V
	I2C(LDR,R12,R2,0); //Src[0] Y
	I2C(LDR,R11,SP,OFS(stack,SrcPitch));

	// Broadcast single halfwords of WR11 into whole registers.
	I2C(WSHUFH,WR15,WR11,0xFF); //ymul
	if (!(p->FX.Flags & BLITFX_DITHER))
	{
		I2C(WSHUFH,WR12,WR11,0x55); //gadd
		I2C(WSHUFH,WR13,WR11,0xAA); //badd
		I2C(WSHUFH,WR11,WR11,0x00); //radd
	}
	else
	{
		// Dithering: WR11 is left as loaded; WR12/WR13 receive the dither
		// matrix instead of the broadcast gadd/badd values.
		int i;
		// 4x4 matrix holding each value 0..15 once.
		static const uint8_t Matrix0[16] = 
		{	0,   8,  2, 10,
			12,  4, 14,  6,
			3,  11,  1,  9,
			15,  7, 13,  5 };

		uint8_t Matrix[16];
		memcpy(Matrix,Matrix0,sizeof(Matrix));
		// DitherSize = bit count of the narrowest destination channel; when
		// it exceeds 4 bits the matrix values are scaled down accordingly.
		p->DitherSize = min(p->DstSize[0],min(p->DstSize[1],p->DstSize[2]));
		if (p->DitherSize>4)
			for (i=0;i<16;++i)
				Matrix[i] >>= p->DitherSize-4;

		// dither mask: first 8 matrix bytes in WR12, last 8 bytes in WR13
		Dither = InstCreate(Matrix,16,NONE,NONE,NONE,0,0);
		I1P(WLDRD,WR12,Dither,0);
		I1P(WLDRD,WR13,Dither,8);
	}

 	I2C(LDR,R5,SP,OFS(stack,Height));
	I2C(LDR,R6,SP,OFS(stack,Width));

	// SrcNext = (SrcPitch << (1+SrcHalfY+2*SwapXY)) - (Width >> (SrcDoubleX-SrcHalfX))
	// NOTE(review): the shift count can be negative here and below; presumably
	// I3S turns that into a shift in the opposite direction - confirm.
	I3S(MOV,R1,NONE,R11,LSL,1+p->SrcHalfY+p->SwapXY*2);
	I3S(SUB,R1,R1,R6,LSR,p->SrcDoubleX-p->SrcHalfX); 
	I2C(STR,R1,SP,OFS(stack,SrcNext));

	// UVNext = (SrcPitch >> (1-SrcHalfY-2*SwapXY)) - (Width >> (SrcDoubleX+1-SrcHalfX))
	I3S(MOV,R2,NONE,R11,ASR,1-p->SrcHalfY-p->SwapXY*2);
	I3S(SUB,R2,R2,R6,LSR,p->SrcDoubleX+1-p->SrcHalfX); 
	I2C(STR,R2,SP,OFS(stack,UVNext));

	if (p->DirX<0) //adjust reversed destination for block size
		I2C(SUB,R9,R9,-(p->DstStepX >> 1)-(p->DstBPP >> 3));

	if (p->SwapXY)
	{
		// EndOfRect = Dst + ((Height * DstBPP * DirX) >> 3)
		I2C(MOV,R1,NONE,p->DstBPP * p->DirX);
		I3(MUL,R1,R5,R1);
		I3S(ADD,R1,R9,R1,ASR,3);
		I2C(STR,R1,SP,OFS(stack,EndOfRect));

		//DstNext = DstStepX - Width*DstPitch;
		MB(); I3(MUL,R2,R10,R6);
		I2C(MOV,R1,NONE,p->DstStepX); 
		I3(SUB,R1,R1,R2); 
		I2C(STR,R1,SP,OFS(stack,DstNext));
	}
	else
	{
		// EndOfRect = Dst + DstPitch * Height
		I3(MUL,R1,R10,R5);
		I3(ADD,R1,R9,R1);
		I2C(STR,R1,SP,OFS(stack,EndOfRect));

		// DstNext = (DstPitch << (DstDoubleY+1)) - DirX * (Width << DstBPP2)
		// (emitted as SUB for DirX>0 and ADD otherwise)
		I3S(MOV,R2,NONE,R10,LSL,p->DstDoubleY+1);
		I3S(p->DirX>0?SUB:ADD,R2,R2,R6,LSL,p->DstBPP2); 
		I2C(STR,R2,SP,OFS(stack,DstNext));
	}

	// setup shift registers
	// wcgr0 abs(rpos-8)
	// wcgr1 abs(gpos-8)
	// wcgr2 abs(bpos-8)
	// wcgr3 8 (if HalfMode)
	// where rpos/gpos/bpos are DstPos[n]+DstSize[n] of the respective channel

	I2C(MOV,R5,NONE,abs(p->DstPos[0]+p->DstSize[0]-8));
	I2C(MOV,R6,NONE,abs(p->DstPos[1]+p->DstSize[1]-8));
	I2C(MOV,R7,NONE,abs(p->DstPos[2]+p->DstSize[2]-8));
	I2(TMCR,WCGR0,R5);
	I2(TMCR,WCGR1,R6);
	I2(TMCR,WCGR2,R7);

	if (HalfMode)
	{
		I2C(MOV,R1,NONE,8);
		I2(TMCR,WCGR3,R1);
	}

	// setup masks
	// r0 bmask (only set in the separate-mask case below)
	// wr9,wr10 mask r,g

	// Green mask: the top DstSize[1] bits of a byte, put into WR10 with TBCSTB.
	I2C(MOV,R1,NONE,((1 << p->DstSize[1])-1)<<(8-p->DstSize[1]));
	I2(TBCSTB,WR10,R1);

	if (p->DstPos[2]==0 && p->DstPos[0]+p->DstSize[0]==16)
	{
		// Blue starts at bit 0 and red ends at bit 16: build one 16-bit mask
		// (top DstSize[0] bits | low DstSize[2] bits) and put it into WR9
		// with TBCSTH (red and blue word mask in WR9).
		I2C(MOV,R1,NONE,((1 << p->DstSize[0])-1)<<(16-p->DstSize[0]));
		I2C(ORR,R1,R1,(1 << p->DstSize[2])-1);
		I2(TBCSTH,WR9,R1);
	}
	else
	{
		// Separate byte masks: red goes to WR9, blue is only left in R0.
		I2C(MOV,R1,NONE,((1 << p->DstSize[0])-1)<<(8-p->DstSize[0]));
		I2C(MOV,R0,NONE,((1 << p->DstSize[2])-1)<<(8-p->DstSize[2]));
		I2(TBCSTB,WR9,R1);
	}

	// R2 = second destination pointer: offset from R9 by (8*DirX)<<DstDoubleX
	// when SwapXY, otherwise by DstPitch<<DstDoubleY.
	if (p->SwapXY)
		I2C(ADD,R2,R9,(8*p->DirX) << p->DstDoubleX);
	else
		I3S(ADD,R2,R9,R10,LSL,p->DstDoubleY);

	// R14 = second Y source pointer, SrcPitch<<SrcHalfY past R12.
	I3S(ADD,R14,R12,R11,LSL,p->SrcHalfY);
	if (!p->SwapXY)
	{
		// Bias the U/V pointers by -4; the preload code below compensates
		// with UVAdj. (presumably Fix_UV pre-increments - TODO confirm)
		I2C(SUB,R3,R3,4);
		I2C(SUB,R4,R4,4);
	}

	I2C(LDR,R5,SP,OFS(stack,Width));
	
	// Branch over the constant pool that is placed inline right here, then
	// define LoopY behind it.
	LoopY = Label(0);
	I0P(B,AL,LoopY);

	Align(8);

	// Post order matters: the two WLDRD loads above read 8 bytes starting at
	// RVMul and at RAdd respectively.
	InstPost(p->RVMul);
	InstPost(p->GUMul);
	InstPost(p->GVMul);
	InstPost(p->BUMul);

	InstPost(p->RAdd);
	InstPost(p->GAdd);
	InstPost(p->BAdd);
	InstPost(p->YMul);

	if (Dither)
		InstPost(Dither);
	InstPost(LoopY);

	// R8 = value of R9 at which the current line is finished.
	if (p->SwapXY)
	{
		I3(MUL,R1,R10,R5); //dstpitch * width
		I3(ADD,R8,R9,R1);
	}
	else
	{
		if (p->DirX > 0)
			I3S(ADD,R8,R9,R5,LSL,p->DstBPP2);
		else
			I3S(SUB,R8,R9,R5,LSL,p->DstBPP2);
	}

	// LoopX is only created here; it is posted after the preload code so the
	// early-out branches below skip straight to the conversion loop.
	LoopX = Label(0);

	// preload
	if (!p->Slices)
	{
		// Emits four loops (two Y rows, U, V) that read one byte every
		// 32 bytes across the row; the loaded values in R6/R7 are never used
		// by the code emitted in this function.
		// (presumably this warms the data cache - TODO confirm)
		// Label(1) labels are branched to without an InstPost, so presumably
		// Label(1) posts the label immediately - confirm.
		// Clobbers R1, R5, R6 and R7 (R5 no longer holds Width afterwards).
		dyninst* PreLoad1;
		dyninst* PreLoad2;
		dyninst* PreLoad3;
		dyninst* PreLoad4;
		// Compensates the -4 bias applied to R3/R4 when !SwapXY.
		int UVAdj = p->SwapXY?0:4;

		// R1 = end of the first Y row (R12 + scaled Width); skip all
		// preloading if the row does not extend beyond 32 bytes.
		I3S(ADD,R1,R12,R5,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0));
		I2C(ADD,R5,R12,32);
		I3(CMP,NONE,R5,R1);
		I0P(B,CS,LoopX);

		//y0
		PreLoad1 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64);
		I0P(B,CC,PreLoad1);

		// Rebase the end pointer from the first Y row (R12) to the second (R14).
		I3(SUB,R1,R1,R12);
		I3(ADD,R1,R1,R14);
		I2C(ADD,R5,R14,32);

		//y1
		PreLoad2 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64);
		I0P(B,CC,PreLoad2);

		// R1 = Y row length, scaled down by SrcUVX2 and rebased onto the U row.
		I3(SUB,R1,R1,R14);

		I3S(ADD,R1,R3,R1,ASR,p->SrcUVX2);

		I2C(ADD,R5,R3,32);
		I3(CMP,NONE,R5,R1);
		I0P(B,CS,LoopX);

		//u
		PreLoad3 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32+UVAdj);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64+UVAdj);
		I0P(B,CC,PreLoad3);

		// Rebase the end pointer from the U row (R3) to the V row (R4).
		I3(SUB,R1,R1,R3);

		I3(ADD,R1,R1,R4);
		I2C(ADD,R5,R4,32);

		//v
		PreLoad4 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32+UVAdj);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64+UVAdj);
		I0P(B,CC,PreLoad4);
	}
	else
	if (p->ARM5)
	{
		//preload next: PLD at each source pointer plus a pitch-derived offset
		I3S(PLD,NONE,R12,R11,LSL,p->SrcHalfY+1); 
		I3S(PLD,NONE,R14,R11,LSL,p->SrcHalfY+1);
		I3S(PLD,NONE,R3,R11,ASR,p->SrcUVPitch2);
		I3S(PLD,NONE,R4,R11,ASR,p->SrcUVPitch2);
	}

	EndLine = Label(0);
	InstPost(LoopX);
	{
		// Loop body: Fix_UV/Fix_Y (defined elsewhere in this file) emit the
		// actual conversion code. Fix_UV is skipped in half mode.
		// NOTE(review): the first integer argument of Fix_Y is presumably the
		// row (0/1) and the second the unrolled pass index - confirm.
		if (!HalfMode) Fix_UV(p,0,0,0);
		Fix_Y(p,0,0,HalfMode);
		Fix_Y(p,1,0,HalfMode);

		if (p->SwapXY && (p->FX.Flags & BLITFX_DITHER))
		{
			// With SwapXY and dithering the loop body is emitted twice, the
			// second copy with pass index 1; leave early if the line ended
			// after the first copy.
			I3(CMP,NONE,R9,R8);
			I0P(B,EQ,EndLine);

			if (!HalfMode) Fix_UV(p,0,0,0);
			Fix_Y(p,0,1,HalfMode);
			Fix_Y(p,1,1,HalfMode);
		}

		// Repeat until the destination pointer reaches the end of the line.
		I3(CMP,NONE,R9,R8);
		I0P(B,NE,LoopX);
	}
	InstPost(EndLine);

	// Per-line strides computed in the prologue; R8 now becomes EndOfRect.
	I2C(LDR,R5,SP,OFS(stack,SrcNext));
	I2C(LDR,R6,SP,OFS(stack,DstNext));
	I2C(LDR,R7,SP,OFS(stack,UVNext));
	I2C(LDR,R8,SP,OFS(stack,EndOfRect));
	
	//increment pointers
	I3(ADD,R12,R12,R5);
	I3(ADD,R14,R14,R5);
	I3(ADD,R2,R2,R6);
	I3(ADD,R9,R9,R6);
	I3(ADD,R3,R3,R7);
	I3(ADD,R4,R4,R7);

	if (!p->SwapXY && (p->FX.Flags & BLITFX_DITHER))
	{
		//swap WR12 and WR13 (the two dither matrix halves), using WR3 as a
		//temporary; WOR of a register with itself acts as a move
		I3(WOR,WR3,WR12,WR12);
		I3(WOR,WR12,WR13,WR13);
		I3(WOR,WR13,WR3,WR3);
	}

	//prepare registers for next row
	I2C(LDR,R5,SP,OFS(stack,Width));

	// Outer loop: continue until the destination pointer reaches EndOfRect.
	I3(CMP,NONE,R9,R8);
	I0P(B,NE,LoopY);

	// Release the stack frame reserved in the prologue.
	I2C(ADD,SP,SP,OFS(stack,StackFrame));
	CodeEnd();
}

#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -