⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blit_wmmx_fix.c

📁 大名鼎鼎的CE下播放软件,TCPMP的源代码!!!2410下可以流畅地解码QVGA的H264,MPEG4等格式.
💻 C
📖 第 1 页 / 共 2 页
字号:
/*****************************************************************************
 *
 * This program is free software ; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * $Id: blit_wmmx_fix.c 271 2005-08-09 08:31:35Z picard $
 *
 * The Core Pocket Media Player
 * Copyright (c) 2004-2005 Gabor Kovacs
 *
 ****************************************************************************/

#include "../common.h"
#include "../dyncode/dyncode.h"
#include "blit_soft.h"

// RScale==16 && !SwapXY	8x2 -> 8x2
// RScale==16 && SwapXY		2x8 -> 8x2
// RScale==8  && !SwapXY	8x2 -> 16x4
// RScale==8  && SwapXY		2x8 -> 16x4
// RScale==32 && !SwapXY	16x4 -> 8x2
// RScale==32 && SwapXY		4x16 -> 8x2

#if defined(ARM) 

// Stack layout associated with the generated blit routine.
// NOTE(review): the fields are presumably addressed by offset from the
// generated code, so their order and sizes should not be changed without
// checking the code generator that uses this struct -- confirm.
typedef struct stack
{
	// NOTE(review): presumably scratch slots owned by the generated code
	// (end-of-rectangle marker and per-row pointer steps) -- confirm against
	// the generator; nothing in this part of the file reads them.
	int EndOfRect;
	int DstNext;
	int SrcNext;
	int UVNext;

	// STACKFRAME is defined outside this section.
	// NOTE(review): presumably the area for registers saved on entry -- confirm.
	int StackFrame[STACKFRAME];

	// The four entries below are not struct members; they are listed only to
	// show which register (R0..R3) holds each of them.
	//void* this   R0
	//char* Dst    R1
	//char* Src    R2
	//int DstPitch R3 can be signed
	// NOTE(review): the remaining members presumably correspond to the
	// arguments that follow DstPitch and are passed on the stack -- confirm.
	int SrcPitch; //can be signed
	int Width; 
	int Height;
	int Src2SrcLast;
} stack;

// r0 bmask (if bpos!=0)
// r1 temp
// r2 dst1 (dst+dstpitch*DoubleY or dst+8*DoubleX*DirX)
// r3 u (-=4 if !swapxy)
// r4 v (-=4 if !swapxy)
// r5..r7 temp
// r8 endofline
// r9 dst0
// r10 dstpitch
// r11 srcpitch
// r12 y0
// r14 y1 (y0+(srchalfy?2:1)*srcpitch)

// wr0..wr2 r0,g0,b0 (from uv)
// wr3..wr5 temp1 (r,g,b)
// wr6..wr8 temp2 (r,g,b)
// wr9,wr10 mask r,g
// wr11 radd (if !dither)
// wr12 gadd (if !dither)
// wr13 badd (if !dither)
// wr11 radd,gadd,badd (if dither)
// wr12,wr13 dither
// wr14 rvmul,gumul,gvmul,bumul
// wr15 ymul

// wcgr0 abs(rpos-8)
// wcgr1 abs(gpos-8)
// wcgr2 abs(bpos-8)
// wcgr3 8 (if HalfMode)

// using: wr0,wr1,wr2,wr6,wr7
// Emits the chroma fetch used when SwapXY is set: four byte loads from Src
// (one per source row, rows numbered 0..3 in the comments below) followed by
// four TINSRB inserts that place the bytes into the odd byte lanes of Dst.
//
// p        blit state; DirX, DstHalfY and SrcHalfY are read here
// HalfMode not read by this function (kept for the callers' signature)
// Part     with DstHalfY set, selects the variant that walks the rows
//          backwards (3..0) instead of forwards (0..3)
// Row      only read in the Part variant: picks the final +1/-1 post step
//          and the SUB/ADD used for the SrcHalfY adjustment
// Src      ARM register used as the chroma pointer; the emitted loads update it
// Dst      WMMX register receiving the bytes
//
// Temporaries used by the emitted code: R1, R5, R6, R7. R11 supplies the row
// step (listed as srcpitch in the register map at the top of the file).
static void Fix_UVSwapXY(blit_soft* p,bool_t HalfMode,bool_t Part,bool_t Row,reg Src,reg Dst)
{
	// Lane[n] is the destination byte lane for source row n.
	int Lane[4];

	if (p->DirX>0)
	{
		Lane[0] = 1; Lane[1] = 3; Lane[2] = 5; Lane[3] = 7;
	}
	else if (p->DstHalfY)
	{
		// fully reversed order
		Lane[0] = 7; Lane[1] = 5; Lane[2] = 3; Lane[3] = 1;
	}
	else
	{
		// swapped inside each pair
		Lane[0] = 3; Lane[1] = 1; Lane[2] = 7; Lane[3] = 5;
	}

	if (!p->DstHalfY)
	{
		// Rows are visited in the order 0,1,3,2; the two add steps are undone
		// by the two subtract steps.
		// NOTE(review): ASR with a shift count of 0 is passed for the
		// full-step accesses; presumably the emitter turns that into an
		// unshifted operand -- confirm in the dyncode emitter.
		Byte(); I3S(LDR_POST,R5,Src,R11,ASR,1);		// row 0
		Byte(); I3S(LDR_POST,R6,Src,R11,ASR,0);		// row 1
		Byte(); I3S(LDR_POSTSUB,R7,Src,R11,ASR,1);	// row 3
		Byte(); I3S(LDR_POSTSUB,R1,Src,R11,ASR,0);	// row 2

		I2C(TINSRB,Dst,R5,Lane[0]);
		I2C(TINSRB,Dst,R6,Lane[1]);
		I2C(TINSRB,Dst,R7,Lane[3]);
		I2C(TINSRB,Dst,R1,Lane[2]);

		// step Src by one after the four rows
		I2C(ADD,Src,Src,1);
	}
	else if (Part)
	{
		int Step = Row ? 1 : -1;

		// Walk backwards from row 3 to row 0.
		Byte(); I3S(LDR_POSTSUB,R7,Src,R11,ASR,1);	// row 3
		Byte(); I3S(LDR_POSTSUB,R1,Src,R11,ASR,1);	// row 2
		Byte(); I3S(LDR_POSTSUB,R6,Src,R11,ASR,1);	// row 1
		Byte(); I2C(LDR_POST,R5,Src,Step);			// row 0

		I2C(TINSRB,Dst,R7,Lane[3]);
		I2C(TINSRB,Dst,R1,Lane[2]);
		I2C(TINSRB,Dst,R6,Lane[1]);
		I2C(TINSRB,Dst,R5,Lane[0]);

		if (p->SrcHalfY)
		{
			// 4 rows
			if (Row)
			{
				I3S(SUB,Src,Src,R11,LSL,1);
			}
			else
			{
				I3S(ADD,Src,Src,R11,LSL,1);
			}
		}
	}
	else
	{
		// Walk forwards from row 0 to row 3; the last load steps Src by one.
		Byte(); I3S(LDR_POST,R5,Src,R11,ASR,1);		// row 0
		Byte(); I3S(LDR_POST,R6,Src,R11,ASR,1);		// row 1
		Byte(); I3S(LDR_POST,R1,Src,R11,ASR,1);		// row 2
		Byte(); I2C(LDR_POST,R7,Src,1);				// row 3

		I2C(TINSRB,Dst,R5,Lane[0]);
		I2C(TINSRB,Dst,R6,Lane[1]);
		I2C(TINSRB,Dst,R1,Lane[2]);
		I2C(TINSRB,Dst,R7,Lane[3]);
	}
}

// Emits the chroma half of the conversion.
//
// First U is brought into WR6 and V into WR7 (via Fix_UVSwapXY when SwapXY is
// set, otherwise with word loads through R3/R4 followed by a byte unpack).
// The values are then multiplied by the coefficients selected from WR14 and
// combined with the add terms, leaving R in WR0, G in WR1 and B in WR2.
//
// p        blit state; reads SwapXY, DstHalfY, SrcHalfX, SrcHalfY, DstHalfX,
//          DirX, FX.Flags and the signs of _RVMul/_GUMul/_GVMul/_BUMul
// HalfMode enables the extra halfword unpack (when DstHalfX is clear) and
//          takes part in choosing the pre-index step of the loads
// Part     with DstHalfY set, loads from one uv row below without moving R3/R4
// Row      selects high/low unpack and takes part in the pre-index choice
static void Fix_UV(blit_soft* p,bool_t HalfMode,bool_t Part,bool_t Row)
{
	// Add/subtract opcodes chosen from the coefficient signs.
	int MixG;
	int AccR;
	int AccG;
	int AccB;

	if ((p->_GUMul<0) != (p->_GVMul<0))
		MixG = WSUBH;
	else
		MixG = WADDH;

	AccR = WADDH;
	if (p->_RVMul<0)
		AccR = WSUBH;

	AccG = WADDH;
	if (p->_GUMul<0)
		AccG = WSUBH;

	AccB = WADDH;
	if (p->_BUMul<0)
		AccB = WSUBH;

	if (!p->SwapXY)
	{
		if (p->DstHalfY && Part)
		{
			// one uv row below
			MB(); I3S(ADD,R5,R3,R11,ASR,1);
			MB(); I3S(ADD,R6,R4,R11,ASR,1);
			MB(); I2C(WLDRW,WR6,R5,0);
			MB(); I2C(WLDRW,WR7,R6,0);
		}
		else
		{
			// pre-index step is 4 unless HalfMode, !SrcHalfX and Row all hold
			int Pre = 4;
			if (HalfMode && !p->SrcHalfX && Row)
				Pre = 0;

			MB(); I2C(WLDRW_PRE,WR6,R3,Pre);
			MB(); I2C(WLDRW_PRE,WR7,R4,Pre);
		}

		I3(WUNPCKILB,WR6,WR6,WR6);
		I3(WUNPCKILB,WR7,WR7,WR7);

		if (p->DirX<0)
		{
			//swap order (2:2)
			int Order = 0xB1;
			if (p->SrcHalfY)
				Order = 0x1B;

			I2C(WSHUFH,WR6,WR6,Order);
			I2C(WSHUFH,WR7,WR7,Order);
		}
	}
	else
	{
		Fix_UVSwapXY(p,HalfMode,Part,Row,R3,WR6);
		Fix_UVSwapXY(p,HalfMode,Part,Row,R4,WR7);
	}

	if (HalfMode && !p->DstHalfX)
	{
		// high unpack when Row equals (DirX>0), low unpack otherwise
		int Unpack = WUNPCKILH;
		if (Row == (p->DirX>0))
			Unpack = WUNPCKIHH;

		I3(Unpack,WR6,WR6,WR6);
		I3(Unpack,WR7,WR7,WR7);
	}

	// here: WR6 = U, WR7 = V

	I2C(WSHUFH,WR1,WR14,0x55); // gumul
	I3(WMULUM,WR1,WR1,WR6);
	I2C(WSHUFH,WR2,WR14,0xFF); // bumul
	I3(WMULUM,WR2,WR2,WR6);
	I2C(WSHUFH,WR6,WR14,0xAA); // gvmul (U in WR6 is no longer needed)
	I3(WMULUM,WR6,WR6,WR7);
	I2C(WSHUFH,WR0,WR14,0x00); // rvmul
	I3(WMULUM,WR0,WR0,WR7);
	I3(MixG,WR1,WR1,WR6);

	// here: WR0 = R, WR1 = G, WR2 = B (before the add terms)

	if (!(p->FX.Flags & BLITFX_DITHER))
	{
		// separate add registers: WR11 radd, WR12 gadd, WR13 badd
		I3(AccR,WR0,WR11,WR0);
		I3(AccG,WR1,WR12,WR1);
		I3(AccB,WR2,WR13,WR2);
	}
	else
	{
		// with dithering all three add terms are selected from WR11
		I2C(WSHUFH,WR6,WR11,0x00); // radd
		I2C(WSHUFH,WR7,WR11,0x55); // gadd
		I3(AccR,WR0,WR6,WR0);
		I2C(WSHUFH,WR6,WR11,0xAA); // badd
		I3(AccG,WR1,WR7,WR1);
		I3(AccB,WR2,WR6,WR2);
	}
}

static void Fix_Y(blit_soft* p,bool_t Row,int Col,bool_t HalfMode)
{
	// load to upper 8bits (doesn't matter what is in the lower 8 bits)
	// load y0 wr5
	// load y1 wr8
	reg Dither;
	int AddY;

	if (p->SwapXY)
	{
		Dither = (reg)(Col?WR13:WR12);

		if (p->ArithStretch && p->SrcHalfX && p->SrcHalfY)
		{ 
			I3(LDR_POST,R5,R12,R11);
			I3(LDR_POST,R6,R12,R11);
			I3(LDR_POST,R7,R12,R11);
			I3(LDR_POST,R1,R12,R11);
			I2C(TINSRH,WR5,R5,p->DirX>0?0:3);
			I2C(TINSRH,WR6,R6,p->DirX>0?0:3);
			I2C(TINSRH,WR5,R7,p->DirX>0?1:2);
			I2C(TINSRH,WR6,R1,p->DirX>0?1:2);
			I3S(MOV,R5,NONE,R5,LSR,16);
			I3S(MOV,R6,NONE,R6,LSR,16);
			I3S(MOV,R7,NONE,R7,LSR,16);
			I3S(MOV,R1,NONE,R1,LSR,16);
			I2C(TINSRH,WR8,R5,p->DirX>0?0:3);
			I2C(TINSRH,WR7,R6,p->DirX>0?0:3);
			I2C(TINSRH,WR8,R7,p->DirX>0?1:2);
			I2C(TINSRH,WR7,R1,p->DirX>0?1:2);
			I3(LDR_POST,R5,R12,R11);
			I3(LDR_POST,R6,R12,R11);
			I3(LDR_POST,R7,R12,R11);
			I3(LDR_POST,R1,R12,R11);
			I2C(TINSRH,WR5,R5,p->DirX>0?2:1);
			I2C(TINSRH,WR6,R6,p->DirX>0?2:1);
			I2C(TINSRH,WR5,R7,p->DirX>0?3:0);
			I2C(TINSRH,WR6,R1,p->DirX>0?3:0);
			I3S(MOV,R5,NONE,R5,LSR,16);
			I3S(MOV,R6,NONE,R6,LSR,16);
			I3S(MOV,R7,NONE,R7,LSR,16);
			I3S(MOV,R1,NONE,R1,LSR,16);
			I2C(TINSRH,WR8,R5,p->DirX>0?2:1);
			I2C(TINSRH,WR7,R6,p->DirX>0?2:1);
			I2C(TINSRH,WR8,R7,p->DirX>0?3:0);
			I2C(TINSRH,WR7,R1,p->DirX>0?3:0);

			I3(WAVG2B,WR5,WR5,WR6);
			I3(WSLLHG,WR6,WR5,WCGR3);
			I3(WAVG2B,WR8,WR8,WR7);
			I3(WSLLHG,WR7,WR8,WCGR3);
			I3(WAVG2B,WR5,WR5,WR6);
			I3(WAVG2B,WR8,WR8,WR7);
		}
		else
		if (p->SrcHalfX)
		{
			reg Add = R11;
			if (p->SrcHalfY)
			{
				I3(ADD,R1,R11,R11);
				Add = R1;
			}

			I3(LDR_POST,R5,R12,Add);
			I3(LDR_POST,R6,R12,Add);
			I3(LDR_POST,R7,R12,Add);
			I3(LDR_POST,R1,R12,Add);
			I2C(TINSRH,WR5,R5,p->DirX>0?0:3);
			I2C(TINSRH,WR5,R6,p->DirX>0?1:2);
			I2C(TINSRH,WR5,R7,p->DirX>0?2:1);
			I2C(TINSRH,WR5,R1,p->DirX>0?3:0);
			I3S(MOV,R5,NONE,R5,LSR,16);
			I3S(MOV,R6,NONE,R6,LSR,16);
			I3S(MOV,R7,NONE,R7,LSR,16);
			I3S(MOV,R1,NONE,R1,LSR,16);
			I2C(TINSRH,WR8,R5,p->DirX>0?0:3);
			I2C(TINSRH,WR8,R6,p->DirX>0?1:2);
			I2C(TINSRH,WR8,R7,p->DirX>0?2:1);
			I2C(TINSRH,WR8,R1,p->DirX>0?3:0);
		}
		else
		{
			reg Add = R11;
			if (p->SrcHalfY)
			{
				I3(ADD,R1,R11,R11);
				Add = R1;
			}

			Half(); I3(LDR_POST,R5,R12,Add);
			Half(); I3(LDR_POST,R6,R12,Add);
			Half(); I3(LDR_POST,R7,R12,Add);
			Half(); I3(LDR_POST,R1,R12,Add);
			I2C(TINSRB,WR5,R5,p->DirX>0?1:7);
			I2C(TINSRH,WR8,R5,p->DirX>0?0:3);
			I2C(TINSRB,WR5,R6,p->DirX>0?3:5);
			I2C(TINSRH,WR8,R6,p->DirX>0?1:2);
			I2C(TINSRB,WR5,R7,p->DirX>0?5:3);
			I2C(TINSRH,WR8,R7,p->DirX>0?2:1);
			I2C(TINSRB,WR5,R1,p->DirX>0?7:1);
			I2C(TINSRH,WR8,R1,p->DirX>0?3:0);
		}

		if (Row)
		{
			I3S(SUB,R12,R12,R11,LSL,3+p->SrcHalfY);
			I2C(ADD,R12,R12,2 << p->SrcHalfX);
		}
	}
	else
	{
		Dither = WR12;
		if (p->ArithStretch && p->SrcHalfX && p->SrcHalfY)
		{ 
			MB(); I3(ADD,R1,R12,R11);
			MB(); I3(ADD,R5,R14,R11);
			MB(); I2C(WLDRD_POST,WR5,R12,8);
			MB(); I2C(WLDRD,WR6,R1,0);
			MB(); I2C(WLDRD_POST,WR8,R14,8);
			MB(); I2C(WLDRD,WR7,R5,0);

			I3(WAVG2B,WR5,WR5,WR6);
			I3(WSLLHG,WR6,WR5,WCGR3);
			I3(WAVG2B,WR8,WR8,WR7);
			I3(WSLLHG,WR7,WR8,WCGR3);
			I3(WAVG2B,WR5,WR5,WR6);
			I3(WAVG2B,WR8,WR8,WR7);
		}
		else
		if (p->SrcHalfX)
		{
			MB(); I2C(WLDRD_POST,WR5,R12,8); // use only every second pixel
			MB(); I2C(WLDRD_POST,WR8,R14,8); // use only every second pixel
		}
		else
		{
			MB(); I2C(WLDRW_POST,WR5,R12,4);
			MB(); I2C(WLDRW_POST,WR8,R14,4);
			I3(WUNPCKILB,WR5,WR5,WR5);
			I3(WUNPCKILB,WR8,WR8,WR8);
		}

		if (p->DirX<0)
		{
			I2C(WSHUFH,WR8,WR8,0x1B); //swap order
			I2C(WSHUFH,WR5,WR5,0x1B); //swap order
		}
	}

	MB(); I3(WMULUM,WR5,WR5,WR15);
	MB(); I3(WMULUM,WR8,WR8,WR15);

	AddY = p->_YMul>=0?WADDH:WSUBH;

	if (HalfMode)
	{
		Fix_UV(p,HalfMode,0,Row); 

		I3(AddY,WR3,WR0,WR5);
		I3(AddY,WR4,WR1,WR5);
		I3(AddY,WR5,WR2,WR5);

		if (p->DstHalfY)
			Fix_UV(p,HalfMode,1,Row);
		
		I3(AddY,WR6,WR0,WR8);
		I3(AddY,WR7,WR1,WR8);
		I3(AddY,WR8,WR2,WR8);

		I3(WPACKHUS,WR3,WR3,WR6);
		I3(WPACKHUS,WR4,WR4,WR7);
		I3(WPACKHUS,WR5,WR5,WR8);
	}
	else
	{
		// WR0,WR1,WR2 already has UV information

		I3(Row?WUNPCKIHH:WUNPCKILH,WR6,WR0,WR0);
		I3(Row?WUNPCKIHH:WUNPCKILH,WR7,WR1,WR1);

		I3(AddY,WR3,WR6,WR5);
		I3(AddY,WR4,WR7,WR5);
		I3(AddY,WR6,WR6,WR8);
		I3(AddY,WR7,WR7,WR8);
		I3(WPACKHUS,WR3,WR3,WR6);
		I3(WPACKHUS,WR4,WR4,WR7);

		MB(); I3(Row?WUNPCKIHH:WUNPCKILH,WR6,WR2,WR2);
		I3(AddY,WR5,WR6,WR5);
		I3(AddY,WR8,WR6,WR8);
		I3(WPACKHUS,WR5,WR5,WR8);
	}

	if (p->FX.Flags & BLITFX_DITHER)
	{
		if (p->DstSize[0] > p->DitherSize || p->DstSize[1] > p->DitherSize || p->DstSize[2] > p->DitherSize)
		{
			MB(); I3(WXOR,WR6,WR6,WR6);
			I3(WAVG2B,WR6,WR6,Dither);
		}

		I3(WADDBUS,WR3,WR3,(reg)(p->DstSize[0] > p->DitherSize ? WR6:Dither));
		I3(WADDBUS,WR4,WR4,(reg)(p->DstSize[1] > p->DitherSize ? WR6:Dither));
		I3(WADDBUS,WR5,WR5,(reg)(p->DstSize[2] > p->DitherSize ? WR6:Dither));
	}

	if (p->DstPos[2]==0 && p->DstPos[0]+p->DstSize[0]==16)
	{
		// special optimized case for rgb565
		// (red and blue word mask in WR9)

		I3(WAND,WR4,WR4,WR10);
		I3(WSRLDG,WR5,WR5,WCGR2);

		I2(WUNPCKEHUB,WR7,WR4);
		I3(WUNPCKIHB,WR6,WR5,WR3);
		I3(WSLLWG,WR7,WR7,WCGR1);
		I3(WAND,WR6,WR6,WR9); 
		I3(WOR,WR6,WR6,WR7);

		I2(WUNPCKELUB,WR4,WR4);
		I3(WUNPCKILB,WR3,WR5,WR3);
		I3(WSLLWG,WR4,WR4,WCGR1);
		I3(WAND,WR3,WR3,WR9);
		I3(WOR,WR3,WR3,WR4);
	}
	else
	{
		if (p->DstPos[0]!=0) I3(WAND,WR3,WR3,WR9);
		if (p->DstPos[1]!=0) I3(WAND,WR4,WR4,WR10);
		if (p->DstPos[2]!=0) 
		{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -