// blit_wmmx_fix.c
/*****************************************************************************
*
* This program is free software ; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* $Id: blit_wmmx_fix.c 271 2005-08-09 08:31:35Z picard $
*
* The Core Pocket Media Player
* Copyright (c) 2004-2005 Gabor Kovacs
*
****************************************************************************/
#include "../common.h"
#include "../dyncode/dyncode.h"
#include "blit_soft.h"
// RScale==16 && !SwapXY 8x2 -> 8x2
// RScale==16 && SwapXY 2x8 -> 8x2
// RScale==8 && !SwapXY 8x2 -> 16x4
// RScale==8 && SwapXY 2x8 -> 16x4
// RScale==32 && !SwapXY 16x4 -> 8x2
// RScale==32 && SwapXY 4x16 -> 8x2
#if defined(ARM)
// Stack record used by this blitter.
// NOTE(review): presumably mirrors the stack as seen by the generated code
// (locals first, then the saved-register area, then the arguments that did
// not fit in R0..R3) -- confirm against the prologue emitted elsewhere.
// The field order is therefore likely significant; do not reorder.
typedef struct stack
{
// locals
int EndOfRect;
int DstNext;
int SrcNext;
int UVNext;
// saved-register area; STACKFRAME is defined outside this section
int StackFrame[STACKFRAME];
// the first four arguments are not part of this struct (listed with the
// register each one is noted against):
//void* this R0
//char* Dst R1
//char* Src R2
//int DstPitch R3 can be signed
// remaining arguments
int SrcPitch; //can be signed
int Width;
int Height;
int Src2SrcLast;
} stack;
// r0 bmask (if bpos!=0)
// r1 temp
// r2 dst1 (dst+dstpitch*DoubleY or dst+8*DoubleX*DirX)
// r3 u (-=4 if !swapxy)
// r4 v (-=4 if !swapxy)
// r5..r7 temp
// r8 endofline
// r9 dst0
// r10 dstpitch
// r11 srcpitch
// r12 y0
// r14 y1 (y0+(srchalfy?2:1)*srcpitch)
// wr0..wr2 r0,g0,b0 (from uv)
// wr3..wr5 temp1 (r,g,b)
// wr6..wr8 temp2 (r,g,b)
// wr9,wr10 mask r,g
// wr11 radd (if !dither)
// wr12 gadd (if !dither)
// wr13 badd (if !dither)
// wr11 radd,gadd,badd (if dither)
// wr12,wr13 dither
// wr14 rvmul,gumul,gvmul,bumul
// wr15 ymul
// wcgr0 abs(rpos-8)
// wcgr1 abs(gpos-8)
// wcgr2 abs(bpos-8)
// wcgr3 8 (if HalfMode)
// using: wr0,wr1,wr2,wr6,wr7
// Emits the SwapXY chroma fetch: four single loads through Src, each one
// inserted with TINSRB into a byte lane of the WMMX register Dst.
//   p        blit state; DstHalfY, SrcHalfY and DirX are read here
//   HalfMode not read by this function
//   Part     with DstHalfY set, selects the variant that walks Src backwards
//   Row      in that variant, picks the last pointer step (+1 or -1) and
//            whether the SrcHalfY fix-up is a SUB or an ADD
//   Src      core register holding the source pointer; modified by the loads
//   Dst      WMMX register that receives the four bytes
// Core registers R1, R5, R6 and R7 are overwritten. R11 (srcpitch in the
// register map above) is used as the shifted pointer step.
// NOTE(review): Byte() presumably marks the next emitted load as byte-wide --
// confirm in dyncode.
static void Fix_UVSwapXY(blit_soft* p,bool_t HalfMode,bool_t Part,bool_t Row,reg Src,reg Dst)
{
	const int Forward = p->DirX>0;
	int Lane0,Lane1,Lane2,Lane3; // destination byte lane of samples 0..3

	if (!p->DstHalfY)
	{
		// lanes are exchanged pairwise when DirX is not positive
		Lane0 = Forward ? 1 : 3;
		Lane1 = Forward ? 3 : 1;
		Lane2 = Forward ? 5 : 7;
		Lane3 = Forward ? 7 : 5;

		Byte(); I3S(LDR_POST,R5,Src,R11,ASR,1); //0
		Byte(); I3S(LDR_POST,R6,Src,R11,ASR,0); //1
		Byte(); I3S(LDR_POSTSUB,R7,Src,R11,ASR,1); //3
		Byte(); I3S(LDR_POSTSUB,R1,Src,R11,ASR,0); //2

		I2C(TINSRB,Dst,R5,Lane0);
		I2C(TINSRB,Dst,R6,Lane1);
		I2C(TINSRB,Dst,R7,Lane3);
		I2C(TINSRB,Dst,R1,Lane2);

		// step Src by one byte
		I2C(ADD,Src,Src,1);
		return;
	}

	// DstHalfY: lanes are fully mirrored when DirX is not positive
	Lane0 = Forward ? 1 : 7;
	Lane1 = Forward ? 3 : 5;
	Lane2 = Forward ? 5 : 3;
	Lane3 = Forward ? 7 : 1;

	if (Part)
	{
		const int LastStep = Row ? 1 : -1;

		// samples are read in descending order (3,2,1,0)
		Byte(); I3S(LDR_POSTSUB,R7,Src,R11,ASR,1); //3
		Byte(); I3S(LDR_POSTSUB,R1,Src,R11,ASR,1); //2
		Byte(); I3S(LDR_POSTSUB,R6,Src,R11,ASR,1); //1
		Byte(); I2C(LDR_POST,R5,Src,LastStep); //0

		I2C(TINSRB,Dst,R7,Lane3);
		I2C(TINSRB,Dst,R1,Lane2);
		I2C(TINSRB,Dst,R6,Lane1);
		I2C(TINSRB,Dst,R5,Lane0);

		if (p->SrcHalfY)
		{
			const int Adjust = Row ? SUB : ADD;
			I3S(Adjust,Src,Src,R11,LSL,1); // 4 rows
		}
	}
	else
	{
		// samples are read in ascending order (0,1,2,3)
		Byte(); I3S(LDR_POST,R5,Src,R11,ASR,1); //0
		Byte(); I3S(LDR_POST,R6,Src,R11,ASR,1); //1
		Byte(); I3S(LDR_POST,R1,Src,R11,ASR,1); //2
		Byte(); I2C(LDR_POST,R7,Src,1); //3

		I2C(TINSRB,Dst,R5,Lane0);
		I2C(TINSRB,Dst,R6,Lane1);
		I2C(TINSRB,Dst,R1,Lane2);
		I2C(TINSRB,Dst,R7,Lane3);
	}
}
// Emits the chroma stage of the conversion.
// First U is brought into WR6 and V into WR7 (via Fix_UVSwapXY when SwapXY is
// set, otherwise with word loads through R3/R4 followed by a byte unpack).
// Then the per-channel chroma terms are formed from the multipliers held in
// WR14 and combined with the additive constants, leaving
//   WR0 = R term, WR1 = G term, WR2 = B term.
//   p        blit state; SwapXY, DstHalfX/Y, SrcHalfX/Y, DirX, the _??Mul
//            signs and FX.Flags are read here
//   HalfMode enables the extra halfword unpack (when DstHalfX is clear) and
//            takes part in choosing the load pre-increment
//   Part     with DstHalfY set (non-SwapXY path), loads from R3/R4 plus
//            R11 ASR 1 without moving R3/R4
//   Row      chooses between the high and low halfword unpack, and takes
//            part in choosing the load pre-increment
// WMMX registers written: WR0, WR1, WR2, WR6, WR7. Core R5/R6 are written in
// the "Part" load (and further core registers inside Fix_UVSwapXY).
static void Fix_UV(blit_soft* p,bool_t HalfMode,bool_t Part,bool_t Row)
{
	int OpR,OpG,OpB,OpGMix;

	if (p->SwapXY)
	{
		Fix_UVSwapXY(p,HalfMode,Part,Row,R3,WR6);
		Fix_UVSwapXY(p,HalfMode,Part,Row,R4,WR7);
	}
	else
	{
		if (!(p->DstHalfY && Part))
		{
			// pre-indexed word load; the pointer stays put (0) or moves on by 4
			const int Step = (HalfMode && !p->SrcHalfX && Row) ? 0 : 4;
			MB(); I2C(WLDRW_PRE,WR6,R3,Step);
			MB(); I2C(WLDRW_PRE,WR7,R4,Step);
		}
		else
		{
			// one uv row below
			MB(); I3S(ADD,R5,R3,R11,ASR,1);
			MB(); I3S(ADD,R6,R4,R11,ASR,1);
			MB(); I2C(WLDRW,WR6,R5,0);
			MB(); I2C(WLDRW,WR7,R6,0);
		}

		I3(WUNPCKILB,WR6,WR6,WR6);
		I3(WUNPCKILB,WR7,WR7,WR7);

		if (p->DirX<0)
		{
			//swap order (2:2)
			const int Order = p->SrcHalfY ? 0x1B : 0xB1;
			I2C(WSHUFH,WR6,WR6,Order);
			I2C(WSHUFH,WR7,WR7,Order);
		}
	}

	if (HalfMode && !p->DstHalfX)
	{
		const int Widen = (Row==(p->DirX>0)) ? WUNPCKIHH : WUNPCKILH;
		I3(Widen,WR6,WR6,WR6);
		I3(Widen,WR7,WR7,WR7);
	}

	// The multipliers are emitted as magnitudes; their signs are applied by
	// picking add or subtract below.
	OpGMix = ((p->_GUMul<0)^(p->_GVMul<0)) ? WSUBH : WADDH;
	OpR = (p->_RVMul<0) ? WSUBH : WADDH;
	OpG = (p->_GUMul<0) ? WSUBH : WADDH;
	OpB = (p->_BUMul<0) ? WSUBH : WADDH;

	//WR6 U
	//WR7 V
	I2C(WSHUFH,WR1,WR14,0x55); //gumul
	I3(WMULUM,WR1,WR1,WR6);
	I2C(WSHUFH,WR2,WR14,0xFF); //bumul
	I3(WMULUM,WR2,WR2,WR6);
	I2C(WSHUFH,WR6,WR14,0xAA); //gvmul
	I3(WMULUM,WR6,WR6,WR7);
	I2C(WSHUFH,WR0,WR14,0x00); //rvmul
	I3(WMULUM,WR0,WR0,WR7);
	I3(OpGMix,WR1,WR1,WR6);

	//WR0 R
	//WR1 G
	//WR2 B
	if (!(p->FX.Flags & BLITFX_DITHER))
	{
		// the three adds have a register each: WR11, WR12, WR13
		I3(OpR,WR0,WR11,WR0);
		I3(OpG,WR1,WR12,WR1);
		I3(OpB,WR2,WR13,WR2);
	}
	else
	{
		// with dithering all three adds are taken out of WR11
		I2C(WSHUFH,WR6,WR11,0x00); //radd
		I2C(WSHUFH,WR7,WR11,0x55); //gadd
		I3(OpR,WR0,WR6,WR0);
		I2C(WSHUFH,WR6,WR11,0xAA); //badd
		I3(OpG,WR1,WR7,WR1);
		I3(OpB,WR2,WR6,WR2);
	}
}
static void Fix_Y(blit_soft* p,bool_t Row,int Col,bool_t HalfMode)
{
// load to upper 8bits (doesn't matter what is in the lower 8 bits)
// load y0 wr5
// load y1 wr8
reg Dither;
int AddY;
if (p->SwapXY)
{
Dither = (reg)(Col?WR13:WR12);
if (p->ArithStretch && p->SrcHalfX && p->SrcHalfY)
{
I3(LDR_POST,R5,R12,R11);
I3(LDR_POST,R6,R12,R11);
I3(LDR_POST,R7,R12,R11);
I3(LDR_POST,R1,R12,R11);
I2C(TINSRH,WR5,R5,p->DirX>0?0:3);
I2C(TINSRH,WR6,R6,p->DirX>0?0:3);
I2C(TINSRH,WR5,R7,p->DirX>0?1:2);
I2C(TINSRH,WR6,R1,p->DirX>0?1:2);
I3S(MOV,R5,NONE,R5,LSR,16);
I3S(MOV,R6,NONE,R6,LSR,16);
I3S(MOV,R7,NONE,R7,LSR,16);
I3S(MOV,R1,NONE,R1,LSR,16);
I2C(TINSRH,WR8,R5,p->DirX>0?0:3);
I2C(TINSRH,WR7,R6,p->DirX>0?0:3);
I2C(TINSRH,WR8,R7,p->DirX>0?1:2);
I2C(TINSRH,WR7,R1,p->DirX>0?1:2);
I3(LDR_POST,R5,R12,R11);
I3(LDR_POST,R6,R12,R11);
I3(LDR_POST,R7,R12,R11);
I3(LDR_POST,R1,R12,R11);
I2C(TINSRH,WR5,R5,p->DirX>0?2:1);
I2C(TINSRH,WR6,R6,p->DirX>0?2:1);
I2C(TINSRH,WR5,R7,p->DirX>0?3:0);
I2C(TINSRH,WR6,R1,p->DirX>0?3:0);
I3S(MOV,R5,NONE,R5,LSR,16);
I3S(MOV,R6,NONE,R6,LSR,16);
I3S(MOV,R7,NONE,R7,LSR,16);
I3S(MOV,R1,NONE,R1,LSR,16);
I2C(TINSRH,WR8,R5,p->DirX>0?2:1);
I2C(TINSRH,WR7,R6,p->DirX>0?2:1);
I2C(TINSRH,WR8,R7,p->DirX>0?3:0);
I2C(TINSRH,WR7,R1,p->DirX>0?3:0);
I3(WAVG2B,WR5,WR5,WR6);
I3(WSLLHG,WR6,WR5,WCGR3);
I3(WAVG2B,WR8,WR8,WR7);
I3(WSLLHG,WR7,WR8,WCGR3);
I3(WAVG2B,WR5,WR5,WR6);
I3(WAVG2B,WR8,WR8,WR7);
}
else
if (p->SrcHalfX)
{
reg Add = R11;
if (p->SrcHalfY)
{
I3(ADD,R1,R11,R11);
Add = R1;
}
I3(LDR_POST,R5,R12,Add);
I3(LDR_POST,R6,R12,Add);
I3(LDR_POST,R7,R12,Add);
I3(LDR_POST,R1,R12,Add);
I2C(TINSRH,WR5,R5,p->DirX>0?0:3);
I2C(TINSRH,WR5,R6,p->DirX>0?1:2);
I2C(TINSRH,WR5,R7,p->DirX>0?2:1);
I2C(TINSRH,WR5,R1,p->DirX>0?3:0);
I3S(MOV,R5,NONE,R5,LSR,16);
I3S(MOV,R6,NONE,R6,LSR,16);
I3S(MOV,R7,NONE,R7,LSR,16);
I3S(MOV,R1,NONE,R1,LSR,16);
I2C(TINSRH,WR8,R5,p->DirX>0?0:3);
I2C(TINSRH,WR8,R6,p->DirX>0?1:2);
I2C(TINSRH,WR8,R7,p->DirX>0?2:1);
I2C(TINSRH,WR8,R1,p->DirX>0?3:0);
}
else
{
reg Add = R11;
if (p->SrcHalfY)
{
I3(ADD,R1,R11,R11);
Add = R1;
}
Half(); I3(LDR_POST,R5,R12,Add);
Half(); I3(LDR_POST,R6,R12,Add);
Half(); I3(LDR_POST,R7,R12,Add);
Half(); I3(LDR_POST,R1,R12,Add);
I2C(TINSRB,WR5,R5,p->DirX>0?1:7);
I2C(TINSRH,WR8,R5,p->DirX>0?0:3);
I2C(TINSRB,WR5,R6,p->DirX>0?3:5);
I2C(TINSRH,WR8,R6,p->DirX>0?1:2);
I2C(TINSRB,WR5,R7,p->DirX>0?5:3);
I2C(TINSRH,WR8,R7,p->DirX>0?2:1);
I2C(TINSRB,WR5,R1,p->DirX>0?7:1);
I2C(TINSRH,WR8,R1,p->DirX>0?3:0);
}
if (Row)
{
I3S(SUB,R12,R12,R11,LSL,3+p->SrcHalfY);
I2C(ADD,R12,R12,2 << p->SrcHalfX);
}
}
else
{
Dither = WR12;
if (p->ArithStretch && p->SrcHalfX && p->SrcHalfY)
{
MB(); I3(ADD,R1,R12,R11);
MB(); I3(ADD,R5,R14,R11);
MB(); I2C(WLDRD_POST,WR5,R12,8);
MB(); I2C(WLDRD,WR6,R1,0);
MB(); I2C(WLDRD_POST,WR8,R14,8);
MB(); I2C(WLDRD,WR7,R5,0);
I3(WAVG2B,WR5,WR5,WR6);
I3(WSLLHG,WR6,WR5,WCGR3);
I3(WAVG2B,WR8,WR8,WR7);
I3(WSLLHG,WR7,WR8,WCGR3);
I3(WAVG2B,WR5,WR5,WR6);
I3(WAVG2B,WR8,WR8,WR7);
}
else
if (p->SrcHalfX)
{
MB(); I2C(WLDRD_POST,WR5,R12,8); // use only every second pixel
MB(); I2C(WLDRD_POST,WR8,R14,8); // use only every second pixel
}
else
{
MB(); I2C(WLDRW_POST,WR5,R12,4);
MB(); I2C(WLDRW_POST,WR8,R14,4);
I3(WUNPCKILB,WR5,WR5,WR5);
I3(WUNPCKILB,WR8,WR8,WR8);
}
if (p->DirX<0)
{
I2C(WSHUFH,WR8,WR8,0x1B); //swap order
I2C(WSHUFH,WR5,WR5,0x1B); //swap order
}
}
MB(); I3(WMULUM,WR5,WR5,WR15);
MB(); I3(WMULUM,WR8,WR8,WR15);
AddY = p->_YMul>=0?WADDH:WSUBH;
if (HalfMode)
{
Fix_UV(p,HalfMode,0,Row);
I3(AddY,WR3,WR0,WR5);
I3(AddY,WR4,WR1,WR5);
I3(AddY,WR5,WR2,WR5);
if (p->DstHalfY)
Fix_UV(p,HalfMode,1,Row);
I3(AddY,WR6,WR0,WR8);
I3(AddY,WR7,WR1,WR8);
I3(AddY,WR8,WR2,WR8);
I3(WPACKHUS,WR3,WR3,WR6);
I3(WPACKHUS,WR4,WR4,WR7);
I3(WPACKHUS,WR5,WR5,WR8);
}
else
{
// WR0,WR1,WR2 already has UV information
I3(Row?WUNPCKIHH:WUNPCKILH,WR6,WR0,WR0);
I3(Row?WUNPCKIHH:WUNPCKILH,WR7,WR1,WR1);
I3(AddY,WR3,WR6,WR5);
I3(AddY,WR4,WR7,WR5);
I3(AddY,WR6,WR6,WR8);
I3(AddY,WR7,WR7,WR8);
I3(WPACKHUS,WR3,WR3,WR6);
I3(WPACKHUS,WR4,WR4,WR7);
MB(); I3(Row?WUNPCKIHH:WUNPCKILH,WR6,WR2,WR2);
I3(AddY,WR5,WR6,WR5);
I3(AddY,WR8,WR6,WR8);
I3(WPACKHUS,WR5,WR5,WR8);
}
if (p->FX.Flags & BLITFX_DITHER)
{
if (p->DstSize[0] > p->DitherSize || p->DstSize[1] > p->DitherSize || p->DstSize[2] > p->DitherSize)
{
MB(); I3(WXOR,WR6,WR6,WR6);
I3(WAVG2B,WR6,WR6,Dither);
}
I3(WADDBUS,WR3,WR3,(reg)(p->DstSize[0] > p->DitherSize ? WR6:Dither));
I3(WADDBUS,WR4,WR4,(reg)(p->DstSize[1] > p->DitherSize ? WR6:Dither));
I3(WADDBUS,WR5,WR5,(reg)(p->DstSize[2] > p->DitherSize ? WR6:Dither));
}
if (p->DstPos[2]==0 && p->DstPos[0]+p->DstSize[0]==16)
{
// special optimized case for rgb565
// (red and blue word mask in WR9)
I3(WAND,WR4,WR4,WR10);
I3(WSRLDG,WR5,WR5,WCGR2);
I2(WUNPCKEHUB,WR7,WR4);
I3(WUNPCKIHB,WR6,WR5,WR3);
I3(WSLLWG,WR7,WR7,WCGR1);
I3(WAND,WR6,WR6,WR9);
I3(WOR,WR6,WR6,WR7);
I2(WUNPCKELUB,WR4,WR4);
I3(WUNPCKILB,WR3,WR5,WR3);
I3(WSLLWG,WR4,WR4,WCGR1);
I3(WAND,WR3,WR3,WR9);
I3(WOR,WR3,WR3,WR4);
}
else
{
if (p->DstPos[0]!=0) I3(WAND,WR3,WR3,WR9);
if (p->DstPos[1]!=0) I3(WAND,WR4,WR4,WR10);
if (p->DstPos[2]!=0)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -