📄 blit_wmmx_fix.c
字号:
I2(TBCSTH,WR6,R0);
I3(WAND,WR5,WR5,WR6);
}
I2(WUNPCKEHUB,WR6,WR3);
I2(WUNPCKEHUB,WR7,WR4);
I2(WUNPCKEHUB,WR8,WR5);
if (p->DstPos[0]+p->DstSize[0]!=8) I3(p->DstPos[0]+p->DstSize[0]>8?WSLLWG:WSRLWG,WR6,WR6,WCGR0);
if (p->DstPos[1]+p->DstSize[1]!=8) I3(p->DstPos[1]+p->DstSize[1]>8?WSLLWG:WSRLWG,WR7,WR7,WCGR1);
if (p->DstPos[2]+p->DstSize[2]!=8) I3(p->DstPos[2]+p->DstSize[2]>8?WSLLWG:WSRLWG,WR8,WR8,WCGR2);
I2(WUNPCKELUB,WR3,WR3);
I2(WUNPCKELUB,WR4,WR4);
I2(WUNPCKELUB,WR5,WR5);
if (p->DstPos[0]+p->DstSize[0]!=8) I3(p->DstPos[0]+p->DstSize[0]>8?WSLLWG:WSRLWG,WR3,WR3,WCGR0);
if (p->DstPos[1]+p->DstSize[1]!=8) I3(p->DstPos[1]+p->DstSize[1]>8?WSLLWG:WSRLWG,WR4,WR4,WCGR1);
if (p->DstPos[2]+p->DstSize[2]!=8) I3(p->DstPos[2]+p->DstSize[2]>8?WSLLWG:WSRLWG,WR5,WR5,WCGR2);
I3(WOR,WR6,WR6,WR7);
I3(WOR,WR3,WR3,WR4);
I3(WOR,WR6,WR6,WR8);
I3(WOR,WR3,WR3,WR5);
}
if (p->DstDoubleX)
{
I2C(WSHUFH,WR7,WR6,0xFA); // 7 7 6 6
I2C(WSHUFH,WR4,WR3,0xFA); // 3 3 2 2
I2C(WSHUFH,WR6,WR6,0x50); // 5 5 4 4
I2C(WSHUFH,WR3,WR3,0x50); // 1 1 0 0
}
if (p->SwapXY)
{
reg Dst = (reg)(Row?R2:R9);
MB(); I3S(ADD,R1,Dst,R10,LSL,p->DstDoubleY);
MB(); I2C(WSTRD,WR3,Dst,0);
if (p->DstDoubleX)
{
MB(); I2C(WSTRD,WR4,Dst,8);
}
if (p->DstDoubleY)
{
MB(); I3(ADD,R5,Dst,R10);
MB(); I2C(WSTRD,WR3,R5,0);
if (p->DstDoubleX)
{
MB(); I2C(WSTRD,WR4,R5,8);
}
}
MB(); I2C(WSTRD,WR6,R1,0);
if (p->DstDoubleX)
{
MB(); I2C(WSTRD,WR7,R1,8);
}
if (p->DstDoubleY)
{
MB(); I3(ADD,R6,R1,R10);
MB(); I2C(WSTRD,WR6,R6,0);
if (p->DstDoubleX)
{
MB(); I2C(WSTRD,WR7,R6,8);
}
}
MB(); I3S(ADD,Dst,Dst,R10,LSL,1+p->DstDoubleY);
}
else
{
if (p->DstDoubleY)
{
MB(); I3(ADD,R1,R2,R10);
MB(); I3(ADD,R5,R9,R10);
}
if (p->DstDoubleX)
{
MB(); I2C(WSTRD_POST,WR6,R2,8);
MB(); I2C(WSTRD_POST,WR3,R9,8);
}
else
{
MB(); I2C(WSTRD_POST,WR6,R2,8*p->DirX);
MB(); I2C(WSTRD_POST,WR3,R9,8*p->DirX);
}
if (p->DstDoubleY)
{
MB(); I2C(WSTRD,WR6,R1,0);
MB(); I2C(WSTRD,WR3,R5,0);
}
if (p->DstDoubleX)
{
MB(); I2C(WSTRD_POST,WR7,R2,p->DirX>0?8:-24);
MB(); I2C(WSTRD_POST,WR4,R9,p->DirX>0?8:-24);
if (p->DstDoubleY)
{
MB(); I2C(WSTRD,WR7,R1,8);
MB(); I2C(WSTRD,WR4,R5,8);
}
}
}
}
// Builds the WMMX blit routine that converts separate Y, U and V source
// planes into a packed RGB destination, using the instruction-emitter
// macros (I2C, I3, I3S, ...) between CodeBegin() and CodeEnd().
//
// p: blit description. This function reads the direction / doubling /
//    halving / swap flags, the destination bit layout (DstPos, DstSize,
//    DstBPP, DstBPP2) and the colour coefficients (_YMul, _RVMul, ...),
//    and writes SrcAlignPos, DstAlignPos, DstAlignSize, DstStepX,
//    DitherSize and the constant-pool items (YMul, RVMul, ... BAdd).
//
// Register use of the emitted routine, as set up below:
//   on entry  R1 -> destination pointer table, R2 -> source pointer table,
//             R3 = destination pitch; SrcPitch, Width and Height are read
//             from the stack frame
//   R9  Dst[0] (RGB), R2 second destination pointer, R10 DstPitch
//   R12 Src[0] (Y), R14 second Y pointer, R3 Src[1] (U), R4 Src[2] (V)
//   R11 SrcPitch, R8 end-of-line destination, R5 Width at the top of LoopY
//   WCGR0..2 per-channel shift counts, WCGR3 = 8 in half mode
//   WR9/WR10 channel masks, WR11..WR15 colour constants / dither rows
void WMMXFix_RGB_UV(blit_soft* p)
{
	bool_t HalfMode = p->SrcHalfX || p->SrcHalfY;
	dyninst* LoopY;
	dyninst* LoopX;
	dyninst* EndLine;
	dyninst* Dither = NULL;

	p->SrcAlignPos = p->DstAlignPos = p->DstAlignSize = 8;
	if (p->RScaleX==8) p->DstAlignSize = 16;

	// Signed destination step per inner-loop iteration. The magnitude is
	// shifted first and only then multiplied by DirX: left-shifting a
	// negative value (DirX<0) is undefined behaviour in C.
	p->DstStepX = p->DirX * (((p->DstBPP*8) >> 3) << p->DstDoubleX);

	// Colour conversion constants, reduced to 16-bit pool entries.
	p->YMul = InstCreate16(abs(p->_YMul) >> 8,NONE,NONE,NONE,0,0);
	p->RVMul = InstCreate16(abs(p->_RVMul) >> 8,NONE,NONE,NONE,0,0);
	p->RAdd = InstCreate16((p->_RAdd) >> 16,NONE,NONE,NONE,0,0);
	p->GUMul = InstCreate16(abs(p->_GUMul) >> 8,NONE,NONE,NONE,0,0);
	p->GVMul = InstCreate16(abs(p->_GVMul) >> 8,NONE,NONE,NONE,0,0);
	p->GAdd = InstCreate16((p->_GAdd) >> 16,NONE,NONE,NONE,0,0);
	p->BUMul = InstCreate16(abs(p->_BUMul) >> 8,NONE,NONE,NONE,0,0);
	p->BAdd = InstCreate16((p->_BAdd) >> 16,NONE,NONE,NONE,0,0);

	CodeBegin();

	// reserve the local stack frame
	I2C(SUB,SP,SP,OFS(stack,StackFrame));

	// 64-bit loads starting at RAdd and RVMul; the items are posted in
	// consecutive order after Align(8) further down, so each load is
	// presumably meant to fetch a group of four 16-bit constants
	// (RAdd,GAdd,BAdd,YMul and RVMul,GUMul,GVMul,BUMul) -- TODO confirm
	// against InstCreate16/InstPost.
	I1P(WLDRD,WR11,p->RAdd,0);
	I1P(WLDRD,WR14,p->RVMul,0);

	I3(MOV,R10,NONE,R3); //DstPitch
	I2C(LDR,R9,R1,0); //Dst[0] RGB
	I2C(LDR,R3,R2,4); //Src[1] U
	I2C(LDR,R4,R2,8); //Src[2] V
	I2C(LDR,R12,R2,0); //Src[0] Y
	I2C(LDR,R11,SP,OFS(stack,SrcPitch));

	I2C(WSHUFH,WR15,WR11,0xFF); //ymul

	if (!(p->FX.Flags & BLITFX_DITHER))
	{
		// no dithering: spread each additive constant over its own register
		I2C(WSHUFH,WR12,WR11,0x55); //gadd
		I2C(WSHUFH,WR13,WR11,0xAA); //badd
		I2C(WSHUFH,WR11,WR11,0x00); //radd
	}
	else
	{
		int i;
		// 4x4 ordered dither matrix with values 0..15
		static const uint8_t Matrix0[16] =
		{ 0, 8, 2, 10,
		12, 4, 14, 6,
		3, 11, 1, 9,
		15, 7, 13, 5 };
		uint8_t Matrix[16];
		memcpy(Matrix,Matrix0,sizeof(Matrix));

		// scale the matrix down when the narrowest channel keeps more than 4 bits
		p->DitherSize = min(p->DstSize[0],min(p->DstSize[1],p->DstSize[2]));
		if (p->DitherSize>4)
			for (i=0;i<16;++i)
				Matrix[i] >>= p->DitherSize-4;

		// dither mask: WR12 = matrix bytes 0..7, WR13 = matrix bytes 8..15
		Dither = InstCreate(Matrix,16,NONE,NONE,NONE,0,0);
		I1P(WLDRD,WR12,Dither,0);
		I1P(WLDRD,WR13,Dither,8);
	}

	I2C(LDR,R5,SP,OFS(stack,Height));
	I2C(LDR,R6,SP,OFS(stack,Width));

	//SrcNext = 2*(SrcHalfY?2:1)*(SwapXY?4:1)*Src->Pitch - (Width*(SrcHalfY?2:1) >> SrcDoubleX)
	I3S(MOV,R1,NONE,R11,LSL,1+p->SrcHalfY+p->SwapXY*2);
	I3S(SUB,R1,R1,R6,LSR,p->SrcDoubleX-p->SrcHalfX);
	I2C(STR,R1,SP,OFS(stack,SrcNext));

	//UVNext = (Src->Pitch >> 1)*(SrcHalfY?2:1)*(SwapXY?4:1) - (Width*(SrcHalfY?2:1) >> SrcDoubleX >> 1);
	I3S(MOV,R2,NONE,R11,ASR,1-p->SrcHalfY-p->SwapXY*2);
	I3S(SUB,R2,R2,R6,LSR,p->SrcDoubleX+1-p->SrcHalfX);
	I2C(STR,R2,SP,OFS(stack,UVNext));

	if (p->DirX<0) //adjust reversed destination for block size
		I2C(SUB,R9,R9,-(p->DstStepX >> 1)-(p->DstBPP >> 3));

	if (p->SwapXY)
	{
		// EndOfRect = Dst + ((Height * DstBPP * DirX) >> 3)
		I2C(MOV,R1,NONE,p->DstBPP * p->DirX);
		I3(MUL,R1,R5,R1);
		I3S(ADD,R1,R9,R1,ASR,3);
		I2C(STR,R1,SP,OFS(stack,EndOfRect));

		//DstNext = DstStepX - Width*DstPitch;
		MB(); I3(MUL,R2,R10,R6);
		I2C(MOV,R1,NONE,p->DstStepX);
		I3(SUB,R1,R1,R2);
		I2C(STR,R1,SP,OFS(stack,DstNext));
	}
	else
	{
		// EndOfRect = Dst + DstPitch * Height
		I3(MUL,R1,R10,R5);
		I3(ADD,R1,R9,R1);
		I2C(STR,R1,SP,OFS(stack,EndOfRect));

		//DstNext = (DstPitch << (DstDoubleY+1)) - DirX * (Width << DstBPP2);
		I3S(MOV,R2,NONE,R10,LSL,p->DstDoubleY+1);
		I3S(p->DirX>0?SUB:ADD,R2,R2,R6,LSL,p->DstBPP2);
		I2C(STR,R2,SP,OFS(stack,DstNext));
	}

	// setup shift registers
	// wcgr0 abs(rpos+rsize-8)
	// wcgr1 abs(gpos+gsize-8)
	// wcgr2 abs(bpos+bsize-8)
	// wcgr3 8 (if HalfMode)
	I2C(MOV,R5,NONE,abs(p->DstPos[0]+p->DstSize[0]-8));
	I2C(MOV,R6,NONE,abs(p->DstPos[1]+p->DstSize[1]-8));
	I2C(MOV,R7,NONE,abs(p->DstPos[2]+p->DstSize[2]-8));
	I2(TMCR,WCGR0,R5);
	I2(TMCR,WCGR1,R6);
	I2(TMCR,WCGR2,R7);
	if (HalfMode)
	{
		I2C(MOV,R1,NONE,8);
		I2(TMCR,WCGR3,R1);
	}

	// setup masks
	// wr10 green mask (top DstSize[1] bits of every byte)
	// wr9  red mask, or combined red+blue halfword mask
	// r0   blue byte mask (only when red and blue are not combined)
	I2C(MOV,R1,NONE,((1 << p->DstSize[1])-1)<<(8-p->DstSize[1]));
	I2(TBCSTB,WR10,R1);

	if (p->DstPos[2]==0 && p->DstPos[0]+p->DstSize[0]==16)
	{
		// blue sits at bit 0 and red ends at bit 16:
		// red and blue share one halfword mask in WR9
		I2C(MOV,R1,NONE,((1 << p->DstSize[0])-1)<<(16-p->DstSize[0]));
		I2C(ORR,R1,R1,(1 << p->DstSize[2])-1);
		I2(TBCSTH,WR9,R1);
	}
	else
	{
		// separate byte masks: red goes to WR9, blue stays in R0
		I2C(MOV,R1,NONE,((1 << p->DstSize[0])-1)<<(8-p->DstSize[0]));
		I2C(MOV,R0,NONE,((1 << p->DstSize[2])-1)<<(8-p->DstSize[2]));
		I2(TBCSTB,WR9,R1);
	}

	// second destination pointer (R2)
	if (p->SwapXY)
		// offset computed as DirX * magnitude so that no negative value is
		// left-shifted (undefined behaviour in C when DirX<0)
		I2C(ADD,R2,R9,p->DirX * (8 << p->DstDoubleX));
	else
		I3S(ADD,R2,R9,R10,LSL,p->DstDoubleY);

	// second Y pointer: R14 = Y + (SrcPitch << SrcHalfY)
	I3S(ADD,R14,R12,R11,LSL,p->SrcHalfY);

	if (!p->SwapXY)
	{
		// bias U/V pointers by -4 (the preload offsets below add it back via UVAdj)
		I2C(SUB,R3,R3,4);
		I2C(SUB,R4,R4,4);
	}

	I2C(LDR,R5,SP,OFS(stack,Width));

	// jump over the constant pool to the row loop
	LoopY = Label(0);
	I0P(B,AL,LoopY);

	// constant pool; RVMul..BUMul and RAdd..YMul are posted back to back
	// because each group is fetched with one WLDRD above
	Align(8);
	InstPost(p->RVMul);
	InstPost(p->GUMul);
	InstPost(p->GVMul);
	InstPost(p->BUMul);
	InstPost(p->RAdd);
	InstPost(p->GAdd);
	InstPost(p->BAdd);
	InstPost(p->YMul);
	if (Dither)
		InstPost(Dither);

	// ---- start of one row pass; R5 holds Width here ----
	InstPost(LoopY);

	// R8 = destination value at which the inner loop stops
	if (p->SwapXY)
	{
		I3(MUL,R1,R10,R5); //dstpitch * width
		I3(ADD,R8,R9,R1);
	}
	else
	{
		if (p->DirX > 0)
			I3S(ADD,R8,R9,R5,LSL,p->DstBPP2);
		else
			I3S(SUB,R8,R9,R5,LSL,p->DstBPP2);
	}

	LoopX = Label(0);

	// preload
	if (!p->Slices)
	{
		// Walk both Y rows and the U and V rows with byte loads, two per
		// 64-byte step (offsets -32 and -64 relative to the moving R5),
		// discarding the values in R6/R7 -- presumably to pull the rows
		// into the cache before converting (TODO confirm).
		// R5 (Width) is clobbered here and reloaded at the end of the row.
		dyninst* PreLoad1;
		dyninst* PreLoad2;
		dyninst* PreLoad3;
		dyninst* PreLoad4;
		int UVAdj = p->SwapXY?0:4; // undo the -4 bias on R3/R4

		// R1 = end of the Y row; skip preloading for rows not longer than 32 bytes
		I3S(ADD,R1,R12,R5,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0));
		I2C(ADD,R5,R12,32);
		I3(CMP,NONE,R5,R1);
		I0P(B,CS,LoopX);

		//y0
		PreLoad1 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64);
		I0P(B,CC,PreLoad1);

		// rebase the end pointer from the first Y row (R12) to the second (R14)
		I3(SUB,R1,R1,R12);
		I3(ADD,R1,R1,R14);
		I2C(ADD,R5,R14,32);

		//y1
		PreLoad2 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64);
		I0P(B,CC,PreLoad2);

		// row length scaled down by SrcUVX2 for the chroma rows
		I3(SUB,R1,R1,R14);
		I3S(ADD,R1,R3,R1,ASR,p->SrcUVX2);
		I2C(ADD,R5,R3,32);
		I3(CMP,NONE,R5,R1);
		I0P(B,CS,LoopX);

		//u
		PreLoad3 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32+UVAdj);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64+UVAdj);
		I0P(B,CC,PreLoad3);

		// rebase the end pointer from U (R3) to V (R4)
		I3(SUB,R1,R1,R3);
		I3(ADD,R1,R1,R4);
		I2C(ADD,R5,R4,32);

		//v
		PreLoad4 = Label(1);
		Byte(); I2C(LDR,R6,R5,-32+UVAdj);
		I2C(ADD,R5,R5,64);
		I3(CMP,NONE,R5,R1);
		Byte(); I2C(LDR,R7,R5,-64+UVAdj);
		I0P(B,CC,PreLoad4);
	}
	else
	if (p->ARM5)
	{
		//preload next
		I3S(PLD,NONE,R12,R11,LSL,p->SrcHalfY+1);
		I3S(PLD,NONE,R14,R11,LSL,p->SrcHalfY+1);
		I3S(PLD,NONE,R3,R11,ASR,p->SrcUVPitch2);
		I3S(PLD,NONE,R4,R11,ASR,p->SrcUVPitch2);
	}

	EndLine = Label(0);

	// ---- inner loop: runs until R9 reaches R8 ----
	InstPost(LoopX);
	{
		if (!HalfMode) Fix_UV(p,0,0,0);
		Fix_Y(p,0,0,HalfMode);
		Fix_Y(p,1,0,HalfMode);

		if (p->SwapXY && (p->FX.Flags & BLITFX_DITHER))
		{
			// second unrolled step, emitted with the third Fix_Y argument
			// set to 1; leave early if the line ended after the first step
			I3(CMP,NONE,R9,R8);
			I0P(B,EQ,EndLine);

			if (!HalfMode) Fix_UV(p,0,0,0);
			Fix_Y(p,0,1,HalfMode);
			Fix_Y(p,1,1,HalfMode);
		}

		I3(CMP,NONE,R9,R8);
		I0P(B,NE,LoopX);
	}
	InstPost(EndLine);

	I2C(LDR,R5,SP,OFS(stack,SrcNext));
	I2C(LDR,R6,SP,OFS(stack,DstNext));
	I2C(LDR,R7,SP,OFS(stack,UVNext));
	I2C(LDR,R8,SP,OFS(stack,EndOfRect));

	//increment pointers
	I3(ADD,R12,R12,R5);
	I3(ADD,R14,R14,R5);
	I3(ADD,R2,R2,R6);
	I3(ADD,R9,R9,R6);
	I3(ADD,R3,R3,R7);
	I3(ADD,R4,R4,R7);

	if (!p->SwapXY && (p->FX.Flags & BLITFX_DITHER))
	{
		//swap WR12 and WR13 (the two dither halves), using WR3 as scratch;
		//WOR of a register with itself acts as a register move
		I3(WOR,WR3,WR12,WR12);
		I3(WOR,WR12,WR13,WR13);
		I3(WOR,WR13,WR3,WR3);
	}

	//prepare registers for next row
	I2C(LDR,R5,SP,OFS(stack,Width));

	// loop until the destination pointer reaches EndOfRect
	I3(CMP,NONE,R9,R8);
	I0P(B,NE,LoopY);

	// release the local stack frame
	I2C(ADD,SP,SP,OFS(stack,StackFrame));
	CodeEnd();
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -