📄 blit_arm_fix.c
字号:
int i,Shift;
for (i=0;i<3;++i)
Mask |= 1 << (p->DstPos[i] + p->DstSize[i] -1);
Mask2 = Mask;
Shift = 0;
if (p->DstBPP==8 && p->DstDoubleX)
Mask2 |= Mask << 16;
else
if (p->DstBPP <= 16)
{
if (p->DirX<0) Shift = p->DstBPP;
Mask2 |= Mask << p->DstBPP;
}
Invert ^= RotateRight(Mask2,Shift+p->DstPos[0]);
}
p->YMul = InstCreate32(p->_YMul,NONE,NONE,NONE,0,0);
p->RVMul = InstCreate32(p->_RVMul,NONE,NONE,NONE,0,0);
p->RAdd = InstCreate32(p->_RAdd,NONE,NONE,NONE,0,0);
p->GUMul = InstCreate32(p->_GUMul,NONE,NONE,NONE,0,0);
p->GVMul = InstCreate32(p->_GVMul,NONE,NONE,NONE,0,0);
p->GAdd = InstCreate32(p->_GAdd,NONE,NONE,NONE,0,0);
p->BUMul = InstCreate32(p->_BUMul,NONE,NONE,NONE,0,0);
p->BAdd = InstCreate32(p->_BAdd,NONE,NONE,NONE,0,0);
if (Invert)
p->InvertMask = InstCreate32(Invert,NONE,NONE,NONE,0,0);
if (p->OnlyDiff)
p->DiffMask = InstCreate32(0xFCFCFCFC,NONE,NONE,NONE,0,0);
CodeBegin();
I2C(SUB,SP,SP,OFS(stack,StackFrame));
I2C(LDR,R9,R1,0); //Dst[0] RGB
I2C(LDR,R10,R2,4); //Src[1] U
I2C(LDR,R11,R2,8); //Src[2] V
I2C(LDR,R12,R2,0); //Src[0] Y
I2C(STR,R3,SP,OFS(stack,DstPitch));
I3(MOV,R6,NONE,R3); //DstPitch
I2C(LDR,R7,SP,OFS(stack,SrcPitch));
I2C(LDR,R0,SP,OFS(stack,Height));
I2C(LDR,R4,SP,OFS(stack,Width));
if (!p->ColorLookup)
I1P(LDR,R8,p->YMul,0);
else
I1P(MOV,R8,p->LookUp,0);
//YNext = 2*Src->Pitch - (Width >> SrcDoubleX)
I3S(MOV,R1,NONE,R7,LSL,1);
I3S(SUB,R1,R1,R4,LSR,p->SrcDoubleX);
I2C(STR,R1,SP,OFS(stack,YNext));
//UVNext = (Src->Pitch >> 1) - (Width >> SrcDoubleX >> 1);
I3S(MOV,R2,NONE,R7,ASR,1);
I3S(SUB,R2,R2,R4,LSR,p->SrcDoubleX+1);
I2C(STR,R2,SP,OFS(stack,UVNext));
if (p->DirX<0 && p->DstBPP==16) //adjust reversed destination for block size
I2C(SUB,R9,R9,-p->DstStepX-(p->DstBPP >> 3));
if (p->DstBPP==32)
I2C(ADD,R9,R9,p->DstStepX/2);
if (p->SwapXY)
{
// EndOfRect = Dst + ((Height * DstBPP * DirX) >> 3) - (DstPitch << DstDoubleY)
I3S(SUB,R9,R9,R6,LSL,p->DstDoubleY);
I2C(MOV,R1,NONE,p->DstBPP * p->DirX);
I3(MUL,R0,R1,R0);
I3S(ADD,R0,R9,R0,ASR,3);
I2C(STR,R0,SP,OFS(stack,EndOfRect));
//DstNext = DstStepX - Width*DstPitch;
MB(); I3(MUL,R2,R6,R4);
I2C(MOV,R0,NONE,p->DstStepX);
I3(SUB,R0,R0,R2);
I2C(STR,R0,SP,OFS(stack,DstNext));
}
else
{
// EndOfRect = Dst + DstPitch * Height
I3(MUL,R0,R6,R0);
I3(ADD,R0,R9,R0);
I2C(STR,R0,SP,OFS(stack,EndOfRect));
//DstNext = (DstPitch*2 << DstDoubleY) - DirX * (Width << DstBPP2);
I3S(MOV,R2,NONE,R6,LSL,p->DstDoubleY+1);
I3S(p->DirX>0?SUB:ADD,R2,R2,R4,LSL,p->DstBPP2);
I2C(STR,R2,SP,OFS(stack,DstNext));
}
I3(ADD,R14,R12,R7);
if (p->Dither)
{
if (!p->ColorLookup)
{
I2C(MVN,R1,NONE,0x80000000);
I2C(MVN,R2,NONE,0x80000000);
I2C(MVN,R3,NONE,0x80000000);
}
else
{
I2C(MOV,R1,NONE,0x80);
I2C(MOV,R2,NONE,0x80);
I2C(MOV,R3,NONE,0x80);
}
}
LoopY = Label(1);
if (p->SwapXY)
{
I3(MUL,R0,R6,R4); //R6=dstpitch
I3(ADD,R7,R9,R0);
}
else
{
if (p->DirX > 0)
I3S(ADD,R7,R9,R4,LSL,p->DstBPP2);
else
I3S(SUB,R7,R9,R4,LSL,p->DstBPP2);
}
I2C(STR,R7,SP,OFS(stack,EndOfLine));
// preload
if (p->ARM5) // not needed for non-slices mode, but testing showed it didn't help on ARM4
{
if (!p->Slices)
{
//R4 width
//R0,R7,R5,R6 tmp
dyninst* PreLoadEnd = Label(0);
dyninst* PreLoad1;
dyninst* PreLoad2;
dyninst* PreLoad3;
dyninst* PreLoad4;
I3S(ADD,R0,R12,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0));
I2C(ADD,R5,R12,32);
I3(CMP,NONE,R5,R0);
I0P(B,CS,PreLoadEnd);
//y0
PreLoad1 = Label(1);
Byte(); I2C(LDR,R6,R5,-32);
I2C(ADD,R5,R5,64);
I3(CMP,NONE,R5,R0);
Byte(); I2C(LDR,R7,R5,-64);
I0P(B,CC,PreLoad1);
I3S(ADD,R0,R14,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0));
I2C(ADD,R5,R14,32);
//y1
PreLoad2 = Label(1);
Byte(); I2C(LDR,R6,R5,-32);
I2C(ADD,R5,R5,64);
I3(CMP,NONE,R5,R0);
Byte(); I2C(LDR,R7,R5,-64);
I0P(B,CC,PreLoad2);
I3S(ADD,R0,R10,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0)+p->SrcUVX2);
I2C(ADD,R5,R10,32);
I3(CMP,NONE,R5,R0);
I0P(B,CS,PreLoadEnd);
//u
PreLoad3 = Label(1);
Byte(); I2C(LDR,R6,R5,-32);
I2C(ADD,R5,R5,64);
I3(CMP,NONE,R5,R0);
Byte(); I2C(LDR,R7,R5,-64);
I0P(B,CC,PreLoad3);
I3S(ADD,R0,R11,R4,ASR,(p->SrcDoubleX?1:0)-(p->SrcHalfX?1:0)+p->SrcUVX2);
I2C(ADD,R5,R11,32);
//v
PreLoad4 = Label(1);
Byte(); I2C(LDR,R6,R5,-32);
I2C(ADD,R5,R5,64);
I3(CMP,NONE,R5,R0);
Byte(); I2C(LDR,R7,R5,-64);
I0P(B,CC,PreLoad4);
if (p->OnlyDiff) //restore R7
I2C(LDR,R7,SP,OFS(stack,EndOfLine));
InstPost(PreLoadEnd);
}
else
{
//preload next
MB(); I2C(LDR,R6,SP,OFS(stack,SrcPitch));
I3S(PLD,NONE,R12,R6,LSL,1);
I3S(PLD,NONE,R14,R6,LSL,1);
I3S(PLD,NONE,R10,R6,ASR,p->SrcUVPitch2);
I3S(PLD,NONE,R11,R6,ASR,p->SrcUVPitch2);
}
}
if (p->OnlyDiff)
{
MB(); I1P(LDR,R5,p->DiffMask,0);
MB(); I2C(LDR,R6,SP,OFS(stack,Src2SrcLast));
p->Skip = Label(0);
}
LoopX = Label(1);
{
int PitchDouble;
reg Pitch;
Fix_RGB_UV_LoadUV(p);
p->Pos = -1;
Fix_RGB_UV_Pixel(p,0,0);
if (p->DstBPP==32)
{
if (p->Pos)
I3S(MOV,R0,NONE,R0,ROR,-p->Pos);
assert(!p->DstDoubleX && !p->DstDoubleY);
MB(); I2C(LDR,R1,SP,OFS(stack,DstPitch));
I2C(ADD,R1,R1,-p->DstStepX/2);
I3(STR,R0,R9,R1);
p->Pos = -1;
}
Fix_RGB_UV_Pixel(p,1,0);
if (p->Pos)
I3S(MOV,R0,NONE,R0,ROR,-p->Pos);
Pitch = (reg)(p->Dither ? R7:R1);
MB(); I2C(LDR,Pitch,SP,OFS(stack,DstPitch));
if (p->DstBPP==8 && p->DstDoubleX)
I3S(ORR,R0,R0,R0,LSL,8);
if (p->DstBPP==16 && p->DstDoubleX)
{
I2C(ADD,R9,R9,4);
I3S(MOV,R3,NONE,R0,LSR,16);
I3S(MOV,R0,NONE,R0,LSL,16);
I3S(ORR,R3,R3,R3,LSL,16);
I3S(ORR,R0,R0,R0,LSR,16);
if (p->DstDoubleY)
{
I3S(ADD,R2,Pitch,Pitch,LSL,1); //R2=3*DstPitch
I3(STR,R3,R9,R2);
}
I3S(STR,R3,R9,Pitch,LSL,p->DstDoubleY);
I2C(SUB,R9,R9,4);
}
if (p->DstDoubleY)
{
I3S(ADD,R2,Pitch,Pitch,LSL,1); //R2=3*DstPitch
if (p->DstBPP==8 && !p->DstDoubleX) Half();
I3(STR,R0,R9,R2);
}
PitchDouble = p->DstDoubleY;
if (p->DstBPP==8 && !p->DstDoubleX)
{
if (PitchDouble) // can't use STR with Half() and LSL,#1 at the same time
{
PitchDouble = 0;
I3(ADD,Pitch,Pitch,Pitch);
}
Half();
}
I3S(STR,R0,R9,Pitch,LSL,PitchDouble);
if (p->SwapXY)
I3S(ADD,R9,R9,Pitch,LSL,1+PitchDouble);
p->Pos = -1;
Fix_RGB_UV_Pixel(p,0,1);
if (p->DstBPP==32)
{
if (p->Pos)
I3S(MOV,R0,NONE,R0,ROR,-p->Pos);
assert(!p->DstDoubleX && !p->DstDoubleY);
I2C(STR,R0,R9,-p->DstStepX/2);
p->Pos = -1;
}
Fix_RGB_UV_Pixel(p,1,1);
if (p->Pos)
I3S(MOV,R0,NONE,R0,ROR,-p->Pos);
if (p->DstBPP==8 && p->DstDoubleX)
I3S(ORR,R0,R0,R0,LSL,8);
if (p->DstDoubleY)
{
MB(); I2C(LDR,R1,SP,OFS(stack,DstPitch));
}
if (p->DstBPP==16 && p->DstDoubleX)
{
I3S(MOV,R3,NONE,R0,LSR,16);
I3S(MOV,R0,NONE,R0,LSL,16);
I3S(ORR,R3,R3,R3,LSL,16);
I3S(ORR,R0,R0,R0,LSR,16);
if (p->DstDoubleY)
{
I2C(ADD,R2,R1,4); //DstPitch+4
I3(STR,R3,R9,R2);
}
I2C(STR,R3,R9,4);
}
if (p->DstDoubleY)
{
if (p->DstBPP==8 && !p->DstDoubleX) Half();
I3(STR,R0,R9,R1);
}
if (p->DstBPP==8 && !p->DstDoubleX) Half();
if (p->SwapXY)
I2(STR,R0,R9);
else
I2C(STR_POST,R0,R9,p->DstStepX);
MB(); I2C(LDR,R7,SP,OFS(stack,EndOfLine));
if (p->OnlyDiff)
{
MB(); I1P(LDR,R5,p->DiffMask,0);
MB(); I2C(LDR,R6,SP,OFS(stack,Src2SrcLast));
InstPost(p->Skip);
}
I3(CMP,NONE,R9,R7);
I0P(B,NE,LoopX);
}
I2C(LDR,R0,SP,OFS(stack,YNext));
I2C(LDR,R4,SP,OFS(stack,DstNext));
I2C(LDR,R6,SP,OFS(stack,UVNext));
I2C(LDR,R5,SP,OFS(stack,EndOfRect));
//increment pointers
I3(ADD,R12,R12,R0);
I3(ADD,R14,R14,R0);
I3(ADD,R9,R9,R4);
I3(ADD,R10,R10,R6);
I3(ADD,R11,R11,R6);
//prepare registers for next row
if (p->SwapXY) I2C(LDR,R6,SP,OFS(stack,DstPitch));
I2C(LDR,R4,SP,OFS(stack,Width));
I3(CMP,NONE,R9,R5);
I0P(B,NE,LoopY);
I2C(ADD,SP,SP,OFS(stack,StackFrame));
CodeEnd();
InstPost(p->YMul);
InstPost(p->RVMul);
InstPost(p->RAdd);
InstPost(p->GUMul);
InstPost(p->GVMul);
InstPost(p->GAdd);
InstPost(p->BUMul);
InstPost(p->BAdd);
if (p->InvertMask) InstPost(p->InvertMask);
if (p->DiffMask) InstPost(p->DiffMask);
if (p->PalPtr) InstPost(p->PalPtr);
if (p->LookUp)
{
Align(16);
InstPost(p->LookUp);
}
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -