📄 2xsai.cpp
字号:
/*
* Copyright (c) 2005,2006 Milan Cutka
* uses code from mplayer 2xsai filter
* http://elektron.its.tudelft.nl/~dalikifa/
* and original 2xSai Copyright (c) Derek Liauw Kie Fa, 1999
*
* hq2x filter (C) 2003 MaxSt ( maxst@hiend3d.com )
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "stdafx.h"
#include "2xsai.h"
/**
 * Super 2xSaI scaler: writes a 2x-enlarged copy of the source image.
 *
 * Each source pixel is read as one `unsigned long` and yields a 2x2 block of
 * destination pixels stored 4 bytes apart, so the routine effectively works on
 * 32-bit pixels.
 * NOTE(review): the loads/stores go through `unsigned long*`; this only matches
 * the 4-byte destination spacing where `unsigned long` is 32 bits wide — confirm
 * for every target this file is built for.
 *
 * GET_RESULT, INTERPOLATE and Q_INTERPOLATE are defined outside this file section.
 *
 * @param src        first source row
 * @param src_pitch  byte distance between consecutive source rows
 * @param dst        first destination row (2*height rows of 2*width pixels are written)
 * @param dst_pitch  byte distance between consecutive destination rows
 * @param width      source width in pixels
 * @param height     source height in pixels
 *
 * NOTE(review): the priming code reads pixels 1 and 2 of the first three rows
 * without checking width/height, so callers must pass at least a 3x3 image.
 */
void T2xSaI::super(const uint8_t *src, stride_t src_pitch,
                   uint8_t *dst, stride_t dst_pitch,
                   uint32_t width, uint32_t height)
{
    /* 4x4 window of source pixels, row-major; color[5] is the pixel being scaled. */
    unsigned long color[16];
    const unsigned char *src_line[4];
    unsigned long *lbp;

    /* Point to the first 3 lines (row 0 doubles as the missing row above it). */
    src_line[0] = src;
    src_line[1] = src;
    src_line[2] = src + src_pitch;
    src_line[3] = src + src_pitch * 2;

    /* Prime the window for x == 0; the column left of the image replicates column 0. */
    lbp = (unsigned long*)src_line[0];
    color[0] = *lbp; color[1] = color[0]; color[2] = color[0]; color[3] = color[0];
    color[4] = color[0]; color[5] = color[0]; color[6] = *(lbp + 1); color[7] = *(lbp + 2);
    lbp = (unsigned long*)src_line[2];
    color[8] = *lbp; color[9] = color[8]; color[10] = *(lbp + 1); color[11] = *(lbp + 2);
    lbp = (unsigned long*)src_line[3];
    color[12] = *lbp; color[13] = color[12]; color[14] = *(lbp + 1); color[15] = *(lbp + 2);

    for (unsigned int y = 0; y < height; y++)
    {
        /* Each source row produces two destination rows. */
        unsigned char *dst_line[2];
        dst_line[0] = dst + dst_pitch*2*y;
        dst_line[1] = dst + dst_pitch*(2*y+1);

        /* Todo: x = width - 2, x = width - 1 */
        for (unsigned int x = 0; x < width; x++)
        {
            unsigned long product1a, product1b, product2a, product2b;

            //--------------------------------------- B0 B1 B2 B3     0  1  2  3
            //                                         4  5* 6 S2 ->  4  5* 6  7
            //                                         1  2  3 S1     8  9 10 11
            //                                        A0 A1 A2 A3    12 13 14 15
            //--------------------------------------

            /* Right-hand output pixels: product1b (upper) and product2b (lower). */
            if (color[9] == color[6] && color[5] != color[10])
            {
                product2b = color[9];
                product1b = product2b;
            }
            else if (color[5] == color[10] && color[9] != color[6])
            {
                product2b = color[5];
                product1b = product2b;
            }
            else if (color[5] == color[10] && color[9] == color[6])
            {
                /* Both diagonals match: let the surrounding pixels vote. */
                int r = 0;
                r += GET_RESULT(color[6], color[5], color[8], color[13]);
                r += GET_RESULT(color[6], color[5], color[4], color[1]);
                r += GET_RESULT(color[6], color[5], color[14], color[11]);
                r += GET_RESULT(color[6], color[5], color[2], color[7]);
                if (r > 0)
                    product1b = color[6];
                else if (r < 0)
                    product1b = color[5];
                else
                    product1b = INTERPOLATE(color[5], color[6]);
                product2b = product1b;
            }
            else
            {
                if (color[6] == color[10] && color[10] == color[13] && color[9] != color[14] && color[10] != color[12])
                    product2b = Q_INTERPOLATE(color[10], color[10], color[10], color[9]);
                else if (color[5] == color[9] && color[9] == color[14] && color[13] != color[10] && color[9] != color[15])
                    product2b = Q_INTERPOLATE(color[9], color[9], color[9], color[10]);
                else
                    product2b = INTERPOLATE(color[9], color[10]);

                if (color[6] == color[10] && color[6] == color[1] && color[5] != color[2] && color[6] != color[0])
                    product1b = Q_INTERPOLATE(color[6], color[6], color[6], color[5]);
                else if (color[5] == color[9] && color[5] == color[2] && color[1] != color[6] && color[5] != color[3])
                    product1b = Q_INTERPOLATE(color[6], color[5], color[5], color[5]);
                else
                    product1b = INTERPOLATE(color[5], color[6]);
            }

            /* Lower-left output pixel. */
            if (color[5] == color[10] && color[9] != color[6] && color[4] == color[5] && color[5] != color[14])
                product2a = INTERPOLATE(color[9], color[5]);
            else if (color[5] == color[8] && color[6] == color[5] && color[4] != color[9] && color[5] != color[12])
                product2a = INTERPOLATE(color[9], color[5]);
            else
                product2a = color[9];

            /* Upper-left output pixel. */
            if (color[9] == color[6] && color[5] != color[10] && color[8] == color[9] && color[9] != color[2])
                product1a = INTERPOLATE(color[9], color[5]);
            else if (color[4] == color[9] && color[10] == color[9] && color[8] != color[5] && color[9] != color[0])
                product1a = INTERPOLATE(color[9], color[5]);
            else
                product1a = color[5];

            /* Store the 2x2 block: 8 destination bytes per source pixel on each row. */
            *((unsigned long *) (&dst_line[0][x * 8])) = product1a;
            *((unsigned long *) (&dst_line[0][x * 8 + 4])) = product1b;
            *((unsigned long *) (&dst_line[1][x * 8])) = product2a;
            *((unsigned long *) (&dst_line[1][x * 8 + 4])) = product2b;

            /* Move color matrix forward */
            color[0] = color[1]; color[4] = color[5]; color[8] = color[9]; color[12] = color[13];
            color[1] = color[2]; color[5] = color[6]; color[9] = color[10]; color[13] = color[14];
            color[2] = color[3]; color[6] = color[7]; color[10] = color[11]; color[14] = color[15];

            /* Fetch the new right-hand column while it is still inside the row.
               Written as x + 3 < width so that width < 3 cannot wrap the unsigned
               subtraction; near the right edge the last column is simply kept. */
            if (x + 3 < width)
            {
                const unsigned int next = x + 3;
                color[3] = *(((unsigned long*)src_line[0]) + next);
                color[7] = *(((unsigned long*)src_line[1]) + next);
                color[11] = *(((unsigned long*)src_line[2]) + next);
                color[15] = *(((unsigned long*)src_line[3]) + next);
            }
        }

        /* We're done with one line, so we shift the source lines up */
        src_line[0] = src_line[1];
        src_line[1] = src_line[2];
        src_line[2] = src_line[3];

        /* Read next line; past the bottom of the image the last row is repeated. */
        if (y + 3 >= height)
            src_line[3] = src_line[2];
        else
            src_line[3] = src_line[2] + src_pitch;

        /* Then reload the color matrix for x == 0 of the next row.
           Columns 0 and 1 of every window row both replicate the row's first pixel. */
        lbp = (unsigned long*)src_line[0];
        color[0] = *lbp; color[1] = color[0]; color[2] = *(lbp + 1); color[3] = *(lbp + 2);
        lbp = (unsigned long*)src_line[1];
        color[4] = *lbp; color[5] = color[4]; color[6] = *(lbp + 1); color[7] = *(lbp + 2);
        lbp = (unsigned long*)src_line[2];
        /* color[9] must replicate color[8]; it used to be assigned to itself,
           which left the value from the end of the previous row in place. */
        color[8] = *lbp; color[9] = color[8]; color[10] = *(lbp + 1); color[11] = *(lbp + 2);
        lbp = (unsigned long*)src_line[3];
        color[12] = *lbp; color[13] = color[12]; color[14] = *(lbp + 1); color[15] = *(lbp + 2);
    } // y loop
}
// Edge-direction vote used by _2xSaI.
// C and D are each compared with A first and, only when that fails, with B.
// Returns +1 when A matched at most once, minus 1 when B matched at most once
// (so the result is one of -1, 0, +1). E is accepted but not used.
int T2xSaI::GetResult1(uint32 A, uint32 B, uint32 C, uint32 D, uint32 E)
{
    int matchesA = 0;
    int matchesB = 0;

    if (A == C)
        ++matchesA;
    else if (B == C)
        ++matchesB;

    if (A == D)
        ++matchesA;
    else if (B == D)
        ++matchesB;

    const int plus = (matchesA <= 1) ? 1 : 0;
    const int minus = (matchesB <= 1) ? 1 : 0;
    return plus - minus;
}
// Edge-direction vote used by _2xSaI; same matching as GetResult1 with the
// sign of the result reversed.
// C and D are each compared with A first and, only when that fails, with B.
// Returns +1 when B matched at most once, minus 1 when A matched at most once
// (so the result is one of -1, 0, +1). E is accepted but not used.
int T2xSaI::GetResult2(uint32 A, uint32 B, uint32 C, uint32 D, uint32 E)
{
    int matchesA = 0;
    int matchesB = 0;

    if (A == C)
        ++matchesA;
    else if (B == C)
        ++matchesB;

    if (A == D)
        ++matchesA;
    else if (B == D)
        ++matchesB;

    const int plus = (matchesB <= 1) ? 1 : 0;
    const int minus = (matchesA <= 1) ? 1 : 0;
    return plus - minus;
}
void T2xSaI::_2xSaI(const uint8 *srcPtr, stride_t srcPitch, uint8 *dstBitmap, int width, int height,stride_t dstPitch)
{
uint32 line;
uint32 x_offset;
line = 0;//(dstBitmap->h - height * 2) >> 1;
x_offset = 0;//(width*2 - width * 2);
const uint16 *bp[4] =
{
(uint16*)(srcPtr/*-srcPitch*/),
(uint16*) srcPtr,
(uint16*)(srcPtr+srcPitch),
(uint16*)(srcPtr+2*srcPitch)
};
for (; height; height-=1)
{
uint8_t *dP = dstBitmap+line*dstPitch+x_offset; /*bmp_write_line (dstBitmap, line) + x_offset*/;
for (int x = 0; x<width; x++ )
{
register uint32 colorA, colorB;
uint32 colorC, colorD,
colorE, colorF, colorG, colorH,
colorI, colorJ, colorK, colorL,
colorM, colorN, colorO, colorP;
uint32 product, product1, product2;
//---------------------------------------
// Map of the pixels: I|E F|J
// G|A B|K
// H|C D|L
// M|N O|P
int a1=x>0?-1:0;
int a2=x<width-1?1:0;
int a3=x<width-2?2:1;
colorI = bp[0][a1+x];//*(bP- Nextline - 1);
colorE = bp[0][ 0+x];//*(bP- Nextline);
colorF = bp[0][a2+x];//*(bP- Nextline + 1);
colorJ = bp[0][a3+x];//*(bP- Nextline + 2);
colorG = bp[1][a1+x];//*(bP - 1);
colorA = bp[1][ 0+x];//*(bP);
colorB = bp[1][a2+x];//*(bP + 1);
colorK = bp[1][a3+x];//*(bP + 2);
colorH = bp[2][a1+x];//*(bP + Nextline - 1);
colorC = bp[2][ 0+x];//*(bP + Nextline);
colorD = bp[2][a2+x];//*(bP + Nextline + 1);
colorL = bp[2][a3+x];//*(bP + Nextline + 2);
colorM = bp[3][a1+x];//*(bP + Nextline + Nextline - 1);
colorN = bp[3][ 0+x];//*(bP + Nextline + Nextline);
colorO = bp[3][a2+x];//*(bP + Nextline + Nextline + 1);
colorP = bp[3][a3+x];//*(bP + Nextline + Nextline + 2);
if ((colorA == colorD) && (colorB != colorC))
{
if ( ((colorA == colorE) && (colorB == colorL)) ||
((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ)) )
{
product = colorA;
}
else
{
product = INTERPOLATE16(colorA, colorB);
}
if (((colorA == colorG) && (colorC == colorO)) ||
((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM)) )
{
product1 = colorA;
}
else
{
product1 = INTERPOLATE16(colorA, colorC);
}
product2 = colorA;
}
else if ((colorB == colorC) && (colorA != colorD))
{
if (((colorB == colorF) && (colorA == colorH)) ||
((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)) )
{
product = colorB;
}
else
{
product = INTERPOLATE16(colorA, colorB);
}
if (((colorC == colorH) && (colorA == colorF)) ||
((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)) )
{
product1 = colorC;
}
else
{
product1 = INTERPOLATE16(colorA, colorC);
}
product2 = colorB;
}
else if ((colorA == colorD) && (colorB == colorC))
{
if (colorA == colorB)
{
product = colorA;
product1 = colorA;
product2 = colorA;
}
else
{
register int r = 0;
product1 = INTERPOLATE16(colorA, colorC);
product = INTERPOLATE16(colorA, colorB);
r += GetResult1 (colorA, colorB, colorG, colorE, colorI);
r += GetResult2 (colorB, colorA, colorK, colorF, colorJ);
r += GetResult2 (colorB, colorA, colorH, colorN, colorM);
r += GetResult1 (colorA, colorB, colorL, colorO, colorP);
if (r > 0)
product2 = colorA;
else if (r < 0)
product2 = colorB;
else
{
product2 = Q_INTERPOLATE16(colorA, colorB, colorC, colorD);
}
}
}
else
{
product2 = Q_INTERPOLATE16(colorA, colorB, colorC, colorD);
if ((colorA == colorC) && (colorA == colorF) && (colorB != colorE) && (colorB == colorJ))
{
product = colorA;
}
else if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI))
{
product = colorB;
}
else
{
product = INTERPOLATE16(colorA, colorB);
}
if ((colorA == colorB) && (colorA == colorH) && (colorG != colorC) && (colorC == colorM))
{
product1 = colorA;
}
else if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI))
{
product1 = colorC;
}
else
{
product1 = INTERPOLATE16(colorA, colorC);
}
}
product = colorA | (product << 16);
product1 = product1 | (product2 << 16);
*(uint32_t*)dP=product;
*(uint32_t*)(dP+dstPitch)=product1;
dP += 4;
}//end of for ( finish= width etc..)
bp[0]=bp[1];bp[1]=bp[2];bp[2]=bp[3];
if (height>3)
bp[3]+=srcPitch/2;
line += 2;
//srcPtr += srcPitch;
} //endof: for (height; height; height--)
//memcpy(dstBitmap+(line-1)*dstPitch,dstBitmap+(line-2)*dstPitch,width*4);
}
//=====================================================================================
// Lookup tables with one entry per 16-bit pixel value; both are filled by Thq2x::init().
// LUT16to32[i]: the 5/6/5 bit fields of i moved into the upper bits of three separate bytes.
int Thq2x::LUT16to32[65536];
// RGBtoYUV[i]: (Y<<16) + (u<<8) + v computed from the same three bit fields.
// __align8 is a macro defined outside this file (presumably requests 8-byte alignment - TODO confirm).
__align8(int,Thq2x::RGBtoYUV[65536]);
// Fills LUT16to32 and RGBtoYUV for every possible 16-bit pixel value.
// The pixel value is split into a 5-bit, a 6-bit and a 5-bit field
// (bits 15..11, 10..5 and 4..0), each scaled to the 0..255 range.
void Thq2x::init(void)
{
    for (int pix = 0; pix < 65536; pix++)
    {
        // Same three fields, each moved to the top of its own byte.
        LUT16to32[pix] = ((pix & 0xF800) << 8) + ((pix & 0x07E0) << 5) + ((pix & 0x001F) << 3);

        const int r = (pix >> 11) << 3;
        const int g = ((pix >> 5) & 0x3F) << 2;
        const int b = (pix & 0x1F) << 3;

        const int Y = (r + g + b) >> 2;
        const int u = 128 + ((r - b) >> 2);
        const int v = 128 + ((-r + 2*g - b) >> 3);

        // One byte per component: Y in bits 23..16, u in 15..8, v in 7..0.
        RGBtoYUV[pix] = (Y << 16) + (u << 8) + v;
    }
}
// 64-bit operands for the packed-word helper calls in the Interp6/7/9/10 routines.
// reg_blank is all zero bits; it is the second operand of every punpcklbw / packuswb call there.
const __int64 Thq2x::reg_blank = 0;
// constN holds N in each of its three low 16-bit words (the top word is 0);
// they are the pmullw multipliers in Interp9 (3), Interp6 (5), Interp7 (6) and Interp10 (14).
const __int64 Thq2x::const3 = 0x0000000300030003LL;
const __int64 Thq2x::const5 = 0x0000000500050005LL;
const __int64 Thq2x::const6 = 0x0000000600060006LL;
const __int64 Thq2x::const14 = 0x0000000E000E000ELL;
// Stores (3*c1 + c2) / 4 as one int at pc.
// The sum is formed on the whole packed value, not per byte.
__forceinline void Thq2x::Interp1(unsigned char * pc, int c1, int c2)
{
    int *const out = (int*)pc;
    const int weighted = c1*3 + c2;
    *out = weighted >> 2;
}
// Stores (2*c1 + c2 + c3) / 4 as one int at pc.
// The sum is formed on the whole packed value, not per byte.
__forceinline void Thq2x::Interp2(unsigned char * pc, int c1, int c2, int c3)
{
    int *const out = (int*)pc;
    const int weighted = c1*2 + c2 + c3;
    *out = weighted >> 2;
}
// Stores (c1 + c2) / 2 as one int at pc.
// The sum is formed on the whole packed value, not per byte.
__forceinline void Thq2x::Interp5(unsigned char * pc, int c1, int c2)
{
    int *const out = (int*)pc;
    const int sum = c1 + c2;
    *out = sum >> 1;
}
// Blends three packed colors with weights 5:2:1 and stores the result at pc.
// movd/punpcklbw/pmullw/psllw/paddw/psrlw/packuswb are helpers declared outside
// this file; the step comments assume each behaves like the MMX instruction of
// the same name - TODO confirm.
__forceinline void Thq2x::Interp6(unsigned char * pc, int c1, int c2, int c3)
{
// Scalar formula this routine implements per color component:
//*((int*)pc) = (c1*5+c2*2+c3)/8;
unsigned char *eax= pc;
__m64 mm1,mm2,mm3;
// Load the three colors.
movd ( mm1, c1);
movd ( mm2, c2);
movd ( mm3, c3);
// Widen every byte to a 16-bit word so the weighted sum cannot overflow a component.
punpcklbw ( mm1, reg_blank);
punpcklbw ( mm2, reg_blank);
punpcklbw ( mm3, reg_blank);
// mm1 = c1*5 + c3 + c2*2
pmullw ( mm1, const5);
psllw ( mm2, 1);
paddw ( mm1, mm3);
paddw ( mm1, mm2);
// Divide by 8, narrow back to bytes and store through eax (== pc).
psrlw ( mm1, 3);
packuswb ( mm1, reg_blank);
movd( eax, mm1);
}
// Blends three packed colors with weights 6:1:1 and stores the result at pc.
// movd/punpcklbw/pmullw/paddw/psrlw/packuswb are helpers declared outside this
// file; the step comments assume each behaves like the MMX instruction of the
// same name - TODO confirm.
__forceinline void Thq2x::Interp7(unsigned char * pc, int c1, int c2, int c3)
{
// Scalar formula this routine implements per color component:
//*((int*)pc) = (c1*6+c2+c3)/8;
unsigned char *eax= pc;
__m64 mm1,mm2,mm3;
// Load the three colors.
movd ( mm1, c1);
movd ( mm2, c2);
movd ( mm3, c3);
// Widen every byte to a 16-bit word so the weighted sum cannot overflow a component.
punpcklbw ( mm1, reg_blank);
punpcklbw ( mm2, reg_blank);
punpcklbw ( mm3, reg_blank);
// mm1 = c1*6 + (c2 + c3)
pmullw ( mm1, const6);
paddw ( mm2, mm3);
paddw ( mm1, mm2);
// Divide by 8, narrow back to bytes and store through eax (== pc).
psrlw ( mm1, 3);
packuswb ( mm1, reg_blank);
movd ( eax, mm1);
}
// Blends three packed colors with weights 2:3:3 and stores the result at pc.
// movd/punpcklbw/psllw/paddw/pmullw/psrlw/packuswb are helpers declared outside
// this file; the step comments assume each behaves like the MMX instruction of
// the same name - TODO confirm.
__forceinline void Thq2x::Interp9(unsigned char * pc, int c1, int c2, int c3)
{
// Scalar formula this routine implements per color component:
//*((int*)pc) = (c1*2+(c2+c3)*3)/8;
unsigned char *eax= pc;
__m64 mm1,mm2,mm3;
// Load the three colors.
movd (mm1, c1);
movd (mm2, c2);
movd (mm3, c3);
// Widen every byte to a 16-bit word so the weighted sum cannot overflow a component.
punpcklbw (mm1, reg_blank);
punpcklbw (mm2, reg_blank);
punpcklbw (mm3, reg_blank);
// mm1 = c1*2 + (c2 + c3)*3
psllw (mm1, 1);
paddw (mm2, mm3);
pmullw (mm2, const3);
paddw (mm1, mm2);
// Divide by 8, narrow back to bytes and store through eax (== pc).
psrlw (mm1, 3);
packuswb (mm1, reg_blank);
movd (eax, mm1);
}
// Blends three packed colors with weights 14:1:1 and stores the result at pc.
// movd/punpcklbw/pmullw/paddw/psrlw/packuswb are helpers declared outside this
// file; the step comments assume each behaves like the MMX instruction of the
// same name - TODO confirm.
__forceinline void Thq2x::Interp10(unsigned char * pc, int c1, int c2, int c3)
{
// Scalar formula this routine implements per color component:
//*((int*)pc) = (c1*14+c2+c3)/16;
unsigned char *eax= pc;
__m64 mm1,mm2,mm3;
// Load the three colors.
movd (mm1, c1);
movd (mm2, c2);
movd (mm3, c3);
// Widen every byte to a 16-bit word so the weighted sum cannot overflow a component.
punpcklbw (mm1, reg_blank);
punpcklbw (mm2, reg_blank);
punpcklbw (mm3, reg_blank);
// mm1 = c1*14 + (c2 + c3)
pmullw (mm1, const14);
paddw (mm2, mm3);
paddw (mm1, mm2);
// Divide by 16, narrow back to bytes and store through eax (== pc).
psrlw (mm1, 4);
packuswb (mm1, reg_blank);
movd (eax, mm1);
}
// Output-pixel macros. They expand to statements that use the names pOut, c and
// dstBpL from the scope in which they are expanded.
//
// PIXELrc_op: "rc" selects which pixel of the 2x2 output block is written:
//   00 -> pOut          01 -> pOut+4
//   10 -> pOut+dstBpL   11 -> pOut+dstBpL+4
// "op" selects how that pixel is produced from the center color c[5] and its neighbours:
//   0           copy c[5] unchanged
//   10/11/12    Interp1  (c[5] with one neighbour)
//   20/21/22    Interp2  (c[5] with two neighbours)
//   60/61       Interp6
//   70          Interp7
//   90          Interp9
//   100         Interp10

// Output pixel at pOut.
#define PIXEL00_0 *((int*)(pOut)) = c[5];
#define PIXEL00_10 Interp1(pOut, c[5], c[1]);
#define PIXEL00_11 Interp1(pOut, c[5], c[4]);
#define PIXEL00_12 Interp1(pOut, c[5], c[2]);
#define PIXEL00_20 Interp2(pOut, c[5], c[4], c[2]);
#define PIXEL00_21 Interp2(pOut, c[5], c[1], c[2]);
#define PIXEL00_22 Interp2(pOut, c[5], c[1], c[4]);
#define PIXEL00_60 Interp6(pOut, c[5], c[2], c[4]);
#define PIXEL00_61 Interp6(pOut, c[5], c[4], c[2]);
#define PIXEL00_70 Interp7(pOut, c[5], c[4], c[2]);
#define PIXEL00_90 Interp9(pOut, c[5], c[4], c[2]);
#define PIXEL00_100 Interp10(pOut, c[5], c[4], c[2]);
// Output pixel at pOut+4.
#define PIXEL01_0 *((int*)(pOut+4)) = c[5];
#define PIXEL01_10 Interp1(pOut+4, c[5], c[3]);
#define PIXEL01_11 Interp1(pOut+4, c[5], c[2]);
#define PIXEL01_12 Interp1(pOut+4, c[5], c[6]);
#define PIXEL01_20 Interp2(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_21 Interp2(pOut+4, c[5], c[3], c[6]);
#define PIXEL01_22 Interp2(pOut+4, c[5], c[3], c[2]);
#define PIXEL01_60 Interp6(pOut+4, c[5], c[6], c[2]);
#define PIXEL01_61 Interp6(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_70 Interp7(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_90 Interp9(pOut+4, c[5], c[2], c[6]);
#define PIXEL01_100 Interp10(pOut+4, c[5], c[2], c[6]);
// Output pixel at pOut+dstBpL.
#define PIXEL10_0 *((int*)(pOut+dstBpL)) = c[5];
#define PIXEL10_10 Interp1(pOut+dstBpL, c[5], c[7]);
#define PIXEL10_11 Interp1(pOut+dstBpL, c[5], c[8]);
#define PIXEL10_12 Interp1(pOut+dstBpL, c[5], c[4]);
#define PIXEL10_20 Interp2(pOut+dstBpL, c[5], c[8], c[4]);
#define PIXEL10_21 Interp2(pOut+dstBpL, c[5], c[7], c[4]);
#define PIXEL10_22 Interp2(pOut+dstBpL, c[5], c[7], c[8]);
#define PIXEL10_60 Interp6(pOut+dstBpL, c[5], c[4], c[8]);
#define PIXEL10_61 Interp6(pOut+dstBpL, c[5], c[8], c[4]);
#define PIXEL10_70 Interp7(pOut+dstBpL, c[5], c[8], c[4]);
#define PIXEL10_90 Interp9(pOut+dstBpL, c[5], c[8], c[4]);
#define PIXEL10_100 Interp10(pOut+dstBpL, c[5], c[8], c[4]);
// Output pixel at pOut+dstBpL+4.
#define PIXEL11_0 *((int*)(pOut+dstBpL+4)) = c[5];
#define PIXEL11_10 Interp1(pOut+dstBpL+4, c[5], c[9]);
#define PIXEL11_11 Interp1(pOut+dstBpL+4, c[5], c[6]);
#define PIXEL11_12 Interp1(pOut+dstBpL+4, c[5], c[8]);
#define PIXEL11_20 Interp2(pOut+dstBpL+4, c[5], c[6], c[8]);
#define PIXEL11_21 Interp2(pOut+dstBpL+4, c[5], c[9], c[8]);
#define PIXEL11_22 Interp2(pOut+dstBpL+4, c[5], c[9], c[6]);
#define PIXEL11_60 Interp6(pOut+dstBpL+4, c[5], c[8], c[6]);
#define PIXEL11_61 Interp6(pOut+dstBpL+4, c[5], c[6], c[8]);
#define PIXEL11_70 Interp7(pOut+dstBpL+4, c[5], c[6], c[8]);
#define PIXEL11_90 Interp9(pOut+dstBpL+4, c[5], c[6], c[8]);
#define PIXEL11_100 Interp10(pOut+dstBpL+4, c[5], c[6], c[8]);
// Compares two pixels through their RGBtoYUV table entries.
// w5 and w1 are used as indices into RGBtoYUV. Returns 0 immediately when they
// are equal; otherwise returns the value left after the per-byte threshold
// subtraction below.
// movd/movq/psubusb/por are helpers declared outside this file; the step
// comments assume each behaves like the MMX instruction of the same name, in
// which case the result is non-zero exactly when some component differs by more
// than its threshold byte - TODO confirm.
__forceinline int Thq2x::Diff(unsigned int w5, unsigned int w1)
{
// One threshold byte per component, laid out like an RGBtoYUV entry
// ((Y<<16) + (u<<8) + v): 0x30 for Y, 0x07 for u, 0x06 for v.
static const __int64 treshold=0x0000000000300706LL;
int eax=0;;
unsigned int ebx=w5;
unsigned int edx=w1;
// Identical indices: nothing to compare.
if (ebx==edx) return eax;
// Byte pointer to the table; index*4 is used as the byte offset of an entry.
const unsigned char *ecx=(const unsigned char*)RGBtoYUV;
__m64 mm1,mm5,mm2;
// mm1 = entry for w5 (copy kept in mm5), mm2 = entry for w1.
movd (mm1,ecx + ebx*4);
movq (mm5,mm1);
movd (mm2,ecx + edx*4);
// Saturating subtraction in both directions, OR-ed together: per-byte absolute difference.
psubusb (mm1,mm2);
psubusb (mm2,mm5);
por (mm1,mm2);
// Remove the allowed difference; bytes within the threshold saturate to 0.
psubusb (mm1,treshold);
movd (eax,mm1);
return eax;
}
// returns result in eax register
void Thq2x::hq2x_32( const unsigned char * src, unsigned char * dst, int dx, int dy, stride_t srcBpL, stride_t dstBpL )
{
static bool firsttime=true;
if (firsttime)
{
firsttime=false;
init();
}
int i, j, k;
stride_t prevline, nextline;
int w[10];
int c[10];
// +----+----+----+
// | | | |
// | w1 | w2 | w3 |
// +----+----+----+
// | | | |
// | w4 | w5 | w6 |
// +----+----+----+
// | | | |
// | w7 | w8 | w9 |
// +----+----+----+
for (j=0; j<dy; j++)
{
if (j>0) prevline = -srcBpL; else prevline = 0;
if (j<dy-1) nextline = srcBpL; else nextline = 0;
const unsigned char *pIn=src+j*srcBpL;
unsigned char *pOut=dst+(j*2)*dstBpL;
for (i=0; i<dx; i++)
{
w[2] = *((unsigned short*)(pIn + prevline));
w[5] = *((unsigned short*)pIn);
w[8] = *((unsigned short*)(pIn + nextline));
if (i>0)
{
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -