// File: convert_yuy2.h
// (source-viewer "font size" control label was here; it is not part of the code)
// Converts one group of 8 source bytes (two dwords, four pixels) of packed
// YUV 4:2:2 to RGB and stores the result.
//
// Parameters:
//   no_next_pixel - 0 for every group except the last one of a row. When 1, the
//                   "next" chroma sample is loaded from esi+4 instead of esi+8,
//                   so nothing beyond the current 8-byte group is read, and the
//                   return value is always true.
//   esi           - in/out source pointer; advanced by 8 bytes.
//   ecx           - source limit compared against the advanced esi.
//   edi           - in/out destination pointer; advanced by 12+4*rgb32 bytes.
//                   The stores use negative offsets from the advanced pointer.
//   edx           - base of the constant table; the ofs_* values below are byte
//                   offsets into it (8 bytes per entry).
//   mm0..mm7      - scratch __m64 values owned by the caller; all are overwritten.
//
// Returns: when no_next_pixel==0, true while the advanced esi is still below ecx;
//          when no_next_pixel!=0, always true.
//
// NOTE(review): `uyvy` and `rgb32` are not declared in this block - presumably
// template parameters of the enclosing class; confirm. movd/movq/psubw/... and
// GET_Y/GET_UV are helpers defined elsewhere; per the inline comments GET_Y keeps
// the two luma samples and GET_UV keeps the chroma pair of a dword.
static __forceinline bool YUV2RGB_INNER_LOOP(int no_next_pixel,const unsigned char* &esi,const unsigned char* const ecx,unsigned char* &edi,const unsigned char* const edx,__m64 &mm0,__m64 &mm1,__m64 &mm2,__m64 &mm3,__m64 &mm4,__m64 &mm5,__m64 &mm6,__m64 &mm7)
{
//This YUV422->RGB conversion code uses only four MMX registers per
//source dword, so I convert two dwords in parallel. Lines corresponding
//to the "second pipe" are indented an extra space (in this copy the extra
//space follows the opening parenthesis). There's almost no
//overlap, except at the end and in the three lines marked ***.
//revised 4july,2002 to properly set alpha in rgb32 to default "on" & other small memory optimizations

// Byte offsets of the constants inside the table pointed to by edx.
// Offset 16 is not referenced directly in this function.
static const int ofs_x0000_0000_0010_0010=0;
static const int ofs_x0080_0080_0080_0080=8;
static const int ofs_x00002000_00002000=24;
static const int ofs_xFF000000_FF000000=32;
static const int ofs_cy=40;
static const int ofs_crv=48;
static const int ofs_cgu_cgv=56;
static const int ofs_cbu=64;
bool ret;
// Load the two source dwords of this group (first pipe: mm0..mm3, second pipe: mm4..mm7).
movd (mm0,esi);
movd ( mm5,esi+4);
movq (mm1,mm0);
GET_Y (mm0,uyvy,edx); // mm0 = __________Y1__Y0
movq ( mm4,mm5);
GET_UV (mm1,uyvy,edx); // mm1 = __________V0__U0
GET_Y ( mm4,uyvy,edx); // mm4 = __________Y3__Y2
movq (mm2,mm5); // *** avoid reload from [esi+4]
GET_UV ( mm5,uyvy,edx); // mm5 = __________V2__U2
psubw (mm0,edx+ofs_x0000_0000_0010_0010); // (Y-16)
// Chroma of the following group; for the last group of a row (no_next_pixel==1)
// this re-reads esi+4 so no byte past the current group is touched.
movd ( mm6,esi+8-4*(no_next_pixel));
GET_UV (mm2,uyvy,edx); // mm2 = __________V2__U2
psubw ( mm4,edx+ofs_x0000_0000_0010_0010); // (Y-16)
paddw (mm2,mm1); // 2*UV1=UV0+UV2
GET_UV ( mm6,uyvy,edx); // mm6 = __________V4__U4
psubw (mm1,edx+ofs_x0080_0080_0080_0080); // (UV-128)
paddw ( mm6,mm5); // 2*UV3=UV2+UV4
psllq (mm2,32);
psubw ( mm5,edx+ofs_x0080_0080_0080_0080); // (UV-128)
punpcklwd (mm0,mm2); // mm0 = ______Y1______Y0
psllq ( mm6,32);
pmaddwd (mm0,edx+ofs_cy); // (Y-16)*(255./219.)<<14
punpcklwd ( mm4,mm6);
paddw (mm1,mm1); // 2*UV0=UV0+UV0
pmaddwd ( mm4,edx+ofs_cy);
paddw ( mm5,mm5); // 2*UV2=UV2+UV2
paddw (mm1,mm2); // mm1 = __V1__U1__V0__U0 * 2
paddd (mm0,edx+ofs_x00002000_00002000); // +=0.5<<14
paddw ( mm5,mm6); // mm5 = __V3__U3__V2__U2 * 2
// Keep three copies of the doubled chroma per pipe: one each for the R, G and B sums.
movq (mm2,mm1);
paddd ( mm4,edx+ofs_x00002000_00002000); // +=0.5<<14
movq (mm3,mm1);
movq ( mm6,mm5);
pmaddwd (mm1,edx+ofs_crv);
movq ( mm7,mm5);
paddd (mm1,mm0);
pmaddwd ( mm5,edx+ofs_crv);
psrad (mm1,14); // mm1 = RRRRRRRRrrrrrrrr
paddd ( mm5,mm4);
pmaddwd (mm2,edx+ofs_cgu_cgv);
psrad ( mm5,14);
paddd (mm2,mm0);
pmaddwd ( mm6,edx+ofs_cgu_cgv);
psrad (mm2,14); // mm2 = GGGGGGGGgggggggg
paddd ( mm6,mm4);
pmaddwd (mm3,edx+ofs_cbu);
psrad ( mm6,14);
paddd (mm3,mm0);
pmaddwd ( mm7,edx+ofs_cbu);
// Advance the pointers now: 8 source bytes consumed, 12 (rgb24) or 16 (rgb32)
// destination bytes produced. The stores at the end address backwards from edi.
esi+=8;
edi+=12+4*rgb32;
// Loop condition for the caller; the final group of a row always reports true.
if (!no_next_pixel)
ret=esi<ecx;//cmp esi,ecx
else
ret=true;
psrad (mm3,14 ); // mm3 = BBBBBBBBbbbbbbbb
paddd ( mm7,mm4);
pxor (mm0,mm0 );
psrad ( mm7,14 );
packssdw (mm3,mm2 );// mm3 = GGGGggggBBBBbbbb
packssdw ( mm7,mm6);
packssdw (mm1,mm0 );// mm1 = ________RRRRrrrr
packssdw ( mm5,mm0); // *** avoid pxor mm4,mm4
movq (mm2,mm3 );
movq ( mm6,mm7);
punpcklwd (mm2,mm1 );// mm2 = RRRRBBBBrrrrbbbb
punpcklwd ( mm6,mm5);
punpckhwd (mm3,mm1 );// mm3 = ____GGGG____gggg
punpckhwd ( mm7,mm5);
movq (mm0,mm2 );
movq ( mm4,mm6);
punpcklwd (mm0,mm3 );// mm0 = ____rrrrggggbbbb
punpcklwd ( mm4,mm7);
// rgb24 only: shift the low pixel up one word so that, after packing, the two
// pixels of each quadword can be squeezed together by the byte shifts below.
if (!rgb32)
{
psllq ( mm0,16);
psllq ( mm4,16);
}
punpckhwd (mm2,mm3 );// mm2 = ____RRRRGGGGBBBB
punpckhwd ( mm6,mm7);
packuswb (mm0,mm2 );// mm0 = __RRGGBB__rrggbb <- ta dah!
packuswb ( mm4,mm6);
if (rgb32)
{
// rgb32: 4 pixels = 16 bytes, written as two quadwords with the alpha bytes forced on.
por (mm0, edx+ofs_xFF000000_FF000000); // set alpha channels "on"
por (mm4, edx+ofs_xFF000000_FF000000);
movq (edi-16,mm0); // store the quadwords independently
movq ( edi-8,mm4);
}
else
{
// rgb24: 4 pixels = 12 bytes, written as three dwords.
psrlq (mm0,8 ); // pack the two quadwords into 12 bytes
psllq (mm4,8 ); // (note: the two shifts above leave
movd (edi-12,mm0); // mm0,4 = __RRGGBBrrggbb__)
psrlq (mm0,32 );
por (mm4,mm0 );
movd (edi-8,mm4 );
psrlq (mm4,32 );
movd (edi-4,mm4 );
}
return ret;
}
public:
// Converts a packed YUV 4:2:2 image to RGB, one row at a time, by calling
// YUV2RGB_INNER_LOOP on successive 8-byte (4-pixel) source groups.
//
// Parameters:
//   src, src_end - first source row and the row pointer at which to stop. The
//                  row loop ends only when src == src_end exactly, so src_end
//                  must be reachable from src in whole steps of src_pitch.
//   dst          - first destination row.
//   src_pitch    - byte step between source rows.
//   dst_pitch    - byte step between destination rows.
//   row_size     - source bytes per row. Data is consumed in 8-byte groups, so
//                  this is expected to be a positive multiple of 8; a row shorter
//                  than one group still has one full group converted.
//   matrix       - row index into yuv2rgb_constants (0..3, labelled rec601,
//                  rec709, PC601, PC709 below).
//                  NOTE(review): the original signature comment reads
//                  "0=rec601, 1=rec709, 3=PC_601, 7=PC_709", but the table has
//                  only four rows and is indexed directly - with that numbering
//                  3 would select the PC709 row and 7 would read out of bounds.
//                  Confirm what callers actually pass.
static void mmx_ConvertYUY2toRGB(const BYTE* src,BYTE* dst,const BYTE* src_end,stride_t src_pitch,stride_t dst_pitch,int row_size,int matrix) //0=rec601, 1=rec709, 3=PC_601, 7=PC_709
{
 // One row of nine 64-bit constants per matrix. The entries at byte offsets
 // 0, 8, 24, 32, 40, 48, 56 and 64 are the ones YUV2RGB_INNER_LOOP reads as
 // luma offset, chroma bias, rounding term, alpha mask, cy, crv, cgu_cgv and cbu.
 // The entry at offset 16 (0x00FF word mask) is not read by the inner loop
 // directly - presumably it is used by GET_Y/GET_UV; confirm.
 static const int64_t yuv2rgb_constants[4][9]=
  {
   {0x00000000000100010LL, //rec601
    0x00080008000800080LL,
    0x000FF00FF00FF00FFLL,
    0x00000200000002000LL,
    0x0FF000000FF000000LL,
    0x000004A8500004A85LL,
    0x03313000033130000LL,
    0x0E5FCF377E5FCF377LL,
    0x00000408D0000408DLL},
   {0x00000000000100010LL, //rec709
    0x00080008000800080LL,
    0x000FF00FF00FF00FFLL,
    0x00000200000002000LL,
    0x0FF000000FF000000LL,
    0x000004A8500004A85LL,
    0x03960000039600000LL,
    0x0EEF5F930EEF5F930LL,
    0x00000439B0000439BLL},
   {0x00000000000000000LL, //PC601
    0x00080008000800080LL,
    0x000FF00FF00FF00FFLL,
    0x00000200000002000LL,
    0x0FF000000FF000000LL,
    0x00000400000004000LL,
    0x02D0B00002D0B0000LL,
    0x0E90FF4F2E90FF4F2LL,
    0x0000038ED000038EDLL},
   {0x00000000000000000LL, //PC709
    0x00080008000800080LL,
    0x000FF00FF00FF00FFLL,
    0x00000200000002000LL,
    0x0FF000000FF000000LL,
    0x00000400000004000LL,
    0x03299000032990000LL,
    0x0F0F8F9FEF0F8F9FELL,
    0x000003B9F00003B9FLL}
  };
 const unsigned char *edx=(const unsigned char*)yuv2rgb_constants[matrix];
 __m64 mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7;
 for (;src!=src_end;src+=src_pitch,dst+=dst_pitch)
  {
   // srcLnEnd marks the start of the last 8-byte group of the row.
   const unsigned char *srcLn=src,*srcLnEnd=srcLn+row_size-8;
   unsigned char *dstLn=dst;
   // Every group before the last one may peek at the chroma of the following
   // group. The limit is tested BEFORE each step: the previous do-while shaped
   // loop always ran one such step first, so a row holding a single group
   // (row_size==8) had 16 bytes converted, overrunning both source and
   // destination rows. For row_size>=16 the sequence of calls is unchanged.
   while (srcLn<srcLnEnd)
    YUV2RGB_INNER_LOOP(0,srcLn,srcLnEnd,dstLn,edx,mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7);
   // The last group has no successor; it reuses its own chroma instead.
   YUV2RGB_INNER_LOOP(1,srcLn,srcLnEnd,dstLn,edx,mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7);
  }
 // Leave MMX state so that subsequent x87 floating-point code works.
 _mm_empty();
}
};
#pragma warning(pop)
#endif
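// The remaining lines are source-viewer page residue (keyboard shortcut help),
// not part of convert_yuy2.h:
//   Copy code:             Ctrl + C
//   Search code:           Ctrl + F
//   Full-screen mode:      F11
//   Toggle theme:          Ctrl + Shift + D
//   Show shortcuts:        ?
//   Increase font size:    Ctrl + =
//   Decrease font size:    Ctrl + -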