📄 yuv420argb.c
字号:
// yuv420argb.c : planar YUV 4:2:0 to packed 32-bit RGB conversion (table-driven C version and MMX inline-assembly version).
#include "yuv420argb.h"
/*
 * Lookup tables filled in by init_dither_tab(). The first five are indexed
 * by an 8-bit sample value and hold that sample's pre-multiplied
 * contribution, so YUV420ToARGB32() only has to add and shift.
 */
/* (v - 128) * 104597 : V contribution added for the third output byte. */
static long int crv_tab[256];
/* (u - 128) * 132201 : U contribution added for the first output byte. */
static long int cbu_tab[256];
/* (u - 128) * 25675 : U contribution subtracted for the second output byte. */
static long int cgu_tab[256];
/* (v - 128) * 53279 : V contribution subtracted for the second output byte. */
static long int cgv_tab[256];
/* 76309 * (y - 16) : scaled luma term shared by all three colour bytes. */
static long int tab_76309[256];
/*
 * Clamp table, read as clp[384 + value]:
 * entries 0..383 are 0, entries 384..639 are 0..255, entries 640..1023 are 255.
 */
static unsigned char clp[1024];
/*
 * 64-bit constants used as memory operands by the inline assembly in
 * YUV420_TO_ARGB32_MMX(). Each mmw_* value repeats one 16-bit word four
 * times and mmb_0x10 repeats one byte eight times, so a single MMX
 * instruction applies the same constant to every lane.
 * NOTE(review): __int64 is a Microsoft compiler type; these lines are not
 * portable C as written.
 */
/* Multiplier applied (pmulhw) to the luma words. */
static unsigned __int64 mmw_mult_Y = 0x2568256825682568;
/* Multiplier applied to the U words for the green term. */
static unsigned __int64 mmw_mult_U_G = 0xf36ef36ef36ef36e;
/* Multiplier applied to the U words for the blue term. */
static unsigned __int64 mmw_mult_U_B = 0x40cf40cf40cf40cf;
/* Multiplier applied to the V words for the red term. */
static unsigned __int64 mmw_mult_V_R = 0x3343334333433343;
/* Multiplier applied to the V words for the green term. */
static unsigned __int64 mmw_mult_V_G = 0xe5e2e5e2e5e2e5e2;
/* 16 in every byte: subtracted (with unsigned saturation) from the luma bytes. */
static unsigned __int64 mmb_0x10 = 0x1010101010101010;
/* 128 in every word: subtracted from the widened U and V samples. */
static unsigned __int64 mmw_0x0080 = 0x0080008000800080;
/* Low-byte mask for every word: selects the even-numbered luma samples. */
static unsigned __int64 mmw_0x00ff = 0x00ff00ff00ff00ff;
/*
 * Fill the file-scope conversion tables used by YUV420ToARGB32().
 *
 * The work is done on the first call only; later calls return at once.
 * The "already done" flag is a plain static int with no locking.
 */
void init_dither_tab()
{
    static int tables_ready = 0;
    const long coef_rv = 104597;
    const long coef_bu = 132201;
    const long coef_gu = 25675;
    const long coef_gv = 53279;
    int n;

    if (tables_ready)
        return;
    tables_ready = 1;

    /* Per-sample products: chroma is centred on 128, luma is offset by 16. */
    for (n = 0; n < 256; n++)
    {
        crv_tab[n] = (n - 128) * coef_rv;
        cbu_tab[n] = (n - 128) * coef_bu;
        cgu_tab[n] = (n - 128) * coef_gu;
        cgv_tab[n] = (n - 128) * coef_gv;
        tab_76309[n] = 76309 * (n - 16);
    }

    /*
     * Clamp table: 384 zeros, then the identity ramp 0..255, then 384
     * entries of 255. Callers index it as clp[384 + value].
     */
    for (n = 0; n < 1024; n++)
    {
        if (n < 384)
            clp[n] = 0;
        else if (n < 640)
            clp[n] = (unsigned char)(n - 384);
        else
            clp[n] = 255;
    }
}
/*
 * Write one 4-byte output pixel at dst from a scaled luma value and the
 * four chroma terms. Byte order is: luma + c4, luma - c2 - c3, luma + c1, 0.
 * Each sum is shifted right by 16 and clamped to 0..255 through clp[].
 */
static void yuv420_store_pixel(unsigned char *dst, int luma,
                               int c1, int c2, int c3, int c4)
{
    dst[0] = clp[384 + ((luma + c4) >> 16)];
    dst[1] = clp[384 + ((luma - c2 - c3) >> 16)];
    dst[2] = clp[384 + ((luma + c1) >> 16)];
    dst[3] = 0;
}

/*
 * Convert a planar YUV 4:2:0 image to packed 4-byte pixels using the
 * lookup tables crv_tab / cgu_tab / cgv_tab / cbu_tab / tab_76309 / clp.
 *
 * This function only reads those tables; they are filled by
 * init_dither_tab(), which must have run beforehand.
 *
 * src0, src1, src2  - Y, U and V planes. One U and one V sample is shared
 *                     by each 2x2 block of Y samples.
 * stride_y/u/v      - bytes between consecutive rows of each plane.
 * dst_ori           - output buffer; each pixel is written as the bytes
 *                     (U-derived, mixed, V-derived, 0), i.e. B, G, R, 0
 *                     if the tables follow the usual YCbCr convention.
 * width, height     - image size in pixels.
 * out_stride        - bytes between consecutive output rows.
 *
 * Output rows are addressed from the row base using out_stride, so padded
 * (out_stride > 4 * width) and negative strides are handled. When width or
 * height is odd, the missing right column / bottom row of the last 2x2
 * block is neither read nor written.
 *
 * Always returns 1.
 */
int YUV420ToARGB32( unsigned char *src0,
                    unsigned char *src1,
                    unsigned char *src2,
                    int stride_y,
                    int stride_u,
                    int stride_v,
                    unsigned char *dst_ori,
                    int width,
                    int height,
                    int out_stride )
{
    int i, j;

    for (j = 0; j < height; j += 2)
    {
        const int has_bottom = (j + 1 < height);
        const unsigned char *py0 = src0;
        /* Only form the second-row pointers when that row exists. */
        const unsigned char *py1 = has_bottom ? src0 + stride_y : src0;
        unsigned char *d1 = dst_ori;
        unsigned char *d2 = has_bottom ? dst_ori + out_stride : dst_ori;

        for (i = 0; i < width; i += 2)
        {
            const int has_right = (i + 1 < width);
            const int u = src1[i >> 1];
            const int v = src2[i >> 1];
            /* Chroma terms are shared by all pixels of the 2x2 block. */
            const int c1 = (int)crv_tab[v];
            const int c2 = (int)cgu_tab[u];
            const int c3 = (int)cgv_tab[v];
            const int c4 = (int)cbu_tab[u];

            /* up-left */
            yuv420_store_pixel(d1 + 4 * i, (int)tab_76309[py0[i]],
                               c1, c2, c3, c4);
            /* down-left */
            if (has_bottom)
                yuv420_store_pixel(d2 + 4 * i, (int)tab_76309[py1[i]],
                                   c1, c2, c3, c4);
            if (has_right)
            {
                /* up-right */
                yuv420_store_pixel(d1 + 4 * (i + 1),
                                   (int)tab_76309[py0[i + 1]],
                                   c1, c2, c3, c4);
                /* down-right */
                if (has_bottom)
                    yuv420_store_pixel(d2 + 4 * (i + 1),
                                       (int)tab_76309[py1[i + 1]],
                                       c1, c2, c3, c4);
            }
        }

        /* Step two luma/output rows and one chroma row, each by its own stride. */
        src0 += stride_y * 2;
        src1 += stride_u;
        src2 += stride_v;
        dst_ori += out_stride * 2;
    }
    return 1;
}
/*
 * Convert a planar YUV 4:2:0 image to packed 4-byte pixels with MMX
 * inline assembly (Microsoft __asm syntax, 32-bit x86 registers).
 *
 * puc_y, puc_u, puc_v - Y, U and V planes.
 * stride_y/u/v        - bytes between consecutive rows of each plane.
 * puc_out             - output buffer; each pixel is stored as the bytes
 *                       (U-derived, mixed, V-derived, 0), the same order
 *                       the table-driven C version writes.
 * out_width           - pixels per row. Each loop pass converts 8 pixels
 *                       and out_width >> 3 passes are made per row, so the
 *                       trailing out_width & 7 pixels are NOT written.
 * out_height          - number of rows.
 * out_stride          - bytes between consecutive output rows.
 *
 * Returns 1 after converting, or 0 without touching any buffer when
 * out_width < 8. That case must be rejected up front: the assembly loop
 * tests its counter only after the body, so a zero (or positive) starting
 * count would run the body and then keep counting upward until the
 * register wraps, overrunning every buffer.
 */
int YUV420_TO_ARGB32_MMX( unsigned char *puc_y,
                          int stride_y,
                          unsigned char *puc_u,
                          unsigned char *puc_v,
                          int stride_u,
                          int stride_v,
                          unsigned char *puc_out,
                          int out_width,
                          int out_height,
                          int out_stride )
{
    int y, horiz_count;

    /* Fewer than 8 pixels per row: no full group exists, nothing to do. */
    if (out_width < 8)
        return 0;

    /* Negative group count; the loop increments it until it reaches zero. */
    horiz_count = -(out_width >> 3);
    for (y=0; y<out_height; y++)
    {
        __asm
        {
            push eax
            push ebx
            push ecx
            push edx
            push edi
            mov eax, puc_out
            mov ebx, puc_y
            mov ecx, puc_u
            mov edx, puc_v
            mov edi, horiz_count
        horiz_loop:
            // Load 4 U, 4 V and 8 Y samples; widen U and V to 16-bit words.
            movd mm2, [ecx]
            pxor mm7, mm7
            movd mm3, [edx]
            punpcklbw mm2, mm7
            movq mm0, [ebx]
            punpcklbw mm3, mm7
            movq mm1, mmw_0x00ff
            // Y -= 16 (saturating), U -= 128, V -= 128.
            psubusb mm0, mmb_0x10
            psubw mm2, mmw_0x0080
            // mm1 = even-numbered Y samples, mm0 (after the shift) = odd ones.
            pand mm1, mm0
            psubw mm3, mmw_0x0080
            // Pre-scale by 8 before taking the high word of each product.
            psllw mm1, 3
            psrlw mm0, 8
            psllw mm2, 3
            pmulhw mm1, mmw_mult_Y
            psllw mm0, 3
            psllw mm3, 3
            // mm5 = V term for red, mm4 = U term for blue,
            // mm2 + mm3 = combined U and V term for green.
            movq mm5, mm3
            pmulhw mm5, mmw_mult_V_R
            movq mm4, mm2
            pmulhw mm0, mmw_mult_Y
            movq mm7, mm1
            pmulhw mm2, mmw_mult_U_G
            paddsw mm7, mm5
            pmulhw mm3, mmw_mult_V_G
            packuswb mm7, mm7
            pmulhw mm4, mmw_mult_U_B
            paddsw mm5, mm0
            packuswb mm5, mm5
            paddsw mm2, mm3
            movq mm3, mm1
            movq mm6, mm1
            paddsw mm3, mm4
            paddsw mm6, mm2
            // Interleave even/odd results: mm7 = 8 red bytes.
            punpcklbw mm7, mm5
            paddsw mm2, mm0
            packuswb mm6, mm6
            packuswb mm2, mm2
            packuswb mm3, mm3
            paddsw mm4, mm0
            packuswb mm4, mm4
            // mm6 = 8 green bytes, mm3 = 8 blue bytes.
            punpcklbw mm6, mm2
            punpcklbw mm3, mm4
            // 32-bit shuffle: pixels 0..3 as (blue, green, red, 0).
            pxor mm0, mm0
            movq mm1, mm6
            punpcklbw mm1, mm0
            movq mm0, mm3
            punpcklbw mm0, mm7
            movq mm2, mm0
            punpcklbw mm0, mm1
            punpckhbw mm2, mm1
            // Store pixels 0..3 (16 bytes).
            movd [eax], mm0
            psrlq mm0, 32
            movd 4[eax], mm0
            movd 8[eax], mm2
            psrlq mm2, 32
            movd 12[eax], mm2
            // 32-bit shuffle: pixels 4..7 from the high halves.
            pxor mm0, mm0
            movq mm1, mm6
            punpckhbw mm1, mm0
            movq mm0, mm3
            punpckhbw mm0, mm7
            movq mm2, mm0
            punpcklbw mm0, mm1
            punpckhbw mm2, mm1
            // Store pixels 4..7 and advance: 8 Y, 4 U, 4 V, 32 output bytes.
            movd 16[eax], mm0
            psrlq mm0, 32
            movd 20[eax], mm0
            add ebx, 8
            movd 24[eax], mm2
            psrlq mm2, 32
            add ecx, 4
            add edx, 4
            movd 28[eax], mm2
            add eax, 32
            // Counter started negative; loop until it reaches zero.
            inc edi
            jne horiz_loop
            pop edi
            pop edx
            pop ecx
            pop ebx
            pop eax
            // Leave MMX state so later floating-point code works.
            emms
        }
        puc_out += out_stride;
        puc_y += stride_y;
        /* One chroma row serves two luma rows: advance after each odd row. */
        if (y%2)
        {
            puc_u += stride_u;
            puc_v += stride_v;
        }
    }
    return 1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -