/* File: rgb2yuv.c */
/* (Code-viewer page residue: "Font size:" control label.) */
int RGB2YUV(unsigned char *screenbuf)
{
/* unsigned char * YDATA;
unsigned char * UDATA;
unsigned char * VDATA;*/
unsigned char *videoRef;
_declspec (align(16)) __int16 ZEROS[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 YBGR0[8]={3211,16515,8421,0,3211,16515,8421,0};
_declspec (align(16)) __int16 UBGR0[8]={14385,-9535,-4850,0,14385,-9535,-4850,0};
_declspec (align(16)) __int16 VBGR0[8]={-2326,-12059,14385,0,-2326,-12059,14385,0};
_declspec (align(16)) __int8 DELTAY[16]={16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
_declspec (align(16)) __int16 ONES[8]={1,1,1,1,1,1,1,1};
_declspec (align(16)) __int16 DELTAUV[8]={128,128,128,128,128,128,128,128};
_declspec (align(16)) __int16 MMTMP0[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP1[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP2[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP3[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP4[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP5[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP6[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP7[8]={0,0,0,0,0,0,0,0};
_declspec (align(16)) __int16 MMTMP8[8]={0,0,0,0,0,0,0,0};
__int32 TMPEAX=0;
__int32 TMP2EAX=0;
__int32 BLOCK_X_NUM=0;
__int32 BLOCK_Y_NUM=0;
__int32 BLOCK_LIE_NUM=96;//1024X768
//__int32 BLOCK_LIE_NUM=76;//800*608
__int32 TMPEAAX=0;
__int32 TMPEBX=0;
__int32 TMPECX=0;
__int32 TMPEDX=0;
FILE *fp;
if(g_N_FrameNum&1)
{
YDATA=g_P_YData1;UDATA=g_P_CbData1;VDATA=g_P_CrData1;
}
else
{
YDATA=g_P_YData;UDATA=g_P_CbData;VDATA=g_P_CrData;
}
videoRef=screenbuf;
/* videoRef = (unsigned char*)_aligned_malloc(1024*768*4, 16);
fp=fopen("D:\\rgbx.rgb","r");
fread(videoRef,1024*768*4,1,fp);*/
__asm
{
mov TMPEAAX,eax;
mov TMPEBX,ebx;
mov TMPECX,ecx;
mov TMPEDX,edx;
mov eax, videoRef;
mov ebx, YDATA;
mov TMPEAX,eax;
mov TMP2EAX,eax;
BLOCK_LIE:
add BLOCK_X_NUM,128//1024*768
//add BLOCK_X_NUM,100 //800*600 128//1024*768
BLOCK_X:
add BLOCK_Y_NUM, 4
BLOCK_Y:
movdqa xmm3,[eax] //load XR3G3B3XR2G2B2XR1G1B1XR0G0B0
pshufd xmm4, xmm3,0xE4; //xmm0->ONES
punpcklbw xmm3,ZEROS;
pmaddwd xmm3,YBGR0 //YbB1+YgG1,YrR1,YbB0+YgG0,YrR0->xmm0
psrld xmm3,11
punpckhbw xmm4,ZEROS;
pmaddwd xmm4,YBGR0 //YbB3+YgG3,YrR3,YbB2+YgG2,YrR2->ONES
psrld xmm4,11
packssdw xmm3,xmm4 //YbB3+YgG3,YrR3,YbB2+YgG2,YrR2,YbB1+YgG1,YrR1,YbB0+YgG0,YrR0->xmm3
pmaddwd xmm3,ONES //YbB3+YgG3+YrR3,YbB2+YgG2+YrR2,YbB1+YgG1+YrR1,YbB0+YgG0+YrR0->xmm3
psrld xmm3,4 //Y3Y2Y1Y0->xmm0
movdqa xmm4,[eax+16] //load XR7G7B7XR6G6B6XR5G5B5XR4G4B4 XMMWORD PTR
pshufd xmm5, xmm4,0xE4;
punpcklbw xmm4,ZEROS;
pmaddwd xmm4,YBGR0
psrld xmm4,11
punpckhbw xmm5,ZEROS;
pmaddwd xmm5,YBGR0
psrld xmm5,11
packssdw xmm4,xmm5
pmaddwd xmm4,ONES
psrld xmm4,4 //Y7Y6Y5Y4->ONES
packssdw xmm3,xmm4 //Y7Y6Y5Y4Y3Y2Y1Y0->xmm3
movdqa xmm4,[eax+4096];//XMMWORD PTR 1024
pshufd xmm5, xmm4,0xE4;
punpcklbw xmm4,ZEROS;
pmaddwd xmm4,YBGR0
psrld xmm4,11
punpckhbw xmm5,ZEROS;
pmaddwd xmm5,YBGR0
psrld xmm5,11
packssdw xmm4,xmm5
pmaddwd xmm4,ONES
psrld xmm4,4
movdqa xmm5, [eax+4112];//XMMWORD PTR 1024
//movdqa xmm6,xmm5;
pshufd xmm6, xmm5,0xE4;
punpcklbw xmm5,ZEROS;
pmaddwd xmm5,YBGR0
psrld xmm5,11
punpckhbw xmm6,ZEROS;
pmaddwd xmm6,YBGR0
psrld xmm6,11
packssdw xmm5,xmm6
pmaddwd xmm5,ONES
psrld xmm5,4
packssdw xmm4,xmm5
add eax,8192 //1024
packuswb xmm3,xmm4 //Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1Y0->xmm0
//movdqa xmm4,DELTAY;
paddusw xmm3,DELTAY
movdqa [ebx],xmm3
add ebx,16
dec BLOCK_Y_NUM
jnz BLOCK_Y
mov eax,TMPEAX
add eax,32
mov TMPEAX,eax
dec BLOCK_X_NUM
jnz BLOCK_X
mov eax,TMP2EAX;
add eax,32768; // 1024
mov TMP2EAX,eax;
mov TMPEAX,eax;
dec BLOCK_LIE_NUM
jnz BLOCK_LIE
mov BLOCK_LIE_NUM,48//1024 768;
mov BLOCK_Y_NUM,0;
mov BLOCK_X_NUM,0;
mov eax, videoRef;
mov ecx, UDATA;
mov edx, VDATA;
mov TMPEAX,eax;
mov TMP2EAX,eax;
BLOCK_LIEUV:
add BLOCK_X_NUM, 64//1024*768
BLOCK_XUV:
add BLOCK_Y_NUM, 4
BLOCK_YUV:
movdqa xmm3,[eax] //XR3G3B3XR2G2B2XR1G1B1XR0G0B0->xmm3
pshufd xmm4, xmm3,0xE4; //XR3G3B3XR2G2B2XR1G1B1XR0G0B0->xmm4
punpcklbw xmm3,ZEROS; //XR1G1B1XR0G0B0->xmm3
pshufd xmm5, xmm3,0xE4;//XR1G1B1XR0G0B0->xmm5
pmaddwd xmm3,UBGR0
psrad xmm3,11
punpckhbw xmm4,ZEROS; //XR3G3B3XR2G2B2->xmm4
pshufd xmm6, xmm4,0xE4;//XR3G3B3XR2G2B2->xmm6
pmaddwd xmm4,UBGR0
psrad xmm4,11
packssdw xmm3,xmm4 //YbB3+YgG3,YrR3,YbB2+YgG2,YrR2,YbB1+YgG1,YrR1,YbB0+YgG0,YrR0->xmm3
pmaddwd xmm3,ONES //YbB3+YgG3+YrR3,YbB2+YgG2+YrR2,YbB1+YgG1+YrR1,YbB0+YgG0+YrR0->xmm3
psrad xmm3,4 //U3U2U1U0->xmm3
pmaddwd xmm5,VBGR0
psrad xmm5,11
pmaddwd xmm6,VBGR0
psrad xmm6,11
packssdw xmm5,xmm6
pmaddwd xmm5,ONES
psrad xmm5,4 //V3V2V1V0->xmm5
movdqa xmm4,[eax+16]; //XR7G7B7XR6G6B6XR5G5B5XR4G4B4->xmm4
pshufd xmm6, xmm4,0xE4; //XR7G7B7XR6G6B6XR5G5B5XR4G4B4->xmm6
punpcklbw xmm4,ZEROS; //XR5G5B5XR4G4B4->xmm4
pshufd xmm1, xmm4,0xE4; //XR5G5B5XR4G4B4->xmm1
pmaddwd xmm4,UBGR0;
psrad xmm4,11;
punpckhbw xmm6,ZEROS; //XR7G7B7XR6G6B6->xmm6
pshufd xmm2, xmm6,0xE4; //XR7G7B7XR6G6B6->xmm2
pmaddwd xmm6,UBGR0;
psrad xmm6,11;
packssdw xmm4,xmm6;//
pmaddwd xmm4,ONES;
psrad xmm4,4; //U7U6U5U4->xmm4
packssdw xmm3,xmm4; //U7U6U5U4U3U2U1U0->xmm3 16bit
pmaddwd xmm1,VBGR0;
psrad xmm1,11;
pmaddwd xmm2,VBGR0;
psrad xmm2,11;
packssdw xmm1,xmm2
pmaddwd xmm1,ONES
psrad xmm1,4 //V7V6V5V4->xmm1 32bit
packssdw xmm5,xmm1; //V7V6V5V4V3V2V1V0->xmm5 16bit
movdqa xmm1,[eax+4096]
pshufd xmm2, xmm1,0xE4;
punpcklbw xmm1,ZEROS;
pshufd xmm6,xmm1,0x4E;
pmaddwd xmm1,UBGR0;
psrad xmm1,11;
punpckhbw xmm2,ZEROS;
pshufd xmm7,xmm2,0x4E;
pmaddwd xmm2,UBGR0;
psrad xmm2,11;
packssdw xmm1,xmm2;
pmaddwd xmm1,ONES;
psrad xmm1,4; //U_2_3-0->xmm1
pmaddwd xmm6,VBGR0;
psrad xmm6,11;
pmaddwd xmm7,VBGR0;
psrad xmm7,11;
packssdw xmm6,xmm7;
pmaddwd xmm6,ONES
psrad xmm6,4 //V_2_3-0->xmm6
movdqa xmm0,[eax+4112]
pshufd xmm2, xmm0,0xE4;
punpcklbw xmm0,ZEROS;
pshufd xmm4, xmm0,0xE4;
pmaddwd xmm0,UBGR0;
psrad xmm0,11;
punpckhbw xmm2,ZEROS;
pshufd xmm7, xmm2,0xE4;
pmaddwd xmm2,UBGR0;
psrad xmm2,11;
packssdw xmm0,xmm2
pmaddwd xmm0,ONES
psrad xmm0,4 //U_2_7_4->xmm0 32bit
packssdw xmm1,xmm0; //U_2_7_0->xmm1 16bit
paddsw xmm3,xmm1;
pmaddwd xmm3,ONES;
psrad xmm3,2; //U4_0->xmm3 32bit
pmaddwd xmm4,VBGR0;
psrad xmm4,11;
pmaddwd xmm7,VBGR0;
psrad xmm7,11
packssdw xmm4,xmm7
pmaddwd xmm4,ONES
psrad xmm4,4 //V_2_7_4->xmm4 32bit
packssdw xmm6,xmm4; //V_2_7_0->xmm6 16bit
paddsw xmm5,xmm6;
pmaddwd xmm5,ONES;
psrad xmm5,2; //U4_0->xmm5 32bit
movdqa xmm0,[eax+32] //XR11G11B11XR10G10B10XR9G9B9XR8G8B8->xmm0
pshufd xmm1, xmm0,0xE4;
punpcklbw xmm0,ZEROS;
pshufd xmm2, xmm0,0xE4;
pmaddwd xmm0,UBGR0;
psrad xmm0,11;
punpckhbw xmm1,ZEROS;
pshufd xmm4, xmm1,0xE4;
pmaddwd xmm1,UBGR0;
psrad xmm1,11
packssdw xmm0,xmm1;
pmaddwd xmm0,ONES;
psrad xmm0,4 //U11_8->xmm0
pmaddwd xmm2,VBGR0;
psrad xmm2,11;
pmaddwd xmm4,VBGR0;
psrad xmm4,11;
packssdw xmm2,xmm4;
pmaddwd xmm2,ONES;
psrad xmm2,4 //V11_8->xmm2
movdqa xmm1,[eax+48] //1024
pshufd xmm4, xmm1,0xE4;
punpcklbw xmm1,ZEROS;
pshufd xmm6, xmm1,0xE4;
pmaddwd xmm1,UBGR0
psrad xmm1,11
punpckhbw xmm4,ZEROS;
pshufd xmm7, xmm4,0xE4;
pmaddwd xmm4,UBGR0
psrad xmm4,11
packssdw xmm1,xmm4;
pmaddwd xmm1,ONES;
psrad xmm1,4 //U15_12->xmm1
packssdw xmm0,xmm1; //U15_8->xmm0 16bit
pmaddwd xmm6,VBGR0;
psrad xmm6,11;
pmaddwd xmm7,VBGR0;
psrad xmm7,11;
packssdw xmm6,xmm7
pmaddwd xmm6,ONES
psrad xmm6,4 //V15_12->xmm6
packssdw xmm2,xmm6; //V15_8->xmm2 16bit
movdqa xmm1,[eax+4128]// 1024
pshufd xmm4, xmm1,0xE4;
punpcklbw xmm1,ZEROS;
pshufd xmm6, xmm1,0xE4;
pmaddwd xmm1,UBGR0;
psrad xmm1,11;
punpckhbw xmm4,ZEROS;
pshufd xmm7, xmm4,0xE4;
pmaddwd xmm4,UBGR0;
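/* Code-viewer page residue (keyboard-shortcut help panel; appears not to be
 * part of the original source file):
 *   Keyboard shortcuts
 *   Copy code:           Ctrl + C
 *   Search code:         Ctrl + F
 *   Full-screen mode:    F11
 *   Toggle theme:        Ctrl + Shift + D
 *   Show shortcuts:      ?
 *   Increase font size:  Ctrl + =
 *   Decrease font size:  Ctrl + -
 */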