⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rgb2yuv.c

📁 采用 Intel 处理器的 SIMD 指令集(原描述为奔腾 2 以后处理器支持的 MMX 指令集;本页代码实际使用 xmm 寄存器及 movdqa、pshufd 等 SSE2 指令,需奔腾 4 及以后的处理器)
💻 C
📖 第 1 页 / 共 2 页
字号:
int RGB2YUV(unsigned char *screenbuf)
{	
/*	unsigned char * YDATA;
	unsigned char * UDATA;
	unsigned char * VDATA;*/
	unsigned char *videoRef;
    _declspec (align(16)) __int16 ZEROS[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 YBGR0[8]={3211,16515,8421,0,3211,16515,8421,0};	
	_declspec (align(16)) __int16 UBGR0[8]={14385,-9535,-4850,0,14385,-9535,-4850,0};
	_declspec (align(16)) __int16 VBGR0[8]={-2326,-12059,14385,0,-2326,-12059,14385,0};
	_declspec (align(16)) __int8  DELTAY[16]={16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16};
	_declspec (align(16)) __int16 ONES[8]={1,1,1,1,1,1,1,1};
	_declspec (align(16)) __int16 DELTAUV[8]={128,128,128,128,128,128,128,128};
	_declspec (align(16)) __int16 MMTMP0[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP1[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP2[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP3[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP4[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP5[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP6[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP7[8]={0,0,0,0,0,0,0,0};
	_declspec (align(16)) __int16 MMTMP8[8]={0,0,0,0,0,0,0,0};
	__int32 TMPEAX=0;
	__int32 TMP2EAX=0;
	__int32 BLOCK_X_NUM=0;
	__int32 BLOCK_Y_NUM=0;
	__int32 BLOCK_LIE_NUM=96;//1024X768
	//__int32 BLOCK_LIE_NUM=76;//800*608
	 __int32 TMPEAAX=0;
	__int32 TMPEBX=0;
	__int32 TMPECX=0;
	__int32 TMPEDX=0;
	FILE *fp;
  		if(g_N_FrameNum&1)
  {
	YDATA=g_P_YData1;UDATA=g_P_CbData1;VDATA=g_P_CrData1;
  }
  else
  {
	YDATA=g_P_YData;UDATA=g_P_CbData;VDATA=g_P_CrData;
  }	
 videoRef=screenbuf;
 /* videoRef = (unsigned char*)_aligned_malloc(1024*768*4, 16);
  fp=fopen("D:\\rgbx.rgb","r");
  fread(videoRef,1024*768*4,1,fp);*/

 		__asm
		{	

			mov TMPEAAX,eax;
			mov TMPEBX,ebx;
			mov TMPECX,ecx;
			mov TMPEDX,edx;
			mov eax, videoRef;	
			mov ebx, YDATA;
			mov TMPEAX,eax;
			mov TMP2EAX,eax;

BLOCK_LIE:
			add BLOCK_X_NUM,128//1024*768
			//add BLOCK_X_NUM,100 //800*600 128//1024*768
BLOCK_X:
			add BLOCK_Y_NUM, 4
BLOCK_Y:
			movdqa xmm3,[eax]	//load XR3G3B3XR2G2B2XR1G1B1XR0G0B0 	
			pshufd xmm4, xmm3,0xE4;	    //xmm0->ONES
			punpcklbw xmm3,ZEROS;
			pmaddwd xmm3,YBGR0     //YbB1+YgG1,YrR1,YbB0+YgG0,YrR0->xmm0
			psrld xmm3,11
			punpckhbw xmm4,ZEROS;	
			pmaddwd xmm4,YBGR0     //YbB3+YgG3,YrR3,YbB2+YgG2,YrR2->ONES
			psrld xmm4,11
			packssdw xmm3,xmm4     //YbB3+YgG3,YrR3,YbB2+YgG2,YrR2,YbB1+YgG1,YrR1,YbB0+YgG0,YrR0->xmm3
			pmaddwd xmm3,ONES      //YbB3+YgG3+YrR3,YbB2+YgG2+YrR2,YbB1+YgG1+YrR1,YbB0+YgG0+YrR0->xmm3
			psrld xmm3,4           //Y3Y2Y1Y0->xmm0
				
			movdqa xmm4,[eax+16]		//load XR7G7B7XR6G6B6XR5G5B5XR4G4B4 XMMWORD PTR 
			pshufd xmm5, xmm4,0xE4;
			punpcklbw xmm4,ZEROS;
			pmaddwd xmm4,YBGR0
			psrld xmm4,11
			punpckhbw xmm5,ZEROS;
			pmaddwd xmm5,YBGR0
			psrld xmm5,11
			packssdw xmm4,xmm5
			pmaddwd xmm4,ONES
			psrld xmm4,4         //Y7Y6Y5Y4->ONES
			packssdw xmm3,xmm4   //Y7Y6Y5Y4Y3Y2Y1Y0->xmm3
			
			movdqa xmm4,[eax+4096];//XMMWORD PTR 1024 
			pshufd xmm5, xmm4,0xE4;	
			punpcklbw xmm4,ZEROS;
			pmaddwd xmm4,YBGR0
			psrld xmm4,11		
			punpckhbw xmm5,ZEROS;
			pmaddwd xmm5,YBGR0
			psrld xmm5,11
			packssdw xmm4,xmm5
			pmaddwd xmm4,ONES
			psrld xmm4,4 
				
			movdqa xmm5, [eax+4112];//XMMWORD PTR  1024 
			//movdqa xmm6,xmm5;
			pshufd xmm6, xmm5,0xE4;
			punpcklbw xmm5,ZEROS;
			pmaddwd xmm5,YBGR0
			psrld xmm5,11
			punpckhbw xmm6,ZEROS;
			pmaddwd xmm6,YBGR0
			psrld xmm6,11
			packssdw xmm5,xmm6
			pmaddwd xmm5,ONES
			psrld xmm5,4 
				
			packssdw xmm4,xmm5
			add eax,8192  //1024
			packuswb xmm3,xmm4    //Y15Y14Y13Y12Y11Y10Y9Y8Y7Y6Y5Y4Y3Y2Y1Y0->xmm0
			//movdqa xmm4,DELTAY;
			paddusw xmm3,DELTAY
			movdqa [ebx],xmm3			
			add ebx,16 

			dec BLOCK_Y_NUM
			jnz BLOCK_Y 
			
			mov eax,TMPEAX
			add eax,32
			mov TMPEAX,eax

			dec BLOCK_X_NUM
				jnz BLOCK_X
			mov eax,TMP2EAX;
			add eax,32768; // 1024
			mov TMP2EAX,eax;
			mov TMPEAX,eax;
			dec BLOCK_LIE_NUM
				jnz BLOCK_LIE

			mov BLOCK_LIE_NUM,48//1024 768;
			mov	BLOCK_Y_NUM,0;
			mov BLOCK_X_NUM,0;
			mov eax, videoRef;	
			mov ecx, UDATA;
			mov edx, VDATA;	
			mov TMPEAX,eax;
			mov TMP2EAX,eax;
BLOCK_LIEUV:
			add BLOCK_X_NUM, 64//1024*768
BLOCK_XUV:
			add BLOCK_Y_NUM, 4
BLOCK_YUV:	
			movdqa xmm3,[eax]       //XR3G3B3XR2G2B2XR1G1B1XR0G0B0->xmm3 
			pshufd xmm4, xmm3,0xE4; //XR3G3B3XR2G2B2XR1G1B1XR0G0B0->xmm4
			punpcklbw xmm3,ZEROS;  //XR1G1B1XR0G0B0->xmm3 
			pshufd xmm5, xmm3,0xE4;//XR1G1B1XR0G0B0->xmm5
			pmaddwd xmm3,UBGR0
			psrad xmm3,11
			punpckhbw xmm4,ZEROS;  //XR3G3B3XR2G2B2->xmm4
			pshufd xmm6, xmm4,0xE4;//XR3G3B3XR2G2B2->xmm6
			pmaddwd xmm4,UBGR0
			psrad xmm4,11
			packssdw xmm3,xmm4     //YbB3+YgG3,YrR3,YbB2+YgG2,YrR2,YbB1+YgG1,YrR1,YbB0+YgG0,YrR0->xmm3
			pmaddwd xmm3,ONES      //YbB3+YgG3+YrR3,YbB2+YgG2+YrR2,YbB1+YgG1+YrR1,YbB0+YgG0+YrR0->xmm3
			psrad xmm3,4           //U3U2U1U0->xmm3

			pmaddwd xmm5,VBGR0
			psrad xmm5,11
			pmaddwd xmm6,VBGR0
			psrad xmm6,11
			packssdw xmm5,xmm6
			pmaddwd xmm5,ONES
			psrad xmm5,4          //V3V2V1V0->xmm5


			movdqa xmm4,[eax+16];  //XR7G7B7XR6G6B6XR5G5B5XR4G4B4->xmm4
			pshufd xmm6, xmm4,0xE4; //XR7G7B7XR6G6B6XR5G5B5XR4G4B4->xmm6
			punpcklbw xmm4,ZEROS;  //XR5G5B5XR4G4B4->xmm4
            pshufd xmm1, xmm4,0xE4; //XR5G5B5XR4G4B4->xmm1
			pmaddwd xmm4,UBGR0;
			psrad xmm4,11;
			punpckhbw xmm6,ZEROS;  //XR7G7B7XR6G6B6->xmm6
			pshufd xmm2, xmm6,0xE4;     //XR7G7B7XR6G6B6->xmm2
			pmaddwd xmm6,UBGR0;
			psrad xmm6,11;
			packssdw xmm4,xmm6;//
			pmaddwd xmm4,ONES;
			psrad xmm4,4;        //U7U6U5U4->xmm4
			packssdw xmm3,xmm4;  //U7U6U5U4U3U2U1U0->xmm3 16bit

			
			pmaddwd xmm1,VBGR0;
			psrad xmm1,11;
			pmaddwd xmm2,VBGR0;
			psrad xmm2,11;
			packssdw xmm1,xmm2
			pmaddwd xmm1,ONES
			psrad xmm1,4         //V7V6V5V4->xmm1   32bit
			packssdw xmm5,xmm1;  //V7V6V5V4V3V2V1V0->xmm5   16bit


			movdqa xmm1,[eax+4096]
			pshufd xmm2, xmm1,0xE4;
			punpcklbw xmm1,ZEROS;
			pshufd xmm6,xmm1,0x4E;
			pmaddwd xmm1,UBGR0;
			psrad xmm1,11;
			punpckhbw xmm2,ZEROS;
			pshufd xmm7,xmm2,0x4E;
			pmaddwd xmm2,UBGR0;
			psrad xmm2,11;	
			packssdw xmm1,xmm2;
			pmaddwd xmm1,ONES;
			psrad xmm1,4;        //U_2_3-0->xmm1

            pmaddwd xmm6,VBGR0;
			psrad xmm6,11;
			pmaddwd xmm7,VBGR0;
			psrad xmm7,11;
			packssdw xmm6,xmm7;
			pmaddwd xmm6,ONES
			psrad xmm6,4       //V_2_3-0->xmm6


			movdqa xmm0,[eax+4112]
			pshufd xmm2, xmm0,0xE4;
			punpcklbw xmm0,ZEROS;
			pshufd xmm4, xmm0,0xE4;
			pmaddwd xmm0,UBGR0;
			psrad xmm0,11;
			punpckhbw xmm2,ZEROS;
			pshufd xmm7, xmm2,0xE4;
			pmaddwd xmm2,UBGR0;
			psrad xmm2,11;
			packssdw xmm0,xmm2
			pmaddwd xmm0,ONES
			psrad xmm0,4    //U_2_7_4->xmm0   32bit
	
			packssdw xmm1,xmm0; //U_2_7_0->xmm1   16bit
			
			paddsw xmm3,xmm1;
			pmaddwd xmm3,ONES;
			psrad xmm3,2;        //U4_0->xmm3      32bit

			pmaddwd xmm4,VBGR0;
			psrad xmm4,11;
			pmaddwd xmm7,VBGR0;
			psrad xmm7,11
			packssdw xmm4,xmm7
			pmaddwd xmm4,ONES
			psrad xmm4,4    //V_2_7_4->xmm4   32bit

			packssdw xmm6,xmm4; //V_2_7_0->xmm6   16bit
			
			paddsw xmm5,xmm6;
			pmaddwd xmm5,ONES;
			psrad xmm5,2;        //U4_0->xmm5      32bit
			
			movdqa xmm0,[eax+32]  //XR11G11B11XR10G10B10XR9G9B9XR8G8B8->xmm0
			pshufd xmm1, xmm0,0xE4;
			punpcklbw xmm0,ZEROS;
			pshufd xmm2, xmm0,0xE4;
			pmaddwd xmm0,UBGR0;
			psrad xmm0,11;
			punpckhbw xmm1,ZEROS;
			pshufd xmm4, xmm1,0xE4;
			pmaddwd xmm1,UBGR0;	
			psrad xmm1,11
			packssdw xmm0,xmm1;
			pmaddwd xmm0,ONES;
			psrad xmm0,4        //U11_8->xmm0
      
			pmaddwd xmm2,VBGR0;
			psrad xmm2,11;
			pmaddwd xmm4,VBGR0;
			psrad xmm4,11;
			packssdw xmm2,xmm4;
			pmaddwd xmm2,ONES;
			psrad xmm2,4        //V11_8->xmm2


			movdqa xmm1,[eax+48]   //1024
			pshufd xmm4, xmm1,0xE4;
			punpcklbw xmm1,ZEROS;
			pshufd xmm6, xmm1,0xE4;
			pmaddwd xmm1,UBGR0
			psrad xmm1,11
			punpckhbw xmm4,ZEROS;			
			pshufd xmm7, xmm4,0xE4;
			pmaddwd xmm4,UBGR0
			psrad xmm4,11             
			packssdw xmm1,xmm4;    
			pmaddwd xmm1,ONES;
			psrad xmm1,4        //U15_12->xmm1
			packssdw xmm0,xmm1;  //U15_8->xmm0 16bit

			pmaddwd xmm6,VBGR0;
			psrad xmm6,11;
			pmaddwd xmm7,VBGR0;
			psrad xmm7,11;
			packssdw xmm6,xmm7
			pmaddwd xmm6,ONES
			psrad xmm6,4     //V15_12->xmm6 
			packssdw xmm2,xmm6; //V15_8->xmm2 16bit


			movdqa xmm1,[eax+4128]// 1024
			pshufd xmm4, xmm1,0xE4;
			punpcklbw xmm1,ZEROS;
			pshufd xmm6, xmm1,0xE4;
			pmaddwd xmm1,UBGR0;
			psrad xmm1,11;
			punpckhbw xmm4,ZEROS;
			pshufd xmm7, xmm4,0xE4;
			pmaddwd xmm4,UBGR0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -