⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 yuv2rgb_mmx.c

📁 <VC++视频音频开发>一书的光盘资料。
💻 C
📖 第 1 页 / 共 2 页
字号:
			mov edi, horiz_count
			
		horiz_loop:

			movd mm2, [ecx]					 ; mm2 = ________u3u2u1u0
			movd mm3, [edx]					 ; mm3 = ________v3v2v1v0
			movq mm0, [ebx]          ; mm0 = y7y6y5y4y3y2y1y0  

			pxor mm7, mm7						 ; zero mm7

			punpcklbw mm2, mm7       ; mm2 = __u3__u2__u1__u0
			punpcklbw mm3, mm7       ; mm3 = __v3__v2__v1__v0
			psubw mm2, mmw_0x0080    ; mm2 -= 128
			psubw mm3, mmw_0x0080    ; mm3 -= 128
			psllw mm2, 3             ; mm2 *= 8
			psllw mm3, 3             ; mm3 *= 8
			movq mm4, mm2            ; mm4 = mm2 = u
			movq mm5, mm3            ; mm5 = mm3 = v
			pmulhw mm2, mmw_mult_U_G ; mm2 *= u green coeff 
			pmulhw mm3, mmw_mult_V_G ; mm3 *= v green coeff  
			pmulhw mm4, mmw_mult_U_B ; mm4 = blue chroma
			pmulhw mm5, mmw_mult_V_R ; mm5 = red chroma
			paddsw mm2, mm3					 ; mm2 = green chroma

		
			psubusb mm0, mmb_0x10    ; mm0 -= 16
			movq mm1, mmw_0x00ff     ; mm1 = 00ff00ff00ff00ff 
			psrlw mm0, 8             ; mm0 = __y7__y5__y3__y1 luma odd
			pand mm1, mm0            ; mm1 = __y6__y4__y2__y0 luma even
			psllw mm0, 3             ; mm0 *= 8
			psllw mm1, 3             ; mm1 *= 8
			pmulhw mm0, mmw_mult_Y   ; mm0 luma odd *= luma coeff 
			pmulhw mm1, mmw_mult_Y   ; mm1 luma even *= luma coeff 

	
			movq mm3, mm4						 ; copy blue chroma
			movq mm6, mm5						 ; copy red chroma
			movq mm7, mm2						 ; copy green chroma
			paddsw mm3, mm0					 ; mm3 = luma odd + blue chroma
			paddsw mm4, mm1					 ; mm4 = luma even + blue chroma
			paddsw mm6, mm0					 ; mm6 = luma odd + red chroma
			paddsw mm5, mm1					 ; mm5 = luma even + red chroma
			paddsw mm7, mm0					 ; mm7 = luma odd + green chroma
			paddsw mm2, mm1					 ; mm2 = luma even + green chroma
		
			packuswb mm3, mm3
			packuswb mm4, mm4
			packuswb mm6, mm6
			packuswb mm5, mm5
			packuswb mm7, mm7
			packuswb mm2, mm2
		
			punpcklbw mm4, mm3			 ; mm4 = b7b6b5b4b3b2b1b0 blue
			punpcklbw mm5, mm6			 ; mm5 = r7r6r5r4r3r2r1r0 red
			punpcklbw mm2, mm7			 ; mm2 = g7g6g5g4g3g2g1g0 green


			pand mm4, mask_5
			pand mm5, mask_5
			pand mm2, mask_5

	

			psrlw mm4, 3						 ; mm4 = blue shifted
			pand mm4, mask_blue			 ; mask the blue again
			pxor mm7, mm7						 ; zero mm7
			movq mm1, mm4						 ; mm1 = copy blue
			movq mm3, mm5						 ; mm3 = copy red
			movq mm6, mm2						 ; mm6 = copy green

			punpckhbw mm1, mm7
			punpckhbw mm3, mm7
			punpckhbw mm6, mm7
			psllw mm6, 2						 ; shift green
			psllw mm3, 7						 ; shift red
			por mm6, mm3
			por mm6, mm1
			movq 8[eax], mm6

			punpcklbw mm2, mm7			 ; mm2 = __g3__g2__g1__g0 already masked
			punpcklbw mm5, mm7
			punpcklbw mm4, mm7
			psllw mm2, 2						 ; shift green
			psllw mm5, 7						 ; shift red
			por mm2, mm5
			por mm2, mm4
			movq [eax], mm2

			add ebx, 8               ; puc_y   += 8;
			add ecx, 4               ; puc_u   += 4;
			add edx, 4               ; puc_v   += 4;
			add eax, 16              ; puc_out += 16 // wrote 16 bytes

			inc edi
			jne horiz_loop			

			pop edi 
			pop edx 
			pop ecx
			pop ebx 
			pop eax

			emms
						
		}
		puc_y   += stride_y;
		if (y%2) {
			puc_u   += stride_uv;
			puc_v   += stride_uv;
		}
		puc_out += stride_out;
	}
}

/***/

#define _S(a)		(a)>255 ? 255 : (a)<0 ? 0 : (a)

#define _R(y,u,v) (0x2568*(y)  			       + 0x3343*(u)) /0x2000
#define _G(y,u,v) (0x2568*(y) - 0x0c92*(v) - 0x1a1e*(u)) /0x2000
#define _B(y,u,v) (0x2568*(y) + 0x40cf*(v))					     /0x2000

#define _mR	0x7c00
#define _mG 0x03e0
#define _mB 0x001f

#define _Ps565(r,g,b) ( ((r & 0xF8) >> 3) | (((g & 0xF8) << 3)) | (((b & 0xF8) << 8)) )


void yuv2rgb_565(uint8_t *puc_y, int stride_y, 
                uint8_t *puc_u, uint8_t *puc_v, int stride_uv, 
                uint8_t *puc_out, int width_y, int height_y,
								unsigned int _stride_out) 
{

	int y, horiz_count;
	unsigned short * pus_out;
	int stride_out = width_y * 2;

	if (height_y < 0) {
	
		height_y  = -height_y;
		puc_y     += (height_y   - 1) * stride_y ;
		puc_u     += (height_y/2 - 1) * stride_uv;
		puc_v     += (height_y/2 - 1) * stride_uv;
		stride_y  = -stride_y;
		stride_uv = -stride_uv;
	}
	pus_out = (unsigned short *) puc_out;

	horiz_count = -(width_y >> 3);

	for (y=0; y<height_y; y++) 
	{


		_asm {
			push eax
			push ebx
			push ecx
			push edx
			push edi

			mov eax, puc_out       
			mov ebx, puc_y       
			mov ecx, puc_u       
			mov edx, puc_v
			mov edi, horiz_count
			
		horiz_loop:

		
			movd mm2, [ecx]					 ; mm2 = ________u3u2u1u0
			movd mm3, [edx]					 ; mm3 = ________v3v2v1v0
			movq mm0, [ebx]          ; mm0 = y7y6y5y4y3y2y1y0  

			pxor mm7, mm7						 ; zero mm7

			punpcklbw mm2, mm7       ; mm2 = __u3__u2__u1__u0
			punpcklbw mm3, mm7       ; mm3 = __v3__v2__v1__v0
			psubw mm2, mmw_0x0080    ; mm2 -= 128
			psubw mm3, mmw_0x0080    ; mm3 -= 128
			psllw mm2, 3             ; mm2 *= 8
			psllw mm3, 3             ; mm3 *= 8
			movq mm4, mm2            ; mm4 = mm2 = u
			movq mm5, mm3            ; mm5 = mm3 = v
			pmulhw mm2, mmw_mult_U_G ; mm2 *= u green coeff 
			pmulhw mm3, mmw_mult_V_G ; mm3 *= v green coeff  
			pmulhw mm4, mmw_mult_U_B ; mm4 = blue chroma
			pmulhw mm5, mmw_mult_V_R ; mm5 = red chroma
			paddsw mm2, mm3					 ; mm2 = green chroma

		
			psubusb mm0, mmb_0x10    ; mm0 -= 16
			movq mm1, mmw_0x00ff     ; mm1 = 00ff00ff00ff00ff 
			psrlw mm0, 8             ; mm0 = __y7__y5__y3__y1 luma odd
			pand mm1, mm0            ; mm1 = __y6__y4__y2__y0 luma even
			psllw mm0, 3             ; mm0 *= 8
			psllw mm1, 3             ; mm1 *= 8
			pmulhw mm0, mmw_mult_Y   ; mm0 luma odd *= luma coeff 
			pmulhw mm1, mmw_mult_Y   ; mm1 luma even *= luma coeff 

		
			movq mm3, mm4						 ; copy blue chroma
			movq mm6, mm5						 ; copy red chroma
			movq mm7, mm2						 ; copy green chroma
			paddsw mm3, mm0					 ; mm3 = luma odd + blue chroma
			paddsw mm4, mm1					 ; mm4 = luma even + blue chroma
			paddsw mm6, mm0					 ; mm6 = luma odd + red chroma
			paddsw mm5, mm1					 ; mm5 = luma even + red chroma
			paddsw mm7, mm0					 ; mm7 = luma odd + green chroma
			paddsw mm2, mm1					 ; mm2 = luma even + green chroma
		
			packuswb mm3, mm3
			packuswb mm4, mm4
			packuswb mm6, mm6
			packuswb mm5, mm5
			packuswb mm7, mm7
			packuswb mm2, mm2
		
			punpcklbw mm4, mm3			 ; mm4 = b7b6b5b4b3b2b1b0 blue
			punpcklbw mm5, mm6			 ; mm5 = r7r6r5r4r3r2r1r0 red
			punpcklbw mm2, mm7			 ; mm2 = g7g6g5g4g3g2g1g0 green

	
			pand mm4, mask_5
			pand mm5, mask_5
			pand mm2, mask_5

	

			psrlw mm4, 3						 ; mm4 = red shifted
			pand mm4, mask_blue			 ; mask the blue again
			pxor mm7, mm7						 ; zero mm7
			movq mm1, mm5						 ; mm1 = copy blue
			movq mm3, mm4						 ; mm3 = copy red
			movq mm6, mm2						 ; mm6 = copy green

			punpckhbw mm1, mm7
			punpckhbw mm3, mm7
			punpckhbw mm6, mm7
			psllw mm6, 3						 ; shift green
			psllw mm1, 8						 ; shift blue
			por mm6, mm3
			por mm6, mm1
			movq 8[eax], mm6

			punpcklbw mm2, mm7			 ; mm2 = __g3__g2__g1__g0 already masked
			punpcklbw mm4, mm7
			punpcklbw mm5, mm7
			psllw mm2, 3						 ; shift green
			psllw mm5, 8						 ; shift blue
			por mm2, mm4
			por mm2, mm5
			movq [eax], mm2

			add ebx, 8               ; puc_y   += 8;
			add ecx, 4               ; puc_u   += 4;
			add edx, 4               ; puc_v   += 4;
			add eax, 16              ; puc_out += 16 // wrote 16 bytes

			inc edi
			jne horiz_loop			

			pop edi 
			pop edx 
			pop ecx
			pop ebx 
			pop eax

			emms
						
		}
		/***/

		puc_y   += stride_y;
		if (y%2) {
			puc_u   += stride_uv;
			puc_v   += stride_uv;
		}
		puc_out += stride_out;
	}
}

/***/






 


void yuy2_out(uint8_t *puc_y, int stride_y, 
  uint8_t *puc_u, uint8_t *puc_v, int stride_uv, 
  uint8_t *puc_out, int width_y, int height_y,
	unsigned int stride_out) 
{ 
	int y;
	uint8_t* puc_out2;
	unsigned int stride_diff = 4 * stride_out - 2 * width_y; 

	if (height_y < 0) {

		height_y  = -height_y;
		puc_y     += (height_y   - 1) * stride_y ;
		puc_u     += (height_y/2 - 1) * stride_uv;
		puc_v     += (height_y/2 - 1) * stride_uv;
		stride_y  = -stride_y;
		stride_uv = -stride_uv;
	}
	puc_out2 = puc_out + 2 * stride_out;
	for (y=height_y/2; y; y--) {
		register uint8_t *py, *py2, *pu, *pv;
		register int x;
		uint32_t tmp;

		py = puc_y;
		py2 = puc_y + stride_y;
		pu = puc_u;
		pv = puc_v;
		for (x=width_y/2; x; x--) {
			tmp = *(py++);
			tmp |= *(pu++) << 8;
			tmp |= *(py++) << 16;
			tmp |= *(pv++) << 24;
			*(uint32_t*)puc_out=tmp;
			puc_out += 4;

			tmp &= 0xFF00FF00;
			tmp |= *(py2++);
			tmp |= *(py2++) << 16;
			*(uint32_t*)puc_out2=tmp;
			puc_out2 += 4;
		}

		puc_y += 2*stride_y;
		puc_u += stride_uv;
		puc_v += stride_uv;

		puc_out += stride_diff;
		puc_out2 += stride_diff;
	}

}



void uyvy_out(uint8_t *puc_y, int stride_y, 
  uint8_t *puc_u, uint8_t *puc_v, int stride_uv, 
  uint8_t *puc_out, int width_y, int height_y,
	unsigned int stride_out) 
{ 
	int y;
	uint8_t* puc_out2;
	unsigned int stride_diff = 4 * stride_out - 2 * width_y; 

	if (height_y < 0) {

		height_y  = -height_y;
		puc_y     += (height_y   - 1) * stride_y ;
		puc_u     += (height_y/2 - 1) * stride_uv;
		puc_v     += (height_y/2 - 1) * stride_uv;
		stride_y  = -stride_y;
		stride_uv = -stride_uv;
	}
	puc_out2 = puc_out + 2 * stride_out;
	for (y=height_y/2; y; y--) {
		register uint8_t *py, *py2, *pu, *pv;
		register int x;
		uint32_t tmp;

		py = puc_y;
		py2 = puc_y + stride_y;
		pu = puc_u;
		pv = puc_v;
		for (x=width_y/2; x; x--) {
			tmp = *(pu++);
			tmp |= *(py++) << 8;
			tmp |= *(pv++) << 16;
			tmp |= *(py++) << 24;
			*(uint32_t*)puc_out=tmp;
			puc_out += 4;

			tmp &= 0x00FF00FF;
			tmp |= *(py2++) << 8;
			tmp |= *(py2++) << 24;
			*(uint32_t*)puc_out2=tmp;
			puc_out2 += 4;
		}

		puc_y += 2*stride_y;
		puc_u += stride_uv;
		puc_v += stride_uv;

		puc_out += stride_diff;
		puc_out2 += stride_diff;
	}

}



void yuv12_out(uint8_t *puc_y, int stride_y, 
  uint8_t *puc_u, uint8_t *puc_v, int stride_uv, 
  uint8_t *puc_out, int width_y, int height_y,
	unsigned int stride_out) 
{ 
	int i;

	unsigned char * pauc_out[3];

	if (height_y < 0) {
		
		height_y = -height_y;
		puc_y     += (height_y   - 1) * stride_y ;
		puc_u     += (height_y/2 - 1) * stride_uv;
		puc_v     += (height_y/2 - 1) * stride_uv;
		stride_y  = -stride_y;
		stride_uv = -stride_uv;
	}

	pauc_out[0] = puc_out;
	pauc_out[1] = puc_out + stride_out * height_y;
	pauc_out[2] = puc_out + stride_out * height_y * 5 / 4;

	for (i=0; i<height_y; i++) {
		
		memcpy(pauc_out[0], puc_y, width_y);

		pauc_out[0] += stride_out;
		puc_y += stride_y;
	}

	for (i=0; i<height_y/2; i++) {
		
		memcpy(pauc_out[2], puc_u, width_y/2); 
		memcpy(pauc_out[1], puc_v, width_y/2);

		pauc_out[1] += stride_out/2;
		pauc_out[2] += stride_out/2;
		puc_u += stride_uv;
		puc_v += stride_uv;
	}
}

/***/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -