📄 image.cpp
字号:
movd ((int*)&edx, mm0);
edx>>= 10;
edx+= YUV_RGB_DATA<CCIR>::U_ADD;
*ebx=(unsigned char)edx;
movd ((int*)&edx, mm2);
edx>>= 10;
edx+= YUV_RGB_DATA<CCIR>::V_ADD;
*ecx=(unsigned char)edx;
}
};
// Converts one 8x2 pixel tile of planar YV12 (two luma rows sharing one
// chroma row) into packed BGR (3 bytes/pixel) or BGR0 (4 bytes/pixel).
//
// The body is a line-by-line transcription of an MMX assembly routine: each
// call such as movq()/punpcklbw() is a wrapper declared elsewhere in the
// project, and the comments below assume every wrapper behaves like the MMX
// instruction of the same name (TODO confirm against the wrapper header).
// The local __m64 variables stand in for the MMX registers and are reused
// aggressively, so statement order matters.
//
// ARG1 and CCIR are not declared inside this struct; they presumably come
// from an enclosing template/macro scope (verify at the point of use).
struct YV12_TO_BGR
{
// One-time setup: clears mm7 so PROCESS can use it as the zero operand when
// widening bytes to 16-bit words.
static __forceinline void INIT(__m64 &mm7)
{
pxor (mm7, mm7);
}
// Converts 8 horizontal pixels on two consecutive rows.
//
//   mm7        all-zero register on entry (see INIT); temporarily clobbered
//              inside, but zero again on return
//   edi        destination pointer, first output row
//   edx        destination stride; the second output row is at edi+edx
//   ebx        pointer to 4 U samples
//   ecx        pointer to 4 V samples
//   esi        pointer to 8 Y samples, first row
//   eax        luma stride; the second luma row is at esi+eax
//   uv_stride  not used by this function
//
// Each chroma sample is applied to two horizontally adjacent pixels on both
// rows. Results are shifted right by SCALEBITS and packed with unsigned
// saturation (packuswb).
static __forceinline void PROCESS(__m64 &mm7,unsigned char *edi,stride_t edx,unsigned char *ebx,unsigned char *ecx,unsigned char *esi,stride_t eax,stride_t uv_stride)
{
// Spill slots that replace the stack temporaries of the assembly version.
// Suffix 1 = first row, suffix 2 = second row.
__m64 TEMP_Y1;
__m64 TEMP_Y2;
__m64 TEMP_G1;
__m64 TEMP_G2;
__m64 TEMP_B1;
__m64 TEMP_B2;
// Right shift applied to every colour sum before packing to bytes.
static const int SCALEBITS=6;
__m64 mm2,mm3,mm4,mm5,mm6,mm0,mm1;
// ---- chroma: load 4 U / 4 V samples and build the per-channel terms ----
movd (mm2, ebx); // u_ptr[0]
movd (mm3, ecx); // v_ptr[0]
punpcklbw (mm2, mm7); // u3u2u1u0 -> mm2
punpcklbw (mm3, mm7); // v3v2v1v0 -> mm3
psubsw (mm2, YUV_RGB_DATA<CCIR>::U_SUB); // U - U_SUB
psubsw (mm3, YUV_RGB_DATA<CCIR>::V_SUB); // V - V_SUB
movq (mm4, mm2); // keep (U - U_SUB) for the blue term
movq (mm5, mm3); // keep (V - V_SUB) for the red term
pmullw (mm2, YUV_RGB_DATA<CCIR>::UG_MUL); // U part of the green term
pmullw (mm3, YUV_RGB_DATA<CCIR>::VG_MUL); // V part of the green term
movq (mm6, mm2); // u3u2u1u0 -> mm6
punpckhwd (mm2, mm2); // u3u3u2u2 -> mm2
punpcklwd (mm6, mm6); // u1u1u0u0 -> mm6
pmullw (mm4, YUV_RGB_DATA<CCIR>::UB_MUL); // B_ADD -> mm4
movq (mm0, mm3);
punpckhwd (mm3, mm3); // v3v3v2v2 -> mm3
punpcklwd (mm0, mm0); // v1v1v0v0 -> mm0
paddsw (mm2, mm3); // green chroma term, pixels 4..7 -> mm2
paddsw (mm6, mm0); // green chroma term, pixels 0..3 -> mm6
pmullw (mm5, YUV_RGB_DATA<CCIR>::VR_MUL); // R_ADD -> mm5
// ---- first row: scaled luma and green ----
movq (mm0, esi); // y7y6y5y4y3y2y1y0 -> mm0
movq (mm1, mm0);
punpckhbw (mm1, mm7); // y7y6y5y4 -> mm1
punpcklbw (mm0, mm7); // y3y2y1y0 -> mm0
psubsw (mm0, YUV_RGB_DATA<CCIR>::Y_SUB); // Y - Y_SUB
psubsw (mm1, YUV_RGB_DATA<CCIR>::Y_SUB); // Y - Y_SUB
pmullw (mm1, YUV_RGB_DATA<CCIR>::Y_MUL);
pmullw (mm0, YUV_RGB_DATA<CCIR>::Y_MUL);
movq (TEMP_Y2, mm1); // first-row scaled y7y6y5y4 -> TEMP_Y2
movq (TEMP_Y1, mm0); // first-row scaled y3y2y1y0 -> TEMP_Y1
psubsw (mm1, mm2); // g7g6g5g4 -> mm1
psubsw (mm0, mm6); // g3g2g1g0 -> mm0
psraw (mm1, SCALEBITS);
psraw (mm0, SCALEBITS);
packuswb (mm0, mm1); //g7g6g5g4g3g2g1g0 -> mm0
movq (TEMP_G1, mm0); // first-row green bytes
// ---- second row: scaled luma and green ----
movq (mm0, esi+eax); // y7y6y5y4y3y2y1y0 -> mm0
movq (mm1, mm0);
punpckhbw( mm1, mm7); // y7y6y5y4 -> mm1
punpcklbw( mm0, mm7); // y3y2y1y0 -> mm0
psubsw (mm0, YUV_RGB_DATA<CCIR>::Y_SUB); // Y - Y_SUB
psubsw (mm1, YUV_RGB_DATA<CCIR>::Y_SUB); // Y - Y_SUB
pmullw (mm1, YUV_RGB_DATA<CCIR>::Y_MUL);
pmullw (mm0, YUV_RGB_DATA<CCIR>::Y_MUL);
movq (mm3, mm1); // second-row scaled y7y6y5y4 -> mm3
psubsw (mm1, mm2); // g7g6g5g4 -> mm1
movq (mm2, mm0); // second-row scaled y3y2y1y0 -> mm2 (green term in mm2 no longer needed)
psubsw (mm0, mm6); // g3g2g1g0 -> mm0
psraw (mm1, SCALEBITS);
psraw (mm0, SCALEBITS);
packuswb (mm0, mm1); // g7g6g5g4g3g2g1g0 -> mm0
movq (TEMP_G2, mm0); // second-row green bytes
// ---- blue, second row (luma still live in mm3/mm2) ----
movq (mm0, mm4);
punpckhwd (mm4, mm4); // blue term u3u3u2u2 -> mm4
punpcklwd (mm0, mm0); // blue term u1u1u0u0 -> mm0
movq (mm1, mm3); // y7y6y5y4 -> mm1
paddsw (mm3, mm4); // b7b6b5b4 -> mm3
// mm7 stops being zero here; it is cleared again before the next unpack
// that needs a zero operand.
movq (mm7, mm2); // y3y2y1y0 -> mm7
paddsw (mm2, mm0 ); // b3b2b1b0 -> mm2
psraw (mm3, SCALEBITS);
psraw (mm2, SCALEBITS );
packuswb (mm2, mm3 ); // b7b6b5b4b3b2b1b0 -> mm2
movq (TEMP_B2, mm2); // second-row blue bytes
// ---- blue, first row (luma reloaded from the spill slots) ----
movq (mm3, TEMP_Y2);
movq (mm2, TEMP_Y1);
movq (mm6, mm3); // TEMP_Y2 -> mm6
paddsw (mm3, mm4); // b7b6b5b4 -> mm3
movq (mm4, mm2); // TEMP_Y1 -> mm4
paddsw (mm2, mm0); // b3b2b1b0 -> mm2
psraw (mm3, SCALEBITS);
psraw (mm2, SCALEBITS);
packuswb (mm2, mm3); // b7b6b5b4b3b2b1b0 -> mm2
movq (TEMP_B1, mm2); // first-row blue bytes
// ---- red, both rows ----
movq (mm0, mm5);
punpckhwd (mm5, mm5); // v3v3v2v2 -> mm5
punpcklwd (mm0, mm0); // v1v1v0v0 -> mm0
paddsw (mm1, mm5); // r7r6r5r4 -> mm1
paddsw (mm7, mm0 ); // r3r2r1r0 -> mm7
psraw (mm1, SCALEBITS);
psraw (mm7, SCALEBITS);
packuswb (mm7, mm1); // r7r6r5r4r3r2r1r0 -> mm7 (TEMP_R2)
paddsw (mm6, mm5); // r7r6r5r4 -> mm6
paddsw (mm4, mm0); // r3r2r1r0 -> mm4
psraw (mm6, SCALEBITS);
psraw (mm4, SCALEBITS);
packuswb (mm4, mm6); // r7r6r5r4r3r2r1r0 -> mm4 (TEMP_R1)
// ---- interleave first row into 4-byte pixels (lowest byte first: b, g, r, 0) ----
movq (mm0, TEMP_B1);
movq (mm1, TEMP_G1);
movq (mm6, mm7); // park second-row red in mm6 so mm7 can be zeroed
movq (mm2, mm0);
punpcklbw (mm2, mm4); // r3b3r2b2r1b1r0b0 -> mm2
punpckhbw (mm0, mm4); // r7b7r6b6r5b5r4b4 -> mm0
pxor (mm7, mm7); // mm7 is zero again from here to the end of the function
movq (mm3, mm1);
punpcklbw (mm1, mm7); // 0g30g20g10g0 -> mm1
punpckhbw (mm3, mm7); // 0g70g60g50g4 -> mm3
movq (mm4, mm2);
punpcklbw (mm2, mm1); // 0r1g1b10r0g0b0 -> mm2
punpckhbw (mm4, mm1); // 0r3g3b30r2g2b2 -> mm4
movq (mm5, mm0);
punpcklbw( mm0, mm3); // 0r5g5b50r4g4b4 -> mm0
punpckhbw (mm5, mm3); // 0r7g7b70r6g6b6 -> mm5
if (ARG1 == 3) // BGR (24-bit)
{
// Pixels 0..5 are written as overlapping 4-byte stores at a 3-byte pitch;
// each store overwrites the zero padding byte left by the previous one.
movd (edi, mm2);
psrlq (mm2, 32);
movd (edi + 3, mm2);
movd (edi + 6, mm4);
psrlq (mm4, 32);
movd (edi + 9, mm4);
movd (edi + 12, mm0);
psrlq (mm0, 32);
movd (edi + 15, mm0);
// Bytes 16..23 (g5 r5, pixel 6, pixel 7) are assembled in one register and
// written with a single 8-byte store, so exactly 24 bytes of the row are touched.
movq (mm2, mm5);
psrlq (mm0, 8); // 000000r5g5 -> mm0
psllq (mm2, 32); // 0r6g6b60000 -> mm2
psrlq (mm5, 32); // 00000r7g7b7 -> mm5
psrlq (mm2, 16); // 000r6g6b600 -> mm2
por (mm0, mm2); // 000r6g6b6r5g5 -> mm0
psllq (mm5, 40); // r7g7b700000 -> mm5
por (mm5, mm0); // r7g7b7r6g6b6r5g5 -> mm5
movq (edi + 16, mm5);
// Second row: same interleave and packing, using TEMP_B2/TEMP_G2 and the
// red bytes parked in mm6, written at edi+edx.
movq (mm0, TEMP_B2);
movq (mm1, TEMP_G2);
movq (mm2, mm0);
punpcklbw (mm2, mm6); // r3b3r2b2r1b1r0b0 -> mm2
punpckhbw (mm0, mm6); // r7b7r6b6r5b5r4b4 -> mm0
movq (mm3, mm1 );
punpcklbw (mm1, mm7); // 0g30g20g10g0 -> mm1
punpckhbw (mm3, mm7); // 0g70g60g50g4 -> mm3
movq (mm4, mm2 );
punpcklbw (mm2, mm1); // 0r1g1b10r0g0b0 -> mm2
punpckhbw (mm4, mm1); // 0r3g3b30r2g2b2 -> mm4
movq (mm5, mm0 );
punpcklbw (mm0, mm3); // 0r5g5b50r4g4b4 -> mm0
punpckhbw (mm5, mm3); // 0r7g7b70r6g6b6 -> mm5
movd (edi+edx, mm2 );
psrlq (mm2, 32 );
movd (edi+edx + 3, mm2);
movd (edi+edx + 6, mm4);
psrlq (mm4, 32 );
movd (edi+edx + 9, mm4);
movd (edi+edx + 12, mm0);
psrlq (mm0, 32 );
movd (edi+edx + 15, mm0);
movq (mm2, mm5 );
psrlq (mm0, 8 ); // 000000r5g5 -> mm0
psllq (mm2, 32); // 0r6g6b60000 -> mm2
psrlq (mm5, 32); // 00000r7g7b7 -> mm5
psrlq (mm2, 16); // 000r6g6b600 -> mm2
por (mm0, mm2 ); // 000r6g6b6r5g5 -> mm0
psllq (mm5, 40); // r7g7b700000 -> mm5
por (mm5, mm0 ); // r7g7b7r6g6b6r5g5 -> mm5
movq (edi + edx + 16, mm5);
}else{ // BGRA (32-bit)
// 4 bytes per pixel: the interleaved registers are stored as-is, two pixels
// per 8-byte store, 32 bytes per row. The fourth byte of each pixel is zero.
movq (edi, mm2);
movq (edi + 8, mm4);
movq (edi + 16, mm0);
movq (edi + 24, mm5);
// Second row: interleave TEMP_B2/TEMP_G2 with the red bytes parked in mm6.
movq (mm0, TEMP_B2);
movq (mm1, TEMP_G2);
movq (mm2, mm0);
punpcklbw (mm2, mm6); // r3b3r2b2r1b1r0b0 -> mm2
punpckhbw (mm0, mm6); // r7b7r6b6r5b5r4b4 -> mm0
movq (mm3, mm1 );
punpcklbw (mm1, mm7); // 0g30g20g10g0 -> mm1
punpckhbw (mm3, mm7); // 0g70g60g50g4 -> mm3
movq (mm4, mm2 );
punpcklbw (mm2, mm1); // 0r1g1b10r0g0b0 -> mm2
punpckhbw (mm4, mm1); // 0r3g3b30r2g2b2 -> mm4
movq (mm5, mm0 );
punpcklbw (mm0, mm3); // 0r5g5b50r4g4b4 -> mm0
punpckhbw (mm5, mm3); // 0r7g7b70r6g6b6 -> mm5
movq (edi + edx, mm2);
movq (edi + edx + 8, mm4);
movq (edi + edx + 16, mm0);
movq (edi + edx + 24, mm5);
}
}
};
template<class TFUNC> static __forceinline void MAKE_COLORSPACE(uint8_t * x_ptr,
stride_t x_stride,
uint8_t * y_ptr,
uint8_t * v_ptr,
uint8_t * u_ptr,
stride_t y_stride,
stride_t uv_stride,
int width,
int height,
const TFUNC &FUNC)
{
//------------------------------------------------------------------------------
//
// MAKE_COLORSPACE(NAME,STACK, BYTES,PIXELS,ROWS, FUNC, ARG1, ARG2, CCIR)
//
// This macro provides an assembler width/height scroll loop
// NAME function name
// STACK additional stack bytes required by FUNC
// BYTES bytes-per-pixel for the given colorspace
// PIXELS pixels (columns) operated on per FUNC call
// VPIXELS vpixels (rows) operated on per FUNC call
// FUNC conversion macro name; we expect to find FUNC_INIT and FUNC macros
// ARG1 argument passed to FUNC
//
// throughout the FUNC the registers mean:
// eax y_stride
// ebx u_ptr
// ecx v_ptr
// edx x_stride
// esi y_ptr
// edi x_ptr
// ebp width
//
//------------------------------------------------------------------------------
stride_t x_dif;
stride_t y_dif;
stride_t uv_dif;
stride_t fixed_width;
stride_t eax= width;
eax+=15;
eax&=~15;
fixed_width=eax;
stride_t ebx1=x_stride;
for (int i=0;i<BYTES;i++)
ebx1-= eax;
x_dif= ebx1; // x_dif = x_stride - BYTES*fixed_width
ebx1= y_stride;
ebx1-= eax;
y_dif= ebx1; // y_dif = y_stride - fixed_width
ebx1=uv_stride;
stride_t ecx1= eax;
ecx1>>=1;
ebx1-= ecx1;
uv_dif= ebx1; // uv_dif = uv_stride - fixed_width/2
unsigned char *esi= y_ptr; // $esi$ = y_ptr
unsigned char *edi= x_ptr; // $edi$ = x_ptr
stride_t edx= x_stride; // $edx$ = x_stride
stride_t ebp= height; // $ebp$ = height
// ; --- begin loop ---
eax= y_stride; //$eax$ = y_stride
unsigned char *ebx= u_ptr; //$ebx$ = u_ptr
unsigned char *ecx= v_ptr;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -