📄 image.cpp
字号:
movq (mm5, mm2);
if (ARG1 == 0) // yuyv
{
psrlw (mm4, 8);
psrlw (mm5, 8);
}
pand (mm4, mm7);
pand (mm5, mm7);
paddw (mm4, mm5);
movq (mm5, mm1);
movq (mm6, mm3);
if (ARG1 == 0) // yuyv
{
psrlw (mm5, 8);
psrlw (mm6, 8);
}
pand (mm5, mm7);
pand (mm6, mm7);
paddw( mm5, mm6);
paddw( mm4, mmx_one); // +1 rounding
paddw( mm5, mmx_one); //
psrlw( mm4, 1);
psrlw( mm5, 1);
}
else //---[ 3dnow/xmm ]----------------------------------------------------
{
movq (mm4, mm0);
movq (mm5, mm1);
pavgb(mm4, mm2); //pavgb/pavgusb mm4, mm2
pavgb(mm5, mm3); //pavgb/pavgusb mm5, mm3
////movq mm6, mm0 // 0 rounding
////pxor mm6, mm2 //
////psubb mm4, mm6 //
////movq mm6, mm1 //
////pxor mm6, mm3 //
////psubb mm5, mm5 //
if (ARG1 == 0) // yuyv
{
psrlw (mm4, 8);
psrlw (mm5, 8);
}
pand (mm4, mm7);
pand (mm5, mm7);
}
//--------------------------------------------------------------------
// write y-component
if (ARG1 == 1) // uyvy
{
psrlw (mm0, 8);
psrlw (mm1, 8);
psrlw (mm2, 8);
psrlw (mm3, 8);
}
pand (mm0, mm7);
pand (mm1, mm7);
pand (mm2, mm7);
pand (mm3, mm7);
packuswb (mm0, mm1);
packuswb (mm2, mm3);
Tsimd::movntq (esi, mm0);
Tsimd::movntq (esi+eax, mm2);
// write uv-components
packuswb (mm4, mm5);
movq (mm5, mm4);
psrlq (mm4, 8);
pand (mm5, mm7);
pand (mm4, mm7);
packuswb (mm5,mm5);
packuswb (mm4,mm4);
movd (ebx,mm5);
movd (ecx,mm4);
}
};
// Interleaves two planar luma rows with one shared chroma row into two rows
// of packed output. ARG1 == 0 produces Y-first ordering (YUYV); any other
// value produces chroma-first ordering (UYVY).
struct YV12_TO_YUYV
{
    // This conversion needs no preloaded constant; mm7 is left untouched here.
    static __forceinline void INIT(__m64 &mm7)
    {
    }

    // Converts one tile.
    //   edi, edx  : packed destination pointer and destination row stride
    //   ebx, ecx  : U and V source pointers (one movd from each)
    //   esi, eax  : Y source pointer and Y row stride (one movq per row)
    //   uv_stride : not used by this variant
    //   mm7       : used as a scratch register in the UYVY path, so the
    //               caller's value is overwritten there
    static __forceinline void PROCESS(__m64 &mm7,unsigned char *edi,stride_t edx,unsigned char *ebx,unsigned char *ecx,unsigned char *esi,stride_t eax,stride_t uv_stride)
    {
        __m64 luma0, luma1, chroma, v_plane;

        movd (chroma, ebx);            // [    |uuuu]
        movd (v_plane, ecx);           // [    |vvvv]
        movq (luma0, esi);             // [yyyy|yyyy]  y row 0
        movq (luma1, esi+eax);         // [yyyy|yyyy]  y row 1
        punpcklbw (chroma, v_plane);   // [vuvu|vuvu]  shared by both rows

        if (ARG1 != 0)
        {
            // UYVY: chroma bytes lead, so the chroma copies are the unpack
            // destinations and the luma rows are the second operand.
            __m64 row0_hi, row1_lo;
            movq (row0_hi, chroma);
            movq (row1_lo, chroma);
            movq (mm7, chroma);
            punpcklbw (chroma, luma0);   // y row 0 + 0
            punpckhbw (row0_hi, luma0);  // y row 0 + 8
            punpcklbw (row1_lo, luma1);  // y row 1 + 0
            punpckhbw (mm7, luma1);      // y row 1 + 8
            movq (edi, chroma);
            movq (edi+8, row0_hi);
            movq (edi+edx, row1_lo);
            movq (edi+edx+8, mm7);
        }
        else
        {
            // YUYV: luma bytes lead, so each luma row is duplicated and the
            // chroma register is the second operand of every unpack.
            __m64 row0_hi, row1_hi;
            movq (row0_hi, luma0);
            movq (row1_hi, luma1);
            punpcklbw (luma0, chroma);   // y row 0 + 0
            punpckhbw (row0_hi, chroma); // y row 0 + 8
            punpcklbw (luma1, chroma);   // y row 1 + 0
            punpckhbw (row1_hi, chroma); // y row 1 + 8
            movq (edi, luma0);
            movq (edi+8, row0_hi);
            movq (edi+edx, luma1);
            movq (edi+edx+8, row1_hi);
        }
    }
};
// Four-row variant of the planar-to-packed conversion (presumably the
// interlaced form, going by the "I" suffix -- confirm against the caller).
// Luma rows 0 and 2 are combined with chroma row 0; luma rows 1 and 3 are
// combined with chroma row 1. ARG1 == 0 produces YUYV ordering, any other
// value produces UYVY ordering.
struct YV12_TO_YUYVI
{
    // This conversion needs no preloaded constant; mm7 is left untouched here.
    static __forceinline void INIT(__m64 &mm7)
    {
    }

    // Converts one tile of four rows.
    //   edi, edx  : packed destination pointer and destination row stride
    //   ebx, ecx  : U and V source pointers; rows 0 and 1 of each are read
    //   esi, eax  : Y source pointer and Y row stride; rows 0..3 are read
    //   uv_stride : distance between chroma row 0 and chroma row 1
    //   mm7       : used as a scratch register in both paths, so the caller's
    //               value is overwritten
    static __forceinline void PROCESS(__m64 &mm7,unsigned char *edi,stride_t edx,unsigned char *ebx,unsigned char *ecx,unsigned char *esi,stride_t eax,stride_t uv_stride)
    {
        __m64 chroma_a, chroma_b;

        movd (chroma_a, ebx);                  // [    |uuuu]
        movd (chroma_b, ebx+uv_stride);        // [    |uuuu]
        punpcklbw (chroma_a, ecx);             // [vuvu|vuvu]  uv row 0
        punpcklbw (chroma_b, ecx+uv_stride);   // [vuvu|vuvu]  uv row 1

        // Rows 2 and 3 are addressed relative to row 1 of each plane; the
        // incoming pointers themselves are left unmodified.
        unsigned char *luma_next = esi + eax;
        unsigned char *dest_next = edi + edx;

        if (ARG1 != 0)
        {
            // UYVY: chroma bytes lead, so copies of the chroma registers are
            // the unpack destinations and the luma rows are the sources.
            __m64 luma_a, luma_b, out_a_lo, out_a_hi, out_b_lo;

            movq (luma_a, esi);                    // y row 0
            movq (luma_b, esi+eax);                // y row 1
            movq (out_a_lo, chroma_a);
            movq (out_a_hi, chroma_a);
            movq (out_b_lo, chroma_b);
            movq (mm7, chroma_b);
            punpcklbw (out_a_lo, luma_a);          // y row 0 + 0
            punpckhbw (out_a_hi, luma_a);          // y row 0 + 8
            punpcklbw (out_b_lo, luma_b);          // y row 1 + 0
            punpckhbw (mm7, luma_b);               // y row 1 + 8
            movq (edi, out_a_lo);
            movq (edi+8, out_a_hi);
            movq (edi+edx, out_b_lo);
            movq (edi+edx+8, mm7);

            movq (luma_a, luma_next+eax);          // y row 2
            movq (luma_b, luma_next+2*eax);        // y row 3
            movq (out_a_lo, chroma_a);
            movq (out_a_hi, chroma_a);
            movq (out_b_lo, chroma_b);
            movq (mm7, chroma_b);
            punpcklbw (out_a_lo, luma_a);          // y row 2 + 0
            punpckhbw (out_a_hi, luma_a);          // y row 2 + 8
            punpcklbw (out_b_lo, luma_b);          // y row 3 + 0
            punpckhbw (mm7, luma_b);               // y row 3 + 8
            movq (dest_next+edx, out_a_lo);
            movq (dest_next+edx+8, out_a_hi);
            movq (dest_next+2*edx, out_b_lo);
            movq (dest_next+2*edx+8, mm7);
        }
        else
        {
            // YUYV: luma bytes lead, so each luma row is loaded, duplicated,
            // and unpacked against the matching chroma register.
            __m64 out_a_lo, out_a_hi, out_b_lo;

            movq (out_a_lo, esi);                  // y row 0
            movq (out_b_lo, esi+eax);              // y row 1
            movq (out_a_hi, out_a_lo);
            movq (mm7, out_b_lo);
            punpcklbw (out_a_lo, chroma_a);        // y row 0 + 0
            punpckhbw (out_a_hi, chroma_a);        // y row 0 + 8
            punpcklbw (out_b_lo, chroma_b);        // y row 1 + 0
            punpckhbw (mm7, chroma_b);             // y row 1 + 8
            movq (edi, out_a_lo);
            movq (edi+8, out_a_hi);
            movq (edi+edx, out_b_lo);
            movq (edi+edx+8, mm7);

            movq (out_a_lo, luma_next+eax);        // y row 2
            movq (out_b_lo, luma_next+2*eax);      // y row 3
            movq (out_a_hi, out_a_lo);
            movq (mm7, out_b_lo);
            punpcklbw (out_a_lo, chroma_a);        // y row 2 + 0
            punpckhbw (out_a_hi, chroma_a);        // y row 2 + 8
            punpcklbw (out_b_lo, chroma_b);        // y row 3 + 0
            punpckhbw (mm7, chroma_b);             // y row 3 + 8
            movq (dest_next+edx, out_a_lo);
            movq (dest_next+edx+8, out_a_hi);
            movq (dest_next+2*edx, out_b_lo);
            movq (dest_next+2*edx+8, mm7);
        }
    }
};
struct BGR_TO_YV12
{
// Preloads the luma multiplier: copies YUV_RGB_DATA<CCIR>::y_mul into mm7.
// PROCESS leaves mm7 unchanged and passes it to pmaddwd for every unpacked
// source pixel (the "*= Y_MUL" steps).
static __forceinline void INIT(__m64 &mm7)
{
movq (mm7, YUV_RGB_DATA<CCIR>::y_mul);
}
static __forceinline void PROCESS(__m64 &mm7,unsigned char *edi,stride_t edx1,unsigned char *ebx,unsigned char *ecx,unsigned char *esi,stride_t eax,stride_t uv_stride)
{
// y_out
__m64 mm4,mm5,mm0,mm2,mm6,mm1,mm3;
pxor (mm4, mm4);
pxor (mm5, mm5);
movd (mm0, edi); // x_ptr[0...]
movd (mm2, edi+edx1); // x_ptr[x_stride...]
punpcklbw (mm0, mm4); // [ |b |g |r ]
punpcklbw (mm2, mm5); // [ |b |g |r ]
movq (mm6, mm0 ); // = [ |b4|g4|r4]
paddw (mm6, mm2 ); // +[ |b4|g4|r4]
pmaddwd (mm0, mm7 ); // *= Y_MUL
pmaddwd (mm2, mm7 ); // *= Y_MUL
movq (mm4, mm0 ); // [r]
movq (mm5, mm2 ); // [r]
psrlq (mm4, 32 ); // +[g]
psrlq (mm5, 32 ); // +[g]
paddd (mm0, mm4 ); // +[b]
paddd (mm2, mm5 ); // +[b]
pxor (mm4, mm4);
pxor (mm5, mm5);
movd (mm1, edi+ARG1); // src[%1...]
movd (mm3, edi+edx1+ARG1); // src[x_stride+%1...]
punpcklbw (mm1, mm4); // [ |b |g |r ]
punpcklbw (mm3, mm5); // [ |b |g |r ]
paddw (mm6, mm1 ); // +[ |b4|g4|r4]
paddw (mm6, mm3 ); // +[ |b4|g4|r4]
pmaddwd (mm1, mm7 ); // *= Y_MUL
pmaddwd (mm3, mm7 ); // *= Y_MUL
movq (mm4, mm1 ); // [r]
movq (mm5, mm3 ); // [r]
psrlq (mm4, 32 ); // +[g]
psrlq (mm5, 32 ); // +[g]
paddd (mm1, mm4 ); // +[b]
paddd (mm3, mm5 ); // +[b]
//push edx
unsigned int edx;
movd ((int*)&edx, mm0);
edx>>= 8;
edx+=YUV_RGB_DATA<CCIR>::Y_ADD;
*esi=(unsigned char)edx; // y_ptr[0]
movd ((int*)&edx, mm1);
edx>>= 8;
edx+=YUV_RGB_DATA<CCIR>::Y_ADD;
*(esi + 1)=(unsigned char)edx; // y_ptr[1]
movd ((int*)&edx, mm2);
edx>>= 8;
edx+=YUV_RGB_DATA<CCIR>::Y_ADD;
*(esi + eax + 0)=(unsigned char)edx; // y_ptr[y_stride + 0]
movd ((int*)&edx, mm3);
edx>>= 8;
edx+=YUV_RGB_DATA<CCIR>::Y_ADD;
*(esi + eax + 1)=(unsigned char)edx; // y_ptr[y_stride + 1]
// u_ptr, v_ptr
movq (mm0, mm6); // = [ |b4|g4|r4]
pmaddwd (mm6, YUV_RGB_DATA<CCIR>::v_mul); // *= V_MUL
pmaddwd (mm0, YUV_RGB_DATA<CCIR>::u_mul); // *= U_MUL
movq (mm1, mm0);
movq (mm2, mm6);
psrlq (mm1, 32);
psrlq (mm2, 32);
paddd (mm0, mm1);
paddd (mm2, mm6);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -