⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mc_sse2.c

📁 linux下实现视频播放的播放器
💻 C
字号:
/*
 *      Copyright (C) 2003-2005 Gabest
 *      http://www.gabest.org
 *
 *  This Program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This Program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 *  http://www.gnu.org/copyleft/gpl.html
 *
 *  Based on Intel's AP-942
 *
 */

#ifdef __MINGW32__

#include <inttypes.h>
#include "attributes.h"
#include "simd.h"
#include "mpeg2.h"
#include "mpeg2_internal.h"

#ifdef __SSE2__

/*
 * SSE2 motion-compensation routines, exported through mpeg2_mc_sse2 at the
 * bottom of this file.
 *
 * Naming: MC_<op>_<dir>_<width>_sse2
 *   op    put - the result overwrites the destination rows
 *         avg - the result is additionally averaged with the bytes already
 *               in the destination before being stored
 *   dir   o   - reference rows are used as they are
 *         x   - each reference row is averaged with the same row one byte
 *               further on (ref + 1)
 *         y   - each reference row is averaged with the row below it
 *         xy  - both of the above
 *   width 16 / 8 - the 16 variants move data with movdqu/movdqa, the 8
 *               variants with movlpd/movhpd
 *
 * Common arguments (named after registers in the first six functions):
 *   dest   / ecx - first destination row
 *   ref    / edx - first reference row
 *   stride / eax - byte offset from one row to the next, applied to both
 *                  dest and ref
 *   height / esi - number of rows to produce. Every loop runs until its
 *                  counter is exactly 0 while stepping by 4 (o, x, y) or by
 *                  2 (xy), so height must be a non-negative multiple of
 *                  that step.
 *
 * The lower-case helpers (movdqu, movdqa, movlpd, movhpd, movlhps, pavgb,
 * psubusb) come from simd.h, which is not part of this file.  The comments
 * below assume each one behaves like the x86 instruction of the same name,
 * written "helper (destination, source)" -- TODO confirm against simd.h.
 * Under that assumption the movdqa stores require 16-byte aligned
 * destination rows, and the y/xy variants read one reference row beyond the
 * rows they produce.
 */

/*
 * Subtracted (psubusb) from one intermediate average in the xy routines.
 * NOTE(review): the name suggests sixteen bytes that are all 1, but
 * _mm_set1_epi16(1) sets sixteen-bit lanes to 1, i.e. only every other byte
 * is 1.  Confirm whether _mm_set1_epi8(1) was intended; left unchanged
 * because it affects the produced pixel values.
 */
static const __m128i const_1_16_bytes=_mm_set1_epi16(1);

/* put, no interpolation, 16 bytes per row: copies four rows per iteration. */
static void MC_put_o_16_sse2(uint8_t* ecx, const uint8_t* edx, const int eax, int esi)
{
    const int edi= eax+eax;     /* 2 * stride */
    const int ebx= edi+eax;     /* 3 * stride */

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        __m128i xmm0,xmm1,xmm2,xmm3;

        movdqu (xmm0, edx);
        movdqu (xmm1, edx+eax);
        movdqu (xmm2, edx+edi);
        movdqu (xmm3, edx+ebx);
        movdqa (ecx, xmm0);
        movdqa (ecx+eax, xmm1);
        movdqa (ecx+edi, xmm2);
        movdqa (ecx+ebx, xmm3);
    }
}

/*
 * put, no interpolation, 8 bytes per row: copies four rows per iteration.
 * Uses __m128d temporaries where the other 8-byte routines use __m128i;
 * kept exactly as in the original.
 */
static void MC_put_o_8_sse2(uint8_t* ecx, const uint8_t* edx, const int eax, int esi)
{
    const int edi= eax+eax;     /* 2 * stride */
    const int ebx= edi+eax;     /* 3 * stride */

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        __m128d xmm0,xmm1,xmm2,xmm3;

        movlpd (xmm0, edx);
        movlpd (xmm1, edx+eax);
        movlpd (xmm2, edx+edi);
        movlpd (xmm3, edx+ebx);
        movlpd (ecx, xmm0);
        movlpd (ecx+eax, xmm1);
        movlpd (ecx+edi, xmm2);
        movlpd (ecx+ebx, xmm3);
    }
}

/*
 * put, horizontal interpolation, 16 bytes per row: each output row is the
 * pavgb of a reference row and the same row starting one byte later.
 */
static void MC_put_x_16_sse2(uint8_t* ecx, const uint8_t* edx, const int eax, int esi)
{
    const int edi= eax+eax;     /* 2 * stride */
    const int ebx= edi+eax;     /* 3 * stride */

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;

        movdqu (xmm0, edx);
        movdqu (xmm1, edx+1);
        movdqu (xmm2, edx+eax);
        movdqu (xmm3, edx+eax+1);
        movdqu (xmm4, edx+edi);
        movdqu (xmm5, edx+edi+1);
        movdqu (xmm6, edx+ebx);
        movdqu (xmm7, edx+ebx+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        pavgb (xmm4, xmm5);
        pavgb (xmm6, xmm7);
        movdqa (ecx, xmm0);
        movdqa (ecx+eax, xmm2);
        movdqa (ecx+edi, xmm4);
        movdqa (ecx+ebx, xmm6);
    }
}

/*
 * put, horizontal interpolation, 8 bytes per row.  Two rows share one
 * register (movlpd fills one half, movhpd the other), so four rows need
 * only two pavgb operations.
 */
static void MC_put_x_8_sse2(uint8_t* ecx, const uint8_t* edx, const int eax, int esi)
{
    const int edi= eax+eax;     /* 2 * stride */
    const int ebx= edi+eax;     /* 3 * stride */
    __m128i xmm0,xmm1,xmm2,xmm3;

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        movlpd (xmm0, edx);
        movlpd (xmm1, edx+1);
        movhpd (xmm0, edx+eax);
        movhpd (xmm1, edx+eax+1);
        movlpd (xmm2, edx+edi);
        movlpd (xmm3, edx+edi+1);
        movhpd (xmm2, edx+ebx);
        movhpd (xmm3, edx+ebx+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        movlpd (ecx, xmm0);
        movhpd (ecx+eax, xmm0);
        movlpd (ecx+edi, xmm2);
        movhpd (ecx+ebx, xmm2);
    }
}

/*
 * put, vertical interpolation, 16 bytes per row: each output row is the
 * pavgb of a reference row and the row below it.  Five reference rows are
 * involved per iteration; the fifth is carried over in xmm0 as the first
 * row of the next iteration.
 */
static void MC_put_y_16_sse2(uint8_t* ecx, const uint8_t* edx, const int eax, int esi)
{
    const int edi= eax+eax;     /* 2 * stride */
    const int ebx= edi+eax;     /* 3 * stride */
    __m128i xmm0;

    movdqu (xmm0, edx);
    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        __m128i xmm1,xmm2,xmm3,xmm4;

        movdqu (xmm1, edx+eax);
        movdqu (xmm2, edx+edi);
        movdqu (xmm3, edx+ebx);
        movdqu (xmm4, edx+edi*2);
        pavgb (xmm0, xmm1);
        pavgb (xmm1, xmm2);
        pavgb (xmm2, xmm3);
        pavgb (xmm3, xmm4);
        movdqa (ecx, xmm0);
        movdqa (ecx+eax, xmm1);
        movdqa (ecx+edi, xmm2);
        movdqa (ecx+ebx, xmm3);
        movdqa (xmm0, xmm4);    /* row 4 becomes row 0 of the next pass */
    }
}

/*
 * put, vertical interpolation, 8 bytes per row: same scheme as
 * MC_put_y_16_sse2 with one row per register, moved with movlpd.
 */
static void MC_put_y_8_sse2(uint8_t* ecx, const uint8_t* edx, const int eax, int esi)
{
    const int edi= eax+eax;     /* 2 * stride */
    const int ebx= edi+eax;     /* 3 * stride */
    __m128i xmm0;

    movlpd (xmm0, edx);
    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        __m128i xmm1,xmm2,xmm3,xmm4;

        movlpd (xmm1, edx+eax);
        movlpd (xmm2, edx+edi);
        movlpd (xmm3, edx+ebx);
        movlpd (xmm4, edx+edi*2);
        pavgb (xmm0, xmm1);
        pavgb (xmm1, xmm2);
        pavgb (xmm2, xmm3);
        pavgb (xmm3, xmm4);
        movlpd (ecx, xmm0);
        movlpd (ecx+eax, xmm1);
        movlpd (ecx+edi, xmm2);
        movlpd (ecx+ebx, xmm3);
        movdqa (xmm0, xmm4);    /* row 4 becomes row 0 of the next pass */
    }
}

/*
 * put, horizontal + vertical interpolation, 16 bytes per row, two rows per
 * iteration.  Each row is first averaged with its one-byte-shifted copy,
 * then neighbouring rows are averaged; const_1_16_bytes is subtracted from
 * the middle row's horizontal average before the vertical step.  The last
 * row pair loaded (xmm4 / original xmm5) is carried into the next iteration
 * as xmm0 / xmm1.
 */
static void MC_put_xy_16_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int eax= stride;
    int esi= height;
    int edi= eax+eax;           /* 2 * stride */
    __m128i xmm7,xmm0,xmm1,xmm4,xmm5,xmm2,xmm3;

    movdqa (xmm7, const_1_16_bytes);
    movdqu (xmm0, edx);
    movdqu (xmm1, edx+1);
    for (;esi;edx+= edi,ecx+= edi,esi-= 2) {
        movdqu (xmm2, edx+eax);
        movdqu (xmm3, edx+eax+1);
        movdqu (xmm4, edx+edi);
        movdqu (xmm5, edx+edi+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        movdqa (xmm1, xmm5);    /* keep the shifted row 2 before xmm5 is overwritten */
        pavgb (xmm5, xmm4);
        psubusb (xmm2, xmm7);
        pavgb (xmm0, xmm2);
        pavgb (xmm2, xmm5);
        movdqa (ecx, xmm0);
        movdqa (xmm0, xmm4);    /* row 2 becomes row 0 of the next pass */
        movdqa (ecx+eax, xmm2);
    }
}

/*
 * put, horizontal + vertical interpolation, 8 bytes per row: same sequence
 * as MC_put_xy_16_sse2 with movlpd used for the memory accesses.
 */
static void MC_put_xy_8_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int eax= stride;
    int esi= height;
    int edi= eax+eax;           /* 2 * stride */
    __m128i xmm7,xmm0,xmm2,xmm1,xmm3,xmm4,xmm5;

    movdqa (xmm7, const_1_16_bytes);
    movlpd (xmm0, edx);
    movlpd (xmm1, edx+1);
    for (;esi;edx+= edi,ecx+= edi,esi-= 2) {
        movlpd (xmm2, edx+eax);
        movlpd (xmm3, edx+eax+1);
        movlpd (xmm4, edx+edi);
        movlpd (xmm5, edx+edi+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        movdqa (xmm1, xmm5);    /* keep the shifted row 2 before xmm5 is overwritten */
        pavgb (xmm5, xmm4);
        psubusb (xmm2, xmm7);
        pavgb (xmm0, xmm2);
        pavgb (xmm2, xmm5);
        movlpd (ecx, xmm0);
        movdqa (xmm0, xmm4);    /* row 2 becomes row 0 of the next pass */
        movlpd (ecx+eax, xmm2);
    }
}

/*
 * avg, no interpolation, 16 bytes per row: each reference row is averaged
 * (pavgb) with the destination row and written back, four rows per pass.
 */
static void MC_avg_o_16_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    int ebx= edi+eax;           /* 3 * stride */

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        __m128i xmm0,xmm1,xmm2,xmm3;

        movdqu (xmm0, edx);
        movdqu (xmm1, edx+eax);
        movdqu (xmm2, edx+edi);
        movdqu (xmm3, edx+ebx);
        pavgb (xmm0, ecx);
        pavgb (xmm1, ecx+eax);
        pavgb (xmm2, ecx+edi);
        pavgb (xmm3, ecx+ebx);
        movdqa (ecx, xmm0);
        movdqa (ecx+eax, xmm1);
        movdqa (ecx+edi, xmm2);
        movdqa (ecx+ebx, xmm3);
    }
}

/*
 * avg, no interpolation, 8 bytes per row.  Reference rows are packed two per
 * register (xmm0, xmm2), the matching destination rows likewise (xmm1,
 * xmm3), and each pair of registers is averaged in one pavgb.
 */
static void MC_avg_o_8_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    int ebx= edi+eax;           /* 3 * stride */
    __m128i xmm0,xmm1,xmm2,xmm3;

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        movlpd (xmm0, edx);
        movhpd (xmm0, edx+eax);
        movlpd (xmm2, edx+edi);
        movhpd (xmm2, edx+ebx);
        movlpd (xmm1, ecx);
        movhpd (xmm1, ecx+eax);
        movlpd (xmm3, ecx+edi);
        movhpd (xmm3, ecx+ebx);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        movlpd (ecx, xmm0);
        movhpd (ecx+eax, xmm0);
        movlpd (ecx+edi, xmm2);
        movhpd (ecx+ebx, xmm2);
    }
}

/*
 * avg, horizontal interpolation, 16 bytes per row: the horizontal average of
 * each reference row is averaged again with the destination row.
 */
static void MC_avg_x_16_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    int ebx= edi+eax;           /* 3 * stride */
    __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7;

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        movdqu (xmm0, edx);
        movdqu (xmm1, edx+1);
        movdqu (xmm2, edx+eax);
        movdqu (xmm3, edx+eax+1);
        movdqu (xmm4, edx+edi);
        movdqu (xmm5, edx+edi+1);
        movdqu (xmm6, edx+ebx);
        movdqu (xmm7, edx+ebx+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        pavgb (xmm4, xmm5);
        pavgb (xmm6, xmm7);
        pavgb (xmm0, ecx);
        pavgb (xmm2, ecx+eax);
        pavgb (xmm4, ecx+edi);
        pavgb (xmm6, ecx+ebx);
        movdqa (ecx, xmm0);
        movdqa (ecx+eax, xmm2);
        movdqa (ecx+edi, xmm4);
        movdqa (ecx+ebx, xmm6);
    }
}

/*
 * avg, horizontal interpolation, 8 bytes per row: two rows per register,
 * horizontal average first, then average with the destination rows held in
 * xmm4 / xmm5.
 */
static void MC_avg_x_8_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    int ebx= edi+eax;           /* 3 * stride */
    __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5;

    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        movlpd (xmm0, edx);
        movlpd (xmm1, edx+1);
        movhpd (xmm0, edx+eax);
        movhpd (xmm1, edx+eax+1);
        movlpd (xmm2, edx+edi);
        movlpd (xmm3, edx+edi+1);
        movhpd (xmm2, edx+ebx);
        movhpd (xmm3, edx+ebx+1);
        movlpd (xmm4, ecx);
        movhpd (xmm4, ecx+eax);
        movlpd (xmm5, ecx+edi);
        movhpd (xmm5, ecx+ebx);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        pavgb (xmm0, xmm4);
        pavgb (xmm2, xmm5);
        movlpd (ecx, xmm0);
        movhpd (ecx+eax, xmm0);
        movlpd (ecx+edi, xmm2);
        movhpd (ecx+ebx, xmm2);
    }
}

/*
 * avg, vertical interpolation, 16 bytes per row: as MC_put_y_16_sse2, with
 * every vertical average additionally averaged with the destination row.
 */
static void MC_avg_y_16_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    int ebx= edi+eax;           /* 3 * stride */
    __m128i xmm0,xmm1,xmm2,xmm3,xmm4;

    movdqu (xmm0,edx);
    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        movdqu (xmm1, edx+eax);
        movdqu (xmm2, edx+edi);
        movdqu (xmm3, edx+ebx);
        movdqu (xmm4, edx+edi*2);
        pavgb (xmm0, xmm1);
        pavgb (xmm1, xmm2);
        pavgb (xmm2, xmm3);
        pavgb (xmm3, xmm4);
        pavgb (xmm0, ecx);
        pavgb (xmm1, ecx+eax);
        pavgb (xmm2, ecx+edi);
        pavgb (xmm3, ecx+ebx);
        movdqa (ecx, xmm0);
        movdqa (ecx+eax, xmm1);
        movdqa (ecx+edi, xmm2);
        movdqa (ecx+ebx, xmm3);
        movdqa (xmm0, xmm4);    /* row 4 becomes row 0 of the next pass */
    }
}

/*
 * avg, vertical interpolation, 8 bytes per row.  Rows are packed two per
 * register (movhpd: upper row, movlpd: lower row); movlhps copies the low
 * half of the previous register into the high half of the next so that
 * consecutive registers hold overlapping row pairs.
 *
 * NOTE(review): if the helpers mirror the x86 instructions, xmm0 enters the
 * loop holding rows (0, 1), but the carry-over at the bottom leaves it
 * holding rows (3, 4) while edx has advanced to row 4, where rows (4, 5)
 * would be expected.  That would make every pass after the first (i.e. any
 * height above 4) average the wrong rows.  Verify against simd.h and a
 * reference decoder before changing; behaviour is preserved here.
 */
static void MC_avg_y_8_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    int ebx= edi+eax;           /* 3 * stride */
    __m128i xmm0,xmm1,xmm2,xmm3,xmm4,xmm5;

    movhpd (xmm0, edx);
    movlpd (xmm0, edx+eax);
    for (;esi;edx+=edi*2,ecx+=edi*2,esi-=4) {
        movlhps (xmm1, xmm0);
        movlpd (xmm1, edx+edi);
        movlhps (xmm2, xmm1);
        movlpd (xmm2, edx+ebx);
        movlhps (xmm3, xmm2);
        movlpd (xmm3, edx+edi*2);
        movhpd (xmm4, ecx);
        movlpd (xmm4, ecx+eax);
        movhpd (xmm5, ecx+edi);
        movlpd (xmm5, ecx+ebx);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        pavgb (xmm0, xmm4);
        pavgb (xmm2, xmm5);
        movhpd (ecx, xmm0);
        movlpd (ecx+eax, xmm0);
        movhpd (ecx+edi, xmm2);
        movlpd (ecx+ebx, xmm2);
        movdqa (xmm0, xmm3);    /* carried into the next pass (see NOTE above) */
    }
}

/*
 * avg, horizontal + vertical interpolation, 16 bytes per row: the sequence
 * of MC_put_xy_16_sse2 followed by an average with the destination rows.
 */
static void MC_avg_xy_16_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    __m128i xmm7,xmm0,xmm1,xmm2,xmm3,xmm4,xmm5;

    movdqa (xmm7, const_1_16_bytes);
    movdqu (xmm0, edx);
    movdqu (xmm1, edx+1);
    for (;esi;edx+=edi,ecx+=edi,esi-=2) {
        movdqu (xmm2, edx+eax);
        movdqu (xmm3, edx+eax+1);
        movdqu (xmm4, edx+edi);
        movdqu (xmm5, edx+edi+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        movdqa (xmm1, xmm5);    /* keep the shifted row 2 before xmm5 is overwritten */
        pavgb (xmm5, xmm4);
        psubusb (xmm2, xmm7);
        pavgb (xmm0, xmm2);
        pavgb (xmm2, xmm5);
        pavgb (xmm0, ecx);
        pavgb (xmm2, ecx+eax);
        movdqa (ecx, xmm0);
        movdqa (xmm0, xmm4);    /* row 2 becomes row 0 of the next pass */
        movdqa (ecx+eax, xmm2);
    }
}

/*
 * avg, horizontal + vertical interpolation, 8 bytes per row.  Unlike the
 * other xy routines this one packs two rows per register and averages
 * vertically first (xmm0 with xmm1, xmm2 with xmm3), subtracts
 * const_1_16_bytes, then averages the unshifted and shifted results, and
 * finally averages with the two destination rows.
 *
 * NOTE(review): if the helpers mirror the x86 instructions, the carry-over
 * at the bottom leaves xmm0 / xmm2 holding rows (1, 2) while edx has
 * advanced to row 2, where rows (2, 3) would be expected for the next pass.
 * That would make every pass after the first average the wrong rows.
 * Verify against simd.h and a reference decoder before changing; behaviour
 * is preserved here.
 */
static void MC_avg_xy_8_sse2(uint8_t* dest, const uint8_t* ref, const int stride, int height)
{
    const uint8_t *edx= ref;
    uint8_t *ecx= dest;
    int esi= height;
    int eax= stride;
    int edi= eax+eax;           /* 2 * stride */
    __m128i xmm7,xmm0,xmm2,xmm1,xmm3,xmm4;

    movdqa (xmm7, const_1_16_bytes);
    movhpd (xmm0, edx);
    movlpd (xmm0, edx+eax);
    movhpd (xmm2, edx+1);
    movlpd (xmm2, edx+eax+1);
    for (;esi;edx+=edi,ecx+=edi,esi-=2) {
        movhpd (xmm1, edx+eax);
        movlpd (xmm1, edx+edi);
        movhpd (xmm3, edx+eax+1);
        movlpd (xmm3, edx+edi+1);
        pavgb (xmm0, xmm1);
        pavgb (xmm2, xmm3);
        psubusb (xmm0, xmm7);
        pavgb (xmm0, xmm2);
        movhpd (xmm4, ecx);
        movlpd (xmm4, ecx+eax);
        pavgb (xmm0, xmm4);
        movhpd (ecx, xmm0);
        movlpd (ecx+eax, xmm0);
        movdqa (xmm0, xmm1);    /* carried into the next pass (see NOTE above) */
        movdqa (xmm2, xmm3);
    }
}

/*
 * Function table handed to the decoder: first the eight "put" routines,
 * then the eight "avg" routines, each group ordered o, x, y, xy for the
 * 16-byte variants followed by o, x, y, xy for the 8-byte variants.
 */
mpeg2_mc_t mpeg2_mc_sse2 = {
    {MC_put_o_16_sse2, MC_put_x_16_sse2, MC_put_y_16_sse2, MC_put_xy_16_sse2,
     MC_put_o_8_sse2,  MC_put_x_8_sse2,  MC_put_y_8_sse2,  MC_put_xy_8_sse2},
    {MC_avg_o_16_sse2, MC_avg_x_16_sse2, MC_avg_y_16_sse2, MC_avg_xy_16_sse2,
     MC_avg_o_8_sse2,  MC_avg_x_8_sse2,  MC_avg_y_8_sse2,  MC_avg_xy_8_sse2}
};

#endif //__SSE2__
#endif // __MINGW32__

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -