⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 deblock_horiz_lpf9.c

📁 <VC++视频音频开发>一书的光盘资料。
💻 C
字号:


#include "postprocess_mmx.h"




/*
 * Constants and scratch storage used by the MMX code in deblock_horiz_lpf9().
 * The inline assembler refers to these objects by name, and to mm64_coefs
 * by byte offset (0, 8, ... 136), so names, order of the table entries and
 * element size must not change.
 */

/* Four 16-bit lanes each holding 8: rounding term added before the >> 4. */
static const uint64_t mm64_0008 = 0x0008000800080008;

/* Eight bytes each holding 1. */
static const uint64_t mm64_0101 = 0x0101010101010101;

/* Scratch quadword the assembler uses to park a register value. */
static uint64_t mm64_temp;

/*
 * Per-pixel weights, four 16-bit lanes per entry.  The lowest 16 bits of
 * each literal are lane 0.  "left" entries are accumulated into the result
 * for output pixels 1..4 (lane 0 = pixel 1), "right" entries into the
 * result for output pixels 5..8 (lane 0 = pixel 5).
 */
static const uint64_t mm64_coefs[18] = {
	0x0001000200040006,	/* [ 0] p1 left  */
	0x0000000000000001,	/* [ 1] v1 right */
	0x0001000200020004,	/* [ 2] v1 left  */
	0x0000000000010001,	/* [ 3] v2 right */
	0x0002000200040002,	/* [ 4] v2 left  */
	0x0000000100010002,	/* [ 5] v3 right */
	0x0002000400020002,	/* [ 6] v3 left  */
	0x0001000100020002,	/* [ 7] v4 right */
	0x0004000200020001,	/* [ 8] v4 left  */
	0x0001000200020004,	/* [ 9] v5 right */
	0x0002000200010001,	/* [10] v5 left  */
	0x0002000200040002,	/* [11] v6 right */
	0x0002000100010000,	/* [12] v6 left  */
	0x0002000400020002,	/* [13] v7 right */
	0x0001000100000000,	/* [14] v7 left  */
	0x0004000200020001,	/* [15] v8 right */
	0x0001000000000000,	/* [16] v8 left  */
	0x0006000400020001	/* [17] p2 right */
};

/* Border values handed to the assembler: bytes p1, p1, p2, p2 (low to high). */
static uint32_t mm32_p1p2;

/* Address of the quadword the assembler loads first for the current row. */
static uint8_t *pmm1;





/*
 * deblock_horiz_lpf9 -- horizontal low-pass filter over eight pixels of a
 * row, applied to 4 consecutive rows, implemented with MMX inline assembler.
 *
 * For every row the pixels v[1]..v[8] are replaced by
 *     (weighted sum of p1, v[1]..v[8], p2  +  8) >> 4
 * using the weights in mm64_coefs.  This matches the kernel
 * [1 1 2 2 4 2 2 1 1] / 16 with every sample left of v[1] taken as p1 and
 * every sample right of v[8] taken as p2.
 *
 *   v      - pointer to pixel 0 of the first row.  v[0] and v[9] are only
 *            inspected to choose p1/p2.  The assembler loads whole
 *            quadwords, so bytes v[-3]..v[12] of each row must be readable;
 *            bytes v[1]..v[8] of each row are overwritten.
 *   stride - distance in bytes between the starts of successive rows.
 *   QP     - threshold used when choosing the border values p1 and p2.
 *
 * NOTE: the routine passes data to the assembler through the file-scope
 * variables mm32_p1p2, mm64_temp and pmm1, so it is not reentrant.
 * NOTE(review): no emms is executed here; presumably the caller restores
 * the FPU state after the MMX work -- confirm.
 *
 * When PP_SELF_CHECK is defined the same result is computed in plain C
 * before the assembler runs and compared with what the assembler wrote.
 */
INLINE void deblock_horiz_lpf9(uint8_t *v, int stride, int QP) {
	int y, p1, p2;
	#ifdef PP_SELF_CHECK
	uint8_t selfcheck[9];
	int psum;
	uint8_t *vv;
	int i;
	#endif

	for (y=0; y<4; y++) {
		/* Border values: take the outer neighbour (v[0] / v[9]) only when it
		 * differs from the adjacent filtered pixel by less than QP, otherwise
		 * repeat the adjacent pixel (v[1] / v[8]).  ABS comes from the project
		 * header -- presumably absolute value. */
		p1 = (ABS(v[0+y*stride]-v[1+y*stride]) < QP ) ?  v[0+y*stride] : v[1+y*stride];
		p2 = (ABS(v[8+y*stride]-v[9+y*stride]) < QP ) ?  v[9+y*stride] : v[8+y*stride];

		/* Pack the bytes p1,p1,p2,p2 (low to high) into one dword.  The
		 * arithmetic is done in uint32_t: in plain int the product exceeds
		 * INT_MAX whenever p2 >= 128, which is undefined behaviour. */
		mm32_p1p2 = 0x0101u * (((uint32_t)p2 << 16) + (uint32_t)p1);

		#ifdef PP_SELF_CHECK
		/* Reference result in C, taken before the assembler overwrites the
		 * row.  psum is a running sum that is updated as the window slides
		 * from output pixel 1 to output pixel 8. */
		vv = &(v[y*stride]);
		psum = p1 + p1 + p1 + vv[1] + vv[2] + vv[3] + vv[4] + 4;
		selfcheck[1] = (((psum + vv[1]) << 1) - (vv[4] - vv[5])) >> 4;
		psum += vv[5] - p1;
		selfcheck[2] = (((psum + vv[2]) << 1) - (vv[5] - vv[6])) >> 4;
		psum += vv[6] - p1;
		selfcheck[3] = (((psum + vv[3]) << 1) - (vv[6] - vv[7])) >> 4;
		psum += vv[7] - p1;
		selfcheck[4] = (((psum + vv[4]) << 1) + p1 - vv[1] - (vv[7] - vv[8])) >> 4;
		psum += vv[8] - vv[1];
		selfcheck[5] = (((psum + vv[5]) << 1) + (vv[1] - vv[2]) - vv[8] + p2) >> 4;
		psum += p2 - vv[2];
		selfcheck[6] = (((psum + vv[6]) << 1) + (vv[2] - vv[3])) >> 4;
		psum += p2 - vv[3];
		selfcheck[7] = (((psum + vv[7]) << 1) + (vv[3] - vv[4])) >> 4;
		psum += p2 - vv[4];
		selfcheck[8] = (((psum + vv[8]) << 1) + (vv[4] - vv[5])) >> 4;
		#endif

		/* The first quadword load starts 3 bytes before pixel 0, so its upper
		 * four bytes are v[1]..v[4]; the second load (eax+8) starts at v[5]. */
		pmm1 = (&(v[y*stride-3]));
		__asm {
			push eax
			push ebx
			mov eax, pmm1                      /* eax -> v[-3] of this row */
			lea ebx, mm64_coefs                /* ebx -> weight table */

			#ifdef PREFETCH_ENABLE
			prefetcht0 32[ebx]
			#endif

			movd   mm0,   mm32_p1p2            /* mm0 = bytes p1 p1 p2 p2 0 0 0 0 */
			punpcklbw mm0, mm0                 /* mm0 = bytes p1 x4, p2 x4 */

			movq    mm2, qword ptr [eax]       /* mm2 = bytes v[-3]..v[4] */
			pxor    mm7, mm7                   /* mm7 = 0, used to widen bytes to words */

			movq     mm6, mm64_0008            /* mm6 = "left" accumulator, starts at rounding 8 */
			punpckhbw mm2, mm2                 /* mm2 = bytes v1 v1 v2 v2 v3 v3 v4 v4 */

			movq     mm64_temp, mm0            /* keep p1/p2 bytes for the p2 term at the end */

			punpcklbw mm0, mm7                 /* mm0 = words p1 x4 */
			movq      mm5, mm6                 /* mm5 = "right" accumulator, starts at rounding 8 */

			pmullw    mm0, [ebx]               /* p1 * left weights */

			movq      mm1, mm2
			punpcklbw mm2, mm2                 /* mm2 = bytes v1 x4, v2 x4 */

			punpckhbw mm1, mm1                 /* mm1 = bytes v3 x4, v4 x4 */

			#ifdef PREFETCH_ENABLE
			prefetcht0 32[ebx]
			#endif

			movq      mm3, mm2
			punpcklbw mm2, mm7                 /* mm2 = words v1 x4 */

			punpckhbw mm3, mm7                 /* mm3 = words v2 x4 */
			paddw     mm6, mm0                 /* left += p1 term */

			movq      mm0, mm2                 /* mm0 = words v1 x4 */

			pmullw    mm0, 8[ebx]              /* v1 * right weights */
			movq      mm4, mm3                 /* mm4 = words v2 x4 */

			pmullw    mm2, 16[ebx]             /* v1 * left weights */

			pmullw    mm3, 32[ebx]             /* v2 * left weights */

			pmullw    mm4, 24[ebx]             /* v2 * right weights */
			paddw     mm5, mm0                 /* right += v1 term */

			paddw     mm6, mm2                 /* left += v1 term */
			movq      mm2, mm1                 /* mm2 = bytes v3 x4, v4 x4 */

			punpckhbw mm2, mm7                 /* mm2 = words v4 x4 */
			paddw     mm5, mm4                 /* right += v2 term */

			punpcklbw mm1, mm7                 /* mm1 = words v3 x4 */
			paddw     mm6, mm3                 /* left += v2 term */

			#ifdef PREFETCH_ENABLE
			prefetcht0 64[ebx]
			#endif
			movq      mm0, mm1                 /* mm0 = words v3 x4 */

			pmullw    mm1, 48[ebx]             /* v3 * left weights */

			pmullw    mm0, 40[ebx]             /* v3 * right weights */
			movq      mm4, mm2                 /* mm4 = words v4 x4 */

			pmullw    mm2, 64[ebx]             /* v4 * left weights */
			paddw     mm6, mm1                 /* left += v3 term */

			pmullw    mm4, 56[ebx]             /* v4 * right weights */
			pxor      mm3, mm3                 /* mm3 is reloaded below before its next use */

			movq      mm1, 8[eax]              /* mm1 = bytes v[5]..v[12] */
			paddw     mm5, mm0                 /* right += v3 term */

			punpcklbw mm1, mm1                 /* mm1 = bytes v5 v5 v6 v6 v7 v7 v8 v8 */
			paddw     mm6, mm2                 /* left += v4 term */

			#ifdef PREFETCH_ENABLE
			prefetcht0 96[ebx]
			#endif

			movq      mm2, mm1
			paddw     mm5, mm4                 /* right += v4 term */

			punpcklbw mm2, mm2                 /* mm2 = bytes v5 x4, v6 x4 */
			punpckhbw mm1, mm1                 /* mm1 = bytes v7 x4, v8 x4 */

			movq      mm3, mm2
			punpcklbw mm2, mm7                 /* mm2 = words v5 x4 */

			punpckhbw mm3, mm7                 /* mm3 = words v6 x4 */
			movq      mm0, mm2                 /* mm0 = words v5 x4 */

			pmullw    mm0, 72[ebx]             /* v5 * right weights */
			movq      mm4, mm3                 /* mm4 = words v6 x4 */

			pmullw    mm2, 80[ebx]             /* v5 * left weights */

			pmullw    mm3, 96[ebx]             /* v6 * left weights */

			pmullw    mm4, 88[ebx]             /* v6 * right weights */
			paddw     mm5, mm0                 /* right += v5 term */

			paddw     mm6, mm2                 /* left += v5 term */
			movq      mm2, mm1                 /* mm2 = bytes v7 x4, v8 x4 */

			paddw     mm6, mm3                 /* left += v6 term */
			punpcklbw mm1, mm7                 /* mm1 = words v7 x4 */

			paddw     mm5, mm4                 /* right += v6 term */
			punpckhbw mm2, mm7                 /* mm2 = words v8 x4 */

			#ifdef PREFETCH_ENABLE
			prefetcht0 128[ebx]
			#endif

			movq      mm3, mm64_temp           /* mm3 = bytes p1 x4, p2 x4 */
			movq      mm0, mm1                 /* mm0 = words v7 x4 */

			pmullw    mm0, 104[ebx]            /* v7 * right weights */
			movq      mm4, mm2                 /* mm4 = words v8 x4 */

			pmullw    mm1, 112[ebx]            /* v7 * left weights */
			punpckhbw mm3, mm7                 /* mm3 = words p2 x4 */

			pmullw    mm2, 128[ebx]            /* v8 * left weights */

			pmullw    mm4, 120[ebx]            /* v8 * right weights */
			paddw     mm5, mm0                 /* right += v7 term */

			pmullw    mm3, 136[ebx]            /* p2 * right weights */
			paddw     mm6, mm1                 /* left += v7 term */

			paddw     mm6, mm2                 /* left += v8 term */

			paddw     mm5, mm4                 /* right += v8 term */
			psrlw     mm6, 4                   /* left  /= 16 */

			paddw     mm5, mm3                 /* right += p2 term */

			psrlw     mm5, 4                   /* right /= 16 */

			packuswb  mm6, mm5                 /* 8 result bytes: left in low half, right in high half */

			movq      4[eax], mm6              /* store to v[1]..v[8] of this row */

			pop ebx
			pop eax
		};

		#ifdef PP_SELF_CHECK
		/* Compare what the assembler stored with the C reference. */
		for (i=1; i<=8; i++) {
			if (selfcheck[i] != v[i+y*stride]) {
				printf("ERROR: MMX version of horiz lpf9 is incorrect at %d\n", i);
			}
		}
		#endif

	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -