⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 memcopy.c

📁 包含了从MPEG4的视频解码到H.264的视频编码部分的源代码
💻 C
字号:
// memcopy.c : MMX-accelerated memory fill, memory copy and picture border-extension helpers.
//


//#ifdef USEMMX
// memfill - set i bytes starting at dst to the low byte of n32.
//
//   dst  first byte to write
//   n32  fill value; only bits 0..7 are used
//   i    number of bytes to write (0 writes nothing)
//
// 32-bit MSVC builds write whole 32-byte blocks with MMX non-temporal
// stores and finish the remainder with REP STOSB; every other target uses
// a plain byte loop that produces the same bytes.
void  memfill(void *dst, int n32, unsigned long i)
{
#if defined(_MSC_VER) && defined(_M_IX86)
	__asm {
		movd mm0, n32
		punpcklbw mm0, mm0
		punpcklwd mm0, mm0
		punpckldq mm0, mm0		// all 8 bytes of mm0 = low byte of n32
		mov edi, dst
		mov edx, i			// edx keeps the full byte count
		mov ecx, edx
		shr ecx, 5			// ecx = number of whole 32-byte blocks
		jz fill_tail			// fewer than 32 bytes: only the tail remains

fill_block:
		movntq 0[edi], mm0
		movntq 8[edi], mm0
		movntq 16[edi], mm0
		movntq 24[edi], mm0
		add edi, 32
		dec ecx
		jnz fill_block

fill_tail:
		mov ecx, edx
		and ecx, 31			// ecx = bytes left after the blocks (0..31)
		mov eax, n32			// REP STOSB stores AL
		rep stosb			// no-op when ecx == 0
		sfence				// make the weakly-ordered movntq stores visible
		emms				// leave MMX state so x87 code works afterwards
	}
#else
	unsigned char *out = (unsigned char *)dst;
	unsigned char value = (unsigned char)n32;

	while (i != 0) {
		*out++ = value;
		i--;
	}
#endif
}



// memcopy - copy nbytes bytes from src to dst (forward copy; the buffers
// must not overlap).
//
//   dst     destination buffer
//   src     source buffer
//   nbytes  number of bytes to copy; zero or negative copies nothing
//
// 32-bit MSVC builds move whole 64-byte blocks through the eight MMX
// registers with non-temporal stores and copy the remaining 0..63 bytes
// with REP MOVSB; every other target uses a plain byte loop.
void memcopy(void *dst, void *src, int nbytes)
{
	if (nbytes <= 0)
		return;

#if defined(_MSC_VER) && defined(_M_IX86)
	_asm {
		mov esi, src
		mov edi, dst
		mov eax, nbytes		// eax keeps the full byte count
		mov ecx, eax
		shr ecx, 6		// ecx = number of whole 64-byte blocks
		jz copy_tail		// fewer than 64 bytes: only the tail remains

copy_block:
		prefetchnta 64[esi]	// hint the next block; prefetch never faults
		prefetchnta 96[esi]

		movq mm1,  0[esi]	// read one 64-byte block
		movq mm2,  8[esi]
		movq mm3, 16[esi]
		movq mm4, 24[esi]
		movq mm5, 32[esi]
		movq mm6, 40[esi]
		movq mm7, 48[esi]
		movq mm0, 56[esi]

		movntq  0[edi], mm1	// write it with non-temporal stores
		movntq  8[edi], mm2
		movntq 16[edi], mm3
		movntq 24[edi], mm4
		movntq 32[edi], mm5
		movntq 40[edi], mm6
		movntq 48[edi], mm7
		movntq 56[edi], mm0

		add esi, 64
		add edi, 64
		dec ecx
		jnz copy_block

copy_tail:
		mov ecx, eax
		and ecx, 63		// ecx = bytes left after the blocks (0..63)
		rep movsb		// no-op when ecx == 0
		sfence			// make the weakly-ordered movntq stores visible
		emms			// leave MMX state so x87 code works afterwards
	}
#else
	{
		unsigned char *out = (unsigned char *)dst;
		const unsigned char *in = (const unsigned char *)src;
		int k;

		for (k = 0; k < nbytes; k++)
			out[k] = in[k];
	}
#endif
}



 

// copyframewithextend16 - copy an h x w picture into a padded frame and
// replicate its edge pixels into a 16-pixel border on all four sides.
//
//   dst  address of picture pixel (0,0) inside the destination frame.
//        The frame uses a row pitch of w + 32 bytes; 16 rows above and
//        below the picture and 16 bytes left and right of every row are
//        written, so they must belong to the destination buffer.
//   src  source picture, rows stored back to back (row pitch w)
//   h    picture height in rows
//   w    picture width in bytes (any positive value)
//
// Every frame row is produced as: 16 copies of the row's first pixel, the
// w pixels of the row, 16 copies of the row's last pixel. The 16 rows above
// the picture repeat picture row 0 and the 16 rows below repeat row h - 1,
// which also gives the four corners the value of the nearest corner pixel.
// Nothing is written when h or w is not positive. src and the destination
// frame must not overlap.
void copyframewithextend16(void * dst,void *src,int h,int w)
{
	unsigned char *out_row;		// first byte of the frame row being written
	const unsigned char *in_row;	// picture row that feeds it
	int total_rows;			// h + 32 frame rows are written in total
	int last_adv;			// last frame-row index after which the source row advances

	if (h <= 0 || w <= 0)
		return;

	out_row = (unsigned char *)dst - 16 * (w + 32) - 16;
	in_row = (const unsigned char *)src;
	total_rows = h + 32;
	last_adv = h + 14;		// frame row h + 15 is the last one fed by a new picture row

#if defined(_MSC_VER) && defined(_M_IX86)
	_asm
	{
		mov esi, in_row
		mov edi, out_row
		xor edx, edx			// edx = frame row index, 0 .. h + 31

next_row:
		// left border: 16 copies of the first pixel of the row
		movzx eax, byte ptr [esi]
		movd mm0, eax
		punpcklbw mm0, mm0
		punpcklwd mm0, mm0
		punpckldq mm0, mm0
		movq [edi], mm0
		movq [edi+8], mm0
		add edi, 16

		// picture row: whole quadwords first, then the 0..7 leftover bytes
		mov ecx, w
		shr ecx, 3
		jz row_tail
row_qword:
		movq mm0, [esi]
		movq [edi], mm0
		add esi, 8
		add edi, 8
		dec ecx
		jnz row_qword
row_tail:
		mov ecx, w
		and ecx, 7
		rep movsb			// no-op when ecx == 0

		// right border: esi is one past the row, so [esi-1] is its last pixel
		movzx eax, byte ptr [esi-1]
		movd mm0, eax
		punpcklbw mm0, mm0
		punpcklwd mm0, mm0
		punpckldq mm0, mm0
		movq [edi], mm0
		movq [edi+8], mm0
		add edi, 16			// 16 + w + 16 bytes written: edi is at the next frame row

		// esi already points at the next picture row; step back by one row
		// while inside the top border or once the last picture row is reached
		cmp edx, 16
		jl repeat_row
		cmp edx, last_adv
		jle row_chosen
repeat_row:
		sub esi, w
row_chosen:
		inc edx
		cmp edx, total_rows
		jl next_row

		emms				// leave MMX state so x87 code works afterwards
	}
#else
	{
		int r, k;

		for (r = 0; r < total_rows; r++) {
			unsigned char *p = out_row;

			for (k = 0; k < 16; k++)
				*p++ = in_row[0];
			for (k = 0; k < w; k++)
				*p++ = in_row[k];
			for (k = 0; k < 16; k++)
				*p++ = in_row[w - 1];

			out_row += w + 32;
			if (r >= 16 && r <= last_adv)
				in_row += w;
		}
	}
#endif
}
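/* DISABLED DRAFT - never compiled. Intended 64-pixel-border variant of
   copyframewithextend16. Only the two top-corner sections were converted
   (SSE2 stores, 64-pixel offsets); everything from the bottom-left corner
   onward still uses the 16-pixel constants, and `movd2qd` is not a valid
   mnemonic (the MMX-to-XMM move is MOVQ2DQ). Finish and test before enabling.
void copyframewithextend64(void * dst,void *src,int h,int w)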
{
	_asm
	{
		mov esi, src 		
		mov eax,w
		add eax,128
		pxor mm7,mm7
		pxor mm6,mm6
		//扩展左上角
		mov edi, dst
		sub edi,64
		mov ebx,eax
		shl ebx,6
		sub edi,ebx
		
		movd mm0,[esi]
		movd2qd xmm0,mm0		
		PUNPCKLQDQ xmm0,xmm0
		PUNPCKLQDQ xmm0,xmm0
		mov ecx,16
loop_cpbytewithextend_1:
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		sub ecx,1
		jne loop_cpbytewithextend_1
		//扩展右上角
		mov edi, dst
		add edi,w		
		sub edi,ebx
		add esi,w
		sub esi,1
		movd mm0,[esi]
		movd2qd xmm0,mm0		
		PUNPCKLQDQ xmm0,xmm0
		PUNPCKLQDQ xmm0,xmm0
		mov ecx,16
loop_cpbytewithextend_2:
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		movdqu [edi],xmm0
		movdqu [edi+16],xmm0
		movdqu [edi+32],xmm0
		movdqu [edi+48],xmm0
		add edi,eax
		sub ecx,1
		jne loop_cpbytewithextend_2
		//扩展左下角
		mov edi, dst
		mov esi, src
		mov ebx,w
		mov ecx,h
		sub ecx,1
		movd mm7,ebx
		movd mm6,ecx
		PMADDWD mm7,mm6
		movd ebx,mm7
		add esi,ebx
		mov ebx,w
		add ebx,32
		mov ecx,h
		movd mm7,ebx
		movd mm6,ecx
		PMADDWD mm7,mm6
		movd ebx,mm7
		add edi,ebx
		sub edi,16
		xor ecx,ecx
		mov cl,[esi]		
		movd mm0,ecx
		PUNPCKLBW mm0,mm0
		PUNPCKLWD mm0,mm0
		PUNPCKLDQ mm0,mm0
		mov ecx,4
loop_cpbytewithextend_3:
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		sub ecx,1
		jne loop_cpbytewithextend_3
		//扩展右下角
		mov edi, dst
		mov esi, src
		mov ebx,w
		mov ecx,h		
		movd mm7,ebx
		movd mm6,ecx
		PMADDWD mm7,mm6
		movd ebx,mm7
		add esi,ebx
		sub esi,1
		mov ebx,w
		add ebx,32
		mov ecx,h
		movd mm7,ebx
		movd mm6,ecx
		PMADDWD mm7,mm6
		movd ebx,mm7
		add edi,ebx
		add edi,w
		xor ecx,ecx
		mov cl,[esi]		
		movd mm0,ecx
		PUNPCKLBW mm0,mm0
		PUNPCKLWD mm0,mm0
		PUNPCKLDQ mm0,mm0
		mov ecx,4
loop_cpbytewithextend_4:
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		movntq [edi],mm0
		movntq [edi+8],mm0
		add edi,eax
		sub ecx,1
		jne loop_cpbytewithextend_4
		//扩展顶部
		mov edi, dst		
		mov ebx,w 
		add ebx,32
		shl ebx,4
		sub edi,ebx	
		mov edx,16
loop_cpbytewithextend_6:
		mov esi, src
		mov ecx,w
		shr ecx,3
loop_cpbytewithextend_5:
		movq mm0,[esi]
		add esi,8
		movntq [edi],mm0
		add edi,8
		sub ecx,1
		jne loop_cpbytewithextend_5
		add edi,32
		sub edx,1
		jne loop_cpbytewithextend_6
		//扩展底部
		mov edi, dst		
		mov ebx,w
		add ebx,32
		mov ecx,h
		movd mm7,ebx
		movd mm6,ecx
		PMADDWD mm7,mm6
		movd ebx,mm7
		add edi,ebx	
		mov esi, src
		mov ebx,w
		mov ecx,h
		sub ecx,1
		movd mm7,ebx
		movd mm6,ecx
		PMADDWD mm7,mm6
		movd ebx,mm7
		add esi,ebx
		mov ebx,esi
		mov edx,16
loop_cpbytewithextend_8:
		mov esi,ebx
		mov ecx,w
		shr ecx,3
loop_cpbytewithextend_7:
		movq mm0,[esi]
		add esi,8
		movntq [edi],mm0
		add edi,8
		sub ecx,1
		jne loop_cpbytewithextend_7
		add edi,32
		sub edx,1
		jne loop_cpbytewithextend_8
		//扩展左边
		mov edi,dst		
		mov esi,src
		mov edx,h
		mov ebx,w
		mov ecx,ebx
		add ecx,32		
loop_cpbytewithextend_9:
		xor eax,eax
		mov al,[esi]
		movd mm0,eax
		PUNPCKLBW mm0,mm0
		PUNPCKLWD mm0,mm0
		PUNPCKLDQ mm0,mm0
		movq mm1,mm0
		pslld mm1,8
		por mm0,mm1
		pslld mm1,8
		por mm0,mm1
		pslld mm1,8
		por mm0,mm1
		movq [edi-8],mm0
		movq [edi-16],mm0
		add esi,ebx
		add edi,ecx		
		sub edx,1
		jne loop_cpbytewithextend_9
		//扩展右边
		mov edi,dst		
		mov esi,src
		mov edx,h
		mov ebx,w
		mov ecx,ebx
		add ecx,32
		add edi,ebx
		add esi,ebx
		sub esi,1
loop_cpbytewithextend_10:
		xor eax,eax
		mov al,[esi]
		movd mm0,eax
		PUNPCKLBW mm0,mm0
		PUNPCKLWD mm0,mm0
		PUNPCKLDQ mm0,mm0
		movq mm1,mm0
		pslld mm1,8
		por mm0,mm1
		pslld mm1,8
		por mm0,mm1
		pslld mm1,8
		por mm0,mm1
		movq [edi],mm0
		movq [edi+8],mm0
		add esi,ebx
		add edi,ecx		
		sub edx,1
		jne loop_cpbytewithextend_10
		//拷贝中间
		mov edi,dst		
		mov esi,src
		mov edx,h
		mov eax,w
		mov ebx,eax
		add ebx,32		
loop_cpbytewithextend_12:
		mov ecx,eax
		shr ecx,3
		movd mm6,edi
		movd mm7,esi
loop_cpbytewithextend_11:
		movq mm0,[esi]
		add esi,8
		movq [edi],mm0
		add edi,8
		sub ecx,1
		jne loop_cpbytewithextend_11	
		movd edi,mm6
		movd esi,mm7
		add edi,ebx		
		add esi,eax
		sub edx,1
		jne loop_cpbytewithextend_12


		emms
	}
}*/
//#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -