⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blit_mmx.asm

📁 大名鼎鼎的CE下播放软件,TCPPMP的源代码!!!2410下可以流畅的解QVGA的H264,MPEG4等格式.
💻 ASM
📖 第 1 页 / 共 2 页
字号:
	psrlq mm0,24		;000ggg00
	por mm1,mm6			;ccbbbaaa
	punpckldq mm6,mm7	;0cccxxxx
	pslld mm7,8			;ddd0xxxx
	por mm5,mm0			;hhhgggff
	psrld mm6,16		;000cxxxx
	punpckhdq mm7,mm3	;0eeeddd0
	punpckhdq mm6,mm4	;f000000c
	save %1,[edi],mm1
	por mm7,mm6			;feeedddc
	add ecx,4
	add edx,4
	save %1,[edi+8],mm7
	add esi,8
	save %1,[edi+16],mm5
	add edi,24
%endif

%endmacro

;---------------------------------------------------------------------

; blit_pack_line caps, mode, type, align, repeat, name
;
; Emits the row routine blit_i420_<mode>_<type>_<caps>.  The code placed
; ahead of the public label is its tail path and is reached only by jumps.
;
; Register use visible in this macro:
;   ebp  in: unit count for the row (the callers in this file load Width/2)
;   esi  source pointer; the row end address is formed as esi+ebp*2
;   edi  destination pointer; only its alignment is tested here
;   eax  saved around blit_pack_single_* for the rgb and yuy2_color
;        variants (presumably clobbered there - that macro is not
;        visible from here, TODO confirm)
; How blit_pack_single_* / blit_pack_block_* advance the pointers is
; defined by those macros, not here.
%macro blit_pack_line 6

%%short_row:
	add	ebp, ebp		; the tail loop counts in doubled units
%%tail_entry:
%if (s%6=srgb) || (s%6=syuy2_color)
	push	eax
%endif
%%tail_step:
	blit_pack_single_%6 %1,%3,%2
	sub	ebp, 2			; each single step retires 2 counts
	jne	%%tail_step
%if (s%6=srgb) || (s%6=syuy2_color)
	pop	eax
%endif
	ret

blit_i420_%2_%3_%1:
	cmp	ebp, 8+%4
	jl	%%short_row		; too short for the block loop

%%block_path:
	lea	ebp, [esi+ebp*2]	; ebp = source end address
%if %4==4
	; one single step first when the destination is at 4 mod 8
	test	edi, 4
	je	%%dst_ready
%if (s%6=srgb) || (s%6=syuy2_color)
	push	eax
%endif
	blit_pack_single_%6 %1,%3,%2
%if (s%6=srgb) || (s%6=syuy2_color)
	pop	eax
%endif
%%dst_ready:
%endif

	add	esi, 16			; bias esi so the compare below looks ahead
%%block_loop:
%assign i 0
%rep %5
	blit_pack_block_%6 %1,%3,%2,i
%assign i i+1
%endrep
	cmp	esi, ebp
	jbe	%%block_loop
	sub	esi, 16			; remove the look-ahead bias
	sub	ebp, esi		; ebp = source bytes still to convert
	je	%%row_done
	jmp	%%tail_entry		; count is already in tail units
%%row_done:
	ret

%endmacro

; blit_pack_next_row dstmul
;
; Helper for blit_pack_inner: after a row routine returns, step every
; pointer back to the start of the row and then move the destination and
; the luma source down one row.  Expands inline, so it relies on the
; stack layout of blit_pack_inner:
;   [esp]       = -Width/2
;   [esp+4]     =  Width/2
;   [esp+12+28] =  DstPitch
; ebx must hold SrcPitch/2.  ecx/edx (chroma pointers) are only rewound;
; stepping them to the next chroma row is left to the caller.
; Leaves ebp = Width/2, ready for the next row call.
%macro blit_pack_next_row 1
	mov ebp,[esp] ;-Width/2
%if %1==6
	; a scale of 6 cannot be encoded in one lea, so use 4 + 2
	lea edi,[edi+ebp*4]
	lea edi,[edi+ebp*2]
%else
	lea edi,[edi+ebp*%1]
%endif
	lea esi,[esi+ebp*2]
	add ecx,ebp
	add edx,ebp
	mov ebp,[esp+4] ;Width/2

	add edi,[esp+12+28] ;DstPitch
	lea esi,[esi+ebx*2]
%endmacro

; blit_pack_inner caps, mode, dstmul
;
; Row loop and epilogue shared by the blit_i420_<mode>_<caps> entry
; points.  Expects the frame built by blit_pack: four saved registers plus
; the two dwords -Width/2 and Width/2 on top of the stack, with the Height
; argument at [esp+24+28].
;
; Rows are converted in pairs: the type-1 row routine, then the type-2
; routine, both reading the same chroma row; the chroma pointers move on
; by ebx only after the pair.  The Height argument on the stack is used
; as the countdown and is modified in place.  Ends with emms and ret 32.
%macro blit_pack_inner 3

	sub dword [esp+24+28],2
	jle %%noloop3

%%loop3:
	call blit_i420_%2_1_%1
	blit_pack_next_row %3

	call blit_i420_%2_2_%1
	blit_pack_next_row %3
	add ecx,ebx		; next chroma row
	add edx,ebx
	sub dword [esp+24+28],2
	jg %%loop3

%%noloop3:
	; the countdown is now 0 (two rows left) or negative (one row left)
	call blit_i420_%2_1_%1
	blit_pack_next_row %3

	cmp dword [esp+24+28],0
	jne %%nolast3
	; NOTE(review): the final row uses the type-1 routine, not type-2 as
	; inside the loop - kept as in the original, confirm it is intended.
	call blit_i420_%2_1_%1
%%nolast3:

	add esp,8		; drop -Width/2 and Width/2
	emms
	pop ebp
	pop ebx
	pop edi
	pop esi
	ret 32

%endmacro

; blit_pack caps, mode, align, block, name, color
;
; Emits everything needed for one packed output format:
;   - the two row routines blit_i420_<mode>_1_<caps> / _2_<caps>
;     (and their <mode>_color twins when color is non-zero)
;   - the entry point blit_i420_<mode>_<caps>, declared through the
;     "func" macro (defined elsewhere) with 32 bytes of arguments
;
; Stack arguments after the four pushes, as used below:
;   [esp+0+20] this      [esp+4+20]  DstPtr    [esp+8+20]  SrcPtr
;   [esp+12+20] DstPitch [esp+16+20] SrcPitch  [esp+20+20] Width
;   [esp+24+20] Height
; Two more dwords are then pushed (Width/2 and -Width/2), which is why
; later offsets use a base of 28.
;
; Registers handed to blit_pack_inner:
;   edi = first destination pointer, esi/ecx/edx = y/u/v source pointers,
;   ebx = SrcPitch/2, ebp = Width/2, eax = this+OFFSET_COL.
%macro blit_pack 6

blit_pack_line %1,%2,1,%3,%4,%5
blit_pack_line %1,%2,2,%3,%4,%5

%if %6
blit_pack_line %1,%2_color,1,%3,%4,%5_color
blit_pack_line %1,%2_color,2,%3,%4,%5_color
%endif

func blit_i420_%2_%1,32
	push esi
	push edi
	push ebx
	push ebp

	mov ebx,[esp+16+20] ;SrcPitch
	mov edi,[esp+4+20]  ;DstPtr
	mov esi,[esp+8+20]  ;SrcPtr
	mov ebp,[esp+20+20] ;Width
	mov edi,[edi]
	sar ebx,1
	sar ebp,1
	xor eax,eax
	push ebp		;Width/2
	sub eax,ebp
	push eax		;-Width/2
	mov eax,[esp+0+28]	;this
	mov ecx,[esi+4]		;u
	mov edx,[esi+8]		;v
	mov esi,[esi]		;y
	add eax,OFFSET_COL

%if %6
	; Take the plain path only when all three 16-byte entries of the table
	; at this+OFFSET_COL start with two words of 2048 and have a zero dword
	; at offset 8; anything else selects the _color routines.
	cmp dword [eax],2048+(2048<<16)
	jne %%color
	cmp dword [eax+8],0
	jne %%color
	cmp dword [eax+8*2],2048+(2048<<16)
	jne %%color
	cmp dword [eax+8*3],0
	jne %%color
	cmp dword [eax+8*4],2048+(2048<<16)
	jne %%color
	cmp dword [eax+8*5],0
	jne %%color
	jmp %%nocolor
%%color:
	; blit_pack_inner ends with "ret 32", so this does not fall through
	blit_pack_inner %1,%2_color,%3
%%nocolor:
%endif

	blit_pack_inner %1,%2,%3

%endmacro

; blit_plane_rgb caps, srcfmt, dstfmt
;
; Emits blit_<srcfmt>_<dstfmt>_<caps>: a row-by-row copy of a single
; plane.  The row length in bytes is derived from Width and the pixel size
; of srcfmt (4, 3 or 2 bytes).  Declared through the "func" macro (defined
; elsewhere) with 32 bytes of arguments; ends with emms and ret 32.
;
; Register use:
;   edi dst pointer      esi src pointer
;   eax DstPitch         ebx SrcPitch
;   ebp row length in bytes
;   edx rows remaining   ecx byte count for rep movsb
%macro blit_plane_rgb 3

func blit_%2_%3_%1,32
	push esi
	push edi
	push ebx
	push ebp

	mov eax,[esp+12+20] ;DstPitch
	mov ebx,[esp+16+20] ;SrcPitch

	mov edi,[esp+4+20]  ;DstPtr
	mov esi,[esp+8+20]  ;SrcPtr
	mov ebp,[esp+20+20] ;Width
	mov edx,[esp+24+20] ;Height
	mov edi,[edi]
	mov esi,[esi]
	; Width (pixels) -> row length in bytes
%if (s%2=srgb32) || (s%2=sbgr32)
	shl ebp,2
%endif
%if (s%2=srgb24) || (s%2=sbgr24)
	lea ebp,[ebp+ebp*2]
%endif
%if (s%2=srgb16) || (s%2=sbgr16)
	shl ebp,1		; 2 bytes per pixel (the original shifted by 2)
%endif

%%loopy6:
	_prefetch %1,esi
	mov ecx,ebp
	; The block loop below copies its first 32 bytes unconditionally, and
	; the alignment steps may consume up to 6 bytes (movsw + movsd) first,
	; so rows shorter than 32+6 bytes must take the rep movsb path.
	cmp ebp,32+6
	jl %%small6

	push ebp
	add ebp,esi		; ebp = end of the source row
%if (s%2=srgb16) || (s%2=sbgr16)
	test edi,2
	je %%noalignw6
	movsw
%%noalignw6:
%endif
	test edi,4
	je %%noalignd6
	movsd
%%noalignd6:
	add esi,32		; bias esi so the compare below looks ahead
%%move6:
	movq mm0,[esi-32]
	movq mm1,[esi-24]
	movq mm2,[esi-16]
	movq mm3,[esi-8]
	add esi,32
	_prefetch %1,esi
	save %1,[edi+0],mm0
	save %1,[edi+8],mm1
	save %1,[edi+16],mm2
	save %1,[edi+24],mm3
	add edi,32
	cmp esi,ebp
	jbe %%move6		; loop while another full 32 bytes remain

	sub esi,32		; remove the look-ahead bias
	mov ecx,ebp
	pop ebp			; ebp = row length in bytes again
	sub ecx,esi		; ecx = bytes left in this row

%%small6:
	rep movsb
	; step both pointers from the end of this row to the start of the next
	add edi,eax
	add esi,ebx
	sub edi,ebp
	sub esi,ebp
	dec edx
	jne %%loopy6

	emms
	pop ebp
	pop ebx
	pop edi
	pop esi
	ret 32

%endmacro

; blit caps
;
; Instantiates the complete set of routines for one CPU variant.  "caps"
; becomes the name suffix and is forwarded to the "save" and "_prefetch"
; macros (defined elsewhere).  Emitted here:
;   blit_plane_yuv_<caps>   internal plane copy / colour adjust helper
;   blit_i420_i420_<caps>   three-plane copy built on that helper
;   blit_<fmt>_<fmt>_<caps> plain copies, via blit_plane_rgb
;   blit_i420_<fmt>_<caps>  packed conversions, via blit_pack
%macro blit 1

;Entry point arguments, in stack order:
;This,DstPtr,SrcPtr,DstPitch,SrcPitch,Width,Height,Src2SrcLast

;Register interface of blit_plane_yuv (no stack arguments):
; edi dst
; esi src
; eax dstpitch
; ebx srcpitch
; ebp width (bytes per row)
; edx height (rows)
; ecx mul,ofs (pointer to 4 multiplier words followed by 4 offset words)
;eax and ebx are preserved; ecx, edx, esi, edi and mm0-mm7 are not.

blit_plane_yuv_%1:
	; multiplier 2048 and offset 0 leave pixels unchanged: plain copy
	cmp word [ecx],2048
	jne %%jmpcolor
	cmp word [ecx+8],0
	je  %%loopy
%%jmpcolor:
	jmp %%color		; trampoline: jumps to %%color with an unconditional jmp

%%loopy:
	_prefetch %1,esi
	mov ecx,ebp
	; The block loop below copies its first 32 bytes unconditionally, and
	; the alignment steps may consume up to 6 bytes (movsw + movsd) first,
	; so rows shorter than 32+6 bytes must take the rep movsb path.
	; (The original limit of 32+4 let a 36 or 37 byte row overrun and
	; reach rep movsb with a negative count.)
	cmp ebp,32+6
	jl %%small

	push ebp
	add ebp,esi		; ebp = end of the source row
	test edi,2
	je %%noalignw
	movsw
%%noalignw:
	test edi,4
	je %%noalignd
	movsd
%%noalignd:
	add esi,32		; bias esi so the compare below looks ahead
%%move:
	movq mm0,[esi-32]
	movq mm1,[esi-24]
	movq mm2,[esi-16]
	movq mm3,[esi-8]
	add esi,32
	_prefetch %1,esi
	save %1,[edi+0],mm0
	save %1,[edi+8],mm1
	save %1,[edi+16],mm2
	save %1,[edi+24],mm3
	add edi,32
	cmp esi,ebp
	jbe %%move		; loop while another full 32 bytes remain

	sub esi,32		; remove the look-ahead bias
	mov ecx,ebp
	pop ebp			; ebp = row width again
	sub ecx,esi		; ecx = bytes left in this row

%%small:
	rep movsb
	; step both pointers from the end of this row to the start of the next
	add edi,eax
	add esi,ebx
	sub edi,ebp
	sub esi,ebp
	dec edx
	jne %%loopy
	ret

	; Adjusting path.  Each byte p becomes
	;   saturate_u8( (((p << 5) * mul) >> 16) + ofs )
	; so mul = 2048 is unity gain.
%%color:
	movq mm5,[ecx]		; 4 x mul
	movq mm6,[ecx+8]	; 4 x ofs
	pxor mm7,mm7		; zero, for byte -> word unpacking

%%loopy2:
	_prefetch %1,esi
	cmp ebp,16+4
	jge %%large2
	mov ecx,ebp		; short row: everything goes through the byte loop
	jmp %%small2

%%large2:
	push ebp
	add ebp,esi		; ebp = end of the source row
	test edi,4
	je %%noalignd2

	; destination is at 4 mod 8: convert 4 pixels on their own first
	movd mm0,[esi]
	add esi,4
	punpcklbw mm0,mm7
	psllw  mm0,5
	pmulhw mm0,mm5
	paddsw mm0,mm6
	packuswb mm0,mm0
	movd [edi],mm0
	add edi,4

%%noalignd2:
	add esi,16		; bias esi so the compare below looks ahead
%%move2:
	; 16 pixels per pass, handled as four groups of four words
	movq mm0,[esi-16]
	movq mm2,[esi-8]
	_prefetch %1,esi
	add esi,16
	movq mm1,mm0
	movq mm3,mm2
	punpcklbw mm0,mm7
	punpcklbw mm2,mm7
	psllw  mm0,5
	psllw  mm2,5
	pmulhw mm0,mm5
	pmulhw mm2,mm5
	punpckhbw mm1,mm7
	punpckhbw mm3,mm7
	psllw  mm1,5
	psllw  mm3,5
	pmulhw mm1,mm5
	pmulhw mm3,mm5
	paddsw mm0,mm6
	paddsw mm2,mm6
	paddsw mm1,mm6
	paddsw mm3,mm6
	packuswb mm0,mm1
	packuswb mm2,mm3
	save %1,[edi+0],mm0
	save %1,[edi+8],mm2
	add edi,16
	cmp esi,ebp
	jbe %%move2		; loop while another full 16 bytes remain

	sub esi,16		; remove the look-ahead bias
	mov ecx,ebp
	pop ebp			; ebp = row width again
	sub ecx,esi		; ecx = bytes left in this row

%%small2:
	or ecx,ecx
	jle %%nosmall2
	push eax		; eax (dstpitch) doubles as scratch in the byte loop
%%smallloop2:
	movzx eax,byte [esi]
	add esi,1
	movd mm0,eax
	punpcklbw mm0,mm7
	psllw  mm0,5
	pmulhw mm0,mm5
	paddsw mm0,mm6
	packuswb mm0,mm0
	movd eax,mm0
	mov [edi],al
	add edi,1
	dec ecx
	jne %%smallloop2
	pop eax
%%nosmall2:

	; step both pointers from the end of this row to the start of the next
	add edi,eax
	add esi,ebx
	sub edi,ebp
	sub esi,ebp
	dec edx
	jne %%loopy2
	ret

; blit_i420_i420: runs blit_plane_yuv over the y, u and v planes.  DstPtr
; and SrcPtr point to arrays holding the three plane pointers (y, u, v).
; The u and v passes use half the pitches, width and height of the y pass,
; and each plane reads its own 16-byte mul/ofs pair from this+OFFSET_COL.
func blit_i420_i420_%1,32
	push esi
	push edi
	push ebx
	push ebp

	mov eax,[esp+12+20] ;DstPitch
	mov ebx,[esp+16+20] ;SrcPitch

	mov ecx,[esp+0+20]  ;this
	mov edi,[esp+4+20]  ;DstPtr
	mov esi,[esp+8+20]  ;SrcPtr
	mov ebp,[esp+20+20] ;Width
	mov edx,[esp+24+20] ;Height
	mov edi,[edi]		;y
	mov esi,[esi]		;y
	add ecx,OFFSET_COL
	call blit_plane_yuv_%1

	; the helper preserves eax/ebx: halve both pitches once for u and v
	sar eax,1
	sar ebx,1

	mov ecx,[esp+0+20]  ;this
	mov edi,[esp+4+20]  ;DstPtr
	mov esi,[esp+8+20]  ;SrcPtr
	mov ebp,[esp+20+20] ;Width
	mov edx,[esp+24+20] ;Height
	sar ebp,1
	sar edx,1
	mov edi,[edi+4]		;u
	mov esi,[esi+4]		;u
	add ecx,OFFSET_COL+8*2
	call blit_plane_yuv_%1

	mov ecx,[esp+0+20]  ;this
	mov edi,[esp+4+20]  ;DstPtr
	mov esi,[esp+8+20]  ;SrcPtr
	mov ebp,[esp+20+20] ;Width
	mov edx,[esp+24+20] ;Height
	sar ebp,1
	sar edx,1
	mov edi,[edi+8]		;v
	mov esi,[esi+8]		;v
	add ecx,OFFSET_COL+8*4
	call blit_plane_yuv_%1

	emms
	pop ebp
	pop ebx
	pop edi
	pop esi
	ret 32

;same-format plane copies
blit_plane_rgb %1,rgb32,rgb32
blit_plane_rgb %1,rgb24,rgb24
blit_plane_rgb %1,rgb16,rgb16
;disabled combinations, kept from the original source
;blit_plane_rgb %1,rgb32,rgb24
;blit_plane_rgb %1,rgb24,rgb32
;blit_plane_rgb %1,rgb32,bgr32
;blit_plane_rgb %1,rgb24,bgr24
;blit_plane_rgb %1,rgb32,bgr24
;blit_plane_rgb %1,rgb24,bgr32

;i420 -> packed formats: caps, mode, align, block, name, color
blit_pack %1,yuy2,4,1,yuy2,1
blit_pack %1,rgb32,8,2,rgb,0
blit_pack %1,rgb24,6,2,rgb,0
blit_pack %1,bgr32,8,2,rgb,0
blit_pack %1,bgr24,6,2,rgb,0

%endmacro

; Expand the full routine set three times, once per suffix.  The suffix
; ends every generated symbol name and is handed to the "save" and
; "_prefetch" macros, which presumably choose the store / prefetch
; instructions for that CPU variant (those macros are not visible here).
blit mmx2
blit mmx
blit 3dnow

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -