📄 blit_mmx.asm
字号:
psrlq mm0,24 ;000ggg00
por mm1,mm6 ;ccbbbaaa
punpckldq mm6,mm7 ;0cccxxxx
pslld mm7,8 ;ddd0xxxx
por mm5,mm0 ;hhhgggff
psrld mm6,16 ;000cxxxx
punpckhdq mm7,mm3 ;0eeeddd0
punpckhdq mm6,mm4 ;f000000c
save %1,[edi],mm1
por mm7,mm6 ;feeedddc
add ecx,4
add edx,4
save %1,[edi+8],mm7
add esi,8
save %1,[edi+16],mm5
add edi,24
%endif
%endmacro
;---------------------------------------------------------------------
; blit_pack_line caps, mode, type, align, repeat, name
;
; Emits the per-line worker blit_i420_<mode>_<type>_<caps>, reached by a
; near call and left with a plain ret.
;
;   %1 caps   - CPU capability suffix, forwarded to the pack macros
;   %2 mode   - part of the emitted label, forwarded to the pack macros
;   %3 type   - line type, part of the emitted label, forwarded as well
;   %4 align  - added to the large-path threshold; the value 4 also enables
;               the pre-alignment step below
;   %5 repeat - number of blit_pack_block_<name> expansions per loop pass
;   %6 name   - selects blit_pack_single_<name> / blit_pack_block_<name>
;
; Register use visible in this macro:
;   ebp - count on entry (callers in this file pass Width/2)
;   esi - source pointer; esi+ebp*2 is used as the end of the row
;   edi - destination pointer (only its bit 2 is inspected here)
;   eax - saved around blit_pack_single for the rgb / yuy2_color names
;         (presumably because those macros clobber it - TODO confirm)
; blit_pack_single_* and blit_pack_block_* are defined outside this view;
; how far they advance the pointers cannot be confirmed from here.
%macro blit_pack_line 6
; Short rows: double the count so the loop can step it down by 2.
%%small:
add ebp,ebp
; Entry for the remainder of a long row: ebp already holds a byte count.
%%smallstart:
%if (s%6=srgb) || (s%6=syuy2_color)
push eax
%endif
; do/while loop: the body runs before the first test, so ebp is assumed
; to be a non-zero multiple of 2 here.
; NOTE(review): a zero count would wrap around - confirm callers never pass 0.
%%smallloop:
blit_pack_single_%6 %1,%3,%2
sub ebp,2
jne %%smallloop
%if (s%6=srgb) || (s%6=syuy2_color)
pop eax
%endif
ret
; ---- entry point ----
blit_i420_%2_%3_%1:
; Counts below 8+align take the short-row path (signed compare).
cmp ebp,8+%4
jl %%small
%%large4:
; ebp becomes the end-of-row source pointer.
lea ebp,[esi+ebp*2]
%if %4==4
; align==4 only: when bit 2 of edi is set, run one single step first
; (presumably to bring the destination to an 8-byte boundary - TODO confirm).
test edi,4
je %%noalign4
%if (s%6=srgb) || (s%6=syuy2_color)
push eax
%endif
blit_pack_single_%6 %1,%3,%2
%if (s%6=srgb) || (s%6=syuy2_color)
pop eax
%endif
%%noalign4:
%endif
; Bias esi by 16 so the loop test below compares the position after the
; next pass against the end of the row.
add esi,16
%%move4:
; Expand the block macro 'repeat' times; i is the 0-based block index
; passed as its fourth argument.
%assign i 0
%rep %5
blit_pack_block_%6 %1,%3,%2,i
%assign i i+1
%endrep
; Unsigned pointer compare: keep going while the biased esi has not
; passed the end of the row.
cmp esi,ebp
jbe %%move4
; Remove the bias; ebp = bytes still to be processed.
sub esi,16
sub ebp,esi
je %%end
; Finish the leftover through the short-row loop (skipping the doubling).
jmp %%smallstart
%%end:
ret
%endmacro
;---------------------------------------------------------------------
; blit_pack_next_row bytes_per_pair
;
; Helper for blit_pack_inner. After one line worker has returned, moves
; every pointer back to the start of the row and then steps the
; destination and luma pointers down one row.
;
;   %1 - destination bytes per pixel pair (6 is split into *4 + *2
;        because 6 is not a valid lea scale factor)
;
; Expects the frame built by blit_pack: [esp] = -Width/2,
; [esp+4] = Width/2, [esp+12+28] = DstPitch, ebx = SrcPitch/2.
; Leaves ebp = Width/2, ready for the next line worker call.
; The chroma pointers (ecx, edx) are only rewound, not advanced.
%macro blit_pack_next_row 1
mov ebp,[esp] ;-Width/2
%if %1==6
lea edi,[edi+ebp*4]
lea edi,[edi+ebp*2]
%else
lea edi,[edi+ebp*%1]
%endif
lea esi,[esi+ebp*2]
add ecx,ebp
add edx,ebp
mov ebp,[esp+4] ;Width/2
add edi,[esp+12+28] ;DstPitch
lea esi,[esi+ebx*2]
%endmacro
;---------------------------------------------------------------------
; blit_pack_inner caps, mode, bytes_per_pair
;
; Row loop and epilogue shared by the functions that blit_pack emits.
; Walks the image two rows at a time, calling the type-1 line worker for
; the first row of each pair and the type-2 worker for the second, then
; handles the last one or two rows and returns to the caller of the
; enclosing function (ret 32).
;
;   %1 - CPU capability suffix
;   %2 - mode part of the line worker names
;   %3 - destination bytes per pixel pair
;
; The Height argument at [esp+24+28] is used in place as the row counter.
%macro blit_pack_inner 3
sub dword [esp+24+28],2
jle %%tail
; More than two rows remain: process one full row pair.
%%row_pair:
call blit_i420_%2_1_%1
blit_pack_next_row %3
call blit_i420_%2_2_%1
blit_pack_next_row %3
; Both rows of the pair are done, so step the chroma pointers down a row.
add ecx,ebx
add edx,ebx
sub dword [esp+24+28],2
jg %%row_pair
; Here the counter is 0 (two rows left) or negative (one row left).
%%tail:
call blit_i420_%2_1_%1
blit_pack_next_row %3
cmp dword [esp+24+28],0
jne %%done
; The very last row also goes through the type-1 worker
; (presumably because no further chroma row exists - TODO confirm).
call blit_i420_%2_1_%1
%%done:
; Drop the two Width/2 temporaries, leave MMX state, restore registers.
add esp,8
emms
pop ebp
pop ebx
pop edi
pop esi
ret 32
%endmacro
; blit_pack caps, mode, align, block, name, color
;
; Instantiates the two line workers (type 1 and type 2) for one packed
; output mode and emits the function blit_i420_<mode>_<caps>.
;
;   %1 caps  - CPU capability suffix
;   %2 mode  - output mode, part of every emitted name
;   %3 align - forwarded to blit_pack_line as 'align' and to
;              blit_pack_inner as its third argument
;   %4 block - forwarded to blit_pack_line as 'repeat'
;   %5 name  - pack macro family used by the line workers
;   %6 color - non-zero: also emit <mode>_color workers and the run-time
;              check that chooses between the two variants
;
; 'func' is a macro defined outside this view (presumably it emits the
; entry label; its second argument 32 matches the ret 32 in the epilogue).
; Stack arguments, as read below: this, DstPtr, SrcPtr, DstPitch,
; SrcPitch, Width, Height.
%macro blit_pack 6
blit_pack_line %1,%2,1,%3,%4,%5
blit_pack_line %1,%2,2,%3,%4,%5
%if %6
blit_pack_line %1,%2_color,1,%3,%4,%5_color
blit_pack_line %1,%2_color,2,%3,%4,%5_color
%endif
func blit_i420_%2_%1,32
push esi
push edi
push ebx
push ebp
; Four pushes plus the return address: the first argument is at [esp+20].
mov ebx,[esp+16+20] ;SrcPitch
mov edi,[esp+4+20] ;DstPtr
mov esi,[esp+8+20] ;SrcPtr
mov ebp,[esp+20+20] ;Width
; DstPtr points at a pointer table; only its first entry is used here.
mov edi,[edi]
; ebx = SrcPitch/2, ebp = Width/2
sar ebx,1
sar ebp,1
; Keep Width/2 at [esp+4] and -Width/2 at [esp] for blit_pack_inner.
xor eax,eax
push ebp
sub eax,ebp
push eax
; Two more pushes: the first argument is now at [esp+28].
mov eax,[esp+0+28] ;this
mov ecx,[esi+4] ;u
mov edx,[esi+8] ;v
mov esi,[esi] ;y
; eax = address of the colour table inside 'this'
; (OFFSET_COL is defined outside this view).
add eax,OFFSET_COL
%if %6
; Take the plain path only when all six 8-byte table entries hold their
; neutral values: entries 0, 2 and 4 must start with the word pair
; (2048,2048) and entries 1, 3 and 5 must start with a zero dword.
; Only the first dword of each entry is examined.
cmp dword [eax],2048+(2048<<16)
jne %%color
cmp dword [eax+8],0
jne %%color
cmp dword [eax+8*2],2048+(2048<<16)
jne %%color
cmp dword [eax+8*3],0
jne %%color
cmp dword [eax+8*4],2048+(2048<<16)
jne %%color
cmp dword [eax+8*5],0
jne %%color
jmp %%nocolor
%%color:
; blit_pack_inner ends with ret 32, so this expansion never falls
; through into the plain variant below.
blit_pack_inner %1,%2_color,%3
%%nocolor
%endif
blit_pack_inner %1,%2,%3
%endmacro
;---------------------------------------------------------------------
; blit_plane_rgb caps, dstfmt, srcfmt
;
; Emits blit_<dstfmt>_<srcfmt>_<caps>: a row-by-row copy between two
; packed RGB surfaces.
;
;   %1 caps   - CPU capability suffix, forwarded to _prefetch / save
;   %2 dstfmt - selects the bytes-per-pixel factor applied to Width
;   %3 srcfmt - only used in the function name; no pixel conversion is
;               done, every instantiation in this file uses srcfmt == dstfmt
;
; Stack arguments, as read below: this (unused), DstPtr, SrcPtr, DstPitch,
; SrcPitch, Width, Height. DstPtr and SrcPtr point at pointer tables of
; which only the first entry is used. 'func', 'save' and '_prefetch' are
; macros defined outside this view.
;
; Register roles inside the loop:
;   edi dst, esi src, eax DstPitch, ebx SrcPitch,
;   ebp row size in bytes, edx rows remaining, ecx byte count for movsb
;
; NOTE(review): Height is assumed to be >= 1; the row loop tests the
; counter only after a row has been copied.
%macro blit_plane_rgb 3
func blit_%2_%3_%1,32
push esi
push edi
push ebx
push ebp
; Four pushes plus the return address: the first argument is at [esp+20].
mov eax,[esp+12+20] ;DstPitch
mov ebx,[esp+16+20] ;SrcPitch
mov edi,[esp+4+20] ;DstPtr
mov esi,[esp+8+20] ;SrcPtr
mov ebp,[esp+20+20] ;Width
mov edx,[esp+24+20] ;Height
mov edi,[edi]
mov esi,[esi]
; Turn the pixel count into a byte count.
%if (s%2=srgb32) || (s%2=sbgr32)
shl ebp,2 ;4 bytes per pixel
%endif
%if (s%2=srgb24) || (s%2=sbgr24)
lea ebp,[ebp+ebp*2] ;3 bytes per pixel
%endif
%if (s%2=srgb16) || (s%2=sbgr16)
shl ebp,1 ;2 bytes per pixel
%endif
%%loopy6:
_prefetch %1,esi
mov ecx,ebp
; The fast path copies one 32-byte block unconditionally after the
; alignment moves, so the row must hold 32 bytes plus the most the
; alignment can consume: movsd only (4) or movsw + movsd (6).
%if (s%2=srgb16) || (s%2=sbgr16)
cmp ebp,32+6
%else
cmp ebp,32+4
%endif
jl %%small6
push ebp
; ebp = end-of-row source pointer while the fast path runs.
add ebp,esi
%if (s%2=srgb16) || (s%2=sbgr16)
test edi,2
je %%noalignw6
movsw
%%noalignw6:
%endif
test edi,4
je %%noalignd6
movsd
%%noalignd6:
; Bias esi by one block so the loop test compares the end of the next
; block against the end of the row.
add esi,32
%%move6:
movq mm0,[esi-32]
movq mm1,[esi-24]
movq mm2,[esi-16]
movq mm3,[esi-8]
add esi,32
_prefetch %1,esi
save %1,[edi+0],mm0
save %1,[edi+8],mm1
save %1,[edi+16],mm2
save %1,[edi+24],mm3
add edi,32
cmp esi,ebp
jbe %%move6
; Remove the bias; ecx = bytes left in this row (0..31).
sub esi,32
mov ecx,ebp
pop ebp
sub ecx,esi
%%small6:
rep movsb
; Step both pointers to the start of the next row.
add edi,eax
add esi,ebx
sub edi,ebp
sub esi,ebp
dec edx
jne %%loopy6
emms
pop ebp
pop ebx
pop edi
pop esi
ret 32
%endmacro
;---------------------------------------------------------------------
; blit caps
;
; Instantiates the whole family of blit routines for one CPU capability
; suffix (%1):
;   blit_plane_yuv_<caps>  internal helper, copies one 8-bit plane
;   blit_i420_i420_<caps>  planar copy of the y, u and v planes
;   blit_<fmt>_<fmt>_<caps> for rgb32, rgb24, rgb16 (via blit_plane_rgb)
;   blit_i420_<mode>_<caps> for yuy2, rgb32, rgb24, bgr32, bgr24
;                           (via blit_pack)
; 'func', 'save', '_prefetch' and OFFSET_COL are defined outside this view.
%macro blit 1
;This,DstPtr,SrcPtr,DstPitch,SrcPitch,Width,Height,Src2SrcLast
; edi dst
; esi src
; eax dstpitch
; ebx srcpitch
; ebp width
; edx height
; ecx mul,ofs
;
; blit_plane_yuv_<caps>: copies 'height' rows of 'width' bytes. ecx points
; at 4 multiplier words followed, 8 bytes later, by 4 offset words. When
; the first multiplier word is 2048 and the first offset word is 0 the
; rows are copied unchanged; otherwise every byte is transformed.
; eax and ebx are preserved; esi, edi, ecx and edx are modified.
; NOTE(review): height is assumed to be >= 1; both row loops test the
; counter only after a row has been processed.
blit_plane_yuv_%1:
cmp word [ecx],2048
jne %%jmpcolor
cmp word [ecx+8],0
je %%loopy
; Trampoline to %%color (presumably because the target is out of reach
; of a short conditional jump - TODO confirm).
%%jmpcolor:
jmp %%color
; ---- plain copy, one row per pass ----
%%loopy:
_prefetch %1,esi
mov ecx,ebp
; The fast path copies one 32-byte block unconditionally after the
; alignment moves below, which can consume up to 2 + 4 = 6 bytes, so
; the row must hold at least 32+6 bytes. Shorter rows use rep movsb.
cmp ebp,32+6
jl %%small
push ebp
; ebp = end-of-row source pointer while the fast path runs.
add ebp,esi
test edi,2
je %%noalignw
movsw
%%noalignw:
test edi,4
je %%noalignd
movsd
%%noalignd:
; Bias esi by one block so the loop test compares the end of the next
; block against the end of the row.
add esi,32
%%move:
movq mm0,[esi-32]
movq mm1,[esi-24]
movq mm2,[esi-16]
movq mm3,[esi-8]
add esi,32
_prefetch %1,esi
save %1,[edi+0],mm0
save %1,[edi+8],mm1
save %1,[edi+16],mm2
save %1,[edi+24],mm3
add edi,32
cmp esi,ebp
jbe %%move
; Remove the bias; ecx = bytes left in this row (0..31).
sub esi,32
mov ecx,ebp
pop ebp
sub ecx,esi
%%small:
rep movsb
; Step both pointers to the start of the next row.
add edi,eax
add esi,ebx
sub edi,ebp
sub esi,ebp
dec edx
jne %%loopy
ret
; ---- transformed copy ----
; Per byte: widen to a word, shift left by 5, take the high word of the
; signed product with the multiplier, add the offset with signed
; saturation, pack back to a byte with unsigned saturation.
; A multiplier of 2048 is unity gain: (x<<5)*2048>>16 = x.
%%color:
movq mm5,[ecx] ;4 multiplier words
movq mm6,[ecx+8] ;4 offset words
pxor mm7,mm7 ;zero, used for byte->word unpacking
%%loopy2:
_prefetch %1,esi
; 16-byte block plus at most 4 bytes consumed by the alignment step.
cmp ebp,16+4
jge %%large2
mov ecx,ebp
jmp %%small2
%%large2:
push ebp
add ebp,esi
; Transform 4 bytes first when bit 2 of edi is set.
test edi,4
je %%noalignd2
movd mm0,[esi]
add esi,4
punpcklbw mm0,mm7
psllw mm0,5
pmulhw mm0,mm5
paddsw mm0,mm6
packuswb mm0,mm0
movd [edi],mm0
add edi,4
%%noalignd2:
add esi,16
; 16 source bytes per pass, handled as four groups of 4 words.
%%move2:
movq mm0,[esi-16]
movq mm2,[esi-8]
_prefetch %1,esi
add esi,16
movq mm1,mm0
movq mm3,mm2
punpcklbw mm0,mm7
punpcklbw mm2,mm7
psllw mm0,5
psllw mm2,5
pmulhw mm0,mm5
pmulhw mm2,mm5
punpckhbw mm1,mm7
punpckhbw mm3,mm7
psllw mm1,5
psllw mm3,5
pmulhw mm1,mm5
pmulhw mm3,mm5
paddsw mm0,mm6
paddsw mm2,mm6
paddsw mm1,mm6
paddsw mm3,mm6
packuswb mm0,mm1
packuswb mm2,mm3
save %1,[edi+0],mm0
save %1,[edi+8],mm2
add edi,16
cmp esi,ebp
jbe %%move2
; Remove the bias; ecx = bytes left in this row.
sub esi,16
mov ecx,ebp
pop ebp
sub ecx,esi
%%small2:
; Leftover bytes, one at a time; skipped when the count is <= 0.
or ecx,ecx
jle %%nosmall2
; eax holds the destination pitch, so keep it across the byte loop.
push eax
%%smallloop2:
movzx eax,byte [esi]
add esi,1
movd mm0,eax
punpcklbw mm0,mm7
psllw mm0,5
pmulhw mm0,mm5
paddsw mm0,mm6
packuswb mm0,mm0
movd eax,mm0
mov [edi],al
add edi,1
dec ecx
jne %%smallloop2
pop eax
%%nosmall2:
; Step both pointers to the start of the next row.
add edi,eax
add esi,ebx
sub edi,ebp
sub esi,ebp
dec edx
jne %%loopy2
ret
; blit_i420_i420_<caps>: copies the y plane at full size, then the u and
; v planes at half width, half height and half pitch. Each plane gets its
; own multiplier/offset pair, 16 bytes apart, starting at this+OFFSET_COL.
func blit_i420_i420_%1,32
push esi
push edi
push ebx
push ebp
; Four pushes plus the return address: the first argument is at [esp+20].
mov eax,[esp+12+20] ;DstPitch
mov ebx,[esp+16+20] ;SrcPitch
mov ecx,[esp+0+20] ;this
mov edi,[esp+4+20] ;DstPtr
mov esi,[esp+8+20] ;SrcPtr
mov ebp,[esp+20+20] ;Width
mov edx,[esp+24+20] ;Height
mov edi,[edi] ;y
mov esi,[esi] ;y
add ecx,OFFSET_COL
call blit_plane_yuv_%1
; Chroma pitches are half the luma pitches. blit_plane_yuv preserves eax
; and ebx, so they are halved once and reused for both chroma planes.
sar eax,1
sar ebx,1
mov ecx,[esp+0+20] ;this
mov edi,[esp+4+20] ;DstPtr
mov esi,[esp+8+20] ;SrcPtr
mov ebp,[esp+20+20] ;Width
mov edx,[esp+24+20] ;Height
sar ebp,1
sar edx,1
mov edi,[edi+4] ;u
mov esi,[esi+4] ;u
add ecx,OFFSET_COL+8*2
call blit_plane_yuv_%1
mov ecx,[esp+0+20] ;this
mov edi,[esp+4+20] ;DstPtr
mov esi,[esp+8+20] ;SrcPtr
mov ebp,[esp+20+20] ;Width
mov edx,[esp+24+20] ;Height
sar ebp,1
sar edx,1
mov edi,[edi+8] ;v
mov esi,[esi+8] ;v
add ecx,OFFSET_COL+8*4
call blit_plane_yuv_%1
emms
pop ebp
pop ebx
pop edi
pop esi
ret 32
; Same-format RGB copies.
blit_plane_rgb %1,rgb32,rgb32
blit_plane_rgb %1,rgb24,rgb24
blit_plane_rgb %1,rgb16,rgb16
; Disabled instantiations (blit_plane_rgb performs no format conversion).
;blit_plane_rgb %1,rgb32,rgb24
;blit_plane_rgb %1,rgb24,rgb32
;blit_plane_rgb %1,rgb32,bgr32
;blit_plane_rgb %1,rgb24,bgr24
;blit_plane_rgb %1,rgb32,bgr24
;blit_plane_rgb %1,rgb24,bgr32
; i420 -> packed converters: caps, mode, align, block, name, color
blit_pack %1,yuy2,4,1,yuy2,1
blit_pack %1,rgb32,8,2,rgb,0
blit_pack %1,rgb24,6,2,rgb,0
blit_pack %1,bgr32,8,2,rgb,0
blit_pack %1,bgr24,6,2,rgb,0
%endmacro
; Expand the complete set of blit routines once per CPU capability
; suffix; the suffix becomes the last component of every emitted name
; and is forwarded to the save / _prefetch macros.
blit mmx2
blit mmx
blit 3dnow
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -