⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 blit_mmx.asm

📁 大名鼎鼎的CE下播放软件,TCPPMP的源代码!!!2410下可以流畅的解QVGA的H264,MPEG4等格式.
💻 ASM
📖 第 1 页 / 共 2 页
字号:
;*****************************************************************************
;*
;* This program is free software ; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
;*
;* $Id: blit_mmx.asm 323 2005-11-01 20:52:32Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************

; 32-bit x86 code, NASM syntax.
BITS 32
SECTION .data

; NOTE(review): not referenced in the visible part of this file - presumably a
; stack offset of the colour-table argument; confirm against the callers.
OFFSET_COL	equ 8

; Per-byte mask 0xFE. The MMX path of the avg macro ANDs with it before a
; 64-bit psrlq by 1, so that bit 0 of a byte cannot be shifted into bit 7 of
; the byte below it.
align 8
const_b254		dd	0xfefefefe,0xfefefefe

SECTION .text

; func name, argbytes
; Opens an exported entry point. The plain name becomes an alias for the
; decorated symbol _name@argbytes; that symbol is declared global and its
; label is placed on a 16-byte boundary. The alias must be defined before the
; global line so that the global directive sees the decorated form.
; NOTE(review): _name@N looks like Win32 stdcall decoration - confirm.
%macro func 2
%define %1 _%1@%2
global %1
align 16
%1:
%endmacro

; _prefetch caps, addr [, distance]
;   caps     - CPU capability tag (mmx2, 3dnow, ...)
;   addr     - address expression without brackets, e.g. esi or ecx+ebx
;   distance - how many bytes ahead of addr to prefetch; optional, defaults
;              to 256 so existing two-argument invocations are unchanged
; Emits one prefetch hint for [addr+distance]: prefetchnta for mmx2 and
; prefetch for 3dnow. Any other caps value emits nothing.
%macro _prefetch 2-3 256
%if s%1=smmx2
	prefetchnta [%2+%3]
%elif s%1=s3dnow
	prefetch [%2+%3]
%endif
%endmacro

; avg caps, in, src, in2, src2, tmp1, tmp2
;   caps       - CPU capability tag: mmx, mmx2 or 3dnow; anything else stops
;                assembly with an error
;   in,  src   - first pair:  in  receives the per-byte average of in  and src
;   in2, src2  - second pair: in2 receives the per-byte average of in2 and src2
;   tmp1, tmp2 - scratch MMX registers, written only by the mmx path
; Averages two independent pairs of 8 unsigned bytes.
; The mmx path has no byte-average instruction and computes
;   (a | b) - (((a ^ b) & 0xFE) >> 1)
; per byte, which equals (a + b + 1) >> 1, i.e. it rounds up like pavgb.
; The two pairs are processed with their instructions interleaved.
%macro avg 7
%if s%1=smmx
	; tmp = src
	movq	%6,%3
	movq	%7,%5
	; tmp = in ^ src
	pxor	%6,%2
	pxor	%7,%4
	; in = in | (in ^ src) = in | src
	por		%2,%6
	por		%4,%7
	; drop bit 0 of every byte so the quadword shift stays inside each byte
	pand	%6,[const_b254]
	pand	%7,[const_b254]
	psrlq	%6,1
	psrlq	%7,1
	; in = (in | src) - ((in ^ src) >> 1)
	psubb	%2,%6
	psubb	%4,%7

%elif s%1=smmx2
	pavgb	%2,%3
	pavgb	%4,%5
%elif s%1=s3dnow
	pavgusb	%2,%3	
	pavgusb	%4,%5
%else
	%error	not supported caps %1
%endif
%endmacro

; save caps, dst, src
;   caps - CPU capability tag
;   dst  - 8-byte memory operand, written with brackets
;   src  - MMX register to store
; Stores src to dst. Only mmx2 builds use the non-temporal movntq; every
; other caps value falls back to a plain movq.
%macro save 3
%if s%1 != smmx2
	movq %2,%3
%else
	movntq %2,%3
%endif
%endmacro

; Numeric tags for the bare tokens passed as macro arguments. The macros
; prefix an argument with `s` and compare the result inside a conditional
; (for example s<caps> against smmx2), so each tag only needs a distinct
; value. They are defined after the macros that use them, which works because
; the comparisons are evaluated when a macro is invoked.
; NOTE(review): savg, snone, syuy2, srgb, srgb16, sbgr16 and syuy2_color are
; not referenced in the visible part of this file.
%define savg 2	
%define smmx2 3
%define s3dnow 4
%define snone 5
%define smmx 6
%define srgb32 7
%define srgb24 8
%define sbgr32 9
%define sbgr24 10
%define syuy2 11
%define srgb 12
%define srgb16 13
%define sbgr16 14
%define syuy2_color 15

;eax tmp/Col
;ebx SrcPitch/2
;ecx SrcPtr[1]
;edx SrcPtr[2]
;esi SrcPtr[0]
;edi DstPtr
;ebp Width/2

;---------------------------------------------------------------------

; blit_pack_single_yuy2 caps, type, mode
;   caps - not used by this macro
;   type - 2: each chroma sample is averaged with the sample ebx bytes
;          further on; any other value: the chroma sample is copied as is
;   mode - not used by this macro
; Packs two luma bytes and one byte from each chroma plane into 4 output
; bytes in the order [esi], chroma(ecx), [esi+1], chroma(edx).
; In : esi = luma, ecx / edx = chroma planes, edi = destination,
;      ebx = byte offset to the second chroma sample (read only when type=2)
; Out: esi += 2, ecx += 1, edx += 1, edi += 4
; Clobbers: al, flags
; The average rounds up, (a+b+1)>>1, so this single-pair path produces the
; same chroma values as the pavgb / pavgusb / emulated average that the avg
; macro uses for full blocks. (It previously truncated with add/rcr.)
%macro blit_pack_single_yuy2 3
	mov al,[esi]
	add esi,2
	mov [edi],al
	mov al,[ecx]
%if %2=2
	; stc + adc leave the 9-bit sum a+b+1 in CF:al; rcr shifts CF back in
	stc
	adc al,[ecx+ebx]
	rcr al,1
%endif
	add ecx,1
	mov [edi+1],al
	mov al,[esi-1]
	add edi,4
	mov [edi-2],al
	mov al,[edx]
%if %2=2
	stc
	adc al,[edx+ebx]
	rcr al,1
%endif
	add edx,1
	mov [edi-1],al
%endmacro

; blit_pack_block_yuy2 caps, type, mode, i
;   caps - CPU capability tag, forwarded to avg, _prefetch and save
;   type - 2: chroma is averaged with the samples ebx bytes further on
;   mode, i - not used by this macro
; Packs 16 luma bytes with 8 bytes from each chroma plane into 32 output
; bytes (luma, chroma(ecx), luma, chroma(edx), ...).
; In : esi = luma pointer biased by +16 (the macro reads [esi-16..esi-1]),
;      ecx / edx = chroma planes, edi = destination, ebx = chroma offset
; Out: esi += 16, ecx += 8, edx += 8, edi += 32
; Clobbers: mm0-mm5, flags
%macro blit_pack_block_yuy2 4
	; ---- load: mm0/mm1 = luma 0-7 / 8-15, mm2/mm3 = chroma planes
	movq mm0,[esi-16]
	movq mm1,[esi-8]
	movq mm2,[ecx]
	movq mm3,[edx]
%if %2=2
	avg %1,mm2,[ecx+ebx],mm3,[edx+ebx],mm4,mm5
%endif
	_prefetch %1,esi

	; ---- interleave the two chroma planes: mm2 = pairs 0-3, mm4 = pairs 4-7
	movq mm4,mm2
	punpcklbw mm2,mm3
	punpckhbw mm4,mm3

	; ---- merge luma with the chroma pairs
	movq mm5,mm0
	punpcklbw mm0,mm2		; luma 0-3   + pairs 0-1
	punpckhbw mm5,mm2		; luma 4-7   + pairs 2-3
	movq mm3,mm1
	punpcklbw mm1,mm4		; luma 8-11  + pairs 4-5
	punpckhbw mm3,mm4		; luma 12-15 + pairs 6-7
%if %2=2
	_prefetch %1,ecx+ebx
%endif

	; ---- store 32 bytes
	save %1,[edi],mm0
	save %1,[edi+8],mm5
	save %1,[edi+16],mm1
	save %1,[edi+24],mm3

	; ---- advance the pointers (edi last, as before)
	add ecx,8
	add edx,8
	add esi,16
	add edi,32
%if %2=2
	; uses the already advanced edx
	_prefetch %1,edx+ebx
%endif
%endmacro

;---------------------------------------------------------------------

; blit_pack_single_yuy2_color caps, type, mode
;   caps - not used by this macro
;   type - 2: each chroma sample is averaged with the sample ebx bytes
;          further on; any other value: the chroma sample is used as is
;   mode - not used by this macro
; Like the plain single-pair YUY2 pack, but every sample is first adjusted
; with the table whose address is stored at [esp]:
;   out = saturate_u8( (((x << 5) * mul) >> 16) + add )
; Table layout, 8 bytes (4 words) per entry: +0 luma mul, +8 luma add,
; +16 plane-1 mul, +24 plane-1 add, +32 plane-2 mul, +40 plane-2 add.
; In : esi = luma, ecx / edx = chroma planes, edi = destination,
;      ebx = chroma offset (type=2), [esp] = table pointer
; Out: esi += 2, ecx += 1, edx += 1, edi += 4, eax = table pointer
; Clobbers: eax, mm0, mm2, mm3, mm7, flags
; The average rounds up, (a+b+1)>>1, matching the pavgb / pavgusb / emulated
; average that the avg macro uses for full blocks. (It previously truncated
; with add/rcr.)
%macro blit_pack_single_yuy2_color 3

	movzx eax,word [esi]
	add esi,2
	movd mm0,eax
	; only al is replaced below; the bytes above it are carried along in the
	; MMX registers but never reach the 4 bytes that are stored
	mov al,[ecx]
%if %2=2
	; stc + adc leave the 9-bit sum a+b+1 in CF:al; rcr shifts CF back in
	stc
	adc al,[ecx+ebx]
	rcr al,1
%endif
	add ecx,1
	movd mm2,eax
	mov al,[edx]
%if %2=2
	stc
	adc al,[edx+ebx]
	rcr al,1
%endif
	add edx,1
	movd mm3,eax
	; eax was used as scratch: reload the table pointer
	mov eax,[esp]

	pxor mm7,mm7
	punpcklbw mm0,mm7
	punpcklbw mm2,mm7
	punpcklbw mm3,mm7
	psllw  mm0,5
	psllw  mm2,5
	psllw  mm3,5
	pmulhw mm0,[eax+8*0]
	pmulhw mm2,[eax+8*2]
	pmulhw mm3,[eax+8*4]
	paddsw mm0,[eax+8*1]
	paddsw mm2,[eax+8*3]
	paddsw mm3,[eax+8*5]
	packuswb mm0,mm0
	packuswb mm2,mm2
	packuswb mm3,mm3

	; low 4 bytes become: luma0, plane1, luma1, plane2
	punpcklbw mm2,mm3
	punpcklbw mm0,mm2
	movd [edi],mm0
	add edi,4

%endmacro

; blit_pack_block_yuy2_color caps, type, mode, i
;   caps - CPU capability tag, forwarded to avg, _prefetch and save
;   type - 2: chroma is averaged with the samples ebx bytes further on
;   mode, i - not used by this macro
; Packs 16 luma bytes with 8 bytes from each chroma plane into 32 output
; bytes, after adjusting every sample with the table at eax:
;   out = saturate_u8( (((x << 5) * mul) >> 16) + add )
; Table layout, 8 bytes (4 words) per entry: +0 luma mul, +8 luma add,
; +16 plane-1 mul, +24 plane-1 add, +32 plane-2 mul, +40 plane-2 add.
; In : eax = table pointer (must already be loaded; it is not read from the
;      stack here), esi = luma pointer biased by +16, ecx / edx = chroma
;      planes, edi = destination, ebx = chroma offset (type=2)
; Out: esi += 16, ecx += 8, edx += 8, edi += 32
; Clobbers: mm0-mm7, flags
%macro blit_pack_block_yuy2_color 4
	; mm0/mm1 = luma 0-7 / 8-15, mm2/mm3 = chroma plane 1 / plane 2
	movq mm0,[esi-16]
	movq mm2,[ecx]
	movq mm3,[edx]
	movq mm1,[esi-8]

%if %2=2
	avg %1,mm2,[ecx+ebx],mm3,[edx+ebx],mm4,mm5
%endif

	_prefetch %1,esi

	; ---- luma: widen to words, scale by mm6 (mul), offset by mm7 (add)
	movq mm6,[eax+8*0]
	pxor mm7,mm7
	movq mm4,mm0
	movq mm5,mm1
	punpcklbw mm0,mm7
	punpcklbw mm1,mm7
	psllw  mm0,5
	psllw  mm1,5
	pmulhw mm0,mm6
	pmulhw mm1,mm6
	punpckhbw mm4,mm7
	punpckhbw mm5,mm7
	; mm7 is no longer zero from here on
	movq mm7,[eax+8*1]
	psllw  mm4,5
	psllw  mm5,5
	pmulhw mm4,mm6
	pmulhw mm5,mm6
	paddsw mm0,mm7
	paddsw mm1,mm7
	paddsw mm4,mm7
	paddsw mm5,mm7
	; back to bytes with unsigned saturation: mm0 = luma 0-7, mm1 = luma 8-15
	packuswb mm0,mm4
	packuswb mm1,mm5

%if %2=2
	_prefetch %1,ecx+ebx
%endif

	; ---- chroma: same scale/offset, plane 1 in mm2/mm4, plane 2 in mm3/mm5
	pxor mm7,mm7
	movq mm4,mm2
	movq mm5,mm3
	punpcklbw mm2,mm7
	punpcklbw mm3,mm7
	punpckhbw mm4,mm7
	punpckhbw mm5,mm7
	; mm6 / mm7 = plane-1 / plane-2 multipliers
	movq mm6,[eax+8*2]
	movq mm7,[eax+8*4]
	psllw  mm2,5
	psllw  mm3,5
	pmulhw mm2,mm6
	pmulhw mm3,mm7
	psllw  mm4,5
	psllw  mm5,5
	pmulhw mm4,mm6
	pmulhw mm5,mm7
	; mm6 / mm7 = plane-1 / plane-2 offsets
	movq mm6,[eax+8*3]
	movq mm7,[eax+8*5]
	paddsw mm2,mm6
	paddsw mm3,mm7
	paddsw mm4,mm6
	paddsw mm5,mm7
	packuswb mm2,mm4
	packuswb mm3,mm5

	; ---- interleave: chroma planes into pairs, then luma with the pairs
	movq mm5,mm0
	movq mm4,mm2
	punpcklbw mm2,mm3
	punpckhbw mm4,mm3
	movq mm3,mm1
    punpcklbw mm0,mm2
    punpcklbw mm1,mm4
    punpckhbw mm5,mm2
    punpckhbw mm3,mm4

%if %2=2
	_prefetch %1,edx+ebx
%endif

	; ---- store 32 bytes; the pointer increments are interleaved with the stores
	save %1,[edi],mm0
	add ecx,8
	save %1,[edi+8],mm5
	add edx,8
	save %1,[edi+16],mm1
	add esi,16
	save %1,[edi+24],mm3
	add edi,32

%endmacro

;---------------------------------------------------------------------

; yuv_rgb - converts 8 pixels (8 luma bytes sharing 4 u and 4 v bytes) to
; three registers of 8 bytes each.
;input
; mm0 y(8)
; mm1 u(4)
; mm2 v(4)
; mm7 0
; eax table pointer, 8 bytes (4 words) per entry:
;     +0 y_mul, +8 y_add, +16 u_mul, +24 u_add, +32 v_mul, +40 v_add
;output
; mm0 g
; mm1 b
; mm2 r
;clobbers
; mm3-mm7 (mm7 is NOT zero on exit)
;
; Each component has its add term applied first, is shifted left by 7 and
; then multiplied with pmulhw (high 16 bits of the signed product). The sums
; are shifted right arithmetically by 4 and packed with unsigned saturation.
; Per the comments below, words 0-1 of u_mul / v_mul hold the "lo"
; coefficient and words 2-3 the "hi" coefficient.

%macro yuv_rgb 0

	; widen y / u / v to words and apply the add terms; the table loads are
	; interleaved with the unpacks
	movq mm5,mm0
	movq mm3,[eax+8]
	punpcklbw mm0,mm7
	movq mm4,[eax+16+8]
	punpckhbw mm5,mm7
	movq mm6,[eax+32+8]
	punpcklbw mm1,mm7
	paddsw mm0,mm3
	punpcklbw mm2,mm7
	paddsw mm5,mm3
	; mm7 stops being zero here: it now holds y_mul
	movq mm7,[eax]
	psllw mm0,7
	paddsw mm1,mm4
	psllw mm5,7
	paddsw mm2,mm6
	psllw mm1,7
	psllw mm2,7
	; mm0 / mm5 = y*y_mul for pixels 0-3 / 4-7
	pmulhw mm0,mm7
	movq mm6,[eax+16]
	pmulhw mm5,mm7
	movq mm7,[eax+32]
	; duplicate the chroma so that each sample meets both the lo and the hi
	; coefficient: mm1/mm2 take samples 0-1, mm3/mm4 take samples 2-3
	movq mm3,mm1
	movq mm4,mm2
	punpckldq mm1,mm1
	punpckldq mm2,mm2
	pmulhw mm1,mm6
	pmulhw mm2,mm7
	punpckhdq mm3,mm3
	punpckhdq mm4,mm4
	pmulhw mm3,mm6
	pmulhw mm4,mm7
	; pixels 0-3: spread every chroma product over the two pixels it covers
	movq mm6,mm1
	movq mm7,mm2
	punpcklwd mm1,mm1
	punpcklwd mm2,mm2
	punpckhwd mm6,mm6
	punpckhwd mm7,mm7
	paddsw mm1,mm0		;y*y_mul+u*u_mul_lo
	paddsw mm2,mm0      ;y*y_mul           +v*v_mul_lo
	paddsw mm0,mm6
	paddsw mm0,mm7      ;y*y_mul+u*u_mul_hi+v*v_mul_hi
	psraw mm1,4
	psraw mm2,4
	psraw mm0,4
	; pixels 4-7: same steps with chroma samples 2-3 and the high luma half
	movq mm6,mm3
	movq mm7,mm4
	punpcklwd mm3,mm3
	punpcklwd mm4,mm4
	punpckhwd mm6,mm6
	punpckhwd mm7,mm7
	paddsw mm3,mm5		;y*y_mul+u*u_mul_lo
	paddsw mm4,mm5      ;y*y_mul           +v*v_mul_lo
	paddsw mm5,mm6
	paddsw mm5,mm7      ;y*y_mul+u*u_mul_hi+v*v_mul_hi
	psraw mm3,4
	psraw mm4,4
	psraw mm5,4
	; back to bytes with unsigned saturation, 8 pixels per register
	packuswb mm1,mm3    ;b
	packuswb mm2,mm4	;r
	packuswb mm0,mm5    ;g

%endmacro

; blit_pack_single_rgb caps, type, mode
;   caps - CPU capability tag, forwarded to save
;   type - 2: each chroma sample is averaged with the sample ebx bytes
;          further on; any other value: the chroma sample is used as is
;   mode - output layout: rgb32, bgr32 (8 bytes written, edi += 8) or
;          rgb24, bgr24 (6 bytes written, edi += 6); any other value
;          converts but stores nothing
; Converts two luma bytes sharing one u and one v sample into two pixels
; using yuv_rgb and the table whose address is stored at [esp].
; In : esi = luma, ecx / edx = chroma planes, edi = destination,
;      ebx = chroma offset (type=2), [esp] = table pointer
; Out: esi += 2, ecx += 1, edx += 1, edi advanced as described above
; Clobbers: eax, mm0-mm7, flags
; Both chroma paths now load with movzx, so no stale luma byte is carried in
; AH when averaging. The average rounds up, (a+b+1)>>1, matching the
; pavgb / pavgusb / emulated average that the avg macro uses for full blocks.
; (It previously truncated with add/rcr.)
%macro blit_pack_single_rgb 3
	movzx eax,word [esi]
	add esi,2
	pxor mm7,mm7
	movd mm0,eax
	movzx eax,byte [ecx]
%if %2=2
	; stc + adc leave the 9-bit sum a+b+1 in CF:al; rcr shifts CF back in
	stc
	adc al,[ecx+ebx]
	rcr al,1
%endif
	add ecx,1
	movd mm1,eax
	movzx eax,byte [edx]
%if %2=2
	stc
	adc al,[edx+ebx]
	rcr al,1
%endif
	add edx,1
	movd mm2,eax
	; eax was used as scratch: reload the table pointer for yuv_rgb
	mov eax,[esp]
	yuv_rgb

	; yuv_rgb leaves mm7 non-zero; only pixels 0 and 1 of its result are used
	pxor mm7,mm7
%if (s%3=srgb32) || (s%3=srgb24)
	punpcklbw mm2,mm7	;0r0r0r0r (0)
	punpcklbw mm1,mm0	;gbgbgbgb (0)
	punpcklwd mm1,mm2	;0rgb0rgb (0)
%else
	punpcklbw mm1,mm7	;0b0b0b0b (0)
	punpcklbw mm2,mm0	;grgrgrgr (0)
	punpcklwd mm2,mm1	;0bgr0bgr (0)
%endif

%if s%3=srgb32
	save %1,[edi],mm1
	add edi,8
%endif
%if s%3=sbgr32
	save %1,[edi],mm2
	add edi,8
%endif
%if (s%3=srgb24) || (s%3=sbgr24)
	; bring the two 4-byte pixels into both mm1 and mm2, whichever holds them
%if s%3=srgb24
	movq mm2,mm1
%else
	movq mm1,mm2
%endif
	; squeeze out the padding byte: pixel 1 is shifted up by 3 bytes and
	; merged with pixel 0, giving 6 payload bytes
	punpckhdq mm2,mm7	;00000bgr (1)
	punpckldq mm1,mm7	;00000bgr (0)
	psllq mm2,24		;00bgr000 (1)
	por mm2,mm1			;00bgrbgr
	movd eax,mm2
	psrlq mm2,32
	mov [edi],eax
	movd eax,mm2
	mov [edi+4],ax
	add edi,6
%endif

%endmacro

; caps, type, mode, i
%macro blit_pack_block_rgb 4
	movq mm0,[esi-16]
	movd mm1,[ecx]
	pxor mm7,mm7
	movd mm2,[edx]
%if %2=2
	avg %1,mm1,[ecx+ebx],mm2,[edx+ebx],mm4,mm5
%endif
%if (%4=0)
	_prefetch %1,esi
%endif
	yuv_rgb

%if (%2=2) 
%if (%4=0)
	_prefetch %1,edx+ebx
%else
	_prefetch %1,ecx+ebx
%endif
%endif

	pxor mm7,mm7
%if (s%3=srgb32) || (s%3=srgb24)
	movq mm3,mm2
	movq mm4,mm1
	punpcklbw mm2,mm7	;0r0r0r0r (0)
	punpckhbw mm3,mm7	;0r0r0r0r (1)
	punpcklbw mm1,mm0	;gbgbgbgb (0)
	punpckhbw mm4,mm0	;gbgbgbgb (1)
	movq mm7,mm1
	movq mm0,mm4
	punpcklwd mm1,mm2	;0rgb0rgb (0)
	punpckhwd mm7,mm2	;0rgb0rgb (1)
	punpcklwd mm4,mm3	;0rgb0rgb (2)
	punpckhwd mm0,mm3	;0rgb0rgb (3)
%else
	movq mm3,mm1
	movq mm4,mm2
	punpcklbw mm1,mm7	;0b0b0b0b (0)
	punpckhbw mm3,mm7	;0b0b0b0b (1)
	punpcklbw mm2,mm0	;grgrgrgr (0)
	punpckhbw mm4,mm0	;grgrgrgr (1)
	movq mm7,mm2
	movq mm0,mm4
	punpcklwd mm2,mm1	;0bgr0bgr (0)
	punpckhwd mm7,mm1	;0bgr0bgr (1)
	punpcklwd mm4,mm3	;0bgr0bgr (2)
	punpckhwd mm0,mm3	;0bgr0bgr (3)
	movq mm1,mm2
%endif

%if (s%3=srgb32) || (s%3=sbgr32)
	save %1,[edi],mm1
	add ecx,4
	save %1,[edi+8],mm7
	add edx,4
	save %1,[edi+16],mm4
	add esi,8
	save %1,[edi+24],mm0
	add edi,32
%else
	pxor mm6,mm6
	movq mm3,mm7		;0ddd0ccc
	movq mm5,mm4		;0fff0eee
	pslld mm0,8 		;hhh0ggg0
	psrld mm5,8			;00ffxxxx
	punpckhdq mm6,mm1	;0bbb0000
	punpckhdq mm5,mm0	;hhh000ff
	psllq mm0,32		;ggg00000
	pslld mm3,16		;xxxxcc00
	psrlq mm6,8			;00bbb000
	punpckldq mm1,mm3	;cc000aaa
	punpckldq mm3,mm4	;0eeexxxx
	pslld mm4,24		;f000xxxx

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -