⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 idct_mmx.asm

📁 大名鼎鼎的CE下播放软件,TCPMP的源代码!!!2410下可以流畅的解QVGA的H264,MPEG4等格式.
💻 ASM
字号:
;*****************************************************************************
;*
;* This program is free software ; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
;*
;* $Id: idct_mmx.asm 432 2005-12-28 16:39:13Z picard $
;*
;* The Core Pocket Media Player
;* Copyright (c) 2004-2005 Gabor Kovacs
;*
;*****************************************************************************

;******************
;*  NOT FINISHED  *
;******************

; Assemble everything below as 32-bit code.
BITS 32

; Named shift amounts. No instruction in this file references them yet.
; NOTE(review): presumably the fixed-point scaling shifts for the row and
; column passes of the unfinished IDCT - confirm once those are implemented.
ROW_SHIFT		equ	11
COL_SHIFT		equ 6

; The data section is declared and aligned, but nothing is placed in it.
SECTION .data
ALIGN 16

SECTION .text

; cglobal <name>,<argbytes>
;
; Redefines <name> as the decorated symbol _<name>@<argbytes> (the form used
; for Win32 stdcall symbols) and declares that symbol global.
; Because <name> becomes a single-line macro, every later use of it in this
; file, including the label that defines the function, expands to the
; decorated symbol.
; The %define must come before the global line: "global %1" relies on the
; name having already been rewritten.
%macro cglobal 2
%define %1 _%1@%2
global %1
%endmacro

; Exported entry points; each takes 16 bytes of stack arguments.
cglobal IDCT_Const8x8,16
cglobal IDCT_Const4x4,16
; Not exported: the bodies of these two are disabled with %if 0 further down.
;cglobal IDCT_Block8x8,16
;cglobal IDCT_Block8x4,16

; Row <n> - row pass for row <n> of the block (UNFINISHED).
;
; In:       ecx = block; row <n> is addressed as [ecx + n*16], 16 bytes per row
; Clobbers: mm0, mm1
;
; As written, the macro only loads the 16 bytes of the row into mm0/mm1 and
; stores them back unchanged. The commented pseudo-code between the load and
; the store is the planned computation; none of it has been translated into
; instructions yet.
%macro Row 1
	movq	mm0,[ecx+%1*16]
	movq	mm1,[ecx+%1*16+8]

; NOTE(review): presumably the intended element order inside mm0/mm1 - confirm.
; x0 x4 x3 x7 x1 x6 x2 x5

;		x4' = W7 * x5 + W1 * x4;
;		x5' = W7 * x4 - W1 * x5;

;		x6' = W3 * x7 + W5 * x6;
;		x7' = W3 * x6 - W5 * x7;

;		x6' = x4 + x6;
;		x4' = x4 - x6;

;		x7' = x5 + x7;
;		x5' = x5 - x7;

;		x5' = (181 * (x4 + x5) + 128) >> 8;
;		x4' = (181 * (x4 - x5) + 128) >> 8;

;		x3' = W6 * x2 + W2 * x3;
;		x2' = W6 * x3 - W2 * x2;

;		x1 <<= 11;
;		x0 <<= 11;

;		x1' = x0 + x1;
;		x0' = x0 - x1;

;		x3' = x1 + x3;
;		x1' = x1 - x3;

;		x2' = x0 + x2;
;		x0' = x0 - x2;

	movq	[ecx+%1*16],mm0
	movq	[ecx+%1*16+8],mm1
%endmacro

; Register roles planned for the column macros:
; ecx:block
; edi:dest   edx:dest pitch
; esi:src    eax:src pitch

; Col4x4 <a>,<b> - EMPTY PLACEHOLDER: expands to no instructions.
; The disabled IDCT_Block8x4 invokes it with <a> = 0 or 8, and with <b> = 1
; on the path taken when src (esi) is non-zero and <b> = 0 otherwise.
; NOTE(review): presumably <a> is a byte offset selecting a 4-column half of
; the block and <b> selects "add to src" - confirm when implemented.
%macro Col4x4 2


%endmacro

; Col4x8 <a>,<b> - EMPTY PLACEHOLDER: expands to no instructions.
; The disabled IDCT_Block8x8 invokes it with <a> = 0 or 8, and with <b> = 1
; on the path taken when src (esi) is non-zero and <b> = 0 otherwise.
; NOTE(review): parameter meanings are not defined anywhere yet - confirm
; when implemented.
%macro Col4x8 2


%endmacro

%if 0
; IDCT_Block8x8 - DISABLED (inside %if 0) and unfinished.
;
; Stack arguments as read below, after the two pushes:
;   block, dst, dst pitch, src; "ret 16" pops all 16 bytes.
; Runs the Row macro on rows 0..7 of the block, then expands Col4x8 twice.
; Since Col4x8 is currently empty, no destination bytes are ever written.
; Preserves esi and edi.
ALIGN 16
IDCT_Block8x8:
	push	esi
	push	edi

	mov		ecx,[esp+12]		;block
	mov		edi,[esp+12+4]		;dst
	mov		edx,[esp+12+8]		;dst pitch
	mov		esi,[esp+12+12]		;src
	mov		eax,8				;src pitch

	Row		0
	Row		1
	Row		2
	Row		3
	Row		4
	Row		5
	Row		6
	Row		7

	; src == 0 falls through to the Col4x8 <a>,0 form; non-zero goes to .Add
	or		esi,esi
	jne		.Add

	Col4x8	0,0
	Col4x8	8,0
	pop		edi
	pop		esi 
	ret 16

.Add:
	Col4x8	0,1
	Col4x8	8,1
	pop		edi
	pop		esi 
	ret 16

; IDCT_Block8x4 - DISABLED (inside %if 0) and unfinished.
;
; Same argument layout as IDCT_Block8x8: block, dst, dst pitch, src;
; "ret 16" pops all 16 bytes.
; Runs the Row macro on rows 0..3 only, then expands Col4x4 twice.
; Since Col4x4 is currently empty, no destination bytes are ever written.
; Preserves esi and edi.
ALIGN 16
IDCT_Block8x4:
	push	esi
	push	edi

	mov		ecx,[esp+12]		;block (Row addresses rows through ecx)
	mov		edi,[esp+12+4]		;dst
	mov		edx,[esp+12+8]		;dst pitch
	mov		esi,[esp+12+12]		;src
	mov		eax,8				;src pitch

	Row		0
	Row		1
	Row		2
	Row		3

	; src == 0 falls through to the Col4x4 <a>,0 form; non-zero goes to .Add
	or		esi,esi
	jne		.Add

	Col4x4	0,0
	Col4x4	8,0
	pop		edi
	pop		esi 
	ret 16

.Add:
	Col4x4	0,1
	Col4x4	8,1
	pop		edi
	pop		esi 
	ret 16
%endif

ALIGN 16
;-----------------------------------------------------------------------
; IDCT_Const8x8
;   dst[y][x] = saturate_u8(src[y][x] + v)   for an 8x8 block of bytes
;
; Stack arguments on entry (callee pops them with "ret 16"):
;   [esp+4]   v          signed constant added to every byte
;   [esp+8]   dst        destination; rows are "dst pitch" bytes apart
;   [esp+12]  dst pitch
;   [esp+16]  src        source; rows are always 8 bytes apart
;
; Preserves: esi, edi
; Clobbers:  eax, ecx, edx, mm0, mm1, mm7, flags
; NOTE(review): no emms is executed here; presumably the caller is
; expected to issue it before running x87 code - confirm.
;
; Only one byte of the constant can be replicated across mm7, so |v| is
; clamped to 255 first. Without the clamp a value such as 256 would
; replicate its low byte (0) and leave the pixels unchanged instead of
; saturating them. Clamping does not change in-range results, because
; adding or subtracting 255 already saturates every unsigned byte.
;-----------------------------------------------------------------------
IDCT_Const8x8:
	push	esi
	push	edi

	mov		ecx,[esp+12]		;v
	mov		edi,[esp+12+4]		;dst
	mov		edx,[esp+12+8]		;dst pitch
	mov		esi,[esp+12+12]		;src
	mov		eax,8				;src pitch

	test	ecx,ecx
	js		.Sub

.Add:
	cmp		ecx,255				;v > 255 saturates everything anyway
	jbe		.AddSplat
	mov		ecx,255
.AddSplat:
	movd	mm7,ecx				;replicate the low byte into all 8 bytes
	punpcklbw mm7,mm7
	punpcklwd mm7,mm7
	punpckldq mm7,mm7

%rep 4							;two rows per iteration
	movq	mm0,[esi]
	movq	mm1,[esi+eax]
	paddusb mm0,mm7
	lea		esi,[esi+eax*2]
	paddusb mm1,mm7
	movq	[edi],mm0
	movq	[edi+edx],mm1
	lea		edi,[edi+edx*2]
%endrep

	pop		edi
	pop		esi
	ret 16

.Sub:
	neg		ecx					;ecx = |v|; stays 80000000h for INT_MIN
	cmp		ecx,255				;unsigned compare also catches that case
	jbe		.SubSplat
	mov		ecx,255
.SubSplat:
	movd	mm7,ecx				;replicate the low byte into all 8 bytes
	punpcklbw mm7,mm7
	punpcklwd mm7,mm7
	punpckldq mm7,mm7

%rep 4							;two rows per iteration
	movq	mm0,[esi]
	movq	mm1,[esi+eax]
	psubusb mm0,mm7
	lea		esi,[esi+eax*2]
	psubusb mm1,mm7
	movq	[edi],mm0
	movq	[edi+edx],mm1
	lea		edi,[edi+edx*2]
%endrep

	pop		edi
	pop		esi
	ret 16

ALIGN 16
;-----------------------------------------------------------------------
; IDCT_Const4x4
;   dst[y][x] = saturate_u8(src[y][x] + v)   for a 4x4 block of bytes
;
; Stack arguments on entry (callee pops them with "ret 16"):
;   [esp+4]   v          signed constant added to every byte
;   [esp+8]   dst        destination; rows are "dst pitch" bytes apart
;   [esp+12]  dst pitch
;   [esp+16]  src        source; rows are always 8 bytes apart
;
; Preserves: esi, edi
; Clobbers:  eax, ecx, edx, mm0, mm1, mm7, flags
; NOTE(review): no emms is executed here; presumably the caller is
; expected to issue it before running x87 code - confirm.
;
; Only one byte of the constant can be replicated across mm7, so |v| is
; clamped to 255 first. Without the clamp a value such as 256 would
; replicate its low byte (0) and leave the pixels unchanged instead of
; saturating them. Clamping does not change in-range results, because
; adding or subtracting 255 already saturates every unsigned byte.
;-----------------------------------------------------------------------
IDCT_Const4x4:
	push	esi
	push	edi

	mov		ecx,[esp+12]		;v
	mov		edi,[esp+12+4]		;dst
	mov		edx,[esp+12+8]		;dst pitch
	mov		esi,[esp+12+12]		;src
	mov		eax,8				;src pitch

	test	ecx,ecx
	js		.Sub

.Add:
	cmp		ecx,255				;v > 255 saturates everything anyway
	jbe		.AddSplat
	mov		ecx,255
.AddSplat:
	movd	mm7,ecx				;replicate the low byte across mm7
	punpcklbw mm7,mm7
	punpcklwd mm7,mm7
	punpckldq mm7,mm7

%rep 2							;two 4-byte rows per iteration
	movd	mm0,[esi]
	movd	mm1,[esi+eax]
	paddusb mm0,mm7
	lea		esi,[esi+eax*2]
	paddusb mm1,mm7
	movd	[edi],mm0			;only the low 4 bytes are written
	movd	[edi+edx],mm1
	lea		edi,[edi+edx*2]
%endrep

	pop		edi
	pop		esi
	ret 16

.Sub:
	neg		ecx					;ecx = |v|; stays 80000000h for INT_MIN
	cmp		ecx,255				;unsigned compare also catches that case
	jbe		.SubSplat
	mov		ecx,255
.SubSplat:
	movd	mm7,ecx				;replicate the low byte across mm7
	punpcklbw mm7,mm7
	punpcklwd mm7,mm7
	punpckldq mm7,mm7

%rep 2							;two 4-byte rows per iteration
	movd	mm0,[esi]
	movd	mm1,[esi+eax]
	psubusb mm0,mm7
	lea		esi,[esi+eax*2]
	psubusb mm1,mm7
	movd	[edi],mm0			;only the low 4 bytes are written
	movd	[edi+edx],mm1
	lea		edi,[edi+edx*2]
%endrep

	pop		edi
	pop		esi
	ret 16

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -