⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fdct_mmx.asm

📁 视频压缩标准MPEG4的视频参考代码xvid9.1
💻 ASM
📖 第 1 页 / 共 4 页
字号:
;/*****************************************************************************
; *
; *  XVID MPEG-4 VIDEO CODEC
; *  mmx version - fast discrete cosine transformation
; *
; *  Initial version provided by Intel at AppNote AP-922
; *  Copyright (C) 1999 Intel Corporation,
; *
; *  completed and corrected
; *  Copyright (C) 2000 - Royce Shih-Wea Liao <liaor@iname.com>,
; *
; *  ported to NASM and some minor changes
; *  Copyright (C) 2001 - Michael Militzer <isibaar@xvid.org>
; *
; *  This file is part of XviD, a free MPEG-4 video encoder/decoder
; *
; *  XviD is free software; you can redistribute it and/or modify it
; *  under the terms of the GNU General Public License as published by
; *  the Free Software Foundation; either version 2 of the License, or
; *  (at your option) any later version.
; *
; *  This program is distributed in the hope that it will be useful,
; *  but WITHOUT ANY WARRANTY; without even the implied warranty of
; *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; *  GNU General Public License for more details.
; *
; *  You should have received a copy of the GNU General Public License
; *  along with this program; if not, write to the Free Software
; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
; *
; *  Under section 8 of the GNU General Public License, the copyright
; *  holders of XVID explicitly forbid distribution in the following
; *  countries:
; *
; *    - Japan
; *    - United States of America
; *
; *  Linking XviD statically or dynamically with other modules is making a
; *  combined work based on XviD.  Thus, the terms and conditions of the
; *  GNU General Public License cover the whole combination.
; *
; *  As a special exception, the copyright holders of XviD give you
; *  permission to link XviD with independent modules that communicate with
; *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
; *  license terms of these independent modules, and to copy and distribute
; *  the resulting combined work under terms of your choice, provided that
; *  every copy of the combined work is accompanied by a complete copy of
; *  the source code of XviD (the version of XviD used to produce the
; *  combined work), being distributed under the terms of the GNU General
; *  Public License plus this exception.  An independent module is a module
; *  which is not derived from or based on XviD.
; *
; *  Note that people who make modified versions of XviD are not obligated
; *  to grant this special exception for their modified versions; it is
; *  their choice whether to do so.  The GNU General Public License gives
; *  permission to release a modified version without this exception; this
; *  exception also makes it possible to release a modified version which
; *  carries forward this exception.
; *
; * $Id: fdct_mmx.asm,v 1.6 2002/11/16 23:51:58 edgomez Exp $
; *
; *************************************************************************/

; Syntax: NASM, 32-bit code.
BITS 32

; cglobal NAME
;   Export NAME. When PREFIX is defined the exported symbol is _NAME and
;   NAME is aliased to it, so the rest of the file can keep using NAME.
%macro cglobal 1
    %ifdef PREFIX
        global _%1
        %define %1 _%1
    %else
        global %1
    %endif
%endmacro

;-----------------------------------------------------------------------------
; Register roles
;-----------------------------------------------------------------------------
%define INP             eax             ; input block pointer
%define TABLE           ebx             ; coefficient table pointer
%define TABLEF          ebx             ; same register as TABLE
%define OUT             ecx             ; output block pointer
%define round_frw_row   edx

; Row addresses: one row every 16 bytes.
%define INP_1   eax + 16
%define INP_2   eax + 32
%define INP_3   eax + 48
%define INP_4   eax + 64
%define INP_5   eax + 80
%define INP_6   eax + 96
%define INP_7   eax + 112

%define OUT_1   ecx + 16
%define OUT_2   ecx + 32
%define OUT_3   ecx + 48
%define OUT_4   ecx + 64
%define OUT_5   ecx + 80
%define OUT_6   ecx + 96
%define OUT_7   ecx + 112
%define OUT_8   ecx + 128

; Table addresses: one entry every 64 bytes.
%define TABLE_1 ebx + 64
%define TABLE_2 ebx + 128
%define TABLE_3 ebx + 192
%define TABLE_4 ebx + 256
%define TABLE_5 ebx + 320
%define TABLE_6 ebx + 384
%define TABLE_7 ebx + 448

; x0..x7 / y0..y7: input / output rows addressed through INP / OUT.
%define x0      INP + 0*16
%define x1      INP + 1*16
%define x2      INP + 2*16
%define x3      INP + 3*16
%define x4      INP + 4*16
%define x5      INP + 5*16
%define x6      INP + 6*16
%define x7      INP + 7*16

%define y0      OUT + 0*16
%define y1      OUT + 1*16
%define y2      OUT + 2*16
%define y3      OUT + 3*16
%define y4      OUT + 4*16
%define y5      OUT + 5*16
%define y6      OUT + 6*16
%define y7      OUT + 7*16

; Offsets of the 8-byte rows of tg_all_16, relative to TABLEF.
%define tg_1_16     (TABLEF + 0)
%define tg_2_16     (TABLEF + 8)
%define tg_3_16     (TABLEF + 16)
%define cos_4_16    (TABLEF + 24)
%define ocos_4_16   (TABLEF + 32)

;---added for sse2
; Offsets of the 16-byte rows of tg_all_16_2, relative to TABLEF.
%define tg_1_16_2   (TABLEF + 0)
%define tg_2_16_2   (TABLEF + 16)
%define tg_3_16_2   (TABLEF + 32)
%define cos_4_16_2  (TABLEF + 48)
%define ocos_4_16_2 (TABLEF + 64)
;---

%ifdef FORMAT_COFF
SECTION .data data
%else
SECTION .data data align=64
%endif

ALIGN 16

BITS_FRW_ACC        equ 3                       ; 2 or 3 for accuracy
SHIFT_FRW_COL       equ BITS_FRW_ACC
SHIFT_FRW_ROW       equ (BITS_FRW_ACC + 17)
RND_FRW_ROW         equ (1 << (SHIFT_FRW_ROW-1))

SHIFT_FRW_ROW_CLIP2 equ (4)
SHIFT_FRW_ROW_CLIP1 equ (SHIFT_FRW_ROW - SHIFT_FRW_ROW_CLIP2)

PASS1_BITS          equ (2)
CONST_BITS          equ (13)

one_corr            dw 1, 1, 1, 1

;---added for sse2
align 16
one_corr_2          dw 1, 1, 1, 1
                    dw 1, 1, 1, 1
;---

r_frw_row           dd RND_FRW_ROW, RND_FRW_ROW

; Fixed-point constants, one per 4-word row (row order matches the tg_*_16 /
; cos_4_16 / ocos_4_16 offsets defined above). Rows 3 and 4 are stored minus
; 65536 so that the value fits in a signed 16-bit word.
tg_all_16           dw 13036, 13036, 13036, 13036       ; tan(1*pi/16) * (1<<16) + 0.5
                    dw 27146, 27146, 27146, 27146       ; tan(2*pi/16) * (1<<16) + 0.5
                    dw -21746, -21746, -21746, -21746   ; tan(3*pi/16) * (1<<16) + 0.5 - 65536
                    dw -19195, -19195, -19195, -19195   ; cos(4*pi/16) * (1<<16) + 0.5 - 65536
                    dw 23170, 23170, 23170, 23170       ; cos(4*pi/16) * (1<<15) + 0.5

;---added for sse2
; Same constants as tg_all_16, each repeated over 8 words (16-byte rows).
align 16
tg_all_16_2         dw 13036, 13036, 13036, 13036       ; tan(1*pi/16) * (1<<16) + 0.5
                    dw 13036, 13036, 13036, 13036
                    dw 27146, 27146, 27146, 27146       ; tan(2*pi/16) * (1<<16) + 0.5
                    dw 27146, 27146, 27146, 27146
                    dw -21746, -21746, -21746, -21746   ; tan(3*pi/16) * (1<<16) + 0.5 - 65536
                    dw -21746, -21746, -21746, -21746
                    dw -19195, -19195, -19195, -19195   ; cos(4*pi/16) * (1<<16) + 0.5 - 65536
                    dw -19195, -19195, -19195, -19195
                    dw 23170, 23170, 23170, 23170       ; cos(4*pi/16) * (1<<15) + 0.5
                    dw 23170, 23170, 23170, 23170
;---

; Row coefficient table: 8 entries (row0..row7) of 32 words (64 bytes) each.
tab_frw_01234567:
                    ; row0
                    dw 16384, 16384, 21407, -8867       ; w09 w01 w08 w00
                    dw 16384, 16384, 8867, -21407       ; w13 w05 w12 w04
                    dw 16384, -16384, 8867, 21407       ; w11 w03 w10 w02
                    dw -16384, 16384, -21407, -8867     ; w15 w07 w14 w06
                    dw 22725, 12873, 19266, -22725      ; w22 w20 w18 w16
                    dw 19266, 4520, -4520, -12873       ; w23 w21 w19 w17
                    dw 12873, 4520, 4520, 19266         ; w30 w28 w26 w24
                    dw -22725, 19266, -12873, -22725    ; w31 w29 w27 w25

                    ; row1
                    dw 22725, 22725, 29692, -12299      ; w09 w01 w08 w00
                    dw 22725, 22725, 12299, -29692      ; w13 w05 w12 w04
                    dw 22725, -22725, 12299, 29692      ; w11 w03 w10 w02
                    dw -22725, 22725, -29692, -12299    ; w15 w07 w14 w06
                    dw 31521, 17855, 26722, -31521      ; w22 w20 w18 w16
                    dw 26722, 6270, -6270, -17855       ; w23 w21 w19 w17
                    dw 17855, 6270, 6270, 26722         ; w30 w28 w26 w24
                    dw -31521, 26722, -17855, -31521    ; w31 w29 w27 w25

                    ; row2
                    dw 21407, 21407, 27969, -11585      ; w09 w01 w08 w00
                    dw 21407, 21407, 11585, -27969      ; w13 w05 w12 w04
                    dw 21407, -21407, 11585, 27969      ; w11 w03 w10 w02
                    dw -21407, 21407, -27969, -11585    ; w15 w07 w14 w06
                    dw 29692, 16819, 25172, -29692      ; w22 w20 w18 w16
                    dw 25172, 5906, -5906, -16819       ; w23 w21 w19 w17
                    dw 16819, 5906, 5906, 25172         ; w30 w28 w26 w24
                    dw -29692, 25172, -16819, -29692    ; w31 w29 w27 w25

                    ; row3
                    dw 19266, 19266, 25172, -10426      ; w09 w01 w08 w00
                    dw 19266, 19266, 10426, -25172      ; w13 w05 w12 w04
                    dw 19266, -19266, 10426, 25172      ; w11 w03 w10 w02
                    dw -19266, 19266, -25172, -10426    ; w15 w07 w14 w06
                    dw 26722, 15137, 22654, -26722      ; w22 w20 w18 w16
                    dw 22654, 5315, -5315, -15137       ; w23 w21 w19 w17
                    dw 15137, 5315, 5315, 22654         ; w30 w28 w26 w24
                    dw -26722, 22654, -15137, -26722    ; w31 w29 w27 w25

                    ; row4
                    dw 16384, 16384, 21407, -8867       ; w09 w01 w08 w00
                    dw 16384, 16384, 8867, -21407       ; w13 w05 w12 w04
                    dw 16384, -16384, 8867, 21407       ; w11 w03 w10 w02
                    dw -16384, 16384, -21407, -8867     ; w15 w07 w14 w06
                    dw 22725, 12873, 19266, -22725      ; w22 w20 w18 w16
                    dw 19266, 4520, -4520, -12873       ; w23 w21 w19 w17
                    dw 12873, 4520, 4520, 19266         ; w30 w28 w26 w24
                    dw -22725, 19266, -12873, -22725    ; w31 w29 w27 w25

                    ; row5
                    dw 19266, 19266, 25172, -10426      ; w09 w01 w08 w00
                    dw 19266, 19266, 10426, -25172      ; w13 w05 w12 w04
                    dw 19266, -19266, 10426, 25172      ; w11 w03 w10 w02
                    dw -19266, 19266, -25172, -10426    ; w15 w07 w14 w06
                    dw 26722, 15137, 22654, -26722      ; w22 w20 w18 w16
                    dw 22654, 5315, -5315, -15137       ; w23 w21 w19 w17
                    dw 15137, 5315, 5315, 22654         ; w30 w28 w26 w24
                    dw -26722, 22654, -15137, -26722    ; w31 w29 w27 w25

                    ; row6
                    dw 21407, 21407, 27969, -11585      ; w09 w01 w08 w00
                    dw 21407, 21407, 11585, -27969      ; w13 w05 w12 w04
                    dw 21407, -21407, 11585, 27969      ; w11 w03 w10 w02
                    dw -21407, 21407, -27969, -11585    ; w15 w07 w14 w06
                    dw 29692, 16819, 25172, -29692      ; w22 w20 w18 w16
                    dw 25172, 5906, -5906, -16819       ; w23 w21 w19 w17
                    dw 16819, 5906, 5906, 25172         ; w30 w28 w26 w24
                    dw -29692, 25172, -16819, -29692    ; w31 w29 w27 w25

                    ; row7
                    dw 22725, 22725, 29692, -12299      ; w09 w01 w08 w00
                    dw 22725, 22725, 12299, -29692      ; w13 w05 w12 w04
                    dw 22725, -22725, 12299, 29692      ; w11 w03 w10 w02
                    dw -22725, 22725, -29692, -12299    ; w15 w07 w14 w06
                    dw 31521, 17855, 26722, -31521      ; w22 w20 w18 w16
                    dw 26722, 6270, -6270, -17855       ; w23 w21 w19 w17
                    dw 17855, 6270, 6270, 26722         ; w30 w28 w26 w24
                    dw -31521, 26722, -17855, -31521    ; w31 w29 w27 w25

align 128

FIX_1               dw 10703, 4433, 10703, 4433
                    dw 10703, 4433, 10703, 4433

FIX_2               dw 4433, -10704, 4433, -10704
                    dw 4433, -10704, 4433, -10704

rounder_5           dw 2+16,2+16,2+16,2+16,2+16,2+16,2+16,2+16
rounder_11          dd 1024,1024,1024,1024
rounder_18          dd 16384+131072,16384+131072,16384+131072,16384+131072

align 128

FIX_3               dw 6437, 2260, 6437, 2260
                    dw 6437, 2260, 6437, 2260
                    dw 11363, 9633, 11363, 9633
                    dw 11363, 9633, 11363, 9633

FIX_4               dw -11362, -6436, -11362, -6436
                    dw -11362, -6436, -11362, -6436
                    dw 9633, -2259, 9633, -2259
                    dw 9633, -2259, 9633, -2259

FIX_5               dw 2261, 9633, 2261, 9633
                    dw 2261, 9633, 2261, 9633
                    dw 6437, -11362, 6437, -11362
                    dw 6437, -11362, 6437, -11362

FIX_6               dw 9633, -11363, 9633, -11363
                    dw 9633, -11363, 9633, -11363
                    dw 2260, -6436, 2260, -6436
                    dw 2260, -6436, 2260, -6436

align 128

; 64 zero-initialised words of writable storage.
; NOTE(review): not referenced by the part of fdct_mmx visible here; it sits
; in .data, so any routine that writes to it shares one static buffer.
buffer              dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0
                    dw 0,0,0,0,0,0,0,0

SECTION .text

ALIGN 16

cglobal fdct_mmx

;-----------------------------------------------------------------------------
; void fdct_mmx(short *block);
;
; In:    block pointer passed on the stack (32-bit code). It is read from
;        [esp + 8] because ebx has already been pushed at that point.
; Regs:  eax = INP, ecx = OUT (set equal to INP, so results are written back
;        into the input block), ebx = TABLEF -> tg_all_16, mm0-mm7 scratch.
; Saves: ebx is pushed on entry.
; Note:  the instruction order is kept exactly as in the original listing.
;-----------------------------------------------------------------------------
fdct_mmx:

    push    ebx

    mov     INP, dword [esp + 8]        ; block

    mov     TABLEF, tg_all_16
    mov     OUT, INP

    movq    mm0, [x1]                   ; mm0 = x[1]
    movq    mm1, [x6]                   ; mm1 = x[6]
    movq    mm2, mm0                    ; mm2 = x[1]
    movq    mm3, [x2]                   ; mm3 = x[2]
    paddsw  mm0, mm1                    ; mm0 = t1 = x[1] + x[6]
    movq    mm4, [x5]                   ; mm4 = x[5]
    psllw   mm0, SHIFT_FRW_COL          ; t1 <<= SHIFT_FRW_COL
    movq    mm5, [x0]                   ; mm5 = x[0]
    paddsw  mm4, mm3                    ; mm4 = t2 = x[2] + x[5]
    paddsw  mm5, [x7]                   ; mm5 = t0 = x[0] + x[7]
    psllw   mm4, SHIFT_FRW_COL          ; t2 <<= SHIFT_FRW_COL
    movq    mm6, mm0                    ; mm6 = t1
    psubsw  mm2, mm1                    ; mm2 = t6 = x[1] - x[6]
    movq    mm1, [tg_2_16]              ; mm1 = tg_2_16
    psubsw  mm0, mm4                    ; mm0 = tm12 = t1 - t2
    movq    mm7, [x3]                   ; mm7 = x[3]
    pmulhw  mm1, mm0                    ; mm1 = tm12*tg_2_16
    paddsw  mm7, [x4]                   ; mm7 = t3 = x[3] + x[4]
    psllw   mm5, SHIFT_FRW_COL          ; t0 <<= SHIFT_FRW_COL
    paddsw  mm6, mm4                    ; mm6 = tp12 = t1 + t2
    psllw   mm7, SHIFT_FRW_COL          ; t3 <<= SHIFT_FRW_COL
    movq    mm4, mm5                    ; mm4 = t0
    psubsw  mm5, mm7                    ; mm5 = tm03 = t0 - t3
    paddsw  mm1, mm5                    ; mm1 = y2 = tm03 + tm12*tg_2_16
    paddsw  mm4, mm7                    ; mm4 = tp03 = t0 + t3
    por     mm1, qword [one_corr]       ; correction y2 +0.5 (sets bit 0 of each word)
    psllw   mm2, SHIFT_FRW_COL+1        ; t6 <<= SHIFT_FRW_COL+1
    pmulhw  mm5, [tg_2_16]              ; mm5 = tm03*tg_2_16
    movq    mm7, mm4                    ; mm7 = tp03
    psubsw  mm3, [x5]                   ; mm3 = t5 = x[2] - x[5]
    psubsw  mm4, mm6                    ; mm4 = y4 = tp03 - tp12
    movq    [y2], mm1                   ; save y2
    paddsw  mm7, mm6                    ; mm7 = y0 = tp03 + tp12
    movq    mm1, [x3]                   ; mm1 = x[3]
    psllw   mm3, SHIFT_FRW_COL+1        ; t5 <<= SHIFT_FRW_COL+1
    psubsw  mm1, [x4]                   ; mm1 = t4 = x[3] - x[4]
    movq    mm6, mm2                    ; mm6 = t6

    ; NOTE: this excerpt of the listing ends here; the rest of fdct_mmx is
    ; not part of it.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -