
📄 simple_idct_mmx.asm

📁 1. Xvid MPEG-4, version 1.13  2. Builds with 0 error(s), 0 warning(s). By Jesse Stone, Taiwan
💻 ASM
📖 Page 1 of 3
%define src1        %3
%define src5        %4
%define dst         %5
%define rounder_op  %6
%define rounder_arg %7
%define shift       %8
  movq mm0,[src0]               ; R4    R0  r4  r0
  movq mm1,[src4]               ; R6    R2  r6  r2
  movq mm3,[src5]               ; R7    R5  r7  r5
  movq mm4,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm4,mm0               ; C4R4+C4R0 C4r4+C4r0
  movq mm5,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm0,mm5               ; -C4R4+C4R0    -C4r4+C4r0
  movq mm5,[coeffs+32]          ; C6    C2  C6  C2
  pmaddwd mm5,mm1               ; C6R6+C2R2 C6r6+C2r2
  movq mm6,[coeffs+40]          ; -C2   C6  -C2 C6
  pmaddwd mm1,mm6               ; -C2R6+C6R2    -C2r6+C6r2
  ; rounder_op mm4, rounder_arg
  movq mm6,mm4                  ; C4R4+C4R0 C4r4+C4r0
  ; rounder_op mm0, rounder_arg
  paddd mm4,mm5                 ; A0        a0
  psubd mm6,mm5                 ; A3        a3
  movq mm5,mm0                  ; -C4R4+C4R0    -C4r4+C4r0
  paddd mm0,mm1                 ; A1        a1
  psubd mm5,mm1                 ; A2        a2
  movq mm1,[coeffs+56]          ; C7    C5  C7  C5
  pmaddwd mm1,mm3               ; C7R7+C5R5 C7r7+C5r5
  movq mm7,[coeffs+72]          ; -C5   -C1 -C5 -C1
  pmaddwd mm7,mm3               ; -C5R7-C1R5    -C5r7-C1r5
  paddd mm1,mm4                 ; A0+B0     a0+b0
  paddd mm4,mm4                 ; 2A0       2a0
  psubd mm4,mm1                 ; A0-B0     a0-b0
  psrad mm1,shift
  psrad mm4,shift
  movq mm2,mm0                  ; A1        a1
  paddd mm0,mm7                 ; A1+B1     a1+b1
  psubd mm2,mm7                 ; A1-B1     a1-b1
  psrad mm0,shift
  psrad mm2,shift
  packssdw mm1,mm1              ; A0+B0 a0+b0
  movd [dst],mm1
  packssdw mm0,mm0              ; A1+B1 a1+b1
  movd [dst + 16],mm0
  packssdw mm2,mm2              ; A1-B1 a1-b1
  movd [dst + 96],mm2
  packssdw mm4,mm4              ; A0-B0 a0-b0
  movd [dst + 112],mm4
  movq mm1,[coeffs+88]          ; C3    C7  C3  C7
  pmaddwd mm1,mm3               ; C3R7+C7R5 C3r7+C7r5
  movq mm2,mm5                  ; A2        a2
  pmaddwd mm3,[coeffs+104]      ; -C1R7+C3R5    -C1r7+C3r5
  paddd mm2,mm1                 ; A2+B2     a2+b2
  psubd mm5,mm1                 ; a2-B2     a2-b2
  psrad mm2,shift
  psrad mm5,shift
  movq mm1,mm6                  ; A3        a3
  paddd mm6,mm3                 ; A3+B3     a3+b3
  psubd mm1,mm3                 ; a3-B3     a3-b3
  psrad mm6,shift
  psrad mm1,shift
  packssdw mm2,mm2              ; A2+B2 a2+b2
  packssdw mm6,mm6              ; A3+B3 a3+b3
  movd [dst + 32],mm2
  packssdw mm1,mm1              ; A3-B3 a3-b3
  packssdw mm5,mm5              ; A2-B2 a2-b2
  movd [dst + 48],mm6
  movd [dst + 64],mm1
  movd [dst + 80],mm5
%undef  src0
%undef  src4
%undef  src1
%undef  src5
%undef  dst
%undef  rounder_op
%undef  rounder_arg
%undef  shift
%endmacro

;---------------------------------------------------------------------------
; IDCT6
;---------------------------------------------------------------------------
%macro  IDCT6       8
%define src0        %1
%define src4        %2
%define src1        %3
%define src5        %4
%define dst         %5
%define rounder_op  %6
%define rounder_arg %7
%define shift       %8
  movq mm0,[src0]               ; R4    R0  r4  r0
  movq mm3,[src5]               ; R7    R5  r7  r5
  movq mm4,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm4,mm0               ; C4R4+C4R0 C4r4+C4r0
  movq mm5,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm0,mm5               ; -C4R4+C4R0    -C4r4+C4r0
  ; rounder_op mm4, rounder_arg
  movq mm6,mm4                  ; C4R4+C4R0 C4r4+C4r0
  ; rounder_op mm0, rounder_arg
  movq mm5,mm0                  ; -C4R4+C4R0    -C4r4+C4r0
  movq mm1,[coeffs+56]          ; C7    C5  C7  C5
  pmaddwd mm1,mm3               ; C7R7+C5R5 C7r7+C5r5
  movq mm7,[coeffs+72]          ; -C5   -C1 -C5 -C1
  pmaddwd mm7,mm3               ; -C5R7-C1R5    -C5r7-C1r5
  paddd mm1,mm4                 ; A0+B0     a0+b0
  paddd mm4,mm4                 ; 2A0       2a0
  psubd mm4,mm1                 ; A0-B0     a0-b0
  psrad mm1,shift
  psrad mm4,shift
  movq mm2,mm0                  ; A1        a1
  paddd mm0,mm7                 ; A1+B1     a1+b1
  psubd mm2,mm7                 ; A1-B1     a1-b1
  psrad mm0,shift
  psrad mm2,shift
  packssdw mm1,mm1              ; A0+B0 a0+b0
  movd [dst],mm1
  packssdw mm0,mm0              ; A1+B1 a1+b1
  movd [dst + 16],mm0
  packssdw mm2,mm2              ; A1-B1 a1-b1
  movd [dst + 96],mm2
  packssdw mm4,mm4              ; A0-B0 a0-b0
  movd [dst + 112],mm4
  movq mm1,[coeffs+88]          ; C3    C7  C3  C7
  pmaddwd mm1,mm3               ; C3R7+C7R5 C3r7+C7r5
  movq mm2,mm5                  ; A2        a2
  pmaddwd mm3,[coeffs+104]      ; -C1R7+C3R5    -C1r7+C3r5
  paddd mm2,mm1                 ; A2+B2     a2+b2
  psubd mm5,mm1                 ; a2-B2     a2-b2
  psrad mm2,shift
  psrad mm5,shift
  movq mm1,mm6                  ; A3        a3
  paddd mm6,mm3                 ; A3+B3     a3+b3
  psubd mm1,mm3                 ; a3-B3     a3-b3
  psrad mm6,shift
  psrad mm1,shift
  packssdw mm2,mm2              ; A2+B2 a2+b2
  packssdw mm6,mm6              ; A3+B3 a3+b3
  movd [dst + 32],mm2
  packssdw mm1,mm1              ; A3-B3 a3-b3
  packssdw mm5,mm5              ; A2-B2 a2-b2
  movd [dst + 48],mm6
  movd [dst + 64],mm1
  movd [dst + 80],mm5
%undef  src0
%undef  src4
%undef  src1
%undef  src5
%undef  dst
%undef  rounder_op
%undef  rounder_arg
%undef  shift
%endmacro

;---------------------------------------------------------------------------
; IDCT2
;---------------------------------------------------------------------------
%macro  IDCT2       8
%define src0        %1
%define src4        %2
%define src1        %3
%define src5        %4
%define dst         %5
%define rounder_op  %6
%define rounder_arg %7
%define shift       %8
  movq mm0,[src0]               ; R4    R0  r4  r0
  movq mm2,[src1]               ; R3    R1  r3  r1
  movq mm3,[src5]               ; R7    R5  r7  r5
  movq mm4,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm4,mm0               ; C4R4+C4R0 C4r4+C4r0
  movq mm5,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm0,mm5               ; -C4R4+C4R0    -C4r4+C4r0
  ; rounder_op mm4, rounder_arg
  movq mm6,mm4                  ; C4R4+C4R0 C4r4+C4r0
  movq mm7,[coeffs+48]          ; C3    C1  C3  C1
  ; rounder_op mm0, rounder_arg
  pmaddwd mm7,mm2               ; C3R3+C1R1 C3r3+C1r1
  movq mm5,mm0                  ; -C4R4+C4R0    -C4r4+C4r0
  movq mm1,[coeffs+56]          ; C7    C5  C7  C5
  pmaddwd mm1,mm3               ; C7R7+C5R5 C7r7+C5r5
  pmaddwd mm2,[coeffs+64]       ; -C7R3+C3R1    -C7r3+C3r1
  paddd mm7,mm1                 ; B0        b0
  movq mm1,[coeffs+72]          ; -C5   -C1 -C5 -C1
  pmaddwd mm1,mm3               ; -C5R7-C1R5    -C5r7-C1r5
  paddd mm7,mm4                 ; A0+B0     a0+b0
  paddd mm4,mm4                 ; 2A0       2a0
  psubd mm4,mm7                 ; A0-B0     a0-b0
  paddd mm1,mm2                 ; B1        b1
  psrad mm7,shift
  psrad mm4,shift
  movq mm2,mm0                  ; A1        a1
  paddd mm0,mm1                 ; A1+B1     a1+b1
  psubd mm2,mm1                 ; A1-B1     a1-b1
  psrad mm0,shift
  psrad mm2,shift
  packssdw mm7,mm7              ; A0+B0 a0+b0
  movd [dst],mm7
  packssdw mm0,mm0              ; A1+B1 a1+b1
  movd [dst + 16],mm0
  packssdw mm2,mm2              ; A1-B1 a1-b1
  movd [dst + 96],mm2
  packssdw mm4,mm4              ; A0-B0 a0-b0
  movd [dst + 112],mm4
  movq mm0,[src1]               ; R3    R1  r3  r1
  movq mm4,[coeffs+80]          ; -C1   C5  -C1     C5
  pmaddwd mm4,mm0               ; -C1R3+C5R1    -C1r3+C5r1
  movq mm7,[coeffs+88]          ; C3    C7  C3  C7
  pmaddwd mm0,[coeffs+96]       ; -C5R3+C7R1    -C5r3+C7r1
  pmaddwd mm7,mm3               ; C3R7+C7R5 C3r7+C7r5
  movq mm2,mm5                  ; A2        a2
  pmaddwd mm3,[coeffs+104]      ; -C1R7+C3R5    -C1r7+C3r5
  paddd mm4,mm7                 ; B2        b2
  paddd mm2,mm4                 ; A2+B2     a2+b2
  psubd mm5,mm4                 ; a2-B2     a2-b2
  psrad mm2,shift
  psrad mm5,shift
  movq mm4,mm6                  ; A3        a3
  paddd mm3,mm0                 ; B3        b3
  paddd mm6,mm3                 ; A3+B3     a3+b3
  psubd mm4,mm3                 ; a3-B3     a3-b3
  psrad mm6,shift
  psrad mm4,shift
  packssdw mm2,mm2              ; A2+B2 a2+b2
  packssdw mm6,mm6              ; A3+B3 a3+b3
  movd [dst + 32],mm2
  packssdw mm4,mm4              ; A3-B3 a3-b3
  packssdw mm5,mm5              ; A2-B2 a2-b2
  movd [dst + 48],mm6
  movd [dst + 64],mm4
  movd [dst + 80],mm5
%undef  src0
%undef  src4
%undef  src1
%undef  src5
%undef  dst
%undef  rounder_op
%undef  rounder_arg
%undef  shift
%endmacro

;---------------------------------------------------------------------------
; IDCT3
;---------------------------------------------------------------------------
%macro  IDCT3       8
%define src0        %1
%define src4        %2
%define src1        %3
%define src5        %4
%define dst         %5
%define rounder_op  %6
%define rounder_arg %7
%define shift       %8
  movq mm0,[src0]               ; R4    R0  r4  r0
  movq mm2,[src1]               ; R3    R1  r3  r1
  movq mm4,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm4,mm0               ; C4R4+C4R0 C4r4+C4r0
  movq mm5,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm0,mm5               ; -C4R4+C4R0    -C4r4+C4r0
  ; rounder_op mm4, rounder_arg
  movq mm6,mm4                  ; C4R4+C4R0 C4r4+C4r0
  movq mm7,[coeffs+48]          ; C3    C1  C3  C1
  ; rounder_op mm0, rounder_arg
  pmaddwd mm7,mm2               ; C3R3+C1R1 C3r3+C1r1
  movq mm5,mm0                  ; -C4R4+C4R0    -C4r4+C4r0
  movq mm3,[coeffs+64]
  pmaddwd mm3,mm2               ; -C7R3+C3R1    -C7r3+C3r1
  paddd mm7,mm4                 ; A0+B0     a0+b0
  paddd mm4,mm4                 ; 2A0       2a0
  psubd mm4,mm7                 ; A0-B0     a0-b0
  psrad mm7,shift
  psrad mm4,shift
  movq mm1,mm0                  ; A1        a1
  paddd mm0,mm3                 ; A1+B1     a1+b1
  psubd mm1,mm3                 ; A1-B1     a1-b1
  psrad mm0,shift
  psrad mm1,shift
  packssdw mm7,mm7              ; A0+B0 a0+b0
  movd [dst],mm7
  packssdw mm0,mm0              ; A1+B1 a1+b1
  movd [dst + 16],mm0
  packssdw mm1,mm1              ; A1-B1 a1-b1
  movd [dst + 96],mm1
  packssdw mm4,mm4              ; A0-B0 a0-b0
  movd [dst + 112],mm4
  movq mm4,[coeffs+80]          ; -C1   C5  -C1     C5
  pmaddwd mm4,mm2               ; -C1R3+C5R1    -C1r3+C5r1
  pmaddwd mm2,[coeffs+96]       ; -C5R3+C7R1    -C5r3+C7r1
  movq mm1,mm5                  ; A2        a2
  paddd mm1,mm4                 ; A2+B2     a2+b2
  psubd mm5,mm4                 ; a2-B2     a2-b2
  psrad mm1,shift
  psrad mm5,shift
  movq mm4,mm6                  ; A3        a3
  paddd mm6,mm2                 ; A3+B3     a3+b3
  psubd mm4,mm2                 ; a3-B3     a3-b3
  psrad mm6,shift
  psrad mm4,shift
  packssdw mm1,mm1              ; A2+B2 a2+b2
  packssdw mm6,mm6              ; A3+B3 a3+b3
  movd [dst + 32],mm1
  packssdw mm4,mm4              ; A3-B3 a3-b3
  packssdw mm5,mm5              ; A2-B2 a2-b2
  movd [dst + 48],mm6
  movd [dst + 64],mm4
  movd [dst + 80],mm5
%undef  src0
%undef  src4
%undef  src1
%undef  src5
%undef  dst
%undef  rounder_op
%undef  rounder_arg
%undef  shift
%endmacro

;---------------------------------------------------------------------------
; IDCT5
;---------------------------------------------------------------------------
%macro  IDCT5       8
%define src0        %1
%define src4        %2
%define src1        %3
%define src5        %4
%define dst         %5
%define rounder_op  %6
%define rounder_arg %7
%define shift       %8
  movq mm0,[src0]               ; R4    R0  r4  r0
  movq mm1,[src4]               ; R6    R2  r6  r2
  movq mm4,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm4,mm0               ; C4R4+C4R0 C4r4+C4r0
  movq mm5,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm0,mm5               ; -C4R4+C4R0    -C4r4+C4r0
  movq mm5,[coeffs+32]          ; C6    C2  C6  C2
  pmaddwd mm5,mm1               ; C6R6+C2R2 C6r6+C2r2
  movq mm6,[coeffs+40]          ; -C2   C6  -C2 C6
  pmaddwd mm1,mm6               ; -C2R6+C6R2    -C2r6+C6r2
  ; rounder_op mm4, rounder_arg
  movq mm6,mm4                  ; C4R4+C4R0 C4r4+C4r0
  paddd mm4,mm5                 ; A0        a0
  ; rounder_op mm0, rounder_arg
  psubd mm6,mm5                 ; A3        a3
  movq mm5,mm0                  ; -C4R4+C4R0    -C4r4+C4r0
  paddd mm0,mm1                 ; A1        a1
  psubd mm5,mm1                 ; A2        a2
  movq mm2,[src0 + 8]           ; R4    R0  r4  r0
  movq mm3,[src4 + 8]           ; R6    R2  r6  r2
  movq mm1,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm1,mm2               ; C4R4+C4R0 C4r4+C4r0
  movq mm7,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm2,mm7               ; -C4R4+C4R0    -C4r4+C4r0
  movq mm7,[coeffs+32]          ; C6    C2  C6  C2
  pmaddwd mm7,mm3               ; C6R6+C2R2 C6r6+C2r2
  pmaddwd mm3,[coeffs+40]       ; -C2R6+C6R2    -C2r6+C6r2
  ; rounder_op mm1, rounder_arg
  paddd mm7,mm1                 ; A0        a0
  paddd mm1,mm1                 ; 2C0       2c0
  ; rounder_op mm2, rounder_arg
  psubd mm1,mm7                 ; A3        a3
  paddd mm3,mm2                 ; A1        a1
  paddd mm2,mm2                 ; 2C1       2c1
  psubd mm2,mm3                 ; A2        a2
  psrad mm4,shift
  psrad mm7,shift
  psrad mm3,shift
  packssdw mm4,mm7              ; A0    a0
  movq [dst],mm4
  psrad mm0,shift
  packssdw mm0,mm3              ; A1    a1
  movq [dst + 16],mm0
  movq [dst + 96],mm0
  movq [dst + 112],mm4
  psrad mm5,shift
  psrad mm6,shift
  psrad mm2,shift
  packssdw mm5,mm2              ; A2-B2 a2-b2
  movq [dst + 32],mm5
  psrad mm1,shift
  packssdw mm6,mm1              ; A3+B3 a3+b3
  movq [dst + 48],mm6
  movq [dst + 64],mm6
  movq [dst + 80],mm5
%undef  src0
%undef  src4
%undef  src1
%undef  src5
%undef  dst
%undef  rounder_op
%undef  rounder_arg
%undef  shift
%endmacro

;---------------------------------------------------------------------------
; IDCT1
;---------------------------------------------------------------------------
%macro  IDCT1       8
%define src0        %1
%define src4        %2
%define src1        %3
%define src5        %4
%define dst         %5
%define rounder_op  %6
%define rounder_arg %7
%define shift       %8
  movq mm0,[src0]               ; R4    R0  r4  r0
  movq mm1,[src4]               ; R6    R2  r6  r2
  movq mm2,[src1]               ; R3    R1  r3  r1
  movq mm4,[coeffs+16]          ; C4    C4  C4  C4
  pmaddwd mm4,mm0               ; C4R4+C4R0 C4r4+C4r0
  movq mm5,[coeffs+24]          ; -C4   C4  -C4 C4
  pmaddwd mm0,mm5               ; -C4R4+C4R0    -C4r4+C4r0
  movq mm5,[coeffs+32]          ; C6    C2  C6  C2
  pmaddwd mm5,mm1               ; C6R6+C2R2 C6r6+C2r2
  movq mm6,[coeffs+40]          ; -C2   C6  -C2 C6
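Note: every IDCTn macro above is built around one idiom: pmaddwd multiplies four packed signed 16-bit inputs by four packed cosine constants from the coeffs table and adds adjacent products, so a single instruction yields two 32-bit sums such as C4R4+C4R0 (one per column) at once. The snippet below is a minimal, self-contained sketch of that idiom only; it is not part of simple_idct_mmx.asm, and the label names, coefficient values, and 32-bit Linux exit stub are hypothetical, chosen purely for illustration.

; pmaddwd_sketch.asm -- assemble with "nasm -f elf32", link with "ld -m elf_i386"
section .data
align 8
coeff_pair: dw 2048, 1024, 2048, 1024  ; two (c0, c1) constant pairs (made-up values)
samples:    dw 3, 5, 7, 9              ; two (x0, x1) input pairs

section .bss
result:     resq 1                     ; receives two packed 32-bit dot products

section .text
global _start
_start:
  movq    mm0, [samples]               ; mm0 = x0 x1 | x0' x1' (four 16-bit words)
  pmaddwd mm0, [coeff_pair]            ; each 32-bit lane = x0*c0 + x1*c1
  movq    [result], mm0                ; store 3*2048+5*1024 and 7*2048+9*1024
  emms                                 ; leave the MMX/x87 state clean
  mov     eax, 1                       ; exit(0) via int 0x80 (32-bit Linux)
  xor     ebx, ebx
  int     0x80

In the real macros the same pattern runs eight times per column pair, with the A0..A3 (even-row) and B0..B3 (odd-row) partial sums combined as (A±B) >> shift before packssdw narrows them back to 16 bits.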
