📄 gcc_dwt_mmx.s

📁 JPEG2000压缩解压图像源码
💻 S
📖 第 1 页 / 共 2 页
字号:
12 下一页
/*****************************************************************************/
/* File: gcc_dwt_mmx.s [scope = CORESYS/TRANSFORMS]                          */
/* Version: Kakadu, V2.2                                                     */
/* Author: David Taubman                                                     */
/* Last Revised: 20 June, 2001                                               */
/*****************************************************************************/
/* Copyright 2001, David Taubman, The University of New South Wales (UNSW)   */
/* The copyright owner is Unisearch Ltd, Australia (commercial arm of UNSW)  */
/* Neither this copyright statement, nor the licensing details below         */
/* may be removed from this file or dissociated from its contents.           */
/*****************************************************************************/
/*****************************************************************************/
/* Licensee: Book Owner */
/* License number: 99999 */
/* The Licensee has been granted a NON-COMMERCIAL license to the contents of */
/* this source file, said Licensee being the owner of a copy of the book, */
/* "JPEG2000: Image Compression Fundamentals, Standards and Practice," by */
/* Taubman and Marcellin (Kluwer Academic Publishers, 2001).  A brief summary */
/* of the license appears below.  This summary is not to be relied upon in */
/* preference to the full text of the license agreement, which was accepted */
/* upon breaking the seal of the compact disc accompanying the above-mentioned */
/* book. */
/* 1. The Licensee has the right to Non-Commercial Use of the Kakadu software, */
/*    Version 2.2, including distribution of one or more Applications built */
/*    using the software, provided such distribution is not for financial */
/*    return. */
/* 2. The Licensee has the right to personal use of the Kakadu software, */
/*    Version 2.2. */
/* 3. The Licensee has the right to distribute Reusable Code (including */
/*    source code and dynamically or statically linked libraries) to a Third */
/*    Party, provided the Third Party possesses a license to use the Kakadu */
/*    software, Version 2.2, and provided such distribution is not for */
/*    financial return. */
/*****************************************************************************/
/*****************************************************************************/
/* Description:                                                              */
/*   Assembler file for use with GCC builds on Pentium platforms (e.g.,      */
/* Linux).  You need only include this if the core system was built with     */
/* KDU_PENTIUM_GCC defined.                                                  */
/*****************************************************************************/
/* Conventions used throughout this file:                                    */
/*   * AT&T operand order (source, destination), 32-bit x86 with MMX.        */
/*   * Arguments are read from the stack relative to %ebp (8(%ebp) is the    */
/*     first argument); results are returned in %eax.                        */
/*   * %eax, %ecx and %edx are used as scratch.  %ebx is saved on entry and  */
/*     restored on exit by every function that touches it, because the       */
/*     caller is entitled to find it unchanged.                              */
/*   * Every MMX section finishes with EMMS before control returns to the    */
/*     compiler generated stub code.                                         */
/*****************************************************************************/

        .text

/*****************************************************************************/
/* EXTERN                      gcc_dwt_mmx_exists                            */
/*****************************************************************************/
/* Takes no arguments.  Executes CPUID with %eax=1 and returns 1 in %eax if  */
/* bit 23 (mask 0x800000) of the resulting %edx is set, otherwise 0.  That   */
/* bit is the MMX feature flag.                                              */
/* Stack frame:  -4(%ebp) = return value, -8(%ebp) = saved %ebx.             */
/* NOTE(review): CPUID itself is assumed to be available; this routine does  */
/* not probe for it.                                                         */
/*****************************************************************************/
        .align 16
.globl gcc_dwt_mmx_exists
gcc_dwt_mmx_exists:
.LFB1:
        pushl   %ebp
.LCFI0:
        movl    %esp, %ebp
.LCFI1:
        subl    $4, %esp
.LCFI2:
        pushl   %ebx                    # CPUID overwrites %ebx; keep the caller's copy
        movl    $1, -4(%ebp)            # result = 1 until proven otherwise
    /* Here is where we insert the special MMX code. */
        movl    $1, %eax                # CPUID leaf 1: feature flags
        cpuid                           # writes %eax, %ebx, %ecx and %edx
        testl   $0x800000, %edx         # bit 23 of %edx = MMX supported
        jnz     .Lmmx_exists_label
        movl    $0, -4(%ebp)            # flag clear: report "no MMX"
.Lmmx_exists_label:
    /* Here is where the GCC generated stub code resumes. */
        movl    -4(%ebp), %eax
        popl    %ebx                    # %esp still points at the saved %ebx
        movl    %ebp, %esp
        popl    %ebp
        ret
.LFE1:
.Lfe1:

/*****************************************************************************/
/* EXTERN                    simd_irrev_v_synth                              */
/*****************************************************************************/
/* Stack arguments (as used by the code below):                              */
/*    8(%ebp)  `src1'   pointer to first source row of 16-bit samples        */
/*   12(%ebp)  `src2'   pointer to second source row of 16-bit samples       */
/*   16(%ebp)  `dst'    pointer to the row that is updated in place          */
/*   20(%ebp)  sample count; (count+3)>>2 groups of four are processed       */
/*   24(%ebp)  `i_lambda', selects the loop: -2, 0 or 1; anything else is    */
/*             a no-op                                                       */
/*   28(%ebp)  16-bit lambda remainder (low word only is used)               */
/*   32(%ebp)  16-bit rounding offset (low word only is used)                */
/* Locals:                                                                   */
/*   -2(%ebp)  lambda remainder        -4(%ebp)  rounding offset             */
/*   -8(%ebp)  `quads'                                                       */
/*  -16(%ebp)  `q_lambda' (4 x 16 bit) -24(%ebp) `q_offset' (4 x 16 bit)     */
/*  -28(%ebp)  saved %ebx                                                    */
/* Loop registers: %ecx = quads remaining, %eax = src1, %ebx = src2,         */
/*                 %edx = dst, %mm0 = q_lambda, %mm1 = q_offset.             */
/* All sample arithmetic uses saturating signed 16-bit MMX operations.       */
/* Because whole quads are processed, up to three samples beyond the count   */
/* may be read and written.                                                  */
/* NOTE(review): callers presumably pad their buffers accordingly - confirm. */
/*****************************************************************************/
        .align 16
.globl simd_irrev_v_synth
simd_irrev_v_synth:
.LFB2:
        pushl   %ebp
.LCFI3:
        movl    %esp, %ebp
.LCFI4:
        subl    $24, %esp
.LCFI5:
        pushl   %ebx                    # %ebx holds `src2' in the loops; callee-saved
        movl    28(%ebp), %eax
        movw    %ax, -2(%ebp)           # lambda remainder
        movl    32(%ebp), %eax
        movw    %ax, -4(%ebp)           # rounding offset
        movl    20(%ebp), %eax
        addl    $3, %eax
        sarl    $2, %eax
        movl    %eax, -8(%ebp)          # Base of `quads'
        cmpl    $0, -8(%ebp)
        jg      .L4
        jmp     .L3                     # nothing to do
        .p2align 4,,7
.L4:
        movzwl  -2(%ebp), %eax          # replicate the remainder into 4 words
        movw    %ax, -10(%ebp)
        movw    %ax, -12(%ebp)
        movw    %ax, -14(%ebp)
        movw    %ax, -16(%ebp)          # Base of `q_lambda'
        movl    -4(%ebp), %eax          # only the low word (the offset) is used
        movw    %ax, -18(%ebp)
        movw    %ax, -20(%ebp)
        movw    %ax, -22(%ebp)
        movw    %ax, -24(%ebp)          # Base of `q_offset'
        cmpl    $-2, 24(%ebp)
        jne     .L5
    /* Special MMX code begins here: i_lambda = -2 */
    /*   dst += 2*(src1+src2) - hi16((src1+src2+offset) * lambda) */
        movl    -8(%ebp), %ecx          # Set up counter used for looping
        movl    8(%ebp), %eax           # Load `src1' pointer
        movl    12(%ebp), %ebx          # Load `src2' pointer
        movl    16(%ebp), %edx          # Load `dst' pointer
        movq    -16(%ebp), %mm0         # Load `q_lambda'
        movq    -24(%ebp), %mm1         # Load `q_offset'
.Liv_loop_minus2:
        movq    (%eax), %mm2            # Start with source sample 1
        paddsw  (%ebx), %mm2            # Add source sample 2
        movq    (%edx), %mm3
        paddsw  %mm2, %mm3              # Here is a -1 contribution
        paddsw  %mm2, %mm3              # Here is another -1 contribution
        paddsw  %mm1, %mm2              # Add pre-offset for rounding
        pmulhw  %mm0, %mm2              # Multiply by lambda and discard 16 LSB's
        psubsw  %mm2, %mm3              # Final contribution
        movq    %mm3, (%edx)
        addl    $8, %eax
        addl    $8, %ebx
        addl    $8, %edx
        subl    $1, %ecx
        jnz     .Liv_loop_minus2
        emms                            # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
        jmp     .L12
        .p2align 4,,7
.L5:
        cmpl    $0, 24(%ebp)
        jne     .L7
        cmpw    $0, -2(%ebp)
        jns     .L7
        movl    -16(%ebp), %eax         # low word = lambda remainder
        sall    $3, %eax                # work with 8x lambda in this branch
        movw    %ax, -10(%ebp)
        movw    %ax, -12(%ebp)
        movw    %ax, -14(%ebp)
        movw    %ax, -16(%ebp)
        movw    $4, -18(%ebp)           # post-offset of 4 = half of the final /8
        movw    $4, -20(%ebp)
        movw    $4, -22(%ebp)
        movw    $4, -24(%ebp)
    /* Special MMX code begins here: i_lambda=0 and remainder < 0 */
    /*   dst -= (hi16(src1*8lambda) - hi16(-src2*8lambda) + 4) >> 3 */
        movl    -8(%ebp), %ecx          # Set up counter used for looping
        movl    8(%ebp), %eax           # Load `src1' pointer
        movl    12(%ebp), %ebx          # Load `src2' pointer
        movl    16(%ebp), %edx          # Load `dst' pointer
        movq    -16(%ebp), %mm0         # Load `q_lambda'
        movq    -24(%ebp), %mm1         # Load `q_offset'
.Liv_loop_0neg:
        movq    (%eax), %mm2            # Start with source samples 1
        pmulhw  %mm0, %mm2              # Multiply by lambda and discard 16 LSB's
        pxor    %mm4, %mm4
        psubsw  (%ebx), %mm4            # Load negated source samples 2
        pmulhw  %mm0, %mm4              # Multiply by lambda and discard 16 LSB's
        movq    (%edx), %mm3
        psubsw  %mm4, %mm2              # Accumulate non-negated scaled sources.
        paddsw  %mm1, %mm2              # Add post-offset for rounding
        psraw   $3, %mm2                # Divide by 8 (we've been working with 8x lambda)
        psubsw  %mm2, %mm3              # Update destination samples
        movq    %mm3, (%edx)
        addl    $8, %eax
        addl    $8, %ebx
        addl    $8, %edx
        subl    $1, %ecx
        jnz     .Liv_loop_0neg
        emms                            # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
        jmp     .L12
        .p2align 4,,7
.L7:
        cmpl    $0, 24(%ebp)
        jne     .L9
    /* Special MMX code begins here: i_lambda = 0 */
    /*   dst -= hi16((src1+src2+offset) * lambda) */
        movl    -8(%ebp), %ecx          # Set up counter used for looping
        movl    8(%ebp), %eax           # Load `src1' pointer
        movl    12(%ebp), %ebx          # Load `src2' pointer
        movl    16(%ebp), %edx          # Load `dst' pointer
        movq    -16(%ebp), %mm0         # Load `q_lambda'
        movq    -24(%ebp), %mm1         # Load `q_offset'
.Liv_loop_0pos:
        movq    (%eax), %mm2            # Start with source sample 1
        paddsw  (%ebx), %mm2            # Add source sample 2
        movq    (%edx), %mm3
        paddsw  %mm1, %mm2              # Add pre-offset for rounding
        pmulhw  %mm0, %mm2              # Multiply by lambda and discard 16 LSB's
        psubsw  %mm2, %mm3              # Final contribution
        movq    %mm3, (%edx)
        addl    $8, %eax
        addl    $8, %ebx
        addl    $8, %edx
        subl    $1, %ecx
        jnz     .Liv_loop_0pos
        emms                            # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
        jmp     .L12
        .p2align 4,,7
.L9:
        cmpl    $1, 24(%ebp)
        jne     .L12
    /* Special MMX code begins here: i_lambda = 1 */
    /*   dst -= (src1+src2) + hi16((src1+src2+offset) * lambda) */
        movl    -8(%ebp), %ecx          # Set up counter used for looping
        movl    8(%ebp), %eax           # Load `src1' pointer
        movl    12(%ebp), %ebx          # Load `src2' pointer
        movl    16(%ebp), %edx          # Load `dst' pointer
        movq    -16(%ebp), %mm0         # Load `q_lambda'
        movq    -24(%ebp), %mm1         # Load `q_offset'
.Liv_loop_plus1:
        movq    (%eax), %mm2            # Start with source sample 1
        paddsw  (%ebx), %mm2            # Add source sample 2
        movq    (%edx), %mm3
        psubsw  %mm2, %mm3              # Here is a +1 contribution
        paddsw  %mm1, %mm2              # Add pre-offset for rounding
        pmulhw  %mm0, %mm2              # Multiply by lambda and discard 16 LSB's
        psubsw  %mm2, %mm3              # Final contribution
        movq    %mm3, (%edx)
        addl    $8, %eax
        addl    $8, %ebx
        addl    $8, %edx
        subl    $1, %ecx
        jnz     .Liv_loop_plus1
        emms                            # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
.L12:
.L3:
        popl    %ebx                    # every path arrives here with %esp at the saved %ebx
        movl    %ebp, %esp
        popl    %ebp
        ret
.LFE2:
.Lfe2:

/*****************************************************************************/
/* EXTERN                      simd_rev_v_synth                              */
/*****************************************************************************/
/* Stack arguments (as used by the code below):                              */
/*    8(%ebp)  `src1'   pointer to first source row of 16-bit samples        */
/*   12(%ebp)  `src2'   pointer to second source row of 16-bit samples       */
/*   16(%ebp)  `dst'    pointer to the row that is updated in place          */
/*   20(%ebp)  sample count; (count+3)>>2 groups of four are processed       */
/*   24(%ebp)  `downshift'                                                   */
/*   28(%ebp)  selects the loop: 1 or -1; anything else is a no-op           */
/* Locals:                                                                   */
/*   -4(%ebp)  `quads'                                                       */
/*  -16(%ebp)  `q_offset' (4 x 16 bit), each word = (1<<downshift)>>1        */
/*  -20(%ebp)  saved %ebx                                                    */
/* Loop registers: %ecx = quads remaining, %eax = src1, %ebx = src2,         */
/*                 %edx = dst, %mm0 = q_offset, %mm1 = downshift.            */
/* Because whole quads are processed, up to three samples beyond the count   */
/* may be read and written.                                                  */
/* NOTE(review): callers presumably pad their buffers accordingly - confirm. */
/*****************************************************************************/
        .align 16
.globl simd_rev_v_synth
simd_rev_v_synth:
.LFB3:
        pushl   %ebp
.LCFI6:
        movl    %esp, %ebp
.LCFI7:
        subl    $16, %esp
.LCFI8:
        pushl   %ebx                    # %ebx holds `src2' in the loops; callee-saved
        movl    20(%ebp), %eax
        addl    $3, %eax
        sarl    $2, %eax
        movl    %eax, -4(%ebp)          # Base of `quads'
        cmpl    $0, -4(%ebp)
        jg      .L14
        jmp     .L13                    # nothing to do
        .p2align 4,,7
.L14:
        movzbl  24(%ebp), %ecx          # low byte of `downshift'
        movl    $1, %eax
        sall    %cl, %eax
        sarl    %eax                    # rounding offset = (1<<downshift)>>1
        movw    %ax, -10(%ebp)
        movzwl  -10(%ebp), %eax
        movw    %ax, -10(%ebp)
        movw    %ax, -12(%ebp)
        movw    %ax, -14(%ebp)
        movw    %ax, -16(%ebp)          # Base of `q_offset'
        cmpl    $1, 28(%ebp)
        jne     .L15
    /* Special MMX code begins here: selector = +1 */
    /*   dst -= (offset + src1 + src2) >> downshift */
        movl    -4(%ebp), %ecx          # Set up counter used for looping
        movl    8(%ebp), %eax           # Load `src1' pointer
        movl    12(%ebp), %ebx          # Load `src2' pointer
        movl    16(%ebp), %edx          # Load `dst' pointer
        movq    -16(%ebp), %mm0         # Load `q_offset'
        movd    24(%ebp), %mm1          # Load `downshift'
.Lrv_loop_plus1:
        movq    %mm0, %mm2              # start with the offset
        paddsw  (%eax), %mm2            # add 1'st source sample
        paddsw  (%ebx), %mm2            # add 2'nd source sample
        movq    (%edx), %mm3
        psraw   %mm1, %mm2              # shift right by the `downshift' value
        psubsw  %mm2, %mm3              # subtract from dest sample
        movq    %mm3, (%edx)
        addl    $8, %eax
        addl    $8, %ebx
        addl    $8, %edx
        subl    $1, %ecx
        jnz     .Lrv_loop_plus1
        emms                            # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
        jmp     .L18
        .p2align 4,,7
.L15:
        cmpl    $-1, 28(%ebp)
        jne     .L18
    /* Special MMX code begins here: selector = -1 */
    /*   dst -= (offset - src1 - src2) >> downshift */
        movl    -4(%ebp), %ecx          # Set up counter used for looping
        movl    8(%ebp), %eax           # Load `src1' pointer
        movl    12(%ebp), %ebx          # Load `src2' pointer
        movl    16(%ebp), %edx          # Load `dst' pointer
        movq    -16(%ebp), %mm0         # Load `q_offset'
        movd    24(%ebp), %mm1          # Load `downshift'
.Lrv_loop_minus1:
        movq    %mm0, %mm2              # start with the offset
        psubsw  (%eax), %mm2            # subtract 1'st source sample
        psubsw  (%ebx), %mm2            # subtract 2'nd source sample
        movq    (%edx), %mm3
        psraw   %mm1, %mm2              # shift right by the `downshift' value
        psubsw  %mm2, %mm3              # subtract from dest sample
        movq    %mm3, (%edx)
        addl    $8, %eax
        addl    $8, %ebx
        addl    $8, %edx
        subl    $1, %ecx
        jnz     .Lrv_loop_minus1
        emms                            # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
.L18:
.L13:
        popl    %ebx                    # every path arrives here with %esp at the saved %ebx
        movl    %ebp, %esp
        popl    %ebp
        ret
.LFE3:
.Lfe3:

/*****************************************************************************/
/* EXTERN                    simd_irrev_h_synth                              */
/*****************************************************************************/
        .align 16
.globl simd_irrev_h_synth
simd_irrev_h_synth:
.LFB4:
        pushl   %ebp
.LCFI9:
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -