📄 gcc_dwt_mmx.s

📁 JPEG2000压缩解压图像源码
💻 S
📖 第 1 页 / 共 2 页
字号:
上一页 12
	/* NOTE(review): this listing starts part-way through a routine.  Its
	   entry label and `pushl %ebp' are not visible in this excerpt; the
	   `.Lih_loop_*' labels suggest it is the irreversible horizontal
	   synthesis step (name to be confirmed against the first page).

	   Syntax: GNU as, AT&T operand order, 32-bit x86 with MMX.
	   Arguments are read from the stack relative to %ebp.

	   Arguments as used by the code below:
	      8(%ebp)  `src'  pointer to 16-bit source samples
	     12(%ebp)  `dst'  pointer to 16-bit samples, updated in place
	     16(%ebp)  sample count; (count+3)>>2 `quads' are processed
	     20(%ebp)  selector compared against -2, 0 and 1 to pick a loop
	     24(%ebp)  low word is replicated four times into `q_lambda'
	     28(%ebp)  low word is replicated four times into `q_offset'
	   Every loop reads 8 bytes at src+2, i.e. one sample past each quad. */
	movl	%esp, %ebp
.LCFI10:
	subl	$24, %esp
.LCFI11:
	movl	24(%ebp), %eax
	movw	%ax, -2(%ebp)
	movl	28(%ebp), %eax
	movw	%ax, -4(%ebp)
	movl	16(%ebp), %eax
	addl	$3, %eax
	sarl	$2, %eax
	movl	%eax, -8(%ebp)     # Base of `quads'
	cmpl	$0, -8(%ebp)
	jg	.L20
	jmp	.L19
	.p2align 4,,7
.L20:
	movzwl	-2(%ebp), %eax
	movw	%ax, -10(%ebp)
	movw	%ax, -12(%ebp)
	movw	%ax, -14(%ebp)
	movw	%ax, -16(%ebp)     # Base of `q_lambda'
	movl	-4(%ebp), %eax
	movw	%ax, -18(%ebp)
	movw	%ax, -20(%ebp)
	movw	%ax, -22(%ebp)
	movw	%ax, -24(%ebp)     # Base of `q_offset'
	cmpl	$-2, 20(%ebp)
	jne	.L21
    /* Special MMX code begins here */
          MOV -8(%ebp),%ECX    # Set up counter used for looping
          MOV 8(%ebp),%EAX     # Load `src' pointer
          MOV 12(%ebp),%EDX    # Load `dst' pointer
          MOVQ -16(%ebp),%MM0  # Load `q_lambda'
          MOVQ -24(%ebp),%MM1  # Load `q_offset'
.Lih_loop_minus2:
          MOVQ (%EAX),%MM2
          PADDSW 2(%EAX),%MM2   # Add source sample 2
          MOVQ (%EDX),%MM3
          PADDSW %MM2,%MM3     # Here is a -1 contribution
          PADDSW %MM2,%MM3     # Here is another -1 contribution
          PADDSW %MM1,%MM2     # Add pre-offset for rounding
          PMULHW %MM0,%MM2     # Multiply by lambda and discard 16 LSB's
          PSUBSW %MM2,%MM3     # Final contribution
          MOVQ %MM3,(%EDX)
          ADD $8,%EAX
          ADD $8,%EDX
          SUB $1,%ECX
          JNZ .Lih_loop_minus2
          EMMS                 # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
	jmp	.L28
	.p2align 4,,7
.L21:
	cmpl	$0, 20(%ebp)
	jne	.L23
	cmpw	$0, -2(%ebp)
	jns	.L23
	/* Selector is 0 and the lambda word is negative: rebuild `q_lambda'
	   as 8x lambda and set every `q_offset' word to 4, matching the
	   `PSRAW $3' applied inside the loop below. */
	movl	-16(%ebp), %eax
	sall	$3, %eax
	movw	%ax, -10(%ebp)
	movw	%ax, -12(%ebp)
	movw	%ax, -14(%ebp)
	movw	%ax, -16(%ebp)
	movw	$4, -18(%ebp)
	movw	$4, -20(%ebp)
	movw	$4, -22(%ebp)
	movw	$4, -24(%ebp)
    /* Special MMX code begins here */
          MOV -8(%ebp),%ECX    # Set up counter used for looping
          MOV 8(%ebp),%EAX     # Load `src' pointer
          MOV 12(%ebp),%EDX    # Load `dst' pointer
          MOVQ -16(%ebp),%MM0  # Load `q_lambda'
          MOVQ -24(%ebp),%MM1  # Load `q_offset'
.Lih_loop_0neg:
          MOVQ (%EAX),%MM2
          PMULHW %MM0,%MM2     # Multiply by lambda and discard 16 LSB's
          PXOR %MM4,%MM4
          PSUBSW 2(%EAX),%MM4  # Load negated source samples 2
          PMULHW %MM0,%MM4     # Multiply by lambda and discard 16 LSB's
          MOVQ (%EDX),%MM3
          PSUBSW %MM4,%MM2     # Accumulate non-negated scaled sources.
          PADDSW %MM1,%MM2     # Add post-offset for rounding
          PSRAW $3,%MM2        # Divide by 8 (we've been working with 8x lambda)
          PSUBSW %MM2,%MM3     # Final contribution
          MOVQ %MM3,(%EDX)
          ADD $8,%EAX
          ADD $8,%EDX
          SUB $1,%ECX
          JNZ .Lih_loop_0neg
          EMMS                 # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
	jmp	.L28
	.p2align 4,,7
.L23:
	cmpl	$0, 20(%ebp)
	jne	.L25
    /* Special MMX code begins here */
          MOV -8(%ebp),%ECX    # Set up counter used for looping
          MOV 8(%ebp),%EAX     # Load `src' pointer
          MOV 12(%ebp),%EDX    # Load `dst' pointer
          MOVQ -16(%ebp),%MM0  # Load `q_lambda'
          MOVQ -24(%ebp),%MM1  # Load `q_offset'
.Lih_loop_0pos:
          MOVQ (%EAX),%MM2
          PADDSW 2(%EAX),%MM2 # Add source sample 2
          MOVQ (%EDX),%MM3
          PADDSW %MM1,%MM2     # Add pre-offset for rounding
          PMULHW %MM0,%MM2     # Multiply by lambda and discard 16 LSB's
          PSUBSW %MM2,%MM3     # Final contribution
          MOVQ %MM3,(%EDX)
          ADD $8,%EAX
          ADD $8,%EDX
          SUB $1,%ECX
          JNZ .Lih_loop_0pos
          EMMS                 # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
	jmp	.L28
	.p2align 4,,7
.L25:
	cmpl	$1, 20(%ebp)
	jne	.L28
    /* Special MMX code begins here */
          MOV -8(%ebp),%ECX    # Set up counter used for looping
          MOV 8(%ebp),%EAX     # Load `src' pointer
          MOV 12(%ebp),%EDX    # Load `dst' pointer
          MOVQ -16(%ebp),%MM0  # Load `q_lambda'
          MOVQ -24(%ebp),%MM1  # Load `q_offset'
.Lih_loop_plus1:
          MOVQ (%EAX),%MM2
          PADDSW 2(%EAX),%MM2  # Add source sample 2
          MOVQ (%EDX),%MM3
          PSUBSW %MM2,%MM3     # Here is a +1 contribution
          PADDSW %MM1,%MM2     # Add pre-offset for rounding
          PMULHW %MM0,%MM2     # Multiply by lambda and discard 16 LSB's
          PSUBSW %MM2,%MM3     # Final contribution
          MOVQ %MM3,(%EDX)
          ADD $8,%EAX
          ADD $8,%EDX
          SUB $1,%ECX
          JNZ .Lih_loop_plus1
          EMMS                 # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
.L28:
.L19:
	movl	%ebp, %esp
	popl	%ebp
	ret
.LFE4:
.Lfe4:

/*****************************************************************************/
/* EXTERN                     simd_rev_h_synth                               */
/*****************************************************************************/
/* Arguments as used by the code below:
      8(%ebp)  `src'        pointer to 16-bit source samples
     12(%ebp)  `dst'        pointer to 16-bit samples, updated in place
     16(%ebp)  sample count; (count+3)>>2 `quads' are processed
     20(%ebp)  `downshift'  shift count; the rounding offset is
                            (1<<downshift)>>1, replicated into four words
     24(%ebp)  selector: +1 or -1 picks the loop; any other value
               returns without modifying `dst'
   Per 16-bit lane k, using saturating word arithmetic:
     selector +1:  dst[k] -= (offset + src[k] + src[k+1]) >> downshift
     selector -1:  dst[k] -= (offset - src[k] - src[k+1]) >> downshift
   Each iteration reads 8 bytes at src+2, i.e. one sample past the quad.
   Registers written: EAX, ECX, EDX, MM0-MM3 and flags.  EBX is not
   touched; it is callee-saved under the i386 ABI. */
	.align 16
.globl simd_rev_h_synth
simd_rev_h_synth:
.LFB5:
	pushl	%ebp
.LCFI12:
	movl	%esp, %ebp
.LCFI13:
	subl	$16, %esp
.LCFI14:
	movl	16(%ebp), %eax
	addl	$3, %eax
	sarl	$2, %eax
	movl	%eax, -4(%ebp)   # Base of `quads'
	cmpl	$0, -4(%ebp)
	jg	.L30
	jmp	.L29
	.p2align 4,,7
.L30:
	movzbl	20(%ebp), %ecx   # Load `downshift'
	movl	$1, %eax
	sall	%cl, %eax
	sarl	%eax
	movw	%ax, -10(%ebp)
	movzwl	-10(%ebp), %eax
	movw	%ax, -10(%ebp)
	movw	%ax, -12(%ebp)
	movw	%ax, -14(%ebp)
	movw	%ax, -16(%ebp)   # Base of `q_offset'
	cmpl	$1, 24(%ebp)
	jne	.L31
    /* Special MMX code begins here */
          MOV -4(%ebp),%ECX      # Set up counter used for looping
          MOV 8(%ebp),%EAX       # Load `src' pointer
          MOV 12(%ebp),%EDX      # Load `dst' pointer
          MOVQ -16(%ebp),%MM0    # Load `q_offset'
          MOVD 20(%ebp),%MM1     # Load `downshift'
.Lrh_loop_plus1:
          MOVQ %MM0,%MM2         # start with the offset
          PADDSW (%EAX),%MM2     # add 1'st source sample
          PADDSW 2(%EAX),%MM2    # add 2'nd source sample
          MOVQ (%EDX),%MM3
          PSRAW %MM1,%MM2        # shift right by the `downshift' value
          PSUBSW %MM2,%MM3       # subtract from dest sample
          MOVQ %MM3,(%EDX)
          ADD $8,%EAX
          ADD $8,%EDX
          SUB $1,%ECX
          JNZ .Lrh_loop_plus1
          EMMS                   # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
	jmp	.L34
	.p2align 4,,7
.L31:
	cmpl	$-1, 24(%ebp)
	jne	.L34
    /* Special MMX code begins here */
          MOV -4(%ebp),%ECX      # Set up counter used for looping
          MOV 8(%ebp),%EAX       # Load `src' pointer
          MOV 12(%ebp),%EDX      # Load `dst' pointer
          MOVQ -16(%ebp),%MM0    # Load `q_offset'
          MOVD 20(%ebp),%MM1     # Load `downshift'
.Lrh_loop_minus1:
          MOVQ %MM0,%MM2         # start with the offset
          PSUBSW (%EAX),%MM2     # subtract 1'st source sample
          PSUBSW 2(%EAX),%MM2    # subtract 2'nd source sample
          MOVQ (%EDX),%MM3
          PSRAW %MM1,%MM2        # shift right by the `downshift' value
          PSUBSW %MM2,%MM3       # subtract from dest sample
          MOVQ %MM3,(%EDX)
          ADD $8,%EAX            # Only EAX and EDX are advanced; EBX is not
          ADD $8,%EDX            # used here and must be left untouched
          SUB $1,%ECX
          JNZ .Lrh_loop_minus1
          EMMS                   # Clear MMX registers for use by FPU
    /* End of special MMX code; resume stub code generated by gcc */
.L34:
.L29:
	movl	%ebp, %esp
	popl	%ebp
	ret
.LFE5:
.Lfe5:

/*****************************************************************************/
/* EXTERN                        simd_interleave                             */
/*****************************************************************************/
/* Arguments as used by the code below:
      8(%ebp)  `src1'  pointer to 16-bit samples
     12(%ebp)  `src2'  pointer to 16-bit samples
     16(%ebp)  `dst'   pointer to 16-bit output samples
     20(%ebp)  count; (count+3)>>2 iterations are executed
   Each iteration reads four words from each source and writes eight
   words: dst[2k] = src1[k], dst[2k+1] = src2[k].
   Registers written: EAX, ECX, EDX, MM0-MM2 and flags.  EBX holds the
   `src2' pointer inside the loop and is saved and restored around it,
   because it is callee-saved under the i386 ABI. */
	.align 16
.globl simd_interleave
simd_interleave:
.LFB6:
	pushl	%ebp
.LCFI15:
	movl	%esp, %ebp
.LCFI16:
	subl	$4, %esp
.LCFI17:
	movl	20(%ebp), %eax
	addl	$3, %eax
	sarl	$2, %eax
	movl	%eax, -4(%ebp)   # Base of `octets'
	cmpl	$0, -4(%ebp)
	jg	.L36
	jmp	.L35
	.p2align 4,,7
.L36:
    /* Special MMX code begins here */
      PUSH %EBX             # Preserve callee-saved EBX (locals are %ebp-relative)
      MOV -4(%ebp),%ECX     # Set up counter used for looping
      MOV 8(%ebp),%EAX      # Load `src1' pointer
      MOV 12(%ebp),%EBX     # Load `src2' pointer
      MOV 16(%ebp),%EDX     # Load `dst' pointer
.Lloop_ilv:
      MOVQ (%EAX),%MM0
      MOVQ %MM0,%MM2
      MOVQ (%EBX),%MM1
      PUNPCKLWD %MM1,%MM2   # Low two words of each source, interleaved
      MOVQ %MM2,(%EDX)
      PUNPCKHWD %MM1,%MM0   # High two words of each source, interleaved
      MOVQ %MM0,8(%EDX)
      ADD $8,%EAX
      ADD $8,%EBX
      ADD $16,%EDX
      SUB $1,%ECX
      JNZ .Lloop_ilv
      EMMS                  # Clear MMX registers for use by FPU
      POP %EBX              # Restore the caller's EBX
    /* End of special MMX code; resume stub code generated by gcc */
.L35:
	movl	%ebp, %esp
	popl	%ebp
	ret
.LFE6:
.Lfe6:

/*****************************************************************************/
/* EXTERN                   simd_upshifted_interleave                        */
/*****************************************************************************/
/* Arguments as used by the code below:
      8(%ebp)  `src1'     pointer to 16-bit samples
     12(%ebp)  `src2'     pointer to 16-bit samples
     16(%ebp)  `dst'      pointer to 16-bit output samples
     20(%ebp)  count; (count+3)>>2 iterations are executed
     24(%ebp)  `upshift'  left-shift applied to every word before it is
                          interleaved
   Each iteration reads four words from each source and writes eight
   words: dst[2k] = src1[k] << upshift, dst[2k+1] = src2[k] << upshift.
   Registers written: EAX, ECX, EDX, MM0-MM3 and flags.  EBX holds the
   `src2' pointer inside the loop and is saved and restored around it,
   because it is callee-saved under the i386 ABI. */
	.align 16
.globl simd_upshifted_interleave
simd_upshifted_interleave:
.LFB7:
	pushl	%ebp
.LCFI18:
	movl	%esp, %ebp
.LCFI19:
	subl	$4, %esp
.LCFI20:
	movl	20(%ebp), %eax
	addl	$3, %eax
	sarl	$2, %eax
	movl	%eax, -4(%ebp)     # Base of `octets'
	cmpl	$0, -4(%ebp)
	jg	.L39
	jmp	.L38
	.p2align 4,,7
.L39:
    /* Special MMX code begins here */
      PUSH %EBX              # Preserve callee-saved EBX (locals are %ebp-relative)
      MOV -4(%ebp),%ECX      # Set up counter used for looping
      MOV 8(%ebp),%EAX       # Load `src1' pointer
      MOV 12(%ebp),%EBX      # Load `src2' pointer
      MOV 16(%ebp),%EDX      # Load `dst' pointer
      MOVD 24(%ebp),%MM3     # Load `upshift'
.Lloop_shift_ilv:
      MOVQ (%EAX),%MM0
      PSLLW %MM3,%MM0
      MOVQ %MM0,%MM2
      MOVQ (%EBX),%MM1
      PSLLW %MM3,%MM1
      PUNPCKLWD %MM1,%MM2    # Low two words of each source, interleaved
      MOVQ %MM2,(%EDX)
      PUNPCKHWD %MM1,%MM0    # High two words of each source, interleaved
      MOVQ %MM0,8(%EDX)
      ADD $8,%EAX
      ADD $8,%EBX
      ADD $16,%EDX
      SUB $1,%ECX
      JNZ .Lloop_shift_ilv
      EMMS                   # Clear MMX registers for use by FPU
      POP %EBX               # Restore the caller's EBX
    /* End of special MMX code; resume stub code generated by gcc */
.L38:
	movl	%ebp, %esp
	popl	%ebp
	ret
.LFE7:
.Lfe7:
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -