📄 gcc_dwt_mmx.s
字号:
movl %esp, %ebp.LCFI10: subl $24, %esp.LCFI11: movl 24(%ebp), %eax movw %ax, -2(%ebp) movl 28(%ebp), %eax movw %ax, -4(%ebp) movl 16(%ebp), %eax addl $3, %eax sarl $2, %eax movl %eax, -8(%ebp) # Base of `quads' cmpl $0, -8(%ebp) jg .L20 jmp .L19 .p2align 4,,7.L20: movzwl -2(%ebp), %eax movw %ax, -10(%ebp) movw %ax, -12(%ebp) movw %ax, -14(%ebp) movw %ax, -16(%ebp) # Base of `q_lambda' movl -4(%ebp), %eax movw %ax, -18(%ebp) movw %ax, -20(%ebp) movw %ax, -22(%ebp) movw %ax, -24(%ebp) # Base of `q_offset' cmpl $-2, 20(%ebp) jne .L21 /* Special MMX code begins here */ MOV -8(%ebp),%ECX # Set up counter used for looping MOV 8(%ebp),%EAX # Load `src' pointer MOV 12(%ebp),%EDX # Load `dst' pointer MOVQ -16(%ebp),%MM0 # Load `q_lambda' MOVQ -24(%ebp),%MM1 # Load `q_offset'.Lih_loop_minus2: MOVQ (%EAX),%MM2 PADDSW 2(%EAX),%MM2 # Add source sample 2 MOVQ (%EDX),%MM3 PADDSW %MM2,%MM3 # Here is a -1 contribution PADDSW %MM2,%MM3 # Here is another -1 contribution PADDSW %MM1,%MM2 # Add pre-offset for rounding PMULHW %MM0,%MM2 # Multiply by lambda and discard 16 LSB's PSUBSW %MM2,%MM3 # Final contribution MOVQ %MM3,(%EDX) ADD $8,%EAX ADD $8,%EDX SUB $1,%ECX JNZ .Lih_loop_minus2 EMMS # Clear MMX registers for use by FPU /* End of special MMX code; resume stub code generated by gcc */ jmp .L28 .p2align 4,,7.L21: cmpl $0, 20(%ebp) jne .L23 cmpw $0, -2(%ebp) jns .L23 movl -16(%ebp), %eax sall $3, %eax movw %ax, -10(%ebp) movw %ax, -12(%ebp) movw %ax, -14(%ebp) movw %ax, -16(%ebp) movw $4, -18(%ebp) movw $4, -20(%ebp) movw $4, -22(%ebp) movw $4, -24(%ebp) /* Special MMX code begins here */ MOV -8(%ebp),%ECX # Set up counter used for looping MOV 8(%ebp),%EAX # Load `src' pointer MOV 12(%ebp),%EDX # Load `dst' pointer MOVQ -16(%ebp),%MM0 # Load `q_lambda' MOVQ -24(%ebp),%MM1 # Load `q_offset'.Lih_loop_0neg: MOVQ (%EAX),%MM2 PMULHW %MM0,%MM2 # Multiply by lambda and discard 16 LSB's PXOR %MM4,%MM4 PSUBSW 2(%EAX),%MM4 # Load negated source samples 2 PMULHW %MM0,%MM4 # Multiply by lambda 
and discard 16 LSB's MOVQ (%EDX),%MM3 PSUBSW %MM4,%MM2 # Accumulate non-negated scaled sources. PADDSW %MM1,%MM2 # Add post-offset for rounding PSRAW $3,%MM2 # Divide by 8 (we've been working with 8x lambda) PSUBSW %MM2,%MM3 # Final contribution MOVQ %MM3,(%EDX) ADD $8,%EAX ADD $8,%EDX SUB $1,%ECX JNZ .Lih_loop_0neg EMMS # Clear MMX registers for use by FPU /* End of special MMX code; resume stub code generated by gcc */ jmp .L28 .p2align 4,,7.L23: cmpl $0, 20(%ebp) jne .L25 /* Special MMX code begins here */ MOV -8(%ebp),%ECX # Set up counter used for looping MOV 8(%ebp),%EAX # Load `src' pointer MOV 12(%ebp),%EDX # Load `dst' pointer MOVQ -16(%ebp),%MM0 # Load `q_lambda' MOVQ -24(%ebp),%MM1 # Load `q_offset'.Lih_loop_0pos: MOVQ (%EAX),%MM2 PADDSW 2(%EAX),%MM2 # Add source sample 2 MOVQ (%EDX),%MM3 PADDSW %MM1,%MM2 # Add pre-offset for rounding PMULHW %MM0,%MM2 # Multiply by lambda and discard 16 LSB's PSUBSW %MM2,%MM3 # Final contribution MOVQ %MM3,(%EDX) ADD $8,%EAX ADD $8,%EDX SUB $1,%ECX JNZ .Lih_loop_0pos EMMS # Clear MMX registers for use by FPU /* End of special MMX code; resume stub code generated by gcc */ jmp .L28 .p2align 4,,7.L25: cmpl $1, 20(%ebp) jne .L28 /* Special MMX code begins here */ MOV -8(%ebp),%ECX # Set up counter used for looping MOV 8(%ebp),%EAX # Load `src' pointer MOV 12(%ebp),%EDX # Load `dst' pointer MOVQ -16(%ebp),%MM0 # Load `q_lambda' MOVQ -24(%ebp),%MM1 # Load `q_offset'.Lih_loop_plus1: MOVQ (%EAX),%MM2 PADDSW 2(%EAX),%MM2 # Add source sample 2 MOVQ (%EDX),%MM3 PSUBSW %MM2,%MM3 # Here is a +1 contribution PADDSW %MM1,%MM2 # Add pre-offset for rounding PMULHW %MM0,%MM2 # Multiply by lambda and discard 16 LSB's PSUBSW %MM2,%MM3 # Final contribution MOVQ %MM3,(%EDX) ADD $8,%EAX ADD $8,%EDX SUB $1,%ECX JNZ .Lih_loop_plus1 EMMS # Clear MMX registers for use by FPU /* End of special MMX code; resume stub code generated by gcc */.L28:.L19: movl %ebp, %esp popl %ebp 
/* Final instruction and end labels of the routine that precedes this point. */
        ret
.LFE4:
.Lfe4:

/*****************************************************************************/
/* EXTERN                       simd_rev_h_synth                             */
/*****************************************************************************/
/* 32-bit x86, AT&T syntax, all arguments on the stack:
 *     8(%ebp)  `src'        pointer to 16-bit source words
 *    12(%ebp)  `dst'        pointer to 16-bit words that are updated in place
 *    16(%ebp)  word count;  the loops run (count+3)>>2 times, 4 words each
 *    20(%ebp)  `downshift'  right shift applied to the summed source words
 *    24(%ebp)  selector;    only +1 and -1 are handled here, any other value
 *                           returns without touching `dst' (presumably this
 *                           is the lifting coefficient -- confirm with caller)
 *
 * With offset = (1 << downshift) >> 1, each word i is updated as
 *    selector == +1 :  dst[i] -= (offset + src[i] + src[i+1]) >> downshift
 *    selector == -1 :  dst[i] -= (offset - src[i] - src[i+1]) >> downshift
 * using saturating 16-bit adds/subtracts.  Every iteration loads 8 bytes
 * from both `src' and `src'+2, so one word beyond the current group of four
 * is read.
 *
 * Locals:    -4(%ebp) `quads' (iteration count), -16(%ebp) `q_offset'
 *            (the rounding offset replicated into 4 words).
 * Modifies:  EAX, ECX, EDX, MM0-MM3, flags.  EMMS is executed after each
 *            MMX loop.  EBX is not touched.
 */
        .align 16
.globl simd_rev_h_synth
simd_rev_h_synth:
.LFB5:
        pushl   %ebp
.LCFI12:
        movl    %esp, %ebp
.LCFI13:
        subl    $16, %esp
.LCFI14:
        movl    16(%ebp), %eax
        addl    $3, %eax
        sarl    $2, %eax
        movl    %eax, -4(%ebp)          # `quads' = (count + 3) >> 2
        testl   %eax, %eax
        jle     .L29                    # nothing to do for quads <= 0
.L30:
        movzbl  20(%ebp), %ecx          # %cl = `downshift'
        movl    $1, %eax
        sall    %cl, %eax
        sarl    $1, %eax                # %ax = (1 << downshift) >> 1
        movw    %ax, -10(%ebp)
        movw    %ax, -12(%ebp)
        movw    %ax, -14(%ebp)
        movw    %ax, -16(%ebp)          # `q_offset' = offset in all 4 words
        cmpl    $1, 24(%ebp)
        jne     .L31

        /* Special MMX code begins here */
        MOV     -4(%ebp),%ECX           # Loop counter = `quads'
        MOV     8(%ebp),%EAX            # Load `src' pointer
        MOV     12(%ebp),%EDX           # Load `dst' pointer
        MOVQ    -16(%ebp),%MM0          # Load `q_offset'
        MOVD    20(%ebp),%MM1           # Load `downshift'
.Lrh_loop_plus1:
        MOVQ    %MM0,%MM2               # Start with the offset
        PADDSW  (%EAX),%MM2             # Add 1st source sample
        PADDSW  2(%EAX),%MM2            # Add 2nd source sample (next word)
        MOVQ    (%EDX),%MM3
        PSRAW   %MM1,%MM2               # Shift right by the `downshift' value
        PSUBSW  %MM2,%MM3               # Subtract from dest sample
        MOVQ    %MM3,(%EDX)
        ADD     $8,%EAX
        ADD     $8,%EDX
        SUB     $1,%ECX
        JNZ     .Lrh_loop_plus1
        EMMS                            # Clear MMX registers for use by FPU
        /* End of special MMX code; resume stub code generated by gcc */
        jmp     .L34

        .p2align 4,,7
.L31:
        cmpl    $-1, 24(%ebp)
        jne     .L34

        /* Special MMX code begins here */
        MOV     -4(%ebp),%ECX           # Loop counter = `quads'
        MOV     8(%ebp),%EAX            # Load `src' pointer
        MOV     12(%ebp),%EDX           # Load `dst' pointer
        MOVQ    -16(%ebp),%MM0          # Load `q_offset'
        MOVD    20(%ebp),%MM1           # Load `downshift'
.Lrh_loop_minus1:
        MOVQ    %MM0,%MM2               # Start with the offset
        PSUBSW  (%EAX),%MM2             # Subtract 1st source sample
        PSUBSW  2(%EAX),%MM2            # Subtract 2nd source sample (next word)
        MOVQ    (%EDX),%MM3
        PSRAW   %MM1,%MM2               # Shift right by the `downshift' value
        PSUBSW  %MM2,%MM3               # Subtract from dest sample
        MOVQ    %MM3,(%EDX)
        ADD     $8,%EAX                 # Only `src' and `dst' advance; EBX is
        ADD     $8,%EDX                 # the caller's register and stays as is
        SUB     $1,%ECX
        JNZ     .Lrh_loop_minus1
        EMMS                            # Clear MMX registers for use by FPU
        /* End of special MMX code; resume stub code generated by gcc */
.L34:
.L29:
        movl    %ebp, %esp
        popl    %ebp
        ret
.LFE5:
.Lfe5:

/*****************************************************************************/
/* EXTERN                        simd_interleave                             */
/*****************************************************************************/
/* 32-bit x86, AT&T syntax, all arguments on the stack:
 *     8(%ebp)  `src1'  pointer to 16-bit words placed at even output slots
 *    12(%ebp)  `src2'  pointer to 16-bit words placed at odd output slots
 *    16(%ebp)  `dst'   pointer to the interleaved output
 *    20(%ebp)  count;  the loop runs (count+3)>>2 times
 *
 * Each iteration reads 4 words from `src1' and 4 from `src2' and writes the
 * 8 words  s1[0],s2[0],s1[1],s2[1],s1[2],s2[2],s1[3],s2[3]  to `dst'.
 *
 * Locals:    -4(%ebp) `octets' (iteration count).
 * Modifies:  EAX, ECX, EDX, MM0-MM2, flags.  EBX holds `src2' inside the
 *            loop and is saved/restored around it because it is a
 *            callee-saved register.  EMMS is executed after the loop.
 */
        .align 16
.globl simd_interleave
simd_interleave:
.LFB6:
        pushl   %ebp
.LCFI15:
        movl    %esp, %ebp
.LCFI16:
        subl    $4, %esp
.LCFI17:
        movl    20(%ebp), %eax
        addl    $3, %eax
        sarl    $2, %eax
        movl    %eax, -4(%ebp)          # `octets' = (count + 3) >> 2
        testl   %eax, %eax
        jle     .L35                    # nothing to do for octets <= 0
.L36:
        pushl   %ebx                    # Preserve caller's EBX (used for `src2')

        /* Special MMX code begins here */
        MOV     -4(%ebp),%ECX           # Loop counter = `octets'
        MOV     8(%ebp),%EAX            # Load `src1' pointer
        MOV     12(%ebp),%EBX           # Load `src2' pointer
        MOV     16(%ebp),%EDX           # Load `dst' pointer
.Lloop_ilv:
        MOVQ    (%EAX),%MM0             # 4 words from `src1'
        MOVQ    %MM0,%MM2               # Copy kept for the low-half unpack
        MOVQ    (%EBX),%MM1             # 4 words from `src2'
        PUNPCKLWD %MM1,%MM2             # s1[0],s2[0],s1[1],s2[1]
        MOVQ    %MM2,(%EDX)
        PUNPCKHWD %MM1,%MM0             # s1[2],s2[2],s1[3],s2[3]
        MOVQ    %MM0,8(%EDX)
        ADD     $8,%EAX
        ADD     $8,%EBX
        ADD     $16,%EDX
        SUB     $1,%ECX
        JNZ     .Lloop_ilv
        EMMS                            # Clear MMX registers for use by FPU
        /* End of special MMX code; resume stub code generated by gcc */

        popl    %ebx                    # Restore caller's EBX
.L35:
        movl    %ebp, %esp
        popl    %ebp
        ret
.LFE6:
.Lfe6:

/*****************************************************************************/
/* EXTERN                   simd_upshifted_interleave                        */
/*****************************************************************************/
/* 32-bit x86, AT&T syntax, all arguments on the stack:
 *     8(%ebp)  `src1'     pointer to 16-bit words placed at even output slots
 *    12(%ebp)  `src2'     pointer to 16-bit words placed at odd output slots
 *    16(%ebp)  `dst'      pointer to the interleaved output
 *    20(%ebp)  count;     the loop runs (count+3)>>2 times
 *    24(%ebp)  `upshift'  left shift applied to every word before it is
 *                         written
 *
 * Same data movement as simd_interleave, except that every 16-bit word is
 * shifted left by `upshift' bits (PSLLW) first.
 *
 * Locals:    -4(%ebp) `octets' (iteration count).
 * Modifies:  EAX, ECX, EDX, MM0-MM3, flags.  EBX holds `src2' inside the
 *            loop and is saved/restored around it because it is a
 *            callee-saved register.  EMMS is executed after the loop.
 */
        .align 16
.globl simd_upshifted_interleave
simd_upshifted_interleave:
.LFB7:
        pushl   %ebp
.LCFI18:
        movl    %esp, %ebp
.LCFI19:
        subl    $4, %esp
.LCFI20:
        movl    20(%ebp), %eax
        addl    $3, %eax
        sarl    $2, %eax
        movl    %eax, -4(%ebp)          # `octets' = (count + 3) >> 2
        testl   %eax, %eax
        jle     .L38                    # nothing to do for octets <= 0
.L39:
        pushl   %ebx                    # Preserve caller's EBX (used for `src2')

        /* Special MMX code begins here */
        MOV     -4(%ebp),%ECX           # Loop counter = `octets'
        MOV     8(%ebp),%EAX            # Load `src1' pointer
        MOV     12(%ebp),%EBX           # Load `src2' pointer
        MOV     16(%ebp),%EDX           # Load `dst' pointer
        MOVD    24(%ebp),%MM3           # Load `upshift'
.Lloop_shift_ilv:
        MOVQ    (%EAX),%MM0             # 4 words from `src1'
        PSLLW   %MM3,%MM0               # ... shifted left by `upshift'
        MOVQ    %MM0,%MM2               # Copy kept for the low-half unpack
        MOVQ    (%EBX),%MM1             # 4 words from `src2'
        PSLLW   %MM3,%MM1               # ... shifted left by `upshift'
        PUNPCKLWD %MM1,%MM2             # s1[0],s2[0],s1[1],s2[1]
        MOVQ    %MM2,(%EDX)
        PUNPCKHWD %MM1,%MM0             # s1[2],s2[2],s1[3],s2[3]
        MOVQ    %MM0,8(%EDX)
        ADD     $8,%EAX
        ADD     $8,%EBX
        ADD     $16,%EDX
        SUB     $1,%ECX
        JNZ     .Lloop_shift_ilv
        EMMS                            # Clear MMX registers for use by FPU
        /* End of special MMX code; resume stub code generated by gcc */

        popl    %ebx                    # Restore caller's EBX
.L38:
        movl    %ebp, %esp
        popl    %ebp
        ret
.LFE7:
.Lfe7:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -