⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 k6opt.s

📁 IAX client库, 一个VOIP的库. 支持H.323和SIP, PBX就是采用的它
💻 S
📖 第 1 页 / 共 2 页
字号:
/* k6opt.s  vector functions optimized for MMX extensions to x86
 *
 * Copyright (C) 1999 by Stanley J. Brooks <stabro@megsinet.net>
 * 
 * Any use of this software is permitted provided that this notice is not
 * removed and that neither the authors nor the Technische Universitaet Berlin
 * are deemed to have made any representations as to the suitability of this
 * software for any purpose nor are held responsible for any defects of
 * this software.  THERE IS ABSOLUTELY NO WARRANTY FOR THIS SOFTWARE;
 * not even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE.
 * 
 * Chicago, 03.12.1999
 * Stanley J. Brooks
 */

	.file	"k6opt.s"
	.version	"01.01"
/* gcc2_compiled.: */
/*
 * coefs: twelve signed 16-bit filter taps, laid out as three quadwords so
 * that Weighting_filter can fetch them with movq from coefs, coefs+8 and
 * coefs+16.  The first eleven values are symmetric around 8192; the
 * twelfth is zero, which fills out the third quadword.
 */
.section	.rodata
	.align 4
	.type	 coefs,@object
coefs:
	.value	-134, -374, 0, 2054		/* coefs    */
	.value	5741, 8192, 5741, 2054		/* coefs+8  */
	.value	0, -374, -134, 0		/* coefs+16 */
	.size	 coefs,.-coefs			/* 12 words = 24 bytes */
.text
	.align 4
/*
 * void Weighting_filter (const short *e, short *x)
 *
 * 32-bit x86, arguments on the stack (8(%ebp) = e, 12(%ebp) = x), MMX.
 *
 * For k = 0 .. 39:
 *     acc  = sum(i = 0 .. 11) coefs[i] * e[k - 5 + i]
 *     x[k] = saturate16( (acc + 0x1000) >> 13 )
 *
 * The loads therefore touch e[-5] .. e[45]; the caller must make that
 * whole range readable.
 *
 * Register roles inside the loop:
 *     ebx = &e[-5]      edi = x       esi = k
 *     mm1, mm2, mm3 = the three quadwords of coefs
 *     mm5 = rounding constant 0x1000 in the low dword
 *     mm0 = accumulator, mm4 = scratch
 * Saves and restores edi, esi, ebx; leaves the MMX state empty (emms).
 */
.globl Weighting_filter
	.type	 Weighting_filter,@function
Weighting_filter:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	movl	8(%ebp),%ebx
	movl	12(%ebp),%edi
	subl	$10,%ebx		/* step back 5 words: ebx = &e[-5] */
	emms
	movl	$0x1000,%eax
	movd	%eax,%mm5		/* added before the >>13 to round */
	movq	coefs,%mm1
	movq	coefs+8,%mm2
	movq	coefs+16,%mm3
	xorl	%esi,%esi		/* k = 0 */
	.p2align 2
.Lwf_next_sample:
	/* three pmaddwd's cover the twelve taps, four words each */
	movq	(%ebx,%esi,2),%mm0
	pmaddwd	%mm1,%mm0

	movq	8(%ebx,%esi,2),%mm4
	pmaddwd	%mm2,%mm4
	paddd	%mm4,%mm0

	movq	16(%ebx,%esi,2),%mm4
	pmaddwd	%mm3,%mm4
	paddd	%mm4,%mm0

	/* fold the high dword of mm0 into the low dword */
	movq	%mm0,%mm4
	punpckhdq %mm0,%mm4
	paddd	%mm4,%mm0

	paddd	%mm5,%mm0		/* round */
	psrad	$13,%mm0
	packssdw %mm0,%mm0		/* signed saturation to 16 bits */
	movd	%mm0,%eax
	movw	%ax,(%edi,%esi,2)	/* x[k] = low word */
	incl	%esi
	cmpl	$40,%esi
	jl	.Lwf_next_sample
	emms
	popl	%ebx
	popl	%esi
	popl	%edi
	leave
	ret
	.size	 Weighting_filter,.-Weighting_filter

/*
 * ccstep n -- one four-word multiply-accumulate step.
 *
 *     mm0 += pmaddwd( quadword at n(%edi), quadword at n(%esi) )
 *
 * n is a byte displacement.  Clobbers mm1 and mm2.  A zero displacement
 * is emitted with the plain (%reg) addressing form.
 */
.macro ccstep n
.ifeq \n
	movq	(%edi),%mm1
	movq	(%esi),%mm2
.else
	movq	\n(%edi),%mm1
	movq	\n(%esi),%mm2
.endif
	pmaddwd	%mm2,%mm1
	paddd	%mm1,%mm0
.endm

	.align 4
/*
 * long k6maxcc(const short *wt, const short *dp, short *Nc_out)
 *
 * 32-bit x86, arguments on the stack, MMX.
 *
 * For every lag in 40 .. 120 forms the 40-term inner product
 *     sum(k = 0 .. 39) wt[k] * dp[k - lag]
 * using 32-bit wrapping adds, and keeps the largest one.  The running
 * maximum starts at 0 with lag 40, and a candidate replaces it only when
 * strictly greater (signed), so on ties the earliest lag is kept and a
 * non-positive set of products yields 0 / lag 40.
 *
 * Returns the maximum in %eax and stores the low 16 bits of the winning
 * lag through Nc_out.  Reads wt[0..39] and dp[-120..-1].
 *
 * Register roles:
 *     edi = wt           esi = &dp[-lag]     ebx = lag
 *     edx = best product ecx = best lag
 *     mm0 = accumulator  mm1, mm2 = scratch
 * Saves and restores edi, esi, ebx; leaves the MMX state empty (emms).
 */
.globl k6maxcc
	.type	 k6maxcc,@function
k6maxcc:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	emms
	movl	8(%ebp),%edi
	movl	12(%ebp),%esi
	subl	$80,%esi		/* esi = &dp[-40], the first lag */
	xorl	%edx,%edx		/* best product so far = 0 */
	movl	$40,%ebx		/* current lag */
	movl	%ebx,%ecx		/* best lag so far = 40 */
	.p2align 2
.Lmaxcc_lag:
	/* words 0..3 seed the accumulator, ccstep adds words 4..39 */
	movq	(%edi),%mm0
	movq	(%esi),%mm2
	pmaddwd	%mm2,%mm0
	.irp	disp,8,16,24,32,40,48,56,64,72
	ccstep	\disp
	.endr

	/* horizontal add: eax = low dword + high dword of mm0 */
	movq	%mm0,%mm1
	punpckhdq %mm0,%mm1
	paddd	%mm1,%mm0
	movd	%mm0,%eax

	cmpl	%edx,%eax
	jle	.Lmaxcc_advance		/* not a new maximum */
	movl	%eax,%edx
	movl	%ebx,%ecx
	.p2align 2
.Lmaxcc_advance:
	subl	$2,%esi			/* next lag is one word further back */
	incl	%ebx
	cmpl	$121,%ebx
	jl	.Lmaxcc_lag
	movl	16(%ebp),%eax
	movw	%cx,(%eax)		/* *Nc_out = best lag */
	movl	%edx,%eax		/* return the best product */
	emms
	popl	%ebx
	popl	%esi
	popl	%edi
	leave
	ret
	.size	 k6maxcc,.-k6maxcc


	.align 4
/*
 * long k6iprod (const short *p, const short *q, int n)
 *
 * 32-bit x86, arguments on the stack, MMX.
 *
 * Returns sum(i = 0 .. n-1) p[i] * q[i], with signed 16-bit elements and
 * a 32-bit accumulator that wraps on overflow.  n == 0 returns 0.
 *
 * The work is split by remaining length: blocks of 16 words, then blocks
 * of 4 words, then at most one pair, then at most one single word.  All
 * pointer comparisons are unsigned and are made against "top" = p + n
 * words, tracked in edx with a shrinking offset.
 *
 * Register roles:
 *     esi = cursor in p    edi = cursor in q    edx = top - <block size>
 *     mm0 = accumulator (two dword lanes)       mm1, mm2 = scratch
 * Saves and restores edi, esi; leaves the MMX state empty (emms).
 */
.globl k6iprod
	.type	 k6iprod,@function
k6iprod:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%edi
	pushl	%esi
	emms
	pxor	%mm0,%mm0
	movl	8(%ebp),%esi
	movl	12(%ebp),%edi
	movl	16(%ebp),%eax
	leal	-32(%esi,%eax,2),%edx	/* edx = top - 32 */

	cmpl	%edx,%esi
	ja	.Liprod_try4

	.p2align 2
.Liprod_x16:				/* 16 words per iteration */
	ccstep	0
	ccstep	8
	ccstep	16
	ccstep	24

	addl	$32,%esi
	addl	$32,%edi
	cmpl	%edx,%esi
	jbe	.Liprod_x16

	.p2align 2
.Liprod_try4:
	addl	$24,%edx		/* now edx = top - 8 */
	cmpl	%edx,%esi
	ja	.Liprod_try2

	.p2align 2
.Liprod_x4:				/* 4 words per iteration */
	ccstep	0

	addl	$8,%esi
	addl	$8,%edi
	cmpl	%edx,%esi
	jbe	.Liprod_x4

	.p2align 2
.Liprod_try2:
	addl	$4,%edx			/* now edx = top - 4 */
	cmpl	%edx,%esi
	ja	.Liprod_try1

	/* one pair: movd clears the upper dword, so only lane 0 changes */
	movd	(%edi),%mm1
	movd	(%esi),%mm2
	pmaddwd	%mm2,%mm1
	paddd	%mm1,%mm0

	addl	$4,%esi
	addl	$4,%edi

	.p2align 2
.Liprod_try1:
	addl	$2,%edx			/* now edx = top - 2 */
	cmpl	%edx,%esi
	ja	.Liprod_sum

	/*
	 * One trailing word.  It must be ZERO-extended: pmaddwd multiplies
	 * both 16-bit halves of the dword, so the upper half has to be 0.
	 * A sign-extending load would leave 0xFFFF (-1) there for negative
	 * inputs and add (-1)*(-1) = 1 whenever both operands are negative.
	 * pmaddwd still treats the low half as a signed 16-bit value.
	 */
	movzwl	(%edi),%eax
	movd	%eax,%mm1
	movzwl	(%esi),%eax
	movd	%eax,%mm2
	pmaddwd	%mm2,%mm1
	paddd	%mm1,%mm0

	.p2align 2
.Liprod_sum:
	/* horizontal add: eax = low dword + high dword of mm0 */
	movq	%mm0,%mm1
	punpckhdq %mm0,%mm1
	paddd	%mm1,%mm0
	movd	%mm0,%eax		/* return value */

	emms
	popl	%esi
	popl	%edi
	leave
	ret
	.size	 k6iprod,.-k6iprod


	.align 4
/*
 * void k6vsraw (short *p, int n, int bits)
 *
 * 32-bit x86, arguments on the stack, MMX.
 *
 * In-place rounded arithmetic right shift of n 16-bit words:
 *     p[i] = saturating_add16(p[i], r) >> bits
 * where r is each word of `ones` shifted left by bits and then logically
 * right by 1.  `ones` is defined outside this view; presumably it holds
 * four words of 0x0001, making r = 1 << (bits - 1) -- confirm.
 *
 * Returns immediately, touching nothing, when bits <= 0.
 *
 * The array is processed 8 words at a time, then 2 at a time, then a
 * final single word if n is odd.  Pointer comparisons are unsigned.
 *
 * Register roles:
 *     esi = cursor    edx = top - <block size>, top = p + n words
 *     mm2 = rounding term    mm3 = shift count    mm0, mm1 = data
 * Saves and restores esi; leaves the MMX state empty (emms) on the
 * working path.
 */
.globl k6vsraw
	.type	 k6vsraw,@function
k6vsraw:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	8(%ebp),%esi
	movl	16(%ebp),%ecx
	testl	%ecx,%ecx
	jle	.Lvsraw_ret		/* bits <= 0: nothing to do */
	movl	12(%ebp),%eax
	leal	-16(%esi,%eax,2),%edx	/* edx = top - 16 */
	emms
	movd	%ecx,%mm3
	movq	ones,%mm2
	psllw	%mm3,%mm2
	psrlw	$1,%mm2			/* mm2 = (ones << bits) >> 1 */
	cmpl	%edx,%esi
	ja	.Lvsraw_try2

	.p2align 2
.Lvsraw_x8:				/* 8 words per iteration */
	movq	(%esi),%mm0
	paddsw	%mm2,%mm0
	psraw	%mm3,%mm0
	movq	%mm0,(%esi)
	movq	8(%esi),%mm1
	paddsw	%mm2,%mm1
	psraw	%mm3,%mm1
	movq	%mm1,8(%esi)
	addl	$16,%esi
	cmpl	%edx,%esi
	jbe	.Lvsraw_x8

	.p2align 2
.Lvsraw_try2:
	addl	$12,%edx		/* now edx = top - 4 */
	cmpl	%edx,%esi
	ja	.Lvsraw_try1

	.p2align 2
.Lvsraw_x2:				/* up to 6 words, two at a time */
	movd	(%esi),%mm0
	paddsw	%mm2,%mm0
	psraw	%mm3,%mm0
	movd	%mm0,(%esi)
	addl	$4,%esi
	cmpl	%edx,%esi
	jbe	.Lvsraw_x2

	.p2align 2
.Lvsraw_try1:
	addl	$2,%edx			/* now edx = top - 2 */
	cmpl	%edx,%esi
	ja	.Lvsraw_done

	/* last odd word: only the low 16 bits of the result are stored */
	movzwl	(%esi),%eax
	movd	%eax,%mm0
	paddsw	%mm2,%mm0
	psraw	%mm3,%mm0
	movd	%mm0,%eax
	movw	%ax,(%esi)

	.p2align 2
.Lvsraw_done:
	emms
.Lvsraw_ret:
	popl	%esi
	leave
	ret
	.size	 k6vsraw,.-k6vsraw
	
	.align 4
/*
 * void k6vsllw (short *p, int n, int bits)
 *
 * 32-bit x86, arguments on the stack, MMX.
 *
 * In-place logical left shift of n 16-bit words: p[i] <<= bits, each
 * word shifted independently (psllw).  Returns immediately, touching
 * nothing, when bits <= 0.
 *
 * The array is processed 8 words at a time, then 2 at a time, then a
 * final single word if n is odd.  Pointer comparisons are unsigned.
 *
 * Register roles:
 *     esi = cursor    edx = top - <block size>, top = p + n words
 *     mm3 = shift count    mm0, mm1 = data
 * Saves and restores esi; leaves the MMX state empty (emms) on the
 * working path.
 */
.globl k6vsllw
	.type	 k6vsllw,@function
k6vsllw:
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	8(%ebp),%esi
	movl	16(%ebp),%ecx
	testl	%ecx,%ecx
	jle	.Lvsllw_ret		/* bits <= 0: nothing to do */
	movl	12(%ebp),%eax
	leal	-16(%esi,%eax,2),%edx	/* edx = top - 16 */
	emms
	movd	%ecx,%mm3
	cmpl	%edx,%esi
	ja	.Lvsllw_try2

	.p2align 2
.Lvsllw_x8:				/* 8 words per iteration */
	movq	(%esi),%mm0
	psllw	%mm3,%mm0
	movq	%mm0,(%esi)
	movq	8(%esi),%mm1
	psllw	%mm3,%mm1
	movq	%mm1,8(%esi)
	addl	$16,%esi
	cmpl	%edx,%esi
	jbe	.Lvsllw_x8

	.p2align 2
.Lvsllw_try2:
	addl	$12,%edx		/* now edx = top - 4 */
	cmpl	%edx,%esi
	ja	.Lvsllw_try1

	.p2align 2
.Lvsllw_x2:				/* up to 6 words, two at a time */
	movd	(%esi),%mm0
	psllw	%mm3,%mm0
	movd	%mm0,(%esi)
	addl	$4,%esi
	cmpl	%edx,%esi
	jbe	.Lvsllw_x2

	.p2align 2
.Lvsllw_try1:
	addl	$2,%edx			/* now edx = top - 2 */
	cmpl	%edx,%esi
	ja	.Lvsllw_done

	/* last odd word: only the low 16 bits of the result are stored */
	movzwl	(%esi),%eax
	movd	%eax,%mm0
	psllw	%mm3,%mm0
	movd	%mm0,%eax
	movw	%ax,(%esi)

	.p2align 2
.Lvsllw_done:
	emms
.Lvsllw_ret:
	popl	%esi
	leave
	ret
	.size	 k6vsllw,.-k6vsllw

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -