⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 short_dotprod_mmx64.s

📁 这是用python语言写的一个数字广播的信号处理工具包。利用它
💻 S
字号:
#
# Copyright 2002,2005 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#

# SIMD MMX dot product
# Equivalent to the following C code:
# long dotprod(signed short *a,signed short *b,int cnt)
# {
#	long sum = 0;
#	cnt *= 4;
#	while(cnt--)
#		sum += *a++ * *b++;
#	return sum;
# }
# a and b should also be 64-bit aligned, or speed will suffer greatly
# Copyright 1999, Phil Karn KA9Q
# May be used under the terms of the GNU public license
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
#
# Register usage:
#	rdi = a		pointer to signed 16-bit values, advanced as consumed
#	rsi = b		pointer to signed 16-bit values, advanced as consumed
#	rdx = cnt	number of 4-element (8-byte) blocks still to process
#	mm0 = running sum, held as two independent 32-bit partial sums
#	mm1 = scratch for the current block
# Result: the two partial sums are added with 32-bit wraparound and the
# total is returned sign-extended in rax.  Unlike the C sketch above, the
# accumulation is therefore 32 bits wide, not 64.
# Clobbers: rdx, rdi, rsi, mm0, mm1, flags.  MMX state is released by emms.
#
# NOTE(review): cnt is consumed as a full 64-bit quantity in rdx.  When the
# real C prototype declares a 32-bit count, the upper half of rdx is not
# guaranteed to be extended by the caller; confirm against the prototype.

#include "assembly.h"

	.file	"short_dotprod_mmx64.S"
	.version	"01.01"
.text
	.p2align 3
.globl GLOB_SYMB(short_dotprod_mmx)
	DEF_FUNC_HEAD(short_dotprod_mmx)
GLOB_SYMB(short_dotprod_mmx):

	# a: rdi, b: rsi, cnt: rdx

	pxor %mm0,%mm0		# clear running sum (in two 32-bit halves)

	# MMX dot product loop unrolled 4 times, crunching 16 terms per loop
	.p2align 4
.Loop1mmx:
	sub $4,%rdx		# claim 4 blocks; signed test, so cnt < 4 falls out
	jl   .Loop1Done

	movq (%rdi),%mm1	# mm1 = a[3],a[2],a[1],a[0]
	pmaddwd (%rsi),%mm1	# mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0]
	paddd %mm1,%mm0

	movq 8(%rdi),%mm1	# second block of 4 terms
	pmaddwd 8(%rsi),%mm1
	paddd %mm1,%mm0

	movq 16(%rdi),%mm1	# third block of 4 terms
	pmaddwd 16(%rsi),%mm1
	paddd %mm1,%mm0

	movq 24(%rdi),%mm1	# fourth block; pointer bumps are interleaved
	add $32,%rdi		# a += 16 elements (after the last load from a)
	pmaddwd 24(%rsi),%mm1
	add $32,%rsi		# b += 16 elements (after the last load from b)
	paddd %mm1,%mm0
	jmp .Loop1mmx

.Loop1Done:
	add $4,%rdx		# undo the over-subtraction: rdx = blocks left (0..3 for cnt >= 0)

	# MMX dot product loop, not unrolled, crunching 4 terms per loop
	# This could be redone as Duff's Device on the unrolled loop above
.Loop2:
	sub $1,%rdx
	jl   .Loop2Done

	movq (%rdi),%mm1
	add $8,%rdi
	pmaddwd (%rsi),%mm1
	add $8,%rsi
	paddd %mm1,%mm0
	jmp .Loop2

.Loop2Done:
	movd %mm0,%edx		# right-hand word to edx
	punpckhdq %mm0,%mm0	# left-hand word to right side of %mm0
	movd %mm0,%eax
	addl %edx,%eax		# running sum now in %eax
	cltq			# sign-extend eax into rax: the documented return type is long
	emms			# done with MMX

	retq

FUNC_TAIL(short_dotprod_mmx)
	.ident	"Hand coded x86_64 MMX assembly"

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -