short_dotprod_mmx64.s

来自「这是用python语言写的一个数字广播的信号处理工具包。利用它」· S 代码 · 共 106 行

106 行

#
# Copyright 2002,2005 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street,
# Boston, MA 02110-1301, USA.
#

# SIMD MMX dot product
# Equivalent to the following C code:
# long dotprod(signed short *a,signed short *b,int cnt)
# {
#       long sum = 0;
#       cnt *= 4;
#       while(cnt--)
#               sum += *a++ * *b++;
#       return sum;
# }
# a and b should also be 64-bit aligned, or speed will suffer greatly
# Copyright 1999, Phil Karn KA9Q
# May be used under the terms of the GNU public license
#
# Register usage (AT&T operand order: source first, destination last):
#   In:     rdi = a, rsi = b, rdx = cnt (count of 4-element groups)
#   Out:    eax = sum of the 16-bit products, accumulated in 32-bit lanes
#   Clobb:  rdi, rsi, rdx, mm0, mm1, flags; emms is executed before return
#
# NOTE(review): the accumulation is done with paddd/addl, so the result is
# a 32-bit value written to eax even though the C sketch above says long.
# Confirm which return type the C prototype used by callers declares.
# NOTE(review): cnt is consumed as the full 64-bit rdx while the C sketch
# declares it as int; this relies on the caller having extended the value
# to 64 bits. TODO confirm against the declaration.

#include "assembly.h"

        .file   "short_dotprod_mmx64.S"
        .version        "01.01"
.text
        .p2align 3
.globl GLOB_SYMB(short_dotprod_mmx)
        DEF_FUNC_HEAD(short_dotprod_mmx)
GLOB_SYMB(short_dotprod_mmx):

        # a: rdi, b: rsi, cnt: rdx

        pxor    %mm0,%mm0               # clear running sum (in two 32-bit halves)

        # MMX dot product loop unrolled 4 times, crunching 16 terms per loop
        # Each pass consumes 4 groups (32 bytes) from both a and b.
        .p2align 4
.Loop1mmx:
        sub     $4,%rdx                 # reserve 4 groups; signed test below
        jl      .Loop1Done              # fewer than 4 groups left: go to tail loop

        movq    (%rdi),%mm1             # mm1 = a[3],a[2],a[1],a[0]
        pmaddwd (%rsi),%mm1             # mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0]
        paddd   %mm1,%mm0               # add both 32-bit partial sums into mm0

        movq    8(%rdi),%mm1            # second group of 4 shorts
        pmaddwd 8(%rsi),%mm1
        paddd   %mm1,%mm0

        movq    16(%rdi),%mm1           # third group
        pmaddwd 16(%rsi),%mm1
        paddd   %mm1,%mm0

        movq    24(%rdi),%mm1           # fourth group; pointer bumps are interleaved
        add     $32,%rdi                # a += 16 shorts
        pmaddwd 24(%rsi),%mm1           # still uses the old rsi, hence offset 24
        add     $32,%rsi                # b += 16 shorts
        paddd   %mm1,%mm0

        jmp     .Loop1mmx
.Loop1Done:

        add     $4,%rdx                 # undo the last sub: rdx = groups still to do

        # MMX dot product loop, not unrolled, crunching 4 terms per loop
        # This could be redone as Duff's Device on the unrolled loop above
.Loop2:
        sub     $1,%rdx                 # one group per pass
        jl      .Loop2Done              # no groups left

        movq    (%rdi),%mm1
        add     $8,%rdi                 # a += 4 shorts
        pmaddwd (%rsi),%mm1
        add     $8,%rsi                 # b += 4 shorts
        paddd   %mm1,%mm0
        jmp     .Loop2
.Loop2Done:

        # Fold the two 32-bit halves of mm0 into a single 32-bit result.
        movd    %mm0,%edx               # right-hand word to edx
        punpckhdq %mm0,%mm0             # left-hand word to right side of %mm0
        movd    %mm0,%eax
        addl    %edx,%eax               # running sum now in %eax
        emms                            # done with MMX

        retq

FUNC_TAIL(short_dotprod_mmx)
        .ident  "Hand coded x86_64 MMX assembly"

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

short_dotprod_mmx64.s - 源码说明

本页面展示了「这是用python语言写的一个数字广播的信号处理工具包。利用它」中的 short_dotprod_mmx64.s 源码文件，该文件为 x86-64 MMX 汇编源码（GNU as / AT&T 语法，扩展名 .s），共 106 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫开发者社区收录了大量与Python相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码Ctrl + C

搜索代码Ctrl + F

全屏模式F11

增大字号Ctrl + =

减小字号Ctrl + -

显示快捷键?