📄 3dnow_float_dotprod_simple.s
字号:
#
# Copyright 2002 Free Software Foundation, Inc.
#
# This file is part of GNU Radio
#
# GNU Radio is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# GNU Radio is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Radio; see the file COPYING.  If not, write to
# the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#

# -----------------------------------------------------------------------
# float
# sse_float_dotprod (const float *input,
#                    const float *taps, unsigned n_4_float_blocks)
#
# Syntax:  GNU as, AT&T operand order (op src, dst).
# Target:  32-bit x86, ELF; arguments are read from the stack.
# NOTE:    despite the "sse_" prefix in the exported name, the body uses
#          MMX/3DNow! instructions (pxor/movq/pfmul/pfadd/pfacc/femms).
#
# Caller contract (as stated by the original author):
#   input and taps are guaranteed to be 16 byte aligned.
#   Each block is 4 floats (16 bytes) of input and of taps.
#   n_4_float_blocks == 0 is tolerated: the loop is skipped and 0.0 is
#   returned.
#
# In:     8(%ebp)  = input
#         12(%ebp) = taps
#         16(%ebp) = n_4_float_blocks (slot is reused as scratch below)
# Out:    st(0)    = dot product
# Clobb:  %eax, %ecx, %edx, %mm0, %mm1, %mm4, %mm5, flags
#
# Reference implementation in C:
#
#   float sum0 = 0;
#   float sum1 = 0;
#   float sum2 = 0;
#   float sum3 = 0;
#
#   while (n_4_float_blocks-- != 0) {
#
#     sum0 += input[0] * taps[0];
#     sum1 += input[1] * taps[1];
#     sum2 += input[2] * taps[2];
#     sum3 += input[3] * taps[3];
#
#     input += 4;
#     taps += 4;
#   }
#
#   return sum0 + sum1 + sum2 + sum3;
#
# The final reduction here is performed as (sum0 + sum2) + (sum1 + sum3).
# -----------------------------------------------------------------------

        .file   "3dnow_float_dotprod_simple.s"
        .version "01.01"
.text
        .align  16
.globl sse_float_dotprod
        .type   sse_float_dotprod,@function
sse_float_dotprod:
        pushl   %ebp
        movl    %esp, %ebp
        movl    8(%ebp), %edx           # edx = input
        movl    12(%ebp), %eax          # eax = taps
        movl    16(%ebp), %ecx          # ecx = blocks remaining

        # The plan is to get it computing the correct answer, and
        # then to unroll and schedule the inner loop.

        pxor    %mm4, %mm4              # mm4 = 0 0   (sum1 : sum0)
        pxor    %mm5, %mm5              # mm5 = 0 0   (sum3 : sum2)

        # Without this guard a zero count would wrap in the decl below
        # and walk far past the end of both buffers.
        testl   %ecx, %ecx
        jz      .Lreduce

        .p2align 4
.Lloop:
        movq    0(%eax), %mm0           # mm0 = taps[1]  : taps[0]
        movq    8(%eax), %mm1           # mm1 = taps[3]  : taps[2]
        pfmul   0(%edx), %mm0           # mm0 *= input[1] : input[0]
        pfadd   %mm0, %mm4
        pfmul   8(%edx), %mm1           # mm1 *= input[3] : input[2]
        pfadd   %mm1, %mm5

        addl    $16, %edx               # advance both pointers by one
        addl    $16, %eax               # 4-float block
        decl    %ecx
        jnz     .Lloop

        # at this point mm4 and mm5 contain partial sums

.Lreduce:
        pfadd   %mm5, %mm4              # mm4 = sum1+sum3 : sum0+sum2
        pfacc   %mm4, %mm4              # low dword = total of all four

        # Move the result from MMX to the x87 stack through memory; the
        # already-consumed count argument slot serves as the temporary.
        movd    %mm4, 16(%ebp)
        femms                           # leave MMX state before using x87
        flds    16(%ebp)                # return value in st(0)

        popl    %ebp
        ret
.Lfe1:
        .size   sse_float_dotprod,.Lfe1-sse_float_dotprod
        .ident  "Hand coded x86 3DNow! assembly"

        # Declare that this object does not need an executable stack;
        # when the note is absent GNU ld assumes that it does.
        .section .note.GNU-stack,"",@progbits
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -