📄 n1k7i_128.s
字号:
/*
 * Copyright (c) 2003 Matteo Frigo
 * Copyright (c) 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sat Jul 5 21:51:41 EDT 2003 */

#include "config.h"

/* cheap-mode: VECTGRADE_FULL succeeded. (8036 steps) */
/* Generated by: /homee/stevenj/cvs/fftw3.0.1/genfft-k7/gen_notw -no-randomized-cse -n 128 -sign 1 -name n1k7i_128 */

/*
 * Generator Id's :
 * $Id: algsimp.ml,v 1.3 2003/03/15 20:29:42 stevenj Exp $
 * $Id: fft.ml,v 1.3 2003/03/15 20:29:43 stevenj Exp $
 * $Id: gen_notw.ml,v 1.11 2003/04/18 01:21:45 athena Exp $
 */

/* The following asm code is Copyright (c) 2000-2001 Stefan Kral */

/*
 * Syntax / target: GNU as, AT&T operand order, 32-bit x86 registers,
 * MMX + 3DNow! instructions (femms, pfadd, pfsub, pfmul, ...).
 * The file goes through the C preprocessor first (see the #include above),
 * so each directive and each assembler statement sits on its own line.
 */

/* ------------------------------------------------------------------ */
/* Read-only constants, 64-byte aligned.                               */
/* ------------------------------------------------------------------ */
.section .rodata
	.balign 64

/*
 * Sign-bit masks for a pair of packed 32-bit floats.  The code below
 * applies them with pxor: chs_lo has bit 31 set only in the first (low)
 * dword, chs_hi only in the second (high) dword, so the XOR flips the
 * sign of exactly one element of the pair.
 */
chs_lo:
	.long 0x80000000, 0x00000000
chs_hi:
	.long 0x00000000, 0x80000000

/*
 * Packed pairs of single-precision constants.  A label KPaaaaaaaaaKPbbbbbbbbb
 * holds the two values 0.aaaaaaaaa... and 0.bbbbbbbbb... in that order; the
 * literals carry more digits than a 32-bit float keeps and are rounded by
 * the assembler.  Every pair satisfies a*a + b*b = 1 to the printed
 * precision (presumably cosine/sine pairs used as FFT twiddle factors).
 */
KP707106781KP707106781:
	.float +0.707106781186547524400844362104849039284835938, +0.707106781186547524400844362104849039284835938
KP923879532KP382683432:
	.float +0.923879532511286756128183189396788286822416626, +0.382683432365089771728459984030398866761344562
KP382683432KP923879532:
	.float +0.382683432365089771728459984030398866761344562, +0.923879532511286756128183189396788286822416626
KP980785280KP195090322:
	.float +0.980785280403230449126182236134239036973933731, +0.195090322016128267848284868477022240927691618
KP555570233KP831469612:
	.float +0.555570233019602224742830813948532874374937191, +0.831469612302545237078788377617905756738560812
KP195090322KP980785280:
	.float +0.195090322016128267848284868477022240927691618, +0.980785280403230449126182236134239036973933731
KP831469612KP555570233:
	.float +0.831469612302545237078788377617905756738560812, +0.555570233019602224742830813948532874374937191
KP290284677KP956940335:
	.float +0.290284677254462367636192375817395274691476278, +0.956940335732208864935797886980269969482849206
KP956940335KP290284677:
	.float +0.956940335732208864935797886980269969482849206, +0.290284677254462367636192375817395274691476278
KP471396736KP881921264:
	.float +0.471396736825997648556387625905254377657460319, +0.881921264348355029712756863660388349508442621
KP881921264KP471396736:
	.float +0.881921264348355029712756863660388349508442621, +0.471396736825997648556387625905254377657460319
KP634393284KP773010453:
	.float +0.634393284163645498215171613225493370675687095, +0.773010453362736960810906609758469800971041293
KP773010453KP634393284:
	.float +0.773010453362736960810906609758469800971041293, +0.634393284163645498215171613225493370675687095
KP098017140KP995184726:
	.float +0.098017140329560601994195563888641845861136673, +0.995184726672196886244836953109479921575474869
KP995184726KP098017140:
	.float +0.995184726672196886244836953109479921575474869, +0.098017140329560601994195563888641845861136673
KP595699304KP803207531:
	.float +0.595699304492433343467036528829969889511926338, +0.803207531480644909806676512963141923879569427
KP989176509KP146730474:
	.float +0.989176509964780973451673738016243063983689533, +0.146730474455361751658850129646717819706215317
KP242980179KP970031253:
	.float +0.242980179903263889948274162077471118320990783, +0.970031253194543992603984207286100251456865962
KP970031253KP242980179:
	.float +0.970031253194543992603984207286100251456865962, +0.242980179903263889948274162077471118320990783
KP514102744KP857728610:
	.float +0.514102744193221726593693838968815772608049120, +0.857728610000272069902269984284770137042490799
KP857728610KP514102744:
	.float +0.857728610000272069902269984284770137042490799, +0.514102744193221726593693838968815772608049120
KP803207531KP595699304:
	.float +0.803207531480644909806676512963141923879569427, +0.595699304492433343467036528829969889511926338
KP146730474KP989176509:
	.float +0.146730474455361751658850129646717819706215317, +0.989176509964780973451673738016243063983689533
KP336889853KP941544065:
	.float +0.336889853392220050689253212619147570477766780, +0.941544065183020778412509402599502357185589796
KP903989293KP427555093:
	.float +0.903989293123443331586200297230537048710132025, +0.427555093430282094320966856888798534304578629
KP671558954KP740951125:
	.float +0.671558954847018400625376850427421803228750632, +0.740951125354959091175616897495162729728955309
KP740951125KP671558954:
	.float +0.740951125354959091175616897495162729728955309, +0.671558954847018400625376850427421803228750632
KP049067674KP998795456:
	.float +0.049067674327418014254954976942682658314745363, +0.998795456205172392714771604759100694443203615
KP998795456KP049067674:
	.float +0.998795456205172392714771604759100694443203615, +0.049067674327418014254954976942682658314745363
KP941544065KP336889853:
	.float +0.941544065183020778412509402599502357185589796, +0.336889853392220050689253212619147570477766780
KP427555093KP903989293:
	.float +0.427555093430282094320966856888798534304578629, +0.903989293123443331586200297230537048710132025

/* ------------------------------------------------------------------ */
/* n1k7i_128                                                           */
/*                                                                     */
/* Only the start of this routine is documented here; its body         */
/* continues well past this point.                                     */
/*                                                                     */
/* Frame: 2708 bytes are reserved below the return address, so after   */
/* the subl the return address is at 2708(%esp) and the caller's stack */
/* arguments begin at 2712(%esp).                                      */
/* NOTE(review): presumably a cdecl routine taking nine 4-byte stack   */
/* arguments (slots 2712..2744); confirm against the C prototype.      */
/*                                                                     */
/* Registers saved into the frame: %ebp -> 2692, %edi -> 2696,         */
/* %esi -> 2700, %ebx -> 2704.                                         */
/* ------------------------------------------------------------------ */
.text
.text
	.balign 64
n1k7i_128:
	subl $2708, %esp		/* reserve the local frame */
	femms				/* 3DNow! fast clear of MMX state before using %mm0-%mm7 */
	movl 2728(%esp), %ecx		/* stack slot at +2728 */
	movl %esi, 2700(%esp)		/* save %esi */
	movl 2732(%esp), %esi		/* stack slot at +2732 */
	movl 2716(%esp), %eax		/* stack slot at +2716: base address for (%eax,...) loads */
	movl %ebx, 2704(%esp)		/* save %ebx */
	movl 2724(%esp), %ebx		/* stack slot at +2724 */
	movl %edi, 2696(%esp)		/* save %edi */
	leal (,%ecx,4), %ecx		/* ecx *= 4 */
	movl %ebp, 2692(%esp)		/* save %ebp */
	leal (,%esi,4), %esi		/* esi *= 4 */
	movl %ecx, %edx
	movl %esi, %edi
	sall $6, %edx
	addl %eax, %edx			/* edx = eax + 64 * ecx (ecx already scaled by 4) */
	sall $6, %edi
	addl %ebx, %edi			/* edi = ebx + 64 * esi (esi already scaled by 4) */
	sall $2, 2740(%esp)		/* scale the slot at +2740 by 4 in place */
	sall $2, 2744(%esp)		/* scale the slot at +2744 by 4 in place */
	.p2align 4,,7
.L0:
	/* promise simd cell size = 8 */
	leal (,%ecx,4), %ebp		/* ebp = 4 * ecx, used as a scaled index below */
	movq (%eax), %mm1
	movq (%edx), %mm0
	/* NOTE: the destination operand of the following movq is the first token of the next source line. */
	movq (%edx,%ebp,8), 
%mm3 movq (%eax,%ebp,8), %mm4 leal (%ecx,%ecx,2), %ebp movq %mm1, %mm2 leal (%ecx,%ebp,2), %ebp pfsub %mm0, %mm1 movq %mm4, %mm5 pfsub %mm3, %mm4 movq (%eax,%ebp,8), %mm6 pfadd %mm0, %mm2 movq (%edx,%ebp,8), %mm7 leal (%ecx,%ecx,2), %ebp pfadd %mm3, %mm5 movq %mm1, 24(%esp) movq %mm4, 32(%esp) movq (%edx,%ebp,8), %mm1 movq (%eax,%ebp,8), %mm4 leal (%ecx,%ecx), %ebp movq %mm7, %mm0 movq %mm2, %mm3 pfsub %mm6, %mm7 pfsub %mm5, %mm2 pfadd %mm6, %mm0 pfadd %mm5, %mm3 movq %mm4, %mm6 movq (%eax,%ebp,8), %mm5 pfsub %mm1, %mm4 movq %mm7, 0(%esp) movq (%edx,%ebp,8), %mm7 leal (%ecx,%ecx), %ebp pfadd %mm1, %mm6 movq %mm2, 40(%esp) movq %mm0, %mm2 movq %mm5, %mm1 leal (%ebp,%ebp,2), %ebp pfsub %mm7, %mm5 pxor chs_lo, %mm4 pfadd %mm7, %mm1 movq (%eax,%ebp,8), %mm7 pfsub %mm6, %mm2 pfadd %mm6, %mm0 movq (%edx,%ebp,8), %mm6 leal (%ecx,%ecx,4), %ebp movq %mm4, 8(%esp) movq %mm2, 16(%esp) movq %mm5, %mm2 movq %mm6, %mm4 pfsub %mm7, %mm6 pfadd %mm7, %mm4 movq %mm1, %mm7 pfpnacc %mm6, %mm2 pswapd %mm6, %mm6 pfpnacc %mm5, %mm6 movq (%edx,%ecx,8), %mm5 pfadd %mm4, %mm7 movq %mm2, 48(%esp) movq (%eax,%ecx,8), %mm2 pfsubr %mm4, %mm1 movq %mm6, 64(%esp) movq (%edx,%ebp,8), %mm6 movq %mm2, %mm4 pfsub %mm5, %mm2 pswapd %mm1, %mm1 pfadd %mm5, %mm4 pxor chs_hi, %mm1 movq (%eax,%ebp,8), %mm5 leal (%ecx,%ecx,2), %ebp movq %mm2, 56(%esp) leal (%ecx,%ebp,2), %ebp movq %mm5, %mm2 pfsub %mm6, %mm5 leal (%ebp,%ebp,8), %ebp pfadd %mm6, %mm2 movq %mm4, %mm6 pswapd %mm5, %mm5 pfadd %mm2, %mm6 pxor chs_lo, %mm5 pfsub %mm2, %mm4 movq %mm3, %mm2 pfadd %mm7, %mm3 pfsub %mm7, %mm2 movq %mm6, %mm7 pfadd %mm0, %mm6 pfsubr %mm0, %mm7 movq %mm3, %mm0 movq %mm2, 72(%esp) movq 40(%esp), %mm2 pfsub %mm6, %mm3 pfadd %mm6, %mm0 movq %mm7, 144(%esp) movq 16(%esp), %mm7 movq %mm2, %mm6 pfsub %mm1, %mm2 movq %mm3, 184(%esp) movq %mm4, %mm3 pfadd %mm1, %mm6 movq %mm0, 192(%esp) movq 56(%esp), %mm1 pfpnacc %mm7, %mm3 pswapd %mm7, %mm7 movq %mm2, 80(%esp) pfpnacc %mm4, %mm7 movq %mm6, 88(%esp) movq 32(%esp), %mm4 movq 
24(%esp), %mm6 movq %mm1, %mm0 pfadd %mm5, %mm1 pfsub %mm5, %mm0 pswapd %mm4, %mm4 movq %mm7, %mm2 pfnacc %mm3, %mm7 movq %mm6, %mm5 pxor chs_lo, %mm4 pfacc %mm2, %mm3 movq 64(%esp), %mm2 pfmul KP707106781KP707106781, %mm7 pfsub %mm4, %mm6 pfmul KP707106781KP707106781, %mm3 pfadd %mm4, %mm5 movq %mm6, 104(%esp) movq 48(%esp), %mm4 movq %mm2, %mm6 pfnacc %mm4, %mm2 movq %mm5, 96(%esp) movq 8(%esp), %mm5 pfacc %mm6, %mm4 movq %mm3, 136(%esp) movq %mm7, 112(%esp) movq 0(%esp), %mm6 movq %mm1, %mm7 pfmul KP923879532KP382683432, %mm1 pfmul KP707106781KP707106781, %mm2 pswapd %mm6, %mm6 pfmul KP707106781KP707106781, %mm4 pfmul KP382683432KP923879532, %mm7 movq %mm6, %mm3 pfsub %mm5, %mm6 pfadd %mm5, %mm3 movq %mm0, %mm5 pfmul KP923879532KP382683432, %mm0 movq %mm2, 128(%esp) movq %mm6, %mm2 pfmul KP382683432KP923879532, %mm6 movq %mm4, 120(%esp) pfmul KP382683432KP923879532, %mm5 pfmul KP923879532KP382683432, %mm2 movq %mm3, %mm4 pfmul KP382683432KP923879532, %mm3 pfmul KP923879532KP382683432, %mm4 pfpnacc %mm6, %mm1 movq (%eax,%ebp), %mm6 pfpnacc %mm7, %mm2 movq (%edx,%ebp), %mm7 leal (%ecx,%ecx,2), %ebp pfpnacc %mm0, %mm3 leal (%ebp,%ebp,4), %ebp pfpnacc %mm4, %mm5 movq %mm1, 152(%esp) leal (%ecx,%ebp,2), %ebp movq %mm7, %mm0 pfadd %mm6, %mm7 movq %mm2, 160(%esp) movq (%edx,%ebp), %mm4 movq (%eax,%ebp), %mm1 pfsub %mm6, %mm0 imull $59, %ecx, %ebp movq %mm5, 176(%esp) movq %mm3, 168(%esp) movq %mm7, %mm3 movq %mm1, %mm2 pfadd %mm4, %mm1 movq %mm0, 208(%esp) movq (%eax,%ebp), %mm6 movq (%edx,%ebp), %mm5 leal (%ecx,%ecx,4), %ebp pfsub %mm4, %mm2 leal (%ecx,%ebp,4), %ebp pfsub %mm1, %mm3 pfadd %mm1, %mm7 leal (%ecx,%ebp,2), %ebp movq %mm5, %mm0 pfsub %mm6, %mm5 pxor chs_lo, %mm2 movq (%eax,%ebp), %mm4 movq (%edx,%ebp), %mm1 leal (%ecx,%ecx,2), %ebp movq %mm3, 224(%esp) pfadd %mm6, %mm0 leal (%ebp,%ebp,8), %ebp movq %mm7, 216(%esp) pswapd %mm5, %mm5 movq %mm2, 200(%esp) movq (%edx,%ebp), %mm7 movq (%eax,%ebp), %mm3 leal (%ecx,%ecx,2), %ebp movq %mm1, %mm6 pfadd %mm4, %mm1 
leal (%ebp,%ecx,8), %ebp pfsub %mm4, %mm6 movq %mm3, %mm2 pfsub %mm7, %mm3 movq (%edx,%ebp), %mm4 pfadd %mm7, %mm2 movq (%eax,%ebp), %mm7 leal (%ecx,%ecx,2), %ebp pxor chs_lo, %mm3 leal (%ebp,%ebp,4), %ebp movq %mm3, 232(%esp) movq %mm7, %mm3 pfsub %mm4, %mm7 pfadd %mm4, %mm3 movq %mm0, %mm4 pfsub %mm2, %mm0 pfadd %mm2, %mm4 movq %mm3, %mm2 pfsub %mm1, %mm3 movq %mm0, 280(%esp) pswapd %mm6, %mm0 pfadd %mm1, %mm2 movq %mm4, %mm1 pfpnacc %mm7, %mm0 pfpnacc %mm6, %mm7 pxor chs_hi, %mm3 pfsub %mm2, %mm1 pfadd %mm2, %mm4 movq 232(%esp), %mm2 movq %mm7, %mm6 pfnacc %mm0, %mm7 movq %mm3, 272(%esp) movq %mm5, %mm3 movq %mm1, 328(%esp) pfacc %mm6, %mm0 movq (%edx,%ebp), %mm1 movq (%eax,%ebp), %mm6 leal (%ecx,%ecx,4), %ebp pfsub %mm2, %mm5 pfadd %mm2, %mm3 movq %mm4, 304(%esp) pfmul KP707106781KP707106781, %mm7 leal (%ebp,%ebp,8), %ebp pfmul KP707106781KP707106781, %mm0 leal (%ebp,%ecx,2), %ebp movq %mm5, 248(%esp) movq %mm6, %mm2 pfsub %mm1, %mm6 movq %mm3, 264(%esp) movq (%eax,%ebp), %mm4 movq (%edx,%ebp), %mm5 leal (%ecx,%ecx,2), %ebp pfadd %mm1, %mm2 movq %mm7, 256(%esp) leal (%ecx,%ebp,2), %ebp movq %mm0, 240(%esp) movq %mm6, %mm1 movq %mm5, %mm3 pfsub %mm4, %mm5 movq (%edx,%ebp), %mm0 movq %mm2, %mm7 pfadd %mm4, %mm3 movq (%eax,%ebp), %mm4 leal (%ecx,%ecx,2), %ebp pfpnacc %mm5, %mm1 leal (%ebp,%ebp,4), %ebp pswapd %mm5, %mm5 leal (%ebp,%ecx,8), %ebp pfpnacc %mm6, %mm5 pfadd %mm3, %mm7 movq %mm1, 288(%esp) movq (%eax,%ebp), %mm6 pfsub %mm3, %mm2 movq (%edx,%ebp), %mm1 leal (%ecx,%ecx,2), %ebp movq %mm4, %mm3 movq %mm5, 296(%esp) pfsub %mm0, %mm4 leal (%ebp,%ebp,8), %ebp movq %mm7, 312(%esp) pfadd %mm0, %mm3 movq %mm6, %mm0 pxor chs_hi, %mm2 leal (%ecx,%ebp,2), %ebp pfsub %mm1, %mm6 pfadd %mm1, %mm0 movq (%eax,%ebp), %mm7 movq (%edx,%ebp), %mm5 leal (%ecx,%ecx,2), %ebp movq %mm2, 320(%esp) leal (%ecx,%ebp,4), %ebp pxor chs_lo, %mm6 movq %mm5, %mm1 leal (%ebp,%ebp,2), %ebp pfsub %mm7, %mm5 pfadd %mm7, %mm1 movq %mm6, 336(%esp) movq (%edx,%ebp), %mm2 movq (%eax,%ebp), %mm7 
leal (%ecx,%ecx,2), %ebp pswapd %mm5, %mm5 movq %mm7, %mm6 pfadd %mm2, %mm7 pfsub %mm2, %mm6 movq %mm3, %mm2 pfadd %mm7, %mm2 pfsub %mm7, %mm3 movq %mm1, %mm7 pfadd %mm0, %mm1 pswapd %mm6, %mm6 pfsub %mm0, %mm7 pxor chs_lo, %mm6 movq %mm2, 400(%esp) movq %mm4, %mm2 movq %mm3, %mm0 movq %mm1, 344(%esp) movq 336(%esp), %mm1 pfpnacc %mm7, %mm0 pswapd %mm7, %mm7 pfsub %mm6, %mm2 pfpnacc %mm3, %mm7 movq %mm5, %mm3 pfadd %mm6, %mm4 movq (%edx,%ebp), %mm6 pfsub %mm1, %mm5 pfadd %mm1, %mm3 movq (%eax,%ebp), %mm1 leal (%ecx,%ecx), %ebp movq %mm7, 408(%esp) leal (%ecx,%ebp,8), %ebp movq %mm4, 376(%esp) movq %mm2, 352(%esp) leal (%ecx,%ebp,2), %ebp movq %mm1, %mm2 movq %mm3, 360(%esp) pfsub %mm6, %mm1 movq %mm5, 392(%esp) movq (%edx,%ebp), %mm4 movq (%eax,%ebp), %mm7 leal (%ecx,%ecx,8), %ebp pfadd %mm6, %mm2 leal (%ecx,%ebp,2), %ebp movq %mm1, 368(%esp) movq (%edx,%ebp), %mm3 movq (%eax,%ebp), %mm6 leal (%ecx,%ecx), %ebp movq %mm7, %mm5 pfsub %mm4, %mm7 leal (%ecx,%ebp,8), %ebp pfadd %mm4, %mm5 leal (%ebp,%ebp,2), %ebp movq %mm6, %mm1 pfsub %mm3, %mm6 movq %mm7, 384(%esp) movq (%edx,%ebp), %mm7 movq (%eax,%ebp), %mm4 pfadd %mm3, %mm1 leal (%ecx,%ecx,8), %ebp movq %mm7, %mm3 pfsub %mm4, %mm7 pfadd %mm4, %mm3 movq %mm1, %mm4 pfsubr %mm3, %mm4 pfadd %mm3, %mm1 movq %mm2, %mm3 pfsub %mm5, %mm2 pfadd %mm5, %mm3 pswapd %mm4, %mm4 movq %mm2, 416(%esp) movq %mm6, %mm2 pxor chs_hi, %mm4 movq %mm3, %mm5 pfadd %mm1, %mm3 pfpnacc %mm7, %mm2 pswapd %mm7, %mm7 pfsub %mm1, %mm5 pfpnacc %mm6, %mm7 movq 328(%esp), %mm6 movq %mm2, %mm1 pfacc %mm7, %mm2 pfnacc %mm1, %mm7 movq %mm5, %mm1 pfpnacc %mm6, %mm1 pswapd %mm6, %mm6 pfmul KP707106781KP707106781, %mm2 pfpnacc %mm5, %mm6 pfmul KP707106781KP707106781, %mm7 movq %mm1, %mm5 pfnacc %mm6, %mm1 pfacc %mm5, %mm6 movq %mm2, 440(%esp) movq 408(%esp), %mm5 movq %mm0, %mm2 movq %mm7, 512(%esp) movq 224(%esp), %mm7 pfmul KP707106781KP707106781, %mm1 pfnacc %mm5, %mm0 pfmul KP707106781KP707106781, %mm6 pfacc %mm2, %mm5 pswapd %mm7, %mm7 movq 320(%esp), 
%mm2 pfmul KP707106781KP707106781, %mm0 movq %mm1, 432(%esp) pfmul KP707106781KP707106781, %mm5 movq %mm7, %mm1 pfadd %mm2, %mm7 movq %mm6, 424(%esp) pfsub %mm2, %mm1 movq %mm7, %mm2 movq %mm1, %mm6 pfadd %mm5, %mm7 pfadd %mm0, %mm1 pfsub %mm0, %mm6 movq 216(%esp), %mm0 pfsub %mm5, %mm2 movq %mm7, 464(%esp) movq %mm1, 456(%esp) movq %mm6, 448(%esp) movq 312(%esp), %mm5 movq 400(%esp), %mm7 movq 344(%esp), %mm6 movq %mm0, %mm1 movq %mm2, 472(%esp) pfadd %mm5, %mm0 movq %mm7, %mm2 pfsub %mm5, %mm1 pfadd %mm6, %mm2 pfsub %mm6, %mm7 movq %mm0, %mm5 movq 304(%esp), %mm6 pswapd %mm1, %mm1 pfsub %mm2, %mm0 pfadd %mm2, %mm5 movq %mm3, %mm2 pxor chs_hi, %mm7 pfsub %mm6, %mm3 pfadd %mm6, %mm2 movq %mm0, 480(%esp) movq 208(%esp), %mm0 movq %mm5, %mm6 pxor chs_hi, %mm3 pfsub %mm2, %mm6 pfadd %mm2, %mm5 pswapd %mm0, %mm0 movq %mm3, 488(%esp) movq 200(%esp), %mm2 movq %mm0, %mm3 movq %mm6, 496(%esp) movq %mm5, 504(%esp) movq 296(%esp), %mm6 movq 288(%esp), %mm5 pfadd %mm2, %mm0 pfsub %mm2, %mm3 movq %mm6, %mm2 pfacc %mm5, %mm6 pfnacc %mm2, %mm5 movq %mm3, %mm2 pfmul KP707106781KP707106781, %mm6 pfmul KP707106781KP707106781, %mm5 pfadd %mm6, %mm3 pfsub %mm6, %mm2 movq %mm0, %mm6 pfsub %mm5, %mm0 pfadd %mm5, %mm6 movq %mm7, %mm5
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -