⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t2_64.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:38:49 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twiddle -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -name t2_64 -include t.h *//* * This function contains 1154 FP additions, 840 FP multiplications, * (or, 520 additions, 206 multiplications, 634 fused multiply/add), * 349 stack variables, 15 constants, and 256 memory accesses */#include "t.h"static void t2_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP995184726, +0.995184726672196886244836953109479921575474869);     DK(KP773010453, +0.773010453362736960810906609758469800971041293);     DK(KP956940335, +0.956940335732208864935797886980269969482849206);     DK(KP881921264, +0.881921264348355029712756863660388349508442621);     DK(KP820678790, +0.820678790828660330972281985331011598767386482);     DK(KP098491403, +0.098491403357164253077197521291327432293052451);     DK(KP534511135, +0.534511135950791641089685961295362908582039528);     DK(KP303346683, +0.303346683607342391675883946941299872384187453);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     INT m;     for (m = mb, W = W + (mb * 10); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 10, MAKE_VOLATILE_STRIDE(rs)) {	  E Tg0, TlC, TlB, Tg3;	  {	       E T2, T3, Tc, T8, Te, T5, T6, T14, T3d, T3i, TJ, T7, Tr, T3g, TG;	       E T10, T3a, TL, TP, Tb, Tt, T17, Td, Ti, T3N, T3R, T1i, Tu, T1I, T2U;	       E T1t, T3U, T5O, T48, T2u, T7B, TK, T79, T3D, T2h, T2l, T3G, T1x, T3X, T2d;	       E T1M, T2X, T4B, T4x, T3j, T4T, T29, T5s, T81, T5w, T7X, T7N, T7h, T64, T6a;	       E T6e, T7l, T60, T7R, T6h, T5A, T7o, T6J, T6k, T5E, T6N, T7r, T6x, T6t, T7c;	       E TO, T2x, T7E, TU, TQ, T2C, T2y, T5R, T4b, T4c, T4g, T4W, T3m, T3r, T3n;	       E T1k, Tx, Ty, T4p, T4s, TC, T23, T1Z, T19, Th, T31, T35, T1e, T44, T41;	       E T1a, T6W, T70, T55, T59, T3v, T3z, Tf, T1R, T2N, T2Q, T1V, T1p, T1l, Tm;	       {		    E T1H, T1s, T2g, Tg, Tw, TH, T2t, T47, T3h, T3M, T4w, T28, T3Q, T4A, T2c;		    E Ts;		    {			 E T4, T13, TI, TF, TZ, Ta, T9;			 T2 = W[0];			 T3 = W[2];			 Tc = W[5];			 T8 = W[4];			 Te = W[6];			 T4 = T2 * T3;			 T13 = T2 * Tc;			 TI = T3 * Tc;			 TF = T3 * T8;			 T1H = T8 * Te;			 TZ = T2 * T8;			 T5 = W[1];			 T6 = W[3];			 T1s = T3 * Te;			 T2g = T2 * Te;			 T14 = FNMS(T5, T8, T13);			 T3d = FMA(T5, T8, T13);			 T3i = FNMS(T6, T8, TI);			 TJ = FMA(T6, T8, TI);			 T7 = FNMS(T5, T6, T4);			 Tr = FMA(T5, T6, T4);			 Ta = T2 * T6;			 Tg = T7 * Tc;			 Tw = Tr * Tc;			 T3g = FMA(T6, Tc, TF);			 TG = FNMS(T6, Tc, TF);			 T10 = FMA(T5, Tc, TZ);			 T3a = FNMS(T5, Tc, TZ);			 TH = TG * Te;			 T2t = T10 * Te;			 T47 = T3a * Te;			 T3h = T3g * Te;			 TL = W[8];			 TP = W[9];			 T9 = T7 * T8;			 Tb = FMA(T5, T3, Ta);			 Tt = FNMS(T5, T3, Ta);			 T3M = T2 * TL;			 T4w = T8 * TL;			 T28 = T3 * TL;			 T3Q = T2 * TP;			 T4A = T8 * TP;			 T2c = T3 * TP;			 T17 = FNMS(Tb, Tc, T9);			 Td = FMA(Tb, Tc, T9);			 Ts = Tr * T8;			 Ti = W[7];		    }		    {			 E T5r, T80, T1L, T2k, T1w, T5z, T2B, T2v;			 T3N = FMA(T5, TP, T3M);			 T3R = FNMS(T5, TL, T3Q);			 T1i = FMA(Tt, Tc, Ts);			 Tu = FNMS(Tt, Tc, Ts);			 T1I = FNMS(Tc, Ti, T1H);			 T2U = FMA(Tc, Ti, T1H);			 T1t = FMA(T6, Ti, T1s);			 T3U = FNMS(T6, Ti, T1s);			 T5O = FNMS(T3d, Ti, T47);			 T48 = FMA(T3d, Ti, T47);			 T2u = FMA(T14, Ti, T2t);			 T7B = FNMS(T14, Ti, T2t);			 T1L = T8 * Ti;			 T2k = T2 * Ti;			 T1w = T3 * Ti;			 TK = FMA(TJ, Ti, TH);			 T79 = FNMS(TJ, Ti, TH);			 T3D = FMA(T5, Ti, T2g);			 T2h = FNMS(T5, Ti, T2g);			 T2l = FMA(T5, Te, T2k);			 T3G = FNMS(T5, Te, T2k);			 T1x = FNMS(T6, Te, T1w);			 T3X = FMA(T6, Te, T1w);			 T2d = FNMS(T6, TL, T2c);			 T1M = FMA(Tc, Te, T1L);			 T2X = FNMS(Tc, Te, T1L);			 T4B = FNMS(Tc, TL, T4A);			 T4x = FMA(Tc, TP, T4w);			 T3j = FMA(T3i, Ti, T3h);			 T4T = FNMS(T3i, Ti, T3h);			 T29 = FMA(T6, TP, T28);			 T5r = T3g * TL;			 T80 = T7 * TP;			 {			      E T7M, T7g, T63, T5v, T7W;			      T5v = T3g * TP;			      T7W = T7 * TL;			      T5s = FMA(T3i, TP, T5r);			      T81 = FNMS(Tb, TL, T80);			      T5w = FNMS(T3i, TL, T5v);			      T7X = FMA(Tb, TP, T7W);			      T7M = TG * TL;			      T7g = T10 * TL;			      T63 = T3a * TP;			      {				   E T6d, T7k, T69, T5Z, T7Q;				   T69 = Tr * TL;				   T7N = FMA(TJ, TP, T7M);				   T7h = FMA(T14, TP, T7g);				   T64 = FNMS(T3d, TL, T63);				   T6a = FMA(Tt, TP, T69);				   T6d = Tr * TP;				   T7k = T10 * TP;				   T5Z = T3a * TL;				   T7Q = TG * TP;				   T6e = FNMS(Tt, TL, T6d);				   T7l = FNMS(T14, TL, T7k);				   T60 = FMA(T3d, TP, T5Z);				   T7R = FNMS(TJ, TL, T7Q);				   T5z = Tr * Te;			      }			 }			 {			      E T6I, T5D, T6M, T6s, T6w;			      T6I = T7 * Te;			      T5D = Tr * Ti;			      T6M = T7 * Ti;			      T6h = FNMS(Tt, Ti, T5z);			      T5A = FMA(Tt, Ti, T5z);			      T7o = FMA(Tb, Ti, T6I);			      T6J = FNMS(Tb, Ti, T6I);			      T6k = FMA(Tt, Te, T5D);			      T5E = FNMS(Tt, Te, T5D);			      T6N = FMA(Tb, Te, T6M);			      T7r = FNMS(Tb, Te, T6M);			      T6s = T2U * TL;			      T6w = T2U * TP;			      {				   E TN, TT, TM, T2w;				   TN = TG * Ti;				   T2w = T10 * Ti;				   T6x = FNMS(T2X, TL, T6w);				   T6t = FMA(T2X, TP, T6s);				   T7c = FMA(TJ, Te, TN);				   TO = FNMS(TJ, Te, TN);				   TT = TK * TP;				   TM = TK * TL;				   T2x = FNMS(T14, Te, T2w);				   T7E = FMA(T14, Te, T2w);				   TU = FNMS(TO, TL, TT);				   TQ = FMA(TO, TP, TM);				   T2B = T2u * TP;				   T2v = T2u * TL;			      }			 }			 {			      E T1Y, T22, Tv, TB;			      {				   E T49, T4f, T4a, T3l, T3q, T3k;				   T4a = T3a * Ti;				   T2C = FNMS(T2x, TL, T2B);				   T2y = FMA(T2x, TP, T2v);				   T5R = FMA(T3d, Te, T4a);				   T4b = FNMS(T3d, Te, T4a);				   T49 = T48 * TL;				   T4f = T48 * TP;				   T3l = T3g * Ti;				   T4c = FMA(T4b, TP, T49);				   T4g = FNMS(T4b, TL, T4f);				   T4W = FMA(T3i, Te, T3l);				   T3m = FNMS(T3i, Te, T3l);				   T1Y = Tu * TL;				   T3q = T3j * TP;				   T3k = T3j * TL;				   T22 = Tu * TP;				   Tv = Tu * Te;				   T3r = FNMS(T3m, TL, T3q);				   T3n = FMA(T3m, TP, T3k);				   TB = Tu * Ti;				   T1k = FNMS(Tt, T8, Tw);				   Tx = FMA(Tt, T8, Tw);			      }			      {				   E T30, T34, T18, T1d;				   T30 = T17 * TL;				   T34 = T17 * TP;				   T18 = T17 * Te;				   Ty = FMA(Tx, Ti, Tv);				   T4p = FNMS(Tx, Ti, Tv);				   T4s = FMA(Tx, Te, TB);				   TC = FNMS(Tx, Te, TB);				   T23 = FNMS(Tx, TL, T22);				   T1Z = FMA(Tx, TP, T1Y);				   T1d = T17 * Ti;				   T19 = FMA(Tb, T8, Tg);				   Th = FNMS(Tb, T8, Tg);				   {					E T1j, T1o, T1Q, T1U;					T1j = T1i * TL;					{					     E T6V, T6Z, T54, T58;					     T6V = Ty * TL;					     T6Z = Ty * TP;					     T31 = FMA(T19, TP, T30);					     T35 = FNMS(T19, TL, T34);					     T1e = FMA(T19, Te, T1d);					     T44 = FNMS(T19, Te, T1d);					     T41 = FMA(T19, Ti, T18);					     T1a = FNMS(T19, Ti, T18);					     T6W = FMA(TC, TP, T6V);					     T70 = FNMS(TC, TL, T6Z);					     T1o = T1i * TP;					     T54 = T41 * TL;					     T58 = T41 * TP;					     T1Q = T1i * Te;					     T1U = T1i * Ti;					     T55 = FMA(T44, TP, T54);					     T59 = FNMS(T44, TL, T58);					}					T3v = Td * TL;					T3z = Td * TP;					Tf = Td * Te;					T1R = FMA(T1k, Ti, T1Q);					T2N = FNMS(T1k, Ti, T1Q);					T2Q = FMA(T1k, Te, T1U);					T1V = FNMS(T1k, Te, T1U);					T1p = FNMS(T1k, TL, T1o);					T1l = FMA(T1k, TP, T1j);					Tm = Td * Ti;				   }			      }			 }		    }	       }	       {		    E Tl9, TlD, TY, Tg4, T8w, TdS, TkE, Tkd, T2G, Tge, Tgh, TiK, Te1, T98, Te0;		    E T9f, Te5, T9p, Tgq, T39, Te8, T9M, TiN, Tgn, TeE, TbI, Thr, T74, TeP, TcB;		    E Tja, Thc, T8D, TdT, T1B, TkD, T8K, TdU, Tg7, Tk7, T8T, TdY, T27, Tg9, T90;		    E TdX, Tgc, TiJ, T9Y, Tec, T4k, TgB, Tal, Tef, Tgy, TiT, Taz, Tel, T5d, Th0;		    E Tbs, Tew, TgL, TiZ, T3K, Tgo, Tgt, TiO, T9P, Te6, T9E, Te9, T4L, Tgz, TgE;		    E TiU, Tao, Ted, Tad, Teg, T5I, TgM, Th3, Tj0, Tbv, Tem, TaO, Tex, T7v, Thd;		    E Thu, Tjb, TcE, TeF, TbX, TeQ, T68, Tj5, Tez, Teq, Tbj, Tbx, TgS, Th5, T6B;		    E Tj6, TeA, Tet, Tb4, Tby, TgX, Th6, T7V, Tjg, TeS, TeJ, Tcs, TcG, Thj, Thw;		    E T84, T83, T85, Tc7, T8k, Tc3, T86, T89, T8b;		    {			 E T3w, T3A, T4H, T4E, T8e, T8i, T5j, T5n, T4U, T4S, T4V, Tau, T5b, Tbq, T4X;			 E T50, T52;			 {			      E T72, Tcz, Tcv, T6Q, Tha, TbG, T6U, Tcx, T99, T9e;			      {				   E T1, Tkb, Tp, Tka, TR, TV, TE, T8s, TS, T8t;				   {					E Tn, Tj, T8d, T8h, T5i, T5m;					T1 = ri[0];					T8d = T1R * TL;					T8h = T1R * TP;					T3w = FMA(Th, TP, T3v);					T3A = FNMS(Th, TL, T3z);					Tn = FMA(Th, Te, Tm);					T4H = FNMS(Th, Te, Tm);					T4E = FMA(Th, Ti, Tf);					Tj = FNMS(Th, Ti, Tf);					T8e = FMA(T1V, TP, T8d);					T8i = FNMS(T1V, TL, T8h);					Tkb = ii[0];					T5i = T4E * TL;					T5m = T4E * TP;					{					     E Tk, To, Tl, Tk9;					     Tk = ri[WS(rs, 32)];					     To = ii[WS(rs, 32)];					     T5j = FMA(T4H, TP, T5i);					     T5n = FNMS(T4H, TL, T5m);					     Tl = Tj * Tk;					     Tk9 = Tj * To;					     {						  E Tz, TD, TA, T8r;						  Tz = ri[WS(rs, 16)];						  TD = ii[WS(rs, 16)];						  Tp = FMA(Tn, To, Tl);						  Tka = FNMS(Tn, Tk, Tk9);						  TA = Ty * Tz;						  T8r = Ty * TD;						  TR = ri[WS(rs, 48)];						  TV = ii[WS(rs, 48)];						  TE = FMA(TC, TD, TA);						  T8s = FNMS(TC, Tz, T8r);						  TS = TQ * TR;						  T8t = TQ * TV;					     }					}				   }				   {					E T8q, Tq, Tl7, Tkc, TW, T8u;					T8q = T1 - Tp;					Tq = T1 + Tp;					Tl7 = Tkb - Tka;					Tkc = Tka + Tkb;					TW = FMA(TU, TV, TS);					T8u = FNMS(TU, TR, T8t);					{					     E TX, Tl8, T8v, Tk8;					     TX = TE + TW;					     Tl8 = TE - TW;					     T8v = T8s - T8u;					     Tk8 = T8s + T8u;					     Tl9 = Tl7 - Tl8;					     TlD = Tl8 + Tl7;					     TY = Tq + TX;					     Tg4 = Tq - TX;					     T8w = T8q - T8v;					     TdS = T8q + T8v;					     TkE = Tkc - Tk8;					     Tkd = Tk8 + Tkc;					}				   }			      }			      {				   E T2f, T93, T2E, T9d, T2n, T95, T2s, T9b;				   {					E T2a, T2e, T2i, T2m;					T2a = ri[WS(rs, 60)];					T2e = ii[WS(rs, 60)];					{					     E T2z, T2D, T2b, T92, T2A, T9c;					     T2z = ri[WS(rs, 44)];					     T2D = ii[WS(rs, 44)];					     T2b = T29 * T2a;					     T92 = T29 * T2e;					     T2A = T2y * T2z;					     T9c = T2y * T2D;					     T2f = FMA(T2d, T2e, T2b);					     T93 = FNMS(T2d, T2a, T92);					     T2E = FMA(T2C, T2D, T2A);					     T9d = FNMS(T2C, T2z, T9c);					}					T2i = ri[WS(rs, 28)];					T2m = ii[WS(rs, 28)];					{					     E T2p, T2r, T2j, T94, T2q, T9a;					     T2p = ri[WS(rs, 12)];					     T2r = ii[WS(rs, 12)];					     T2j = T2h * T2i;					     T94 = T2h * T2m;					     T2q = TG * T2p;					     T9a = TG * T2r;					     T2n = FMA(T2l, T2m, T2j);					     T95 = FNMS(T2l, T2i, T94);					     T2s = FMA(TJ, T2r, T2q);					     T9b = FNMS(TJ, T2p, T9a);					}				   }				   {					E T2o, Tgf, T96, T97, T2F, Tgg;					T99 = T2f - T2n;					T2o = T2f + T2n;					Tgf = T93 + T95;					T96 = T93 - T95;					T97 = T2s - T2E;					T2F = T2s + T2E;					Tgg = T9b + T9d;					T9e = T9b - T9d;					T2G = T2o + T2F;					Tge = T2o - T2F;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -