⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hf2_32.c

📁 最新的FFT程序
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:29:51 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include hf.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 217 stack variables, and 128 memory accesses *//* * Generator Id's :  * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hf.h"static const R *hf2_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT i;     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8, MAKE_VOLATILE_STRIDE(ios)) {	  E T9A, T9z;	  {	       E T3, T8, T5, T4, T7, T2, TM, Td, T9, TJ, Tb, Tz, T14, T10, T1r;	       E T1n, TD, Ts, T1w, T1d, T26, Tw, T1z, T23, T19, T2r, T3i, T3P, T2v, T2S;	       E T3m, T3q, T3L, T2I, T2E, T2W, T3e, Te, TP, T29, T1R, T2b, T1U, Ti, TR;	       E Tf, T1S, T2f, TV, TQ, Tm, T1Y, T2a, Tg;	       {		    E Th, T1T, T1Q, Ta, Tv, Tr;		    T3 = W[4];		    T8 = W[1];		    T5 = W[0];		    T4 = W[2];		    T7 = W[3];		    Tv = T3 * T8;		    Tr = T3 * T5;		    {			 E T18, Tc, T6, T1c;			 T18 = T3 * T4;			 Tc = T4 * T8;			 T6 = T4 * T5;			 T1c = T3 * T7;			 T2 = W[6];			 TM = FMA(T7, T5, Tc);			 Td = FNMS(T7, T5, Tc);			 T9 = FMA(T7, T8, T6);			 TJ = FNMS(T7, T8, T6);			 Th = T3 * Td;			 T1T = T3 * TM;			 T1Q = T3 * TJ;			 Ta = T3 * T9;			 Tb = W[5];			 Tz = T2 * T4;			 T14 = T2 * T8;			 T10 = T2 * T5;			 T1r = T2 * Tb;			 T1n = T2 * T3;			 TD = T2 * T7;			 Ts = FMA(Tb, T8, Tr);			 T1w = FNMS(Tb, T8, Tr);			 T1d = FMA(Tb, T4, T1c);			 T26 = FNMS(Tb, T4, T1c);			 Tw = FNMS(Tb, T5, Tv);			 T1z = FMA(Tb, T5, Tv);			 T23 = FMA(Tb, T7, T18);			 T19 = FNMS(Tb, T7, T18);		    }		    T2r = T2 * T1w;		    T3i = T2 * TM;		    T3P = T2 * Tw;		    T2v = T2 * T1z;		    T2S = T2 * T23;		    T3m = T2 * T19;		    T3q = T2 * T1d;		    T3L = T2 * Ts;		    T2I = T2 * Td;		    T2E = T2 * T9;		    T2W = T2 * T26;		    T3e = T2 * TJ;		    Te = FMA(Tb, Td, Ta);		    TP = FNMS(Tb, Td, Ta);		    T29 = FMA(Tb, TM, T1Q);		    T1R = FNMS(Tb, TM, T1Q);		    T2b = FNMS(Tb, TJ, T1T);		    T1U = FMA(Tb, TJ, T1T);		    Ti = FNMS(Tb, T9, Th);		    TR = FMA(Tb, T9, Th);		    Tf = T2 * Te;		    T1S = T2 * T1R;		    T2f = T2 * T2b;		    TV = T2 * TR;		    TQ = T2 * TP;		    Tm = T2 * Ti;		    T1Y = T2 * T1U;		    T2a = T2 * T29;		    Tg = W[7];	       }	       {		    E Tq, T46, T8H, T97, TH, T98, T4b, T8D, T7f, TZ, T6t, T4j, T1g, T7g, T6u;		    E T4q, T4G, T6x, T1J, T7m, T6y, T4z, T7l, T8d, T6B, T4O, T7r, T8e, T4V, T6A;		    E T2k, T7o, T5s, T6I, T2N, T7A, T55, T6F, T7x, T8i, T5t, T5c, T7D, T8j, T5j;		    E T5u, T3c, T7y, T5L, T62, T43, T7G, T5S, T63, T7O, T8o, T3G, T7L, T5E, T6M;		    E T7J, T8n, T6P, T61, T90, T8Z;		    {			 E T2F, T31, T1V, T1K, T2T, T2w, T2J, T34, T1N, T2X, T2c, T2s, T2g, T1Z, T3n;			 E T3f, T3z, T3j, T3C, T3r, T3Q, T3M, T4B, T1m, T1x, T4x, T1H, T1y, T1A, T4D;			 E T1u;			 {			      E T1F, T15, T1s, T11, T1C, TW, TS, T1o, Ty, T48, TG, T4a;			      {				   E T1, T8G, Tk, TA, TE, Tn, Tj, To;				   T1 = rio[0];				   T8G = iio[-WS(ios, 31)];				   Tk = rio[WS(ios, 16)];				   T2F = FNMS(Tg, Td, T2E);				   T31 = FMA(Tg, Td, T2E);				   T1V = FMA(Tg, T1U, T1S);				   T1K = FNMS(Tg, T7, Tz);				   TA = FMA(Tg, T7, Tz);				   T2T = FNMS(Tg, T26, T2S);				   T1F = FNMS(Tg, T5, T14);				   T15 = FMA(Tg, T5, T14);				   T2w = FNMS(Tg, T1w, T2v);				   T2J = FMA(Tg, T9, T2I);				   T34 = FNMS(Tg, T9, T2I);				   TE = FNMS(Tg, T4, TD);				   T1N = FMA(Tg, T4, TD);				   T2X = FMA(Tg, T23, T2W);				   T2c = FNMS(Tg, T2b, T2a);				   T2s = FMA(Tg, T1z, T2r);				   T1s = FNMS(Tg, T3, T1r);				   T2g = FMA(Tg, T29, T2f);				   T1Z = FNMS(Tg, T1R, T1Y);				   T11 = FNMS(Tg, T8, T10);				   T1C = FMA(Tg, T8, T10);				   TW = FNMS(Tg, TP, TV);				   T3n = FMA(Tg, T1d, T3m);				   T3f = FNMS(Tg, TM, T3e);				   T3z = FMA(Tg, TM, T3e);				   T3j = FMA(Tg, TJ, T3i);				   T3C = FNMS(Tg, TJ, T3i);				   TS = FMA(Tg, TR, TQ);				   T3r = FNMS(Tg, T19, T3q);				   Tn = FMA(Tg, Te, Tm);				   T3Q = FMA(Tg, Ts, T3P);				   T1o = FMA(Tg, Tb, T1n);				   T3M = FNMS(Tg, Tw, T3L);				   Tj = FNMS(Tg, Ti, Tf);				   To = iio[-WS(ios, 15)];				   {					E Tx, TF, T47, Tu, TC, T49, Tl, T8E;					Tx = iio[-WS(ios, 23)];					TF = iio[-WS(ios, 7)];					Tl = Tj * Tk;					T8E = Tj * To;					{					     E Tt, TB, Tp, T8F;					     Tt = rio[WS(ios, 8)];					     TB = rio[WS(ios, 24)];					     Tp = FMA(Tn, To, Tl);					     T8F = FNMS(Tn, Tk, T8E);					     T47 = Tw * Tt;					     Tu = Ts * Tt;					     Tq = T1 + Tp;					     T46 = T1 - Tp;					     T8H = T8F + T8G;					     T97 = T8G - T8F;					     TC = TA * TB;					     T49 = TE * TB;					}					Ty = FNMS(Tw, Tx, Tu);					T48 = FMA(Ts, Tx, T47);					TG = FNMS(TE, TF, TC);					T4a = FMA(TA, TF, T49);				   }			      }			      {				   E TX, TT, TO, T4f;				   {					E TK, TN, TL, T4e;					TK = rio[WS(ios, 4)];					TN = iio[-WS(ios, 27)];					TH = Ty + TG;					T98 = Ty - TG;					T4b = T48 - T4a;					T8D = T48 + T4a;					TL = TJ * TK;					T4e = TJ * TN;					TX = iio[-WS(ios, 11)];					TT = rio[WS(ios, 20)];					TO = FMA(TM, TN, TL);					T4f = FNMS(TM, TK, T4e);				   }				   {					E T17, T4m, T1a, T1e, T1b, T4i, T4d;					{					     E T12, T16, T13, T4h, TY, T4l, T4g, TU;					     T12 = rio[WS(ios, 28)];					     T4g = TW * TT;					     TU = TS * TT;					     T16 = iio[-WS(ios, 3)];					     T13 = T11 * T12;					     T4h = FMA(TS, TX, T4g);					     TY = FNMS(TW, TX, TU);					     T4l = T11 * T16;					     T17 = FMA(T15, T16, T13);					     T4i = T4f - T4h;					     T7f = T4f + T4h;					     T4d = TO - TY;					     TZ = TO + TY;					     T4m = FNMS(T15, T12, T4l);					}					T1a = rio[WS(ios, 12)];					T6t = T4i - T4d;					T4j = T4d + T4i;					T1e = iio[-WS(ios, 19)];					T1b = T19 * T1a;					{					     E T1G, T4k, T4p, T1D;					     {						  E T1l, T1j, T4n, T1f, T4A, T1k, T4o;						  T1l = iio[-WS(ios, 29)];						  T1j = rio[WS(ios, 2)];						  T4n = T19 * T1e;						  T1f = FMA(T1d, T1e, T1b);						  T1G = iio[-WS(ios, 5)];						  T4A = Td * T1j;						  T1k = T9 * T1j;						  T4o = FNMS(T1d, T1a, T4n);						  T4k = T17 - T1f;						  T1g = T17 + T1f;						  T4B = FMA(T9, T1l, T4A);						  T1m = FNMS(Td, T1l, T1k);						  T4p = T4m - T4o;						  T7g = T4m + T4o;						  T1D = rio[WS(ios, 26)];					     }					     {						  E T1t, T4w, T1E, T1p, T4C, T1q;						  T1t = iio[-WS(ios, 13)];						  T6u = T4k + T4p;						  T4q = T4k - T4p;						  T4w = T1F * T1D;						  T1E = T1C * T1D;						  T1p = rio[WS(ios, 18)];						  T1x = rio[WS(ios, 10)];						  T4x = FMA(T1C, T1G, T4w);						  T1H = FNMS(T1F, T1G, T1E);						  T4C = T1s * T1p;						  T1q = T1o * T1p;						  T1y = T1w * T1x;						  T1A = iio[-WS(ios, 21)];						  T4D = FMA(T1o, T1t, T4C);						  T1u = FNMS(T1s, T1t, T1q);					     }					}				   }			      }			 }			 {			      E T2q, T5n, T2A, T2L, T53, T2B, T2C, T5p, T2y;			      {				   E T1P, T4Q, T2i, T27, T4M, T24, T4S, T21, T2G, T2K;				   {					E T1L, T2d, T4P, T2e, T2h;					{					     E T7j, T4t, T1M, T4y, T7k, T1O;					     {						  E T4E, T1v, T1I, T4F, T4v, T1B, T4u;						  T1L = rio[WS(ios, 30)];						  T1B = FMA(T1z, T1A, T1y);						  T4u = T1w * T1A;						  T7j = T4B + T4D;						  T4E = T4B - T4D;						  T1v = T1m + T1u;						  T4t = T1m - T1u;						  T1I = T1B + T1H;						  T4F = T1B - T1H;						  T4v = FNMS(T1z, T1x, T4u);						  T1M = T1K * T1L;						  T4G = T4E - T4F;						  T6x = T4E + T4F;						  T1J = T1v + T1I;						  T7m = T1v - T1I;						  T4y = T4v - T4x;						  T7k = T4v + T4x;						  T1O = iio[-WS(ios, 1)];					     }					     T2d = rio[WS(ios, 22)];					     T6y = T4t - T4y;					     T4z = T4t + T4y;					     T7l = T7j - T7k;					     T8d = T7j + T7k;					     T1P = FMA(T1N, T1O, T1M);					     T4P = T1K * T1O;					     T2e = T2c * T2d;					     T2h = iio[-WS(ios, 9)];					}					{					     E T20, T1W, T4L, T4R, T1X;					     T20 = iio[-WS(ios, 17)];					     T4Q = FNMS(T1N, T1L, T4P);					     T1W = rio[WS(ios, 14)];					     T2i = FMA(T2g, T2h, T2e);					     T4L = T2c * T2h;					     T27 = iio[-WS(ios, 25)];					     T4R = T1Z * T1W;					     T1X = T1V * T1W;					     T4M = FNMS(T2g, T2d, T4L);					     T24 = rio[WS(ios, 6)];					     T4S = FMA(T1V, T20, T4R);					     T21 = FNMS(T1Z, T20, T1X);					}				   }				   {					E T2n, T5m, T2o, T2p;					{					     E T4T, T7p, T4I, T22, T4K, T28, T4J, T25;					     T2n = rio[WS(ios, 1)];					     T4J = T26 * T24;					     T25 = T23 * T24;					     T4T = T4Q - T4S;					     T7p = T4Q + T4S;					     T4I = T1P - T21;					     T22 = T1P + T21;					     T4K = FMA(T23, T27, T4J);					     T28 = FNMS(T26, T27, T25);					     T2o = T5 * T2n;					     T2p = iio[-WS(ios, 30)];					     {						  E T4N, T7q, T4U, T2j;						  T4N = T4K - T4M;						  T7q = T4K + T4M;						  T4U = T28 - T2i;						  T2j = T28 + T2i;						  T6B = T4I - T4N;						  T4O = T4I + T4N;						  T7r = T7p - T7q;						  T8e = T7p + T7q;						  T4V = T4T - T4U;						  T6A = T4T + T4U;						  T2k = T22 + T2j;						  T7o = T22 - T2j;						  T5m = T5 * T2p;					     }					}					T2q = FMA(T8, T2p, T2o);					T2G = rio[WS(ios, 25)];					T2K = iio[-WS(ios, 6)];					T5n = FNMS(T8, T2n, T5m);				   }				   {					E T2x, T2H, T52, T2t, T5o, T2u;					T2x = iio[-WS(ios, 14)];					T2H = T2F * T2G;					T52 = T2F * T2K;					T2t = rio[WS(ios, 17)];					T2A = rio[WS(ios, 9)];					T2L = FMA(T2J, T2K, T2H);					T53 = FNMS(T2J, T2G, T52);					T5o = T2w * T2t;					T2u = T2s * T2t;					T2B = T3 * T2A;					T2C = iio[-WS(ios, 22)];					T5p = FMA(T2s, T2x, T5o);					T2y = FNMS(T2w, T2x, T2u);				   }			      }			      {				   E T3a, T5h, T35, T56, T30, T7B, T5b, T5e, T33;				   {					E T2R, T58, T2Z, T5a, T32, T37, T39;					{					     E T2Q, T7v, T4Z, T7w, T54, T2P, T57;					     T2Q = iio[-WS(ios, 26)];					     {						  E T2D, T50, T5q, T2z;						  T2D = FMA(Tb, T2C, T2B);						  T50 = T3 * T2C;						  T7v = T5n + T5p;						  T5q = T5n - T5p;						  T2z = T2q + T2y;						  T4Z = T2q - T2y;						  {						       E T2M, T5r, T51, T2O;						       T2M = T2D + T2L;						       T5r = T2D - T2L;						       T51 = FNMS(Tb, T2A, T50);						       T2O = rio[WS(ios, 5)];						       T5s = T5q - T5r;						       T6I = T5q + T5r;						       T2N = T2z + T2M;						       T7A = T2z - T2M;						       T7w = T51 + T53;						       T54 = T51 - T53;						       T2P = T29 * T2O;						       T57 = T2b * T2O;						  }					     }					     T55 = T4Z + T54;					     T6F = T4Z - T54;					     T7x = T7v - T7w;					     T8i = T7v + T7w;					     T2R = FNMS(T2b, T2Q, T2P);					     T58 = FMA(T29, T2Q, T57);					}					T37 = rio[WS(ios, 13)];					T39 = iio[-WS(ios, 18)];					{					     E T2U, T2Y, T38, T5g, T2V, T59;					     T2U = rio[WS(ios, 21)];					     T2Y = iio[-WS(ios, 10)];					     T38 = T1R * T37;					     T5g = T1R * T39;					     T2V = T2T * T2U;					     T59 = T2T * T2Y;					     T3a = FMA(T1U, T39, T38);					     T5h = FNMS(T1U, T37, T5g);					     T2Z = FMA(T2X, T2Y, T2V);					     T5a = FNMS(T2X, T2U, T59);					}					T35 = iio[-WS(ios, 2)];					T32 = rio[WS(ios, 29)];					T56 = T2R - T2Z;					T30 = T2R + T2Z;					T7B = T58 + T5a;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -