⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hb_64.c

📁 最新的FFT程序
💻 C
📖 第 1 页 / 共 5 页
字号:
					T6t = T6l + T6s;					T6W = T6s - T6l;					T66 = FMA(KP707106781, T65, T64);					T76 = FNMS(KP707106781, T65, T64);					T7k = T6c - T69;					T6d = T69 + T6c;					T6R = T6E - T6F;					T6G = T6E + T6F;					T6e = FMA(KP923879532, T6d, T66);					T6Q = FNMS(KP923879532, T6d, T66);				   }				   {					E T6N, T6S, T6K, T73, T72, T71;					{					     E T6H, T6w, T70, T6u, T63;					     T6N = FNMS(KP980785280, T6G, T6D);					     T6H = FMA(KP980785280, T6G, T6D);					     T6w = W[3];					     T70 = FNMS(KP980785280, T6R, T6Q);					     T6S = FMA(KP980785280, T6R, T6Q);					     T6u = FMA(KP980785280, T6t, T6e);					     T6K = FNMS(KP980785280, T6t, T6e);					     T63 = W[2];					     {						  E T6Z, T74, T6I, T6v;						  T73 = FNMS(KP980785280, T6W, T6V);						  T6X = FMA(KP980785280, T6W, T6V);						  T72 = W[35];						  T6I = T6w * T6u;						  T6v = T63 * T6u;						  T6Z = W[34];						  T74 = T72 * T70;						  iio[-WS(ios, 61)] = FMA(T63, T6H, T6I);						  rio[WS(ios, 2)] = FNMS(T6w, T6H, T6v);						  T71 = T6Z * T70;						  iio[-WS(ios, 45)] = FMA(T6Z, T73, T74);					     }					}					{					     E T6M, T6J, T6O, T6L;					     T6M = W[67];					     rio[WS(ios, 18)] = FNMS(T72, T73, T71);					     T6J = W[66];					     T6O = T6M * T6K;					     T6U = W[99];					     T6L = T6J * T6K;					     T6P = W[98];					     iio[-WS(ios, 29)] = FMA(T6J, T6N, T6O);					     T6Y = T6U * T6S;					     rio[WS(ios, 34)] = FNMS(T6M, T6N, T6L);					     T6T = T6P * T6S;					}				   }			      }			      {				   E TfQ, TfD, Tfk, TfI, TfT, TfA, Tfx;				   {					E Tfb, Tfi, Tf4, TfL, TfM, Tfw, Tfp, Tfs;					{					     E Tfu, Tfv, TeW, Tf3;					     Tg9 = FNMS(KP923879532, TeV, TeU);					     TeW = FMA(KP923879532, TeV, TeU);					     iio[-WS(ios, 13)] = FMA(T6P, T6X, T6Y);					     TfX = Tf2 - TeZ;					     Tf3 = TeZ + Tf2;					     rio[WS(ios, 50)] = FNMS(T6U, T6X, T6T);					     Tfb = FNMS(KP303346683, Tfa, Tf7);					     Tfu = FMA(KP303346683, Tf7, Tfa);					     Tfv = FNMS(KP303346683, Tfe, 
Tfh);					     Tfi = FMA(KP303346683, Tfh, Tfe);					     Tf4 = FMA(KP831469612, Tf3, TeW);					     TfL = FNMS(KP831469612, Tf3, TeW);					     TfM = Tfu - Tfv;					     Tfw = Tfu + Tfv;					     TfW = FNMS(KP923879532, Tfo, Tfn);					     Tfp = FMA(KP923879532, Tfo, Tfn);					     Tfs = Tfq + Tfr;					     Tga = Tfq - Tfr;					}					{					     E TfH, Tfj, TfG, Tft;					     TfN = FMA(KP956940335, TfM, TfL);					     TfQ = FNMS(KP956940335, TfM, TfL);					     TfH = Tfi - Tfb;					     Tfj = Tfb + Tfi;					     TfG = FNMS(KP831469612, Tfs, Tfp);					     Tft = FMA(KP831469612, Tfs, Tfp);					     TfD = FNMS(KP956940335, Tfj, Tf4);					     Tfk = FMA(KP956940335, Tfj, Tf4);					     TfI = FMA(KP956940335, TfH, TfG);					     TfT = FNMS(KP956940335, TfH, TfG);					     TfA = FNMS(KP956940335, Tfw, Tft);					     Tfx = FMA(KP956940335, Tfw, Tft);					}				   }				   {					E TfS, TfR, TeT, Tfm, Tfz, TfC;					TeT = W[120];					Tfm = W[121];					{					     E TfP, Tfy, Tfl, TfU;					     TfP = W[88];					     TfS = W[89];					     Tfy = TeT * Tfx;					     Tfl = TeT * Tfk;					     TfU = TfP * TfT;					     TfR = TfP * TfQ;					     rio[WS(ios, 61)] = FNMS(Tfm, Tfk, Tfy);					     iio[-WS(ios, 2)] = FMA(Tfm, Tfx, Tfl);					     rio[WS(ios, 45)] = FNMS(TfS, TfQ, TfU);					}					iio[-WS(ios, 18)] = FMA(TfS, TfT, TfR);					Tfz = W[56];					TfC = W[57];					{					     E TfF, TfE, TfB, TfO;					     TfF = W[24];					     TfK = W[25];					     TfE = Tfz * TfD;					     TfB = Tfz * TfA;					     TfO = TfF * TfN;					     TfJ = TfF * TfI;					     iio[-WS(ios, 34)] = FMA(TfC, TfA, TfE);					     rio[WS(ios, 29)] = FNMS(TfC, TfD, TfB);					     iio[-WS(ios, 50)] = FMA(TfK, TfI, TfO);					}				   }			      }			 }			 rio[WS(ios, 13)] = FNMS(TfK, TfN, TfJ);			 {			      E T7F, T7C, T7x, T7G, T7B;			      {				   E T7D, T7l, T7f, T7E, T7z, T7o, T78, T7y;				   {					E T7m, T7n, T7b, T7e;					T7m = FNMS(KP668178637, T79, T7a);					T7b = FMA(KP668178637, T7a, T79);					T7e = 
FNMS(KP668178637, T7d, T7c);					T7n = FMA(KP668178637, T7c, T7d);					T7D = FNMS(KP923879532, T7k, T7j);					T7l = FMA(KP923879532, T7k, T7j);					T7f = T7b + T7e;					T7E = T7b - T7e;					T7z = T7n - T7m;					T7o = T7m + T7n;					T78 = FMA(KP923879532, T77, T76);					T7y = FNMS(KP923879532, T77, T76);				   }				   {					E T7v, T7A, T7s, T7L, T7K, T7J;					{					     E T7p, T7i, T7I, T7g, T75;					     T7v = FNMS(KP831469612, T7o, T7l);					     T7p = FMA(KP831469612, T7o, T7l);					     T7i = W[115];					     T7I = FNMS(KP831469612, T7z, T7y);					     T7A = FMA(KP831469612, T7z, T7y);					     T7g = FMA(KP831469612, T7f, T78);					     T7s = FNMS(KP831469612, T7f, T78);					     T75 = W[114];					     {						  E T7H, T7M, T7q, T7h;						  T7L = FNMS(KP831469612, T7E, T7D);						  T7F = FMA(KP831469612, T7E, T7D);						  T7K = W[83];						  T7q = T7i * T7g;						  T7h = T75 * T7g;						  T7H = W[82];						  T7M = T7K * T7I;						  iio[-WS(ios, 5)] = FMA(T75, T7p, T7q);						  rio[WS(ios, 58)] = FNMS(T7i, T7p, T7h);						  T7J = T7H * T7I;						  iio[-WS(ios, 21)] = FMA(T7H, T7L, T7M);					     }					}					{					     E T7u, T7r, T7w, T7t;					     T7u = W[51];					     rio[WS(ios, 42)] = FNMS(T7K, T7L, T7J);					     T7r = W[50];					     T7w = T7u * T7s;					     T7C = W[19];					     T7t = T7r * T7s;					     T7x = W[18];					     iio[-WS(ios, 37)] = FMA(T7r, T7v, T7w);					     T7G = T7C * T7A;					     rio[WS(ios, 26)] = FNMS(T7u, T7v, T7t);					     T7B = T7x * T7A;					}				   }			      }			      {				   E Tgy, Tgl, Tg6, Tgq, TgB, Tgi, Tgf;				   {					E Tg1, Tg4, TfY, Tgt, Tgu, Tge, Tgc, Tgd;					iio[-WS(ios, 53)] = FMA(T7x, T7F, T7G);					rio[WS(ios, 10)] = FNMS(T7C, T7F, T7B);					Tg1 = FNMS(KP534511135, Tg0, TfZ);					Tgc = FMA(KP534511135, TfZ, Tg0);					Tgd = FNMS(KP534511135, Tg2, Tg3);					Tg4 = FMA(KP534511135, Tg3, Tg2);					TfY = FMA(KP831469612, TfX, TfW);					Tgt = FNMS(KP831469612, TfX, TfW);					Tgu = Tgc - Tgd;					Tge = Tgc + Tgd;					{					
     E Tgp, Tg5, Tgo, Tgb;					     Tgv = FMA(KP881921264, Tgu, Tgt);					     Tgy = FNMS(KP881921264, Tgu, Tgt);					     Tgp = Tg4 - Tg1;					     Tg5 = Tg1 + Tg4;					     Tgo = FNMS(KP831469612, Tga, Tg9);					     Tgb = FMA(KP831469612, Tga, Tg9);					     Tgl = FNMS(KP881921264, Tg5, TfY);					     Tg6 = FMA(KP881921264, Tg5, TfY);					     Tgq = FMA(KP881921264, Tgp, Tgo);					     TgB = FNMS(KP881921264, Tgp, Tgo);					     Tgi = FNMS(KP881921264, Tge, Tgb);					     Tgf = FMA(KP881921264, Tge, Tgb);					}				   }				   {					E TgA, Tgz, TfV, Tg8, Tgh, Tgk;					TfV = W[8];					Tg8 = W[9];					{					     E Tgx, Tgg, Tg7, TgC;					     Tgx = W[40];					     TgA = W[41];					     Tgg = TfV * Tgf;					     Tg7 = TfV * Tg6;					     TgC = Tgx * TgB;					     Tgz = Tgx * Tgy;					     iio[-WS(ios, 58)] = FMA(Tg8, Tg6, Tgg);					     rio[WS(ios, 5)] = FNMS(Tg8, Tgf, Tg7);					     iio[-WS(ios, 42)] = FMA(TgA, Tgy, TgC);					}					rio[WS(ios, 21)] = FNMS(TgA, TgB, Tgz);					Tgh = W[72];					Tgk = W[73];					{					     E Tgn, Tgm, Tgj, Tgw;					     Tgn = W[104];					     Tgs = W[105];					     Tgm = Tgh * Tgl;					     Tgj = Tgh * Tgi;					     Tgw = Tgn * Tgv;					     Tgr = Tgn * Tgq;					     rio[WS(ios, 37)] = FNMS(Tgk, Tgi, Tgm);					     iio[-WS(ios, 26)] = FMA(Tgk, Tgl, Tgj);					     rio[WS(ios, 53)] = FNMS(Tgs, Tgq, Tgw);					}				   }			      }			 }		    }	       }	  }	  iio[-WS(ios, 10)] = FMA(Tgs, Tgv, Tgr);     }     return W;}/* Twiddle instruction table referenced by desc below: one TW_FULL entry (0, 64) followed by one TW_NEXT entry (1, 0). NOTE(review): the field meanings come from the tw_instr declaration, which is not visible here -- confirm. */static const tw_instr twinstr[] = {     {TW_FULL, 0, 64},     {TW_NEXT, 1, 0}};/* Descriptor passed to the registration call: 64, the name string "hb_64", the twinstr table, &GENUS, the brace group {520, 126, 518, 0}, then three zeros. NOTE(review): the brace group is presumably the operation counts of this variant -- verify against hc2hc_desc. */static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {520, 126, 518, 0}, 0, 0, 0 };/* Registration entry point: hands hb_64 and its descriptor to planner p through X(khc2hc_register). */void X(codelet_hb_64) (planner *p) {     X(khc2hc_register) (p, hb_64, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h *//* * This function contains 1038 FP additions, 500 FP multiplications, * (or, 808 additions, 270 multiplications, 230 fused 
multiply/add), * 196 stack variables, and 256 memory accesses *//* * Generator Id's :  * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hb.h"static const R *hb_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){     DK(KP634393284, +0.634393284163645498215171613225493370675687095);     DK(KP773010453, +0.773010453362736960810906609758469800971041293);     DK(KP098017140, +0.098017140329560601994195563888641845861136673);     DK(KP995184726, +0.995184726672196886244836953109479921575474869);     DK(KP471396736, +0.471396736825997648556387625905254377657460319);     DK(KP881921264, +0.881921264348355029712756863660388349508442621);     DK(KP290284677, +0.290284677254462367636192375817395274691476278);     DK(KP956940335, +0.956940335732208864935797886980269969482849206);     DK(KP195090322, +0.195090322016128267848284868477022240927691618);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP555570233, +0.555570233019602224742830813948532874374937191);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP382683432, +0.382683432365089771728459984030398866761344562);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT i;     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126, MAKE_VOLATILE_STRIDE(ios)) {	  E Tf, T7i, Tfa, ThM, Tgp, ThH, T2c, T5O, T4T, T6n, Tcp, Ted, TcA, TdE, T87;	  E T9o, TK, T93, T2P, T4F, Tfo, Thz, T5T, T6j, Tbx, TdI, Tfl, ThA, T7r, T81;	  E TbE, TdH, TZ, T94, T38, T4G, Tfv, ThC, T5W, T6k, TbQ, TdK, Tfs, ThD, T7w;	  E T82, TbX, TdL, Tu, T84, Tfh, ThG, Tgm, ThN, T2v, T6m, T4K, T5P, Tce, TdF;	  E TcD, Tec, T7l, T9p, T1L, T20, T9c, T9d, T9e, T9f, T40, T66, Tg1, Thu, Tg8;	  E Thv, Tg5, Thr, T4n, T67, T4j, T69, T4w, T6a, TaT, TdW, 
Tb8, TdZ, TfU, Ths;	  E T7O, T8y, T7T, T8z, Tbc, TdX, Tbj, Te0, T1g, T1v, T97, T98, T99, T9a, T3j;	  E T5Z, TfI, Thk, TfP, Thl, TfM, Tho, T3G, T60, T3C, T62, T3P, T63, Tak, TdQ;	  E Tav, TdT, TfB, Thn, T7D, T8v, T7I, T8w, TaD, TdP, TaG, TdS;	  {	       E T3, Tcm, T4O, Tcv, T6, Tcu, T4R, Tcn, Td, Tcy, T2a, Tch, Ta, Tcx, T27;	       E Tck;	       {		    E T1, T2, T4P, T4Q;		    T1 = rio[0];		    T2 = iio[-WS(ios, 32)];		    T3 = T1 + T2;		    Tcm = T1 - T2;		    {			 E T4M, T4N, T4, T5;			 T4M = iio[0];			 T4N = rio[WS(ios, 32)];			 T4O = T4M - T4N;			 Tcv = T4M + T4N;			 T4 = rio[WS(ios, 16)];			 T5 = iio[-WS(ios, 48)];			 T6 = T4 + T5;			 Tcu = T4 - T5;		    }		    T4P = iio[-WS(ios, 16)];		    T4Q = rio[WS(ios, 48)];		    T4R = T4P - T4Q;		    Tcn = T4P + T4Q;		    {			 E Tb, Tc, Tcf, T28, T29, Tcg;			 Tb = iio[-WS(ios, 56)];			 Tc = rio[WS(ios, 24)];			 Tcf = Tb - Tc;			 T28 = iio[-WS(ios, 24)];			 T29 = rio[WS(ios, 56)];			 Tcg = T29 + T28;			 Td = Tb + Tc;			 Tcy = Tcf + Tcg;			 T2a = T28 - T29;			 Tch = Tcf - Tcg;		    }		    {			 E T8, T9, Tcj, T25, T26, Tci;			 T8 = rio[WS(ios, 8)];			 T9 = iio[-WS(ios, 40)];			 Tcj = T8 - T9;			 T25 = iio[-WS(ios, 8)];			 T26 = rio[WS(ios, 40)];			 Tci = T25 + T26;			 Ta = T8 + T9;			 Tcx = Tcj + Tci;			 T27 = T25 - T26;			 Tck = Tci - Tcj;		    }	       }	       {		    E T7, Te, Tf8, Tf9;		    T7 = T3 + T6;		    Te = Ta + Td;		    Tf = T7 + Te;		    T7i = T7 - Te;		    Tf8 = Tcv - Tcu;		    Tf9 = KP707106781 * (Tck + Tch);		    Tfa = Tf8 + Tf9;		    ThM = Tf8 - Tf9;	       }	       {		    E Tgn, Tgo, T24, T2b;		    Tgn = KP707106781 * (Tcx + Tcy);		    Tgo = Tcm + Tcn;		    Tgp = Tgn + Tgo;		    ThH = Tgo - Tgn;		    T24 = T3 - T6;		    T2b = T27 - T2a;		    T2c = T24 + T2b;		    T5O = T24 - T2b;	       }	       {		    E T4L, T4S, Tcl, Tco;		    T4L = Td - Ta;		    T4S = T4O - T4R;		    T4T = T4L + T4S;		    T6n = T4S - T4L;		    Tcl = KP707106781 * (Tch - Tck);		    Tco = Tcm - Tcn;		    Tcp = Tcl + Tco;		    Ted = Tco - 
Tcl;	       }	       {		    E Tcw, Tcz, T85, T86;		    Tcw = Tcu + Tcv;		    Tcz = KP707106781 * (Tcx - Tcy);		    TcA = Tcw + Tcz;		    TdE = Tcw - Tcz;		    T85 = T4O + T4R;		    T86 = T27 + T2a;		    T87 = T85 - T86;		    T9o = T86 + T85;	       }	  }	  {	       E TC, Tby, T2x, Tbu, T2N, Tbz, T7o, Tbv, TJ, TbB, TbC, T2E, T2G, Tbp, Tbs;	       E T7p, Tfj, Tfk;	       {		    E Tw, Tx, Ty, Tz, TA, TB;		    Tw = rio[WS(ios, 2)];		    Tx = iio[-WS(ios, 34)];		    Ty = Tw + Tx;		    Tz = rio[WS(ios, 18)];		    TA = iio[-WS(ios, 50)];		    TB = Tz + TA;		    TC = Ty + TB;		    Tby = Tz - TA;		    T2x = Ty - TB;		    Tbu = Tw - Tx;	       }	       {		    E T2H, T2I, T2J, T2K, T2L, T2M;		    T2H = iio[-WS(ios, 2)];		    T2I = rio[WS(ios, 34)];		    T2J = T2H - T2I;		    T2K = iio[-WS(ios, 18)];		    T2L = 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -