⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hb_64.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
					cr[WS(rs, 5)] = FNMS(T7C, T7D, T7B);					T8u = T84 + T81;					T85 = T81 - T84;					T8b = FNMS(KP831469612, T8a, T89);					T8t = FMA(KP831469612, T8a, T89);					T7Y = FNMS(KP831469612, T7X, T7W);					T8o = FMA(KP831469612, T7X, T7W);					T8p = T8c + T8d;					T8e = T8c - T8d;					T86 = FNMS(KP956940335, T85, T7Y);					T8i = FMA(KP956940335, T85, T7Y);					T8y = FMA(KP956940335, T8p, T8o);					T8q = FNMS(KP956940335, T8p, T8o);					T8l = FMA(KP956940335, T8e, T8b);					T8f = FNMS(KP956940335, T8e, T8b);				   }				   {					E T8k, T8j, T7V, T88, T8v, T8s, T8n;					T7V = W[88];					T88 = W[89];					{					     E T8h, T8g, T87, T8m;					     T8h = W[24];					     T8k = W[25];					     T8g = T7V * T8f;					     T87 = T7V * T86;					     T8m = T8h * T8l;					     T8j = T8h * T8i;					     ci[WS(rs, 45)] = FMA(T88, T86, T8g);					     cr[WS(rs, 45)] = FNMS(T88, T8f, T87);					     ci[WS(rs, 13)] = FMA(T8k, T8i, T8m);					}					cr[WS(rs, 13)] = FNMS(T8k, T8l, T8j);					T8B = FMA(KP956940335, T8u, T8t);					T8v = FNMS(KP956940335, T8u, T8t);					T8s = W[57];					T8n = W[56];					{					     E T8x, T8C, T8w, T8r;					     T8A = W[121];					     T8w = T8s * T8q;					     T8r = T8n * T8q;					     T8x = W[120];					     T8C = T8A * T8y;					     ci[WS(rs, 29)] = FMA(T8n, T8v, T8w);					     cr[WS(rs, 29)] = FNMS(T8s, T8v, T8r);					     T8z = T8x * T8y;					     ci[WS(rs, 61)] = FMA(T8x, T8B, T8C);					}				   }			      }			 }			 {			      E Ta5, Ta4, Ta3, TeN, TeM, TeL;			      {				   E T9V, T9Y, Tai, Taa, Tal, Taf, Ta2, T9I;				   {					E T9n, T9G, Tad, Ta9, T94, Ta8, T9W, T9X, Tae, T9H;					cr[WS(rs, 61)] = FNMS(T8A, T8B, T8z);					T9n = FNMS(KP534511135, T9m, T9f);					T9W = FMA(KP534511135, T9f, T9m);					T9X = FMA(KP534511135, T9y, T9F);					T9G = FNMS(KP534511135, T9F, T9y);					T9V = FMA(KP831469612, T9U, T9R);					Tad = FNMS(KP831469612, T9U, T9R);					Ta9 = T9W + T9X;					T9Y = T9W - T9X;					T94 = FNMS(KP831469612, T93, T8O);					Ta8 = FMA(KP831469612, T93, T8O);					Tae = T9G - T9n;					T9H = T9n + T9G;					Tai = FMA(KP881921264, Ta9, Ta8);					Taa = FNMS(KP881921264, Ta9, Ta8);					Tal = FNMS(KP881921264, Tae, Tad);					Taf = FMA(KP881921264, Tae, Tad);					Ta2 = FNMS(KP881921264, T9H, T94);					T9I = FMA(KP881921264, T9H, T94);				   }				   {					E Tak, Taj, Ta7, Tac, T9Z, T9K, T8D;					Ta7 = W[52];					Tac = W[53];					{					     E Tah, Tag, Tab, Tam;					     Tah = W[116];					     Tak = W[117];					     Tag = Ta7 * Taf;					     Tab = Ta7 * Taa;					     Tam = Tah * Tal;					     Taj = Tah * Tai;					     ci[WS(rs, 27)] = FMA(Tac, Taa, Tag);					     cr[WS(rs, 27)] = FNMS(Tac, Taf, Tab);					     ci[WS(rs, 59)] = FMA(Tak, Tai, Tam);					}					cr[WS(rs, 59)] = FNMS(Tak, Tal, Taj);					Ta5 = FMA(KP881921264, T9Y, T9V);					T9Z = FNMS(KP881921264, T9Y, T9V);					T9K = W[85];					T8D = W[84];					{					     E Ta1, Ta6, Ta0, T9J;					     Ta4 = W[21];					     Ta0 = T9K * T9I;					     T9J = T8D * T9I;					     Ta1 = W[20];					     Ta6 = Ta4 * Ta2;					     ci[WS(rs, 43)] = FMA(T8D, T9Z, Ta0);					     cr[WS(rs, 43)] = FNMS(T9K, T9Z, T9J);					     Ta3 = Ta1 * Ta2;					     ci[WS(rs, 11)] = FMA(Ta1, Ta5, Ta6);					}				   }			      }			      {				   E TeD, TeG, Tf0, TeS, Tf3, TeX, TeK, Teo;				   {					E Tem, TdV, TeV, TeR, Tdu, TeQ, TeE, TeF, TeW, Ten;					cr[WS(rs, 11)] = FNMS(Ta4, Ta5, Ta3);					Tem = FMA(KP668178637, Tel, Tec);					TeE = FNMS(KP668178637, Tec, Tel);					TeF = FMA(KP668178637, TdL, TdU);					TdV = FNMS(KP668178637, TdU, TdL);					TeD = FNMS(KP923879532, TeC, Tez);					TeV = FMA(KP923879532, TeC, Tez);					TeR = TeE + TeF;					TeG = TeE - TeF;					Tdu = FNMS(KP923879532, Tdt, Td6);					TeQ = FMA(KP923879532, Tdt, Td6);					TeW = Tem + TdV;					Ten = TdV - Tem;					Tf0 = FMA(KP831469612, TeR, TeQ);					TeS = FNMS(KP831469612, TeR, TeQ);					Tf3 = FMA(KP831469612, TeW, TeV);					TeX = FNMS(KP831469612, TeW, TeV);					TeK = FMA(KP831469612, Ten, Tdu);					Teo = FNMS(KP831469612, Ten, Tdu);				   }				   {					E Tf2, Tf1, TeP, TeU, TeH, Teq, TcP;					TeP = W[74];					TeU = W[75];					{					     E TeZ, TeY, TeT, Tf4;					     TeZ = W[10];					     Tf2 = W[11];					     TeY = TeP * TeX;					     TeT = TeP * TeS;					     Tf4 = TeZ * Tf3;					     Tf1 = TeZ * Tf0;					     ci[WS(rs, 38)] = FMA(TeU, TeS, TeY);					     cr[WS(rs, 38)] = FNMS(TeU, TeX, TeT);					     ci[WS(rs, 6)] = FMA(Tf2, Tf0, Tf4);					}					cr[WS(rs, 6)] = FNMS(Tf2, Tf3, Tf1);					TeN = FMA(KP831469612, TeG, TeD);					TeH = FNMS(KP831469612, TeG, TeD);					Teq = W[107];					TcP = W[106];					{					     E TeJ, TeO, TeI, Tep;					     TeM = W[43];					     TeI = Teq * Teo;					     Tep = TcP * Teo;					     TeJ = W[42];					     TeO = TeM * TeK;					     ci[WS(rs, 54)] = FMA(TcP, TeH, TeI);					     cr[WS(rs, 54)] = FNMS(Teq, TeH, Tep);					     TeL = TeJ * TeK;					     ci[WS(rs, 22)] = FMA(TeJ, TeN, TeO);					}				   }			      }			      {				   E Tcn, Tcq, TcK, TcC, TcN, TcH, Tcu, Tci;				   {					E Tcd, Tcg, TcF, TcB, Tca, TcA, Tco, Tcp, TcG, Tch;					cr[WS(rs, 22)] = FNMS(TeM, TeN, TeL);					Tcd = FNMS(KP098491403, Tcc, Tcb);					Tco = FMA(KP098491403, Tcb, Tcc);					Tcp = FMA(KP098491403, Tce, Tcf);					Tcg = FNMS(KP098491403, Tcf, Tce);					Tcn = FMA(KP980785280, Tcm, Tcl);					TcF = FNMS(KP980785280, Tcm, Tcl);					TcB = Tco + Tcp;					Tcq = Tco - Tcp;					Tca = FNMS(KP980785280, Tc9, Tc8);					TcA = FMA(KP980785280, Tc9, Tc8);					TcG = Tcg - Tcd;					Tch = Tcd + Tcg;					TcK = FMA(KP995184726, TcB, TcA);					TcC = FNMS(KP995184726, TcB, TcA);					TcN = FNMS(KP995184726, TcG, TcF);					TcH = FMA(KP995184726, TcG, TcF);					Tcu = FNMS(KP995184726, Tch, Tca);					Tci = FMA(KP995184726, Tch, Tca);				   }				   {					E TcM, TcL, Tcz, TcE, Tcr, Tck, Tc7;					Tcz = W[60];					TcE = W[61];					{					     E TcJ, TcI, TcD, TcO;					     TcJ = W[124];					     TcM = W[125];					     TcI = Tcz * TcH;					     TcD = Tcz * TcC;					     TcO = TcJ * TcN;					     TcL = TcJ * TcK;					     ci[WS(rs, 31)] = FMA(TcE, TcC, TcI);					     cr[WS(rs, 31)] = FNMS(TcE, TcH, TcD);					     ci[WS(rs, 63)] = FMA(TcM, TcK, TcO);					}					cr[WS(rs, 63)] = FNMS(TcM, TcN, TcL);					Tcx = FMA(KP995184726, Tcq, Tcn);					Tcr = FNMS(KP995184726, Tcq, Tcn);					Tck = W[93];					Tc7 = W[92];					{					     E Tct, Tcy, Tcs, Tcj;					     Tcw = W[29];					     Tcs = Tck * Tci;					     Tcj = Tc7 * Tci;					     Tct = W[28];					     Tcy = Tcw * Tcu;					     ci[WS(rs, 47)] = FMA(Tc7, Tcr, Tcs);					     cr[WS(rs, 47)] = FNMS(Tck, Tcr, Tcj);					     Tcv = Tct * Tcu;					     ci[WS(rs, 15)] = FMA(Tct, Tcx, Tcy);					}				   }			      }			 }		    }	       }	  }	  cr[WS(rs, 15)] = FNMS(Tcw, Tcx, Tcv);     }}static const tw_instr twinstr[] = {     {TW_FULL, 1, 64},     {TW_NEXT, 1, 0}};static const hc2hc_desc desc = { 64, "hb_64", twinstr, &GENUS, {520, 126, 518, 0} };void X(codelet_hb_64) (planner *p) {     X(khc2hc_register) (p, hb_64, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h *//* * This function contains 1038 FP additions, 500 FP multiplications, * (or, 808 additions, 270 multiplications, 230 fused multiply/add), * 196 stack variables, 15 constants, and 256 memory accesses */#include "hb.h"static void hb_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP098017140, +0.098017140329560601994195563888641845861136673);     DK(KP995184726, +0.995184726672196886244836953109479921575474869);     DK(KP773010453, +0.773010453362736960810906609758469800971041293);     DK(KP634393284, +0.634393284163645498215171613225493370675687095);     DK(KP471396736, +0.471396736825997648556387625905254377657460319);     DK(KP881921264, +0.881921264348355029712756863660388349508442621);     DK(KP956940335, +0.956940335732208864935797886980269969482849206);     DK(KP290284677, +0.290284677254462367636192375817395274691476278);     DK(KP195090322, +0.195090322016128267848284868477022240927691618);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP555570233, +0.555570233019602224742830813948532874374937191);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP382683432, +0.382683432365089771728459984030398866761344562);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(rs)) {	  E Tf, T8C, Tfa, Thk, Tgg, ThM, T2c, T5O, T4K, T6g, Tag, TdE, TcA, Te6, T7P;	  E T94, TK, T7o, T38, T4P, Tfv, Thn, T5W, T6j, Tb0, TdK, Tfs, Tho, T8K, T97;	  E Tb7, TdL, TZ, T7l, T2P, T4Q, Tfo, Thq, T5T, T6k, TaH, TdH, Tfl, Thr, T8H;	  E T98, TaO, TdI, Tu, T95, Tfh, ThN, Tgj, Thl, T2v, T6h, T4N, T5P, Tav, Te7;	  E TcD, TdF, T7S, T8D, T1L, T20, T7A, T7D, T7G, T7H, T40, T62, Tg1, Thv, Tg8;	  E Thz, Tg5, Thw, T4t, T5Z, T4j, T60, T4w, T63, TbY, TdS, Tcd, TdQ, TfU, Thy;	  E T8P, T9z, T8S, T9A, Tcl, TdP, Tco, TdT, T1g, T1v, T7r, T7u, T7x, T7y, T3j;	  E T69, TfI, ThD, TfP, ThG, TfM, ThC, T3M, T66, T3C, T67, T3P, T6a, Tbl, TdZ;	  E TbA, TdX, TfB, ThF, T8W, T9C, T8Z, T9D, TbI, TdW, TbL, Te0;	  {	       E T3, Ta6, T6, Tcu, T4I, Ta7, T4F, Tcv, Td, Tcy, T27, Tae, Ta, Tcx, T2a;	       E Tab;	       {		    E T1, T2, T4D, T4E;		    T1 = cr[0];		    T2 = ci[WS(rs, 31)];		    T3 = T1 + T2;		    Ta6 = T1 - T2;		    {			 E T4, T5, T4G, T4H;			 T4 = cr[WS(rs, 16)];			 T5 = ci[WS(rs, 15)];			 T6 = T4 + T5;			 Tcu = T4 - T5;			 T4G = ci[WS(rs, 47)];			 T4H = cr[WS(rs, 48)];			 T4I = T4G - T4H;			 Ta7 = T4G + T4H;		    }		    T4D = ci[WS(rs, 63)];		    T4E = cr[WS(rs, 32)];		    T4F = T4D - T4E;		    Tcv = T4D + T4E;		    {			 E Tb, Tc, Tac, T25, T26, Tad;			 Tb = ci[WS(rs, 7)];			 Tc = cr[WS(rs, 24)];			 Tac = Tb - Tc;			 T25 = ci[WS(rs, 39)];			 T26 = cr[WS(rs, 56)];			 Tad = T25 + T26;			 Td = Tb + Tc;			 Tcy = Tac + Tad;			 T27 = T25 - T26;			 Tae = Tac - Tad;		    }		    {			 E T8, T9, Ta9, T28, T29, Taa;			 T8 = cr[WS(rs, 8)];			 T9 = ci[WS(rs, 23)];			 Ta9 = T8 - T9;			 T28 = ci[WS(rs, 55)];			 T29 = cr[WS(rs, 40)];			 Taa = T28 + T29;			 Ta = T8 + T9;			 Tcx = Ta9 + Taa;			 T2a = T28 - T29;			 Tab = Ta9 - Taa;		    }	       }	       {		    E T7, Te, Tf8, Tf9;		    T7 = T3 + T6;		    Te = Ta + Td;		    Tf = T7 + Te;		    T8C = T7 - Te;		    Tf8 = Ta6 + Ta7;		    Tf9 = KP707106781 * (Tcx + Tcy);		    Tfa = Tf8 - Tf9;		    Thk = Tf8 + Tf9;	       }	       {		    E Tge, Tgf, T24, T2b;		    Tge = Tcv - Tcu;		    Tgf = KP707106781 * (Tab - Tae);		    Tgg = Tge + Tgf;		    ThM = Tge - Tgf;		    T24 = T3 - T6;		    T2b = T27 - T2a;		    T2c = T24 + T2b;		    T5O = T24 - T2b;	       }	       {		    E T4C, T4J, Ta8, Taf;		    T4C = Ta - Td;		    T4J = T4F - T4I;		    T4K = T4C + T4J;		    T6g = T4J - T4C;		    Ta8 = Ta6 - Ta7;		    Taf = KP707106781 * (Tab + Tae);		    Tag = Ta8 - Taf;		    TdE = Ta8 + Taf;	       }	       {		    E Tcw, Tcz, T7N, T7O;		    Tcw = Tcu + Tcv;		    Tcz = KP707106781 * (Tcx - Tcy);		    TcA = Tcw - Tcz;		    Te6 = Tcw + Tcz;		    T7N = T4F + T4I;		    T7O = T2a + T27;		    T7P = T7N + T7O;		    T94 = T7N - T7O;	       }	  }	  {	       E TC, Tb1, T2Z, TaQ, T2X, Tb2, T7m, TaR, TJ, Tb4, Tb5, T2Q, T36, TaV, TaY;	       E T7n, Tfq, Tfr;	       {		    E Tw, Tx, Ty, Tz, TA, TB;		    Tw = cr[WS(rs, 2)];		    Tx = ci[WS(rs, 29)];		    Ty = Tw + Tx;		    Tz = cr[WS(rs, 18)];		    TA = ci[WS(rs, 13)];		    TB = Tz + TA;		    TC = Ty + TB;		    Tb1 = Tz - TA;		    T2Z = Ty - TB;		    TaQ = Tw - Tx;	       }	       {		    E T2R, T2S, T2T, T2U, T2V, T2W;		    T2R = ci[WS(rs, 61)];		    T2S = cr[WS(rs, 34)];		    T2T = T2R - T2S;		    T2U = ci[WS(rs, 45)];		    T2V = cr[WS(rs, 50)];		    T2W = T2U - T2V;		    T2X = T2T - T2W;		    Tb2 = T2R + T2S;		    T7m = T2T + T2W;		    TaR = T2U + T2V;	       }	       {		    E TF, TaT, T35, TaU, TI, TaW, T32, TaX;		    {			 E TD, TE, T33, T34;			 TD = cr[WS(rs, 10)];			 TE = ci[WS(rs, 21)];			 TF = TD + TE;			 TaT = TD - TE;			 T33 = ci[WS(rs, 53)];			 T34 = cr[WS(rs, 42)];			 T35 = T33 - T34;			 TaU = T33 + T34;		    }		    {			 E TG, TH, T30, T31;			 TG = ci[WS(rs, 5)];			 TH = cr[WS(rs, 26)];			 TI = TG + TH;			 TaW = TG - TH;			 T30 = ci[WS(rs, 37)];			 T31 = cr[WS(rs, 58)];			 T32 = T30 - T31;			 TaX = T30 + T31;		    }		    TJ = TF + TI;		    Tb4 = TaT + TaU;		    Tb5 = TaW + TaX;		    T2Q = TF

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -