⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hb_32.c

📁 最新的FFT程序
💻 C
📖 第 1 页 / 共 4 页
字号:
	  rio[WS(ios, 15)] = FNMS(T80, T81, T7Z);	  T8c = T83 * T8b;	  T87 = T83 * T86;	  iio[-WS(ios, 24)] = FMA(T88, T86, T8c);	  rio[WS(ios, 7)] = FNMS(T88, T8b, T87);     }     return W;}static const tw_instr twinstr[] = {     {TW_FULL, 0, 32},     {TW_NEXT, 1, 0}};static const hc2hc_desc desc = { 32, "hb_32", twinstr, &GENUS, {236, 62, 198, 0}, 0, 0, 0 };void X(codelet_hb_32) (planner *p) {     X(khc2hc_register) (p, hb_32, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h *//* * This function contains 434 FP additions, 208 FP multiplications, * (or, 340 additions, 114 multiplications, 94 fused multiply/add), * 98 stack variables, and 128 memory accesses *//* * Generator Id's :  * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hb.h"static const R *hb_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){     DK(KP555570233, +0.555570233019602224742830813948532874374937191);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP195090322, +0.195090322016128267848284868477022240927691618);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP382683432, +0.382683432365089771728459984030398866761344562);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT i;     for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62, MAKE_VOLATILE_STRIDE(ios)) {	  E T5n, T6y, T77, T5u, Tf, T3i, T5x, T76, T3G, T47, T1a, T2I, T5k, T6z, T2o;	  E T2Y, Tu, T3D, T6D, T73, T6G, T74, T1j, T2d, T1s, T2e, T55, T5z, T5c, T5A;	  E T3l, T48, TK, T3n, T6L, T7t, T6O, T7s, T1D, T2L, T1M, T2M, T4w, T62, T4D;	  E T61, T3q, T41, TZ, T3s, T6S, T7w, T6V, T7v, T1W, T2O, T25, T2P, T4P, T64;	  E T4W, T65, T3v, T42;	  {	       E T3, T5l, T2j, T5t, T6, T5s, T2m, T5m, Ta, T5i, T15, T5h, Td, T5e, T18;	       E T5f;	       {		    E T1, T2, T2h, T2i;		    T1 = rio[0];		    T2 = iio[-WS(ios, 16)];		    T3 = T1 + T2;		    T5l = T1 - T2;		    T2h = iio[0];		    T2i = rio[WS(ios, 16)];		    T2j = T2h - T2i;		    T5t = T2h + T2i;	       }	       {		    E T4, T5, T2k, T2l;		    T4 = rio[WS(ios, 8)];		    T5 = iio[-WS(ios, 24)];		    T6 = T4 + T5;		    T5s = T4 - T5;		    T2k = iio[-WS(ios, 8)];		    T2l = rio[WS(ios, 24)];		    T2m = T2k - T2l;		    T5m = T2k + T2l;	       }	       {		    E T8, T9, T13, T14;		    T8 = rio[WS(ios, 4)];		    T9 = iio[-WS(ios, 20)];		    Ta = T8 + T9;		    T5i = T8 - T9;		    T13 = iio[-WS(ios, 4)];		    T14 = rio[WS(ios, 20)];		    T15 = T13 - T14;		    T5h = T13 + T14;	       }	       {		    E Tb, Tc, T16, T17;		    Tb = iio[-WS(ios, 28)];		    Tc = rio[WS(ios, 12)];		    Td = Tb + Tc;		    T5e = Tb - Tc;		    T16 = iio[-WS(ios, 12)];		    T17 = rio[WS(ios, 28)];		    T18 = T16 - T17;		    T5f = T17 + T16;	       }	       {		    E T7, Te, T12, T19;		    T5n = T5l - T5m;		    T6y = T5t - T5s;		    T77 = T5l + T5m;		    T5u = T5s + T5t;		    T7 = T3 + T6;		    Te = Ta + Td;		    Tf = T7 + Te;		    T3i = T7 - Te;		    {			 E T5v, T5w, T3E, T3F;			 T5v = T5i + T5h;			 T5w = T5e + T5f;			 T5x = KP707106781 * (T5v - T5w);			 T76 = KP707106781 * (T5v + T5w);			 T3E = T2j + T2m;			 T3F = T15 + T18;			 T3G = T3E - T3F;			 T47 = T3F + T3E;		    }		    T12 = T3 - T6;		    T19 = T15 - T18;		    T1a = T12 + T19;		    T2I = T12 - T19;		    {			 E T5g, T5j, T2g, T2n;			 T5g = T5e - T5f;			 T5j = T5h - T5i;			 T5k = KP707106781 * (T5g - T5j);			 T6z = KP707106781 * (T5j + T5g);			 T2g = Td - Ta;			 T2n = T2j - T2m;			 T2o = T2g + T2n;			 T2Y = T2n - T2g;		    }	       }	  }	  {	       E Ti, T4Z, T1e, T53, Tl, T52, T1h, T50, Tp, T56, T1n, T5a, Ts, T59, T1q;	       E T57;	       {		    E Tg, Th, T1c, T1d;		    Tg = rio[WS(ios, 2)];		    Th = iio[-WS(ios, 18)];		    Ti = Tg + Th;		    T4Z = Tg - Th;		    T1c = iio[-WS(ios, 2)];		    T1d = rio[WS(ios, 18)];		    T1e = T1c - T1d;		    T53 = T1c + T1d;	       }	       {		    E Tj, Tk, T1f, T1g;		    Tj = rio[WS(ios, 10)];		    Tk = iio[-WS(ios, 26)];		    Tl = Tj + Tk;		    T52 = Tj - Tk;		    T1f = iio[-WS(ios, 10)];		    T1g = rio[WS(ios, 26)];		    T1h = T1f - T1g;		    T50 = T1f + T1g;	       }	       {		    E Tn, To, T1l, T1m;		    Tn = iio[-WS(ios, 30)];		    To = rio[WS(ios, 14)];		    Tp = Tn + To;		    T56 = Tn - To;		    T1l = iio[-WS(ios, 14)];		    T1m = rio[WS(ios, 30)];		    T1n = T1l - T1m;		    T5a = T1m + T1l;	       }	       {		    E Tq, Tr, T1o, T1p;		    Tq = rio[WS(ios, 6)];		    Tr = iio[-WS(ios, 22)];		    Ts = Tq + Tr;		    T59 = Tq - Tr;		    T1o = iio[-WS(ios, 6)];		    T1p = rio[WS(ios, 22)];		    T1q = T1o - T1p;		    T57 = T1o + T1p;	       }	       {		    E Tm, Tt, T6B, T6C;		    Tm = Ti + Tl;		    Tt = Tp + Ts;		    Tu = Tm + Tt;		    T3D = Tt - Tm;		    T6B = T53 - T52;		    T6C = T4Z + T50;		    T6D = FNMS(KP382683432, T6C, KP923879532 * T6B);		    T73 = FMA(KP382683432, T6B, KP923879532 * T6C);	       }	       {		    E T6E, T6F, T1b, T1i;		    T6E = T56 + T57;		    T6F = T59 + T5a;		    T6G = FNMS(KP923879532, T6F, KP382683432 * T6E);		    T74 = FMA(KP923879532, T6E, KP382683432 * T6F);		    T1b = Ti - Tl;		    T1i = T1e - T1h;		    T1j = T1b + T1i;		    T2d = T1i - T1b;	       }	       {		    E T1k, T1r, T51, T54;		    T1k = Tp - Ts;		    T1r = T1n - T1q;		    T1s = T1k - T1r;		    T2e = T1k + T1r;		    T51 = T4Z - T50;		    T54 = T52 + T53;		    T55 = FNMS(KP382683432, T54, KP923879532 * T51);		    T5z = FMA(KP923879532, T54, KP382683432 * T51);	       }	       {		    E T58, T5b, T3j, T3k;		    T58 = T56 - T57;		    T5b = T59 - T5a;		    T5c = FMA(KP923879532, T58, KP382683432 * T5b);		    T5A = FNMS(KP382683432, T58, KP923879532 * T5b);		    T3j = T1e + T1h;		    T3k = T1q + T1n;		    T3l = T3j - T3k;		    T48 = T3j + T3k;	       }	  }	  {	       E Ty, T4t, T1H, T4y, TB, T4x, T1K, T4u, TI, T4B, T1B, T4o, TF, T4A, T1y;	       E T4r;	       {		    E Tw, Tx, T1I, T1J;		    Tw = rio[WS(ios, 1)];		    Tx = iio[-WS(ios, 17)];		    Ty = Tw + Tx;		    T4t = Tw - Tx;		    {			 E T1F, T1G, Tz, TA;			 T1F = iio[-WS(ios, 1)];			 T1G = rio[WS(ios, 17)];			 T1H = T1F - T1G;			 T4y = T1F + T1G;			 Tz = rio[WS(ios, 9)];			 TA = iio[-WS(ios, 25)];			 TB = Tz + TA;			 T4x = Tz - TA;		    }		    T1I = iio[-WS(ios, 9)];		    T1J = rio[WS(ios, 25)];		    T1K = T1I - T1J;		    T4u = T1I + T1J;		    {			 E TG, TH, T4m, T1z, T1A, T4n;			 TG = iio[-WS(ios, 29)];			 TH = rio[WS(ios, 13)];			 T4m = TG - TH;			 T1z = iio[-WS(ios, 13)];			 T1A = rio[WS(ios, 29)];			 T4n = T1A + T1z;			 TI = TG + TH;			 T4B = T4m + T4n;			 T1B = T1z - T1A;			 T4o = T4m - T4n;		    }		    {			 E TD, TE, T4q, T1w, T1x, T4p;			 TD = rio[WS(ios, 5)];			 TE = iio[-WS(ios, 21)];			 T4q = TD - TE;			 T1w = iio[-WS(ios, 5)];			 T1x = rio[WS(ios, 21)];			 T4p = T1w + T1x;			 TF = TD + TE;			 T4A = T4q + T4p;			 T1y = T1w - T1x;			 T4r = T4p - T4q;		    }	       }	       {		    E TC, TJ, T6J, T6K;		    TC = Ty + TB;		    TJ = TF + TI;		    TK = TC + TJ;		    T3n = TC - TJ;		    T6J = T4y - T4x;		    T6K = KP707106781 * (T4r + T4o);		    T6L = T6J + T6K;		    T7t = T6J - T6K;	       }	       {		    E T6M, T6N, T1v, T1C;		    T6M = KP707106781 * (T4A + T4B);		    T6N = T4t + T4u;		    T6O = T6M + T6N;		    T7s = T6N - T6M;		    T1v = Ty - TB;		    T1C = T1y - T1B;		    T1D = T1v + T1C;		    T2L = T1v - T1C;	       }	       {		    E T1E, T1L, T4s, T4v;		    T1E = TI - TF;		    T1L = T1H - T1K;		    T1M = T1E + T1L;		    T2M = T1L - T1E;		    T4s = KP707106781 * (T4o - T4r);		    T4v = T4t - T4u;		    T4w = T4s + T4v;		    T62 = T4v - T4s;	       }	       {		    E T4z, T4C, T3o, T3p;		    T4z = T4x + T4y;		    T4C = KP707106781 * (T4A - T4B);		    T4D = T4z + T4C;		    T61 = T4z - T4C;		    T3o = T1H + T1K;		    T3p = T1y + T1B;		    T3q = T3o - T3p;		    T41 = T3p + T3o;	       }	  }	  {	       E TN, T4T, T20, T4N, TQ, T4M, T23, T4U, TX, T4Q, T1U, T4K, TU, T4R, T1R;	       E T4H;	       {		    E TL, TM, T21, T22;		    TL = iio[-WS(ios, 31)];		    TM = rio[WS(ios, 15)];		    TN = TL + TM;		    T4T = TL - TM;		    {			 E T1Y, T1Z, TO, TP;			 T1Y = iio[-WS(ios, 15)];			 T1Z = rio[WS(ios, 31)];			 T20 = T1Y - T1Z;			 T4N = T1Z + T1Y;			 TO = rio[WS(ios, 7)];			 TP = iio[-WS(ios, 23)];			 TQ = TO + TP;			 T4M = TO - TP;		    }		    T21 = iio[-WS(ios, 7)];		    T22 = rio[WS(ios, 23)];		    T23 = T21 - T22;		    T4U = T21 + T22;		    {			 E TV, TW, T4I, T1S, T1T, T4J;			 TV = iio[-WS(ios, 27)];			 TW = rio[WS(ios, 11)];			 T4I = TV - TW;			 T1S = iio[-WS(ios, 11)];			 T1T = rio[WS(ios, 27)];			 T4J = T1T + T1S;			 TX = TV + TW;			 T4Q = T4I - T4J;			 T1U = T1S - T1T;			 T4K = T4I + T4J;		    }		    {			 E TS, TT, T4F, T1P, T1Q, T4G;			 TS = rio[WS(ios, 3)];			 TT = iio[-WS(ios, 19)];			 T4F = TS - TT;			 T1P = iio[-WS(ios, 3)];			 T1Q = rio[WS(ios, 19)];			 T4G = T1P + T1Q;			 TU = TS + TT;			 T4R = T4G - T4F;			 T1R = T1P - T1Q;			 T4H = T4F + T4G;		    }	       }	       {		    E TR, TY, T6Q, T6R;		    TR = TN + TQ;		    TY = TU + TX;		    TZ = TR + TY;		    T3s = TR - TY;		    T6Q = KP707106781 * (T4R + T4Q);		    T6R = T4M + T4N;		    T6S = T6Q - T6R;		    T7w = T6Q + T6R;	       }	       {		    E T6T, T6U, T1O, T1V;		    T6T = KP707106781 * (T4H + T4K);		    T6U = T4T + T4U;		    T6V = T6T + T6U;		    T7v = T6U - T6T;		    T1O = TN - TQ;		    T1V = T1R - T1U;		    T1W = T1O + T1V;		    T2O = T1O - T1V;	       }	       {		    E T1X, T24, T4L, T4O;		    T1X = TX - TU;		    T24 = T20 - T23;		    T25 = T1X + T24;		    T2P = T24 - T1X;		    T4L = KP707106781 * (T4H - T4K);		    T4O = T4M - T4N;		    T4P = T4L + T4O;		    T64 = T4O - T4L;	       }	       {		    E T4S, T4V, T3t, T3u;		    T4S = KP707106781 * (T4Q - T4R);		    T4V = T4T - T4U;		    T4W = T4S + T4V;		    T65 = T4V - T4S;		    T3t = T20 + T23;		    T3u = T1R + T1U;		    T3v = T3t - T3u;		    T42 = T3u + T3t;	       }	  }	  {	       E Tv, T10, T4g, T4i, T4j, T4k, T4f, T4h;	       Tv = Tf + Tu;	       T10 = TK + TZ;	       T4g = Tv - T10;	       T4i = T48 + T47;	       T4j = T41 + T42;	       T4k = T4i - T4j;	       rio[0] = Tv + T10;	       iio[-WS(ios, 31)] = T4j + T4i;	       T4f = W[30];	       T4h = W[31];	       rio[WS(ios, 16)] = FNMS(T4h, T4k, T4f * T4g);	       iio[-WS(ios, 15)] = FMA(T4h, T4g, T4f * T4k);	  }	  {	       E T44, T4c, T4a, T4e;	       {		    E T40, T43, T46, T49;		    T40 = Tf - Tu;		    T43 = T41 - T42;		    T44 = T40 + T43;		    T4c = T40 - T43;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -