⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hb2_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
			 E T5F, T5S, T6a, T6g;			 T5F = FMA(KP980785280, T5E, T5x);			 T5S = FNMS(KP980785280, T5E, T5x);			 T6a = T60 * T69;			 cr[WS(rs, 15)] = FNMS(T66, T69, T64);			 T6g = T6b * T6f;			 cr[WS(rs, 31)] = FNMS(T6e, T6f, T6d);			 {			      E T5W, T5T, T5Q, T5G;			      T5W = T5U * T5S;			      T5T = T5R * T5S;			      T5Q = T5I * T5F;			      T5G = T5u * T5F;			      ci[WS(rs, 15)] = FMA(T66, T63, T6a);			      ci[WS(rs, 31)] = FMA(T6e, T6c, T6g);			      ci[WS(rs, 7)] = FMA(T5R, T5V, T5W);			      cr[WS(rs, 7)] = FNMS(T5U, T5V, T5T);			      ci[WS(rs, 23)] = FMA(T5u, T5P, T5Q);			      cr[WS(rs, 23)] = FNMS(T5I, T5P, T5G);			 }		    }	       }	  }     }}/* Twiddle instruction table referenced by desc: four TW_CEXP entries whose last field is 1, 3, 9 and 27, closed by a TW_NEXT entry. NOTE(review): the meaning of each field comes from the tw_instr declaration, which is not visible here -- confirm. */static const tw_instr twinstr[] = {     {TW_CEXP, 1, 1},     {TW_CEXP, 1, 3},     {TW_CEXP, 1, 9},     {TW_CEXP, 1, 27},     {TW_NEXT, 1, 0}};/* Codelet descriptor: initialised with 32, the name string "hb2_32", twinstr, &GENUS and the list {236, 98, 252, 0}. NOTE(review): 32 is presumably the transform size (the generator command line in this file passes -n 32) and the four numbers are presumably operation counts for the variant that precedes the #else marked HAVE_FMA -- confirm against the hc2hc_desc declaration. */static const hc2hc_desc desc = { 32, "hb2_32", twinstr, &GENUS, {236, 98, 252, 0} };/* Registration entry point: hands hb2_32 and the address of desc to X(khc2hc_register) for planner p. */void X(codelet_hb2_32) (planner *p) {     X(khc2hc_register) (p, hb2_32, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hb2_32 -include hb.h *//* * This function contains 488 FP additions, 280 FP multiplications, * (or, 376 additions, 168 multiplications, 112 fused multiply/add), * 160 stack variables, 7 constants, and 128 memory accesses */#include "hb.h"static void hb2_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP555570233, +0.555570233019602224742830813948532874374937191);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP195090322, +0.195090322016128267848284868477022240927691618);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP382683432, +0.382683432365089771728459984030398866761344562);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + ((mb - 1) * 
8); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) {	  E T11, T14, T12, T15, T17, T2z, T2B, T1c, T18, T1d, T1g, T1k, T2F, T2L, T3t;	  E T4H, T3h, T3V, T3b, T4v, T4T, T4X, T6t, T71, T6z, T75, T81, T8x, T8f, T8z;	  E T2R, T2V, T8p, T8t, T4r, T4t, T53, T69, T3n, T3r, T7P, T7T, T4P, T4R, T6F;	  E T6R, T1f, T2X, T1j, T2Y, T1l, T31, T2d, T2Z, T49, T4h, T4c, T4i, T4d, T4n;	  E T4f, T4j;	  {	       E T2P, T3q, T2U, T3l, T2Q, T3p, T2T, T3m, T2D, T3g, T2K, T39, T2E, T3f, T2J;	       E T3a;	       {		    E T13, T1b, T16, T1a;		    T11 = W[0];		    T14 = W[1];		    T12 = W[2];		    T15 = W[3];		    T13 = T11 * T12;		    T1b = T14 * T12;		    T16 = T14 * T15;		    T1a = T11 * T15;		    T17 = T13 + T16;		    T2z = T13 - T16;		    T2B = T1a + T1b;		    T1c = T1a - T1b;		    T18 = W[4];		    T2P = T12 * T18;		    T3q = T14 * T18;		    T2U = T15 * T18;		    T3l = T11 * T18;		    T1d = W[5];		    T2Q = T15 * T1d;		    T3p = T11 * T1d;		    T2T = T12 * T1d;		    T3m = T14 * T1d;		    T1g = W[6];		    T2D = T11 * T1g;		    T3g = T15 * T1g;		    T2K = T14 * T1g;		    T39 = T12 * T1g;		    T1k = W[7];		    T2E = T14 * T1k;		    T3f = T12 * T1k;		    T2J = T11 * T1k;		    T3a = T15 * T1k;	       }	       T2F = T2D - T2E;	       T2L = T2J + T2K;	       T3t = T39 - T3a;	       T4H = T2J - T2K;	       T3h = T3f - T3g;	       T3V = T3f + T3g;	       T3b = T39 + T3a;	       T4v = T2D + T2E;	       T4T = FMA(T18, T1g, T1d * T1k);	       T4X = FNMS(T1d, T1g, T18 * T1k);	       {		    E T6r, T6s, T6x, T6y;		    T6r = T17 * T1g;		    T6s = T1c * T1k;		    T6t = T6r - T6s;		    T71 = T6r + T6s;		    T6x = T17 * T1k;		    T6y = T1c * T1g;		    T6z = T6x + T6y;		    T75 = T6x - T6y;	       }	       {		    E T7Z, T80, T8d, T8e;		    T7Z = T2z * T1g;		    T80 = T2B * T1k;		    T81 = T7Z + T80;		    T8x = T7Z - T80;		    T8d = T2z * T1k;		    T8e = T2B * T1g;		    T8f = T8d - T8e;		    T8z = T8d + T8e;		    T2R = T2P - T2Q;		    T2V = T2T + T2U;		    T8p = FMA(T2R, 
T1g, T2V * T1k);		    T8t = FNMS(T2V, T1g, T2R * T1k);	       }	       T4r = T2P + T2Q;	       T4t = T2T - T2U;	       T53 = FMA(T4r, T1g, T4t * T1k);	       T69 = FNMS(T4t, T1g, T4r * T1k);	       T3n = T3l + T3m;	       T3r = T3p - T3q;	       T7P = FMA(T3n, T1g, T3r * T1k);	       T7T = FNMS(T3r, T1g, T3n * T1k);	       T4P = T3l - T3m;	       T4R = T3p + T3q;	       T6F = FMA(T4P, T1g, T4R * T1k);	       T6R = FNMS(T4R, T1g, T4P * T1k);	       {		    E T19, T1e, T1h, T1i;		    T19 = T17 * T18;		    T1e = T1c * T1d;		    T1f = T19 + T1e;		    T2X = T19 - T1e;		    T1h = T17 * T1d;		    T1i = T1c * T18;		    T1j = T1h - T1i;		    T2Y = T1h + T1i;	       }	       T1l = FMA(T1f, T1g, T1j * T1k);	       T31 = FNMS(T2Y, T1g, T2X * T1k);	       T2d = FNMS(T1j, T1g, T1f * T1k);	       T2Z = FMA(T2X, T1g, T2Y * T1k);	       {		    E T47, T48, T4a, T4b;		    T47 = T2z * T18;		    T48 = T2B * T1d;		    T49 = T47 - T48;		    T4h = T47 + T48;		    T4a = T2z * T1d;		    T4b = T2B * T18;		    T4c = T4a + T4b;		    T4i = T4a - T4b;	       }	       T4d = FMA(T49, T1g, T4c * T1k);	       T4n = FNMS(T4i, T1g, T4h * T1k);	       T4f = FNMS(T4c, T1g, T49 * T1k);	       T4j = FMA(T4h, T1g, T4i * T1k);	  }	  {	       E T56, T7b, T7C, T6c, Tf, T1m, T6f, T7c, T3Y, T4I, T2t, T32, T5d, T7D, T3w;	       E T4w, Tu, T2e, T7g, T7F, T7j, T7G, T1B, T33, T3z, T40, T5l, T6i, T5s, T6h;	       E T3C, T3Z, TK, T1D, T7v, T86, T7y, T85, T1S, T35, T3O, T4C, T5F, T6J, T5M;	       E T6K, T3R, T4D, TZ, T1U, T7o, T89, T7r, T88, T29, T36, T3H, T4z, T5Y, T6M;	       E T65, T6N, T3K, T4A;	       {		    E T3, T54, T2o, T58, T2r, T5b, T6, T6a, Ta, T57, T2h, T6b, T2k, T55, Td;		    E T5a;		    {			 E T1, T2, T2m, T2n;			 T1 = cr[0];			 T2 = ci[WS(rs, 15)];			 T3 = T1 + T2;			 T54 = T1 - T2;			 T2m = ci[WS(rs, 27)];			 T2n = cr[WS(rs, 20)];			 T2o = T2m - T2n;			 T58 = T2m + T2n;		    }		    {			 E T2p, T2q, T4, T5;			 T2p = ci[WS(rs, 19)];			 T2q = cr[WS(rs, 28)];			 T2r = T2p - T2q;			 T5b = T2p + T2q;			 T4 = 
cr[WS(rs, 8)];			 T5 = ci[WS(rs, 7)];			 T6 = T4 + T5;			 T6a = T4 - T5;		    }		    {			 E T8, T9, T2f, T2g;			 T8 = cr[WS(rs, 4)];			 T9 = ci[WS(rs, 11)];			 Ta = T8 + T9;			 T57 = T8 - T9;			 T2f = ci[WS(rs, 31)];			 T2g = cr[WS(rs, 16)];			 T2h = T2f - T2g;			 T6b = T2f + T2g;		    }		    {			 E T2i, T2j, Tb, Tc;			 T2i = ci[WS(rs, 23)];			 T2j = cr[WS(rs, 24)];			 T2k = T2i - T2j;			 T55 = T2i + T2j;			 Tb = ci[WS(rs, 3)];			 Tc = cr[WS(rs, 12)];			 Td = Tb + Tc;			 T5a = Tb - Tc;		    }		    {			 E T7, Te, T2l, T2s;			 T56 = T54 - T55;			 T7b = T54 + T55;			 T7C = T6b - T6a;			 T6c = T6a + T6b;			 T7 = T3 + T6;			 Te = Ta + Td;			 Tf = T7 + Te;			 T1m = T7 - Te;			 {			      E T6d, T6e, T3W, T3X;			      T6d = T57 + T58;			      T6e = T5a + T5b;			      T6f = KP707106781 * (T6d - T6e);			      T7c = KP707106781 * (T6d + T6e);			      T3W = T2h - T2k;			      T3X = Ta - Td;			      T3Y = T3W - T3X;			      T4I = T3X + T3W;			 }			 T2l = T2h + T2k;			 T2s = T2o + T2r;			 T2t = T2l - T2s;			 T32 = T2l + T2s;			 {			      E T59, T5c, T3u, T3v;			      T59 = T57 - T58;			      T5c = T5a - T5b;			      T5d = KP707106781 * (T59 + T5c);			      T7D = KP707106781 * (T59 - T5c);			      T3u = T3 - T6;			      T3v = T2r - T2o;			      T3w = T3u - T3v;			      T4w = T3u + T3v;			 }		    }	       }	       {		    E Ti, T5p, T1w, T5n, T1z, T5q, Tl, T5m, Tp, T5i, T1p, T5g, T1s, T5j, Ts;		    E T5f;		    {			 E Tg, Th, T1u, T1v;			 Tg = cr[WS(rs, 2)];			 Th = ci[WS(rs, 13)];			 Ti = Tg + Th;			 T5p = Tg - Th;			 T1u = ci[WS(rs, 29)];			 T1v = cr[WS(rs, 18)];			 T1w = T1u - T1v;			 T5n = T1u + T1v;		    }		    {			 E T1x, T1y, Tj, Tk;			 T1x = ci[WS(rs, 21)];			 T1y = cr[WS(rs, 26)];			 T1z = T1x - T1y;			 T5q = T1x + T1y;			 Tj = cr[WS(rs, 10)];			 Tk = ci[WS(rs, 5)];			 Tl = Tj + Tk;			 T5m = Tj - Tk;		    }		    {			 E Tn, To, T1n, T1o;			 Tn = ci[WS(rs, 1)];			 To = cr[WS(rs, 14)];			 Tp = Tn + To;			 T5i = Tn - To;			 T1n = ci[WS(rs, 17)];			 T1o = cr[WS(rs, 30)];			 T1p 
= T1n - T1o;			 T5g = T1n + T1o;		    }		    {			 E T1q, T1r, Tq, Tr;			 T1q = ci[WS(rs, 25)];			 T1r = cr[WS(rs, 22)];			 T1s = T1q - T1r;			 T5j = T1q + T1r;			 Tq = cr[WS(rs, 6)];			 Tr = ci[WS(rs, 9)];			 Ts = Tq + Tr;			 T5f = Tq - Tr;		    }		    {			 E Tm, Tt, T7e, T7f;			 Tm = Ti + Tl;			 Tt = Tp + Ts;			 Tu = Tm + Tt;			 T2e = Tm - Tt;			 T7e = T5p + T5q;			 T7f = T5n - T5m;			 T7g = FNMS(KP923879532, T7f, KP382683432 * T7e);			 T7F = FMA(KP382683432, T7f, KP923879532 * T7e);		    }		    {			 E T7h, T7i, T1t, T1A;			 T7h = T5i + T5j;			 T7i = T5f + T5g;			 T7j = FNMS(KP923879532, T7i, KP382683432 * T7h);			 T7G = FMA(KP382683432, T7i, KP923879532 * T7h);			 T1t = T1p + T1s;			 T1A = T1w + T1z;			 T1B = T1t - T1A;			 T33 = T1A + T1t;		    }		    {			 E T3x, T3y, T5h, T5k;			 T3x = T1p - T1s;			 T3y = Tp - Ts;			 T3z = T3x - T3y;			 T40 = T3y + T3x;			 T5h = T5f - T5g;			 T5k = T5i - T5j;			 T5l = FNMS(KP382683432, T5k, KP923879532 * T5h);			 T6i = FMA(KP382683432, T5h, KP923879532 * T5k);		    }		    {			 E T5o, T5r, T3A, T3B;			 T5o = T5m + T5n;			 T5r = T5p - T5q;			 T5s = FMA(KP923879532, T5o, KP382683432 * T5r);			 T6h = FNMS(KP382683432, T5o, KP923879532 * T5r);			 T3A = Ti - Tl;			 T3B = T1w - T1z;			 T3C = T3A + T3B;			 T3Z = T3A - T3B;		    }	       }	       {		    E Ty, T5v, TB, T5G, T1J, T5w, T1G, T5H, TI, T5K, T1Q, T5D, TF, T5J, T1N;		    E T5A;		    {			 E Tw, Tx, T1E, T1F;			 Tw = cr[WS(rs, 1)];			 Tx = ci[WS(rs, 14)];			 Ty = Tw + Tx;			 T5v = Tw - Tx;			 {			      E Tz, TA, T1H, T1I;			      Tz = cr[WS(rs, 9)];			      TA = ci[WS(rs, 6)];			      TB = Tz + TA;			      T5G = Tz - TA;			      T1H = ci[WS(rs, 22)];			      T1I = cr[WS(rs, 25)];			      T1J = T1H - T1I;			      T5w = T1H + T1I;			 }			 T1E = ci[WS(rs, 30)];			 T1F = cr[WS(rs, 17)];			 T1G = T1E - T1F;			 T5H = T1E + T1F;			 {			      E TG, TH, T5B, T1O, T1P, T5C;			      TG = ci[WS(rs, 2)];			      TH = cr[WS(rs, 13)];			      T5B = TG - TH;			      T1O = ci[WS(rs, 18)];			      
T1P = cr[WS(rs, 29)];			      T5C = T1O + T1P;			      TI = TG + TH;			      T5K = T5B + T5C;			      T1Q = T1O - T1P;			      T5D = T5B - T5C;			 }			 {			      E TD, TE, T5y, T1L, T1M, T5z;			      TD = cr[WS(rs, 5)];			      TE = ci[WS(rs, 10)];			      T5y = TD - TE;			      T1L = ci[WS(rs, 26)];			      T1M = cr[WS(rs, 21)];			      T5z = T1L + T1M;			      TF = TD + TE;			      T5J = T5y + T5z;			      T1N = T1L - T1M;			      T5A = T5y - T5z;			 }		    }		    {			 E TC, TJ, T7t, T7u;			 TC = Ty + TB;			 TJ = TF + TI;			 TK = TC + TJ;			 T1D = TC - TJ;			 T7t = T5H - T5G;			 T7u = KP707106781 * (T5A - T5D);			 T7v = T7t + T7u;			 T86 = T7t - T7u;		    }		    {			 E T7w, T7x, T1K, T1R;			 T7w = T5v + T5w;			 T7x = KP707106781 * (T5J + T5K);			 T7y = T7w - T7x;			 T85 = T7w + T7x;			 T1K = T1G + T1J;			 T1R = T1N + T1Q;			 T1S = T1K - T1R;			 T35 = T1K + T1R;		    }		    {			 E T3M, T3N, T5x, T5E;			 T3M = T1G - T1J;			 T3N = TF - TI;			 T3O = T3M - T3N;			 T4C = T3N + T3M;			 T5x = T5v - T5w;			 T5E = KP707106781 * (T5A + T5D);			 T5F = T5x - T5E;			 T6J = T5x + T5E;		    }		    {			 E T5I, T5L, T3P, T3Q;			 T5I = T5G + T5H;			 T5L = KP707106781 * (T5J - T5K);			 T5M = T5I - T5L;			 T6K = T5I + T5L;			 T3P = Ty - TB;			 T3Q = T1Q - T1N;			 T3R = T3P - T3Q;			 T4D = T3P + T3Q;		    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -