⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hf_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
			 }		    }	       }	  }	  cr[WS(rs, 5)] = FMA(KP831469612, T6D, T6A);	  ci[WS(rs, 10)] = FNMS(KP831469612, T6D, T6A);     }}/* Twiddle-instruction table for this codelet; referenced by desc below. NOTE(review): the meaning of TW_FULL / TW_NEXT and of their numeric fields is declared outside this file - presumably the TW_NEXT entry ends the list; confirm against the tw_instr declaration. */static const tw_instr twinstr[] = {     {TW_FULL, 1, 32},     {TW_NEXT, 1, 0}};/* Descriptor passed at registration: the value 32, the name string "hf_32", the twinstr table, &GENUS, and a group of four numbers. NOTE(review): 32 presumably is the transform radix and the four numbers look like operation counts for the HAVE_FMA build - confirm against the hc2hc_desc declaration. */static const hc2hc_desc desc = { 32, "hf_32", twinstr, &GENUS, {236, 62, 198, 0} };/* Registration entry point: hands hf_32 together with its descriptor to X(khc2hc_register) on planner p. */void X(codelet_hf_32) (planner *p) {     X(khc2hc_register) (p, hf_32, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2hc -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h *//* * This function contains 434 FP additions, 208 FP multiplications, * (or, 340 additions, 114 multiplications, 94 fused multiply/add), * 96 stack variables, 7 constants, and 128 memory accesses */#include "hf.h"/* hf_32 (variant after the HAVE_FMA #else): W is first moved forward by (mb - 1) * 62 entries, then the loop runs m from mb up to (not including) me; every iteration steps cr by +ms, ci by -ms and W by 62. Inputs are read as cr[WS(rs, k)] / ci[WS(rs, k)] and combined with a pair of W entries via FMA / FNMS. The body continues beyond this excerpt, so outputs are not described here. */static void hf_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){/* The seven constants below are numerically sin/cos of 3*pi/16, pi/16, pi/8 and cos(pi/4): KP555570233 = sin(3*pi/16), KP831469612 = cos(3*pi/16), KP980785280 = cos(pi/16), KP195090322 = sin(pi/16), KP382683432 = sin(pi/8), KP923879532 = cos(pi/8), KP707106781 = cos(pi/4). */     DK(KP555570233, +0.555570233019602224742830813948532874374937191);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP195090322, +0.195090322016128267848284868477022240927691618);     DK(KP382683432, +0.382683432365089771728459984030398866761344562);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {	  E Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T56, T41;	  E T59, T2B, T67, T6e, T6O, T4b, T5g, T4s, T5d, TG, T7l, T5I, T73, T3a, T4U;	  E T3f, T4V, T14, T5K, T5N, T6F, T3m, T4Z, T3r, T4Y, T1r, T5P, T5S, T6E, T3x;	  E T52, T3C, T51, T2d, T5Z, T64, T6K, T3V, T5a, T44, T57, T2Y, T6f, T6a, T6P;	  E T4m, T5e, T4v, T5h;	  {	       E T1, T76, T6, T75, Tc, T32, Th, T33;	       T1 = cr[0];	       T76 = ci[0];	       {		    E T3, T5, T2, T4;		    T3 = cr[WS(rs, 16)];		    T5 = ci[WS(rs, 16)];		    T2 = W[30];		    T4 = W[31];		    T6 = 
FMA(T2, T3, T4 * T5);		    T75 = FNMS(T4, T3, T2 * T5);	       }	       {		    E T9, Tb, T8, Ta;		    T9 = cr[WS(rs, 8)];		    Tb = ci[WS(rs, 8)];		    T8 = W[14];		    Ta = W[15];		    Tc = FMA(T8, T9, Ta * Tb);		    T32 = FNMS(Ta, T9, T8 * Tb);	       }	       {		    E Te, Tg, Td, Tf;		    Te = cr[WS(rs, 24)];		    Tg = ci[WS(rs, 24)];		    Td = W[46];		    Tf = W[47];		    Th = FMA(Td, Te, Tf * Tg);		    T33 = FNMS(Tf, Te, Td * Tg);	       }	       {		    E T7, Ti, T7A, T7B;		    T7 = T1 + T6;		    Ti = Tc + Th;		    Tj = T7 + Ti;		    T5F = T7 - Ti;		    T7A = Tc - Th;		    T7B = T76 - T75;		    T7C = T7A + T7B;		    T7Q = T7B - T7A;	       }	       {		    E T31, T34, T74, T77;		    T31 = T1 - T6;		    T34 = T32 - T33;		    T35 = T31 + T34;		    T4T = T31 - T34;		    T74 = T32 + T33;		    T77 = T75 + T76;		    T78 = T74 + T77;		    T7m = T77 - T74;	       }	  }	  {	       E T1y, T3X, T1O, T3I, T1D, T3Y, T1J, T3H;	       {		    E T1v, T1x, T1u, T1w;		    T1v = cr[WS(rs, 1)];		    T1x = ci[WS(rs, 1)];		    T1u = W[0];		    T1w = W[1];		    T1y = FMA(T1u, T1v, T1w * T1x);		    T3X = FNMS(T1w, T1v, T1u * T1x);	       }	       {		    E T1L, T1N, T1K, T1M;		    T1L = cr[WS(rs, 25)];		    T1N = ci[WS(rs, 25)];		    T1K = W[48];		    T1M = W[49];		    T1O = FMA(T1K, T1L, T1M * T1N);		    T3I = FNMS(T1M, T1L, T1K * T1N);	       }	       {		    E T1A, T1C, T1z, T1B;		    T1A = cr[WS(rs, 17)];		    T1C = ci[WS(rs, 17)];		    T1z = W[32];		    T1B = W[33];		    T1D = FMA(T1z, T1A, T1B * T1C);		    T3Y = FNMS(T1B, T1A, T1z * T1C);	       }	       {		    E T1G, T1I, T1F, T1H;		    T1G = cr[WS(rs, 9)];		    T1I = ci[WS(rs, 9)];		    T1F = W[16];		    T1H = W[17];		    T1J = FMA(T1F, T1G, T1H * T1I);		    T3H = FNMS(T1H, T1G, T1F * T1I);	       }	       {		    E T1E, T1P, T5W, T5X;		    T1E = T1y + T1D;		    T1P = T1J + T1O;		    T1Q = T1E + T1P;		    T61 = T1E - T1P;		    T5W = T3X + T3Y;		    T5X = T3H + T3I;		    T5Y = T5W - T5X;		    T6J = T5W + T5X;	       }	       {		    
E T3G, T3J, T3Z, T40;		    T3G = T1y - T1D;		    T3J = T3H - T3I;		    T3K = T3G + T3J;		    T56 = T3G - T3J;		    T3Z = T3X - T3Y;		    T40 = T1J - T1O;		    T41 = T3Z - T40;		    T59 = T3Z + T40;	       }	  }	  {	       E T2j, T47, T2z, T4q, T2o, T48, T2u, T4p;	       {		    E T2g, T2i, T2f, T2h;		    T2g = cr[WS(rs, 31)];		    T2i = ci[WS(rs, 31)];		    T2f = W[60];		    T2h = W[61];		    T2j = FMA(T2f, T2g, T2h * T2i);		    T47 = FNMS(T2h, T2g, T2f * T2i);	       }	       {		    E T2w, T2y, T2v, T2x;		    T2w = cr[WS(rs, 23)];		    T2y = ci[WS(rs, 23)];		    T2v = W[44];		    T2x = W[45];		    T2z = FMA(T2v, T2w, T2x * T2y);		    T4q = FNMS(T2x, T2w, T2v * T2y);	       }	       {		    E T2l, T2n, T2k, T2m;		    T2l = cr[WS(rs, 15)];		    T2n = ci[WS(rs, 15)];		    T2k = W[28];		    T2m = W[29];		    T2o = FMA(T2k, T2l, T2m * T2n);		    T48 = FNMS(T2m, T2l, T2k * T2n);	       }	       {		    E T2r, T2t, T2q, T2s;		    T2r = cr[WS(rs, 7)];		    T2t = ci[WS(rs, 7)];		    T2q = W[12];		    T2s = W[13];		    T2u = FMA(T2q, T2r, T2s * T2t);		    T4p = FNMS(T2s, T2r, T2q * T2t);	       }	       {		    E T2p, T2A, T6c, T6d;		    T2p = T2j + T2o;		    T2A = T2u + T2z;		    T2B = T2p + T2A;		    T67 = T2p - T2A;		    T6c = T47 + T48;		    T6d = T4p + T4q;		    T6e = T6c - T6d;		    T6O = T6c + T6d;	       }	       {		    E T49, T4a, T4o, T4r;		    T49 = T47 - T48;		    T4a = T2u - T2z;		    T4b = T49 - T4a;		    T5g = T49 + T4a;		    T4o = T2j - T2o;		    T4r = T4p - T4q;		    T4s = T4o + T4r;		    T5d = T4o - T4r;	       }	  }	  {	       E To, T37, TE, T3d, Tt, T38, Tz, T3c;	       {		    E Tl, Tn, Tk, Tm;		    Tl = cr[WS(rs, 4)];		    Tn = ci[WS(rs, 4)];		    Tk = W[6];		    Tm = W[7];		    To = FMA(Tk, Tl, Tm * Tn);		    T37 = FNMS(Tm, Tl, Tk * Tn);	       }	       {		    E TB, TD, TA, TC;		    TB = cr[WS(rs, 12)];		    TD = ci[WS(rs, 12)];		    TA = W[22];		    TC = W[23];		    TE = FMA(TA, TB, TC * TD);		    T3d = FNMS(TC, TB, TA * TD);	       }	       {		    E Tq, 
Ts, Tp, Tr;		    Tq = cr[WS(rs, 20)];		    Ts = ci[WS(rs, 20)];		    Tp = W[38];		    Tr = W[39];		    Tt = FMA(Tp, Tq, Tr * Ts);		    T38 = FNMS(Tr, Tq, Tp * Ts);	       }	       {		    E Tw, Ty, Tv, Tx;		    Tw = cr[WS(rs, 28)];		    Ty = ci[WS(rs, 28)];		    Tv = W[54];		    Tx = W[55];		    Tz = FMA(Tv, Tw, Tx * Ty);		    T3c = FNMS(Tx, Tw, Tv * Ty);	       }	       {		    E Tu, TF, T5G, T5H;		    Tu = To + Tt;		    TF = Tz + TE;		    TG = Tu + TF;		    T7l = Tu - TF;		    T5G = T3c + T3d;		    T5H = T37 + T38;		    T5I = T5G - T5H;		    T73 = T5H + T5G;	       }	       {		    E T36, T39, T3b, T3e;		    T36 = To - Tt;		    T39 = T37 - T38;		    T3a = T36 + T39;		    T4U = T36 - T39;		    T3b = Tz - TE;		    T3e = T3c - T3d;		    T3f = T3b - T3e;		    T4V = T3b + T3e;	       }	  }	  {	       E TM, T3n, T12, T3k, TR, T3o, TX, T3j;	       {		    E TJ, TL, TI, TK;		    TJ = cr[WS(rs, 2)];		    TL = ci[WS(rs, 2)];		    TI = W[2];		    TK = W[3];		    TM = FMA(TI, TJ, TK * TL);		    T3n = FNMS(TK, TJ, TI * TL);	       }	       {		    E TZ, T11, TY, T10;		    TZ = cr[WS(rs, 26)];		    T11 = ci[WS(rs, 26)];		    TY = W[50];		    T10 = W[51];		    T12 = FMA(TY, TZ, T10 * T11);		    T3k = FNMS(T10, TZ, TY * T11);	       }	       {		    E TO, TQ, TN, TP;		    TO = cr[WS(rs, 18)];		    TQ = ci[WS(rs, 18)];		    TN = W[34];		    TP = W[35];		    TR = FMA(TN, TO, TP * TQ);		    T3o = FNMS(TP, TO, TN * TQ);	       }	       {		    E TU, TW, TT, TV;		    TU = cr[WS(rs, 10)];		    TW = ci[WS(rs, 10)];		    TT = W[18];		    TV = W[19];		    TX = FMA(TT, TU, TV * TW);		    T3j = FNMS(TV, TU, TT * TW);	       }	       {		    E TS, T13, T5L, T5M;		    TS = TM + TR;		    T13 = TX + T12;		    T14 = TS + T13;		    T5K = TS - T13;		    T5L = T3n + T3o;		    T5M = T3j + T3k;		    T5N = T5L - T5M;		    T6F = T5L + T5M;	       }	       {		    E T3i, T3l, T3p, T3q;		    T3i = TM - TR;		    T3l = T3j - T3k;		    T3m = T3i + T3l;		    T4Z = T3i - T3l;		    T3p = T3n - T3o;		    T3q = TX - 
T12;		    T3r = T3p - T3q;		    T4Y = T3p + T3q;	       }	  }	  {	       E T19, T3t, T1p, T3A, T1e, T3u, T1k, T3z;	       {		    E T16, T18, T15, T17;		    T16 = cr[WS(rs, 30)];		    T18 = ci[WS(rs, 30)];		    T15 = W[58];		    T17 = W[59];		    T19 = FMA(T15, T16, T17 * T18);		    T3t = FNMS(T17, T16, T15 * T18);	       }	       {		    E T1m, T1o, T1l, T1n;		    T1m = cr[WS(rs, 22)];		    T1o = ci[WS(rs, 22)];		    T1l = W[42];		    T1n = W[43];		    T1p = FMA(T1l, T1m, T1n * T1o);		    T3A = FNMS(T1n, T1m, T1l * T1o);	       }	       {		    E T1b, T1d, T1a, T1c;		    T1b = cr[WS(rs, 14)];		    T1d = ci[WS(rs, 14)];		    T1a = W[26];		    T1c = W[27];		    T1e = FMA(T1a, T1b, T1c * T1d);		    T3u = FNMS(T1c, T1b, T1a * T1d);	       }	       {		    E T1h, T1j, T1g, T1i;		    T1h = cr[WS(rs, 6)];		    T1j = ci[WS(rs, 6)];		    T1g = W[10];		    T1i = W[11];		    T1k = FMA(T1g, T1h, T1i * T1j);		    T3z = FNMS(T1i, T1h, T1g * T1j);	       }	       {		    E T1f, T1q, T5Q, T5R;		    T1f = T19 + T1e;		    T1q = T1k + T1p;		    T1r = T1f + T1q;		    T5P = T1f - T1q;		    T5Q = T3t + T3u;		    T5R = T3z + T3A;		    T5S = T5Q - T5R;		    T6E = T5Q + T5R;	       }	       {		    E T3v, T3w, T3y, T3B;		    T3v = T3t - T3u;		    T3w = T1k - T1p;		    T3x = T3v - T3w;		    T52 = T3v + T3w;		    T3y = T19 - T1e;		    T3B = T3z - T3A;		    T3C = T3y + T3B;		    T51 = T3y - T3B;	       }	  }	  {	       E T1V, T3M, T20, T3N, T3L, T3O, T26, T3Q, T2b, T3R, T3S, T3T;	       {		    E T1S, T1U, T1R, T1T;		    T1S = cr[WS(rs, 5)];		    T1U = ci[WS(rs, 5)];		    T1R = W[8];		    T1T = W[9];		    T1V = FMA(T1R, T1S, T1T * T1U);		    T3M = FNMS(T1T, T1S, T1R * T1U);	       }	       {		    E T1X, T1Z, T1W, T1Y;		    T1X = cr[WS(rs, 21)];		    T1Z = ci[WS(rs, 21)];		    T1W = W[40];		    T1Y = W[41];		    T20 = FMA(T1W, T1X, T1Y * T1Z);		    T3N = FNMS(T1Y, T1X, T1W * T1Z);	       }	       T3L = T1V - T20;	       T3O = T3M - T3N;	       {		    E T23, T25, T22, T24;		    T23 = cr[WS(rs, 29)];		
    T25 = ci[WS(rs, 29)];		    T22 = W[56];		    T24 = W[57];		    T26 = FMA(T22, T23, T24 * T25);		    T3Q = FNMS(T24, T23, T22 * T25);	       }	       {		    E T28, T2a, T27, T29;		    T28 = cr[WS(rs, 13)];		    T2a = ci[WS(rs, 13)];		    T27 = W[24];		    T29 = W[25];

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -