⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cfdft_20.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
					     T3V = T36 + T34;					     T37 = T34 - T36;					     T51 = T3R + T3T;					     T3U = T3R - T3T;					     T38 = T1c + T1m;					     T1n = T1c - T1m;					     T5w = T51 - T52;					     T53 = T51 + T52;					     T2Q = T1n + T12;					     T1o = T12 - T1n;					     T3A = T38 + T37;					     T39 = T37 - T38;					}				   }			      }			 }		    }	       }	       {		    E T4l, T4m, T4n, T4w, T4u;		    {			 E T4L, T2O, T3W, T4K, T4I, T4G, T4S, T4U, T4J, T4z, T4H;			 {			      E T4C, T2N, T4R, T1p, T4E, T2q, T4Q;			      T4L = T4A + T4B;			      T4C = T4A - T4B;			      T2N = T2E + T2M;			      T2O = T2M - T2E;			      T4R = T1o - TH;			      T1p = TH + T1o;			      T4E = T3U - T3V;			      T3W = T3U + T3V;			      T2q = T1Y + T2p;			      T4Q = T2p - T1Y;			      {				   E T4y, T4x, T4F, T2r;				   T4F = T4D - T4E;				   T4K = T4D + T4E;				   T4y = T1p - T2q;				   T2r = T1p + T2q;				   T4I = FMA(KP618033988, T4C, T4F);				   T4G = FNMS(KP618033988, T4F, T4C);				   T4S = FNMS(KP618033988, T4R, T4Q);				   T4U = FMA(KP618033988, T4Q, T4R);				   Im[WS(rs, 4)] = KP500000000 * (T2r - T2N);				   T4x = FMA(KP250000000, T2r, T2N);				   T4J = T4j - T4k;				   T4l = T4j + T4k;				   T4z = FMA(KP559016994, T4y, T4x);				   T4H = FNMS(KP559016994, T4y, T4x);			      }			 }			 {			      E T2R, T4s, T4d, T4f, T4t, T2U, T4P, T4T;			      {				   E T3X, T4O, T4M, T4c, T4N;				   T4m = T3P + T3W;				   T3X = T3P - T3W;				   Ip[WS(rs, 7)] = KP500000000 * (FMA(KP951056516, T4G, T4z));				   Ip[WS(rs, 3)] = KP500000000 * (FNMS(KP951056516, T4G, T4z));				   Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP951056516, T4I, T4H)));				   Im[0] = -(KP500000000 * (FMA(KP951056516, T4I, T4H)));				   T4O = T4K - T4L;				   T4M = T4K + T4L;				   T4c = T44 - T4b;				   T4n = T44 + T4b;				   T2R = T2P + T2Q;				   T4s = T2P - T2Q;				   Rm[WS(rs, 4)] = KP500000000 * (T4J + T4M);				   T4N = FNMS(KP250000000, T4M, T4J);				   T4d = FMA(KP618033988, T4c, T3X);				   T4f = FNMS(KP618033988, T3X, T4c);				   T4t = T2S - T2T;				   T2U = T2S + T2T;				   T4P = FNMS(KP559016994, T4O, T4N);				   T4T = FMA(KP559016994, T4O, T4N);			      }			      {				   E T3H, T3G, T2V, T3I, T4e;				   T2V = T2R + T2U;				   T3H = T2R - T2U;				   Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T4S, T4P));				   Rp[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T4S, T4P));				   Rm[0] = KP500000000 * (FNMS(KP951056516, T4U, T4T));				   Rm[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T4U, T4T));				   Ip[WS(rs, 5)] = KP500000000 * (T2O + T2V);				   T3G = FNMS(KP250000000, T2V, T2O);				   T3I = FMA(KP559016994, T3H, T3G);				   T4e = FNMS(KP559016994, T3H, T3G);				   T4w = FNMS(KP618033988, T4s, T4t);				   T4u = FMA(KP618033988, T4t, T4s);				   Ip[WS(rs, 9)] = KP500000000 * (FMA(KP951056516, T4d, T3I));				   Ip[WS(rs, 1)] = KP500000000 * (FNMS(KP951056516, T4d, T3I));				   Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP951056516, T4f, T4e)));				   Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP951056516, T4f, T4e)));			      }			 }		    }		    {			 E T3y, T5O, T5Q, T5F, T5K, T5I;			 {			      E T5G, T5H, T3x, T4q, T5E, T5C, T3a, T5N, T4p, T5M, T3p, T5y, T5B, T4o;			      T5G = T5x + T5w;			      T5y = T5w - T5x;			      T5B = T5z - T5A;			      T5H = T5z + T5A;			      T3y = T3w - T3v;			      T3x = T3v + T3w;			      T4q = T4m - T4n;			      T4o = T4m + T4n;			      T5E = FMA(KP618033988, T5y, T5B);			      T5C = FNMS(KP618033988, T5B, T5y);			      T3a = T32 + T39;			      T5N = T39 - T32;			      Rp[WS(rs, 5)] = KP500000000 * (T4l + T4o);			      T4p = FNMS(KP250000000, T4o, T4l);			      T5M = T3o - T3h;			      T3p = T3h + T3o;			      {				   E T5u, T5t, T4r, T4v, T3q, T5D, T5v;				   T4r = FMA(KP559016994, T4q, T4p);				   T4v = FNMS(KP559016994, T4q, T4p);				   T5u = T3p - T3a;				   T3q = T3a + T3p;				   Rp[WS(rs, 9)] = KP500000000 * (FNMS(KP951056516, T4u, T4r));				   Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T4u, T4r));				   Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T4w, T4v));				   Rm[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T4w, T4v));				   Im[WS(rs, 9)] = KP500000000 * (T3q - T3x);				   T5t = FMA(KP250000000, T3q, T3x);				   T5O = FNMS(KP618033988, T5N, T5M);				   T5Q = FMA(KP618033988, T5M, T5N);				   T5F = T4V - T4W;				   T4X = T4V + T4W;				   T5D = FNMS(KP559016994, T5u, T5t);				   T5v = FMA(KP559016994, T5u, T5t);				   Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP951056516, T5C, T5v)));				   Ip[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5C, T5v));				   Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T5E, T5D)));				   Ip[WS(rs, 2)] = KP500000000 * (FMA(KP951056516, T5E, T5D));				   T5K = T5G - T5H;				   T5I = T5G + T5H;			      }			 }			 {			      E T54, T5b, T5s, T5q, T5g, T5h, T3F, T5m, T5o, T5p, T5J, T5l, T5r, T5n;			      T54 = T50 + T53;			      T5o = T50 - T53;			      T5p = T5a - T57;			      T5b = T57 + T5a;			      Rm[WS(rs, 9)] = KP500000000 * (T5F + T5I);			      T5J = FNMS(KP250000000, T5I, T5F);			      T5s = FMA(KP618033988, T5o, T5p);			      T5q = FNMS(KP618033988, T5p, T5o);			      {				   E T5L, T5P, T3B, T3E;				   T5L = FNMS(KP559016994, T5K, T5J);				   T5P = FMA(KP559016994, T5K, T5J);				   T3B = T3z + T3A;				   T5g = T3z - T3A;				   T5h = T3C - T3D;				   T3E = T3C + T3D;				   Rm[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T5O, T5L));				   Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T5O, T5L));				   Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP951056516, T5Q, T5P));				   Rp[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5Q, T5P));				   T3F = T3B + T3E;				   T5m = T3B - T3E;			      }			      Ip[0] = KP500000000 * (T3y + T3F);			      T5l = FNMS(KP250000000, T3F, T3y);			      T5i = FMA(KP618033988, T5h, T5g);			      T5k = FNMS(KP618033988, T5g, T5h);			      T5r = FNMS(KP559016994, T5m, T5l);			      T5n = FMA(KP559016994, T5m, T5l);			      Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP951056516, T5q, T5n)));			      Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T5q, T5n));			      Im[WS(rs, 7)] = -(KP500000000 * (FNMS(KP951056516, T5s, T5r)));			      Ip[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5s, T5r));			      T5e = T54 - T5b;			      T5c = T54 + T5b;			 }		    }	       }	  }	  Rp[0] = KP500000000 * (T4X + T5c);	  T5d = FNMS(KP250000000, T5c, T4X);	  T5j = FNMS(KP559016994, T5e, T5d);	  T5f = FMA(KP559016994, T5e, T5d);	  Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5i, T5f));	  Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T5i, T5f));	  Rm[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5k, T5j));	  Rp[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5k, T5j));     }}static const tw_instr twinstr[] = {     {TW_FULL, 1, 20},     {TW_NEXT, 1, 0}};static const hc2c_desc desc = { 20, "hc2cfdft_20", twinstr, &GENUS, {176, 78, 110, 0} };void X(codelet_hc2cfdft_20) (planner *p) {     X(khc2c_register) (p, hc2cfdft_20, &desc, HC2C_VIA_DFT);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -n 20 -dit -name hc2cfdft_20 -include hc2cf.h *//* * This function contains 286 FP additions, 140 FP multiplications, * (or, 224 additions, 78 multiplications, 62 fused multiply/add), * 98 stack variables, 5 constants, and 80 memory accesses */#include "hc2cf.h"static void hc2cfdft_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP125000000, +0.125000000000000000000000000000000000000000000);     DK(KP500000000, +0.500000000000000000000000000000000000000000000);     DK(KP279508497, +0.279508497187473712051146708591409529430077295);     DK(KP293892626, +0.293892626146236564584352977319536384298826219);     DK(KP475528258, +0.475528258147576786058219666689691071702849317);     INT m;     for (m = mb, W = W + ((mb - 1) * 38); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 38, MAKE_VOLATILE_STRIDE(rs)) {	  E T12, T2w, T4o, T4V, T2H, T3a, T4y, T4Y, T1z, T2v, T25, T2y, T2s, T2z, T4v;	  E T4X, T4r, T4U, T3A, T3Z, T2X, T37, T3k, T41, T2M, T39, T3v, T3Y, T2S, T36;	  E T3p, T42, Td, T4G, T33, T3N, Tw, T4H, T32, T3O;	  {	       E T3, T3L, T1x, T2V, Th, Tl, TC, T3g, Tq, Tu, TH, T3h, T7, Tb, T1q;	       E T2U, TR, T2P, T1F, T3r, T23, T2K, T2f, T3y, T1k, T3m, T2q, T2E, T10, T2Q;	       E T1K, T3s, T1U, T2J, T2a, T3x, T1b, T3l, T2l, T2D;	       {		    E T1, T2, T1s, T1u, T1v, T1w, T1r, T1t;		    T1 = Ip[0];		    T2 = Im[0];		    T1s = T1 + T2;		    T1u = Rp[0];		    T1v = Rm[0];		    T1w = T1u - T1v;		    T3 = T1 - T2;		    T3L = T1u + T1v;		    T1r = W[0];		    T1t = W[1];		    T1x = FNMS(T1t, T1w, T1r * T1s);		    T2V = FMA(T1r, T1w, T1t * T1s);	       }	       {		    E Tf, Tg, Tz, Tj, Tk, TB, Ty, TA;		    Tf = Ip[WS(rs, 2)];		    Tg = Im[WS(rs, 2)];		    Tz = Tf - Tg;		    Tj = Rp[WS(rs, 2)];		    Tk = Rm[WS(rs, 2)];		    TB = Tj + Tk;		    Th = Tf + Tg;		    Tl = Tj - Tk;		    Ty = W[6];		    TA = W[7];		    TC = FNMS(TA, TB, Ty * Tz);		    T3g = FMA(TA, Tz, Ty * TB);	       }	       {		    E To, Tp, TE, Ts, Tt, TG, TD, TF;		    To = Ip[WS(rs, 7)];		    Tp = Im[WS(rs, 7)];		    TE = To - Tp;		    Ts = Rp[WS(rs, 7)];		    Tt = Rm[WS(rs, 7)];		    TG = Ts + Tt;		    Tq = To + Tp;		    Tu = Ts - Tt;		    TD = W[26];		    TF = W[27];		    TH = FNMS(TF, TG, TD * TE);		    T3h = FMA(TF, TE, TD * TG);	       }	       {		    E T5, T6, T1n, T9, Ta, T1p, T1m, T1o;		    T5 = Ip[WS(rs, 5)];		    T6 = Im[WS(rs, 5)];		    T1n = T5 + T6;		    T9 = Rp[WS(rs, 5)];		    Ta = Rm[WS(rs, 5)];		    T1p = T9 - Ta;		    T7 = T5 - T6;		    Tb = T9 + Ta;		    T1m = W[20];		    T1o = W[21];		    T1q = FNMS(T1o, T1p, T1m * T1n);		    T2U = FMA(T1m, T1p, T1o * T1n);	       }	       {		    E TM, T1C, TQ, T1E;		    {			 E TK, TL, TO, TP;			 TK = Ip[WS(rs, 4)];			 TL = Im[WS(rs, 4)];			 TM = TK + TL;			 T1C = TK - TL;			 TO = Rp[WS(rs, 4)];			 TP = Rm[WS(rs, 4)];			 TQ = TO - TP;			 T1E = TO + TP;		    }		    {			 E TJ, TN, T1B, T1D;			 TJ = W[16];			 TN = W[17];			 TR = FNMS(TN, TQ, TJ * TM);			 T2P = FMA(TN, TM, TJ * TQ);			 T1B = W[14];			 T1D = W[15];			 T1F = FNMS(T1D, T1E, T1B * T1C);			 T3r = FMA(T1D, T1C, T1B * T1E);		    }	       }	       {		    E T1Y, T2c, T22, T2e;		    {			 E T1W, T1X, T20, T21;			 T1W = Ip[WS(rs, 1)];			 T1X = Im[WS(rs, 1)];			 T1Y = T1W + T1X;			 T2c = T1W - T1X;			 T20 = Rp[WS(rs, 1)];			 T21 = Rm[WS(rs, 1)];			 T22 = T20 - T21;			 T2e = T20 + T21;		    }		    {			 E T1V, T1Z, T2b, T2d;			 T1V = W[4];			 T1Z = W[5];			 T23 = FNMS(T1Z, T22, T1V * T1Y);			 T2K = FMA(T1Z, T1Y, T1V * T22);			 T2b = W[2];			 T2d = W[3];			 T2f = FNMS(T2d, T2e, T2b * T2c);			 T3y = FMA(T2d, T2c, T2b * T2e);		    }	       }	       {		    E T1f, T2n, T1j, T2p;		    {			 E T1d, T1e, T1h, T1i;			 T1d = Ip[WS(rs, 3)];			 T1e = Im[WS(rs, 3)];			 T1f = T1d - T1e;			 T2n = T1d + T1e;			 T1h = Rp[WS(rs, 3)];			 T1i = Rm[WS(rs, 3)];			 T1j = T1h + T1i;			 T2p = T1h - T1i;		    }		    {			 E T1c, T1g, T2m, T2o;			 T1c = W[10];			 T1g = W[11];			 T1k = FNMS(T1g, T1j, T1c * T1f);			 T3m = FMA(T1c, T1j, T1g * T1f);			 T2m = W[12];			 T2o = W[13];			 T2q = FNMS(T2o, T2p, T2m * T2n);			 T2E = FMA(T2m, T2p, T2o * T2n);		    }	       }	       {		    E TV, T1H, TZ, T1J;		    {			 E TT, TU, TX, TY;			 TT = Ip[WS(rs, 9)];			 TU = Im[WS(rs, 9)];			 TV = TT + TU;			 T1H = TT - TU;			 TX = Rp[WS(rs, 9)];			 TY = Rm[WS(rs, 9)];			 TZ = TX - TY;			 T1J = TX + TY;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -