⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cfdft2_20.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
			      {				   E T1s, T5m, T1g, T5l, T4e, T1E, T3o, T3q;				   {					E T17, T1f, T4b, T4d;					T17 = FNMS(T13, T16, T11);					T1f = FMA(Tm, T1e, T1b);					T4b = FMA(T1o, T1l, T4a);					T4d = FMA(T1A, T1x, T4c);					T1s = FNMS(T1o, T1r, T1m);					T5m = T17 + T1f;					T1g = T17 - T1f;					T5l = T4b + T4d;					T4e = T4b - T4d;					T1E = FNMS(T1A, T1D, T1y);					T3o = FNMS(Tp, T1e, T3n);					T3q = FMA(T13, T10, T3p);				   }				   {					E T3s, T4f, T3r, T1F;					T5Q = T5l - T5m;					T5n = T5l + T5m;					T3s = T1s + T1E;					T1F = T1s - T1E;					T4f = T3q + T3o;					T3r = T3o - T3q;					T3a = T1F + T1g;					T1G = T1g - T1F;					T3U = T3s + T3r;					T3t = T3r - T3s;					T4g = T4e + T4f;					T4Y = T4e - T4f;				   }			      }			 }		    }		    {			 E T4F, T4G, T4H, T4x, T4z, T41, T4O, T4Q, T40;			 {			      E T55, T38, T54, T50, T52, T53, T5e, T5c, T51, T4T;			      {				   E T4W, T37, T4Z, T1H, T5b, T5a, T2K, T2L, T4S, T4R;				   T55 = T4U + T4V;				   T4W = T4U - T4V;				   T37 = T2Y + T36;				   T38 = T36 - T2Y;				   T54 = T4X + T4Y;				   T4Z = T4X - T4Y;				   T1H = TT + T1G;				   T5b = T1G - TT;				   T5a = T2J - T2i;				   T2K = T2i + T2J;				   T50 = FNMS(KP618033988, T4Z, T4W);				   T52 = FMA(KP618033988, T4W, T4Z);				   T2L = T1H + T2K;				   T4S = T1H - T2K;				   T53 = T4D - T4E;				   T4F = T4D + T4E;				   Im[WS(rs, 4)] = KP500000000 * (T2L - T37);				   T4R = FMA(KP250000000, T2L, T37);				   T5e = FMA(KP618033988, T5a, T5b);				   T5c = FNMS(KP618033988, T5b, T5a);				   T51 = FNMS(KP559016994, T4S, T4R);				   T4T = FMA(KP559016994, T4S, T4R);			      }			      {				   E T3b, T4M, T4N, T3e, T3f;				   {					E T4h, T58, T57, T4w, T56, T5d, T59;					T4G = T49 + T4g;					T4h = T49 - T4g;					T58 = T54 - T55;					T56 = T54 + T55;					Ip[WS(rs, 7)] = KP500000000 * (FMA(KP951056516, T50, T4T));					Ip[WS(rs, 3)] = KP500000000 * (FNMS(KP951056516, T50, T4T));					Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP951056516, T52, T51)));					Im[0] = -(KP500000000 * 
(FMA(KP951056516, T52, T51)));					Rm[WS(rs, 4)] = KP500000000 * (T53 + T56);					T57 = FNMS(KP250000000, T56, T53);					T4w = T4o - T4v;					T4H = T4o + T4v;					T3b = T39 + T3a;					T4M = T39 - T3a;					T5d = FMA(KP559016994, T58, T57);					T59 = FNMS(KP559016994, T58, T57);					T4x = FMA(KP618033988, T4w, T4h);					T4z = FNMS(KP618033988, T4h, T4w);					Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5c, T59));					Rp[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5c, T59));					Rm[0] = KP500000000 * (FNMS(KP951056516, T5e, T5d));					Rm[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5e, T5d));					T4N = T3c - T3d;					T3e = T3c + T3d;				   }				   T3f = T3b + T3e;				   T41 = T3b - T3e;				   T4O = FMA(KP618033988, T4N, T4M);				   T4Q = FNMS(KP618033988, T4M, T4N);				   Ip[WS(rs, 5)] = KP500000000 * (T38 + T3f);				   T40 = FNMS(KP250000000, T3f, T38);			      }			 }			 {			      E T3S, T5Z, T68, T6a, T64, T62;			      {				   E T60, T61, T5Y, T5W, T3R, T67, T66, T3K, T5O, T4K, T4J, T5N, T5X, T5P;				   {					E T5S, T5V, T4y, T42, T4I;					T60 = T5R + T5Q;					T5S = T5Q - T5R;					T5V = T5T - T5U;					T61 = T5T + T5U;					T4y = FNMS(KP559016994, T41, T40);					T42 = FMA(KP559016994, T41, T40);					T4I = T4G + T4H;					T4K = T4G - T4H;					Ip[WS(rs, 9)] = KP500000000 * (FMA(KP951056516, T4x, T42));					Ip[WS(rs, 1)] = KP500000000 * (FNMS(KP951056516, T4x, T42));					Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP951056516, T4z, T4y)));					Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP951056516, T4z, T4y)));					Rp[WS(rs, 5)] = KP500000000 * (T4F + T4I);					T4J = FNMS(KP250000000, T4I, T4F);					T5Y = FMA(KP618033988, T5S, T5V);					T5W = FNMS(KP618033988, T5V, T5S);				   }				   T3S = T3Q - T3P;				   T3R = T3P + T3Q;				   {					E T4L, T4P, T3u, T3J;					T4L = FMA(KP559016994, T4K, T4J);					T4P = FNMS(KP559016994, T4K, T4J);					T3u = T3m + T3t;					T67 = T3t - T3m;					T66 = T3I - T3B;					T3J = T3B + T3I;					Rp[WS(rs, 9)] = KP500000000 * (FNMS(KP951056516, T4O, 
T4L));					Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T4O, T4L));					Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T4Q, T4P));					Rm[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T4Q, T4P));					T3K = T3u + T3J;					T5O = T3J - T3u;				   }				   Im[WS(rs, 9)] = KP500000000 * (T3K - T3R);				   T5N = FMA(KP250000000, T3K, T3R);				   T5Z = T5f - T5g;				   T5h = T5f + T5g;				   T68 = FNMS(KP618033988, T67, T66);				   T6a = FMA(KP618033988, T66, T67);				   T5X = FNMS(KP559016994, T5O, T5N);				   T5P = FMA(KP559016994, T5O, T5N);				   Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP951056516, T5W, T5P)));				   Ip[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5W, T5P));				   Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T5Y, T5X)));				   Ip[WS(rs, 2)] = KP500000000 * (FMA(KP951056516, T5Y, T5X));				   T64 = T60 - T61;				   T62 = T60 + T61;			      }			      {				   E T5o, T5v, T5M, T5K, T5A, T5B, T3Z, T5G, T5I, T5J, T63, T5F, T5L, T5H;				   T5o = T5k + T5n;				   T5I = T5k - T5n;				   T5J = T5u - T5r;				   T5v = T5r + T5u;				   Rm[WS(rs, 9)] = KP500000000 * (T5Z + T62);				   T63 = FNMS(KP250000000, T62, T5Z);				   T5M = FMA(KP618033988, T5I, T5J);				   T5K = FNMS(KP618033988, T5J, T5I);				   {					E T65, T69, T3V, T3Y;					T65 = FNMS(KP559016994, T64, T63);					T69 = FMA(KP559016994, T64, T63);					T3V = T3T + T3U;					T5A = T3T - T3U;					T5B = T3W - T3X;					T3Y = T3W + T3X;					Rm[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T68, T65));					Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T68, T65));					Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP951056516, T6a, T69));					Rp[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T6a, T69));					T3Z = T3V + T3Y;					T5G = T3V - T3Y;				   }				   Ip[0] = KP500000000 * (T3S + T3Z);				   T5F = FNMS(KP250000000, T3Z, T3S);				   T5C = FMA(KP618033988, T5B, T5A);				   T5E = FNMS(KP618033988, T5A, T5B);				   T5L = FNMS(KP559016994, T5G, T5F);				   T5H = FMA(KP559016994, T5G, T5F);				   Im[WS(rs, 3)] = 
-(KP500000000 * (FNMS(KP951056516, T5K, T5H)));				   Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T5K, T5H));				   Im[WS(rs, 7)] = -(KP500000000 * (FNMS(KP951056516, T5M, T5L)));				   Ip[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5M, T5L));				   T5y = T5o - T5v;				   T5w = T5o + T5v;			      }			 }		    }	       }	  }	  Rp[0] = KP500000000 * (T5h + T5w);	  T5x = FNMS(KP250000000, T5w, T5h);	  T5D = FNMS(KP559016994, T5y, T5x);	  T5z = FMA(KP559016994, T5y, T5x);	  Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5C, T5z));	  Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T5C, T5z));	  Rm[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5E, T5D));	  Rp[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5E, T5D));     }}/* Twiddle instruction table for this codelet: four TW_CEXP entries whose last fields are 1, 3, 9 and 19, followed by one TW_NEXT entry. NOTE(review): the last field presumably selects which twiddle power is stored -- confirm against the tw_instr definition. */static const tw_instr twinstr[] = {     {TW_CEXP, 1, 1},     {TW_CEXP, 1, 3},     {TW_CEXP, 1, 9},     {TW_CEXP, 1, 19},     {TW_NEXT, 1, 0}};/* Codelet descriptor passed at registration: the number 20, the name string, the twinstr table, &GENUS, and the four numbers {176, 98, 140, 0}. NOTE(review): 20 is presumably the transform size and the four numbers presumably operation counts for this variant -- confirm against the hc2c_desc definition. */static const hc2c_desc desc = { 20, "hc2cfdft2_20", twinstr, &GENUS, {176, 98, 140, 0} };/* Registration entry point: passes planner p, the hc2cfdft2_20 kernel, its descriptor and the HC2C_VIA_DFT flag to X(khc2c_register). */void X(codelet_hc2cfdft2_20) (planner *p) {     X(khc2c_register) (p, hc2cfdft2_20, &desc, HC2C_VIA_DFT);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cfdft2_20 -include hc2cf.h *//* * This function contains 316 FP additions, 180 FP multiplications, * (or, 244 additions, 108 multiplications, 72 fused multiply/add), * 134 stack variables, 5 constants, and 80 memory accesses */#include "hc2cf.h"static void hc2cfdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP125000000, +0.125000000000000000000000000000000000000000000);     DK(KP500000000, +0.500000000000000000000000000000000000000000000);     DK(KP279508497, +0.279508497187473712051146708591409529430077295);     DK(KP293892626, +0.293892626146236564584352977319536384298826219);     DK(KP475528258, +0.475528258147576786058219666689691071702849317);     INT m;     for (m = mb, W = W + ((mb - 1) * 8); m < me; m 
= m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) {	  E T4, T7, Tm, To, Tq, Tu, T1I, T1G, T8, T5, Ta, T1u, T2u, Tg, T2s;	  E T21, T1A, T1Z, T1O, T2I, T1K, T2G, Tw, TC, T2a, T2e, TH, TI, TJ, TX;	  E T2D, TN, T2B, T26, T1n, TZ, T24, T1j;	  {	       E T9, T1y, Te, T1t, T6, T1z, Tf, T1s;	       {		    E Tn, Tt, Tp, Ts;		    T4 = W[0];		    T7 = W[1];		    Tm = W[2];		    To = W[3];		    Tn = T4 * Tm;		    Tt = T7 * Tm;		    Tp = T7 * To;		    Ts = T4 * To;		    Tq = Tn - Tp;		    Tu = Ts + Tt;		    T1I = Ts - Tt;		    T1G = Tn + Tp;		    T8 = W[5];		    T9 = T7 * T8;		    T1y = Tm * T8;		    Te = T4 * T8;		    T1t = To * T8;		    T5 = W[4];		    T6 = T4 * T5;		    T1z = To * T5;		    Tf = T7 * T5;		    T1s = Tm * T5;	       }	       Ta = T6 - T9;	       T1u = T1s + T1t;	       T2u = T1y + T1z;	       Tg = Te + Tf;	       T2s = T1s - T1t;	       T21 = Te - Tf;	       T1A = T1y - T1z;	       T1Z = T6 + T9;	       {		    E T1M, T1N, T1H, T1J;		    T1M = T1G * T8;		    T1N = T1I * T5;		    T1O = T1M + T1N;		    T2I = T1M - T1N;		    T1H = T1G * T5;		    T1J = T1I * T8;		    T1K = T1H - T1J;		    T2G = T1H + T1J;		    {			 E Tr, Tv, TA, TB;			 Tr = Tq * T5;			 Tv = Tu * T8;			 Tw = Tr + Tv;			 TA = Tq * T8;			 TB = Tu * T5;			 TC = TA - TB;			 T2a = Tr - Tv;			 T2e = TA + TB;			 TH = W[6];			 TI = W[7];			 TJ = FMA(Tq, TH, Tu * TI);			 TX = FMA(Tw, TH, TC * TI);			 T2D = FMA(T1G, TH, T1I * TI);			 TN = FNMS(Tu, TH, Tq * TI);			 T2B = FNMS(T1I, TH, T1G * TI);			 T26 = FNMS(T7, TH, T4 * TI);			 T1n = FNMS(To, TH, Tm * TI);			 TZ = FNMS(TC, TH, Tw * TI);			 T24 = FMA(T4, TH, T7 * TI);			 T1j = FMA(Tm, TH, To * TI);		    }	       }	  }	  {	       E Tl, T3n, T1i, T2Q, T47, T50, T4S, T5i, T2M, T2T, T4I, T5f, T4L, T5e, T4P;	       E T5h, T2r, T2S, T1X, T2P, T31, T3u, T36, T3t, T3E, T4l, T3U, T4j, T3h, T3r;	       E T3J, T4m, T3c, T3q, T3P, T4i, TS, T51, T3m, T48;	       {		    E T3, T45, T1V, T3f, Tz, TF, TW, T3A, TM, TQ, T11, 
T3B, Td, Tj, T1Q;		    E T3e, T19, T3L, T23, T39, T2p, T3S, T2z, T34, T1E, T3G, T2K, T2Y, T1g, T3M;		    E T28, T3a, T2i, T3R, T2w, T33, T1r, T3F, T2F, T2X, T4N, T4O;		    {			 E T1, T2, T1R, T1S, T1T, T1U;			 T1 = Ip[0];			 T2 = Im[0];			 T1R = T1 + T2;			 T1S = Rp[0];			 T1T = Rm[0];			 T1U = T1S - T1T;			 T3 = T1 - T2;			 T45 = T1S + T1T;			 T1V = FNMS(T7, T1U, T4 * T1R);			 T3f = FMA(T4, T1U, T7 * T1R);		    }		    {			 E Tx, Ty, TU, TD, TE, TV;			 Tx = Ip[WS(rs, 2)];			 Ty = Im[WS(rs, 2)];			 TU = Tx - Ty;			 TD = Rp[WS(rs, 2)];			 TE = Rm[WS(rs, 2)];			 TV = TD + TE;			 Tz = Tx + Ty;			 TF = TD - TE;			 TW = FNMS(Tu, TV, Tq * TU);			 T3A = FMA(Tu, TU, Tq * TV);		    }		    {			 E TK, TL, TY, TO, TP, T10;			 TK = Ip[WS(rs, 7)];			 TL = Im[WS(rs, 7)];			 TY = TK - TL;			 TO = Rp[WS(rs, 7)];			 TP = Rm[WS(rs, 7)];			 T10 = TO + TP;			 TM = TK + TL;			 TQ = TO - TP;			 T11 = FNMS(TZ, T10, TX * TY);			 T3B = FMA(TZ, TY, TX * T10);		    }		    {			 E Tb, Tc, T1L, Th, Ti, T1P;			 Tb = Ip[WS(rs, 5)];			 Tc = Im[WS(rs, 5)];			 T1L = Tb + Tc;			 Th = Rp[WS(rs, 5)];			 Ti = Rm[WS(rs, 5)];			 T1P = Th - Ti;			 Td = Tb - Tc;			 Tj = Th + Ti;			 T1Q = FNMS(T1O, T1P, T1K * T1L);			 T3e = FMA(T1K, T1P, T1O * T1L);		    }		    {			 E T15, T20, T18, T22;			 {			      E T13, T14, T16, T17;			      T13 = Ip[WS(rs, 4)];			      T14 = Im[WS(rs, 4)];			      T15 = T13 + T14;			      T20 = T13 - T14;			      T16 = Rp[WS(rs, 4)];			      T17 = Rm[WS(rs, 4)];			      T18 = T16 - T17;			      T22 = T16 + T17;			 }			 T19 = FNMS(T8, T18, T5 * T15);			 T3L = FMA(T21, T20, T1Z * T22);			 T23 = FNMS(T21, T22, T1Z * T20);			 T39 = FMA(T8, T15, T5 * T18);		    }		    {			 E T2l, T2x, T2o, T2y;			 {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -