📄 hc2cfdft2_20.c
字号:
{ E T1s, T5m, T1g, T5l, T4e, T1E, T3o, T3q; { E T17, T1f, T4b, T4d; T17 = FNMS(T13, T16, T11); T1f = FMA(Tm, T1e, T1b); T4b = FMA(T1o, T1l, T4a); T4d = FMA(T1A, T1x, T4c); T1s = FNMS(T1o, T1r, T1m); T5m = T17 + T1f; T1g = T17 - T1f; T5l = T4b + T4d; T4e = T4b - T4d; T1E = FNMS(T1A, T1D, T1y); T3o = FNMS(Tp, T1e, T3n); T3q = FMA(T13, T10, T3p); } { E T3s, T4f, T3r, T1F; T5Q = T5l - T5m; T5n = T5l + T5m; T3s = T1s + T1E; T1F = T1s - T1E; T4f = T3q + T3o; T3r = T3o - T3q; T3a = T1F + T1g; T1G = T1g - T1F; T3U = T3s + T3r; T3t = T3r - T3s; T4g = T4e + T4f; T4Y = T4e - T4f; } } } } { E T4F, T4G, T4H, T4x, T4z, T41, T4O, T4Q, T40; { E T55, T38, T54, T50, T52, T53, T5e, T5c, T51, T4T; { E T4W, T37, T4Z, T1H, T5b, T5a, T2K, T2L, T4S, T4R; T55 = T4U + T4V; T4W = T4U - T4V; T37 = T2Y + T36; T38 = T36 - T2Y; T54 = T4X + T4Y; T4Z = T4X - T4Y; T1H = TT + T1G; T5b = T1G - TT; T5a = T2J - T2i; T2K = T2i + T2J; T50 = FNMS(KP618033988, T4Z, T4W); T52 = FMA(KP618033988, T4W, T4Z); T2L = T1H + T2K; T4S = T1H - T2K; T53 = T4D - T4E; T4F = T4D + T4E; Im[WS(rs, 4)] = KP500000000 * (T2L - T37); T4R = FMA(KP250000000, T2L, T37); T5e = FMA(KP618033988, T5a, T5b); T5c = FNMS(KP618033988, T5b, T5a); T51 = FNMS(KP559016994, T4S, T4R); T4T = FMA(KP559016994, T4S, T4R); } { E T3b, T4M, T4N, T3e, T3f; { E T4h, T58, T57, T4w, T56, T5d, T59; T4G = T49 + T4g; T4h = T49 - T4g; T58 = T54 - T55; T56 = T54 + T55; Ip[WS(rs, 7)] = KP500000000 * (FMA(KP951056516, T50, T4T)); Ip[WS(rs, 3)] = KP500000000 * (FNMS(KP951056516, T50, T4T)); Im[WS(rs, 8)] = -(KP500000000 * (FNMS(KP951056516, T52, T51))); Im[0] = -(KP500000000 * (FMA(KP951056516, T52, T51))); Rm[WS(rs, 4)] = KP500000000 * (T53 + T56); T57 = FNMS(KP250000000, T56, T53); T4w = T4o - T4v; T4H = T4o + T4v; T3b = T39 + T3a; T4M = T39 - T3a; T5d = FMA(KP559016994, T58, T57); T59 = FNMS(KP559016994, T58, T57); T4x = FMA(KP618033988, T4w, T4h); T4z = FNMS(KP618033988, T4h, T4w); Rp[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5c, T59)); Rp[WS(rs, 3)] = 
KP500000000 * (FMA(KP951056516, T5c, T59)); Rm[0] = KP500000000 * (FNMS(KP951056516, T5e, T5d)); Rm[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5e, T5d)); T4N = T3c - T3d; T3e = T3c + T3d; } T3f = T3b + T3e; T41 = T3b - T3e; T4O = FMA(KP618033988, T4N, T4M); T4Q = FNMS(KP618033988, T4M, T4N); Ip[WS(rs, 5)] = KP500000000 * (T38 + T3f); T40 = FNMS(KP250000000, T3f, T38); } } { E T3S, T5Z, T68, T6a, T64, T62; { E T60, T61, T5Y, T5W, T3R, T67, T66, T3K, T5O, T4K, T4J, T5N, T5X, T5P; { E T5S, T5V, T4y, T42, T4I; T60 = T5R + T5Q; T5S = T5Q - T5R; T5V = T5T - T5U; T61 = T5T + T5U; T4y = FNMS(KP559016994, T41, T40); T42 = FMA(KP559016994, T41, T40); T4I = T4G + T4H; T4K = T4G - T4H; Ip[WS(rs, 9)] = KP500000000 * (FMA(KP951056516, T4x, T42)); Ip[WS(rs, 1)] = KP500000000 * (FNMS(KP951056516, T4x, T42)); Im[WS(rs, 6)] = -(KP500000000 * (FNMS(KP951056516, T4z, T4y))); Im[WS(rs, 2)] = -(KP500000000 * (FMA(KP951056516, T4z, T4y))); Rp[WS(rs, 5)] = KP500000000 * (T4F + T4I); T4J = FNMS(KP250000000, T4I, T4F); T5Y = FMA(KP618033988, T5S, T5V); T5W = FNMS(KP618033988, T5V, T5S); } T3S = T3Q - T3P; T3R = T3P + T3Q; { E T4L, T4P, T3u, T3J; T4L = FMA(KP559016994, T4K, T4J); T4P = FNMS(KP559016994, T4K, T4J); T3u = T3m + T3t; T67 = T3t - T3m; T66 = T3I - T3B; T3J = T3B + T3I; Rp[WS(rs, 9)] = KP500000000 * (FNMS(KP951056516, T4O, T4L)); Rp[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T4O, T4L)); Rm[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T4Q, T4P)); Rm[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T4Q, T4P)); T3K = T3u + T3J; T5O = T3J - T3u; } Im[WS(rs, 9)] = KP500000000 * (T3K - T3R); T5N = FMA(KP250000000, T3K, T3R); T5Z = T5f - T5g; T5h = T5f + T5g; T68 = FNMS(KP618033988, T67, T66); T6a = FMA(KP618033988, T66, T67); T5X = FNMS(KP559016994, T5O, T5N); T5P = FMA(KP559016994, T5O, T5N); Im[WS(rs, 5)] = -(KP500000000 * (FNMS(KP951056516, T5W, T5P))); Ip[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T5W, T5P)); Im[WS(rs, 1)] = -(KP500000000 * (FNMS(KP951056516, T5Y, T5X))); 
Ip[WS(rs, 2)] = KP500000000 * (FMA(KP951056516, T5Y, T5X)); T64 = T60 - T61; T62 = T60 + T61; } { E T5o, T5v, T5M, T5K, T5A, T5B, T3Z, T5G, T5I, T5J, T63, T5F, T5L, T5H; T5o = T5k + T5n; T5I = T5k - T5n; T5J = T5u - T5r; T5v = T5r + T5u; Rm[WS(rs, 9)] = KP500000000 * (T5Z + T62); T63 = FNMS(KP250000000, T62, T5Z); T5M = FMA(KP618033988, T5I, T5J); T5K = FNMS(KP618033988, T5J, T5I); { E T65, T69, T3V, T3Y; T65 = FNMS(KP559016994, T64, T63); T69 = FMA(KP559016994, T64, T63); T3V = T3T + T3U; T5A = T3T - T3U; T5B = T3W - T3X; T3Y = T3W + T3X; Rm[WS(rs, 1)] = KP500000000 * (FMA(KP951056516, T68, T65)); Rp[WS(rs, 2)] = KP500000000 * (FNMS(KP951056516, T68, T65)); Rm[WS(rs, 5)] = KP500000000 * (FNMS(KP951056516, T6a, T69)); Rp[WS(rs, 6)] = KP500000000 * (FMA(KP951056516, T6a, T69)); T3Z = T3V + T3Y; T5G = T3V - T3Y; } Ip[0] = KP500000000 * (T3S + T3Z); T5F = FNMS(KP250000000, T3Z, T3S); T5C = FMA(KP618033988, T5B, T5A); T5E = FNMS(KP618033988, T5A, T5B); T5L = FNMS(KP559016994, T5G, T5F); T5H = FMA(KP559016994, T5G, T5F); Im[WS(rs, 3)] = -(KP500000000 * (FNMS(KP951056516, T5K, T5H))); Ip[WS(rs, 4)] = KP500000000 * (FMA(KP951056516, T5K, T5H)); Im[WS(rs, 7)] = -(KP500000000 * (FNMS(KP951056516, T5M, T5L))); Ip[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5M, T5L)); T5y = T5o - T5v; T5w = T5o + T5v; } } } } } Rp[0] = KP500000000 * (T5h + T5w); T5x = FNMS(KP250000000, T5w, T5h); T5D = FNMS(KP559016994, T5y, T5x); T5z = FMA(KP559016994, T5y, T5x); Rm[WS(rs, 3)] = KP500000000 * (FMA(KP951056516, T5C, T5z)); Rp[WS(rs, 4)] = KP500000000 * (FNMS(KP951056516, T5C, T5z)); Rm[WS(rs, 7)] = KP500000000 * (FNMS(KP951056516, T5E, T5D)); Rp[WS(rs, 8)] = KP500000000 * (FMA(KP951056516, T5E, T5D)); }}/* The two closing braces above end a kernel body whose beginning is not visible here (presumably the HAVE_FMA variant of hc2cfdft2_20 -- confirm). Twiddle instruction table: four TW_CEXP entries whose last field is 1, 3, 9 and 19, followed by one TW_NEXT entry. NOTE(review): presumably the last field selects a twiddle exponent and TW_NEXT closes the list -- confirm against the tw_instr definition. */static const tw_instr twinstr[] = { {TW_CEXP, 1, 1}, {TW_CEXP, 1, 3}, {TW_CEXP, 1, 9}, {TW_CEXP, 1, 19}, {TW_NEXT, 1, 0}};/* Codelet descriptor: the value 20, the name string, the twinstr table, &GENUS, and a four-entry count list {176, 98, 140, 0}. NOTE(review): 20 matches the "-n 20" in the generator command line quoted later in this file; the four counts look like (additions, multiplications, fused multiply/adds, other) for the preceding variant -- confirm against the hc2c_desc definition. */static const hc2c_desc desc = { 20, "hc2cfdft2_20", twinstr, &GENUS, {176, 98, 140, 0} };/* Registration entry point: passes the planner p, the hc2cfdft2_20 kernel, the address of desc and HC2C_VIA_DFT to X(khc2c_register). */void X(codelet_hc2cfdft2_20) (planner *p) { X(khc2c_register) (p, hc2cfdft2_20, 
&desc, HC2C_VIA_DFT);}#else /* HAVE_FMA *//* NOTE(review): in this whitespace-collapsed text the #else above and the #include below sit in the middle of a line instead of starting their own lines; the matching #if is not visible in this chunk. Line structure needs to be restored before this can be preprocessed. *//* Generated by: ../../../genfft/gen_hc2cdft -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cfdft2_20 -include hc2cf.h *//* This function contains 316 FP additions, 180 FP multiplications (or, 244 additions, 108 multiplications, 72 fused multiply/add), 134 stack variables, 5 constants, and 80 memory accesses. */#include "hc2cf.h"static void hc2cfdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP125000000, +0.125000000000000000000000000000000000000000000); DK(KP500000000, +0.500000000000000000000000000000000000000000000); DK(KP279508497, +0.279508497187473712051146708591409529430077295); DK(KP293892626, +0.293892626146236564584352977319536384298826219); DK(KP475528258, +0.475528258147576786058219666689691071702849317); /* One loop pass per m in [mb, me): Rp and Ip step forward by ms, Rm and Im step backward by ms, and W advances by 8 values per pass; before the first pass W is offset by (mb - 1) * 8. */INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) { E T4, T7, Tm, To, Tq, Tu, T1I, T1G, T8, T5, Ta, T1u, T2u, Tg, T2s; E T21, T1A, T1Z, T1O, T2I, T1K, T2G, Tw, TC, T2a, T2e, TH, TI, TJ, TX; E T2D, TN, T2B, T26, T1n, TZ, T24, T1j; { E T9, T1y, Te, T1t, T6, T1z, Tf, T1s; { E Tn, Tt, Tp, Ts; /* Load W[0]..W[5] and combine them by products, sums and differences into further coefficients; W[6] and W[7] are loaded further down as TH and TI. */T4 = W[0]; T7 = W[1]; Tm = W[2]; To = W[3]; Tn = T4 * Tm; Tt = T7 * Tm; Tp = T7 * To; Ts = T4 * To; Tq = Tn - Tp; Tu = Ts + Tt; T1I = Ts - Tt; T1G = Tn + Tp; T8 = W[5]; T9 = T7 * T8; T1y = Tm * T8; Te = T4 * T8; T1t = To * T8; T5 = W[4]; T6 = T4 * T5; T1z = To * T5; Tf = T7 * T5; T1s = Tm * T5; } Ta = T6 - T9; T1u = T1s + T1t; T2u = T1y + T1z; Tg = Te + Tf; T2s = T1s - T1t; T21 = Te - Tf; T1A = T1y - T1z; T1Z = T6 + T9; { E T1M, T1N, T1H, T1J; T1M = T1G * T8; T1N = T1I * T5; T1O = T1M + T1N; T2I = T1M - T1N; T1H = T1G * T5; T1J = T1I * T8; T1K = T1H - T1J; T2G = T1H + T1J; { E Tr, Tv, TA, TB; Tr = Tq * T5; Tv = Tu * T8; Tw = Tr + Tv; TA = Tq * T8; TB = Tu * T5; TC = TA - TB; T2a = Tr - Tv; T2e = TA + TB; TH = W[6]; TI = W[7]; TJ 
= FMA(Tq, TH, Tu * TI); TX = FMA(Tw, TH, TC * TI); T2D = FMA(T1G, TH, T1I * TI); TN = FNMS(Tu, TH, Tq * TI); T2B = FNMS(T1I, TH, T1G * TI); T26 = FNMS(T7, TH, T4 * TI); T1n = FNMS(To, TH, Tm * TI); TZ = FNMS(TC, TH, Tw * TI); T24 = FMA(T4, TH, T7 * TI); T1j = FMA(Tm, TH, To * TI); } } } { E Tl, T3n, T1i, T2Q, T47, T50, T4S, T5i, T2M, T2T, T4I, T5f, T4L, T5e, T4P; E T5h, T2r, T2S, T1X, T2P, T31, T3u, T36, T3t, T3E, T4l, T3U, T4j, T3h, T3r; E T3J, T4m, T3c, T3q, T3P, T4i, TS, T51, T3m, T48; { E T3, T45, T1V, T3f, Tz, TF, TW, T3A, TM, TQ, T11, T3B, Td, Tj, T1Q; E T3e, T19, T3L, T23, T39, T2p, T3S, T2z, T34, T1E, T3G, T2K, T2Y, T1g, T3M; E T28, T3a, T2i, T3R, T2w, T33, T1r, T3F, T2F, T2X, T4N, T4O; { E T1, T2, T1R, T1S, T1T, T1U; T1 = Ip[0]; T2 = Im[0]; T1R = T1 + T2; T1S = Rp[0]; T1T = Rm[0]; T1U = T1S - T1T; T3 = T1 - T2; T45 = T1S + T1T; T1V = FNMS(T7, T1U, T4 * T1R); T3f = FMA(T4, T1U, T7 * T1R); } { E Tx, Ty, TU, TD, TE, TV; Tx = Ip[WS(rs, 2)]; Ty = Im[WS(rs, 2)]; TU = Tx - Ty; TD = Rp[WS(rs, 2)]; TE = Rm[WS(rs, 2)]; TV = TD + TE; Tz = Tx + Ty; TF = TD - TE; TW = FNMS(Tu, TV, Tq * TU); T3A = FMA(Tu, TU, Tq * TV); } { E TK, TL, TY, TO, TP, T10; TK = Ip[WS(rs, 7)]; TL = Im[WS(rs, 7)]; TY = TK - TL; TO = Rp[WS(rs, 7)]; TP = Rm[WS(rs, 7)]; T10 = TO + TP; TM = TK + TL; TQ = TO - TP; T11 = FNMS(TZ, T10, TX * TY); T3B = FMA(TZ, TY, TX * T10); } { E Tb, Tc, T1L, Th, Ti, T1P; Tb = Ip[WS(rs, 5)]; Tc = Im[WS(rs, 5)]; T1L = Tb + Tc; Th = Rp[WS(rs, 5)]; Ti = Rm[WS(rs, 5)]; T1P = Th - Ti; Td = Tb - Tc; Tj = Th + Ti; T1Q = FNMS(T1O, T1P, T1K * T1L); T3e = FMA(T1K, T1P, T1O * T1L); } { E T15, T20, T18, T22; { E T13, T14, T16, T17; T13 = Ip[WS(rs, 4)]; T14 = Im[WS(rs, 4)]; T15 = T13 + T14; T20 = T13 - T14; T16 = Rp[WS(rs, 4)]; T17 = Rm[WS(rs, 4)]; T18 = T16 - T17; T22 = T16 + T17; } T19 = FNMS(T8, T18, T5 * T15); T3L = FMA(T21, T20, T1Z * T22); T23 = FNMS(T21, T22, T1Z * T20); T39 = FMA(T8, T15, T5 * T18); } { E T2l, T2x, T2o, T2y; {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -