📄 hc2cb2_32.c
字号:
E T64, T6d, T6f, T69; T64 = T60 * T63; T6d = T6b * T6c; T6f = FNMS(KP980785280, T68, T67); T69 = FMA(KP980785280, T68, T67); { E T5F, T5S, T6a, T6g; T5F = FMA(KP980785280, T5E, T5x); T5S = FNMS(KP980785280, T5E, T5x); T6a = T60 * T69; Ip[WS(rs, 7)] = FNMS(T66, T69, T64); T6g = T6b * T6f; Ip[WS(rs, 15)] = FNMS(T6e, T6f, T6d); { E T5W, T5T, T5Q, T5G; T5W = T5U * T5S; T5T = T5R * T5S; T5Q = T5I * T5F; T5G = T5u * T5F; Im[WS(rs, 7)] = FMA(T66, T63, T6a); Im[WS(rs, 15)] = FMA(T6e, T6c, T6g); Im[WS(rs, 3)] = FMA(T5R, T5V, T5W); Ip[WS(rs, 3)] = FNMS(T5U, T5V, T5T); Im[WS(rs, 11)] = FMA(T5u, T5P, T5Q); Ip[WS(rs, 11)] = FNMS(T5I, T5P, T5G); } } } } }}
/*
 * Twiddle-instruction table for this codelet; `desc` below points at it.
 * It holds four TW_CEXP entries whose last fields are 1, 3, 9 and 27,
 * followed by a terminating TW_NEXT entry.
 * NOTE(review): the last field is presumably the twiddle exponent (powers
 * of 3, in line with the -twiddle-log3 option in the generator command
 * visible further down) -- confirm against the tw_instr declaration,
 * which is not visible in this file.
 */
static const tw_instr twinstr[] = { {TW_CEXP, 1, 1}, {TW_CEXP, 1, 3}, {TW_CEXP, 1, 9}, {TW_CEXP, 1, 27}, {TW_NEXT, 1, 0}};
/*
 * Codelet descriptor handed to the registration call below. Fields, in
 * order: 32, the name string, the twiddle table above, &GENUS, and the
 * four-number tuple {236, 98, 252, 0}.
 * NOTE(review): 32 presumably is the transform size (cf. "-n 32" in the
 * generator command below) and the tuple looks like operation counts for
 * this HAVE_FMA variant -- confirm against the hc2c_desc declaration.
 */
static const hc2c_desc desc = { 32, "hc2cb2_32", twinstr, &GENUS, {236, 98, 252, 0} };
/*
 * Registration entry point: passes the hc2cb2_32 kernel and its descriptor
 * to X(khc2c_register) for planner `p`, with HC2C_VIA_RDFT as the final
 * argument. It performs no other work and returns nothing.
 * This definition sits before the #else of the HAVE_FMA conditional.
 */
void X(codelet_hc2cb2_32) (planner *p) { X(khc2c_register) (p, hc2cb2_32, &desc, HC2C_VIA_RDFT);}
#else /* HAVE_FMA */
/* Generated by: ../../../genfft/gen_hc2c -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hc2cb2_32 -include hc2cb.h */
/* * This function contains 488 FP additions, 280 FP multiplications, * (or, 376 additions, 168 multiplications, 112 fused multiply/add), * 160 stack variables, 7 constants, and 128 memory accesses */
#include "hc2cb.h"
static void hc2cb2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP555570233, +0.555570233019602224742830813948532874374937191); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP195090322, +0.195090322016128267848284868477022240927691618); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp
= Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) { E T11, T14, T12, T15, T17, T2z, T2B, T1c, T18, T1d, T1g, T1k, T2F, T2L, T3t; E T4H, T3h, T3V, T3b, T4v, T4T, T4X, T6t, T71, T6z, T75, T81, T8x, T8f, T8z; E T2R, T2V, T8p, T8t, T4r, T4t, T53, T69, T3n, T3r, T7P, T7T, T4P, T4R, T6F; E T6R, T1f, T2X, T1j, T2Y, T1l, T31, T2d, T2Z, T49, T4h, T4c, T4i, T4d, T4n; E T4f, T4j; { E T2P, T3q, T2U, T3l, T2Q, T3p, T2T, T3m, T2D, T3g, T2K, T39, T2E, T3f, T2J; E T3a; { E T13, T1b, T16, T1a; T11 = W[0]; T14 = W[1]; T12 = W[2]; T15 = W[3]; T13 = T11 * T12; T1b = T14 * T12; T16 = T14 * T15; T1a = T11 * T15; T17 = T13 + T16; T2z = T13 - T16; T2B = T1a + T1b; T1c = T1a - T1b; T18 = W[4]; T2P = T12 * T18; T3q = T14 * T18; T2U = T15 * T18; T3l = T11 * T18; T1d = W[5]; T2Q = T15 * T1d; T3p = T11 * T1d; T2T = T12 * T1d; T3m = T14 * T1d; T1g = W[6]; T2D = T11 * T1g; T3g = T15 * T1g; T2K = T14 * T1g; T39 = T12 * T1g; T1k = W[7]; T2E = T14 * T1k; T3f = T12 * T1k; T2J = T11 * T1k; T3a = T15 * T1k; } T2F = T2D - T2E; T2L = T2J + T2K; T3t = T39 - T3a; T4H = T2J - T2K; T3h = T3f - T3g; T3V = T3f + T3g; T3b = T39 + T3a; T4v = T2D + T2E; T4T = FMA(T18, T1g, T1d * T1k); T4X = FNMS(T1d, T1g, T18 * T1k); { E T6r, T6s, T6x, T6y; T6r = T17 * T1g; T6s = T1c * T1k; T6t = T6r - T6s; T71 = T6r + T6s; T6x = T17 * T1k; T6y = T1c * T1g; T6z = T6x + T6y; T75 = T6x - T6y; } { E T7Z, T80, T8d, T8e; T7Z = T2z * T1g; T80 = T2B * T1k; T81 = T7Z + T80; T8x = T7Z - T80; T8d = T2z * T1k; T8e = T2B * T1g; T8f = T8d - T8e; T8z = T8d + T8e; T2R = T2P - T2Q; T2V = T2T + T2U; T8p = FMA(T2R, T1g, T2V * T1k); T8t = FNMS(T2V, T1g, T2R * T1k); } T4r = T2P + T2Q; T4t = T2T - T2U; T53 = FMA(T4r, T1g, T4t * T1k); T69 = FNMS(T4t, T1g, T4r * T1k); T3n = T3l + T3m; T3r = T3p - T3q; T7P = FMA(T3n, T1g, T3r * T1k); T7T = FNMS(T3r, T1g, T3n * T1k); T4P = T3l - T3m; T4R = T3p + T3q; T6F = FMA(T4P, T1g, T4R * T1k); T6R = FNMS(T4R, T1g, T4P * T1k); { E T19, T1e, T1h, T1i; T19 = T17 * T18; T1e = T1c 
* T1d; T1f = T19 + T1e; T2X = T19 - T1e; T1h = T17 * T1d; T1i = T1c * T18; T1j = T1h - T1i; T2Y = T1h + T1i; } T1l = FMA(T1f, T1g, T1j * T1k); T31 = FNMS(T2Y, T1g, T2X * T1k); T2d = FNMS(T1j, T1g, T1f * T1k); T2Z = FMA(T2X, T1g, T2Y * T1k); { E T47, T48, T4a, T4b; T47 = T2z * T18; T48 = T2B * T1d; T49 = T47 - T48; T4h = T47 + T48; T4a = T2z * T1d; T4b = T2B * T18; T4c = T4a + T4b; T4i = T4a - T4b; } T4d = FMA(T49, T1g, T4c * T1k); T4n = FNMS(T4i, T1g, T4h * T1k); T4f = FNMS(T4c, T1g, T49 * T1k); T4j = FMA(T4h, T1g, T4i * T1k); } { E T56, T7b, T7C, T6c, Tf, T1m, T6f, T7c, T3Y, T4I, T2t, T32, T5d, T7D, T3w; E T4w, Tu, T2e, T7g, T7F, T7j, T7G, T1B, T33, T3z, T40, T5l, T6i, T5s, T6h; E T3C, T3Z, TK, T1D, T7v, T86, T7y, T85, T1S, T35, T3O, T4C, T5F, T6J, T5M; E T6K, T3R, T4D, TZ, T1U, T7o, T89, T7r, T88, T29, T36, T3H, T4z, T5Y, T6M; E T65, T6N, T3K, T4A; { E T3, T54, T2h, T6b, T6, T6a, T2k, T55, Ta, T57, T2o, T58, Td, T5a, T2r; E T5b; { E T1, T2, T2f, T2g; T1 = Rp[0]; T2 = Rm[WS(rs, 15)]; T3 = T1 + T2; T54 = T1 - T2; T2f = Ip[0]; T2g = Im[WS(rs, 15)]; T2h = T2f - T2g; T6b = T2f + T2g; } { E T4, T5, T2i, T2j; T4 = Rp[WS(rs, 8)]; T5 = Rm[WS(rs, 7)]; T6 = T4 + T5; T6a = T4 - T5; T2i = Ip[WS(rs, 8)]; T2j = Im[WS(rs, 7)]; T2k = T2i - T2j; T55 = T2i + T2j; } { E T8, T9, T2m, T2n; T8 = Rp[WS(rs, 4)]; T9 = Rm[WS(rs, 11)]; Ta = T8 + T9; T57 = T8 - T9; T2m = Ip[WS(rs, 4)]; T2n = Im[WS(rs, 11)]; T2o = T2m - T2n; T58 = T2m + T2n; } { E Tb, Tc, T2p, T2q; Tb = Rm[WS(rs, 3)]; Tc = Rp[WS(rs, 12)]; Td = Tb + Tc; T5a = Tb - Tc; T2p = Ip[WS(rs, 12)]; T2q = Im[WS(rs, 3)]; T2r = T2p - T2q; T5b = T2p + T2q; } { E T7, Te, T2l, T2s; T56 = T54 - T55; T7b = T54 + T55; T7C = T6b - T6a; T6c = T6a + T6b; T7 = T3 + T6; Te = Ta + Td; Tf = T7 + Te; T1m = T7 - Te; { E T6d, T6e, T3W, T3X; T6d = T57 + T58; T6e = T5a + T5b; T6f = KP707106781 * (T6d - T6e); T7c = KP707106781 * (T6d + T6e); T3W = T2h - T2k; T3X = Ta - Td; T3Y = T3W - T3X; T4I = T3X + T3W; } T2l = T2h + T2k; T2s = T2o + T2r; T2t = T2l - 
T2s; T32 = T2l + T2s; { E T59, T5c, T3u, T3v; T59 = T57 - T58; T5c = T5a - T5b; T5d = KP707106781 * (T59 + T5c); T7D = KP707106781 * (T59 - T5c); T3u = T3 - T6; T3v = T2r - T2o; T3w = T3u - T3v; T4w = T3u + T3v; } } } { E Ti, T5p, T1w, T5n, Tl, T5m, T1z, T5q, Tp, T5i, T1p, T5g, Ts, T5f, T1s; E T5j; { E Tg, Th, T1u, T1v; Tg = Rp[WS(rs, 2)]; Th = Rm[WS(rs, 13)]; Ti = Tg + Th; T5p = Tg - Th; T1u = Ip[WS(rs, 2)]; T1v = Im[WS(rs, 13)]; T1w = T1u - T1v; T5n = T1u + T1v; } { E Tj, Tk, T1x, T1y; Tj = Rp[WS(rs, 10)]; Tk = Rm[WS(rs, 5)]; Tl = Tj + Tk; T5m = Tj - Tk; T1x = Ip[WS(rs, 10)]; T1y = Im[WS(rs, 5)]; T1z = T1x - T1y; T5q = T1x + T1y; } { E Tn, To, T1n, T1o; Tn = Rm[WS(rs, 1)]; To = Rp[WS(rs, 14)]; Tp = Tn + To; T5i = Tn - To; T1n = Ip[WS(rs, 14)]; T1o = Im[WS(rs, 1)]; T1p = T1n - T1o; T5g = T1n + T1o; } { E Tq, Tr, T1q, T1r; Tq = Rp[WS(rs, 6)]; Tr = Rm[WS(rs, 9)]; Ts = Tq + Tr; T5f = Tq - Tr; T1q = Ip[WS(rs, 6)]; T1r = Im[WS(rs, 9)]; T1s = T1q - T1r; T5j = T1q + T1r; } { E Tm, Tt, T7e, T7f; Tm = Ti + Tl; Tt = Tp + Ts; Tu = Tm + Tt; T2e = Tm - Tt; T7e = T5p + T5q; T7f = T5n - T5m; T7g = FNMS(KP923879532, T7f, KP382683432 * T7e); T7F = FMA(KP382683432, T7f, KP923879532 * T7e); } { E T7h, T7i, T1t, T1A; T7h = T5i + T5j; T7i = T5f + T5g; T7j = FNMS(KP923879532, T7i, KP382683432 * T7h); T7G = FMA(KP382683432, T7i, KP923879532 * T7h); T1t = T1p + T1s; T1A = T1w + T1z; T1B = T1t - T1A; T33 = T1A + T1t; } { E T3x, T3y, T5h, T5k; T3x = T1p - T1s; T3y = Tp - Ts; T3z = T3x - T3y; T40 = T3y + T3x; T5h = T5f - T5g; T5k = T5i - T5j; T5l = FNMS(KP382683432, T5k, KP923879532 * T5h); T6i = FMA(KP382683432, T5h, KP923879532 * T5k); } { E T5o, T5r, T3A, T3B; T5o = T5m + T5n; T5r = T5p - T5q; T5s = FMA(KP923879532, T5o, KP382683432 * T5r); T6h = FNMS(KP382683432, T5o, KP923879532 * T5r); T3A = Ti - Tl; T3B = T1w - T1z; T3C = T3A + T3B; T3Z = T3A - T3B; } } { E Ty, T5v, T1G, T5H, TB, T5G, T1J, T5w, TI, T5K, T1Q, T5D, TF, T5J, T1N; E T5A; { E Tw, Tx, T1H, T1I; Tw = Rp[WS(rs, 1)]; Tx = 
Rm[WS(rs, 14)]; Ty = Tw + Tx; T5v = Tw - Tx; { E T1E, T1F, Tz, TA; T1E = Ip[WS(rs, 1)]; T1F = Im[WS(rs, 14)]; T1G = T1E - T1F; T5H = T1E + T1F; Tz = Rp[WS(rs, 9)]; TA = Rm[WS(rs, 6)]; TB = Tz + TA; T5G = Tz - TA; } T1H = Ip[WS(rs, 9)]; T1I = Im[WS(rs, 6)]; T1J = T1H - T1I; T5w = T1H + T1I; { E TG, TH, T5B, T1O, T1P, T5C; TG = Rm[WS(rs, 2)]; TH = Rp[WS(rs, 13)]; T5B = TG - TH; T1O = Ip[WS(rs, 13)]; T1P = Im[WS(rs, 2)]; T5C = T1O + T1P; TI = TG + TH; T5K = T5B + T5C; T1Q = T1O - T1P; T5D = T5B - T5C; } { E TD, TE, T5y, T1L, T1M, T5z; TD = Rp[WS(rs, 5)]; TE = Rm[WS(rs, 10)]; T5y = TD - TE; T1L = Ip[WS(rs, 5)]; T1M = Im[WS(rs, 10)]; T5z = T1L + T1M; TF = TD + TE; T5J = T5y + T5z; T1N = T1L - T1M; T5A = T5y - T5z; } } { E TC, TJ, T7t, T7u; TC = Ty + TB; TJ = TF + TI; TK = TC + TJ; T1D = TC - TJ; T7t = T5H - T5G; T7u = KP707106781 * (T5A - T5D); T7v = T7t + T7u; T86 = T7t - T7u; } { E T7w, T7x, T1K, T1R; T7w = T5v + T5w; T7x = KP707106781 * (T5J + T5K); T7y = T7w - T7x; T85 = T7w + T7x; T1K = T1G + T1J; T1R = T1N + T1Q; T1S = T1K - T1R; T35 = T1K + T1R; } { E T3M, T3N, T5x, T5E; T3M = T1G - T1J; T3N = TF - TI; T3O = T3M - T3N; T4C = T3N + T3M; T5x = T5v - T5w; T5E = KP707106781 * (T5A + T5D); T5F = T5x - T5E; T6J = T5x + T5E; } { E T5I, T5L, T3P, T3Q; T5I = T5G + T5H; T5L = KP707106781 * (T5J - T5K); T5M = T5I - T5L; T6K = T5I + T5L; T3P = Ty - TB;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -