📄 t2_25.c
字号:
T77 = FMA(KP968479752, T76, T73); T8e = FNMS(KP968479752, T76, T73); T8a = FMA(KP906616052, T89, T88); T8c = FNMS(KP906616052, T89, T88); T78 = FMA(KP906616052, T77, T70); T7a = FNMS(KP906616052, T77, T70); T6H = FNMS(KP249506682, T6G, T6b); ii[WS(rs, 2)] = FNMS(KP998026728, T8a, T87); ri[WS(rs, 2)] = FMA(KP998026728, T78, T6T); T8u = FNMS(KP614372930, T8q, T8r); T8s = FMA(KP621716863, T8r, T8q); T6J = FNMS(KP557913902, T6I, T6H); T6R = FMA(KP557913902, T6I, T6H); T8n = FMA(KP249506682, T8m, T8j); ri[WS(rs, 18)] = FNMS(KP949179823, T6S, T6R); ri[WS(rs, 13)] = FMA(KP949179823, T6S, T6R); ri[WS(rs, 8)] = FMA(KP943557151, T6Q, T6J); ri[WS(rs, 23)] = FNMS(KP943557151, T6Q, T6J); T8t = FNMS(KP557913902, T8o, T8n); T8p = FMA(KP557913902, T8o, T8n); } T7k = FNMS(KP560319534, T7e, T7h); T7i = FMA(KP681693190, T7h, T7e); ii[WS(rs, 23)] = FMA(KP943557151, T8s, T8p); ii[WS(rs, 8)] = FNMS(KP943557151, T8s, T8p); ii[WS(rs, 13)] = FMA(KP949179823, T8u, T8t); ii[WS(rs, 18)] = FNMS(KP949179823, T8u, T8t); T79 = FNMS(KP249506682, T78, T6T); T8i = FNMS(KP560319534, T8e, T8f); T8g = FMA(KP681693190, T8f, T8e); T8b = FMA(KP249506682, T8a, T87); T7j = FMA(KP557913902, T7a, T79); T7b = FNMS(KP557913902, T7a, T79); } } } } } } } ri[WS(rs, 12)] = FNMS(KP949179823, T7k, T7j); ri[WS(rs, 17)] = FMA(KP949179823, T7k, T7j); ri[WS(rs, 7)] = FMA(KP860541664, T7i, T7b); ri[WS(rs, 22)] = FNMS(KP860541664, T7i, T7b); T8d = FMA(KP557913902, T8c, T8b); T8h = FNMS(KP557913902, T8c, T8b); ii[WS(rs, 12)] = FNMS(KP949179823, T8i, T8h); ii[WS(rs, 17)] = FMA(KP949179823, T8i, T8h); ii[WS(rs, 22)] = FNMS(KP860541664, T8g, T8d); ii[WS(rs, 7)] = FMA(KP860541664, T8g, T8d); }}/* Twiddle instruction table for t2_25: four TW_CEXP entries (last field 1, 3, 9, 24) closed by one TW_NEXT entry. NOTE(review): the last field presumably names the twiddle exponent to store -- confirm against the tw_instr declaration. */static const tw_instr twinstr[] = { {TW_CEXP, 0, 1}, {TW_CEXP, 0, 3}, {TW_CEXP, 0, 9}, {TW_CEXP, 0, 24}, {TW_NEXT, 1, 0}};/* Codelet descriptor: first field 25, the name string "t2_25", the twinstr table, &GENUS, the quadruple {84, 78, 356, 0}, and three trailing zeros. NOTE(review): 25 is presumably the radix and the quadruple presumably operation counts (adds, muls, fmas, other) -- confirm against the ct_desc declaration. */static const ct_desc desc = { 25, "t2_25", twinstr, &GENUS, {84, 78, 356, 0}, 0, 0, 0 };/* Registration hook: passes planner p, the t2_25 kernel and &desc to X(kdft_dit_register). */void X(codelet_t2_25) (planner *p) { X(kdft_dit_register) (p, t2_25, &desc);}#else /* HAVE_FMA *//* Generated by: 
../../../genfft/gen_twiddle -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 25 -name t2_25 -include t.h *//* * This function contains 440 FP additions, 340 FP multiplications, * (or, 280 additions, 180 multiplications, 160 fused multiply/add), * 149 stack variables, 20 constants, and 100 memory accesses */#include "t.h"static void t2_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP998026728, +0.998026728428271561952336806863450553336905220); DK(KP062790519, +0.062790519529313376076178224565631133122484832); DK(KP425779291, +0.425779291565072648862502445744251703979973042); DK(KP904827052, +0.904827052466019527713668647932697593970413911); DK(KP992114701, +0.992114701314477831049793042785778521453036709); DK(KP125333233, +0.125333233564304245373118759816508793942918247); DK(KP637423989, +0.637423989748689710176712811676016195434917298); DK(KP770513242, +0.770513242775789230803009636396177847271667672); DK(KP684547105, +0.684547105928688673732283357621209269889519233); DK(KP728968627, +0.728968627421411523146730319055259111372571664); DK(KP481753674, +0.481753674101715274987191502872129653528542010); DK(KP876306680, +0.876306680043863587308115903922062583399064238); DK(KP844327925, +0.844327925502015078548558063966681505381659241); DK(KP535826794, +0.535826794978996618271308767867639978063575346); DK(KP248689887, +0.248689887164854788242283746006447968417567406); DK(KP968583161, +0.968583161128631119490168375464735813836012403); DK(KP587785252, +0.587785252292473129168705954639072768597652438); DK(KP951056516, +0.951056516295153572116439333379382143405698634); DK(KP250000000, +0.250000000000000000000000000000000000000000000); DK(KP559016994, +0.559016994374947424102293417182819058860154590); INT m; for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) { E T2, T5, T3, T6, T8, Td, T16, T14, Te, T9, T21, T23, Tx, TR, T1g; E TB, T1f, TV, T1Q, Tg, T1S, Tk, 
T18, T2s, T1c, T2q, Tn, To, Tp, Tr; E T28, T2x, TY, T2k, T2m, T2v, TG, TE, T10, T1h, T1E, T26, T1B, T1G, T1V; E T1X, T1z, T1j; { E Tw, TT, Tz, TQ, Tv, TU, TA, TP; { E T4, Tc, T7, Tb; T2 = W[0]; T5 = W[1]; T3 = W[2]; T6 = W[3]; T4 = T2 * T3; Tc = T5 * T3; T7 = T5 * T6; Tb = T2 * T6; T8 = T4 - T7; Td = Tb + Tc; T16 = Tb - Tc; T14 = T4 + T7; Te = W[5]; Tw = T5 * Te; TT = T3 * Te; Tz = T2 * Te; TQ = T6 * Te; T9 = W[4]; Tv = T2 * T9; TU = T6 * T9; TA = T5 * T9; TP = T3 * T9; } T21 = TP - TQ; T23 = TT + TU; { E T15, T17, Ta, Tf, T1a, T1b, Ti, Tj; Tx = Tv - Tw; TR = TP + TQ; T1g = Tz - TA; TB = Tz + TA; T1f = Tv + Tw; TV = TT - TU; T15 = T14 * T9; T17 = T16 * Te; T1Q = T15 + T17; Ta = T8 * T9; Tf = Td * Te; Tg = Ta + Tf; T1a = T14 * Te; T1b = T16 * T9; T1S = T1a - T1b; Ti = T8 * Te; Tj = Td * T9; Tk = Ti - Tj; T18 = T15 - T17; T2s = Ti + Tj; T1c = T1a + T1b; T2q = Ta - Tf; Tn = W[6]; To = W[7]; Tp = FMA(T8, Tn, Td * To); Tr = FNMS(Td, Tn, T8 * To); T28 = FNMS(T1S, Tn, T1Q * To); T2x = FNMS(TV, Tn, TR * To); TY = FMA(T3, Tn, T6 * To); T2k = FMA(T2, Tn, T5 * To); T2m = FNMS(T5, Tn, T2 * To); T2v = FMA(TR, Tn, TV * To); TG = FNMS(Te, Tn, T9 * To); TE = FMA(T9, Tn, Te * To); T10 = FNMS(T6, Tn, T3 * To); T1h = FMA(T1f, Tn, T1g * To); T1E = FMA(Tg, Tn, Tk * To); T26 = FMA(T1Q, Tn, T1S * To); T1B = FNMS(TB, Tn, Tx * To); T1G = FNMS(Tk, Tn, Tg * To); T1V = FMA(T14, Tn, T16 * To); T1X = FNMS(T16, Tn, T14 * To); T1z = FMA(Tx, Tn, TB * To); T1j = FNMS(T1g, Tn, T1f * To); } } { E T1, T6v, T2F, T6I, TK, T2G, T6u, T6J, T6N, T7c, T2O, T52, T2C, T6k, T48; E T5X, T4L, T5s, T4j, T5W, T4K, T5v, T1o, T6g, T30, T5M, T4A, T56, T3b, T5N; E T4B, T59, T1L, T6h, T3n, T5Q, T4D, T5g, T3y, T5P, T4E, T5d, T2d, T6j, T3L; E T5T, T4I, T5l, T3W, T5U, T4H, T5o; { E Tm, T2I, Tt, T2J, Tu, T6s, TD, T2L, TI, T2M, TJ, T6t; T1 = ri[0]; T6v = ii[0]; { E Th, Tl, Tq, Ts; Th = ri[WS(rs, 5)]; Tl = ii[WS(rs, 5)]; Tm = FMA(Tg, Th, Tk * Tl); T2I = FNMS(Tk, Th, Tg * Tl); Tq = ri[WS(rs, 20)]; Ts = ii[WS(rs, 20)]; Tt = 
FMA(Tp, Tq, Tr * Ts); T2J = FNMS(Tr, Tq, Tp * Ts); } Tu = Tm + Tt; T6s = T2I + T2J; { E Ty, TC, TF, TH; Ty = ri[WS(rs, 10)]; TC = ii[WS(rs, 10)]; TD = FMA(Tx, Ty, TB * TC); T2L = FNMS(TB, Ty, Tx * TC); TF = ri[WS(rs, 15)]; TH = ii[WS(rs, 15)]; TI = FMA(TE, TF, TG * TH); T2M = FNMS(TG, TF, TE * TH); } TJ = TD + TI; T6t = T2L + T2M; T2F = KP559016994 * (Tu - TJ); T6I = KP559016994 * (T6s - T6t); TK = Tu + TJ; T2G = FNMS(KP250000000, TK, T1); T6u = T6s + T6t; T6J = FNMS(KP250000000, T6u, T6v); { E T6L, T6M, T2K, T2N; T6L = Tm - Tt; T6M = TD - TI; T6N = FMA(KP951056516, T6L, KP587785252 * T6M); T7c = FNMS(KP587785252, T6L, KP951056516 * T6M); T2K = T2I - T2J; T2N = T2L - T2M; T2O = FMA(KP951056516, T2K, KP587785252 * T2N); T52 = FNMS(KP587785252, T2K, KP951056516 * T2N); } } { E T2g, T4c, T43, T46, T4h, T4g, T49, T4a, T4d, T2p, T2A, T2B, T2e, T2f; T2e = ri[WS(rs, 3)]; T2f = ii[WS(rs, 3)]; T2g = FMA(T3, T2e, T6 * T2f); T4c = FNMS(T6, T2e, T3 * T2f); { E T2j, T41, T2z, T45, T2o, T42, T2u, T44; { E T2h, T2i, T2w, T2y; T2h = ri[WS(rs, 8)]; T2i = ii[WS(rs, 8)]; T2j = FMA(T1f, T2h, T1g * T2i); T41 = FNMS(T1g, T2h, T1f * T2i); T2w = ri[WS(rs, 18)]; T2y = ii[WS(rs, 18)]; T2z = FMA(T2v, T2w, T2x * T2y); T45 = FNMS(T2x, T2w, T2v * T2y); } { E T2l, T2n, T2r, T2t; T2l = ri[WS(rs, 23)]; T2n = ii[WS(rs, 23)]; T2o = FMA(T2k, T2l, T2m * T2n); T42 = FNMS(T2m, T2l, T2k * T2n); T2r = ri[WS(rs, 13)]; T2t = ii[WS(rs, 13)]; T2u = FMA(T2q, T2r, T2s * T2t); T44 = FNMS(T2s, T2r, T2q * T2t); } T43 = T41 - T42; T46 = T44 - T45; T4h = T2u - T2z; T4g = T2j - T2o; T49 = T41 + T42; T4a = T44 + T45; T4d = T49 + T4a; T2p = T2j + T2o; T2A = T2u + T2z; T2B = T2p + T2A; } T2C = T2g + T2B; T6k = T4c + T4d; { E T47, T5r, T40, T5q, T3Y, T3Z; T47 = FMA(KP951056516, T43, KP587785252 * T46); T5r = FNMS(KP587785252, T43, KP951056516 * T46); T3Y = KP559016994 * (T2p - T2A); T3Z = FNMS(KP250000000, T2B, T2g); T40 = T3Y + T3Z; T5q = T3Z - T3Y; T48 = T40 + T47; T5X = T5q + T5r; T4L = T40 - T47; T5s = T5q - T5r; } { 
E T4i, T5t, T4f, T5u, T4b, T4e; T4i = FMA(KP951056516, T4g, KP587785252 * T4h); T5t = FNMS(KP587785252, T4g, KP951056516 * T4h); T4b = KP559016994 * (T49 - T4a); T4e = FNMS(KP250000000, T4d, T4c); T4f = T4b + T4e; T5u = T4e - T4b; T4j = T4f - T4i; T5W = T5u - T5t; T4K = T4i + T4f; T5v = T5t + T5u; } } { E TO, T34, T2V, T2Y, T39, T38, T31, T32, T35, T13, T1m, T1n, TM, TN; TM = ri[WS(rs, 1)]; TN = ii[WS(rs, 1)]; TO = FMA(T2, TM, T5 * TN); T34 = FNMS(T5, TM, T2 * TN); { E TX, T2T, T1l, T2X, T12, T2U, T1e, T2W; { E TS, TW, T1i, T1k; TS = ri[WS(rs, 6)]; TW = ii[WS(rs, 6)]; TX = FMA(TR, TS, TV * TW); T2T = FNMS(TV, TS, TR * TW); T1i = ri[WS(rs, 16)]; T1k = ii[WS(rs, 16)]; T1l = FMA(T1h, T1i, T1j * T1k); T2X = FNMS(T1j, T1i, T1h * T1k); } { E TZ, T11, T19, T1d; TZ = ri[WS(rs, 21)]; T11 = ii[WS(rs, 21)]; T12 = FMA(TY, TZ, T10 * T11); T2U = FNMS(T10, TZ, TY * T11); T19 = ri[WS(rs, 11)]; T1d = ii[WS(rs, 11)]; T1e = FMA(T18, T19, T1c * T1d); T2W = FNMS(T1c, T19, T18 * T1d); } T2V = T2T - T2U; T2Y = T2W - T2X; T39 = T1e - T1l; T38 = TX - T12; T31 = T2T + T2U; T32 = T2W + T2X; T35 = T31 + T32; T13 = TX + T12; T1m = T1e + T1l; T1n = T13 + T1m; } T1o = TO + T1n; T6g = T34 + T35; { E T2Z, T55, T2S, T54, T2Q, T2R; T2Z = FMA(KP951056516, T2V, KP587785252 * T2Y); T55 = FNMS(KP587785252, T2V, KP951056516 * T2Y); T2Q = KP559016994 * (T13 - T1m); T2R = FNMS(KP250000000, T1n, TO); T2S = T2Q + T2R; T54 = T2R - T2Q; T30 = T2S + T2Z; T5M = T54 + T55; T4A = T2S - T2Z; T56 = T54 - T55; } { E T3a, T57, T37, T58, T33, T36; T3a = FMA(KP951056516, T38, KP587785252 * T39); T57 = FNMS(KP587785252, T38, KP951056516 * T39); T33 = KP559016994 * (T31 - T32); T36 = FNMS(KP250000000, T35, T34); T37 = T33 + T36; T58 = T36 - T33; T3b = T37 - T3a; T5N = T58 - T57; T4B = T3a + T37; T59 = T57 + T58; } } { E T1r, T3r, T3i, T3l, T3w, T3v, T3o, T3p, T3s, T1y, T1J, T1K, T1p, T1q; T1p = ri[WS(rs, 4)]; T1q = ii[WS(rs, 4)]; T1r = FMA(T8, T1p, Td * T1q); T3r = FNMS(Td, T1p, T8 * T1q); { E T1u, T3g, T1I, T3k, T1x, 
T3h, T1D, T3j; { E T1s, T1t, T1F, T1H; T1s = ri[WS(rs, 9)]; T1t = ii[WS(rs, 9)]; T1u = FMA(T9, T1s, Te * T1t); T3g = FNMS(Te, T1s, T9 * T1t); T1F = ri[WS(rs, 19)]; T1H = ii[WS(rs, 19)]; T1I = FMA(T1E, T1F, T1G * T1H); T3k = FNMS(T1G, T1F, T1E * T1H); } { E T1v, T1w, T1A, T1C; T1v = ri[WS(rs, 24)]; T1w = ii[WS(rs, 24)]; T1x = FMA(Tn, T1v, To * T1w); T3h = FNMS(To, T1v, Tn * T1w); T1A = ri[WS(rs, 14)]; T1C = ii[WS(rs, 14)]; T1D = FMA(T1z, T1A, T1B * T1C); T3j = FNMS(T1B, T1A, T1z * T1C);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -