📄 t2_32.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 19:31:37 EST 2006 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twiddle -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include t.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 212 stack variables, and 128 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_twiddle.ml,v 1.23 2006-01-05 03:04:27 stevenj Exp $ */#include "t.h"static const R *t2_32(R *ri, R *ii, const R *W, stride ios, INT m, INT dist){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = m; i > 0; i = i - 1, ri = ri + dist, ii = ii + dist, W = W + 8, MAKE_VOLATILE_STRIDE(ios)) { E T9A, T9z; { E T3, T8, T5, T4, T7, T2, TM, Td, T9, TJ, Tb, Tz, T14, T10, T1r; E T1n, TD, Ts, T1w, T1d, T26, Tw, T1z, T23, T19, T2r, T3i, T3P, T2v, T2S; E T3m, T3q, T3L, T2I, T2E, T2W, T3e, Te, TP, T29, T1R, T2b, T1U, Ti, TR; E Tf, T1S, T2f, TV, TQ, Tm, T1Y, T2a, Tg; { E Th, T1T, T1Q, Ta, Tv, Tr; T3 = W[4]; T8 = W[1]; T5 = W[0]; T4 = W[2]; T7 = W[3]; Tv = T3 * T8; Tr = T3 * T5; { E T18, Tc, T6, T1c; T18 = T3 * T4; Tc = T4 * T8; T6 = T4 * T5; T1c = T3 * T7; T2 = W[6]; TM = FMA(T7, T5, Tc); Td = FNMS(T7, T5, Tc); T9 = FMA(T7, T8, T6); TJ = FNMS(T7, T8, T6); Th = T3 * Td; T1T = T3 * TM; T1Q = T3 * TJ; Ta = T3 * T9; Tb = W[5]; Tz = T2 * T4; T14 = T2 * T8; T10 = T2 * T5; T1r = T2 * Tb; T1n = T2 * T3; TD = T2 * T7; Ts = FMA(Tb, T8, Tr); T1w = FNMS(Tb, T8, Tr); T1d = FMA(Tb, T4, T1c); T26 = FNMS(Tb, T4, T1c); Tw = FNMS(Tb, T5, Tv); T1z = FMA(Tb, T5, Tv); T23 = FMA(Tb, T7, T18); T19 = FNMS(Tb, T7, T18); } T2r = T2 * T1w; T3i = T2 * TM; T3P = T2 * Tw; T2v = T2 * T1z; T2S = T2 * T23; T3m = T2 * T19; T3q = T2 * T1d; T3L = T2 * Ts; T2I = T2 * Td; T2E = T2 * T9; T2W = T2 * T26; T3e = T2 * TJ; Te = FMA(Tb, Td, Ta); TP = FNMS(Tb, Td, Ta); T29 = FMA(Tb, TM, T1Q); T1R = FNMS(Tb, TM, T1Q); T2b = FNMS(Tb, TJ, T1T); T1U = FMA(Tb, TJ, T1T); Ti = FNMS(Tb, T9, Th); TR = FMA(Tb, T9, Th); Tf = T2 * Te; T1S = T2 * T1R; T2f = T2 * T2b; TV = T2 * TR; TQ = T2 * TP; Tm = T2 * Ti; T1Y = T2 * T1U; T2a = T2 * T29; Tg = W[7]; } { E T31, T34, T2X, T46, Tq, T97, T8H, T8D, T4b, T98, TH, T7f, TZ, T4j, T6t; E T1g, T7g, T6u, T4q, T4z, T6x, T1J, T7m, T6y, T4G, T7l, T8d, T6B, T4V, T7r; E T8e, T4O, T6A, T2k, T7o, T5E, T6P, T3G, T7L, T61, T6M, T7I, T8n, T55, T6I; E T2N, T7A, T5s, T6F, T7x, T8i, T62, T5L, T7J, T43, T63, T5S, T8o, T7O, T57; E T2R, T2U, T3a, T35, T5h, T58, T2Z, T32; { E T2F, T1V, T1K, T2T, T2w, T2J, T1N, T2c, T2s, T2g, T1Z, T3n, T3f, T3z, T3j; E T3C, T3r, T3Q, T3M, T1x, T4E, T1H, T7j, T4x, T4A, T1v, T1B, T4B; { E TA, T1F, T15, TE, T1s, T11, T1C, TW, TS, T1o, TF, Ty, T49, TC, T48; { E T1, T8G, Tk, Tn, Tj, To; T1 = ri[0]; T8G = ii[0]; Tk = ri[WS(ios, 16)]; T2F = FNMS(Tg, Td, T2E); T31 = FMA(Tg, Td, T2E); T1V = FMA(Tg, T1U, T1S); T1K = FNMS(Tg, T7, Tz); TA = FMA(Tg, T7, Tz); T2T = FNMS(Tg, T26, T2S); T1F = FNMS(Tg, T5, T14); T15 = FMA(Tg, T5, T14); T2w = FNMS(Tg, T1w, T2v); T2J = FMA(Tg, T9, T2I); T34 = FNMS(Tg, T9, T2I); TE = FNMS(Tg, T4, TD); T1N = FMA(Tg, T4, TD); T2X = FMA(Tg, T23, T2W); T2c = FNMS(Tg, T2b, T2a); T2s = FMA(Tg, T1z, T2r); T1s = FNMS(Tg, T3, T1r); T2g = FMA(Tg, T29, T2f); T1Z = FNMS(Tg, T1R, T1Y); T11 = FNMS(Tg, T8, T10); T1C = FMA(Tg, T8, T10); TW = FNMS(Tg, TP, TV); T3n = FMA(Tg, T1d, T3m); T3f = FNMS(Tg, TM, T3e); T3z = FMA(Tg, TM, T3e); T3j = FMA(Tg, TJ, T3i); T3C = FNMS(Tg, TJ, T3i); TS = FMA(Tg, TR, TQ); T3r = FNMS(Tg, T19, T3q); Tn = FMA(Tg, Te, Tm); T3Q = FMA(Tg, Ts, T3P); T1o = FMA(Tg, Tb, T1n); T3M = FNMS(Tg, Tw, T3L); Tj = FNMS(Tg, Ti, Tf); To = ii[WS(ios, 16)]; { E Tx, T47, Tu, Tp, T8F, TB, Tt, Tl, T8E; Tx = ii[WS(ios, 8)]; Tt = ri[WS(ios, 8)]; Tl = Tj * Tk; T8E = Tj * To; TF = ii[WS(ios, 24)]; T47 = Tw * Tt; Tu = Ts * Tt; Tp = FMA(Tn, To, Tl); T8F = FNMS(Tn, Tk, T8E); TB = ri[WS(ios, 24)]; Ty = FNMS(Tw, Tx, Tu); T46 = T1 - Tp; Tq = T1 + Tp; T97 = T8G - T8F; T8H = T8F + T8G; T49 = TE * TB; TC = TA * TB; T48 = FMA(Ts, Tx, T47); } } { E TO, TX, TT, T4f, TK, T4e; { E TN, TL, T4a, TG; TK = ri[WS(ios, 4)]; T4a = FMA(TA, TF, T49); TG = FNMS(TE, TF, TC); TN = ii[WS(ios, 4)]; TL = TJ * TK; T8D = T48 + T4a; T4b = T48 - T4a; T98 = Ty - TG; TH = Ty + TG; T4e = TJ * TN; TO = FMA(TM, TN, TL); } TX = ii[WS(ios, 20)]; TT = ri[WS(ios, 20)]; T4f = FNMS(TM, TK, T4e); { E T17, T4m, T1a, T1e, T1b, T4n; { E T4h, TY, T4i, T4d; { E T12, T16, T4g, TU, T13, T4l; T12 = ri[WS(ios, 28)]; T16 = ii[WS(ios, 28)]; T4g = TW * TT; TU = TS * TT; T13 = T11 * T12; T4l = T11 * T16; T4h = FMA(TS, TX, T4g); TY = FNMS(TW, TX, TU); T17 = FMA(T15, T16, T13); T4m = FNMS(T15, T12, T4l); } T7f = T4f + T4h; T4i = T4f - T4h; TZ = TO + TY; T4d = TO - TY; T1a = ri[WS(ios, 12)]; T1e = ii[WS(ios, 12)]; T4j = T4d + T4i; T6t = T4i - T4d; T1b = T19 * T1a; T4n = T19 * T1e; } { E T4u, T1m, T4w, T1u, T1y, T1A; { E T1G, T1D, T1t, T1p; { E T1l, T1j, T1f, T4o; T1l = ii[WS(ios, 2)]; T1j = ri[WS(ios, 2)]; T1f = FMA(T1d, T1e, T1b); T4o = FNMS(T1d, T1a, T4n); T1G = ii[WS(ios, 26)]; { E T4t, T1k, T4k, T4p; T4t = Td * T1j; T1k = T9 * T1j; T4k = T17 - T1f; T1g = T17 + T1f; T4p = T4m - T4o; T7g = T4m + T4o; T4u = FMA(T9, T1l, T4t); T1m = FNMS(Td, T1l, T1k); T6u = T4k + T4p; T4q = T4k - T4p; T1D = ri[WS(ios, 26)]; } } T1t = ii[WS(ios, 18)]; T1p = ri[WS(ios, 18)]; T1x = ri[WS(ios, 10)]; { E T4D, T1E, T4v, T1q; T4D = T1F * T1D; T1E = T1C * T1D; T4v = T1s * T1p; T1q = T1o * T1p; T4E = FMA(T1C, T1G, T4D); T1H = FNMS(T1F, T1G, T1E); T4w = FMA(T1o, T1t, T4v); T1u = FNMS(T1s, T1t, T1q); T1y = T1w * T1x; } T1A = ii[WS(ios, 10)]; } T7j = T4u + T4w; T4x = T4u - T4w; T4A = T1m - T1u; T1v = T1m + T1u; T1B = FMA(T1z, T1A, T1y); T4B = T1w * T1A; } } } } { E T3v, T5Z, T3E, T7G, T5C, T5V, T3u, T3y, T5W; { E T1P, T4J, T2i, T27, T4T, T24, T4L, T21; { E T1L, T2d, T4I, T2e, T2h; { E T1M, T4F, T7k, T1O, T1I, T4y, T4C; T1L = ri[WS(ios, 30)]; T1I = T1B + T1H; T4y = T1B - T1H; T4C = FNMS(T1z, T1x, T4B); T1M = T1K * T1L; T4z = T4x - T4y; T6x = T4x + T4y; T1J = T1v + T1I; T7m = T1v - T1I; T4F = T4C - T4E; T7k = T4C + T4E; T1O = ii[WS(ios, 30)]; T2d = ri[WS(ios, 22)]; T6y = T4A - T4F; T4G = T4A + T4F; T7l = T7j - T7k; T8d = T7j + T7k; T1P = FMA(T1N, T1O, T1M); T4I = T1K * T1O; T2e = T2c * T2d; T2h = ii[WS(ios, 22)]; } { E T20, T1W, T4S, T4K, T1X; T20 = ii[WS(ios, 14)]; T4J = FNMS(T1N, T1L, T4I); T1W = ri[WS(ios, 14)]; T2i = FMA(T2g, T2h, T2e); T4S = T2c * T2h; T27 = ii[WS(ios, 6)]; T4K = T1Z * T1W; T1X = T1V * T1W; T4T = FNMS(T2g, T2d, T4S); T24 = ri[WS(ios, 6)]; T4L = FMA(T1V, T20, T4K); T21 = FNMS(T1Z, T20, T1X); } } { E T3l, T5z, T5B, T3t, T3w, T3x, T3D, T3A; { E T3g, T5y, T3h, T3k; { E T4M, T7p, T4P, T22, T4R, T28, T4Q, T25; T3g = ri[WS(ios, 31)]; T4Q = T26 * T24; T25 = T23 * T24; T4M = T4J - T4L; T7p = T4J + T4L; T4P = T1P - T21; T22 = T1P + T21; T4R = FMA(T23, T27, T4Q); T28 = FNMS(T26, T27, T25); T3h = T3f * T3g; T3k = ii[WS(ios, 31)]; { E T4U, T7q, T4N, T2j; T4U = T4R - T4T; T7q = T4R + T4T; T4N = T28 - T2i; T2j = T28 + T2i; T6B = T4P - T4U; T4V = T4P + T4U; T7r = T7p - T7q; T8e = T7p + T7q; T4O = T4M - T4N; T6A = T4M + T4N; T2k = T22 + T2j; T7o = T22 - T2j; T5y = T3f * T3k; } } T3l = FMA(T3j, T3k, T3h); T3D = ii[WS(ios, 23)]; T3A = ri[WS(ios, 23)]; T5z = FNMS(T3j, T3g, T5y); } { E T3s, T3o, T5Y, T3B, T5A, T3p; T3s = ii[WS(ios, 15)]; T3o = ri[WS(ios, 15)]; T5Y = T3C * T3A; T3B = T3z * T3A; T3v = ri[WS(ios, 7)]; T5A = T3r * T3o; T3p = T3n * T3o; T5Z = FMA(T3z, T3D, T5Y); T3E = FNMS(T3C, T3D, T3B); T5B = FMA(T3n, T3s, T5A); T3t = FNMS(T3r, T3s, T3p); T3w = TP * T3v; T3x = ii[WS(ios, 7)]; } T7G = T5z + T5B; T5C = T5z - T5B; T5V = T3l - T3t; T3u = T3l + T3t; T3y = FMA(TR, T3x, T3w); T5W = TP * T3x; } } { E T2q, T50, T2L, T2A, T5q, T2B, T52, T2y, T2C; { E T2n, T2G, T4Z, T2H, T2K; { E T2o, T7H, T60, T2p, T3F, T5D, T5X; T2n = ri[WS(ios, 1)]; T3F = T3y + T3E; T5D = T3y - T3E; T5X = FNMS(TR, T3v, T5W); T2o = T5 * T2n; T5E = T5C - T5D; T6P = T5C + T5D; T3G = T3u + T3F; T7L = T3u - T3F; T7H = T5X + T5Z; T60 = T5X - T5Z; T2p = ii[WS(ios, 1)]; T2G = ri[WS(ios, 25)]; T61 = T5V + T60; T6M = T5V - T60; T7I = T7G - T7H; T8n = T7G + T7H; T2q = FMA(T8, T2p, T2o); T4Z = T5 * T2p; T2H = T2F * T2G; T2K = ii[WS(ios, 25)]; } { E T2x, T2t, T5p, T51, T2u; T2x = ii[WS(ios, 17)]; T50 = FNMS(T8, T2n, T4Z); T2t = ri[WS(ios, 17)]; T2L = FMA(T2J, T2K, T2H); T5p = T2F * T2K; T2A = ri[WS(ios, 9)]; T51 = T2w * T2t; T2u = T2s * T2t; T5q = FNMS(T2J, T2G, T5p); T2B = T3 * T2A; T52 = FMA(T2s, T2x, T51); T2y = FNMS(T2w, T2x, T2u); T2C = ii[WS(ios, 9)]; } } { E T3U, T5Q, T41, T3T, T5K, T5J, T7M, T3X, T5N; {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -