📄 hf2_32.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:29:51 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hf2_32 -include hf.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 217 stack variables, and 128 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hf.h"static const R *hf2_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 8, MAKE_VOLATILE_STRIDE(ios)) { E T9A, T9z; { E T3, T8, T5, T4, T7, T2, TM, Td, T9, TJ, Tb, Tz, T14, T10, T1r; E T1n, TD, Ts, T1w, T1d, T26, Tw, T1z, T23, T19, T2r, T3i, T3P, T2v, T2S; E T3m, T3q, T3L, T2I, T2E, T2W, T3e, Te, TP, T29, T1R, T2b, T1U, Ti, TR; E Tf, T1S, T2f, TV, TQ, Tm, T1Y, T2a, Tg; { E Th, T1T, T1Q, Ta, Tv, Tr; T3 = W[4]; T8 = W[1]; T5 = W[0]; T4 = W[2]; T7 = W[3]; Tv = T3 * T8; Tr = T3 * T5; { E T18, Tc, T6, T1c; T18 = T3 * T4; Tc = T4 * T8; T6 = T4 * T5; T1c = T3 * T7; T2 = W[6]; TM = FMA(T7, T5, Tc); Td = FNMS(T7, T5, Tc); T9 = FMA(T7, T8, T6); TJ = FNMS(T7, T8, T6); Th = T3 * Td; T1T = T3 * TM; T1Q = T3 * TJ; Ta = T3 * T9; Tb = W[5]; Tz = T2 * T4; T14 = T2 * T8; T10 = T2 * T5; T1r = T2 * Tb; T1n = T2 * T3; TD = T2 * T7; Ts = FMA(Tb, T8, Tr); T1w = FNMS(Tb, T8, Tr); T1d = FMA(Tb, T4, T1c); T26 = FNMS(Tb, T4, T1c); Tw = FNMS(Tb, T5, Tv); T1z = FMA(Tb, T5, Tv); T23 = FMA(Tb, T7, T18); T19 = FNMS(Tb, T7, T18); } T2r = T2 * T1w; T3i = T2 * TM; T3P = T2 * Tw; T2v = T2 * T1z; T2S = T2 * T23; T3m = T2 * T19; T3q = T2 * T1d; T3L = T2 * Ts; T2I = T2 * Td; T2E = T2 * T9; T2W = T2 * T26; T3e = T2 * TJ; Te = FMA(Tb, Td, Ta); TP = FNMS(Tb, Td, Ta); T29 = FMA(Tb, TM, T1Q); T1R = FNMS(Tb, TM, T1Q); T2b = FNMS(Tb, TJ, T1T); T1U = FMA(Tb, TJ, T1T); Ti = FNMS(Tb, T9, Th); TR = FMA(Tb, T9, Th); Tf = T2 * Te; T1S = T2 * T1R; T2f = T2 * T2b; TV = T2 * TR; TQ = T2 * TP; Tm = T2 * Ti; T1Y = T2 * T1U; T2a = T2 * T29; Tg = W[7]; } { E Tq, T46, T8H, T97, TH, T98, T4b, T8D, T7f, TZ, T6t, T4j, T1g, T7g, T6u; E T4q, T4G, T6x, T1J, T7m, T6y, T4z, T7l, T8d, T6B, T4O, T7r, T8e, T4V, T6A; E T2k, T7o, T5s, T6I, T2N, T7A, T55, T6F, T7x, T8i, T5t, T5c, T7D, T8j, T5j; E T5u, T3c, T7y, T5L, T62, T43, T7G, T5S, T63, T7O, T8o, T3G, T7L, T5E, T6M; E T7J, T8n, T6P, T61, T90, T8Z; { E T2F, T31, T1V, T1K, T2T, T2w, T2J, T34, T1N, T2X, T2c, T2s, T2g, T1Z, T3n; E T3f, T3z, T3j, T3C, T3r, T3Q, T3M, T4B, T1m, T1x, T4x, T1H, T1y, T1A, T4D; E T1u; { E T1F, T15, T1s, T11, T1C, TW, TS, T1o, Ty, T48, TG, T4a; { E T1, T8G, Tk, TA, TE, Tn, Tj, To; T1 = rio[0]; T8G = iio[-WS(ios, 31)]; Tk = rio[WS(ios, 16)]; T2F = FNMS(Tg, Td, T2E); T31 = FMA(Tg, Td, T2E); T1V = FMA(Tg, T1U, T1S); T1K = FNMS(Tg, T7, Tz); TA = FMA(Tg, T7, Tz); T2T = FNMS(Tg, T26, T2S); T1F = FNMS(Tg, T5, T14); T15 = FMA(Tg, T5, T14); T2w = FNMS(Tg, T1w, T2v); T2J = FMA(Tg, T9, T2I); T34 = FNMS(Tg, T9, T2I); TE = FNMS(Tg, T4, TD); T1N = FMA(Tg, T4, TD); T2X = FMA(Tg, T23, T2W); T2c = FNMS(Tg, T2b, T2a); T2s = FMA(Tg, T1z, T2r); T1s = FNMS(Tg, T3, T1r); T2g = FMA(Tg, T29, T2f); T1Z = FNMS(Tg, T1R, T1Y); T11 = FNMS(Tg, T8, T10); T1C = FMA(Tg, T8, T10); TW = FNMS(Tg, TP, TV); T3n = FMA(Tg, T1d, T3m); T3f = FNMS(Tg, TM, T3e); T3z = FMA(Tg, TM, T3e); T3j = FMA(Tg, TJ, T3i); T3C = FNMS(Tg, TJ, T3i); TS = FMA(Tg, TR, TQ); T3r = FNMS(Tg, T19, T3q); Tn = FMA(Tg, Te, Tm); T3Q = FMA(Tg, Ts, T3P); T1o = FMA(Tg, Tb, T1n); T3M = FNMS(Tg, Tw, T3L); Tj = FNMS(Tg, Ti, Tf); To = iio[-WS(ios, 15)]; { E Tx, TF, T47, Tu, TC, T49, Tl, T8E; Tx = iio[-WS(ios, 23)]; TF = iio[-WS(ios, 7)]; Tl = Tj * Tk; T8E = Tj * To; { E Tt, TB, Tp, T8F; Tt = rio[WS(ios, 8)]; TB = rio[WS(ios, 24)]; Tp = FMA(Tn, To, Tl); T8F = FNMS(Tn, Tk, T8E); T47 = Tw * Tt; Tu = Ts * Tt; Tq = T1 + Tp; T46 = T1 - Tp; T8H = T8F + T8G; T97 = T8G - T8F; TC = TA * TB; T49 = TE * TB; } Ty = FNMS(Tw, Tx, Tu); T48 = FMA(Ts, Tx, T47); TG = FNMS(TE, TF, TC); T4a = FMA(TA, TF, T49); } } { E TX, TT, TO, T4f; { E TK, TN, TL, T4e; TK = rio[WS(ios, 4)]; TN = iio[-WS(ios, 27)]; TH = Ty + TG; T98 = Ty - TG; T4b = T48 - T4a; T8D = T48 + T4a; TL = TJ * TK; T4e = TJ * TN; TX = iio[-WS(ios, 11)]; TT = rio[WS(ios, 20)]; TO = FMA(TM, TN, TL); T4f = FNMS(TM, TK, T4e); } { E T17, T4m, T1a, T1e, T1b, T4i, T4d; { E T12, T16, T13, T4h, TY, T4l, T4g, TU; T12 = rio[WS(ios, 28)]; T4g = TW * TT; TU = TS * TT; T16 = iio[-WS(ios, 3)]; T13 = T11 * T12; T4h = FMA(TS, TX, T4g); TY = FNMS(TW, TX, TU); T4l = T11 * T16; T17 = FMA(T15, T16, T13); T4i = T4f - T4h; T7f = T4f + T4h; T4d = TO - TY; TZ = TO + TY; T4m = FNMS(T15, T12, T4l); } T1a = rio[WS(ios, 12)]; T6t = T4i - T4d; T4j = T4d + T4i; T1e = iio[-WS(ios, 19)]; T1b = T19 * T1a; { E T1G, T4k, T4p, T1D; { E T1l, T1j, T4n, T1f, T4A, T1k, T4o; T1l = iio[-WS(ios, 29)]; T1j = rio[WS(ios, 2)]; T4n = T19 * T1e; T1f = FMA(T1d, T1e, T1b); T1G = iio[-WS(ios, 5)]; T4A = Td * T1j; T1k = T9 * T1j; T4o = FNMS(T1d, T1a, T4n); T4k = T17 - T1f; T1g = T17 + T1f; T4B = FMA(T9, T1l, T4A); T1m = FNMS(Td, T1l, T1k); T4p = T4m - T4o; T7g = T4m + T4o; T1D = rio[WS(ios, 26)]; } { E T1t, T4w, T1E, T1p, T4C, T1q; T1t = iio[-WS(ios, 13)]; T6u = T4k + T4p; T4q = T4k - T4p; T4w = T1F * T1D; T1E = T1C * T1D; T1p = rio[WS(ios, 18)]; T1x = rio[WS(ios, 10)]; T4x = FMA(T1C, T1G, T4w); T1H = FNMS(T1F, T1G, T1E); T4C = T1s * T1p; T1q = T1o * T1p; T1y = T1w * T1x; T1A = iio[-WS(ios, 21)]; T4D = FMA(T1o, T1t, T4C); T1u = FNMS(T1s, T1t, T1q); } } } } } { E T2q, T5n, T2A, T2L, T53, T2B, T2C, T5p, T2y; { E T1P, T4Q, T2i, T27, T4M, T24, T4S, T21, T2G, T2K; { E T1L, T2d, T4P, T2e, T2h; { E T7j, T4t, T1M, T4y, T7k, T1O; { E T4E, T1v, T1I, T4F, T4v, T1B, T4u; T1L = rio[WS(ios, 30)]; T1B = FMA(T1z, T1A, T1y); T4u = T1w * T1A; T7j = T4B + T4D; T4E = T4B - T4D; T1v = T1m + T1u; T4t = T1m - T1u; T1I = T1B + T1H; T4F = T1B - T1H; T4v = FNMS(T1z, T1x, T4u); T1M = T1K * T1L; T4G = T4E - T4F; T6x = T4E + T4F; T1J = T1v + T1I; T7m = T1v - T1I; T4y = T4v - T4x; T7k = T4v + T4x; T1O = iio[-WS(ios, 1)]; } T2d = rio[WS(ios, 22)]; T6y = T4t - T4y; T4z = T4t + T4y; T7l = T7j - T7k; T8d = T7j + T7k; T1P = FMA(T1N, T1O, T1M); T4P = T1K * T1O; T2e = T2c * T2d; T2h = iio[-WS(ios, 9)]; } { E T20, T1W, T4L, T4R, T1X; T20 = iio[-WS(ios, 17)]; T4Q = FNMS(T1N, T1L, T4P); T1W = rio[WS(ios, 14)]; T2i = FMA(T2g, T2h, T2e); T4L = T2c * T2h; T27 = iio[-WS(ios, 25)]; T4R = T1Z * T1W; T1X = T1V * T1W; T4M = FNMS(T2g, T2d, T4L); T24 = rio[WS(ios, 6)]; T4S = FMA(T1V, T20, T4R); T21 = FNMS(T1Z, T20, T1X); } } { E T2n, T5m, T2o, T2p; { E T4T, T7p, T4I, T22, T4K, T28, T4J, T25; T2n = rio[WS(ios, 1)]; T4J = T26 * T24; T25 = T23 * T24; T4T = T4Q - T4S; T7p = T4Q + T4S; T4I = T1P - T21; T22 = T1P + T21; T4K = FMA(T23, T27, T4J); T28 = FNMS(T26, T27, T25); T2o = T5 * T2n; T2p = iio[-WS(ios, 30)]; { E T4N, T7q, T4U, T2j; T4N = T4K - T4M; T7q = T4K + T4M; T4U = T28 - T2i; T2j = T28 + T2i; T6B = T4I - T4N; T4O = T4I + T4N; T7r = T7p - T7q; T8e = T7p + T7q; T4V = T4T - T4U; T6A = T4T + T4U; T2k = T22 + T2j; T7o = T22 - T2j; T5m = T5 * T2p; } } T2q = FMA(T8, T2p, T2o); T2G = rio[WS(ios, 25)]; T2K = iio[-WS(ios, 6)]; T5n = FNMS(T8, T2n, T5m); } { E T2x, T2H, T52, T2t, T5o, T2u; T2x = iio[-WS(ios, 14)]; T2H = T2F * T2G; T52 = T2F * T2K; T2t = rio[WS(ios, 17)]; T2A = rio[WS(ios, 9)]; T2L = FMA(T2J, T2K, T2H); T53 = FNMS(T2J, T2G, T52); T5o = T2w * T2t; T2u = T2s * T2t; T2B = T3 * T2A; T2C = iio[-WS(ios, 22)]; T5p = FMA(T2s, T2x, T5o); T2y = FNMS(T2w, T2x, T2u); } } { E T3a, T5h, T35, T56, T30, T7B, T5b, T5e, T33; { E T2R, T58, T2Z, T5a, T32, T37, T39; { E T2Q, T7v, T4Z, T7w, T54, T2P, T57; T2Q = iio[-WS(ios, 26)]; { E T2D, T50, T5q, T2z; T2D = FMA(Tb, T2C, T2B); T50 = T3 * T2C; T7v = T5n + T5p; T5q = T5n - T5p; T2z = T2q + T2y; T4Z = T2q - T2y; { E T2M, T5r, T51, T2O; T2M = T2D + T2L; T5r = T2D - T2L; T51 = FNMS(Tb, T2A, T50); T2O = rio[WS(ios, 5)]; T5s = T5q - T5r; T6I = T5q + T5r; T2N = T2z + T2M; T7A = T2z - T2M; T7w = T51 + T53; T54 = T51 - T53; T2P = T29 * T2O; T57 = T2b * T2O; } } T55 = T4Z + T54; T6F = T4Z - T54; T7x = T7v - T7w; T8i = T7v + T7w; T2R = FNMS(T2b, T2Q, T2P); T58 = FMA(T29, T2Q, T57); } T37 = rio[WS(ios, 13)]; T39 = iio[-WS(ios, 18)]; { E T2U, T2Y, T38, T5g, T2V, T59; T2U = rio[WS(ios, 21)]; T2Y = iio[-WS(ios, 10)]; T38 = T1R * T37; T5g = T1R * T39; T2V = T2T * T2U; T59 = T2T * T2Y; T3a = FMA(T1U, T39, T38); T5h = FNMS(T1U, T37, T5g); T2Z = FMA(T2X, T2Y, T2V); T5a = FNMS(T2X, T2U, T59); } T35 = iio[-WS(ios, 2)]; T32 = rio[WS(ios, 29)]; T56 = T2R - T2Z; T30 = T2R + T2Z; T7B = T58 + T5a;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -