/* File: hf_32.c */
/* (source-viewer control label, apparently not part of the original file: "Font size:") */
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:19:34 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hf_32 -include hf.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 236 additions, 62 multiplications, 198 fused multiply/add), * 137 stack variables, and 128 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hf.h"static const R *hf_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, 
+0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62, MAKE_VOLATILE_STRIDE(ios)) { E T90, T8Z; { E T8x, T87, T8, T3w, T83, T3B, T8y, Tl, T6F, Tz, T3J, T5T, T6G, TM, T3Q; E T5U, T3Z, T5Y, T7D, T6L, T5X, T46, T6M, T1f, T4e, T61, T7E, T6R, T6O, T1G; E T60, T4l, T4v, T65, T6X, T7I, T29, T70, T68, T4S, T4T, T4C, T7J, T73, T6Y; E T2A, T4U, T4J, T5s, T5b, T7e, T7O, T5i, T5t, T3t, T76, T54, T6c, T79, T7N; E T32, T7b, T6f, T5r, T8q, T8p; { E T3X, T1d, T44, T6J, T11, T3T, T3V, T17, T4I, T4D; { E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti; { E T1, T86, T3, T6, T2, T5; T1 = rio[0]; T86 = iio[-WS(ios, 31)]; T3 = rio[WS(ios, 16)]; T6 = iio[-WS(ios, 15)]; T2 = W[30]; T5 = W[31]; { E T84, T4, T9, T85, T7; Ta = rio[WS(ios, 8)]; Td = iio[-WS(ios, 23)]; T84 = T2 * T6; T4 = T2 * T3; T9 = W[14]; Tg = rio[WS(ios, 24)]; T85 = FNMS(T5, T3, T84); T7 = FMA(T5, T6, T4); T3x = T9 * Td; Tb = T9 * Ta; T8x = T86 - T85; T87 = T85 + T86; T8 = T1 + T7; T3w = T1 - T7; Tj = iio[-WS(ios, 7)]; Tf = W[46]; } Tc = W[15]; Ti = W[47]; } { E Tu, Tx, T3F, Ts, Tw, T3G, Tv; { E To, Tr, Tp, T3E, Tq, Tt; { E T3y, Te, T3A, Tk, T3z, Th, Tn; To = rio[WS(ios, 4)]; T3z = Tf * Tj; Th = Tf * Tg; T3y = FNMS(Tc, Ta, T3x); Te = FMA(Tc, Td, Tb); T3A = FNMS(Ti, Tg, T3z); Tk = FMA(Ti, Tj, Th); Tr = iio[-WS(ios, 27)]; Tn = W[6]; T83 = T3y + T3A; T3B = T3y - T3A; T8y = Te - Tk; Tl = Te + Tk; Tp = Tn * To; T3E = Tn * Tr; } Tq = W[7]; Tu = rio[WS(ios, 20)]; Tx = iio[-WS(ios, 11)]; Tt = W[38]; T3F = FNMS(Tq, To, T3E); Ts = FMA(Tq, Tr, Tp); Tw = W[39]; T3G = Tt * Tx; Tv = Tt * Tu; } { E T3M, TF, TH, TK, TG, TJ, TE, TD, TC; { E TB, T3H, Ty, TA, T3I, T3D, T3L; TB = rio[WS(ios, 28)]; TE = iio[-WS(ios, 3)]; T3H = FNMS(Tw, Tu, T3G); Ty = FMA(Tw, Tx, Tv); TA = W[54]; TD = W[55]; T6F = T3F + T3H; T3I = T3F - T3H; Tz = Ts + Ty; T3D = Ts - Ty; T3L = TA * TE; TC = TA * TB; T3J = T3D + T3I; T5T = 
T3I - T3D; T3M = FNMS(TD, TB, T3L); } TF = FMA(TD, TE, TC); TH = rio[WS(ios, 12)]; TK = iio[-WS(ios, 19)]; TG = W[22]; TJ = W[23]; { E TU, T41, T13, T16, T43, T10, T12, T15, T3U, T14; { E T19, T1c, T18, T1b, T3P, T3K; { E TQ, TT, T3N, TI, TP, TS; TQ = rio[WS(ios, 2)]; TT = iio[-WS(ios, 29)]; T3N = TG * TK; TI = TG * TH; TP = W[2]; TS = W[3]; { E T3O, TL, T40, TR; T3O = FNMS(TJ, TH, T3N); TL = FMA(TJ, TK, TI); T40 = TP * TT; TR = TP * TQ; T6G = T3M + T3O; T3P = T3M - T3O; TM = TF + TL; T3K = TF - TL; TU = FMA(TS, TT, TR); T41 = FNMS(TS, TQ, T40); } } T3Q = T3K - T3P; T5U = T3K + T3P; T19 = rio[WS(ios, 26)]; T1c = iio[-WS(ios, 5)]; T18 = W[50]; T1b = W[51]; { E TW, TZ, TY, T42, TX, T3W, T1a, TV; TW = rio[WS(ios, 18)]; TZ = iio[-WS(ios, 13)]; T3W = T18 * T1c; T1a = T18 * T19; TV = W[34]; TY = W[35]; T3X = FNMS(T1b, T19, T3W); T1d = FMA(T1b, T1c, T1a); T42 = TV * TZ; TX = TV * TW; T13 = rio[WS(ios, 10)]; T16 = iio[-WS(ios, 21)]; T43 = FNMS(TY, TW, T42); T10 = FMA(TY, TZ, TX); T12 = W[18]; T15 = W[19]; } } T44 = T41 - T43; T6J = T41 + T43; T11 = TU + T10; T3T = TU - T10; T3U = T12 * T16; T14 = T12 * T13; T3V = FNMS(T15, T13, T3U); T17 = FMA(T15, T16, T14); } } } } { E T4g, T1l, T4c, T1E, T1u, T1x, T1w, T4i, T1r, T49, T1v; { E T1A, T1D, T1C, T4b, T1B; { E T1h, T1k, T1g, T1j, T4f, T1i, T1z; T1h = rio[WS(ios, 30)]; T1k = iio[-WS(ios, 1)]; { E T6K, T3Y, T1e, T45; T6K = T3V + T3X; T3Y = T3V - T3X; T1e = T17 + T1d; T45 = T17 - T1d; T3Z = T3T + T3Y; T5Y = T3T - T3Y; T7D = T6J + T6K; T6L = T6J - T6K; T5X = T44 + T45; T46 = T44 - T45; T6M = T11 - T1e; T1f = T11 + T1e; T1g = W[58]; } T1j = W[59]; T1A = rio[WS(ios, 22)]; T1D = iio[-WS(ios, 9)]; T4f = T1g * T1k; T1i = T1g * T1h; T1z = W[42]; T1C = W[43]; T4g = FNMS(T1j, T1h, T4f); T1l = FMA(T1j, T1k, T1i); T4b = T1z * T1D; T1B = T1z * T1A; } { E T1n, T1q, T1m, T1p, T4h, T1o, T1t; T1n = rio[WS(ios, 14)]; T1q = iio[-WS(ios, 17)]; T4c = FNMS(T1C, T1A, T4b); T1E = FMA(T1C, T1D, T1B); T1m = W[26]; T1p = W[27]; T1u = rio[WS(ios, 6)]; T1x 
= iio[-WS(ios, 25)]; T4h = T1m * T1q; T1o = T1m * T1n; T1t = W[10]; T1w = W[11]; T4i = FNMS(T1p, T1n, T4h); T1r = FMA(T1p, T1q, T1o); T49 = T1t * T1x; T1v = T1t * T1u; } } { E T4j, T6P, T1s, T48, T4a, T1y; T4j = T4g - T4i; T6P = T4g + T4i; T1s = T1l + T1r; T48 = T1l - T1r; T4a = FNMS(T1w, T1u, T49); T1y = FMA(T1w, T1x, T1v); { E T6Q, T4d, T4k, T1F; T6Q = T4a + T4c; T4d = T4a - T4c; T4k = T1y - T1E; T1F = T1y + T1E; T4e = T48 + T4d; T61 = T48 - T4d; T7E = T6P + T6Q; T6R = T6P - T6Q; T6O = T1s - T1F; T1G = T1s + T1F; T60 = T4j + T4k; T4l = T4j - T4k; } } } { E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y; { E T23, T26, T25, T4s, T24; { E T1K, T1N, T1J, T1M, T4M, T1L, T22; T1K = rio[WS(ios, 1)]; T1N = iio[-WS(ios, 30)]; T1J = W[0]; T1M = W[1]; T23 = rio[WS(ios, 25)]; T26 = iio[-WS(ios, 6)]; T4M = T1J * T1N; T1L = T1J * T1K; T22 = W[48]; T25 = W[49]; T4N = FNMS(T1M, T1K, T4M); T1O = FMA(T1M, T1N, T1L); T4s = T22 * T26; T24 = T22 * T23; } { E T1Q, T1T, T1P, T1S, T4O, T1R, T1W; T1Q = rio[WS(ios, 17)]; T1T = iio[-WS(ios, 14)]; T4t = FNMS(T25, T23, T4s); T27 = FMA(T25, T26, T24); T1P = W[32]; T1S = W[33]; T1X = rio[WS(ios, 9)]; T20 = iio[-WS(ios, 22)]; T4O = T1P * T1T; T1R = T1P * T1Q; T1W = W[16]; T1Z = W[17]; T4P = FNMS(T1S, T1Q, T4O); T1U = FMA(T1S, T1T, T1R); T4q = T1W * T20; T1Y = T1W * T1X; } } { E T4Q, T6V, T1V, T4p, T4r, T21; T4Q = T4N - T4P; T6V = T4N + T4P; T1V = T1O + T1U; T4p = T1O - T1U; T4r = FNMS(T1Z, T1X, T4q); T21 = FMA(T1Z, T20, T1Y); { E T6W, T4u, T4R, T28; T6W = T4r + T4t; T4u = T4r - T4t; T4R = T21 - T27; T28 = T21 + T27; T4v = T4p + T4u; T65 = T4p - T4u; T6X = T6V - T6W; T7I = T6V + T6W; T29 = T1V + T28; T70 = T1V - T28; T68 = T4Q + T4R; T4S = T4Q - T4R; } } } { E T4y, T2f, T4H, T2y, T2o, T2r, T2q, T4A, T2l, T4E, T2p; { E T2u, T2x, T2w, T4G, T2v; { E T2b, T2e, T2a, T2d, T4x, T2c, T2t; T2b = rio[WS(ios, 5)]; T2e = iio[-WS(ios, 26)]; T2a = W[8]; T2d = W[9]; T2u = rio[WS(ios, 13)]; T2x = iio[-WS(ios, 18)]; T4x = T2a * T2e; T2c = T2a * T2b; T2t = 
W[24]; T2w = W[25]; T4y = FNMS(T2d, T2b, T4x); T2f = FMA(T2d, T2e, T2c); T4G = T2t * T2x; T2v = T2t * T2u; } { E T2h, T2k, T2g, T2j, T4z, T2i, T2n; T2h = rio[WS(ios, 21)]; T2k = iio[-WS(ios, 10)]; T4H = FNMS(T2w, T2u, T4G); T2y = FMA(T2w, T2x, T2v); T2g = W[40]; T2j = W[41]; T2o = rio[WS(ios, 29)]; T2r = iio[-WS(ios, 2)]; T4z = T2g * T2k; T2i = T2g * T2h; T2n = W[56]; T2q = W[57]; T4A = FNMS(T2j, T2h, T4z); T2l = FMA(T2j, T2k, T2i); T4E = T2n * T2r; T2p = T2n * T2o; } } { E T4B, T71, T2m, T4w, T4F, T2s, T72, T2z; T4B = T4y - T4A; T71 = T4y + T4A; T2m = T2f + T2l; T4w = T2f - T2l; T4F = FNMS(T2q, T2o, T4E); T2s = FMA(T2q, T2r, T2p); T4I = T4F - T4H; T72 = T4F + T4H; T2z = T2s + T2y; T4D = T2s - T2y; T4T = T4B - T4w; T4C = T4w + T4B; T7J = T71 + T72; T73 = T71 - T72; T6Y = T2z - T2m; T2A = T2m + T2z; } } { E T5g, T3r, T5a, T7c, T3f, T55, T5e, T3l; { E T57, T38, T3h, T3k, T59, T3e, T3g, T3j, T5d, T3i; { E T3n, T3q, T3m, T3p; { E T34, T37, T36, T56, T35, T33; T34 = rio[WS(ios, 3)]; T37 = iio[-WS(ios, 28)]; T33 = W[4]; T4U = T4D + T4I; T4J = T4D - T4I; T36 = W[5]; T56 = T33 * T37; T35 = T33 * T34; T3n = rio[WS(ios, 11)]; T3q = iio[-WS(ios, 20)]; T57 = FNMS(T36, T34, T56); T38 = FMA(T36, T37, T35); T3m = W[20];
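/*
 * Keyboard shortcut help (appears to be source-viewer page text rather than
 * part of the original file):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */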