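/* File: hf2_64.c  (source-viewer page header; the viewer's "Font size:" label followed here. Not part of the C source.) */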
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Oct 4 10:31:02 EDT 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -dit -name hf2_64 -include hf.h *//* * This function contains 1154 FP additions, 840 FP multiplications, * (or, 520 additions, 206 multiplications, 634 fused multiply/add), * 349 stack variables, and 256 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.16 2006-02-12 23:34:12 athena Exp $ */#include "hf.h"static const R *hf2_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP098491403, 
+0.098491403357164253077197521291327432293052451); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 10, MAKE_VOLATILE_STRIDE(ios)) { E Tg0, TlC, TlB, Tg3; { E T2, T3, Tc, T8, Te, T5, T6, T14, T3d, T3i, TJ, T7, Tr, T3g, TG; E T10, T3a, TL, TP, Tb, Tt, T17, Td, Ti, T3N, T3R, T1i, Tu, T1I, T2U; E T1t, T3U, T5O, T48, T2u, T7B, TK, T79, T3D, T2h, T2l, T3G, T1x, T3X, T2d; E T1M, T2X, T4B, T4x, T3j, T4T, T29, T5s, T81, T5w, T7X, T7N, T7h, T64, T6a; E T6e, T7l, T60, T7R, T6h, T5A, T7o, T6J, T6k, T5E, T6N, T7r, T6x, T6t, T7c; E TO, T2x, T7E, TU, TQ, T2C, T2y, T5R, T4b, T4c, T4g, T4W, T3m, T3r, T3n; E T1k, Tx, Ty, T4p, T4s, TC, T23, T1Z, T19, Th, T31, T35, T1e, T44, T41; E T1a, T6W, T70, T55, T59, T3v, T3z, Tf, T1R, T2N, T2Q, T1V, T1p, T1l, Tm; { E T1H, T1s, T2g, Tg, Tw, TH, T2t, T47, T3h, T3M, T4w, T28, T3Q, T4A, T2c; E Ts; { E T4, T13, TI, TF, TZ, Ta, T9; T2 = W[0]; T3 = W[2]; Tc = W[5]; T8 = W[4]; Te = W[6]; T4 = T2 * T3; T13 = T2 * Tc; TI = T3 * Tc; TF = T3 * T8; T1H = T8 * Te; TZ = T2 * T8; T5 = W[1]; T6 = W[3]; T1s = T3 * Te; T2g = T2 * Te; T14 = FNMS(T5, T8, T13); T3d = FMA(T5, T8, T13); T3i = FNMS(T6, T8, TI); TJ = FMA(T6, T8, TI); T7 = FNMS(T5, T6, T4); Tr = FMA(T5, T6, T4); Ta = T2 * T6; Tg = T7 * Tc; Tw = Tr * Tc; T3g = FMA(T6, Tc, TF); TG = FNMS(T6, Tc, TF); T10 = FMA(T5, Tc, TZ); T3a = FNMS(T5, Tc, TZ); TH = TG * Te; T2t = T10 * Te; 
T47 = T3a * Te; T3h = T3g * Te; TL = W[8]; TP = W[9]; T9 = T7 * T8; Tb = FMA(T5, T3, Ta); Tt = FNMS(T5, T3, Ta); T3M = T2 * TL; T4w = T8 * TL; T28 = T3 * TL; T3Q = T2 * TP; T4A = T8 * TP; T2c = T3 * TP; T17 = FNMS(Tb, Tc, T9); Td = FMA(Tb, Tc, T9); Ts = Tr * T8; Ti = W[7]; } { E T5r, T80, T1L, T2k, T1w, T5z, T2B, T2v; T3N = FMA(T5, TP, T3M); T3R = FNMS(T5, TL, T3Q); T1i = FMA(Tt, Tc, Ts); Tu = FNMS(Tt, Tc, Ts); T1I = FNMS(Tc, Ti, T1H); T2U = FMA(Tc, Ti, T1H); T1t = FMA(T6, Ti, T1s); T3U = FNMS(T6, Ti, T1s); T5O = FNMS(T3d, Ti, T47); T48 = FMA(T3d, Ti, T47); T2u = FMA(T14, Ti, T2t); T7B = FNMS(T14, Ti, T2t); T1L = T8 * Ti; T2k = T2 * Ti; T1w = T3 * Ti; TK = FMA(TJ, Ti, TH); T79 = FNMS(TJ, Ti, TH); T3D = FMA(T5, Ti, T2g); T2h = FNMS(T5, Ti, T2g); T2l = FMA(T5, Te, T2k); T3G = FNMS(T5, Te, T2k); T1x = FNMS(T6, Te, T1w); T3X = FMA(T6, Te, T1w); T2d = FNMS(T6, TL, T2c); T1M = FMA(Tc, Te, T1L); T2X = FNMS(Tc, Te, T1L); T4B = FNMS(Tc, TL, T4A); T4x = FMA(Tc, TP, T4w); T3j = FMA(T3i, Ti, T3h); T4T = FNMS(T3i, Ti, T3h); T29 = FMA(T6, TP, T28); T5r = T3g * TL; T80 = T7 * TP; { E T7M, T7g, T63, T5v, T7W; T5v = T3g * TP; T7W = T7 * TL; T5s = FMA(T3i, TP, T5r); T81 = FNMS(Tb, TL, T80); T5w = FNMS(T3i, TL, T5v); T7X = FMA(Tb, TP, T7W); T7M = TG * TL; T7g = T10 * TL; T63 = T3a * TP; { E T6d, T7k, T69, T5Z, T7Q; T69 = Tr * TL; T7N = FMA(TJ, TP, T7M); T7h = FMA(T14, TP, T7g); T64 = FNMS(T3d, TL, T63); T6a = FMA(Tt, TP, T69); T6d = Tr * TP; T7k = T10 * TP; T5Z = T3a * TL; T7Q = TG * TP; T6e = FNMS(Tt, TL, T6d); T7l = FNMS(T14, TL, T7k); T60 = FMA(T3d, TP, T5Z); T7R = FNMS(TJ, TL, T7Q); T5z = Tr * Te; } } { E T6I, T5D, T6M, T6s, T6w; T6I = T7 * Te; T5D = Tr * Ti; T6M = T7 * Ti; T6h = FNMS(Tt, Ti, T5z); T5A = FMA(Tt, Ti, T5z); T7o = FMA(Tb, Ti, T6I); T6J = FNMS(Tb, Ti, T6I); T6k = FMA(Tt, Te, T5D); T5E = FNMS(Tt, Te, T5D); T6N = FMA(Tb, Te, T6M); T7r = FNMS(Tb, Te, T6M); T6s = T2U * TL; T6w = T2U * TP; { E TN, TT, TM, T2w; TN = TG * Ti; T2w = T10 * Ti; T6x = FNMS(T2X, TL, T6w); T6t = 
FMA(T2X, TP, T6s); T7c = FMA(TJ, Te, TN); TO = FNMS(TJ, Te, TN); TT = TK * TP; TM = TK * TL; T2x = FNMS(T14, Te, T2w); T7E = FMA(T14, Te, T2w); TU = FNMS(TO, TL, TT); TQ = FMA(TO, TP, TM); T2B = T2u * TP; T2v = T2u * TL; } } { E T1Y, T22, Tv, TB; { E T49, T4f, T4a, T3l, T3q, T3k; T4a = T3a * Ti; T2C = FNMS(T2x, TL, T2B); T2y = FMA(T2x, TP, T2v); T5R = FMA(T3d, Te, T4a); T4b = FNMS(T3d, Te, T4a); T49 = T48 * TL; T4f = T48 * TP; T3l = T3g * Ti; T4c = FMA(T4b, TP, T49); T4g = FNMS(T4b, TL, T4f); T4W = FMA(T3i, Te, T3l); T3m = FNMS(T3i, Te, T3l); T1Y = Tu * TL; T3q = T3j * TP; T3k = T3j * TL; T22 = Tu * TP; Tv = Tu * Te; T3r = FNMS(T3m, TL, T3q); T3n = FMA(T3m, TP, T3k); TB = Tu * Ti; T1k = FNMS(Tt, T8, Tw); Tx = FMA(Tt, T8, Tw); } { E T30, T34, T18, T1d; T30 = T17 * TL; T34 = T17 * TP; T18 = T17 * Te; Ty = FMA(Tx, Ti, Tv); T4p = FNMS(Tx, Ti, Tv); T4s = FMA(Tx, Te, TB); TC = FNMS(Tx, Te, TB); T23 = FNMS(Tx, TL, T22); T1Z = FMA(Tx, TP, T1Y); T1d = T17 * Ti; T19 = FMA(Tb, T8, Tg); Th = FNMS(Tb, T8, Tg); { E T1j, T1o, T1Q, T1U; T1j = T1i * TL; { E T6V, T6Z, T54, T58; T6V = Ty * TL; T6Z = Ty * TP; T31 = FMA(T19, TP, T30); T35 = FNMS(T19, TL, T34); T1e = FMA(T19, Te, T1d); T44 = FNMS(T19, Te, T1d); T41 = FMA(T19, Ti, T18); T1a = FNMS(T19, Ti, T18); T6W = FMA(TC, TP, T6V); T70 = FNMS(TC, TL, T6Z); T1o = T1i * TP; T54 = T41 * TL; T58 = T41 * TP; T1Q = T1i * Te; T1U = T1i * Ti; T55 = FMA(T44, TP, T54); T59 = FNMS(T44, TL, T58); } T3v = Td * TL; T3z = Td * TP; Tf = Td * Te; T1R = FMA(T1k, Ti, T1Q); T2N = FNMS(T1k, Ti, T1Q); T2Q = FMA(T1k, Te, T1U); T1V = FNMS(T1k, Te, T1U); T1p = FNMS(T1k, TL, T1o); T1l = FMA(T1k, TP, T1j); Tm = Td * Ti; } } } } } { E Tl9, TlD, TY, Tg4, T8w, TdS, TkE, Tkd, T74, Tha, Tht, Tjb, TeE, TbI, TeP; E TcB, T8D, TdT, TkD, T1B, Tk7, Tg7, TdU, T8K, T98, Te1, T2G, Tge, T9f, Te0; E Tgh, TiK, Tbs, Tew, T5d, TgJ, Taz, Tel, Th2, TiZ, Tal, Tef, T4k, Tgw, T9Y; E Tec, TgD, TiT, T9M, Te8, T39, Tgl, T9p, Te5, Tgs, TiN, T8T, TdY, T27, Tg9; E T90, TdX, Tgc, TiJ, T3K, 
Tgt, Tgo, TiO, T9P, Te6, T9E, Te9, T7v, Thu, Thd; E Tjc, TcE, TeF, TbX, TeQ, T5I, Th3, TgM, Tj0, Tbv, Tem, TaO, Tex, T4L, TgE; E Tgz, TiU, Tao, Ted, Tad, Teg, T7V, Tjg, TeS, TeJ, Tcs, TcH, Thj, Thw, T8m; E Tjh, TeT, TeM, Tcd, TcG, Tho, Thx, T68, Tj5, Tez, Teq, Tb4, Tby, TgS, Th5; E T6i, T6g, T6j, Tb6, T6z, Tbg, T6l, T6o, T6q; { E T3w, T3A, T4H, T4E, T8e, T8i, T5j, T5n, T1J, T1G, T1K, T8O, T25, T8Y, T1N; E T1S, T1W; { E T2i, T2f, T2j, T93, T2E, T9d, T2m, T2p, T2r, TcA, Tcv; { E T1, Tkb, Tp, Tka, TR, TV, TE, T8s, TS, T8t; { E Tn, Tj, T8d, T8h, T5i, T5m; T1 = rio[0]; T8d = T1R * TL; T8h = T1R * TP; T3w = FMA(Th, TP, T3v); T3A = FNMS(Th, TL, T3z); Tn = FMA(Th, Te, Tm); T4H = FNMS(Th, Te, Tm); T4E = FMA(Th, Ti, Tf); Tj = FNMS(Th, Ti, Tf); T8e = FMA(T1V, TP, T8d); T8i = FNMS(T1V, TL, T8h); Tkb = iio[-WS(ios, 63)]; T5i = T4E * TL; T5m = T4E * TP; { E Tk, To, Tl, Tk9; Tk = rio[WS(ios, 32)]; To = iio[-WS(ios, 31)]; T5j = FMA(T4H, TP, T5i); T5n = FNMS(T4H, TL, T5m); Tl = Tj * Tk; Tk9 = Tj * To; { E Tz, TD, TA, T8r; Tz = rio[WS(ios, 16)]; TD = iio[-WS(ios, 47)]; Tp = FMA(Tn, To, Tl); Tka = FNMS(Tn, Tk, Tk9); TA = Ty * Tz; T8r = Ty * TD; TR = rio[WS(ios, 48)]; TV = iio[-WS(ios, 15)]; TE = FMA(TC, TD, TA); T8s = FNMS(TC, Tz, T8r); TS = TQ * TR; T8t = TQ * TV; } } } { E T8q, Tq, Tl7, Tkc, TW, T8u; T8q = T1 - Tp; Tq = T1 + Tp; Tl7 = Tkb - Tka; Tkc = Tka + Tkb; TW = FMA(TU, TV, TS); T8u = FNMS(TU, TR, T8t); { E TX, Tl8, T8v, Tk8; TX = TE + TW; Tl8 = TE - TW; T8v = T8s - T8u; Tk8 = T8s + T8u; Tl9 = Tl7 - Tl8; TlD = Tl8 + Tl7; TY = Tq + TX; Tg4 = Tq - TX; T8w = T8q - T8v; TdS = T8q + T8v; TkE = Tkc - Tk8; Tkd = Tk8 + Tkc; } } } { E T6H, Tcx, T72, TbG, T6P, Tcz, T6U, TbE; { E T6E, T6G, T6K, T6O; T6E = rio[WS(ios, 63)]; T6G = iio[0]; { E T6X, T71, T6F, Tcw, T6Y, TbF; T6X = rio[WS(ios, 47)]; T71 = iio[-WS(ios, 16)]; T6F = TL * T6E; Tcw = TL * T6G; T6Y = T6W * T6X; TbF = T6W * T71; T6H = FMA(TP, T6G, T6F); Tcx = FNMS(TP, T6E, Tcw); T72 = FMA(T70, T71, T6Y); TbG = FNMS(T70, T6X, TbF); } 
T6K = rio[WS(ios, 31)]; T6O = iio[-WS(ios, 32)]; { E T6R, T6T, T6L, Tcy, T6S, TbD; T6R = rio[WS(ios, 15)]; T6T = iio[-WS(ios, 48)]; T6L = T6J * T6K; Tcy = T6J * T6O; T6S = TK * T6R; TbD = TK * T6T; T6P = FMA(T6N, T6O, T6L); Tcz = FNMS(T6N, T6K, Tcy); T6U = FMA(TO, T6T, T6S); TbE = FNMS(TO, T6R, TbD); } } {
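/*
 * Keyboard shortcuts (text from the source-viewer page, not part of hf2_64.c):
 *   Copy code             Ctrl + C
 *   Search code           Ctrl + F
 *   Full-screen mode      F11
 *   Toggle theme          Ctrl + Shift + D
 *   Show shortcuts        ?
 *   Increase font size    Ctrl + =
 *   Decrease font size    Ctrl + -
 */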