📄 hf2_64.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:29:53 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 64 -dit -name hf2_64 -include hf.h *//* * This function contains 1154 FP additions, 840 FP multiplications, * (or, 520 additions, 206 multiplications, 634 fused multiply/add), * 371 stack variables, and 256 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hf.h"static const R *hf2_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 10, MAKE_VOLATILE_STRIDE(ios)) { E Tg0, TlC, TlB, Tg3; { E T4, T8, T5, T2, Tb, T3, T7, Td, Tt, T9, Tr, TH, T3g, T3i, TK; E T10, T3a, T14, T3d, T1i, Tu, T17, Te, T1k, Tx, Ti, T19, Tg, TF, T3G; E T2l, T2h, T3D, T3U, T1t, T3m, T4W, T4T, T3j, T1x, T3X, T5O, T48, TL, T79; E T1I, T2U, T2X, T1M, T7c, TP, T2u, T7B, T4b, T5R, T7E, T2x, T3M, T3Q, T4w; E T4A, T2c, T28, T7o, T6J, T6N, T7r, T1j, T6s, T6k, T5E, T6h, T5A, T1Y, T5v; E T80, T1o, T7W, T6d, T5r, T3z, T6w, T7k, T7Q, T34, T30, T7M, T22, T7g, T3v; E T69, T63, T5Z, T4H, Tn, T2N, T1R, T44, T1e, T2Q, T1V, T4E, Tj, T4f, T41; E T1a, T2v, T3k, T49, TM, T4s, TC, T3q, T4p, Ty, T2B, TT, T54, T8d, T8h; E T58, T5i, T6V, T6Z, T5m, TN; { E T1L, T1s, T1H, T1w, T2g, T2k, TI, TO, T2t, T3l, T3h, T4a, T2w, T47, Tc; E T6; T4 = W[2]; T8 = W[1]; T5 = W[0]; T2 = W[6]; Tb = W[5]; Tc = T4 * T8; T6 = T4 * T5; T3 = W[4]; T7 = W[3]; T1L = T2 * Tb; { E T13, TZ, TG, TJ; T1s = T2 * T4; T13 = T3 * T8; T1H = T2 * T3; TZ = T3 * T5; TG = T3 * T4; Td = FMA(T7, T5, Tc); Tt = FNMS(T7, T5, Tc); T9 = FNMS(T7, T8, T6); Tr = FMA(T7, T8, T6); TJ = T3 * T7; T1w = T2 * T7; T2g = T2 * T5; T2k = T2 * T8; TH = FNMS(Tb, T7, TG); T3g = FMA(Tb, T7, TG); T3i = FNMS(Tb, T4, TJ); TK = FMA(Tb, T4, TJ); T10 = FMA(Tb, T8, TZ); T3a = FNMS(Tb, T8, TZ); TI = T2 * TH; TO = T2 * TK; T2t = T2 * T10; T3l = T2 * T3i; { E Ts, Ta, Tw, Th; T14 = FNMS(Tb, T5, T13); T3d = FMA(Tb, T5, T13); T3h = T2 * T3g; T4a = T2 * T3d; T2w = T2 * T14; T47 = T2 * T3a; Ts = T3 * Tr; Ta = T3 * T9; Tw = T3 * Tt; Th = T3 * Td; T1i = FNMS(Tb, Tt, Ts); Tu = FMA(Tb, Tt, Ts); T17 = FNMS(Tb, Td, Ta); Te = FMA(Tb, Td, Ta); T1k = FMA(Tb, Tr, Tw); Tx = FNMS(Tb, Tr, Tw); Ti = FNMS(Tb, T9, Th); T19 = FMA(Tb, T9, Th); Tg = W[7]; TF = W[8]; } } { E T6I, T6M, T5D, T5z; T6I = T2 * T9; T6M = T2 * Td; T3G = FNMS(Tg, T5, T2k); T2l = FMA(Tg, T5, T2k); T2h = FNMS(Tg, T8, T2g); T3D = FMA(Tg, T8, T2g); T3U = FNMS(Tg, T7, T1s); T1t = FMA(Tg, T7, T1s); T3m = FMA(Tg, T3g, T3l); T4W = FNMS(Tg, T3g, T3l); T4T = FMA(Tg, T3i, T3h); T3j = FNMS(Tg, T3i, T3h); T1x = FNMS(Tg, T4, T1w); T3X = FMA(Tg, T4, T1w); T5O = FNMS(Tg, T3d, T47); T48 = FMA(Tg, T3d, T47); TL = FMA(Tg, TK, TI); T79 = FNMS(Tg, TK, TI); T1I = FNMS(Tg, Tb, T1H); T2U = FMA(Tg, Tb, T1H); T2X = FNMS(Tg, T3, T1L); T1M = FMA(Tg, T3, T1L); T7c = FMA(Tg, TH, TO); TP = FNMS(Tg, TH, TO); T2u = FNMS(Tg, T14, T2t); T7B = FMA(Tg, T14, T2t); T4b = FNMS(Tg, T3a, T4a); T5R = FMA(Tg, T3a, T4a); T7E = FNMS(Tg, T10, T2w); T2x = FMA(Tg, T10, T2w); T3M = TF * T5; T3Q = TF * T8; T4w = TF * T3; T4A = TF * Tb; T2c = TF * T7; T28 = TF * T4; T7o = FMA(Tg, Td, T6I); T6J = FNMS(Tg, Td, T6I); T6N = FMA(Tg, T9, T6M); T7r = FNMS(Tg, T9, T6M); T5D = T2 * Tt; T5z = T2 * Tr; T1j = TF * T1i; T6s = TF * T2U; T6k = FNMS(Tg, Tr, T5D); T5E = FMA(Tg, Tr, T5D); T6h = FMA(Tg, Tt, T5z); T5A = FNMS(Tg, Tt, T5z); T1Y = TF * Tu; T5v = TF * T3i; T80 = TF * Td; T1o = TF * T1k; T7W = TF * T9; T6d = TF * Tt; T5r = TF * T3g; T3z = TF * Ti; T6w = TF * T2X; T7k = TF * T14; T7Q = TF * TK; T34 = TF * T19; T30 = TF * T17; T7M = TF * TH; T22 = TF * Tx; T7g = TF * T10; T3v = TF * Te; { E Tf, T18, TB, Tv; { E Tm, T1Q, T1d, T1U; T69 = TF * Tr; T63 = TF * T3d; T5Z = TF * T3a; Tm = T2 * Ti; T1Q = T2 * T1i; T1d = T2 * T19; T1U = T2 * T1k; T4H = FMA(Tg, Te, Tm); Tn = FNMS(Tg, Te, Tm); T2N = FNMS(Tg, T1k, T1Q); T1R = FMA(Tg, T1k, T1Q); T44 = FNMS(Tg, T17, T1d); T1e = FMA(Tg, T17, T1d); T2Q = FMA(Tg, T1i, T1U); T1V = FNMS(Tg, T1i, T1U); Tf = T2 * Te; } T18 = T2 * T17; T4E = FNMS(Tg, Ti, Tf); Tj = FMA(Tg, Ti, Tf); T4f = TF * T4b; T41 = FMA(Tg, T19, T18); T1a = FNMS(Tg, T19, T18); T2v = TF * T2u; TB = T2 * Tx; T3k = TF * T3j; T49 = TF * T48; TM = TF * TL; T4s = FNMS(Tg, Tu, TB); TC = FMA(Tg, Tu, TB); Tv = T2 * Tu; T3q = TF * T3m; T4p = FMA(Tg, Tx, Tv); Ty = FNMS(Tg, Tx, Tv); T2B = TF * T2x; TT = TF * TP; } T54 = TF * T41; T8d = TF * T1R; T8h = TF * T1V; T58 = TF * T44; T5i = TF * T4E; T6V = TF * Ty; T6Z = TF * TC; T5m = TF * T4H; TN = W[9]; } } { E Tl9, TlD, TY, Tg4, Tkd, TkE, T8w, TdS, TeE, TbI, Tjb, Tht, TeP, TcB, Tha; E T74, TdT, T8D, Tk7, Tg7, TdU, T8K, TkD, T1B, T98, Te1, T2G, Tge, T9f, Te0; E Tgh, TiK, Tel, Taz, TiZ, Th2, Tew, Tbs, TgJ, T5d, T4k, Tgw, T9Y, Tec, TgD; E TiT, Tef, Tal, T9p, Te5, Tgs, TiN, T9M, Te8, T39, Tgl, T90, TdX, Tgc, TiJ; E T8T, TdY, T27, Tg9, Tgt, T3K, TiO, Tgo, Te9, T9E, Te6, T9P, T7v, Thu, Thd; E Tjc, TcE, TeF, TbX, TeQ, Th3, T5I, Tj0, TgM, Tex, TaO, Tem, Tbv, TiU, Tgz; E TgE, T4L, Ted, Tao, Teg, Tad, T7V, Tjg, Thw, Thj, TcH, Tcs, TeJ, TeS, Tjh; E T8m, TeT, TeM, Tcd, TcG, Tho, Thx, T68, Tj5, Teq, Tez, Tby, Tb4, Th5, TgS; E Tbb, Tes, TgT, T6B, TgW, Tj6, Ter, Tbi; { E T60, T5s, T3A, T64, T3w, T5w, T5n, T4x, T3n, T6e, T4B, T3r, T5j, T6a, T6t; E T7R, T7l, T7X, T7N, T7h, T8e, T6x, T81, T8i, T1J, T1G, T1K, T8O, T25, T8Y; E T1N, T1W, T1S; { E T4c, T3R, T3N, T4g, T59, T55, T35, T1Z, T23, T31, T2p, T9d, T2E, T2o, T99; E T96, Tgf, T2s, T9a; { E T2d, T29, T1p, T2y, T2C, T1l, TbG, T72, T6T, TbC, T6Q, Thr, TcA, TbD, T6S; { E TU, T6W, T70, TR, TE, T8s, Tq, T8q, Tkc, Tl7, TW, T8t; { E T1, Tkb, TQ, Tka, Tp, TS, TV; T1 = rio[0]; Tkb = iio[-WS(ios, 63)]; { E To, Tk, Tz, TD; To = iio[-WS(ios, 31)]; T60 = FMA(TN, T3d, T5Z); T4c = FNMS(TN, T4b, T49); T5s = FNMS(TN, T3i, T5r); T3A = FMA(TN, Te, T3z); T64 = FNMS(TN, T3a, T63); T3R = FNMS(TN, T5, T3Q); T3N = FMA(TN, T8, T3M); T3w = FNMS(TN, Ti, T3v); T5w = FMA(TN, T3g, T5v); T5n = FNMS(TN, T4E, T5m); T4g = FMA(TN, T48, T4f); T4x = FMA(TN, Tb, T4w); T3n = FMA(TN, T3m, T3k); T6e = FMA(TN, Tr, T6d); T59 = FMA(TN, T41, T58); T4B = FNMS(TN, T3, T4A); T3r = FNMS(TN, T3j, T3q); T5j = FMA(TN, T4H, T5i); T55 = FNMS(TN, T44, T54); T6a = FNMS(TN, Tt, T69); T6t = FNMS(TN, T2X, T6s); T35 = FNMS(TN, T17, T34); T7R = FNMS(TN, TH, T7Q); T2d = FNMS(TN, T4, T2c); T7l = FMA(TN, T10, T7k); T1Z = FNMS(TN, Tx, T1Y); T7X = FMA(TN, Td, T7W); T29 = FMA(TN, T7, T28); T23 = FMA(TN, Tu, T22); T1p = FNMS(TN, T1i, T1o); T7N = FMA(TN, TK, T7M); T7h = FNMS(TN, T14, T7g); T2y = FMA(TN, T2x, T2v); T2C = FNMS(TN, T2u, T2B); TU = FMA(TN, TL, TT); T8e = FNMS(TN, T1V, T8d); T6W = FMA(TN, TC, T6V); T31 = FMA(TN, T19, T30); T1l = FMA(TN, T1k, T1j); T70 = FNMS(TN, Ty, T6Z); T6x = FMA(TN, T2U, T6w); T81 = FNMS(TN, T9, T80); T8i = FMA(TN, T1R, T8h); TQ = FNMS(TN, TP, TM); Tk = rio[WS(ios, 32)]; Tz = rio[WS(ios, 16)]; TD = iio[-WS(ios, 47)]; TR = rio[WS(ios, 48)]; { E Tk9, Tl, TA, T8r; Tk9 = Tn * Tk; Tl = Tj * Tk; TA = Ty * Tz; T8r = Ty * TD; Tka = FMA(Tj, To, Tk9); Tp = FNMS(Tn, To, Tl); TE = FMA(TC, TD, TA); T8s = FNMS(TC, Tz, T8r); TS = TQ * TR; } TV = iio[-WS(ios, 15)]; } Tq = T1 + Tp; T8q = T1 - Tp; Tkc = Tka + Tkb; Tl7 = Tkb - Tka; TW = FMA(TU, TV, TS); T8t = TQ * TV; } { E T6H, Tcx, T6P, Tcz, T6R; { E T6E, T71, Tcw, T6X; { E T6F, T8v, Tk8, T6G, TX, Tl8, T8u; T6E = rio[WS(ios, 63)]; TX = TE + TW; Tl8 = TE - TW; T8u = FNMS(TU, TR, T8t); T6F = TF * T6E; Tl9 = Tl7 - Tl8; TlD = Tl8 + Tl7; TY = Tq + TX; Tg4 = Tq - TX; T8v = T8s - T8u; Tk8 = T8s + T8u; T6G = iio[0]; T71 = iio[-WS(ios, 16)]; Tkd = Tk8 + Tkc; TkE = Tkc - Tk8; T8w = T8q - T8v; TdS = T8q + T8v; T6H = FMA(TN, T6G, T6F); Tcw = TF * T6G; T6X = rio[WS(ios, 47)]; } { E T6K, T6O, TbF, T6Y, T6L, Tcy; T6K = rio[WS(ios, 31)]; T6O = iio[-WS(ios, 32)]; Tcx = FNMS(TN, T6E, Tcw); TbF = T70 * T6X; T6Y = T6W * T6X; T6L = T6J * T6K; Tcy = T6J * T6O; TbG = FMA(T6W, T71, TbF); T72 = FNMS(T70, T71, T6Y); T6P = FMA(T6N, T6O, T6L); Tcz = FNMS(T6N, T6K, Tcy); } T6T = iio[-WS(ios, 48)]; T6R = rio[WS(ios, 15)]; } TbC = T6H - T6P; T6Q = T6H + T6P; Thr = Tcx + Tcz; TcA = Tcx - Tcz; TbD = TP * T6R; T6S = TL * T6R; } } { E T8y, T16, T1q, T8I, T1z, T1g, T8A, T1m; { E T1y, T1u, T1b, T1f; { E T15, TbE, T6U, T11, TbH, Ths; T15 = iio[-WS(ios, 55)]; TbE = FMA(TL, T6T, TbD); T6U = FNMS(TP, T6T, T6S); T11 = rio[WS(ios, 8)]; T1y = iio[-WS(ios, 39)]; TbH = TbE - TbG; Ths = TbE + TbG; { E Tcv, T73, T8x, T12; Tcv = T72 - T6U; T73 = T6U + T72; T8x = T14 * T11;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -