📄 hf_64.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:21:05 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -dit -name hf_64 -include hf.h *//* * This function contains 1038 FP additions, 644 FP multiplications, * (or, 520 additions, 126 multiplications, 518 fused multiply/add), * 230 stack variables, and 256 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hf.h"static const R *hf_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126, MAKE_VOLATILE_STRIDE(ios)) { E TeI, Tkk, Tkj, TeL; { E TiV, Tjm, T7e, TcA, TjR, Tkl, Tm, TeM, Taq, Tdm, Tgb, ThT, T5K, TfS, Tdx; E Tbj, TcB, T7l, TiP, TeP, Tjl, TN, TcC, T7s, T7X, TcI, TeZ, Ths, T7Q, TcJ; E T1G, TeW, T9h, Td3, TfK, ThH, T3X, Tfr, Tde, Taa, T8G, TcU, Tfl, ThB, T32; E Tfe, TcX, T93, T87, TcN, Tfa, Thv, T29, Tf3, TcQ, T8u, TeU, Thr, T7B, TcG; E T1f, TeR, TcF, T7I, Thw, Tf6, Tfb, T2A, TcR, T8m, TcO, T8x, ThU, TfV, Tgc; E T6b, Tdy, TaF, Tdn, Tbm, ThI, Tfu, TfL, T4o, Tdf, T9w, Td4, Tad, ThC, Tfh; E Tfm, T3t, TcY, T8V, TcV, T96, ThY, T6D, Tge, Tg1, Tbp, Tba, Tdr, TdA, ThZ; E T74, Tgf, Tg6, Tbo, TaV, Tdu, TdB, ThN, T4Q, TfN, TfA, Tag, T9M, Td8, Tdh; E TfE, ThO, T9T, Tda, TfB, T5h, Td9, Ta0, T94, T95; { E T7V, T1E, T7O, TeX, T1s, T7R, T7T, T1y, T7r, T7m; { E T7a, Te, T78, T8, TjP, TiU, T7c, Tk; { E T1, TiT, TiS, T7, Tg, Tj, Tf, Ti, T7b, Th; T1 = rio[0]; TiT = iio[-WS(ios, 63)]; { E T3, T6, T2, T5; T3 = rio[WS(ios, 32)]; T6 = iio[-WS(ios, 31)]; T2 = W[62]; T5 = W[63]; { E Ta, Td, Tc, T79, Tb, TiR, T4, T9; Ta = rio[WS(ios, 16)]; Td = iio[-WS(ios, 47)]; TiR = T2 * T6; T4 = T2 * T3; T9 = W[30]; Tc = W[31]; TiS = FNMS(T5, T3, TiR); T7 = FMA(T5, T6, T4); T79 = T9 * Td; Tb = T9 * Ta; Tg = rio[WS(ios, 48)]; Tj = iio[-WS(ios, 15)]; T7a = FNMS(Tc, Ta, T79); Te = FMA(Tc, Td, Tb); Tf = W[94]; Ti = W[95]; } } T78 = T1 - T7; T8 = T1 + T7; TjP = TiT - TiS; TiU = TiS + TiT; T7b = Tf * Tj; Th = Tf * Tg; T7c = FNMS(Ti, Tg, T7b); Tk = FMA(Ti, Tj, Th); } { E Tbf, T5p, Tao, T5I, T5y, T5B, T5A, Tbh, T5v, Tal, T5z; { E T5E, T5H, T5G, Tan, T5F; { E T5l, T5o, T5k, T5n, Tbe, T5m, T5D; T5l = rio[WS(ios, 63)]; T5o = iio[0]; { E T7d, TiQ, Tl, TjQ; T7d = T7a - T7c; TiQ = T7a + T7c; Tl = Te + Tk; TjQ = Te - Tk; TiV = TiQ + TiU; Tjm = TiU - TiQ; T7e = T78 - T7d; TcA = T78 + T7d; TjR = TjP - TjQ; Tkl = TjQ + TjP; Tm = T8 + Tl; TeM = T8 - Tl; T5k = W[124]; } T5n = W[125]; T5E = rio[WS(ios, 47)]; T5H = iio[-WS(ios, 16)]; Tbe = T5k * T5o; T5m = T5k * T5l; T5D = W[92]; T5G = W[93]; Tbf = FNMS(T5n, T5l, Tbe); T5p = FMA(T5n, T5o, T5m); Tan = T5D * T5H; T5F = T5D * T5E; } { E T5r, T5u, T5q, T5t, Tbg, T5s, T5x; T5r = rio[WS(ios, 31)]; T5u = iio[-WS(ios, 32)]; Tao = FNMS(T5G, T5E, Tan); T5I = FMA(T5G, T5H, T5F); T5q = W[60]; T5t = W[61]; T5y = rio[WS(ios, 15)]; T5B = iio[-WS(ios, 48)]; Tbg = T5q * T5u; T5s = T5q * T5r; T5x = W[28]; T5A = W[29]; Tbh = FNMS(T5t, T5r, Tbg); T5v = FMA(T5t, T5u, T5s); Tal = T5x * T5B; T5z = T5x * T5y; } } { E Tbi, Tg9, T5w, Tak, Tam, T5C; Tbi = Tbf - Tbh; Tg9 = Tbf + Tbh; T5w = T5p + T5v; Tak = T5p - T5v; Tam = FNMS(T5A, T5y, Tal); T5C = FMA(T5A, T5B, T5z); { E Tga, Tap, T5J, Tbd; Tga = Tam + Tao; Tap = Tam - Tao; T5J = T5C + T5I; Tbd = T5I - T5C; Taq = Tak - Tap; Tdm = Tak + Tap; Tgb = Tg9 - Tga; ThT = Tg9 + Tga; T5K = T5w + T5J; TfS = T5w - T5J; Tdx = Tbi + Tbd; Tbj = Tbd - Tbi; } } } } { E T7g, Ts, T7q, TL, TB, TE, TD, T7i, Ty, T7n, TC; { E TH, TK, TJ, T7p, TI; { E To, Tr, Tn, Tq, T7f, Tp, TG; To = rio[WS(ios, 8)]; Tr = iio[-WS(ios, 55)]; Tn = W[14]; Tq = W[15]; TH = rio[WS(ios, 24)]; TK = iio[-WS(ios, 39)]; T7f = Tn * Tr; Tp = Tn * To; TG = W[46]; TJ = W[47]; T7g = FNMS(Tq, To, T7f); Ts = FMA(Tq, Tr, Tp); T7p = TG * TK; TI = TG * TH; } { E Tu, Tx, Tt, Tw, T7h, Tv, TA; Tu = rio[WS(ios, 40)]; Tx = iio[-WS(ios, 23)]; T7q = FNMS(TJ, TH, T7p); TL = FMA(TJ, TK, TI); Tt = W[78]; Tw = W[79]; TB = rio[WS(ios, 56)]; TE = iio[-WS(ios, 7)]; T7h = Tt * Tx; Tv = Tt * Tu; TA = W[110]; TD = W[111]; T7i = FNMS(Tw, Tu, T7h); Ty = FMA(Tw, Tx, Tv); T7n = TA * TE; TC = TA * TB; } } { E T7j, TeN, Tz, T7k, T7o, TF, TeO, TM; T7j = T7g - T7i; TeN = T7g + T7i; Tz = Ts + Ty; T7k = Ts - Ty; T7o = FNMS(TD, TB, T7n); TF = FMA(TD, TE, TC); T7r = T7o - T7q; TeO = T7o + T7q; TM = TF + TL; T7m = TF - TL; TcB = T7k + T7j; T7l = T7j - T7k; TiP = TeN + TeO; TeP = TeN - TeO; Tjl = TM - Tz; TN = Tz + TM; } } { E T7L, T1l, T1u, T1x, T7N, T1r, T1t, T1w, T7S, T1v; { E T1A, T1D, T1z, T1C; { E T1h, T1k, T1j, T7K, T1i, T1g; T1h = rio[WS(ios, 60)]; T1k = iio[-WS(ios, 3)]; T1g = W[118]; TcC = T7m - T7r; T7s = T7m + T7r; T1j = W[119]; T7K = T1g * T1k; T1i = T1g * T1h; T1A = rio[WS(ios, 44)]; T1D = iio[-WS(ios, 19)]; T7L = FNMS(T1j, T1h, T7K); T1l = FMA(T1j, T1k, T1i); T1z = W[86]; T1C = W[87]; } { E T1n, T1q, T1p, T7M, T1o, T7U, T1B, T1m; T1n = rio[WS(ios, 28)]; T1q = iio[-WS(ios, 35)]; T7U = T1z * T1D; T1B = T1z * T1A; T1m = W[54]; T1p = W[55]; T7V = FNMS(T1C, T1A, T7U); T1E = FMA(T1C, T1D, T1B); T7M = T1m * T1q; T1o = T1m * T1n; T1u = rio[WS(ios, 12)]; T1x = iio[-WS(ios, 51)]; T7N = FNMS(T1p, T1n, T7M); T1r = FMA(T1p, T1q, T1o); T1t = W[22]; T1w = W[23]; } } T7O = T7L - T7N; TeX = T7L + T7N; T1s = T1l + T1r; T7R = T1l - T1r; T7S = T1t * T1x; T1v = T1t * T1u; T7T = FNMS(T1w, T1u, T7S); T1y = FMA(T1w, T1x, T1v); } { E Ta5, T3C, T9f, T3V, T3L, T3O, T3N, Ta7, T3I, T9c, T3M; { E T3R, T3U, T3T, T9e, T3S; { E T3y, T3B, T3x, T3A, Ta4, T3z, T3Q; T3y = rio[WS(ios, 1)]; T3B = iio[-WS(ios, 62)]; { E TeY, T7W, T1F, T7P; TeY = T7T + T7V; T7W = T7T - T7V; T1F = T1y + T1E; T7P = T1y - T1E; T7X = T7R - T7W; TcI = T7R + T7W; TeZ = TeX - TeY; Ths = TeX + TeY; T7Q = T7O + T7P; TcJ = T7O - T7P; T1G = T1s + T1F; TeW = T1s - T1F; T3x = W[0]; } T3A = W[1]; T3R = rio[WS(ios, 49)]; T3U = iio[-WS(ios, 14)]; Ta4 = T3x * T3B; T3z = T3x * T3y; T3Q = W[96]; T3T = W[97]; Ta5 = FNMS(T3A, T3y, Ta4); T3C = FMA(T3A, T3B, T3z); T9e = T3Q * T3U; T3S = T3Q * T3R; } { E T3E, T3H, T3D, T3G, Ta6, T3F, T3K; T3E = rio[WS(ios, 33)]; T3H = iio[-WS(ios, 30)]; T9f = FNMS(T3T, T3R, T9e); T3V = FMA(T3T, T3U, T3S); T3D = W[64]; T3G = W[65]; T3L = rio[WS(ios, 17)]; T3O = iio[-WS(ios, 46)]; Ta6 = T3D * T3H; T3F = T3D * T3E; T3K = W[32]; T3N = W[33]; Ta7 = FNMS(T3G, T3E, Ta6); T3I = FMA(T3G, T3H, T3F); T9c = T3K * T3O; T3M = T3K * T3L; } } { E Ta8, TfI, T3J, T9b, T9d, T3P; Ta8 = Ta5 - Ta7; TfI = Ta5 + Ta7; T3J = T3C + T3I; T9b = T3C - T3I; T9d = FNMS(T3N, T3L, T9c); T3P = FMA(T3N, T3O, T3M); { E TfJ, T9g, Ta9, T3W; TfJ = T9d + T9f; T9g = T9d - T9f; Ta9 = T3P - T3V; T3W = T3P + T3V; T9h = T9b - T9g; Td3 = T9b + T9g; TfK = TfI - TfJ; ThH = TfI + TfJ; T3X = T3J + T3W; Tfr = T3J - T3W; Tde = Ta8 - Ta9; Taa = Ta8 + Ta9; } } } { E T8Y, T2H, T8E, T30, T2Q, T2T, T2S, T90, T2N, T8B, T2R; { E T2W, T2Z, T2Y, T8D, T2X; { E T2D, T2G, T2C, T2F, T8X, T2E, T2V; T2D = rio[WS(ios, 62)]; T2G = iio[-WS(ios, 1)]; T2C = W[122]; T2F = W[123]; T2W = rio[WS(ios, 46)]; T2Z = iio[-WS(ios, 17)]; T8X = T2C * T2G; T2E = T2C * T2D; T2V = W[90]; T2Y = W[91]; T8Y = FNMS(T2F, T2D, T8X); T2H = FMA(T2F, T2G, T2E); T8D = T2V * T2Z; T2X = T2V * T2W; } { E T2J, T2M, T2I, T2L, T8Z, T2K, T2P; T2J = rio[WS(ios, 30)]; T2M = iio[-WS(ios, 33)]; T8E = FNMS(T2Y, T2W, T8D); T30 = FMA(T2Y, T2Z, T2X); T2I = W[58]; T2L = W[59]; T2Q = rio[WS(ios, 14)]; T2T = iio[-WS(ios, 49)]; T8Z = T2I * T2M; T2K = T2I * T2J; T2P = W[26]; T2S = W[27]; T90 = FNMS(T2L, T2J, T8Z); T2N = FMA(T2L, T2M, T2K); T8B = T2P * T2T; T2R = T2P * T2Q; } } { E T91, Tfj, T2O, T8A, T8C, T2U; T91 = T8Y - T90; Tfj = T8Y + T90; T2O = T2H + T2N; T8A = T2H - T2N; T8C = FNMS(T2S, T2Q, T8B); T2U = FMA(T2S, T2T, T2R); { E Tfk, T8F, T92, T31; Tfk = T8C + T8E; T8F = T8C - T8E; T92 = T2U - T30; T31 = T2U + T30; T8G = T8A - T8F; TcU = T8A + T8F; Tfl = Tfj - Tfk; ThB = Tfj + Tfk; T32 = T2O + T31; Tfe = T2O - T31; TcX = T91 - T92; T93 = T91 + T92;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -