📄 hf_64.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:56:48 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -dit -name hf_64 -include hf.h *//* * This function contains 1038 FP additions, 644 FP multiplications, * (or, 520 additions, 126 multiplications, 518 fused multiply/add), * 246 stack variables, 15 constants, and 256 memory accesses */#include "hf.h"static void hf_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT m; for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(rs)) { E Tku, Tky, Tkt, Tkx; { E TiV, Tjm, T7e, TcA, TjR, Tkl, Tm, TeM, T7Q, TcI, TeZ, Thr, T1G, TeW, TcJ; E T7X, T87, TcN, Tf5, Thw, T29, Tf8, TcQ, T8u, Taq, Tdm, Tgc, ThX, T5K, TfS; E Tdx, Tbj, TcB, T7l, TiP, TeP, Tjl, TN, TcC, T7s, T7B, TcF, TeU, Ths, T7I; E TcG, T1f, TeR, T8G, TcU, Tfg, ThB, T32, Tfj, TcX, T93, T9h, Td3, TfK, ThM; E T3X, Tfr, Tde, Taa, Thx, Tfb, Tf6, T2A, T8x, TcO, T8m, TcR, Tfm, ThC, T3t; E Tfh, T96, TcV, T8V, TcY, ThN, Tfu, TfL, T4o, Tad, Td4, T9w, Tdf, TfV, ThY; E T6b, Tg9, Tbm, Tdn, TaF, Tdy, ThJ, T4Q, TfN, TfA, Taf, T9M, Td8, Tdh, ThI; E T5h, TfO, TfF, Tag, Ta1, Tdb, Tdi, ThU, T6D, Tgf, Tg1, Tbo, TaV, Tdr, TdA; E Tb2, Tds, Tg5, ThT, Tg2, T74, Tdt, Tb9; { E T7a, Te, T78, T8, TjQ, TiU, T7c, Tk; { E T1, TiT, TiS, T7, Tg, Tj, Tf, Ti, T7b, Th; T1 = cr[0]; TiT = ci[0]; { E T3, T6, T2, T5; T3 = cr[WS(rs, 32)]; T6 = ci[WS(rs, 32)]; T2 = W[62]; T5 = W[63]; { E Ta, Td, Tc, T79, Tb, TiR, T4, T9; Ta = cr[WS(rs, 16)]; Td = ci[WS(rs, 16)]; TiR = T2 * T6; T4 = T2 * T3; T9 = W[30]; Tc = W[31]; TiS = FNMS(T5, T3, TiR); T7 = FMA(T5, T6, T4); T79 = T9 * Td; Tb = T9 * Ta; Tg = cr[WS(rs, 48)]; Tj = ci[WS(rs, 48)]; T7a = FNMS(Tc, Ta, T79); Te = FMA(Tc, Td, Tb); Tf = W[94]; Ti = W[95]; } } T78 = T1 - T7; T8 = T1 + T7; TjQ = TiT - TiS; TiU = TiS + TiT; T7b = Tf * Tj; Th = Tf * Tg; T7c = FNMS(Ti, Tg, T7b); Tk = FMA(Ti, Tj, Th); } { E T7S, T1l, T7O, T1E, T1u, T1x, T1w, T7U, T1r, T7L, T1v; { E T1A, T1D, T1C, T7N, T1B; { E T1h, T1k, T1g, T1j, T7R, T1i, T1z; T1h = cr[WS(rs, 60)]; T1k = ci[WS(rs, 60)]; { E T7d, TiQ, Tl, TjP; T7d = T7a - T7c; TiQ = T7a + T7c; Tl = Te + Tk; TjP = Te - Tk; TiV = TiQ + TiU; Tjm = TiU - TiQ; T7e = T78 - T7d; TcA = T78 + T7d; TjR = TjP + TjQ; Tkl = TjQ - TjP; Tm = T8 + Tl; TeM = T8 - Tl; T1g = W[118]; } T1j = W[119]; T1A = cr[WS(rs, 44)]; T1D = ci[WS(rs, 44)]; T7R = T1g * T1k; T1i = T1g * T1h; T1z = W[86]; T1C = W[87]; T7S = FNMS(T1j, T1h, T7R); T1l = FMA(T1j, T1k, T1i); T7N = T1z * T1D; T1B = T1z * T1A; } { E T1n, T1q, T1m, T1p, T7T, T1o, T1t; T1n = cr[WS(rs, 28)]; T1q = ci[WS(rs, 28)]; T7O = FNMS(T1C, T1A, T7N); T1E = FMA(T1C, T1D, T1B); T1m = W[54]; T1p = W[55]; T1u = cr[WS(rs, 12)]; T1x = ci[WS(rs, 12)]; T7T = T1m * T1q; T1o = T1m * T1n; T1t = W[22]; T1w = W[23]; T7U = FNMS(T1p, T1n, T7T); T1r = FMA(T1p, T1q, T1o); T7L = T1t * T1x; T1v = T1t * T1u; } } { E T7V, TeX, T1s, T7K, T7M, T1y; T7V = T7S - T7U; TeX = T7S + T7U; T1s = T1l + T1r; T7K = T1l - T1r; T7M = FNMS(T1w, T1u, T7L); T1y = FMA(T1w, T1x, T1v); { E TeY, T7P, T7W, T1F; TeY = T7M + T7O; T7P = T7M - T7O; T7W = T1y - T1E; T1F = T1y + T1E; T7Q = T7K - T7P; TcI = T7K + T7P; TeZ = TeX - TeY; Thr = TeX + TeY; T1G = T1s + T1F; TeW = T1s - T1F; TcJ = T7V - T7W; T7X = T7V + T7W; } } } } { E T8p, T1O, T85, T27, T1X, T20, T1Z, T8r, T1U, T82, T1Y; { E T23, T26, T25, T84, T24; { E T1K, T1N, T1J, T1M, T8o, T1L, T22; T1K = cr[WS(rs, 2)]; T1N = ci[WS(rs, 2)]; T1J = W[2]; T1M = W[3]; T23 = cr[WS(rs, 50)]; T26 = ci[WS(rs, 50)]; T8o = T1J * T1N; T1L = T1J * T1K; T22 = W[98]; T25 = W[99]; T8p = FNMS(T1M, T1K, T8o); T1O = FMA(T1M, T1N, T1L); T84 = T22 * T26; T24 = T22 * T23; } { E T1Q, T1T, T1P, T1S, T8q, T1R, T1W; T1Q = cr[WS(rs, 34)]; T1T = ci[WS(rs, 34)]; T85 = FNMS(T25, T23, T84); T27 = FMA(T25, T26, T24); T1P = W[66]; T1S = W[67]; T1X = cr[WS(rs, 18)]; T20 = ci[WS(rs, 18)]; T8q = T1P * T1T; T1R = T1P * T1Q; T1W = W[34]; T1Z = W[35]; T8r = FNMS(T1S, T1Q, T8q); T1U = FMA(T1S, T1T, T1R); T82 = T1W * T20; T1Y = T1W * T1X; } } { E T8s, Tf3, T1V, T81, T83, T21; T8s = T8p - T8r; Tf3 = T8p + T8r; T1V = T1O + T1U; T81 = T1O - T1U; T83 = FNMS(T1Z, T1X, T82); T21 = FMA(T1Z, T20, T1Y); { E Tf4, T86, T8t, T28; Tf4 = T83 + T85; T86 = T83 - T85; T8t = T21 - T27; T28 = T21 + T27; T87 = T81 - T86; TcN = T81 + T86; Tf5 = Tf3 - Tf4; Thw = Tf3 + Tf4; T29 = T1V + T28; Tf8 = T1V - T28; TcQ = T8s - T8t; T8u = T8s + T8t; } } } { E Tbf, T5p, Tao, T5I, T5y, T5B, T5A, Tbh, T5v, Tal, T5z; { E T5E, T5H, T5G, Tan, T5F; { E T5l, T5o, T5k, T5n, Tbe, T5m, T5D; T5l = cr[WS(rs, 63)]; T5o = ci[WS(rs, 63)]; T5k = W[124]; T5n = W[125]; T5E = cr[WS(rs, 47)]; T5H = ci[WS(rs, 47)]; Tbe = T5k * T5o; T5m = T5k * T5l; T5D = W[92]; T5G = W[93]; Tbf = FNMS(T5n, T5l, Tbe); T5p = FMA(T5n, T5o, T5m); Tan = T5D * T5H; T5F = T5D * T5E; } { E T5r, T5u, T5q, T5t, Tbg, T5s, T5x; T5r = cr[WS(rs, 31)]; T5u = ci[WS(rs, 31)]; Tao = FNMS(T5G, T5E, Tan); T5I = FMA(T5G, T5H, T5F); T5q = W[60]; T5t = W[61]; T5y = cr[WS(rs, 15)]; T5B = ci[WS(rs, 15)]; Tbg = T5q * T5u; T5s = T5q * T5r; T5x = W[28]; T5A = W[29]; Tbh = FNMS(T5t, T5r, Tbg); T5v = FMA(T5t, T5u, T5s); Tal = T5x * T5B; T5z = T5x * T5y; } } { E Tbi, Tga, T5w, Tak, Tam, T5C; Tbi = Tbf - Tbh; Tga = Tbf + Tbh; T5w = T5p + T5v; Tak = T5p - T5v; Tam = FNMS(T5A, T5y, Tal); T5C = FMA(T5A, T5B, T5z); { E Tgb, Tap, T5J, Tbd; Tgb = Tam + Tao; Tap = Tam - Tao; T5J = T5C + T5I; Tbd = T5I - T5C; Taq = Tak - Tap; Tdm = Tak + Tap; Tgc = Tga - Tgb; ThX = Tga + Tgb; T5K = T5w + T5J; TfS = T5w - T5J; Tdx = Tbi + Tbd; Tbj = Tbd - Tbi; } } } { E T7z, T1d, T7G, TeS, T11, T7v, T7x, T17, T7r, T7m; { E T7h, Ts, T7q, TL, TB, TE, TD, T7j, Ty, T7n, TC; { E TH, TK, TJ, T7p, TI; { E To, Tr, Tn, Tq, T7g, Tp, TG; To = cr[WS(rs, 8)]; Tr = ci[WS(rs, 8)]; Tn = W[14]; Tq = W[15]; TH = cr[WS(rs, 24)]; TK = ci[WS(rs, 24)]; T7g = Tn * Tr; Tp = Tn * To; TG = W[46]; TJ = W[47]; T7h = FNMS(Tq, To, T7g); Ts = FMA(Tq, Tr, Tp); T7p = TG * TK; TI = TG * TH; } { E Tu, Tx, Tt, Tw, T7i, Tv, TA; Tu = cr[WS(rs, 40)]; Tx = ci[WS(rs, 40)]; T7q = FNMS(TJ, TH, T7p); TL = FMA(TJ, TK, TI); Tt = W[78]; Tw = W[79]; TB = cr[WS(rs, 56)]; TE = ci[WS(rs, 56)]; T7i = Tt * Tx; Tv = Tt * Tu; TA = W[110]; TD = W[111]; T7j = FNMS(Tw, Tu, T7i); Ty = FMA(Tw, Tx, Tv); T7n = TA * TE; TC = TA * TB; } } { E T7k, TeO, Tz, T7f, T7o, TF, TeN, TM; T7k = T7h - T7j; TeO = T7h + T7j; Tz = Ts + Ty; T7f = Ts - Ty; T7o = FNMS(TD, TB, T7n); TF = FMA(TD, TE, TC); T7r = T7o - T7q; TeN = T7o + T7q; TM = TF + TL; T7m = TF - TL; TcB = T7f + T7k; T7l = T7f - T7k; TiP = TeO + TeN; TeP = TeN - TeO; Tjl = Tz - TM; TN = Tz + TM; } } { E T7D, TU, T13, T16, T7F, T10, T12, T15, T7w, T14; { E T19, T1c, T18, T1b; { E TQ, TT, TS, T7C, TR, TP; TQ = cr[WS(rs, 4)]; TT = ci[WS(rs, 4)]; TP = W[6]; TcC = T7m - T7r; T7s = T7m + T7r; TS = W[7]; T7C = TP * TT; TR = TP * TQ; T19 = cr[WS(rs, 52)]; T1c = ci[WS(rs, 52)]; T7D = FNMS(TS, TQ, T7C); TU = FMA(TS, TT, TR); T18 = W[102]; T1b = W[103]; } { E TW, TZ, TY, T7E, TX, T7y, T1a, TV; TW = cr[WS(rs, 36)]; TZ = ci[WS(rs, 36)]; T7y = T18 * T1c; T1a = T18 * T19; TV = W[70]; TY = W[71]; T7z = FNMS(T1b, T19, T7y); T1d = FMA(T1b, T1c, T1a); T7E = TV * TZ; TX = TV * TW; T13 = cr[WS(rs, 20)]; T16 = ci[WS(rs, 20)]; T7F = FNMS(TY, TW, T7E); T10 = FMA(TY, TZ, TX); T12 = W[38]; T15 = W[39]; } } T7G = T7D - T7F; TeS = T7D + T7F; T11 = TU + T10; T7v = TU - T10; T7w = T12 * T16; T14 = T12 * T13; T7x = FNMS(T15, T13, T7w); T17 = FMA(T15, T16, T14); } { E T8Y, T2H, T8E, T30, T2Q, T2T, T2S, T90, T2N, T8B, T2R; { E T2W, T2Z, T2Y, T8D, T2X; { E T2D, T2G, T2C, T2F, T8X, T2E, T2V; T2D = cr[WS(rs, 62)]; T2G = ci[WS(rs, 62)]; { E TeT, T7A, T1e, T7H; TeT = T7x + T7z; T7A = T7x - T7z; T1e = T17 + T1d; T7H = T17 - T1d; T7B = T7v - T7A; TcF = T7v + T7A;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -