📄 hb_64.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:44:19 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -dif -name hb_64 -include hb.h *//* * This function contains 1038 FP additions, 644 FP multiplications, * (or, 520 additions, 126 multiplications, 518 fused multiply/add), * 234 stack variables, and 256 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hb.h"static const R *hb_64(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 126, MAKE_VOLATILE_STRIDE(ios)) { E Tgv, Tgs, Tgr; { E T8v, Ta9, Tv, T9O, Tj7, TgN, TjA, ThQ, Tj6, ThN, T6y, T2v, T64, T2c, T6x; E T4L, TeV, Tbv, Tfn, Tbg, Tjz, TgG, T65, T4O, T96, T7S, Ta8, T8y, Tfo, TdE; E TeU, TdB, T10, Ta7, T9R, TaN, T2P, T4Q, T8A, T7X, T4R, T38, T8B, T82, T6A; E T69, T6B, T6c, TeZ, Tfq, TdG, TbP, Tjb, TjC, ThS, TgV, Tf2, Tfr, TdH, Tc8; E Tje, TjD, ThT, Th2, T9T, T1w, T9a, T8e, TaF, T9W, T99, T89, T6k, T7a, T5p; E T3D, T6h, T79, T5q, T3Q, Tf7, Tg0, Teg, TcB, Tfa, TfZ, Tef, TcM, Tjj, Tkc; E Tis, Thf, Tjm, Tkb, Tir, Thm, T21, T9Y, T9d, T8p, TaG, Ta1, T9c, T8k, T6r; E T7d, T5s, T4k, T6o, T7c, T5t, T4x, Tfe, Tg3, Tej, Tde, Thq, Tjr, Tfh, Tg2; E Tei, Tdp, ThC, ThD, Tjo, ThB, Tjp, Thx; { E Tc3, TgW, TgZ, TgX, Tc0, Tf1, Tc6, Th0, TdA, Tdx; { E Tdw, T4G, Tb7, T4J, Tb6, Tdv, T24, T7, T27, T2a, Tbe, Tdy, Tbb, Tdz, Te; E T4D, Tbt, TgK, T2m, Tt, Tbq, TgL, T7Q, T2t, Tj, Tbh, Ti, Tbl, T2g, Tk; E T2h, T2i; { E T1, T2, T4, T5; { E T4E, T4F, T4H, T4I; T4E = iio[0]; T4F = rio[WS(ios, 32)]; T4H = iio[-WS(ios, 16)]; T4I = rio[WS(ios, 48)]; T1 = rio[0]; Tdw = T4E + T4F; T4G = T4E - T4F; Tb7 = T4H + T4I; T4J = T4H - T4I; T2 = iio[-WS(ios, 32)]; T4 = rio[WS(ios, 16)]; T5 = iio[-WS(ios, 48)]; } { E Ta, Tba, Tb9, Td; { E Tbc, T28, Tbd, T29, Tb, Tc, T8, T9; { E T25, T3, T6, T26; T25 = iio[-WS(ios, 8)]; Tb6 = T1 - T2; T3 = T1 + T2; Tdv = T4 - T5; T6 = T4 + T5; T26 = rio[WS(ios, 40)]; T8 = rio[WS(ios, 8)]; T9 = iio[-WS(ios, 40)]; T24 = T3 - T6; T7 = T3 + T6; Tbc = T25 + T26; T27 = T25 - T26; } T28 = iio[-WS(ios, 24)]; Tbd = T8 - T9; Ta = T8 + T9; T29 = rio[WS(ios, 56)]; Tb = iio[-WS(ios, 56)]; Tc = rio[WS(ios, 24)]; Tba = T29 + T28; T2a = T28 - T29; Tbe = Tbc - Tbd; Tdy = Tbd + Tbc; Tb9 = Tb - Tc; Td = Tb + Tc; } Tbb = Tb9 - Tba; Tdz = Tb9 + Tba; Te = Ta + Td; T4D = Td - Ta; { E Tq, Tbo, Tp, Tbs, T2p, Tr, T2q, T2r; { E Tn, To, T2n, T2o; Tn = iio[-WS(ios, 60)]; To = rio[WS(ios, 28)]; T2n = iio[-WS(ios, 28)]; T2o = rio[WS(ios, 60)]; Tq = rio[WS(ios, 12)]; Tbo = Tn - To; Tp = Tn + To; Tbs = T2o + T2n; T2p = T2n - T2o; Tr = iio[-WS(ios, 44)]; T2q = iio[-WS(ios, 12)]; T2r = rio[WS(ios, 44)]; } { E Tg, Th, T2e, T2f; Tg = rio[WS(ios, 4)]; { E Tbr, Ts, Tbp, T2s; Tbr = Tq - Tr; Ts = Tq + Tr; Tbp = T2q + T2r; T2s = T2q - T2r; Tbt = Tbr - Tbs; TgK = Tbr + Tbs; T2m = Tp - Ts; Tt = Tp + Ts; Tbq = Tbo - Tbp; TgL = Tbo + Tbp; T7Q = T2p + T2s; T2t = T2p - T2s; Th = iio[-WS(ios, 36)]; } T2e = iio[-WS(ios, 4)]; T2f = rio[WS(ios, 36)]; Tj = rio[WS(ios, 20)]; Tbh = Tg - Th; Ti = Tg + Th; Tbl = T2e + T2f; T2g = T2e - T2f; Tk = iio[-WS(ios, 52)]; T2h = iio[-WS(ios, 20)]; T2i = rio[WS(ios, 52)]; } } } } { E T7O, Tbm, Tbj, T7R, Tb8, T4M, T2l, T2u, T4N, T2b, T8x; { E T2d, T2k, ThO, TgJ, TgM, ThP, ThL, ThM; { E Tf, TgH, TgI, Tu, T7P; T7O = T7 - Te; Tf = T7 + Te; { E Tbk, Tl, Tbi, T2j, Tm; Tbk = Tj - Tk; Tl = Tj + Tk; Tbi = T2h + T2i; T2j = T2h - T2i; Tbm = Tbk + Tbl; TgH = Tbl - Tbk; T2d = Ti - Tl; Tm = Ti + Tl; Tbj = Tbh - Tbi; TgI = Tbh + Tbi; T7P = T2g + T2j; T2k = T2g - T2j; T8v = Tt - Tm; Tu = Tm + Tt; } Ta9 = T7P + T7Q; T7R = T7P - T7Q; ThO = FMA(KP414213562, TgH, TgI); TgJ = FNMS(KP414213562, TgI, TgH); TgM = FNMS(KP414213562, TgL, TgK); ThP = FMA(KP414213562, TgK, TgL); Tv = Tf + Tu; T9O = Tf - Tu; } Tj7 = TgJ + TgM; TgN = TgJ - TgM; Tb8 = Tb6 - Tb7; ThL = Tb6 + Tb7; ThM = Tdy + Tdz; TdA = Tdy - Tdz; T4M = T2k - T2d; T2l = T2d + T2k; TjA = ThO - ThP; ThQ = ThO + ThP; Tj6 = FNMS(KP707106781, ThM, ThL); ThN = FMA(KP707106781, ThM, ThL); T2u = T2m - T2t; T4N = T2m + T2t; T2b = T27 - T2a; T8x = T27 + T2a; } { E T8w, TdC, TdD, Tbf, TgF, TgE, T4K, Tbn, Tbu; T6y = T2l - T2u; T2v = T2l + T2u; T64 = T24 - T2b; T2c = T24 + T2b; T4K = T4G - T4J; T8w = T4G + T4J; TdC = FMA(KP414213562, Tbj, Tbm); Tbn = FNMS(KP414213562, Tbm, Tbj); Tbu = FMA(KP414213562, Tbt, Tbq); TdD = FNMS(KP414213562, Tbq, Tbt); T6x = T4K - T4D; T4L = T4D + T4K; TeV = Tbu - Tbn; Tbv = Tbn + Tbu; Tbf = Tbb - Tbe; TgF = Tbe + Tbb; Tdx = Tdv + Tdw; TgE = Tdw - Tdv; Tfn = FNMS(KP707106781, Tbf, Tb8); Tbg = FMA(KP707106781, Tbf, Tb8); Tjz = FNMS(KP707106781, TgF, TgE); TgG = FMA(KP707106781, TgF, TgE); T65 = T4N - T4M; T4O = T4M + T4N; T96 = T7O - T7R; T7S = T7O + T7R; Ta8 = T8x + T8w; T8y = T8w - T8x; Tfo = TdC - TdD; TdE = TdC + TdD; } } } { E TbK, TgP, T2x, TC, Tbz, TgS, T7U, T2N, TbV, Tc4, TY, T2Z, Tc5, TbY, T2X; E T80, TbF, TbL, TJ, T2G, TbM, TbC, T2E, T7V, TbQ, TN, T33, Tc2, T32, Tc1; E TQ, T34; { E T2T, TbW, T2W, TbX; { E Tbx, Ty, T2K, TbJ, T2J, TbI, TB, T2L; { E T2H, T2I, Tw, Tx, Tz, TA; Tw = rio[WS(ios, 2)]; Tx = iio[-WS(ios, 34)]; T2H = iio[-WS(ios, 2)]; TeU = FNMS(KP707106781, TdA, Tdx); TdB = FMA(KP707106781, TdA, Tdx); Tbx = Tw - Tx; Ty = Tw + Tx; T2I = rio[WS(ios, 34)]; Tz = rio[WS(ios, 18)]; TA = iio[-WS(ios, 50)]; T2K = iio[-WS(ios, 18)]; TbJ = T2H + T2I; T2J = T2H - T2I; TbI = Tz - TA; TB = Tz + TA; T2L = rio[WS(ios, 50)]; } { E TbT, TU, T2U, TbU, TX, T2V; { E T2R, T2S, TV, TW; { E TS, Tby, T2M, TT; TS = rio[WS(ios, 6)]; TbK = TbI + TbJ; TgP = TbJ - TbI; T2x = Ty - TB; TC = Ty + TB; Tby = T2K + T2L; T2M = T2K - T2L; TT = iio[-WS(ios, 38)]; T2R = iio[-WS(ios, 6)]; Tbz = Tbx - Tby; TgS = Tbx + Tby; T7U = T2J + T2M; T2N = T2J - T2M; TbT = TS - TT; TU = TS + TT; T2S = rio[WS(ios, 38)]; } TV = iio[-WS(ios, 54)]; TW = rio[WS(ios, 22)]; T2U = iio[-WS(ios, 22)]; TbU = T2R + T2S; T2T = T2R - T2S; TbW = TV - TW; TX = TV + TW; T2V = rio[WS(ios, 54)]; } TbV = TbT - TbU; Tc4 = TbT + TbU; TY = TU + TX; T2Z = TX - TU; T2W = T2U - T2V; TbX = T2V + T2U; } } { E T2A, TbA, T2D, TbB; { E TbE, TF, T2B, TbD, TI, T2C; { E T2y, T2z, TD, TE, TG, TH; TD = rio[WS(ios, 10)]; TE = iio[-WS(ios, 42)]; Tc5 = TbW + TbX; TbY = TbW - TbX; T2X = T2T - T2W; T80 = T2T + T2W; TbE = TD - TE; TF = TD + TE; T2y = iio[-WS(ios, 10)]; T2z = rio[WS(ios, 42)]; TG = iio[-WS(ios, 58)]; TH = rio[WS(ios, 26)]; T2B = iio[-WS(ios, 26)]; TbD = T2y + T2z; T2A = T2y - T2z; TbA = TG - TH; TI = TG + TH; T2C = rio[WS(ios, 58)]; } TbF = TbD - TbE; TbL = TbE + TbD; TJ = TF + TI; T2G = TI - TF; T2D = T2B - T2C; TbB = T2C + T2B; } { E T30, T31, TL, TM, TO, TP; TL = iio[-WS(ios, 62)]; TM = rio[WS(ios, 30)]; TbM = TbA + TbB; TbC = TbA - TbB; T2E = T2A - T2D; T7V = T2A + T2D; TbQ = TL - TM; TN = TL + TM; T30 = iio[-WS(ios, 30)]; T31 = rio[WS(ios, 62)]; TO = rio[WS(ios, 14)]; TP = iio[-WS(ios, 46)]; T33 = iio[-WS(ios, 14)]; Tc2 = T31 + T30; T32 = T30 - T31; Tc1 = TO - TP; TQ = TO + TP; T34 = rio[WS(ios, 46)]; } } } { E T7Y, TbS, T67, T81, T68, T6a, T2Y, T37, T6b; { E T7T, T2Q, T9P, T7W, T36, T2F, T9Q, T2O; { E TK, TR, TbR, T35, T7Z, TZ; T7T = TC - TJ; TK = TC + TJ; Tc3 = Tc1 - Tc2; TgW = Tc1 + Tc2; T2Q = TN - TQ; TR = TN + TQ; TbR = T33 + T34; T35 = T33 - T34; T9P = T7V + T7U; T7W = T7U - T7V; T7Y = TR - TY; TZ = TR + TY; TbS = TbQ - TbR; TgZ = TbQ + TbR; T7Z = T32 + T35; T36 = T32 - T35; T10 = TK + TZ; Ta7 = TZ - TK; T67 = T2x - T2E; T2F = T2x + T2E; T9Q = T80 + T7Z; T81 = T7Z - T80; T68 = T2N - T2G; T2O = T2G + T2N; } T9R = T9P - T9Q; TaN = T9P + T9Q; T2P = FMA(KP414213562, T2O, T2F); T4Q = FNMS(KP414213562, T2F, T2O); T8A = T7W - T7T; T7X = T7T + T7W; T6a = T2Q - T2X; T2Y = T2Q + T2X; T37 = T2Z + T36; T6b = T36 - T2Z; } { E TgQ, TbH, TeY, TbN, TgT, TbG, TbZ, TeX, TbO; T4R = FMA(KP414213562, T2Y, T37); T38 = FNMS(KP414213562, T37, T2Y); T8B = T7Y + T81; T82 = T7Y - T81; TgQ = TbF + TbC; TbG = TbC - TbF; T6A = FMA(KP414213562, T67, T68); T69 = FNMS(KP414213562, T68, T67); T6B = FNMS(KP414213562, T6a, T6b);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -