⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hf_64.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:56:48 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -dit -name hf_64 -include hf.h *//* * This function contains 1038 FP additions, 644 FP multiplications, * (or, 520 additions, 126 multiplications, 518 fused multiply/add), * 246 stack variables, 15 constants, and 256 memory accesses */#include "hf.h"static void hf_64(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP881921264, +0.881921264348355029712756863660388349508442621);     DK(KP956940335, +0.956940335732208864935797886980269969482849206);     DK(KP773010453, +0.773010453362736960810906609758469800971041293);     DK(KP995184726, +0.995184726672196886244836953109479921575474869);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP534511135, +0.534511135950791641089685961295362908582039528);     DK(KP303346683, +0.303346683607342391675883946941299872384187453);     DK(KP098491403, +0.098491403357164253077197521291327432293052451);     DK(KP820678790, +0.820678790828660330972281985331011598767386482);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     INT m;     for (m = mb, W = W + ((mb - 1) * 126); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 126, MAKE_VOLATILE_STRIDE(rs)) {	  E Tku, Tky, Tkt, Tkx;	  {	       E TiV, Tjm, T7e, TcA, TjR, Tkl, Tm, TeM, T7Q, TcI, TeZ, Thr, T1G, TeW, TcJ;	       E T7X, T87, TcN, Tf5, Thw, T29, Tf8, TcQ, T8u, Taq, Tdm, Tgc, ThX, T5K, TfS;	       E Tdx, Tbj, TcB, T7l, TiP, TeP, Tjl, TN, TcC, T7s, T7B, TcF, TeU, Ths, T7I;	       E TcG, T1f, TeR, T8G, TcU, Tfg, ThB, T32, Tfj, TcX, T93, T9h, Td3, TfK, ThM;	       E T3X, Tfr, Tde, Taa, Thx, Tfb, Tf6, T2A, T8x, TcO, T8m, TcR, Tfm, ThC, T3t;	       E Tfh, T96, TcV, T8V, TcY, ThN, Tfu, TfL, T4o, Tad, Td4, T9w, Tdf, TfV, ThY;	       E T6b, Tg9, Tbm, Tdn, TaF, Tdy, ThJ, T4Q, TfN, TfA, Taf, T9M, Td8, Tdh, ThI;	       E T5h, TfO, TfF, Tag, Ta1, Tdb, Tdi, ThU, T6D, Tgf, Tg1, Tbo, TaV, Tdr, TdA;	       E Tb2, Tds, Tg5, ThT, Tg2, T74, Tdt, Tb9;	       {		    E T7a, Te, T78, T8, TjQ, TiU, T7c, Tk;		    {			 E T1, TiT, TiS, T7, Tg, Tj, Tf, Ti, T7b, Th;			 T1 = cr[0];			 TiT = ci[0];			 {			      E T3, T6, T2, T5;			      T3 = cr[WS(rs, 32)];			      T6 = ci[WS(rs, 32)];			      T2 = W[62];			      T5 = W[63];			      {				   E Ta, Td, Tc, T79, Tb, TiR, T4, T9;				   Ta = cr[WS(rs, 16)];				   Td = ci[WS(rs, 16)];				   TiR = T2 * T6;				   T4 = T2 * T3;				   T9 = W[30];				   Tc = W[31];				   TiS = FNMS(T5, T3, TiR);				   T7 = FMA(T5, T6, T4);				   T79 = T9 * Td;				   Tb = T9 * Ta;				   Tg = cr[WS(rs, 48)];				   Tj = ci[WS(rs, 48)];				   T7a = FNMS(Tc, Ta, T79);				   Te = FMA(Tc, Td, Tb);				   Tf = W[94];				   Ti = W[95];			      }			 }			 T78 = T1 - T7;			 T8 = T1 + T7;			 TjQ = TiT - TiS;			 TiU = TiS + TiT;			 T7b = Tf * Tj;			 Th = Tf * Tg;			 T7c = FNMS(Ti, Tg, T7b);			 Tk = FMA(Ti, Tj, Th);		    }		    {			 E T7S, T1l, T7O, T1E, T1u, T1x, T1w, T7U, T1r, T7L, T1v;			 {			      E T1A, T1D, T1C, T7N, T1B;			      {				   E T1h, T1k, T1g, T1j, T7R, T1i, T1z;				   T1h = cr[WS(rs, 60)];				   T1k = ci[WS(rs, 60)];				   {					E T7d, TiQ, Tl, TjP;					T7d = T7a - T7c;					TiQ = T7a + T7c;					Tl = Te + Tk;					TjP = Te - Tk;					TiV = TiQ + TiU;					Tjm = TiU - TiQ;					T7e = T78 - T7d;					TcA = T78 + T7d;					TjR = TjP + TjQ;					Tkl = TjQ - TjP;					Tm = T8 + Tl;					TeM = T8 - Tl;					T1g = W[118];				   }				   T1j = W[119];				   T1A = cr[WS(rs, 44)];				   T1D = ci[WS(rs, 44)];				   T7R = T1g * T1k;				   T1i = T1g * T1h;				   T1z = W[86];				   T1C = W[87];				   T7S = FNMS(T1j, T1h, T7R);				   T1l = FMA(T1j, T1k, T1i);				   T7N = T1z * T1D;				   T1B = T1z * T1A;			      }			      {				   E T1n, T1q, T1m, T1p, T7T, T1o, T1t;				   T1n = cr[WS(rs, 28)];				   T1q = ci[WS(rs, 28)];				   T7O = FNMS(T1C, T1A, T7N);				   T1E = FMA(T1C, T1D, T1B);				   T1m = W[54];				   T1p = W[55];				   T1u = cr[WS(rs, 12)];				   T1x = ci[WS(rs, 12)];				   T7T = T1m * T1q;				   T1o = T1m * T1n;				   T1t = W[22];				   T1w = W[23];				   T7U = FNMS(T1p, T1n, T7T);				   T1r = FMA(T1p, T1q, T1o);				   T7L = T1t * T1x;				   T1v = T1t * T1u;			      }			 }			 {			      E T7V, TeX, T1s, T7K, T7M, T1y;			      T7V = T7S - T7U;			      TeX = T7S + T7U;			      T1s = T1l + T1r;			      T7K = T1l - T1r;			      T7M = FNMS(T1w, T1u, T7L);			      T1y = FMA(T1w, T1x, T1v);			      {				   E TeY, T7P, T7W, T1F;				   TeY = T7M + T7O;				   T7P = T7M - T7O;				   T7W = T1y - T1E;				   T1F = T1y + T1E;				   T7Q = T7K - T7P;				   TcI = T7K + T7P;				   TeZ = TeX - TeY;				   Thr = TeX + TeY;				   T1G = T1s + T1F;				   TeW = T1s - T1F;				   TcJ = T7V - T7W;				   T7X = T7V + T7W;			      }			 }		    }	       }	       {		    E T8p, T1O, T85, T27, T1X, T20, T1Z, T8r, T1U, T82, T1Y;		    {			 E T23, T26, T25, T84, T24;			 {			      E T1K, T1N, T1J, T1M, T8o, T1L, T22;			      T1K = cr[WS(rs, 2)];			      T1N = ci[WS(rs, 2)];			      T1J = W[2];			      T1M = W[3];			      T23 = cr[WS(rs, 50)];			      T26 = ci[WS(rs, 50)];			      T8o = T1J * T1N;			      T1L = T1J * T1K;			      T22 = W[98];			      T25 = W[99];			      T8p = FNMS(T1M, T1K, T8o);			      T1O = FMA(T1M, T1N, T1L);			      T84 = T22 * T26;			      T24 = T22 * T23;			 }			 {			      E T1Q, T1T, T1P, T1S, T8q, T1R, T1W;			      T1Q = cr[WS(rs, 34)];			      T1T = ci[WS(rs, 34)];			      T85 = FNMS(T25, T23, T84);			      T27 = FMA(T25, T26, T24);			      T1P = W[66];			      T1S = W[67];			      T1X = cr[WS(rs, 18)];			      T20 = ci[WS(rs, 18)];			      T8q = T1P * T1T;			      T1R = T1P * T1Q;			      T1W = W[34];			      T1Z = W[35];			      T8r = FNMS(T1S, T1Q, T8q);			      T1U = FMA(T1S, T1T, T1R);			      T82 = T1W * T20;			      T1Y = T1W * T1X;			 }		    }		    {			 E T8s, Tf3, T1V, T81, T83, T21;			 T8s = T8p - T8r;			 Tf3 = T8p + T8r;			 T1V = T1O + T1U;			 T81 = T1O - T1U;			 T83 = FNMS(T1Z, T1X, T82);			 T21 = FMA(T1Z, T20, T1Y);			 {			      E Tf4, T86, T8t, T28;			      Tf4 = T83 + T85;			      T86 = T83 - T85;			      T8t = T21 - T27;			      T28 = T21 + T27;			      T87 = T81 - T86;			      TcN = T81 + T86;			      Tf5 = Tf3 - Tf4;			      Thw = Tf3 + Tf4;			      T29 = T1V + T28;			      Tf8 = T1V - T28;			      TcQ = T8s - T8t;			      T8u = T8s + T8t;			 }		    }	       }	       {		    E Tbf, T5p, Tao, T5I, T5y, T5B, T5A, Tbh, T5v, Tal, T5z;		    {			 E T5E, T5H, T5G, Tan, T5F;			 {			      E T5l, T5o, T5k, T5n, Tbe, T5m, T5D;			      T5l = cr[WS(rs, 63)];			      T5o = ci[WS(rs, 63)];			      T5k = W[124];			      T5n = W[125];			      T5E = cr[WS(rs, 47)];			      T5H = ci[WS(rs, 47)];			      Tbe = T5k * T5o;			      T5m = T5k * T5l;			      T5D = W[92];			      T5G = W[93];			      Tbf = FNMS(T5n, T5l, Tbe);			      T5p = FMA(T5n, T5o, T5m);			      Tan = T5D * T5H;			      T5F = T5D * T5E;			 }			 {			      E T5r, T5u, T5q, T5t, Tbg, T5s, T5x;			      T5r = cr[WS(rs, 31)];			      T5u = ci[WS(rs, 31)];			      Tao = FNMS(T5G, T5E, Tan);			      T5I = FMA(T5G, T5H, T5F);			      T5q = W[60];			      T5t = W[61];			      T5y = cr[WS(rs, 15)];			      T5B = ci[WS(rs, 15)];			      Tbg = T5q * T5u;			      T5s = T5q * T5r;			      T5x = W[28];			      T5A = W[29];			      Tbh = FNMS(T5t, T5r, Tbg);			      T5v = FMA(T5t, T5u, T5s);			      Tal = T5x * T5B;			      T5z = T5x * T5y;			 }		    }		    {			 E Tbi, Tga, T5w, Tak, Tam, T5C;			 Tbi = Tbf - Tbh;			 Tga = Tbf + Tbh;			 T5w = T5p + T5v;			 Tak = T5p - T5v;			 Tam = FNMS(T5A, T5y, Tal);			 T5C = FMA(T5A, T5B, T5z);			 {			      E Tgb, Tap, T5J, Tbd;			      Tgb = Tam + Tao;			      Tap = Tam - Tao;			      T5J = T5C + T5I;			      Tbd = T5I - T5C;			      Taq = Tak - Tap;			      Tdm = Tak + Tap;			      Tgc = Tga - Tgb;			      ThX = Tga + Tgb;			      T5K = T5w + T5J;			      TfS = T5w - T5J;			      Tdx = Tbi + Tbd;			      Tbj = Tbd - Tbi;			 }		    }	       }	       {		    E T7z, T1d, T7G, TeS, T11, T7v, T7x, T17, T7r, T7m;		    {			 E T7h, Ts, T7q, TL, TB, TE, TD, T7j, Ty, T7n, TC;			 {			      E TH, TK, TJ, T7p, TI;			      {				   E To, Tr, Tn, Tq, T7g, Tp, TG;				   To = cr[WS(rs, 8)];				   Tr = ci[WS(rs, 8)];				   Tn = W[14];				   Tq = W[15];				   TH = cr[WS(rs, 24)];				   TK = ci[WS(rs, 24)];				   T7g = Tn * Tr;				   Tp = Tn * To;				   TG = W[46];				   TJ = W[47];				   T7h = FNMS(Tq, To, T7g);				   Ts = FMA(Tq, Tr, Tp);				   T7p = TG * TK;				   TI = TG * TH;			      }			      {				   E Tu, Tx, Tt, Tw, T7i, Tv, TA;				   Tu = cr[WS(rs, 40)];				   Tx = ci[WS(rs, 40)];				   T7q = FNMS(TJ, TH, T7p);				   TL = FMA(TJ, TK, TI);				   Tt = W[78];				   Tw = W[79];				   TB = cr[WS(rs, 56)];				   TE = ci[WS(rs, 56)];				   T7i = Tt * Tx;				   Tv = Tt * Tu;				   TA = W[110];				   TD = W[111];				   T7j = FNMS(Tw, Tu, T7i);				   Ty = FMA(Tw, Tx, Tv);				   T7n = TA * TE;				   TC = TA * TB;			      }			 }			 {			      E T7k, TeO, Tz, T7f, T7o, TF, TeN, TM;			      T7k = T7h - T7j;			      TeO = T7h + T7j;			      Tz = Ts + Ty;			      T7f = Ts - Ty;			      T7o = FNMS(TD, TB, T7n);			      TF = FMA(TD, TE, TC);			      T7r = T7o - T7q;			      TeN = T7o + T7q;			      TM = TF + TL;			      T7m = TF - TL;			      TcB = T7f + T7k;			      T7l = T7f - T7k;			      TiP = TeO + TeN;			      TeP = TeN - TeO;			      Tjl = Tz - TM;			      TN = Tz + TM;			 }		    }		    {			 E T7D, TU, T13, T16, T7F, T10, T12, T15, T7w, T14;			 {			      E T19, T1c, T18, T1b;			      {				   E TQ, TT, TS, T7C, TR, TP;				   TQ = cr[WS(rs, 4)];				   TT = ci[WS(rs, 4)];				   TP = W[6];				   TcC = T7m - T7r;				   T7s = T7m + T7r;				   TS = W[7];				   T7C = TP * TT;				   TR = TP * TQ;				   T19 = cr[WS(rs, 52)];				   T1c = ci[WS(rs, 52)];				   T7D = FNMS(TS, TQ, T7C);				   TU = FMA(TS, TT, TR);				   T18 = W[102];				   T1b = W[103];			      }			      {				   E TW, TZ, TY, T7E, TX, T7y, T1a, TV;				   TW = cr[WS(rs, 36)];				   TZ = ci[WS(rs, 36)];				   T7y = T18 * T1c;				   T1a = T18 * T19;				   TV = W[70];				   TY = W[71];				   T7z = FNMS(T1b, T19, T7y);				   T1d = FMA(T1b, T1c, T1a);				   T7E = TV * TZ;				   TX = TV * TW;				   T13 = cr[WS(rs, 20)];				   T16 = ci[WS(rs, 20)];				   T7F = FNMS(TY, TW, T7E);				   T10 = FMA(TY, TZ, TX);				   T12 = W[38];				   T15 = W[39];			      }			 }			 T7G = T7D - T7F;			 TeS = T7D + T7F;			 T11 = TU + T10;			 T7v = TU - T10;			 T7w = T12 * T16;			 T14 = T12 * T13;			 T7x = FNMS(T15, T13, T7w);			 T17 = FMA(T15, T16, T14);		    }		    {			 E T8Y, T2H, T8E, T30, T2Q, T2T, T2S, T90, T2N, T8B, T2R;			 {			      E T2W, T2Z, T2Y, T8D, T2X;			      {				   E T2D, T2G, T2C, T2F, T8X, T2E, T2V;				   T2D = cr[WS(rs, 62)];				   T2G = ci[WS(rs, 62)];				   {					E TeT, T7A, T1e, T7H;					TeT = T7x + T7z;					T7A = T7x - T7z;					T1e = T17 + T1d;					T7H = T17 - T1d;					T7B = T7v - T7A;					TcF = T7v + T7A;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -