⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t2_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:38:43 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twiddle -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -name t2_32 -include t.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 181 stack variables, 7 constants, and 128 memory accesses */#include "t.h"static void t2_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + (mb * 8); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) {	  E T9A, T9z;	  {	       E T2, T8, T3, T6, Te, Tr, T18, T4, Ta, Tz, T1n, T10, Ti, T5, Tc;	       T2 = W[0];	       T8 = W[4];	       T3 = W[2];	       T6 = W[3];	       Te = W[6];	       Tr = T2 * T8;	       T18 = T3 * T8;	       T4 = T2 * T3;	       Ta = T2 * T6;	       Tz = T3 * Te;	       T1n = T8 * Te;	       T10 = T2 * Te;	       Ti = W[7];	       T5 = W[1];	       Tc = W[5];	       {		    E T34, T31, T2X, T2T, Tq, T46, T8H, T97, TH, T98, T4b, T8D, TZ, T7f, T4j;		    E T6t, T1g, T7g, T4q, T6u, T4z, T6x, T1J, T7m, T7l, T8d, T6y, T4G, T2k, T7o;		    E T7r, T8e, T6A, T4O, T6B, T4V, T6P, T5E, T7L, T3G, T6M, T61, T8n, T7I, T6I;		    E T55, T7A, T2N, T6F, T5s, T8i, T7x, T5L, T62, T43, T7J, T5S, T63, T7O, T8o;		    E T2U, T2R, T2V, T57, T3a, T5h, T2Y, T32, T35;		    {			 E T1K, T23, T1N, T26, T2b, T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J;			 E T2F, Td, TP, T3r, T3n, T2w, T2s, T3Q, T3M, T1Z, T1V, T2g, T2c;			 {			      E T11, T1C, TM, Tb, TJ, T7, T1o, T19, T1w, T1F, T15, T1s, T1d, T1z, TW;			      E TS, Ty, T48, TG, T4a;			      {				   E T1, TA, Ts, TE, Tw, Tn, Tj, T8G, Tk, To, T14;				   T1 = ri[0];				   TA = FMA(T6, Ti, Tz);				   T1K = FNMS(T6, Ti, Tz);				   T14 = T2 * Ti;				   {					E T1r, TD, T1c, Tv;					T1r = T8 * Ti;					TD = T3 * Ti;					T11 = FNMS(T5, Ti, T10);					T1C = FMA(T5, Ti, T10);					TM = FMA(T5, T3, Ta);					Tb = FNMS(T5, T3, Ta);					TJ = FNMS(T5, T6, T4);					T7 = FMA(T5, T6, T4);					T1o = FMA(Tc, Ti, T1n);					T23 = FMA(T6, Tc, T18);					T19 = FNMS(T6, Tc, T18);					T1w = FNMS(T5, Tc, Tr);					Ts = FMA(T5, Tc, Tr);					T1c = T3 * Tc;					Tv = T2 * Tc;					T1F = FNMS(T5, Te, T14);					T15 = FMA(T5, Te, T14);					T1s = FNMS(Tc, Te, T1r);					T1N = FMA(T6, Te, TD);					TE = FNMS(T6, Te, TD);					{					     E T1T, T3i, T3e, T1Q;					     T1T = TJ * Tc;					     T3i = TJ * Ti;					     T3e = TJ * Te;					     T1Q = TJ * T8;					     {						  E Tg, T2I, T2E, T9;						  Tg = T7 * Tc;						  T2I = T7 * Ti;						  T2E = T7 * Te;						  T9 = T7 * T8;						  {						       E T3q, T3m, T2v, T2r;						       T3q = T19 * Ti;						       T3m = T19 * Te;						       T2v = T1w * Ti;						       T2r = T1w * Te;						       {							    E T2W, T2S, T3P, T3L;							    T2W = T23 * Ti;							    T2S = T23 * Te;							    T3P = Ts * Ti;							    T3L = Ts * Te;							    T26 = FNMS(T6, T8, T1c);							    T1d = FMA(T6, T8, T1c);							    T1z = FMA(T5, T8, Tv);							    Tw = FNMS(T5, T8, Tv);							    T2b = FNMS(TM, T8, T1T);							    T1U = FMA(TM, T8, T1T);							    T3C = FNMS(TM, Te, T3i);							    T3j = FMA(TM, Te, T3i);							    T3z = FMA(TM, Ti, T3e);							    T3f = FNMS(TM, Ti, T3e);							    T1R = FNMS(TM, Tc, T1Q);							    T29 = FMA(TM, Tc, T1Q);							    TR = FNMS(Tb, T8, Tg);							    Th = FMA(Tb, T8, Tg);							    T34 = FMA(Tb, Te, T2I);							    T2J = FNMS(Tb, Te, T2I);							    T31 = FNMS(Tb, Ti, T2E);							    T2F = FMA(Tb, Ti, T2E);							    Td = FNMS(Tb, Tc, T9);							    TP = FMA(Tb, Tc, T9);							    T2X = FNMS(T26, Te, T2W);							    T2T = FMA(T26, Ti, T2S);							    T3r = FNMS(T1d, Te, T3q);							    T3n = FMA(T1d, Ti, T3m);							    T2w = FNMS(T1z, Te, T2v);							    T2s = FMA(T1z, Ti, T2r);							    T3Q = FNMS(Tw, Te, T3P);							    T3M = FMA(Tw, Ti, T3L);							    {								 E T1Y, T1S, T2f, T2a;								 T1Y = T1R * Ti;								 T1S = T1R * Te;								 T2f = T29 * Ti;								 T2a = T29 * Te;								 {								      E Tm, Tf, TV, TQ;								      Tm = Td * Ti;								      Tf = Td * Te;								      TV = TP * Ti;								      TQ = TP * Te;								      T1Z = FNMS(T1U, Te, T1Y);								      T1V = FMA(T1U, Ti, T1S);								      T2g = FNMS(T2b, Te, T2f);								      T2c = FMA(T2b, Ti, T2a);								      Tn = FNMS(Th, Te, Tm);								      Tj = FMA(Th, Ti, Tf);								      TW = FNMS(TR, Te, TV);								      TS = FMA(TR, Ti, TQ);								      T8G = ii[0];								 }							    }						       }						  }					     }					}				   }				   Tk = ri[WS(rs, 16)];				   To = ii[WS(rs, 16)];				   {					E Tt, Tx, Tu, T47, TB, TF, TC, T49;					{					     E Tl, T8E, Tp, T8F;					     Tt = ri[WS(rs, 8)];					     Tx = ii[WS(rs, 8)];					     Tl = Tj * Tk;					     T8E = Tj * To;					     Tu = Ts * Tt;					     T47 = Ts * Tx;					     Tp = FMA(Tn, To, Tl);					     T8F = FNMS(Tn, Tk, T8E);					     TB = ri[WS(rs, 24)];					     TF = ii[WS(rs, 24)];					     Tq = T1 + Tp;					     T46 = T1 - Tp;					     T8H = T8F + T8G;					     T97 = T8G - T8F;					     TC = TA * TB;					     T49 = TA * TF;					}					Ty = FMA(Tw, Tx, Tu);					T48 = FNMS(Tw, Tt, T47);					TG = FMA(TE, TF, TC);					T4a = FNMS(TE, TB, T49);				   }			      }			      {				   E TT, TX, TO, T4f, TU, T4g;				   {					E TK, TN, TL, T4e;					TK = ri[WS(rs, 4)];					TN = ii[WS(rs, 4)];					TH = Ty + TG;					T98 = Ty - TG;					T4b = T48 - T4a;					T8D = T48 + T4a;					TL = TJ * TK;					T4e = TJ * TN;					TT = ri[WS(rs, 20)];					TX = ii[WS(rs, 20)];					TO = FMA(TM, TN, TL);					T4f = FNMS(TM, TK, T4e);					TU = TS * TT;					T4g = TS * TX;				   }				   {					E T17, T4m, T1a, T1e, T4d, T4i;					{					     E T12, T16, TY, T4h, T13, T4l;					     T12 = ri[WS(rs, 28)];					     T16 = ii[WS(rs, 28)];					     TY = FMA(TW, TX, TU);					     T4h = FNMS(TW, TT, T4g);					     T13 = T11 * T12;					     T4l = T11 * T16;					     TZ = TO + TY;					     T4d = TO - TY;					     T7f = T4f + T4h;					     T4i = T4f - T4h;					     T17 = FMA(T15, T16, T13);					     T4m = FNMS(T15, T12, T4l);					}					T4j = T4d + T4i;					T6t = T4i - T4d;					T1a = ri[WS(rs, 12)];					T1e = ii[WS(rs, 12)];					{					     E T1m, T4u, T1H, T4E, T1x, T1A, T1u, T4w, T1y, T4B;					     {						  E T1D, T1G, T1E, T4D;						  {						       E T1f, T4o, T4k, T4p;						       {							    E T1j, T1l, T1b, T4n, T1k, T4t;							    T1j = ri[WS(rs, 2)];							    T1l = ii[WS(rs, 2)];							    T1b = T19 * T1a;							    T4n = T19 * T1e;							    T1k = T7 * T1j;							    T4t = T7 * T1l;							    T1f = FMA(T1d, T1e, T1b);							    T4o = FNMS(T1d, T1a, T4n);							    T1m = FMA(Tb, T1l, T1k);							    T4u = FNMS(Tb, T1j, T4t);						       }						       T1g = T17 + T1f;						       T4k = T17 - T1f;						       T7g = T4m + T4o;						       T4p = T4m - T4o;						       T1D = ri[WS(rs, 26)];						       T1G = ii[WS(rs, 26)];						       T4q = T4k - T4p;						       T6u = T4k + T4p;						       T1E = T1C * T1D;						       T4D = T1C * T1G;						  }						  {						       E T1p, T1t, T1q, T4v;						       T1p = ri[WS(rs, 18)];						       T1t = ii[WS(rs, 18)];						       T1H = FMA(T1F, T1G, T1E);						       T4E = FNMS(T1F, T1D, T4D);						       T1q = T1o * T1p;						       T4v = T1o * T1t;						       T1x = ri[WS(rs, 10)];						       T1A = ii[WS(rs, 10)];						       T1u = FMA(T1s, T1t, T1q);						       T4w = FNMS(T1s, T1p, T4v);						       T1y = T1w * T1x;						       T4B = T1w * T1A;						  }					     }					     {						  E T4A, T1v, T7j, T4x, T1B, T4C;						  T4A = T1m - T1u;						  T1v = T1m + T1u;						  T7j = T4u + T4w;						  T4x = T4u - T4w;						  T1B = FMA(T1z, T1A, T1y);						  T4C = FNMS(T1z, T1x, T4B);						  {						       E T1I, T4y, T4F, T7k;						       T1I = T1B + T1H;						       T4y = T1B - T1H;						       T4F = T4C - T4E;						       T7k = T4C + T4E;						       T4z = T4x - T4y;						       T6x = T4x + T4y;						       T1J = T1v + T1I;						       T7m = T1v - T1I;						       T7l = T7j - T7k;						       T8d = T7j + T7k;						       T6y = T4A - T4F;						       T4G = T4A + T4F;						  }					     }					}				   }			      }			 }			 {			      E T5Z, T3u, T5V, T5C, T7G, T5D, T3F, T5X, T4P, T4U;			      {				   E T1P, T4J, T2i, T4T, T21, T4L, T28, T4R;				   {					E T1L, T1O, T1W, T20;					T1L = ri[WS(rs, 30)];					T1O = ii[WS(rs, 30)];					{					     E T2d, T2h, T1M, T4I, T2e, T4S;					     T2d = ri[WS(rs, 22)];					     T2h = ii[WS(rs, 22)];					     T1M = T1K * T1L;					     T4I = T1K * T1O;					     T2e = T2c * T2d;					     T4S = T2c * T2h;					     T1P = FMA(T1N, T1O, T1M);					     T4J = FNMS(T1N, T1L, T4I);					     T2i = FMA(T2g, T2h, T2e);					     T4T = FNMS(T2g, T2d, T4S);					}					T1W = ri[WS(rs, 14)];					T20 = ii[WS(rs, 14)];					{					     E T24, T27, T1X, T4K, T25, T4Q;					     T24 = ri[WS(rs, 6)];					     T27 = ii[WS(rs, 6)];					     T1X = T1V * T1W;					     T4K = T1V * T20;					     T25 = T23 * T24;					     T4Q = T23 * T27;					     T21 = FMA(T1Z, T20, T1X);					     T4L = FNMS(T1Z, T1W, T4K);					     T28 = FMA(T26, T27, T25);					     T4R = FNMS(T26, T24, T4Q);					}				   }				   {					E T22, T7p, T4M, T4N, T2j, T7q;					T4P = T1P - T21;					T22 = T1P + T21;					T7p = T4J + T4L;					T4M = T4J - T4L;					T4N = T28 - T2i;					T2j = T28 + T2i;					T7q = T4R + T4T;					T4U = T4R - T4T;					T2k = T22 + T2j;					T7o = T22 - T2j;					T7r = T7p - T7q;					T8e = T7p + T7q;					T6A = T4M + T4N;					T4O = T4M - T4N;				   }			      }			      {				   E T3l, T5z, T3E, T3v, T3t, T3w, T3x, T5B, T3A, T3B, T3D, T3y, T5W;				   {					E T3g, T3k, T3h, T5y;					T3g = ri[WS(rs, 31)];					T3k = ii[WS(rs, 31)];					T3A = ri[WS(rs, 23)];					T6B = T4P - T4U;					T4V = T4P + T4U;					T3h = T3f * T3g;					T5y = T3f * T3k;					T3B = T3z * T3A;					T3D = ii[WS(rs, 23)];					T3l = FMA(T3j, T3k, T3h);					T5z = FNMS(T3j, T3g, T5y);				   }				   {					E T3o, T5Y, T3s, T3p, T5A;					T3o = ri[WS(rs, 15)];					T3E = FMA(T3C, T3D, T3B);					T5Y = T3z * T3D;					T3s = ii[WS(rs, 15)];					T3p = T3n * T3o;					T3v = ri[WS(rs, 7)];					T5Z = FNMS(T3C, T3A, T5Y);					T5A = T3n * T3s;					T3t = FMA(T3r, T3s, T3p);					T3w = TP * T3v;					T3x = ii[WS(rs, 7)];					T5B = FNMS(T3r, T3o, T5A);				   }				   T3u = T3l + T3t;				   T5V = T3l - T3t;				   T3y = FMA(TR, T3x, T3w);				   T5W = TP * T3x;				   T5C = T5z - T5B;				   T7G = T5z + T5B;				   T5D = T3y - T3E;				   T3F = T3y + T3E;				   T5X = FNMS(TR, T3v, T5W);			      }			      {				   E T2L, T5q, T5m, T2z, T7v, T53, T2D, T5o;				   {					E T2q, T50, T2y, T2A, T2C, T52, T2B, T5n;					{					     E T2G, T2K, T2n, T4Z, T2t, T51;					     {						  E T2o, T2p, T60, T7H;						  T2n = ri[WS(rs, 1)];						  T6P = T5C + T5D;						  T5E = T5C - T5D;						  T7L = T3u - T3F;						  T3G = T3u + T3F;						  T60 = T5X - T5Z;						  T7H = T5X + T5Z;						  T2o = T2 * T2n;						  T2p = ii[WS(rs, 1)];						  T6M = T5V - T60;						  T61 = T5V + T60;						  T8n = T7G + T7H;						  T7I = T7G - T7H;						  T4Z = T2 * T2p;						  T2q = FMA(T5, T2p, T2o);					     }					     T2G = ri[WS(rs, 25)];					     T2K = ii[WS(rs, 25)];					     T50 = FNMS(T5, T2n, T4Z);					     {						  E T2x, T2u, T2H, T5p;						  T2t = ri[WS(rs, 17)];						  T2H = T2F * T2G;						  T5p = T2F * T2K;						  T2x = ii[WS(rs, 17)];						  T2u = T2s * T2t;						  T2L = FMA(T2J, T2K, T2H);						  T5q = FNMS(T2J, T2G, T5p);						  T51 = T2s * T2x;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -