⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t2sv_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:54:08 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twiddle -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 32 -name t2sv_32 -include ts.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 204 stack variables, 7 constants, and 128 memory accesses */#include "ts.h"static void t2sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP668178637, +0.668178637919298919997757686523080761552472251);     DVK(KP198912367, +0.198912367379658006911597622644676228597850501);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP414213562, +0.414213562373095048801688724209698078569671875);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + (mb * 8); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 8), MAKE_VOLATILE_STRIDE(rs)) {	  V T6H, T74, T6U, T6E, T9r, T9t, T78, T7c, T6W, T6S, T73, T6K, T7a, T72, T9x;	  V T9z;	  {	       V T2, T8, T3, T6, Te, Ti, T5, Tc;	       T2 = LDW(&(W[0]));	       T8 = LDW(&(W[TWVL * 4]));	       T3 = LDW(&(W[TWVL * 2]));	       T6 = LDW(&(W[TWVL * 3]));	       Te = LDW(&(W[TWVL * 6]));	       Ti = LDW(&(W[TWVL * 7]));	       T5 = LDW(&(W[TWVL * 1]));	       Tc = LDW(&(W[TWVL * 5]));	       {		    V T2X, T2T, T34, T31, Tq, T46, T97, T8H, TH, T98, T4b, T8D, TZ, T7f, T1g;		    V T7g, T4j, T6t, T4q, T6u, T6x, T4z, T7m, T1J, T4G, T6y, T8d, T7l, T4O, T6A;		    V T2k, T7o, T6B, T4V, T7r, T8e, T5E, T6P, T3G, T7L, T6M, T61, T8n, T7I, T55;		    V T6I, T2N, T7A, T5s, T6F, T7x, T8i, T2R, T2U, T57, T3a, T5h, T62, T5L, T7J;		    V T43, T63, T5S, T8o, T7O, T2V, T2Y, T32, T35;		    {			 V T1w, T23, T1K, T1F, T1s, T1N, T26, T1z, T2w, T2s, T3Q, T3M, T3r, T3n, T2b;			 V T1U, T3C, T3j, T3z, T3f, T1R, T29, TR, Th, T2J, T2F, Td, TP, T1Z, T1V;			 V T2g, T2c, T1m, T4u, T1D, T1G, T1p, T1t, T1E, T4D, T1x, T1A, T1q, T4v;			 {			      V T1, Ts, T19, TJ, T7, TM, Tb, T11, T1C, T1o, TA, T15, TE, T1d, Tw;			      V T8G, Tk, Tn, Tj, TW, TS, To, Tt, Tx, TB, TF, Tl;			      {				   V T1Y, T1S, T2f, T2a;				   T1 = LD(&(ri[0]), ms, &(ri[0]));				   {					V Tr, T18, T4, Ta;					Tr = VMUL(T2, T8);					T18 = VMUL(T3, T8);					T4 = VMUL(T2, T3);					Ta = VMUL(T2, T6);					{					     V T10, T1n, Tz, T14;					     T10 = VMUL(T2, Te);					     T1n = VMUL(T8, Te);					     Tz = VMUL(T3, Te);					     T14 = VMUL(T2, Ti);					     {						  V T1r, TD, T1c, Tv;						  T1r = VMUL(T8, Ti);						  TD = VMUL(T3, Ti);						  T1c = VMUL(T3, Tc);						  Tv = VMUL(T2, Tc);						  T1w = VFNMS(T5, Tc, Tr);						  Ts = VFMA(T5, Tc, Tr);						  T19 = VFNMS(T6, Tc, T18);						  T23 = VFMA(T6, Tc, T18);						  TJ = VFNMS(T5, T6, T4);						  T7 = VFMA(T5, T6, T4);						  TM = VFMA(T5, T3, Ta);						  Tb = VFNMS(T5, T3, Ta);						  T11 = VFNMS(T5, Ti, T10);						  T1C = VFMA(T5, Ti, T10);						  T1o = VFMA(Tc, Ti, T1n);						  TA = VFMA(T6, Ti, Tz);						  T1K = VFNMS(T6, Ti, Tz);						  T1F = VFNMS(T5, Te, T14);						  T15 = VFMA(T5, Te, T14);						  T1s = VFNMS(Tc, Te, T1r);						  T1N = VFMA(T6, Te, TD);						  TE = VFNMS(T6, Te, TD);						  T26 = VFNMS(T6, T8, T1c);						  T1d = VFMA(T6, T8, T1c);						  T1z = VFMA(T5, T8, Tv);						  Tw = VFNMS(T5, T8, Tv);						  {						       V T2v, T2r, T3P, T3L;						       T2v = VMUL(T1w, Ti);						       T2r = VMUL(T1w, Te);						       T3P = VMUL(Ts, Ti);						       T3L = VMUL(Ts, Te);						       {							    V T3q, T3m, T2W, T2S;							    T3q = VMUL(T19, Ti);							    T3m = VMUL(T19, Te);							    T2W = VMUL(T23, Ti);							    T2S = VMUL(T23, Te);							    {								 V T1T, T3i, T3e, T1Q;								 T1T = VMUL(TJ, Tc);								 T3i = VMUL(TJ, Ti);								 T3e = VMUL(TJ, Te);								 T1Q = VMUL(TJ, T8);								 {								      V Tg, T2I, T2E, T9;								      Tg = VMUL(T7, Tc);								      T2I = VMUL(T7, Ti);								      T2E = VMUL(T7, Te);								      T9 = VMUL(T7, T8);								      T2w = VFNMS(T1z, Te, T2v);								      T2s = VFMA(T1z, Ti, T2r);								      T3Q = VFNMS(Tw, Te, T3P);								      T3M = VFMA(Tw, Ti, T3L);								      T3r = VFNMS(T1d, Te, T3q);								      T3n = VFMA(T1d, Ti, T3m);								      T2X = VFNMS(T26, Te, T2W);								      T2T = VFMA(T26, Ti, T2S);								      T2b = VFNMS(TM, T8, T1T);								      T1U = VFMA(TM, T8, T1T);								      T3C = VFNMS(TM, Te, T3i);								      T3j = VFMA(TM, Te, T3i);								      T3z = VFMA(TM, Ti, T3e);								      T3f = VFNMS(TM, Ti, T3e);								      T1R = VFNMS(TM, Tc, T1Q);								      T29 = VFMA(TM, Tc, T1Q);								      TR = VFNMS(Tb, T8, Tg);								      Th = VFMA(Tb, T8, Tg);								      T34 = VFMA(Tb, Te, T2I);								      T2J = VFNMS(Tb, Te, T2I);								      T31 = VFNMS(Tb, Ti, T2E);								      T2F = VFMA(Tb, Ti, T2E);								      Td = VFNMS(Tb, Tc, T9);								      TP = VFMA(Tb, Tc, T9);								      T1Y = VMUL(T1R, Ti);								      T1S = VMUL(T1R, Te);								      T2f = VMUL(T29, Ti);								      T2a = VMUL(T29, Te);								      T8G = LD(&(ii[0]), ms, &(ii[0]));								 }							    }						       }						  }					     }					}				   }				   Tk = LD(&(ri[WS(rs, 16)]), ms, &(ri[0]));				   {					V Tm, Tf, TV, TQ;					Tm = VMUL(Td, Ti);					Tf = VMUL(Td, Te);					TV = VMUL(TP, Ti);					TQ = VMUL(TP, Te);					T1Z = VFNMS(T1U, Te, T1Y);					T1V = VFMA(T1U, Ti, T1S);					T2g = VFNMS(T2b, Te, T2f);					T2c = VFMA(T2b, Ti, T2a);					Tn = VFNMS(Th, Te, Tm);					Tj = VFMA(Th, Ti, Tf);					TW = VFNMS(TR, Te, TV);					TS = VFMA(TR, Ti, TQ);				   }				   To = LD(&(ii[WS(rs, 16)]), ms, &(ii[0]));			      }			      Tt = LD(&(ri[WS(rs, 8)]), ms, &(ri[0]));			      Tx = LD(&(ii[WS(rs, 8)]), ms, &(ii[0]));			      TB = LD(&(ri[WS(rs, 24)]), ms, &(ri[0]));			      TF = LD(&(ii[WS(rs, 24)]), ms, &(ii[0]));			      Tl = VMUL(Tj, Tk);			      {				   V TO, T4f, TT, TX;				   {					V Ty, T48, TG, T4a;					{					     V TK, TN, T8E, Tu, T47, TC, T49, Tp, TL, T4e, T8F;					     TK = LD(&(ri[WS(rs, 4)]), ms, &(ri[0]));					     TN = LD(&(ii[WS(rs, 4)]), ms, &(ii[0]));					     T8E = VMUL(Tj, To);					     Tu = VMUL(Ts, Tt);					     T47 = VMUL(Ts, Tx);					     TC = VMUL(TA, TB);					     T49 = VMUL(TA, TF);					     Tp = VFMA(Tn, To, Tl);					     TL = VMUL(TJ, TK);					     T4e = VMUL(TJ, TN);					     T8F = VFNMS(Tn, Tk, T8E);					     Ty = VFMA(Tw, Tx, Tu);					     T48 = VFNMS(Tw, Tt, T47);					     TG = VFMA(TE, TF, TC);					     T4a = VFNMS(TE, TB, T49);					     Tq = VADD(T1, Tp);					     T46 = VSUB(T1, Tp);					     TO = VFMA(TM, TN, TL);					     T97 = VSUB(T8G, T8F);					     T8H = VADD(T8F, T8G);					     T4f = VFNMS(TM, TK, T4e);					}					TH = VADD(Ty, TG);					T98 = VSUB(Ty, TG);					T4b = VSUB(T48, T4a);					T8D = VADD(T48, T4a);					TT = LD(&(ri[WS(rs, 20)]), ms, &(ri[0]));					TX = LD(&(ii[WS(rs, 20)]), ms, &(ii[0]));				   }				   {					V T12, T16, T1a, T1e, T4k, T4p;					T12 = LD(&(ri[WS(rs, 28)]), ms, &(ri[0]));					T16 = LD(&(ii[WS(rs, 28)]), ms, &(ii[0]));					T1a = LD(&(ri[WS(rs, 12)]), ms, &(ri[0]));					T1e = LD(&(ii[WS(rs, 12)]), ms, &(ii[0]));					{					     V TY, T4h, T17, T4m, T1f, T4o, T4d, T4i;					     {						  V T1j, T1l, TU, T4g, T13, T4l, T1b, T4n, T1k, T4t;						  T1j = LD(&(ri[WS(rs, 2)]), ms, &(ri[0]));						  T1l = LD(&(ii[WS(rs, 2)]), ms, &(ii[0]));						  TU = VMUL(TS, TT);						  T4g = VMUL(TS, TX);						  T13 = VMUL(T11, T12);						  T4l = VMUL(T11, T16);						  T1b = VMUL(T19, T1a);						  T4n = VMUL(T19, T1e);						  T1k = VMUL(T7, T1j);						  T4t = VMUL(T7, T1l);						  TY = VFMA(TW, TX, TU);						  T4h = VFNMS(TW, TT, T4g);						  T17 = VFMA(T15, T16, T13);						  T4m = VFNMS(T15, T12, T4l);						  T1f = VFMA(T1d, T1e, T1b);						  T4o = VFNMS(T1d, T1a, T4n);						  T1m = VFMA(Tb, T1l, T1k);						  T4u = VFNMS(Tb, T1j, T4t);					     }					     TZ = VADD(TO, TY);					     T4d = VSUB(TO, TY);					     T7f = VADD(T4f, T4h);					     T4i = VSUB(T4f, T4h);					     T1g = VADD(T17, T1f);					     T4k = VSUB(T17, T1f);					     T7g = VADD(T4m, T4o);					     T4p = VSUB(T4m, T4o);					     T1D = LD(&(ri[WS(rs, 26)]), ms, &(ri[0]));					     T1G = LD(&(ii[WS(rs, 26)]), ms, &(ii[0]));					     T4j = VADD(T4d, T4i);					     T6t = VSUB(T4i, T4d);					}					T1p = LD(&(ri[WS(rs, 18)]), ms, &(ri[0]));					T1t = LD(&(ii[WS(rs, 18)]), ms, &(ii[0]));					T4q = VSUB(T4k, T4p);					T6u = VADD(T4k, T4p);					T1E = VMUL(T1C, T1D);					T4D = VMUL(T1C, T1G);					T1x = LD(&(ri[WS(rs, 10)]), ms, &(ri[0]));					T1A = LD(&(ii[WS(rs, 10)]), ms, &(ii[0]));					T1q = VMUL(T1o, T1p);					T4v = VMUL(T1o, T1t);				   }			      }			 }			 {			      V T3l, T5z, T3E, T5Z, T3v, T3x, T3w, T3t, T5B, T5W;			      {				   V T1P, T4J, T1W, T20, T2i, T4T, T1X, T4K, T24, T27;				   {					V T2d, T2h, T1v, T4A, T7j, T4x, T2e, T4y, T1I, T4F, T7k, T4S;					{					     V T1L, T1O, T1H, T4E, T1y, T4B, T1u, T4w, T1M, T4I, T1B, T4C;					     T1L = LD(&(ri[WS(rs, 30)]), ms, &(ri[0]));					     T1O = LD(&(ii[WS(rs, 30)]), ms, &(ii[0]));					     T1H = VFMA(T1F, T1G, T1E);					     T4E = VFNMS(T1F, T1D, T4D);					     T1y = VMUL(T1w, T1x);					     T4B = VMUL(T1w, T1A);					     T1u = VFMA(T1s, T1t, T1q);					     T4w = VFNMS(T1s, T1p, T4v);					     T1M = VMUL(T1K, T1L);					     T4I = VMUL(T1K, T1O);					     T2d = LD(&(ri[WS(rs, 22)]), ms, &(ri[0]));					     T2h = LD(&(ii[WS(rs, 22)]), ms, &(ii[0]));					     T1B = VFMA(T1z, T1A, T1y);					     T4C = VFNMS(T1z, T1x, T4B);					     T1v = VADD(T1m, T1u);					     T4A = VSUB(T1m, T1u);					     T7j = VADD(T4u, T4w);					     T4x = VSUB(T4u, T4w);					     T1P = VFMA(T1N, T1O, T1M);					     T4J = VFNMS(T1N, T1L, T4I);					     T2e = VMUL(T2c, T2d);					     T4y = VSUB(T1B, T1H);					     T1I = VADD(T1B, T1H);					     T4F = VSUB(T4C, T4E);					     T7k = VADD(T4C, T4E);					     T4S = VMUL(T2c, T2h);					}					T1W = LD(&(ri[WS(rs, 14)]), ms, &(ri[0]));					T20 = LD(&(ii[WS(rs, 14)]), ms, &(ii[0]));					T2i = VFMA(T2g, T2h, T2e);					T6x = VADD(T4x, T4y);					T4z = VSUB(T4x, T4y);					T7m = VSUB(T1v, T1I);					T1J = VADD(T1v, T1I);					T4G = VADD(T4A, T4F);					T6y = VSUB(T4A, T4F);					T8d = VADD(T7j, T7k);					T7l = VSUB(T7j, T7k);					T4T = VFNMS(T2g, T2d, T4S);					T1X = VMUL(T1V, T1W);					T4K = VMUL(T1V, T20);					T24 = LD(&(ri[WS(rs, 6)]), ms, &(ri[0]));					T27 = LD(&(ii[WS(rs, 6)]), ms, &(ii[0]));				   }				   {					V T22, T4P, T7p, T4M, T28, T4R, T3g, T3k;					T3g = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)]));					T3k = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)]));					{					     V T3A, T3D, T21, T4L, T25, T4Q, T3h, T5y, T3B, T5Y;					     T3A = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)]));					     T3D = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)]));					     T21 = VFMA(T1Z, T20, T1X);					     T4L = VFNMS(T1Z, T1W, T4K);					     T25 = VMUL(T23, T24);					     T4Q = VMUL(T23, T27);					     T3h = VMUL(T3f, T3g);					     T5y = VMUL(T3f, T3k);					     T3B = VMUL(T3z, T3A);					     T5Y = VMUL(T3z, T3D);					     T22 = VADD(T1P, T21);					     T4P = VSUB(T1P, T21);					     T7p = VADD(T4J, T4L);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -