⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t2sv_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
				   }			      }			 }			 {			      V T6s, T9o, T9n, T6v, T6N, T6Q, T6G, T6J, T68, T4Y, T9f, T9d, T9l, T9j, T6g;			      V T6o, T6q, T6m, T66, T6a, T6p, T6j, T5x, T69;			      {				   V T6d, T6e, T6c, T4s, T9c, T4X, T9h, T9b, T5T, T64, T5k, T5v, T9i, T6f;				   {					V T4c, T4r, T4H, T4W, T99, T9a;					T6s = VSUB(T46, T4b);					T4c = VADD(T46, T4b);					T4r = VADD(T4j, T4q);					T9o = VSUB(T4q, T4j);					T6d = VFMA(LDK(KP414213562), T4z, T4G);					T4H = VFNMS(LDK(KP414213562), T4G, T4z);					T4W = VFMA(LDK(KP414213562), T4V, T4O);					T6e = VFNMS(LDK(KP414213562), T4O, T4V);					T9n = VADD(T98, T97);					T99 = VSUB(T97, T98);					T9a = VADD(T6t, T6u);					T6v = VSUB(T6t, T6u);					ST(&(ii[WS(rs, 26)]), VFNMS(LDK(KP923879532), T90, T8Z), ms, &(ii[0]));					ST(&(ii[WS(rs, 10)]), VFMA(LDK(KP923879532), T90, T8Z), ms, &(ii[0]));					T6c = VFMA(LDK(KP707106781), T4r, T4c);					T4s = VFNMS(LDK(KP707106781), T4r, T4c);					T9c = VADD(T4H, T4W);					T4X = VSUB(T4H, T4W);					T9h = VFNMS(LDK(KP707106781), T9a, T99);					T9b = VFMA(LDK(KP707106781), T9a, T99);					T6N = VSUB(T5S, T5L);					T5T = VADD(T5L, T5S);					T64 = VADD(T62, T63);					T6Q = VSUB(T62, T63);					T6G = VSUB(T5j, T5c);					T5k = VADD(T5c, T5j);					T5v = VADD(T5t, T5u);					T6J = VSUB(T5t, T5u);				   }				   T68 = VFNMS(LDK(KP923879532), T4X, T4s);				   T4Y = VFMA(LDK(KP923879532), T4X, T4s);				   T9f = VFNMS(LDK(KP923879532), T9c, T9b);				   T9d = VFMA(LDK(KP923879532), T9c, T9b);				   T9i = VSUB(T6e, T6d);				   T6f = VADD(T6d, T6e);				   {					V T6l, T5U, T6k, T65;					T6l = VFMA(LDK(KP707106781), T5T, T5E);					T5U = VFNMS(LDK(KP707106781), T5T, T5E);					T6k = VFMA(LDK(KP707106781), T64, T61);					T65 = VFNMS(LDK(KP707106781), T64, T61);					{					     V T6i, T5l, T6h, T5w;					     T6i = VFMA(LDK(KP707106781), T5k, T55);					     T5l = VFNMS(LDK(KP707106781), T5k, T55);					     T6h = VFMA(LDK(KP707106781), T5v, T5s);					     T5w = VFNMS(LDK(KP707106781), T5v, T5s);					     T9l = 
VFNMS(LDK(KP923879532), T9i, T9h);					     T9j = VFMA(LDK(KP923879532), T9i, T9h);					     T6g = VFMA(LDK(KP923879532), T6f, T6c);					     T6o = VFNMS(LDK(KP923879532), T6f, T6c);					     T6q = VFMA(LDK(KP198912367), T6k, T6l);					     T6m = VFNMS(LDK(KP198912367), T6l, T6k);					     T66 = VFNMS(LDK(KP668178637), T65, T5U);					     T6a = VFMA(LDK(KP668178637), T5U, T65);					     T6p = VFNMS(LDK(KP198912367), T6h, T6i);					     T6j = VFMA(LDK(KP198912367), T6i, T6h);					     T5x = VFMA(LDK(KP668178637), T5w, T5l);					     T69 = VFNMS(LDK(KP668178637), T5l, T5w);					}				   }			      }			      {				   V T6Y, T6w, T9w, T6D, T9v, T9p, T9q, T71, T77, T6O, T76, T6R;				   {					V T6Z, T6z, T6C, T70;					{					     V T6n, T9g, T9e, T6r;					     T6n = VADD(T6j, T6m);					     T9g = VSUB(T6m, T6j);					     T9e = VADD(T6p, T6q);					     T6r = VSUB(T6p, T6q);					     {						  V T9k, T6b, T67, T9m;						  T9k = VSUB(T6a, T69);						  T6b = VADD(T69, T6a);						  T67 = VSUB(T5x, T66);						  T9m = VADD(T5x, T66);						  ST(&(ii[WS(rs, 25)]), VFNMS(LDK(KP980785280), T9g, T9f), ms, &(ii[WS(rs, 1)]));						  ST(&(ii[WS(rs, 9)]), VFMA(LDK(KP980785280), T9g, T9f), ms, &(ii[WS(rs, 1)]));						  ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP980785280), T6n, T6g), ms, &(ri[WS(rs, 1)]));						  ST(&(ri[WS(rs, 17)]), VFNMS(LDK(KP980785280), T6n, T6g), ms, &(ri[WS(rs, 1)]));						  ST(&(ri[WS(rs, 9)]), VFMA(LDK(KP980785280), T6r, T6o), ms, &(ri[WS(rs, 1)]));						  ST(&(ri[WS(rs, 25)]), VFNMS(LDK(KP980785280), T6r, T6o), ms, &(ri[WS(rs, 1)]));						  ST(&(ii[WS(rs, 17)]), VFNMS(LDK(KP980785280), T9e, T9d), ms, &(ii[WS(rs, 1)]));						  ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP980785280), T9e, T9d), ms, &(ii[WS(rs, 1)]));						  ST(&(ri[WS(rs, 29)]), VFMA(LDK(KP831469612), T6b, T68), ms, &(ri[WS(rs, 1)]));						  ST(&(ri[WS(rs, 13)]), VFNMS(LDK(KP831469612), T6b, T68), ms, &(ri[WS(rs, 1)]));						  ST(&(ii[WS(rs, 21)]), VFNMS(LDK(KP831469612), T9k, T9j), ms, &(ii[WS(rs, 1)]));						  
ST(&(ii[WS(rs, 5)]), VFMA(LDK(KP831469612), T9k, T9j), ms, &(ii[WS(rs, 1)]));						  ST(&(ii[WS(rs, 29)]), VFMA(LDK(KP831469612), T9m, T9l), ms, &(ii[WS(rs, 1)]));						  ST(&(ii[WS(rs, 13)]), VFNMS(LDK(KP831469612), T9m, T9l), ms, &(ii[WS(rs, 1)]));						  ST(&(ri[WS(rs, 5)]), VFMA(LDK(KP831469612), T67, T4Y), ms, &(ri[WS(rs, 1)]));						  ST(&(ri[WS(rs, 21)]), VFNMS(LDK(KP831469612), T67, T4Y), ms, &(ri[WS(rs, 1)]));						  T6Y = VFNMS(LDK(KP707106781), T6v, T6s);						  T6w = VFMA(LDK(KP707106781), T6v, T6s);					     }					}					T6Z = VFNMS(LDK(KP414213562), T6x, T6y);					T6z = VFMA(LDK(KP414213562), T6y, T6x);					T6C = VFNMS(LDK(KP414213562), T6B, T6A);					T70 = VFMA(LDK(KP414213562), T6A, T6B);					T9w = VADD(T6z, T6C);					T6D = VSUB(T6z, T6C);					T9v = VFNMS(LDK(KP707106781), T9o, T9n);					T9p = VFMA(LDK(KP707106781), T9o, T9n);					T9q = VSUB(T70, T6Z);					T71 = VADD(T6Z, T70);					T77 = VFMA(LDK(KP707106781), T6N, T6M);					T6O = VFNMS(LDK(KP707106781), T6N, T6M);					T76 = VFMA(LDK(KP707106781), T6Q, T6P);					T6R = VFNMS(LDK(KP707106781), T6Q, T6P);					T6H = VFNMS(LDK(KP707106781), T6G, T6F);					T74 = VFMA(LDK(KP707106781), T6G, T6F);				   }				   T6U = VFNMS(LDK(KP923879532), T6D, T6w);				   T6E = VFMA(LDK(KP923879532), T6D, T6w);				   T9r = VFMA(LDK(KP923879532), T9q, T9p);				   T9t = VFNMS(LDK(KP923879532), T9q, T9p);				   T78 = VFNMS(LDK(KP198912367), T77, T76);				   T7c = VFMA(LDK(KP198912367), T76, T77);				   T6W = VFMA(LDK(KP668178637), T6O, T6R);				   T6S = VFNMS(LDK(KP668178637), T6R, T6O);				   T73 = VFMA(LDK(KP707106781), T6J, T6I);				   T6K = VFNMS(LDK(KP707106781), T6J, T6I);				   T7a = VFMA(LDK(KP923879532), T71, T6Y);				   T72 = VFNMS(LDK(KP923879532), T71, T6Y);				   T9x = VFNMS(LDK(KP923879532), T9w, T9v);				   T9z = VFMA(LDK(KP923879532), T9w, T9v);			      }			 }		    }	       }	  }	  {	       V T7b, T75, T6L, T6V;	       T7b = VFNMS(LDK(KP198912367), T73, T74);	       T75 = VFMA(LDK(KP198912367), T74, T73);	     
  T6L = VFMA(LDK(KP668178637), T6K, T6H);	       T6V = VFNMS(LDK(KP668178637), T6H, T6K);	       {		    V T79, T9A, T9y, T7d;		    T79 = VSUB(T75, T78);		    T9A = VADD(T75, T78);		    T9y = VSUB(T7c, T7b);		    T7d = VADD(T7b, T7c);		    {			 V T9s, T6X, T6T, T9u;			 T9s = VADD(T6V, T6W);			 T6X = VSUB(T6V, T6W);			 T6T = VADD(T6L, T6S);			 T9u = VSUB(T6S, T6L);			 ST(&(ii[WS(rs, 31)]), VFMA(LDK(KP980785280), T9A, T9z), ms, &(ii[WS(rs, 1)]));			 ST(&(ii[WS(rs, 15)]), VFNMS(LDK(KP980785280), T9A, T9z), ms, &(ii[WS(rs, 1)]));			 ST(&(ri[WS(rs, 7)]), VFMA(LDK(KP980785280), T79, T72), ms, &(ri[WS(rs, 1)]));			 ST(&(ri[WS(rs, 23)]), VFNMS(LDK(KP980785280), T79, T72), ms, &(ri[WS(rs, 1)]));			 ST(&(ri[WS(rs, 31)]), VFMA(LDK(KP980785280), T7d, T7a), ms, &(ri[WS(rs, 1)]));			 ST(&(ri[WS(rs, 15)]), VFNMS(LDK(KP980785280), T7d, T7a), ms, &(ri[WS(rs, 1)]));			 ST(&(ii[WS(rs, 23)]), VFNMS(LDK(KP980785280), T9y, T9x), ms, &(ii[WS(rs, 1)]));			 ST(&(ii[WS(rs, 7)]), VFMA(LDK(KP980785280), T9y, T9x), ms, &(ii[WS(rs, 1)]));			 ST(&(ri[WS(rs, 11)]), VFMA(LDK(KP831469612), T6X, T6U), ms, &(ri[WS(rs, 1)]));			 ST(&(ri[WS(rs, 27)]), VFNMS(LDK(KP831469612), T6X, T6U), ms, &(ri[WS(rs, 1)]));			 ST(&(ii[WS(rs, 19)]), VFNMS(LDK(KP831469612), T9s, T9r), ms, &(ii[WS(rs, 1)]));			 ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP831469612), T9s, T9r), ms, &(ii[WS(rs, 1)]));			 ST(&(ii[WS(rs, 27)]), VFNMS(LDK(KP831469612), T9u, T9t), ms, &(ii[WS(rs, 1)]));			 ST(&(ii[WS(rs, 11)]), VFMA(LDK(KP831469612), T9u, T9t), ms, &(ii[WS(rs, 1)]));			 ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP831469612), T6T, T6E), ms, &(ri[WS(rs, 1)]));			 ST(&(ri[WS(rs, 19)]), VFNMS(LDK(KP831469612), T6T, T6E), ms, &(ri[WS(rs, 1)]));		    }	       }	  }     }}/* Twiddle instruction table referenced by desc: four VTW entries (second arguments 1, 3, 9, 27) followed by a TW_NEXT record carrying (2 * VL). NOTE(review): presumably these are the stored base twiddles from which the codelet derives the remaining factors -- confirm against the tw_instr/VTW definitions. */static const tw_instr twinstr[] = {     VTW(0, 1),     VTW(0, 3),     VTW(0, 9),     VTW(0, 27),     {TW_NEXT, (2 * VL), 0}};/* Codelet descriptor handed to the registration call: the value 32, the name "t2sv_32", the twinstr table, &GENUS, a four-number group {236, 98, 252, 0}, and three trailing zeros. NOTE(review): 32 presumably is the transform size and the four numbers look like an operation count (adds, muls, fused multiply-adds, other) -- TODO confirm against the ct_desc declaration. */static const ct_desc desc = { 32, "t2sv_32", twinstr, &GENUS, {236, 98, 252, 0}, 0, 0, 0 };/* Registration entry point: passes the planner p, the t2sv_32 function and its descriptor desc to X(kdft_dit_register). */void X(codelet_t2sv_32) (planner *p) {     X(kdft_dit_register) (p, 
t2sv_32, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -twiddle-log3 -precompute-twiddles -n 32 -name t2sv_32 -include ts.h *//* * This function contains 488 FP additions, 280 FP multiplications, * (or, 376 additions, 168 multiplications, 112 fused multiply/add), * 158 stack variables, 7 constants, and 128 memory accesses */#include "ts.h"static void t2sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + (mb * 8); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 8), MAKE_VOLATILE_STRIDE(rs)) {	  V T2, T5, T3, T6, T8, TM, TO, Td, T9, Te, Th, Tl, TD, TH, T1y;	  V T1H, T15, T1A, T11, T1F, T1n, T1p, T2q, T2I, T2u, T2K, T2V, T3b, T2Z, T3d;	  V Tu, Ty, T3l, T3n, T1t, T1v, T2f, T2h, T1a, T1e, T32, T34, T1W, T1Y, T2C;	  V T2E, Tg, TR, Tk, TS, Tm, TV, To, TT, T1M, T21, T1P, T22, T1Q, T25;	  V T1S, T23;	  {	       V Ts, T1d, Tx, T18, Tt, T1c, Tw, T19, TB, T14, TG, TZ, TC, T13, TF;	       V T10;	       {		    V T4, Tc, T7, Tb;		    T2 = LDW(&(W[0]));		    T5 = LDW(&(W[TWVL * 1]));		    T3 = LDW(&(W[TWVL * 2]));		    T6 = LDW(&(W[TWVL * 3]));		    T4 = VMUL(T2, T3);		    Tc = VMUL(T5, T3);		    T7 = VMUL(T5, T6);		    Tb = VMUL(T2, T6);		    T8 = VADD(T4, T7);		    TM = VSUB(T4, T7);		    TO = VADD(Tb, Tc);		    Td = VSUB(Tb, Tc);		    T9 = LDW(&(W[TWVL * 4]));		    Ts = VMUL(T2, T9);		    T1d = VMUL(T6, 
T9);		    Tx = VMUL(T5, T9);		    T18 = VMUL(T3, T9);		    Te = LDW(&(W[TWVL * 5]));		    Tt = VMUL(T5, Te);		    T1c = VMUL(T3, Te);		    Tw = VMUL(T2, Te);		    T19 = VMUL(T6, Te);		    Th = LDW(&(W[TWVL * 6]));		    TB = VMUL(T3, Th);		    T14 = VMUL(T5, Th);		    TG = VMUL(T6, Th);		    TZ = VMUL(T2, Th);		    Tl = LDW(&(W[TWVL * 7]));		    TC = VMUL(T6, Tl);		    T13 = VMUL(T2, Tl);		    TF = VMUL(T3, Tl);		    T10 = VMUL(T5, Tl);	       }	       TD = VADD(TB, TC);	       TH = VSUB(TF, TG);	       T1y = VADD(TZ, T10);	       T1H = VADD(TF, TG);	       T15 = VADD(T13, T14);	       T1A = VSUB(T13, T14);	       T11 = VSUB(TZ, T10);	       T1F = VSUB(TB, TC);	       T1n = VFMA(T9, Th, VMUL(Te, Tl));	       T1p = VFNMS(Te, Th, VMUL(T9, Tl));	       {		    V T2o, T2p, T2s, T2t;		    T2o = VMUL(T8, Th);		    T2p = VMUL(Td, Tl);		    T2q = VADD(T2o, T2p);		    T2I = VSUB(T2o, T2p);		    T2s = VMUL(T8, Tl);		    T2t = VMUL(Td, Th);		    T2u = VSUB(T2s, T2t);		    T2K = VADD(T2s, T2t);	       }	       {		    V T2T, T2U, T2X, T2Y;		    T2T = VMUL(TM, Th);		    T2U = VMUL(TO, Tl);		    T2V = VSUB(T2T, T2U);		    T3b = VADD(T2T, T2U);		    T2X = VMUL(TM, Tl);		    T2Y = VMUL(TO, Th);		    T2Z = VADD(T2X, T2Y);		    T3d = VSUB(T2X, T2Y);		    Tu = VADD(Ts, Tt);		    Ty = VSUB(Tw, Tx);		    T3l = VFMA(Tu, Th, VMUL(Ty, Tl));		    T3n = VFNMS(Ty, Th, VMUL(Tu, Tl));	       }	       T1t = VSUB(Ts, Tt);	       T1v = VADD(Tw, Tx);	       T2f = VFMA(T1t, Th, VMUL(T1v, Tl));	       T2h = VFNMS(T1v, Th, VMUL(T1t, Tl));	       T1a = VSUB(T18, T19);	       T1e = VADD(T1c, T1d);	       T32 = VFMA(T1a, Th, VMUL(T1e, Tl));	       T34 = VFNMS(T1e, Th, VMUL(T1a, Tl));	       T1W = VADD(T18, T19);	       T1Y = VSUB(T1c, T1d);	       T2C = VFMA(T1W, Th, VMUL(T1Y, Tl));	       T2E = VFNMS(T1Y, Th, VMUL(T1W, Tl));	       {		    V Ta, Tf, Ti, Tj;		    Ta = VMUL(T8, T9);		    Tf = VMUL(Td, Te);		    Tg = VSUB(Ta, Tf);		    TR = VADD(Ta, Tf);		    Ti = VMUL(T8, Te);		    Tj = VMUL(Td, T9);		    Tk 
= VADD(Ti, Tj);		    TS = VSUB(Ti, Tj);	       }	       Tm = VFMA(Tg, Th, VMUL(Tk, Tl));	       TV = VFNMS(TS, Th, VMUL(TR, Tl));	       To = VFNMS(Tk, Th, VMUL(Tg, Tl));	       TT = VFMA(TR, Th, VMUL(TS, Tl));	       {		    V T1K, T1L, T1N, T1O;		    T1K = VMUL(TM, T9);		    T1L = VMUL(TO, Te);		    T1M = VSUB(T1K, T1L);		    T21 = VADD(T1K, T1L);		    T1N = VMUL(TM, Te);		    T1O = VMUL(TO, T9);		    T1P = VADD(T1N, T1O);		    T22 = VSUB(T1N, T1O);	       }	       T1Q = VFMA(T1M, Th, VMUL(T1P, Tl));	       T25 = VFNMS(T22, Th, VMUL(T21, Tl));	       T1S = VFNMS(T1P, Th, VMUL(T1M, Tl));	       T23 = VFMA(T21, Th, VMUL(T22, Tl));	  }	  {	       V TL, T6f, T8c, T8q, T3F, T5t, T7I, T7W, T2y, T6B, T6y, T7j, T4k, T5J, T4B;	       V T5G, T3h, T6H, T6O, T7o, T4L, T5N, T52, T5Q, T1i, T7V, T6i, T7D, T3K, T5u;	       V T3P, T5v, T1E, T6n, T6m, T7e, T3W, T5y, T41, T5z, T29, T6p, T6s, T7f, T47;	       V T5B, T4c, T5C, T2R, T6z, T6E, T7k, T4v, T5H, T4E, T5K, T3y, T6P, T6K, T7p;	       V T4W, T5R, T55, T5O;	       {		    V T1, T7G, Tq, T7F, TA, T3C, TJ, T3D, Tn, Tp;		    T1 = LD(&(ri[0]), ms, &(ri[0]));		    T7G = LD(&(ii[0]), ms, &(ii[0]));		    Tn = LD(&(ri[WS(rs, 16)]), ms, &(ri[0]));		    Tp = LD(&(ii[WS(rs, 16)]), ms, &(ii[0]));		    Tq = VFMA(Tm, Tn, VMUL(To, Tp));		    T7F = VFNMS(To, Tn, VMUL(Tm, Tp));		    {			 V Tv, Tz, TE, TI;			 Tv = LD(&(ri[WS(rs, 8)]), ms, &(ri[0]));			 Tz = LD(&(ii[WS(rs, 8)]), ms, &(ii[0]));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -