⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t1sv_32.c

📁 用于DFT计算的c语言的库的最新版本,包含丰富的函数库.
💻 C
📖 第 1 页 / 共 5 页
字号:
				   T4V = VADD(T4T, T4U);				   T69 = VSUB(T4T, T4U);			      }			      T5y = VFNMS(LDK(KP923879532), T4n, T3S);			      T4o = VFMA(LDK(KP923879532), T4n, T3S);			      T8J = VFMA(LDK(KP923879532), T8I, T8H);			      T8L = VFNMS(LDK(KP923879532), T8I, T8H);			      T5M = VFNMS(LDK(KP198912367), T5L, T5K);			      T5Q = VFMA(LDK(KP198912367), T5K, T5L);			      T5A = VFMA(LDK(KP668178637), T5k, T5v);			      T5w = VFNMS(LDK(KP668178637), T5v, T5k);			      T5H = VFMA(LDK(KP707106781), T4V, T4S);			      T4W = VFNMS(LDK(KP707106781), T4V, T4S);			      T5O = VFNMS(LDK(KP923879532), T5F, T5C);			      T5G = VFMA(LDK(KP923879532), T5F, T5C);			      T8D = VFMA(LDK(KP923879532), T8C, T8B);			      T8F = VFNMS(LDK(KP923879532), T8C, T8B);			 }			 {			      V T6p, T6q, T6o, T5W, T8W, T63;			      {				   V T5J, T5P, T5z, T4X, T5Z, T62;				   T5J = VFMA(LDK(KP198912367), T5I, T5H);				   T5P = VFNMS(LDK(KP198912367), T5H, T5I);				   T5z = VFNMS(LDK(KP668178637), T4L, T4W);				   T4X = VFMA(LDK(KP668178637), T4W, T4L);				   T6p = VFNMS(LDK(KP414213562), T5X, T5Y);				   T5Z = VFMA(LDK(KP414213562), T5Y, T5X);				   T62 = VFNMS(LDK(KP414213562), T61, T60);				   T6q = VFMA(LDK(KP414213562), T60, T61);				   {					V T8G, T5N, T5R, T8E;					T8G = VSUB(T5M, T5J);					T5N = VADD(T5J, T5M);					T5R = VSUB(T5P, T5Q);					T8E = VADD(T5P, T5Q);					{					     V T5B, T8K, T8M, T5x;					     T5B = VADD(T5z, T5A);					     T8K = VSUB(T5A, T5z);					     T8M = VADD(T4X, T5w);					     T5x = VSUB(T4X, T5w);					     T6o = VFNMS(LDK(KP707106781), T5V, T5S);					     T5W = VFMA(LDK(KP707106781), T5V, T5S);					     T8W = VADD(T5Z, T62);					     T63 = VSUB(T5Z, T62);					     ST(&(ii[WS(rs, 25)]), VFNMS(LDK(KP980785280), T8G, T8F), ms, &(ii[WS(rs, 1)]));					     ST(&(ii[WS(rs, 9)]), VFMA(LDK(KP980785280), T8G, T8F), ms, &(ii[WS(rs, 1)]));					     ST(&(ri[WS(rs, 1)]), VFMA(LDK(KP980785280), T5N, T5G), ms, &(ri[WS(rs, 1)]));					     ST(&(ri[WS(rs, 17)]), VFNMS(LDK(KP980785280), T5N, 
T5G), ms, &(ri[WS(rs, 1)]));					     ST(&(ri[WS(rs, 9)]), VFMA(LDK(KP980785280), T5R, T5O), ms, &(ri[WS(rs, 1)]));					     ST(&(ri[WS(rs, 25)]), VFNMS(LDK(KP980785280), T5R, T5O), ms, &(ri[WS(rs, 1)]));					     ST(&(ii[WS(rs, 17)]), VFNMS(LDK(KP980785280), T8E, T8D), ms, &(ii[WS(rs, 1)]));					     ST(&(ii[WS(rs, 1)]), VFMA(LDK(KP980785280), T8E, T8D), ms, &(ii[WS(rs, 1)]));					     ST(&(ri[WS(rs, 29)]), VFMA(LDK(KP831469612), T5B, T5y), ms, &(ri[WS(rs, 1)]));					     ST(&(ri[WS(rs, 13)]), VFNMS(LDK(KP831469612), T5B, T5y), ms, &(ri[WS(rs, 1)]));					     ST(&(ii[WS(rs, 21)]), VFNMS(LDK(KP831469612), T8K, T8J), ms, &(ii[WS(rs, 1)]));					     ST(&(ii[WS(rs, 5)]), VFMA(LDK(KP831469612), T8K, T8J), ms, &(ii[WS(rs, 1)]));					     ST(&(ii[WS(rs, 29)]), VFMA(LDK(KP831469612), T8M, T8L), ms, &(ii[WS(rs, 1)]));					     ST(&(ii[WS(rs, 13)]), VFNMS(LDK(KP831469612), T8M, T8L), ms, &(ii[WS(rs, 1)]));					     ST(&(ri[WS(rs, 5)]), VFMA(LDK(KP831469612), T5x, T4o), ms, &(ri[WS(rs, 1)]));					     ST(&(ri[WS(rs, 21)]), VFNMS(LDK(KP831469612), T5x, T4o), ms, &(ri[WS(rs, 1)]));					}				   }			      }			      {				   V T6k, T64, T8V, T6r, T8R, T8T, T6y, T6C, T6m, T6i, T6v, T6B, T6l, T6b, T6A;				   V T6s, T8X;				   {					V T6x, T6e, T6w, T6h, T6u, T67, T6t, T6a, T8P, T8Q;					T6k = VFNMS(LDK(KP923879532), T63, T5W);					T64 = VFMA(LDK(KP923879532), T63, T5W);					T8V = VFNMS(LDK(KP707106781), T8O, T8N);					T8P = VFMA(LDK(KP707106781), T8O, T8N);					T8Q = VSUB(T6q, T6p);					T6r = VADD(T6p, T6q);					T6x = VFMA(LDK(KP707106781), T6d, T6c);					T6e = VFNMS(LDK(KP707106781), T6d, T6c);					T6w = VFMA(LDK(KP707106781), T6g, T6f);					T6h = VFNMS(LDK(KP707106781), T6g, T6f);					T6u = VFMA(LDK(KP707106781), T66, T65);					T67 = VFNMS(LDK(KP707106781), T66, T65);					T6t = VFMA(LDK(KP707106781), T69, T68);					T6a = VFNMS(LDK(KP707106781), T69, T68);					T8R = VFMA(LDK(KP923879532), T8Q, T8P);					T8T = VFNMS(LDK(KP923879532), T8Q, T8P);					T6y = VFNMS(LDK(KP198912367), T6x, 
T6w);					T6C = VFMA(LDK(KP198912367), T6w, T6x);					T6m = VFMA(LDK(KP668178637), T6e, T6h);					T6i = VFNMS(LDK(KP668178637), T6h, T6e);					T6v = VFMA(LDK(KP198912367), T6u, T6t);					T6B = VFNMS(LDK(KP198912367), T6t, T6u);					T6l = VFNMS(LDK(KP668178637), T67, T6a);					T6b = VFMA(LDK(KP668178637), T6a, T67);				   }				   T6A = VFMA(LDK(KP923879532), T6r, T6o);				   T6s = VFNMS(LDK(KP923879532), T6r, T6o);				   T8X = VFNMS(LDK(KP923879532), T8W, T8V);				   T8Z = VFMA(LDK(KP923879532), T8W, T8V);				   {					V T6z, T6D, T8Y, T6n, T8S, T8U, T6j;					T6z = VSUB(T6v, T6y);					T90 = VADD(T6v, T6y);					T6D = VADD(T6B, T6C);					T8Y = VSUB(T6C, T6B);					T6n = VSUB(T6l, T6m);					T8S = VADD(T6l, T6m);					T8U = VSUB(T6i, T6b);					T6j = VADD(T6b, T6i);					ST(&(ri[WS(rs, 7)]), VFMA(LDK(KP980785280), T6z, T6s), ms, &(ri[WS(rs, 1)]));					ST(&(ri[WS(rs, 23)]), VFNMS(LDK(KP980785280), T6z, T6s), ms, &(ri[WS(rs, 1)]));					ST(&(ii[WS(rs, 23)]), VFNMS(LDK(KP980785280), T8Y, T8X), ms, &(ii[WS(rs, 1)]));					ST(&(ii[WS(rs, 7)]), VFMA(LDK(KP980785280), T8Y, T8X), ms, &(ii[WS(rs, 1)]));					ST(&(ri[WS(rs, 11)]), VFMA(LDK(KP831469612), T6n, T6k), ms, &(ri[WS(rs, 1)]));					ST(&(ri[WS(rs, 27)]), VFNMS(LDK(KP831469612), T6n, T6k), ms, &(ri[WS(rs, 1)]));					ST(&(ii[WS(rs, 19)]), VFNMS(LDK(KP831469612), T8S, T8R), ms, &(ii[WS(rs, 1)]));					ST(&(ii[WS(rs, 3)]), VFMA(LDK(KP831469612), T8S, T8R), ms, &(ii[WS(rs, 1)]));					ST(&(ii[WS(rs, 27)]), VFNMS(LDK(KP831469612), T8U, T8T), ms, &(ii[WS(rs, 1)]));					ST(&(ii[WS(rs, 11)]), VFMA(LDK(KP831469612), T8U, T8T), ms, &(ii[WS(rs, 1)]));					ST(&(ri[WS(rs, 3)]), VFMA(LDK(KP831469612), T6j, T64), ms, &(ri[WS(rs, 1)]));					ST(&(ri[WS(rs, 19)]), VFNMS(LDK(KP831469612), T6j, T64), ms, &(ri[WS(rs, 1)]));					ST(&(ri[WS(rs, 31)]), VFMA(LDK(KP980785280), T6D, T6A), ms, &(ri[WS(rs, 1)]));					ST(&(ri[WS(rs, 15)]), VFNMS(LDK(KP980785280), T6D, T6A), ms, &(ri[WS(rs, 1)]));				   }			      }			 }		    }	       }	  }	  ST(&(ii[WS(rs, 31)]), 
VFMA(LDK(KP980785280), T90, T8Z), ms, &(ii[WS(rs, 1)]));
	  ST(&(ii[WS(rs, 15)]), VFNMS(LDK(KP980785280), T90, T8Z), ms, &(ii[WS(rs, 1)]));
     }
}

/*
 * Twiddle-instruction table handed to the planner through `desc` below.
 * It lists VTW(0, k) for k = 1 .. 31 (31 entries) and is terminated by a
 * TW_NEXT record whose second field is (2 * VL).
 * NOTE(review): VTW and TW_NEXT are defined outside this file; presumably
 * each VTW entry expands to the twiddle factors for one of the 31 non-trivial
 * inputs of the size-32 transform -- confirm against the SIMD headers.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     VTW(0, 8),
     VTW(0, 9),
     VTW(0, 10),
     VTW(0, 11),
     VTW(0, 12),
     VTW(0, 13),
     VTW(0, 14),
     VTW(0, 15),
     VTW(0, 16),
     VTW(0, 17),
     VTW(0, 18),
     VTW(0, 19),
     VTW(0, 20),
     VTW(0, 21),
     VTW(0, 22),
     VTW(0, 23),
     VTW(0, 24),
     VTW(0, 25),
     VTW(0, 26),
     VTW(0, 27),
     VTW(0, 28),
     VTW(0, 29),
     VTW(0, 30),
     VTW(0, 31),
     {TW_NEXT, (2 * VL), 0}
};

/*
 * Codelet descriptor: transform size 32, codelet name "t1sv_32", the twiddle
 * table above, and the GENUS object supplied by the included header.
 * NOTE(review): the quadruple {236, 62, 198, 0} is presumably the operation
 * count {additions, multiplications, fused multiply/adds, other} of this
 * (FMA) variant; 236 + 198 = 434 agrees with the "434 FP additions" quoted
 * for the non-FMA variant below. The meaning of the three trailing zeros is
 * not visible here -- confirm against the ct_desc declaration.
 */
static const ct_desc desc = { 32, "t1sv_32", twinstr, &GENUS, {236, 62, 198, 0}, 0, 0, 0 };

/*
 * Registration entry point: registers the t1sv_32 kernel together with its
 * descriptor with planner `p` by calling X(kdft_dit_register).
 */
void X(codelet_t1sv_32) (planner *p) {
     X(kdft_dit_register) (p, t1sv_32, &desc);
}
#else				/* HAVE_FMA */
/*
 * Everything from here on is the alternative implementation selected when the
 * preprocessor condition annotated as HAVE_FMA is false; the matching #if is
 * above the visible part of the file.
 */

/* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1sv_32 -include ts.h */

/*
 * This function contains 434 FP additions, 208 FP multiplications,
 * (or, 340 additions, 114 multiplications, 94 fused multiply/add),
 * 96 stack variables, 7 constants, and 128 memory accesses
 */
#include "ts.h"

/*
 * Non-FMA variant of the size-32 twiddle codelet.
 *
 * ri, ii  - data arrays that the body loads from and stores to in place
 *           (presumably the real and imaginary parts -- TODO confirm).
 * W       - twiddle table; 62 values are consumed per unit of m.
 * rs      - stride passed to WS() to address the 32 inputs of one transform.
 * mb, me  - half-open range [mb, me) covered by the loop variable m.
 * ms      - stride between consecutive values of m in ri/ii.
 */
static void t1sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     /* Trigonometric constants, named after their leading decimal digits. */
     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);	/* sin(pi/16) */
     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);	/* cos(pi/16) */
     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);	/* sin(3*pi/16) */
     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);	/* cos(3*pi/16) */
     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);	/* sin(pi/8) */
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);	/* cos(pi/8) */
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);	/* cos(pi/4) = sqrt(1/2) */
     INT m;
     /*
      * Each iteration handles (2 * VL) consecutive values of m: ri and ii
      * advance by (2 * VL) * ms and W by (2 * VL) * 62. W is first offset by
      * mb * 62 so that it lines up with the starting index mb.
      */
     for (m = mb, W = W + (mb * 62); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 62), 
MAKE_VOLATILE_STRIDE(rs)) {	  V Tj, T5F, T7C, T7Q, T35, T4T, T78, T7m, T1Q, T61, T5Y, T6J, T3K, T59, T41;	  V T56, T2B, T67, T6e, T6O, T4b, T5d, T4s, T5g, TG, T7l, T5I, T73, T3a, T4U;	  V T3f, T4V, T14, T5N, T5M, T6E, T3m, T4Y, T3r, T4Z, T1r, T5P, T5S, T6F, T3x;	  V T51, T3C, T52, T2d, T5Z, T64, T6K, T3V, T57, T44, T5a, T2Y, T6f, T6a, T6P;	  V T4m, T5h, T4v, T5e;	  {	       V T1, T76, T6, T75, Tc, T32, Th, T33;	       T1 = LD(&(ri[0]), ms, &(ri[0]));	       T76 = LD(&(ii[0]), ms, &(ii[0]));	       {		    V T3, T5, T2, T4;		    T3 = LD(&(ri[WS(rs, 16)]), ms, &(ri[0]));		    T5 = LD(&(ii[WS(rs, 16)]), ms, &(ii[0]));		    T2 = LDW(&(W[TWVL * 30]));		    T4 = LDW(&(W[TWVL * 31]));		    T6 = VFMA(T2, T3, VMUL(T4, T5));		    T75 = VFNMS(T4, T3, VMUL(T2, T5));	       }	       {		    V T9, Tb, T8, Ta;		    T9 = LD(&(ri[WS(rs, 8)]), ms, &(ri[0]));		    Tb = LD(&(ii[WS(rs, 8)]), ms, &(ii[0]));		    T8 = LDW(&(W[TWVL * 14]));		    Ta = LDW(&(W[TWVL * 15]));		    Tc = VFMA(T8, T9, VMUL(Ta, Tb));		    T32 = VFNMS(Ta, T9, VMUL(T8, Tb));	       }	       {		    V Te, Tg, Td, Tf;		    Te = LD(&(ri[WS(rs, 24)]), ms, &(ri[0]));		    Tg = LD(&(ii[WS(rs, 24)]), ms, &(ii[0]));		    Td = LDW(&(W[TWVL * 46]));		    Tf = LDW(&(W[TWVL * 47]));		    Th = VFMA(Td, Te, VMUL(Tf, Tg));		    T33 = VFNMS(Tf, Te, VMUL(Td, Tg));	       }	       {		    V T7, Ti, T7A, T7B;		    T7 = VADD(T1, T6);		    Ti = VADD(Tc, Th);		    Tj = VADD(T7, Ti);		    T5F = VSUB(T7, Ti);		    T7A = VSUB(T76, T75);		    T7B = VSUB(Tc, Th);		    T7C = VSUB(T7A, T7B);		    T7Q = VADD(T7B, T7A);	       }	       {		    V T31, T34, T74, T77;		    T31 = VSUB(T1, T6);		    T34 = VSUB(T32, T33);		    T35 = VSUB(T31, T34);		    T4T = VADD(T31, T34);		    T74 = VADD(T32, T33);		    T77 = VADD(T75, T76);		    T78 = VADD(T74, T77);		    T7m = VSUB(T77, T74);	       }	  }	  {	       V T1y, T3G, T1O, T3Z, T1D, T3H, T1J, T3Y;	       {		    V T1v, T1x, T1u, T1w;		    T1v = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));		    T1x = 
LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));		    T1u = LDW(&(W[0]));		    T1w = LDW(&(W[TWVL * 1]));		    T1y = VFMA(T1u, T1v, VMUL(T1w, T1x));		    T3G = VFNMS(T1w, T1v, VMUL(T1u, T1x));	       }	       {		    V T1L, T1N, T1K, T1M;		    T1L = LD(&(ri[WS(rs, 25)]), ms, &(ri[WS(rs, 1)]));		    T1N = LD(&(ii[WS(rs, 25)]), ms, &(ii[WS(rs, 1)]));		    T1K = LDW(&(W[TWVL * 48]));		    T1M = LDW(&(W[TWVL * 49]));		    T1O = VFMA(T1K, T1L, VMUL(T1M, T1N));		    T3Z = VFNMS(T1M, T1L, VMUL(T1K, T1N));	       }	       {		    V T1A, T1C, T1z, T1B;		    T1A = LD(&(ri[WS(rs, 17)]), ms, &(ri[WS(rs, 1)]));		    T1C = LD(&(ii[WS(rs, 17)]), ms, &(ii[WS(rs, 1)]));		    T1z = LDW(&(W[TWVL * 32]));		    T1B = LDW(&(W[TWVL * 33]));		    T1D = VFMA(T1z, T1A, VMUL(T1B, T1C));		    T3H = VFNMS(T1B, T1A, VMUL(T1z, T1C));	       }	       {		    V T1G, T1I, T1F, T1H;		    T1G = LD(&(ri[WS(rs, 9)]), ms, &(ri[WS(rs, 1)]));		    T1I = LD(&(ii[WS(rs, 9)]), ms, &(ii[WS(rs, 1)]));		    T1F = LDW(&(W[TWVL * 16]));		    T1H = LDW(&(W[TWVL * 17]));		    T1J = VFMA(T1F, T1G, VMUL(T1H, T1I));		    T3Y = VFNMS(T1H, T1G, VMUL(T1F, T1I));	       }	       {		    V T1E, T1P, T5W, T5X;		    T1E = VADD(T1y, T1D);		    T1P = VADD(T1J, T1O);		    T1Q = VADD(T1E, T1P);		    T61 = VSUB(T1E, T1P);		    T5W = VADD(T3G, T3H);		    T5X = VADD(T3Y, T3Z);		    T5Y = VSUB(T5W, T5X);		    T6J = VADD(T5W, T5X);	       }	       {		    V T3I, T3J, T3X, T40;		    T3I = VSUB(T3G, T3H);		    T3J = VSUB(T1J, T1O);		    T3K = VADD(T3I, T3J);		    T59 = VSUB(T3I, T3J);		    T3X = VSUB(T1y, T1D);		    T40 = VSUB(T3Y, T3Z);		    T41 = VSUB(T3X, T40);		    T56 = VADD(T3X, T40);	       }	  }	  {	       V T2j, T4o, T2z, T49, T2o, T4p, T2u, T48;	       {		    V T2g, T2i, T2f, T2h;		    T2g = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)]));		    T2i = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)]));		    T2f = LDW(&(W[TWVL * 60]));		    T2h = LDW(&(W[TWVL * 61]));		    T2j = VFMA(T2f, T2g, VMUL(T2h, T2i));		    T4o = VFNMS(T2h, T2g, VMUL(T2f, 
T2i));	       }	       {		    V T2w, T2y, T2v, T2x;		    T2w = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)]));		    T2y = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)]));		    T2v = LDW(&(W[TWVL * 44]));		    T2x = LDW(&(W[TWVL * 45]));		    T2z = VFMA(T2v, T2w, VMUL(T2x, T2y));		    T49 = VFNMS(T2x, T2w, VMUL(T2v, T2y));	       }	       {		    V T2l, T2n, T2k, T2m;		    T2l = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)]));		    T2n = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)]));		    T2k = LDW(&(W[TWVL * 28]));		    T2m = LDW(&(W[TWVL * 29]));		    T2o = VFMA(T2k, T2l, VMUL(T2m, T2n));		    T4p = VFNMS(T2m, T2l, VMUL(T2k, T2n));	       }	       {		    V T2r, T2t, T2q, T2s;		    T2r = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)]));		    T2t = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)]));		    T2q = LDW(&(W[TWVL * 12]));		    T2s = LDW(&(W[TWVL * 13]));		    T2u = VFMA(T2q, T2r, VMUL(T2s, T2t));		    T48 = VFNMS(T2s, T2r, VMUL(T2q, T2t));	       }	       {		    V T2p, T2A, T6c, T6d;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -