⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 n2sv_64.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
					     STM4(&(io[3]), Tgh, ovs, &(io[1]));					     Tgi = VFNMS(LDK(KP956940335), T6C, T6B);					     STM4(&(io[35]), Tgi, ovs, &(io[1]));					     {						  V T6l, T6x, T6d, T6b;						  T6l = VFMA(LDK(KP831469612), T6k, T6j);						  T6x = VFNMS(LDK(KP831469612), T6k, T6j);						  T6d = VFMA(LDK(KP831469612), T6a, T67);						  T6b = VFNMS(LDK(KP831469612), T6a, T67);						  {						       V T6g, T6i, T5W, T6c;						       T6g = VSUB(T6e, T6f);						       T6i = VADD(T6f, T6e);						       T5W = VSUB(T52, T5V);						       T6c = VADD(T52, T5V);						       Tgj = VFMA(LDK(KP956940335), T6w, T6v);						       STM4(&(io[19]), Tgj, ovs, &(io[1]));						       Tgk = VFNMS(LDK(KP956940335), T6w, T6v);						       STM4(&(io[51]), Tgk, ovs, &(io[1]));						       Tgl = VFMA(LDK(KP956940335), T6s, T6l);						       STM4(&(ro[3]), Tgl, ovs, &(ro[1]));						       Tgm = VFNMS(LDK(KP956940335), T6s, T6l);						       STM4(&(ro[35]), Tgm, ovs, &(ro[1]));						       Tgn = VFMA(LDK(KP881921264), T6i, T6h);						       STM4(&(ro[59]), Tgn, ovs, &(ro[1]));						       Tgo = VFNMS(LDK(KP881921264), T6i, T6h);						       STM4(&(ro[27]), Tgo, ovs, &(ro[1]));						       Tgp = VFMA(LDK(KP881921264), T6g, T6d);						       STM4(&(io[11]), Tgp, ovs, &(io[1]));						       Tgq = VFNMS(LDK(KP881921264), T6g, T6d);						       STM4(&(io[43]), Tgq, ovs, &(io[1]));						       Tgr = VFMA(LDK(KP881921264), T6c, T6b);						       STM4(&(io[59]), Tgr, ovs, &(io[1]));						       Tgs = VFNMS(LDK(KP881921264), T6c, T6b);						       STM4(&(io[27]), Tgs, ovs, &(io[1]));						       Tgt = VFMA(LDK(KP881921264), T5W, T49);						       STM4(&(ro[11]), Tgt, ovs, &(ro[1]));						       Tgu = VFNMS(LDK(KP881921264), T5W, T49);						       STM4(&(ro[43]), Tgu, ovs, &(ro[1]));						       Tgv = VFNMS(LDK(KP956940335), T6A, T6x);						       STM4(&(ro[51]), Tgv, ovs, &(ro[1]));						       Tgw = VFMA(LDK(KP956940335), T6A, T6x);						       STM4(&(ro[19]), Tgw, ovs, &(ro[1]));						  }					     }					}				   }				   {					V T8j, T8c, T8C, T8v, T8N, T8M, T8X, T7L, T9c, T92, T9d, T95, T98, T80;					{					     V T90, T91, T93, T94, T7S, T7Z;					     T8j = VFNMS(LDK(KP923879532), T8i, T8f);					     T90 = VFMA(LDK(KP923879532), T8i, T8f);					     T91 = VFMA(LDK(KP923879532), T8b, T84);					     T8c = VFNMS(LDK(KP923879532), T8b, T84);					     T8C = VFNMS(LDK(KP923879532), T8B, T8y);					     T93 = VFMA(LDK(KP923879532), T8B, T8y);					     T94 = VFMA(LDK(KP923879532), T8u, T8n);					     T8v = VFNMS(LDK(KP923879532), T8u, T8n);					     T8N = VFMA(LDK(KP198912367), T7O, T7R);					     T7S = VFNMS(LDK(KP198912367), T7R, T7O);					     T7Z = VFMA(LDK(KP198912367), T7Y, T7V);					     T8M = VFNMS(LDK(KP198912367), T7V, T7Y);					     T8X = VFMA(LDK(KP923879532), T7K, T7D);					     T7L = VFNMS(LDK(KP923879532), T7K, T7D);					     T9c = VFNMS(LDK(KP098491403), T90, T91);					     T92 = VFMA(LDK(KP098491403), T91, T90);					     T9d = VFMA(LDK(KP098491403), T93, T94);					     T95 = VFNMS(LDK(KP098491403), T94, T93);					     T98 = VADD(T7S, T7Z);					     T80 = VSUB(T7S, T7Z);					}					{					     V T8V, T81, T8T, T8k, T97, T8L, T8Y, T8O, T8S, T8D;					     T8V = VFNMS(LDK(KP980785280), T80, T7L);					     T81 = VFMA(LDK(KP980785280), T80, T7L);					     T8T = VFNMS(LDK(KP820678790), T8c, T8j);					     T8k = VFMA(LDK(KP820678790), T8j, T8c);					     T97 = VFMA(LDK(KP923879532), T8K, T8H);					     T8L = VFNMS(LDK(KP923879532), T8K, T8H);					     T8Y = VADD(T8N, T8M);					     T8O = VSUB(T8M, T8N);					     T8S = VFMA(LDK(KP820678790), T8v, T8C);					     T8D = VFNMS(LDK(KP820678790), T8C, T8v);					     {						  V T8R, T8P, T8U, T8W, T8E, T8Q;						  {						       V T96, T9f, T9g, T8Z;						       T9a = VSUB(T95, T92);						       T96 = VADD(T92, T95);						       T9f = VFMA(LDK(KP980785280), T98, T97);						       T99 = VFNMS(LDK(KP980785280), T98, T97);						       T9e = VSUB(T9c, T9d);						       T9g = VADD(T9c, T9d);						       T8Z = VFMA(LDK(KP980785280), T8Y, T8X);						       T9b = VFNMS(LDK(KP980785280), T8Y, T8X);						       T8R = VFMA(LDK(KP980785280), T8O, T8L);						       T8P = VFNMS(LDK(KP980785280), T8O, T8L);						       T8U = VSUB(T8S, T8T);						       T8W = VADD(T8T, T8S);						       T8E = VSUB(T8k, T8D);						       T8Q = VADD(T8k, T8D);						       {							    V Tgx, Tgy, Tgz, TgA;							    Tgx = VFNMS(LDK(KP995184726), T9g, T9f);							    STM4(&(io[33]), Tgx, ovs, &(io[1]));							    STN4(&(io[32]), TeO, Tgx, TfG, Tgi, ovs);							    Tgy = VFMA(LDK(KP995184726), T96, T8Z);							    STM4(&(ro[1]), Tgy, ovs, &(ro[1]));							    STN4(&(ro[0]), TeL, Tgy, Tfv, Tgl, ovs);							    Tgz = VFNMS(LDK(KP995184726), T96, T8Z);							    STM4(&(ro[33]), Tgz, ovs, &(ro[1]));							    STN4(&(ro[32]), TeM, Tgz, Tfw, Tgm, ovs);							    TgA = VFMA(LDK(KP995184726), T9g, T9f);							    STM4(&(io[1]), TgA, ovs, &(io[1]));							    STN4(&(io[0]), TeN, TgA, TfF, Tgh, ovs);						       }						  }						  {						       V TgB, TgC, TgD, TgE;						       TgB = VFMA(LDK(KP773010453), T8W, T8V);						       STM4(&(ro[57]), TgB, ovs, &(ro[1]));						       STN4(&(ro[56]), TeS, TgB, Tfx, Tgn, ovs);						       TgC = VFNMS(LDK(KP773010453), T8W, T8V);						       STM4(&(ro[25]), TgC, ovs, &(ro[1]));						       STN4(&(ro[24]), TeR, TgC, Tfy, Tgo, ovs);						       TgD = VFMA(LDK(KP773010453), T8U, T8R);						       STM4(&(io[9]), TgD, ovs, &(io[1]));						       STN4(&(io[8]), TeT, TgD, Tfz, Tgp, ovs);						       TgE = VFNMS(LDK(KP773010453), T8U, T8R);						       STM4(&(io[41]), TgE, ovs, &(io[1]));						       STN4(&(io[40]), TeU, TgE, TfA, Tgq, ovs);						       {							    V TgF, TgG, TgH, TgI;							    TgF = VFMA(LDK(KP773010453), T8Q, T8P);							    STM4(&(io[57]), TgF, ovs, &(io[1]));							    STN4(&(io[56]), TeW, TgF, TfB, Tgr, ovs);							    TgG = VFNMS(LDK(KP773010453), T8Q, T8P);							    STM4(&(io[25]), TgG, ovs, &(io[1]));							    STN4(&(io[24]), TeV, TgG, TfC, Tgs, ovs);							    TgH = VFMA(LDK(KP773010453), T8E, T81);							    STM4(&(ro[9]), TgH, ovs, &(ro[1]));							    STN4(&(ro[8]), TeX, TgH, TfD, Tgt, ovs);							    TgI = VFNMS(LDK(KP773010453), T8E, T81);							    STM4(&(ro[41]), TgI, ovs, &(ro[1]));							    STN4(&(ro[40]), TeY, TgI, TfE, Tgu, ovs);						       }						  }					     }					}				   }			      }			 }		    }	       }	  }	  {	       V TgJ, TgK, TgL, TgM;	       TgJ = VFMA(LDK(KP995184726), T9a, T99);	       STM4(&(io[17]), TgJ, ovs, &(io[1]));	       STN4(&(io[16]), TeQ, TgJ, TfI, Tgj, ovs);	       TgK = VFNMS(LDK(KP995184726), T9a, T99);	       STM4(&(io[49]), TgK, ovs, &(io[1]));	       STN4(&(io[48]), TeP, TgK, TfH, Tgk, ovs);	       TgL = VFMA(LDK(KP995184726), T9e, T9b);	       STM4(&(ro[17]), TgL, ovs, &(ro[1]));	       STN4(&(ro[16]), TeK, TgL, TfJ, Tgw, ovs);	       TgM = VFNMS(LDK(KP995184726), T9e, T9b);	       STM4(&(ro[49]), TgM, ovs, &(ro[1]));	       STN4(&(ro[48]), TeJ, TgM, TfK, Tgv, ovs);	  }     }}static const kdft_desc desc = { 64, "n2sv_64", {520, 0, 392, 0}, &GENUS, 0, 1, 0, 0 };void X(codelet_n2sv_64) (planner *p) {     X(kdft_register) (p, n2sv_64, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_notw -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name n2sv_64 -with-ostride 1 -include n2s.h -store-multiple 4 *//* * This function contains 912 FP additions, 248 FP multiplications, * (or, 808 additions, 144 multiplications, 104 fused multiply/add), * 260 stack variables, 15 constants, and 288 memory accesses */#include "n2s.h"static void n2sv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs){     DVK(KP773010453, +0.773010453362736960810906609758469800971041293);     DVK(KP634393284, +0.634393284163645498215171613225493370675687095);     DVK(KP098017140, +0.098017140329560601994195563888641845861136673);     DVK(KP995184726, +0.995184726672196886244836953109479921575474869);     DVK(KP881921264, +0.881921264348355029712756863660388349508442621);     DVK(KP471396736, +0.471396736825997648556387625905254377657460319);     DVK(KP290284677, +0.290284677254462367636192375817395274691476278);     DVK(KP956940335, +0.956940335732208864935797886980269969482849206);     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT i;     for (i = v; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * ivs), ii = ii + ((2 * VL) * ivs), ro = ro + ((2 * VL) * ovs), io = io + ((2 * VL) * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {	  V T37, T7B, T8F, T5Z, Tf, Td9, TbB, TcB, T62, T7C, T2i, TdH, Tah, Tcb, T3e;	  V T8G, Tu, TdI, Tak, TbD, Tan, TbC, T2x, Tda, T3m, T65, T7G, T8J, T7J, T8I;	  V T3t, T64, TK, Tdd, Tas, Tce, Tav, Tcf, T2N, Tdc, T3G, T6G, T7O, T9k, T7R;	  V T9l, T3N, T6H, T1L, Tdv, Tbs, Tcw, TdC, Teo, T5j, T6V, T5Q, T6Y, T8y, T9C;	  V Tbb, Tct, T8n, T9z, TZ, Tdf, Taz, Tch, TaC, Tci, T32, Tdg, T3Z, T6J, T7V;	  V T9n, T7Y, T9o, T46, T6K, T1g, Tdp, Tb1, Tcm, Tdm, Tej, T4q, T6R, T4X, T6O;	  V T8f, T9s, TaK, Tcp, T84, T9v, T1v, Tdn, Tb4, Tcq, Tds, Tek, T4N, T6P, T50;	  V T6S, T8i, T9w, TaV, Tcn, T8b, T9t, T20, TdD, Tbv, Tcu, Tdy, Tep, T5G, T6Z;	  V T5T, T6W, T8B, T9A, Tbm, Tcx, T8u, T9D;	  {	       V T3, T35, T26, T5Y, T6, T5X, T29, T36, Ta, T39, T2d, T38, Td, T3b, T2g;	       V T3c;	       {		    V T1, T2, T24, T25;		    T1 = LD(&(ri[0]), ivs, &(ri[0]));		    T2 = LD(&(ri[WS(is, 32)]), ivs, &(ri[0]));		    T3 = VADD(T1, T2);		    T35 = VSUB(T1, T2);		    T24 = LD(&(ii[0]), ivs, &(ii[0]));		    T25 = LD(&(ii[WS(is, 32)]), ivs, &(ii[0]));		    T26 = VADD(T24, T25);		    T5Y = VSUB(T24, T25);	       }	       {		    V T4, T5, T27, T28;		    T4 = LD(&(ri[WS(is, 16)]), ivs, &(ri[0]));		    T5 = LD(&(ri[WS(is, 48)]), ivs, &(ri[0]));		    T6 = VADD(T4, T5);		    T5X = VSUB(T4, T5);		    T27 = LD(&(ii[WS(is, 16)]), ivs, &(ii[0]));		    T28 = LD(&(ii[WS(is, 48)]), ivs, &(ii[0]));		    T29 = VADD(T27, T28);		    T36 = VSUB(T27, T28);	       }	       {		    V T8, T9, T2b, T2c;		    T8 = LD(&(ri[WS(is, 8)]), ivs, &(ri[0]));		    T9 = LD(&(ri[WS(is, 40)]), ivs, &(ri[0]));		    Ta = VADD(T8, T9);		    T39 = VSUB(T8, T9);		    T2b = LD(&(ii[WS(is, 8)]), ivs, &(ii[0]));		    T2c = LD(&(ii[WS(is, 40)]), ivs, &(ii[0]));		    T2d = VADD(T2b, T2c);		    T38 = VSUB(T2b, T2c);	       }	       {		    V Tb, Tc, T2e, T2f;		    Tb = LD(&(ri[WS(is, 56)]), ivs, &(ri[0]));		    Tc = LD(&(ri[WS(is, 24)]), ivs, &(ri[0]));		    Td = VADD(Tb, Tc);		    T3b = VSUB(Tb, Tc);		    T2e = LD(&(ii[WS(is, 56)]), ivs, &(ii[0]));		    T2f = LD(&(ii[WS(is, 24)]), ivs, &(ii[0]));		    T2g = VADD(T2e, T2f);		    T3c = VSUB(T2e, T2f);	       }	       {		    V T7, Te, T2a, T2h;		    T37 = VSUB(T35, T36);		    T7B = VADD(T35, T36);		    T8F = VSUB(T5Y, T5X);		    T5Z = VADD(T5X, T5Y);		    T7 = VADD(T3, T6);		    Te = VADD(Ta, Td);		    Tf = VADD(T7, Te);		    Td9 = VSUB(T7, Te);		    {			 V Tbz, TbA, T60, T61;			 Tbz = VSUB(T26, T29);			 TbA = VSUB(Td, Ta);			 TbB = VSUB(Tbz, TbA);			 TcB = VADD(TbA, Tbz);			 T60 = VSUB(T3b, T3c);			 T61 = VADD(T39, T38);			 T62 = VMUL(LDK(KP707106781), VSUB(T60, T61));			 T7C = VMUL(LDK(KP707106781), VADD(T61, T60));		    }		    T2a = VADD(T26, T29);		    T2h = VADD(T2d, T2g);		    T2i = VADD(T2a, T2h);		    TdH = VSUB(T2a, T2h);		    {			 V Taf, Tag, T3a, T3d;			 Taf = VSUB(T3, T6);			 Tag = VSUB(T2d, T2g);			 Tah = VSUB(Taf, Tag);			 Tcb = VADD(Taf, Tag);			 T3a = VSUB(T38, T39);			 T3d = VADD(T3b, T3c);			 T3e = VMUL(LDK(KP707106781), VSUB(T3a, T3d));			 T8G = VMUL(LDK(KP707106781), VADD(T3a, T3d));		    }	       }	  }	  {	       V Ti, T3j, T2l, T3h, Tl, T3g, T2o, T3k, Tp, T3q, T2s, T3o, Ts, T3n, T2v;	       V T3r;	       {		    V Tg, Th, T2j, T2k;		    Tg = LD(&(ri[WS(is, 4)]), ivs, &(ri[0]));		    Th = LD(&(ri[WS(is, 36)]), ivs, &(ri[0]));		    Ti = VADD(Tg, Th);		    T3j = VSUB(Tg, Th);		    T2j = LD(&(ii[WS(is, 4)]), ivs, &(ii[0]));		    T2k = LD(&(ii[WS(is, 36)]), ivs, &(ii[0]));		    T2l = VADD(T2j, T2k);		    T3h = VSUB(T2j, T2k);	       }	       {		    V Tj, Tk, T2m, T2n;		    Tj = LD(&(ri[WS(is, 20)]), ivs, &(ri[0]));		    Tk = LD(&(ri[WS(is, 52)]), ivs, &(ri[0]));		    Tl = VADD(Tj, Tk);		    T3g = VSUB(Tj, Tk);		    T2m = LD(&(ii[WS(is, 20)]), ivs, &(ii[0]));		    T2n = LD(&(ii[WS(is, 52)]), ivs, &(ii[0]));		    T2o = VADD(T2m, T2n);		    T3k = VSUB(T2m, T2n);	       }	       {		    V Tn, To, T2q, T2r;		    Tn = LD(&(ri[WS(is, 60)]), ivs, &(ri[0]));		    To = LD(&(ri[WS(is, 28)]), ivs, &(ri[0]));		    Tp = VADD(Tn, To);		    T3q = VSUB(Tn, To);		    T2q = LD(&(ii[WS(is, 60)]), ivs, &(ii[0]));		    T2r = LD(&(ii[WS(is, 28)]), ivs, &(ii[0]));		    T2s = VADD(T2q, T2r);		    T3o = VSUB(T2q, T2r);	       }	       {		    V Tq, Tr, T2t, T2u;		    Tq = LD(&(ri[WS(is, 12)]), ivs, &(ri[0]));		    Tr = LD(&(ri[WS(is, 44)]), ivs, &(ri[0]));		    Ts = VADD(Tq, Tr);		    T3n = VSUB(Tq, Tr);		    T2t = LD(&(ii[WS(is, 12)]), ivs, &(ii[0]));		   

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -