⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t1sv_16.c

📁 快速傅立叶变换库函数
💻 C
📖 第 1 页 / 共 2 页
字号:
     /*
      * NOTE(review): the next four entries and the closing "};" are the tail
      * of an array initializer that starts before the visible part of this
      * file (presumably the twinstr[] table of the HAVE_FMA variant -- confirm
      * against the full source).
      */
     VTW(13),
     VTW(14),
     VTW(15),
     {TW_NEXT, (2 * VL), 0}
};

/*
 * Codelet descriptor for the variant that precedes the #else below.
 * NOTE(review): {104, 30, 70, 0} is presumably that variant's
 * add / mul / fused-multiply-add operation count; its header comment is not
 * visible here, so confirm.
 */
static const ct_desc desc = { 16, "t1sv_16", twinstr, &GENUS, {104, 30, 70, 0}, 0, 0, 0 };

/* Registers t1sv_16 and its descriptor with the planner via kdft_dit_register. */
void X(codelet_t1sv_16) (planner *p) {
     X(kdft_dit_register) (p, t1sv_16, &desc);
}
#else                           /* HAVE_FMA */

/* Generated by: ../../../genfft/gen_twiddle -simd -compact -variables 4 -pipeline-latency 8 -n 16 -name t1sv_16 -include ts.h */

/*
 * This function contains 174 FP additions, 84 FP multiplications,
 * (or, 136 additions, 46 multiplications, 38 fused multiply/add),
 * 52 stack variables, and 64 memory accesses
 */
/*
 * Generator Id's :
 * $Id: algsimp.ml,v 1.9 2006-02-12 23:34:12 athena Exp $
 * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $
 * $Id: gen_twiddle.ml,v 1.24 2006-02-12 23:34:12 athena Exp $
 */

#include "ts.h"

/*
 * t1sv_16 -- size-16 twiddle codelet operating in place on split
 * real/imaginary data (machine-generated; do not edit the arithmetic by hand).
 *
 * Each loop iteration:
 *   1. loads 16 (ri, ii) pairs at ri[WS(ios, k)] / ii[WS(ios, k)], k = 0..15;
 *   2. combines inputs 1..15 with the pair W[TWVL * (2k - 2)],
 *      W[TWVL * (2k - 1)] (input 0 is used as loaded);
 *   3. mixes the results with additions, subtractions and the three
 *      constants declared below;
 *   4. stores 16 results back to the same ri / ii slots it loaded from.
 *
 * Parameters:
 *   ri, ii  base pointers of the two arrays that are read and overwritten
 *           (real and imaginary parts, going by the names and the arithmetic).
 *   W       table of twiddle values; 30 distinct slots are read per iteration.
 *   ios     stride between the 16 elements of one transform, applied through
 *           WS(ios, k).
 *   m       loop count; the loop runs while the counter is > 0 and the counter
 *           drops by (2 * VL) per iteration.
 *   dist    step between consecutive iterations' data: ri and ii advance by
 *           (2 * VL) * dist per iteration; also forwarded to LD / ST.
 *
 * Returns: W after it has been advanced by (2 * VL) * 30 for every iteration
 * executed.
 *
 * NOTE(review): V, LD, ST, LDW, LDK, DVK, VADD, VSUB, VMUL, VFMA, VFNMS, WS,
 * VL, TWVL and MAKE_VOLATILE_STRIDE come from ts.h and are not visible here.
 * The comments below assume VFMA(a, b, c) = a*b + c and
 * VFNMS(a, b, c) = c - a*b -- confirm against the SIMD headers.
 * NOTE(review): the third LD / ST argument is &x[0] for even element indices
 * and &x[WS(ios, 1)] for odd ones; presumably an alignment reference --
 * confirm.
 */
static const R *t1sv_16(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
     /* sin(pi/8), cos(pi/8) and sqrt(2)/2, judging by their numeric values. */
     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
     INT i;
     for (i = m; i > 0;
          i = i - (2 * VL),
          ri = ri + ((2 * VL) * dist),
          ii = ii + ((2 * VL) * dist),
          W = W + ((2 * VL) * 30),
          MAKE_VOLATILE_STRIDE(ios)) {
          /* Intermediates that are shared between the load and store stages. */
          V T7, T37, T1t, T2U, Ti, T38, T1w, T2R, Tu, T2s, T1C, T2c, TF, T2t, T1H;
          V T2d, T1f, T1q, T2B, T2C, T2D, T2E, T1Z, T2j, T24, T2k, TS, T13, T2w, T2x;
          V T2y, T2z, T1O, T2g, T1T, T2h;
          /* Inputs 0 and 8: input 8 is combined with W slots 14/15, then sum and difference are formed. */
          {
               V T1, T2T, T6, T2S;
               T1 = LD(&(ri[0]), dist, &(ri[0]));
               T2T = LD(&(ii[0]), dist, &(ii[0]));
               {
                    V T3, T5, T2, T4;
                    T3 = LD(&(ri[WS(ios, 8)]), dist, &(ri[0]));
                    T5 = LD(&(ii[WS(ios, 8)]), dist, &(ii[0]));
                    T2 = LDW(&(W[TWVL * 14]));
                    T4 = LDW(&(W[TWVL * 15]));
                    /* Assuming the VFMA / VFNMS semantics noted above: T6 = T2*T3 + T4*T5, T2S = T2*T5 - T4*T3. */
                    T6 = VFMA(T2, T3, VMUL(T4, T5));
                    T2S = VFNMS(T4, T3, VMUL(T2, T5));
               }
               T7 = VADD(T1, T6);
               T37 = VSUB(T2T, T2S);
               T1t = VSUB(T1, T6);
               T2U = VADD(T2S, T2T);
          }
          /* Inputs 4 and 12 (W slots 6/7 and 22/23): sums and differences. */
          {
               V Tc, T1u, Th, T1v;
               {
                    V T9, Tb, T8, Ta;
                    T9 = LD(&(ri[WS(ios, 4)]), dist, &(ri[0]));
                    Tb = LD(&(ii[WS(ios, 4)]), dist, &(ii[0]));
                    T8 = LDW(&(W[TWVL * 6]));
                    Ta = LDW(&(W[TWVL * 7]));
                    Tc = VFMA(T8, T9, VMUL(Ta, Tb));
                    T1u = VFNMS(Ta, T9, VMUL(T8, Tb));
               }
               {
                    V Te, Tg, Td, Tf;
                    Te = LD(&(ri[WS(ios, 12)]), dist, &(ri[0]));
                    Tg = LD(&(ii[WS(ios, 12)]), dist, &(ii[0]));
                    Td = LDW(&(W[TWVL * 22]));
                    Tf = LDW(&(W[TWVL * 23]));
                    Th = VFMA(Td, Te, VMUL(Tf, Tg));
                    T1v = VFNMS(Tf, Te, VMUL(Td, Tg));
               }
               Ti = VADD(Tc, Th);
               T38 = VSUB(Tc, Th);
               T1w = VSUB(T1u, T1v);
               T2R = VADD(T1u, T1v);
          }
          /* Inputs 2 and 10 (W slots 2/3 and 18/19). */
          {
               V To, T1y, Tt, T1z, T1A, T1B;
               {
                    V Tl, Tn, Tk, Tm;
                    Tl = LD(&(ri[WS(ios, 2)]), dist, &(ri[0]));
                    Tn = LD(&(ii[WS(ios, 2)]), dist, &(ii[0]));
                    Tk = LDW(&(W[TWVL * 2]));
                    Tm = LDW(&(W[TWVL * 3]));
                    To = VFMA(Tk, Tl, VMUL(Tm, Tn));
                    T1y = VFNMS(Tm, Tl, VMUL(Tk, Tn));
               }
               {
                    V Tq, Ts, Tp, Tr;
                    Tq = LD(&(ri[WS(ios, 10)]), dist, &(ri[0]));
                    Ts = LD(&(ii[WS(ios, 10)]), dist, &(ii[0]));
                    Tp = LDW(&(W[TWVL * 18]));
                    Tr = LDW(&(W[TWVL * 19]));
                    Tt = VFMA(Tp, Tq, VMUL(Tr, Ts));
                    T1z = VFNMS(Tr, Tq, VMUL(Tp, Ts));
               }
               Tu = VADD(To, Tt);
               T2s = VADD(T1y, T1z);
               T1A = VSUB(T1y, T1z);
               T1B = VSUB(To, Tt);
               T1C = VSUB(T1A, T1B);
               T2c = VADD(T1B, T1A);
          }
          /* Inputs 14 and 6 (W slots 26/27 and 10/11). */
          {
               V Tz, T1E, TE, T1F, T1D, T1G;
               {
                    V Tw, Ty, Tv, Tx;
                    Tw = LD(&(ri[WS(ios, 14)]), dist, &(ri[0]));
                    Ty = LD(&(ii[WS(ios, 14)]), dist, &(ii[0]));
                    Tv = LDW(&(W[TWVL * 26]));
                    Tx = LDW(&(W[TWVL * 27]));
                    Tz = VFMA(Tv, Tw, VMUL(Tx, Ty));
                    T1E = VFNMS(Tx, Tw, VMUL(Tv, Ty));
               }
               {
                    V TB, TD, TA, TC;
                    TB = LD(&(ri[WS(ios, 6)]), dist, &(ri[0]));
                    TD = LD(&(ii[WS(ios, 6)]), dist, &(ii[0]));
                    TA = LDW(&(W[TWVL * 10]));
                    TC = LDW(&(W[TWVL * 11]));
                    TE = VFMA(TA, TB, VMUL(TC, TD));
                    T1F = VFNMS(TC, TB, VMUL(TA, TD));
               }
               TF = VADD(Tz, TE);
               T2t = VADD(T1E, T1F);
               T1D = VSUB(Tz, TE);
               T1G = VSUB(T1E, T1F);
               T1H = VADD(T1D, T1G);
               T2d = VSUB(T1D, T1G);
          }
          /* Inputs 15, 11, 7 and 3 (W slots 28/29, 20/21, 12/13, 4/5). */
          {
               V T19, T20, T1p, T1X, T1e, T21, T1k, T1W;
               {
                    V T16, T18, T15, T17;
                    T16 = LD(&(ri[WS(ios, 15)]), dist, &(ri[WS(ios, 1)]));
                    T18 = LD(&(ii[WS(ios, 15)]), dist, &(ii[WS(ios, 1)]));
                    T15 = LDW(&(W[TWVL * 28]));
                    T17 = LDW(&(W[TWVL * 29]));
                    T19 = VFMA(T15, T16, VMUL(T17, T18));
                    T20 = VFNMS(T17, T16, VMUL(T15, T18));
               }
               {
                    V T1m, T1o, T1l, T1n;
                    T1m = LD(&(ri[WS(ios, 11)]), dist, &(ri[WS(ios, 1)]));
                    T1o = LD(&(ii[WS(ios, 11)]), dist, &(ii[WS(ios, 1)]));
                    T1l = LDW(&(W[TWVL * 20]));
                    T1n = LDW(&(W[TWVL * 21]));
                    T1p = VFMA(T1l, T1m, VMUL(T1n, T1o));
                    T1X = VFNMS(T1n, T1m, VMUL(T1l, T1o));
               }
               {
                    V T1b, T1d, T1a, T1c;
                    T1b = LD(&(ri[WS(ios, 7)]), dist, &(ri[WS(ios, 1)]));
                    T1d = LD(&(ii[WS(ios, 7)]), dist, &(ii[WS(ios, 1)]));
                    T1a = LDW(&(W[TWVL * 12]));
                    T1c = LDW(&(W[TWVL * 13]));
                    T1e = VFMA(T1a, T1b, VMUL(T1c, T1d));
                    T21 = VFNMS(T1c, T1b, VMUL(T1a, T1d));
               }
               {
                    V T1h, T1j, T1g, T1i;
                    T1h = LD(&(ri[WS(ios, 3)]), dist, &(ri[WS(ios, 1)]));
                    T1j = LD(&(ii[WS(ios, 3)]), dist, &(ii[WS(ios, 1)]));
                    T1g = LDW(&(W[TWVL * 4]));
                    T1i = LDW(&(W[TWVL * 5]));
                    T1k = VFMA(T1g, T1h, VMUL(T1i, T1j));
                    T1W = VFNMS(T1i, T1h, VMUL(T1g, T1j));
               }
               T1f = VADD(T19, T1e);
               T1q = VADD(T1k, T1p);
               T2B = VSUB(T1f, T1q);
               T2C = VADD(T20, T21);
               T2D = VADD(T1W, T1X);
               T2E = VSUB(T2C, T2D);
               {
                    V T1V, T1Y, T22, T23;
                    T1V = VSUB(T19, T1e);
                    T1Y = VSUB(T1W, T1X);
                    T1Z = VSUB(T1V, T1Y);
                    T2j = VADD(T1V, T1Y);
                    T22 = VSUB(T20, T21);
                    T23 = VSUB(T1k, T1p);
                    T24 = VADD(T22, T23);
                    T2k = VSUB(T22, T23);
               }
          }
          /* Inputs 1, 13, 9 and 5 (W slots 0/1, 24/25, 16/17, 8/9). */
          {
               V TM, T1K, T12, T1R, TR, T1L, TX, T1Q;
               {
                    V TJ, TL, TI, TK;
                    TJ = LD(&(ri[WS(ios, 1)]), dist, &(ri[WS(ios, 1)]));
                    TL = LD(&(ii[WS(ios, 1)]), dist, &(ii[WS(ios, 1)]));
                    TI = LDW(&(W[0]));
                    TK = LDW(&(W[TWVL * 1]));
                    TM = VFMA(TI, TJ, VMUL(TK, TL));
                    T1K = VFNMS(TK, TJ, VMUL(TI, TL));
               }
               {
                    V TZ, T11, TY, T10;
                    TZ = LD(&(ri[WS(ios, 13)]), dist, &(ri[WS(ios, 1)]));
                    T11 = LD(&(ii[WS(ios, 13)]), dist, &(ii[WS(ios, 1)]));
                    TY = LDW(&(W[TWVL * 24]));
                    T10 = LDW(&(W[TWVL * 25]));
                    T12 = VFMA(TY, TZ, VMUL(T10, T11));
                    T1R = VFNMS(T10, TZ, VMUL(TY, T11));
               }
               {
                    V TO, TQ, TN, TP;
                    TO = LD(&(ri[WS(ios, 9)]), dist, &(ri[WS(ios, 1)]));
                    TQ = LD(&(ii[WS(ios, 9)]), dist, &(ii[WS(ios, 1)]));
                    TN = LDW(&(W[TWVL * 16]));
                    TP = LDW(&(W[TWVL * 17]));
                    TR = VFMA(TN, TO, VMUL(TP, TQ));
                    T1L = VFNMS(TP, TO, VMUL(TN, TQ));
               }
               {
                    V TU, TW, TT, TV;
                    TU = LD(&(ri[WS(ios, 5)]), dist, &(ri[WS(ios, 1)]));
                    TW = LD(&(ii[WS(ios, 5)]), dist, &(ii[WS(ios, 1)]));
                    TT = LDW(&(W[TWVL * 8]));
                    TV = LDW(&(W[TWVL * 9]));
                    TX = VFMA(TT, TU, VMUL(TV, TW));
                    T1Q = VFNMS(TV, TU, VMUL(TT, TW));
               }
               TS = VADD(TM, TR);
               T13 = VADD(TX, T12);
               T2w = VSUB(TS, T13);
               T2x = VADD(T1K, T1L);
               T2y = VADD(T1Q, T1R);
               T2z = VSUB(T2x, T2y);
               {
                    V T1M, T1N, T1P, T1S;
                    T1M = VSUB(T1K, T1L);
                    T1N = VSUB(TX, T12);
                    T1O = VADD(T1M, T1N);
                    T2g = VSUB(T1M, T1N);
                    T1P = VSUB(TM, TR);
                    T1S = VSUB(T1Q, T1R);
                    T1T = VSUB(T1P, T1S);
                    T2h = VADD(T1P, T1S);
               }
          }
          /* Outputs 11, 3, 15 and 7. */
          {
               V T1J, T27, T3g, T3i, T26, T3h, T2a, T3d;
               {
                    V T1x, T1I, T3e, T3f;
                    T1x = VSUB(T1t, T1w);
                    T1I = VMUL(LDK(KP707106781), VSUB(T1C, T1H));
                    T1J = VADD(T1x, T1I);
                    T27 = VSUB(T1x, T1I);
                    T3e = VMUL(LDK(KP707106781), VSUB(T2d, T2c));
                    T3f = VADD(T38, T37);
                    T3g = VADD(T3e, T3f);
                    T3i = VSUB(T3f, T3e);
               }
               {
                    V T1U, T25, T28, T29;
                    T1U = VFMA(LDK(KP923879532), T1O, VMUL(LDK(KP382683432), T1T));
                    T25 = VFNMS(LDK(KP923879532), T24, VMUL(LDK(KP382683432), T1Z));
                    T26 = VADD(T1U, T25);
                    T3h = VSUB(T25, T1U);
                    T28 = VFNMS(LDK(KP923879532), T1T, VMUL(LDK(KP382683432), T1O));
                    T29 = VFMA(LDK(KP382683432), T24, VMUL(LDK(KP923879532), T1Z));
                    T2a = VSUB(T28, T29);
                    T3d = VADD(T28, T29);
               }
               ST(&(ri[WS(ios, 11)]), VSUB(T1J, T26), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 11)]), VSUB(T3g, T3d), dist, &(ii[WS(ios, 1)]));
               ST(&(ri[WS(ios, 3)]), VADD(T1J, T26), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 3)]), VADD(T3d, T3g), dist, &(ii[WS(ios, 1)]));
               ST(&(ri[WS(ios, 15)]), VSUB(T27, T2a), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 15)]), VSUB(T3i, T3h), dist, &(ii[WS(ios, 1)]));
               ST(&(ri[WS(ios, 7)]), VADD(T27, T2a), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 7)]), VADD(T3h, T3i), dist, &(ii[WS(ios, 1)]));
          }
          /* Outputs 10, 2, 14 and 6. */
          {
               V T2v, T2H, T32, T34, T2G, T33, T2K, T2Z;
               {
                    V T2r, T2u, T30, T31;
                    T2r = VSUB(T7, Ti);
                    T2u = VSUB(T2s, T2t);
                    T2v = VADD(T2r, T2u);
                    T2H = VSUB(T2r, T2u);
                    T30 = VSUB(TF, Tu);
                    T31 = VSUB(T2U, T2R);
                    T32 = VADD(T30, T31);
                    T34 = VSUB(T31, T30);
               }
               {
                    V T2A, T2F, T2I, T2J;
                    T2A = VADD(T2w, T2z);
                    T2F = VSUB(T2B, T2E);
                    T2G = VMUL(LDK(KP707106781), VADD(T2A, T2F));
                    T33 = VMUL(LDK(KP707106781), VSUB(T2F, T2A));
                    T2I = VSUB(T2z, T2w);
                    T2J = VADD(T2B, T2E);
                    T2K = VMUL(LDK(KP707106781), VSUB(T2I, T2J));
                    T2Z = VMUL(LDK(KP707106781), VADD(T2I, T2J));
               }
               ST(&(ri[WS(ios, 10)]), VSUB(T2v, T2G), dist, &(ri[0]));
               ST(&(ii[WS(ios, 10)]), VSUB(T32, T2Z), dist, &(ii[0]));
               ST(&(ri[WS(ios, 2)]), VADD(T2v, T2G), dist, &(ri[0]));
               ST(&(ii[WS(ios, 2)]), VADD(T2Z, T32), dist, &(ii[0]));
               ST(&(ri[WS(ios, 14)]), VSUB(T2H, T2K), dist, &(ri[0]));
               ST(&(ii[WS(ios, 14)]), VSUB(T34, T33), dist, &(ii[0]));
               ST(&(ri[WS(ios, 6)]), VADD(T2H, T2K), dist, &(ri[0]));
               ST(&(ii[WS(ios, 6)]), VADD(T33, T34), dist, &(ii[0]));
          }
          /* Outputs 9, 1, 13 and 5. */
          {
               V T2f, T2n, T3a, T3c, T2m, T3b, T2q, T35;
               {
                    V T2b, T2e, T36, T39;
                    T2b = VADD(T1t, T1w);
                    T2e = VMUL(LDK(KP707106781), VADD(T2c, T2d));
                    T2f = VADD(T2b, T2e);
                    T2n = VSUB(T2b, T2e);
                    T36 = VMUL(LDK(KP707106781), VADD(T1C, T1H));
                    T39 = VSUB(T37, T38);
                    T3a = VADD(T36, T39);
                    T3c = VSUB(T39, T36);
               }
               {
                    V T2i, T2l, T2o, T2p;
                    T2i = VFMA(LDK(KP382683432), T2g, VMUL(LDK(KP923879532), T2h));
                    T2l = VFNMS(LDK(KP382683432), T2k, VMUL(LDK(KP923879532), T2j));
                    T2m = VADD(T2i, T2l);
                    T3b = VSUB(T2l, T2i);
                    T2o = VFNMS(LDK(KP382683432), T2h, VMUL(LDK(KP923879532), T2g));
                    T2p = VFMA(LDK(KP923879532), T2k, VMUL(LDK(KP382683432), T2j));
                    T2q = VSUB(T2o, T2p);
                    T35 = VADD(T2o, T2p);
               }
               ST(&(ri[WS(ios, 9)]), VSUB(T2f, T2m), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 9)]), VSUB(T3a, T35), dist, &(ii[WS(ios, 1)]));
               ST(&(ri[WS(ios, 1)]), VADD(T2f, T2m), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 1)]), VADD(T35, T3a), dist, &(ii[WS(ios, 1)]));
               ST(&(ri[WS(ios, 13)]), VSUB(T2n, T2q), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 13)]), VSUB(T3c, T3b), dist, &(ii[WS(ios, 1)]));
               ST(&(ri[WS(ios, 5)]), VADD(T2n, T2q), dist, &(ri[WS(ios, 1)]));
               ST(&(ii[WS(ios, 5)]), VADD(T3b, T3c), dist, &(ii[WS(ios, 1)]));
          }
          /* Outputs 8, 0, 12 and 4 (additions and subtractions only, no constant multiplies). */
          {
               V TH, T2L, T2W, T2Y, T1s, T2X, T2O, T2P;
               {
                    V Tj, TG, T2Q, T2V;
                    Tj = VADD(T7, Ti);
                    TG = VADD(Tu, TF);
                    TH = VADD(Tj, TG);
                    T2L = VSUB(Tj, TG);
                    T2Q = VADD(T2s, T2t);
                    T2V = VADD(T2R, T2U);
                    T2W = VADD(T2Q, T2V);
                    T2Y = VSUB(T2V, T2Q);
               }
               {
                    V T14, T1r, T2M, T2N;
                    T14 = VADD(TS, T13);
                    T1r = VADD(T1f, T1q);
                    T1s = VADD(T14, T1r);
                    T2X = VSUB(T1r, T14);
                    T2M = VADD(T2x, T2y);
                    T2N = VADD(T2C, T2D);
                    T2O = VSUB(T2M, T2N);
                    T2P = VADD(T2M, T2N);
               }
               ST(&(ri[WS(ios, 8)]), VSUB(TH, T1s), dist, &(ri[0]));
               ST(&(ii[WS(ios, 8)]), VSUB(T2W, T2P), dist, &(ii[0]));
               ST(&(ri[0]), VADD(TH, T1s), dist, &(ri[0]));
               ST(&(ii[0]), VADD(T2P, T2W), dist, &(ii[0]));
               ST(&(ri[WS(ios, 12)]), VSUB(T2L, T2O), dist, &(ri[0]));
               ST(&(ii[WS(ios, 12)]), VSUB(T2Y, T2X), dist, &(ii[0]));
               ST(&(ri[WS(ios, 4)]), VADD(T2L, T2O), dist, &(ri[0]));
               ST(&(ii[WS(ios, 4)]), VADD(T2X, T2Y), dist, &(ii[0]));
          }
     }
     /* W has been advanced by (2 * VL) * 30 for every iteration executed. */
     return W;
}

/*
 * Twiddle instruction table referenced by desc below: one VTW entry for each
 * of the indices 1..15, terminated by a TW_NEXT record carrying (2 * VL).
 * NOTE(review): VTW and TW_NEXT are defined outside this file; their exact
 * meaning is not visible here.
 */
static const tw_instr twinstr[] = {
     VTW(1),
     VTW(2),
     VTW(3),
     VTW(4),
     VTW(5),
     VTW(6),
     VTW(7),
     VTW(8),
     VTW(9),
     VTW(10),
     VTW(11),
     VTW(12),
     VTW(13),
     VTW(14),
     VTW(15),
     {TW_NEXT, (2 * VL), 0}
};

/*
 * Codelet descriptor for this (non-FMA) variant. {136, 46, 38, 0} matches the
 * "136 additions, 46 multiplications, 38 fused multiply/add" figures quoted in
 * the generator comment above.
 */
static const ct_desc desc = { 16, "t1sv_16", twinstr, &GENUS, {136, 46, 38, 0}, 0, 0, 0 };

/* Registers t1sv_16 and its descriptor with the planner via kdft_dit_register. */
void X(codelet_t1sv_16) (planner *p) {
     X(kdft_dit_register) (p, t1sv_16, &desc);
}
#endif                          /* HAVE_FMA */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -