⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cfdftv_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
		    T4w = VMUL(LDK(KP500000000), VFNMSI(T4v, T4u));		    T4s = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T4r, T4o)));		    T4t = VMUL(LDK(KP500000000), VFMAI(T4r, T4o));		    ST(&(Rp[0]), T4i, ms, &(Rp[0]));		    ST(&(Rm[WS(rs, 15)]), T4j, -ms, &(Rm[WS(rs, 1)]));		    ST(&(Rm[WS(rs, 7)]), T4f, -ms, &(Rm[WS(rs, 1)]));		    ST(&(Rp[WS(rs, 8)]), T4e, ms, &(Rp[0]));		    ST(&(Rm[WS(rs, 9)]), T3M, -ms, &(Rm[WS(rs, 1)]));		    ST(&(Rp[WS(rs, 10)]), T3N, ms, &(Rp[0]));		    ST(&(Rm[WS(rs, 5)]), T3J, -ms, &(Rm[WS(rs, 1)]));		    ST(&(Rp[WS(rs, 6)]), T3I, ms, &(Rp[0]));		    ST(&(Rp[WS(rs, 12)]), T4w, ms, &(Rp[0]));		    ST(&(Rm[WS(rs, 11)]), T4x, -ms, &(Rm[WS(rs, 1)]));		    ST(&(Rp[WS(rs, 4)]), T4t, ms, &(Rp[0]));		    ST(&(Rm[WS(rs, 3)]), T4s, -ms, &(Rm[WS(rs, 1)]));		    {			 V T2A, T2W, T2L, T2Z, T2D, T2N, T2M, T2G, T3T, T3X, T16, T2p, T1v, T35, T31;			 V T2I, T2S, T34, T2Y, T2P, T2T, T1Y, T2H, T30, T3Z, T3Y, T3U, T3V, T2O, T2X;			 V T32, T33, T36, T37, T2U, T2V, T2Q, T2R, T1Z, T2q;			 T2A = VFNMS(LDK(KP923879532), T2z, T2y);			 T2W = VFMA(LDK(KP923879532), T2z, T2y);			 T2L = VFNMS(LDK(KP923879532), T2K, T2J);			 T2Z = VFMA(LDK(KP923879532), T2K, T2J);			 T2D = VFMA(LDK(KP198912367), T2C, T2B);			 T2N = VFNMS(LDK(KP198912367), T2B, T2C);			 T2M = VFMA(LDK(KP198912367), T2E, T2F);			 T2G = VFNMS(LDK(KP198912367), T2F, T2E);			 T3T = VFMA(LDK(KP923879532), T3S, T3R);			 T3X = VFNMS(LDK(KP923879532), T3S, T3R);			 T16 = VFNMS(LDK(KP923879532), T15, Ts);			 T2m = VFMA(LDK(KP923879532), T15, Ts);			 T2H = VSUB(T2D, T2G);			 T30 = VADD(T2D, T2G);			 T2b = VFNMS(LDK(KP923879532), T2a, T27);			 T2p = VFMA(LDK(KP923879532), T2a, T27);			 T1v = VFMA(LDK(KP668178637), T1u, T1n);			 T2c = VFNMS(LDK(KP668178637), T1n, T1u);			 T3Z = VCONJ(VMUL(LDK(KP500000000), VFMAI(T3X, T3W)));			 T3Y = VMUL(LDK(KP500000000), VFNMSI(T3X, T3W));			 T3U = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T3T, T3Q)));			 T3V = VMUL(LDK(KP500000000), VFMAI(T3T, T3Q));			 T2O = VSUB(T2M, T2N);			 T2X = VADD(T2N, T2M);			 T35 = VFNMS(LDK(KP980785280), T30, T2Z);			 T31 = VFMA(LDK(KP980785280), T30, T2Z);			 T2I = VFMA(LDK(KP980785280), T2H, T2A);			 T2S = VFNMS(LDK(KP980785280), T2H, T2A);			 ST(&(Rp[WS(rs, 14)]), T3Y, ms, &(Rp[0]));			 ST(&(Rm[WS(rs, 13)]), T3Z, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rp[WS(rs, 2)]), T3V, ms, &(Rp[0]));			 ST(&(Rm[WS(rs, 1)]), T3U, -ms, &(Rm[WS(rs, 1)]));			 T34 = VFNMS(LDK(KP980785280), T2X, T2W);			 T2Y = VFMA(LDK(KP980785280), T2X, T2W);			 T2P = VFMA(LDK(KP980785280), T2O, T2L);			 T2T = VFNMS(LDK(KP980785280), T2O, T2L);			 T2d = VFMA(LDK(KP668178637), T1Q, T1X);			 T1Y = VFNMS(LDK(KP668178637), T1X, T1Q);			 T32 = VMUL(LDK(KP500000000), VFNMSI(T31, T2Y));			 T33 = VCONJ(VMUL(LDK(KP500000000), VFMAI(T31, T2Y)));			 T36 = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T35, T34)));			 T37 = VMUL(LDK(KP500000000), VFMAI(T35, T34));			 T2U = VMUL(LDK(KP500000000), VFNMSI(T2T, T2S));			 T2V = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2T, T2S)));			 T2Q = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2P, T2I)));			 T2R = VMUL(LDK(KP500000000), VFMAI(T2P, T2I));			 T1Z = VSUB(T1v, T1Y);			 T2q = VADD(T1Y, T1v);			 ST(&(Rm[0]), T33, -ms, &(Rm[0]));			 ST(&(Rp[WS(rs, 1)]), T32, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rp[WS(rs, 15)]), T37, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 14)]), T36, -ms, &(Rm[0]));			 ST(&(Rm[WS(rs, 8)]), T2V, -ms, &(Rm[0]));			 ST(&(Rp[WS(rs, 9)]), T2U, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rp[WS(rs, 7)]), T2R, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 6)]), T2Q, -ms, &(Rm[0]));			 T2v = VFNMS(LDK(KP831469612), T2q, T2p);			 T2r = VFMA(LDK(KP831469612), T2q, T2p);			 T20 = VFMA(LDK(KP831469612), T1Z, T16);			 T2i = VFNMS(LDK(KP831469612), T1Z, T16);		    }	       }	  }	  T2n = VADD(T2d, T2c);	  T2e = VSUB(T2c, T2d);	  T2o = VFMA(LDK(KP831469612), T2n, T2m);	  T2u = VFNMS(LDK(KP831469612), T2n, T2m);	  T2j = VFMA(LDK(KP831469612), T2e, T2b);	  T2f = VFNMS(LDK(KP831469612), T2e, T2b);	  T2t = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2r, T2o)));	  T2s = VMUL(LDK(KP500000000), VFMAI(T2r, T2o));	  T2x = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2v, T2u)));	  T2w = VMUL(LDK(KP500000000), VFNMSI(T2v, T2u));	  T2l = VCONJ(VMUL(LDK(KP500000000), VFNMSI(T2j, T2i)));	  T2k = VMUL(LDK(KP500000000), VFMAI(T2j, T2i));	  T2h = VCONJ(VMUL(LDK(KP500000000), VFMAI(T2f, T20)));	  T2g = VMUL(LDK(KP500000000), VFNMSI(T2f, T20));	  ST(&(Rm[WS(rs, 2)]), T2t, -ms, &(Rm[0]));	  ST(&(Rp[WS(rs, 3)]), T2s, ms, &(Rp[WS(rs, 1)]));	  ST(&(Rm[WS(rs, 12)]), T2x, -ms, &(Rm[0]));	  ST(&(Rp[WS(rs, 13)]), T2w, ms, &(Rp[WS(rs, 1)]));	  ST(&(Rm[WS(rs, 10)]), T2l, -ms, &(Rm[0]));	  ST(&(Rp[WS(rs, 11)]), T2k, ms, &(Rp[WS(rs, 1)]));	  ST(&(Rm[WS(rs, 4)]), T2h, -ms, &(Rm[0]));	  ST(&(Rp[WS(rs, 5)]), T2g, ms, &(Rp[WS(rs, 1)]));     }}static const tw_instr twinstr[] = {     VTW(1, 1),     VTW(1, 2),     VTW(1, 3),     VTW(1, 4),     VTW(1, 5),     VTW(1, 6),     VTW(1, 7),     VTW(1, 8),     VTW(1, 9),     VTW(1, 10),     VTW(1, 11),     VTW(1, 12),     VTW(1, 13),     VTW(1, 14),     VTW(1, 15),     VTW(1, 16),     VTW(1, 17),     VTW(1, 18),     VTW(1, 19),     VTW(1, 20),     VTW(1, 21),     VTW(1, 22),     VTW(1, 23),     VTW(1, 24),     VTW(1, 25),     VTW(1, 26),     VTW(1, 27),     VTW(1, 28),     VTW(1, 29),     VTW(1, 30),     VTW(1, 31),     {TW_NEXT, VL, 0}};static const hc2c_desc desc = { 32, "hc2cfdftv_32", twinstr, &GENUS, {119, 94, 130, 0} };void X(codelet_hc2cfdftv_32) (planner *p) {     X(khc2c_register) (p, hc2cfdftv_32, &desc, HC2C_VIA_DFT);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2cdft_c -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 32 -dit -name hc2cfdftv_32 -include hc2cfv.h *//* * This function contains 249 FP additions, 133 FP multiplications, * (or, 233 additions, 117 multiplications, 16 fused multiply/add), * 130 stack variables, 9 constants, and 64 memory accesses */#include "hc2cfv.h"static void hc2cfdftv_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     DVK(KP353553390, +0.353553390593273762200422181052424519642417969);     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);     INT m;     for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 62)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(rs)) {	  V Ta, T2m, Tx, T2h, T3R, T4h, T3q, T4g, T3B, T4n, T3E, T4o, T1B, T2S, T1O;	  V T2R, TV, T2p, T1i, T2o, T3L, T4q, T3I, T4r, T3w, T4k, T3t, T4j, T26, T2V;	  V T2d, T2U;	  {	       V T4, T1m, T1H, T2j, T1M, T2l, T9, T1o, Tf, T1r, Tq, T1w, Tv, T1y, Tk;	       V T1t, Tl, Tw, T3P, T3Q, T3o, T3p, T3z, T3A, T3C, T3D, T1p, T1N, T1A, T1C;	       V T1u, T1z;	       {		    V T1, T3, T2, T1l, T1G, T1F, T1E, T1D, T2i, T1L, T1K, T1J, T1I, T2k, T6;		    V T8, T7, T5, T1n, Tc, Te, Td, Tb, T1q, Tn, Tp, To, Tm, T1v, Ts;		    V Tu, Tt, Tr, T1x, Th, Tj, Ti, Tg, T1s;		    T1 = LD(&(Rp[0]), ms, &(Rp[0]));		    T2 = LD(&(Rm[0]), -ms, &(Rm[0]));		    T3 = VCONJ(T2);		    T4 = VADD(T1, T3);		    T1l = LDW(&(W[0]));		    T1m = VZMULIJ(T1l, VSUB(T3, T1));		    T1G = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0]));		    T1E = LD(&(Rm[WS(rs, 4)]), -ms, &(Rm[0]));		    T1F = VCONJ(T1E);		    T1D = LDW(&(W[TWVL * 16]));		    T1H = VZMULIJ(T1D, VSUB(T1F, T1G));		    T2i = LDW(&(W[TWVL * 14]));		    T2j = VZMULJ(T2i, VADD(T1G, T1F));		    T1L = LD(&(Rp[WS(rs, 12)]), ms, &(Rp[0]));		    T1J = LD(&(Rm[WS(rs, 12)]), -ms, &(Rm[0]));		    T1K = VCONJ(T1J);		    T1I = LDW(&(W[TWVL * 48]));		    T1M = VZMULIJ(T1I, VSUB(T1K, T1L));		    T2k = LDW(&(W[TWVL * 46]));		    T2l = VZMULJ(T2k, VADD(T1L, T1K));		    T6 = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0]));		    T7 = LD(&(Rm[WS(rs, 8)]), -ms, &(Rm[0]));		    T8 = VCONJ(T7);		    T5 = LDW(&(W[TWVL * 30]));		    T9 = VZMULJ(T5, VADD(T6, T8));		    T1n = LDW(&(W[TWVL * 32]));		    T1o = VZMULIJ(T1n, VSUB(T8, T6));		    Tc = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));		    Td = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));		    Te = VCONJ(Td);		    Tb = LDW(&(W[TWVL * 6]));		    Tf = VZMULJ(Tb, VADD(Tc, Te));		    T1q = LDW(&(W[TWVL * 8]));		    T1r = VZMULIJ(T1q, VSUB(Te, Tc));		    Tn = LD(&(Rp[WS(rs, 14)]), ms, &(Rp[0]));		    To = LD(&(Rm[WS(rs, 14)]), -ms, &(Rm[0]));		    Tp = VCONJ(To);		    Tm = LDW(&(W[TWVL * 54]));		    Tq = VZMULJ(Tm, VADD(Tn, Tp));		    T1v = LDW(&(W[TWVL * 56]));		    T1w = VZMULIJ(T1v, VSUB(Tp, Tn));		    Ts = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0]));		    Tt = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0]));		    Tu = VCONJ(Tt);		    Tr = LDW(&(W[TWVL * 22]));		    Tv = VZMULJ(Tr, VADD(Ts, Tu));		    T1x = LDW(&(W[TWVL * 24]));		    T1y = VZMULIJ(T1x, VSUB(Tu, Ts));		    Th = LD(&(Rp[WS(rs, 10)]), ms, &(Rp[0]));		    Ti = LD(&(Rm[WS(rs, 10)]), -ms, &(Rm[0]));		    Tj = VCONJ(Ti);		    Tg = LDW(&(W[TWVL * 38]));		    Tk = VZMULJ(Tg, VADD(Th, Tj));		    T1s = LDW(&(W[TWVL * 40]));		    T1t = VZMULIJ(T1s, VSUB(Tj, Th));	       }	       Ta = VMUL(LDK(KP500000000), VSUB(T4, T9));	       T2m = VSUB(T2j, T2l);	       Tl = VSUB(Tf, Tk);	       Tw = VSUB(Tq, Tv);	       Tx = VMUL(LDK(KP353553390), VADD(Tl, Tw));	       T2h = VMUL(LDK(KP707106781), VSUB(Tw, Tl));	       T3P = VADD(Tq, Tv);	       T3Q = VADD(Tf, Tk);	       T3R = VSUB(T3P, T3Q);	       T4h = VADD(T3Q, T3P);	       T3o = VADD(T4, T9);	       T3p = VADD(T2j, T2l);	       T3q = VMUL(LDK(KP500000000), VSUB(T3o, T3p));	       T4g = VADD(T3o, T3p);	       T3z = VADD(T1m, T1o);	       T3A = VADD(T1H, T1M);	       T3B = VSUB(T3z, T3A);	       T4n = VADD(T3z, T3A);	       T3C = VADD(T1w, T1y);	       T3D = VADD(T1r, T1t);	       T3E = VSUB(T3C, T3D);	       T4o = VADD(T3D, T3C);	       T1p = VSUB(T1m, T1o);	       T1N = VSUB(T1H, T1M);	       T1u = VSUB(T1r, T1t);	       T1z = VSUB(T1w, T1y);	       T1A = VMUL(LDK(KP707106781), VADD(T1u, T1z));	       T1C = VMUL(LDK(KP707106781), VSUB(T1z, T1u));	       T1B = VADD(T1p, T1A);	       T2S = VADD(T1N, T1C);	       T1O = VSUB(T1C, T1N);	       T2R = VSUB(T1p, T1A);	  }	  {	       V TD, T1R, T1b, T29, T1g, T2b, TI, T1T, TO, T1Y, T10, T22, T15, T24, TT;	       V T1W, TJ, TU, T16, T1h, T3J, T3K, T3G, T3H, T3u, T3v, T3r, T3s, T25, T2c;	       V T20, T27, T1U, T1Z;	       {		    V TA, TC, TB, Tz, T1Q, T18, T1a, T19, T17, T28, T1d, T1f, T1e, T1c, T2a;		    V TF, TH, TG, TE, T1S, TL, TN, TM, TK, T1X, TX, TZ, TY, TW, T21;		    V T12, T14, T13, T11, T23, TQ, TS, TR, TP, T1V;		    TA = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));		    TB = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));		    TC = VCONJ(TB);		    Tz = LDW(&(W[TWVL * 2]));		    TD = VZMULJ(Tz, VADD(TA, TC));		    T1Q = LDW(&(W[TWVL * 4]));		    T1R = VZMULIJ(T1Q, VSUB(TC, TA));		    T18 = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)]));		    T19 = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)]));		    T1a = VCONJ(T19);		    T17 = LDW(&(W[TWVL * 10]));		    T1b = VZMULJ(T17, VADD(T18, T1a));		    T28 = LDW(&(W[TWVL * 12]));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -