⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cbdftv_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
			      T3J = VZMULI(T3I, VFNMSI(T1f, T12));			      T1g = VZMULI(T1, VFMAI(T1f, T12));			      T2Q = VFNMS(LDK(KP980785280), T2v, T2o);			      T2w = VFMA(LDK(KP980785280), T2v, T2o);			      T2R = VFMA(LDK(KP980785280), T2C, T2z);			      T2D = VFNMS(LDK(KP980785280), T2C, T2z);			      T1R = LDW(&(W[TWVL * 40]));			      T4s = LDW(&(W[TWVL * 52]));			      T23 = LDW(&(W[TWVL * 8]));			      T42 = LDW(&(W[TWVL * 20]));			      T4f = VZMULI(T4e, VFNMSI(T2R, T2Q));			      T2S = VZMULI(T2P, VFMAI(T2R, T2Q));			      T4l = VZMULI(T4k, VFNMSI(T2D, T2w));			      T2E = VZMULI(T2l, VFMAI(T2D, T2w));			      T24 = VFMA(LDK(KP831469612), T1T, T1S);			      T1U = VFNMS(LDK(KP831469612), T1T, T1S);			      T25 = VFMA(LDK(KP831469612), T1W, T1V);			      T1X = VFNMS(LDK(KP831469612), T1W, T1V);			      T2X = LDW(&(W[TWVL * 32]));			      T3O = LDW(&(W[TWVL * 60]));			      T3b = LDW(&(W[0]));			      T3i = LDW(&(W[TWVL * 28]));			      T26 = VZMULI(T23, VFMAI(T25, T24));			      T4t = VZMULI(T4s, VFNMSI(T25, T24));			      T43 = VZMULI(T42, VFNMSI(T1X, T1U));			      T1Y = VZMULI(T1R, VFMAI(T1X, T1U));			      T3c = VFMA(LDK(KP980785280), T2Z, T2Y);			      T30 = VFNMS(LDK(KP980785280), T2Z, T2Y);			      T3d = VFMA(LDK(KP980785280), T32, T31);			      T33 = VFNMS(LDK(KP980785280), T32, T31);			 }		    }		    {			 V T3e, T3P, T3j, T34, T2c, T4j, T2k, T4d, T1P, T1Q, T4x, T4w, T2j, T4c, T21;			 V T22, T4r, T4q, T2b, T4i, T3h, T3H, T2N, T2O, T41, T40, T3g, T3G, T2V, T2W;			 V T3V, T3U, T39, T3M;			 T1P = VADD(T1g, T1O);			 T1Q = VCONJ(VSUB(T1O, T1g));			 T4x = VCONJ(VSUB(T4v, T4t));			 T4w = VADD(T4t, T4v);			 T2j = VADD(T2g, T2i);			 T2k = VCONJ(VSUB(T2i, T2g));			 T4d = VCONJ(VSUB(T4b, T43));			 T4c = VADD(T43, T4b);			 T3e = VZMULI(T3b, VFMAI(T3d, T3c));			 T3P = VZMULI(T3O, VFNMSI(T3d, T3c));			 T3j = VZMULI(T3i, VFNMSI(T33, T30));			 T34 = VZMULI(T2X, VFMAI(T33, T30));			 ST(&(Rp[WS(rs, 6)]), T1P, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 13)]), T4w, ms, &(Rp[WS(rs, 1)]));			 
ST(&(Rp[WS(rs, 14)]), T2j, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 5)]), T4c, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 13)]), T4x, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 6)]), T1Q, -ms, &(Rm[0]));			 T21 = VADD(T1Y, T20);			 T22 = VCONJ(VSUB(T20, T1Y));			 T4r = VCONJ(VSUB(T4p, T4l));			 T4q = VADD(T4l, T4p);			 T2b = VADD(T26, T2a);			 T2c = VCONJ(VSUB(T2a, T26));			 T4j = VCONJ(VSUB(T4h, T4f));			 T4i = VADD(T4f, T4h);			 ST(&(Rm[WS(rs, 5)]), T4d, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 14)]), T2k, -ms, &(Rm[0]));			 ST(&(Rp[WS(rs, 10)]), T21, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 3)]), T4q, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rp[WS(rs, 2)]), T2b, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 11)]), T4i, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 3)]), T4r, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 10)]), T22, -ms, &(Rm[0]));			 T2N = VADD(T2E, T2M);			 T2O = VCONJ(VSUB(T2M, T2E));			 T41 = VCONJ(VSUB(T3Z, T3X));			 T40 = VADD(T3X, T3Z);			 T3g = VADD(T3e, T3f);			 T3h = VCONJ(VSUB(T3f, T3e));			 T3H = VCONJ(VSUB(T3F, T3j));			 T3G = VADD(T3j, T3F);			 ST(&(Rm[WS(rs, 11)]), T4j, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 2)]), T2c, -ms, &(Rm[0]));			 ST(&(Rp[WS(rs, 12)]), T2N, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 1)]), T40, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rp[0]), T3g, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 7)]), T3G, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 1)]), T41, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 12)]), T2O, -ms, &(Rm[0]));			 T2V = VADD(T2S, T2U);			 T2W = VCONJ(VSUB(T2U, T2S));			 T3V = VCONJ(VSUB(T3T, T3P));			 T3U = VADD(T3P, T3T);			 T39 = VADD(T34, T38);			 T3a = VCONJ(VSUB(T38, T34));			 T3N = VCONJ(VSUB(T3L, T3J));			 T3M = VADD(T3J, T3L);			 ST(&(Rm[WS(rs, 7)]), T3H, -ms, &(Rm[WS(rs, 1)]));			 ST(&(Rm[0]), T3h, -ms, &(Rm[0]));			 ST(&(Rp[WS(rs, 4)]), T2V, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 15)]), T3U, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rp[WS(rs, 8)]), T39, ms, &(Rp[0]));			 ST(&(Rp[WS(rs, 9)]), T3M, ms, &(Rp[WS(rs, 1)]));			 ST(&(Rm[WS(rs, 15)]), T3V, -ms, &(Rm[WS(rs, 1)]));			 
ST(&(Rm[WS(rs, 4)]), T2W, -ms, &(Rm[0]));		    }	       }	  }	  ST(&(Rm[WS(rs, 9)]), T3N, -ms, &(Rm[WS(rs, 1)]));	  ST(&(Rm[WS(rs, 8)]), T3a, -ms, &(Rm[0]));     }}/*
 * twinstr: twiddle-instruction table for this codelet.
 * It holds 31 VTW(1, k) entries, k = 1 .. 31 in ascending order, followed by
 * a closing {TW_NEXT, VL, 0} entry.  The table is referenced by the `desc`
 * initializer that follows it.
 * NOTE(review): VTW, TW_NEXT and VL are defined outside this file section;
 * TW_NEXT presumably marks the end of the instruction list -- confirm
 * against the twiddle/SIMD headers.
 */static const tw_instr twinstr[] = {     VTW(1, 1),     VTW(1, 2),     VTW(1, 3),     VTW(1, 4),     VTW(1, 5),     VTW(1, 6),     VTW(1, 7),     VTW(1, 8),     VTW(1, 9),     VTW(1, 10),     VTW(1, 11),     VTW(1, 12),     VTW(1, 13),     VTW(1, 14),     VTW(1, 15),     VTW(1, 16),     VTW(1, 17),     VTW(1, 18),     VTW(1, 19),     VTW(1, 20),     VTW(1, 21),     VTW(1, 22),     VTW(1, 23),     VTW(1, 24),     VTW(1, 25),     VTW(1, 26),     VTW(1, 27),     VTW(1, 28),     VTW(1, 29),     VTW(1, 30),     VTW(1, 31),     {TW_NEXT, VL, 0}};/*
 * desc: descriptor passed to the registration call below.
 * Initializer fields, in order: the integer 32, the name string
 * "hc2cbdftv_32", the twinstr table, the address of GENUS, and the
 * four-number group {119, 62, 130, 0}.
 * NOTE(review): the first field presumably is the transform radix, and the
 * four numbers look like operation counts (additions, multiplications,
 * fused multiply-adds, other) for this variant -- confirm against the
 * hc2c_desc declaration, which is not visible here.
 */static const hc2c_desc desc = { 32, "hc2cbdftv_32", twinstr, &GENUS, {119, 62, 130, 0} };/*
 * X(codelet_hc2cbdftv_32): registration entry point for this codelet.
 * Forwards the planner `p`, the hc2cbdftv_32 kernel function, the address of
 * `desc`, and the constant HC2C_VIA_DFT to X(khc2c_register).  Returns
 * nothing.
 * NOTE(review): X() is a project macro (presumably a name-mangling prefix);
 * the exact exported symbol name depends on its definition.
 * The directive that follows this function is annotated HAVE_FMA, and the
 * text after it repeats the hc2cbdftv_32 kernel in a second variant.
 */void X(codelet_hc2cbdftv_32) (planner *p) {     X(khc2c_register) (p, hc2cbdftv_32, &desc, HC2C_VIA_DFT);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_hc2cdft_c -simd -compact -variables 4 -pipeline-latency 8 -trivial-stores -variables 32 -no-generate-bytw -n 32 -dif -sign 1 -name hc2cbdftv_32 -include hc2cbv.h *//* * This function contains 249 FP additions, 104 FP multiplications, * (or, 233 additions, 88 multiplications, 16 fused multiply/add), * 161 stack variables, 7 constants, and 64 memory accesses */#include "hc2cbv.h"static void hc2cbdftv_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + ((mb - 1) * 
((TWVL / VL) * 62)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 62), MAKE_VOLATILE_STRIDE(rs)) {	  V T1W, T21, Tf, T2c, T1t, T2r, T3T, T4m, Ty, T2q, T3P, T4n, T1n, T2d, T1T;	  V T22, T1E, T24, T3I, T4p, TU, T2n, T1i, T2h, T1L, T25, T3L, T4q, T1f, T2o;	  V T1j, T2k;	  {	       V T2, T4, T1Z, T1p, T1r, T20, T9, T1U, Td, T1V, T3, T1q, T6, T8, T7;	       V Tc, Tb, Ta, T5, Te, T1o, T1s, T3R, T3S, Tj, T1N, Tw, T1Q, Tn, T1O;	       V Ts, T1R, Tg, Ti, Th, Tv, Tu, Tt, Tk, Tm, Tl, Tp, Tr, Tq, To;	       V Tx, T3N, T3O, T1l, T1m, T1P, T1S;	       T2 = LD(&(Rp[0]), ms, &(Rp[0]));	       T3 = LD(&(Rm[WS(rs, 15)]), -ms, &(Rm[WS(rs, 1)]));	       T4 = VCONJ(T3);	       T1Z = VADD(T2, T4);	       T1p = LD(&(Rp[WS(rs, 8)]), ms, &(Rp[0]));	       T1q = LD(&(Rm[WS(rs, 7)]), -ms, &(Rm[WS(rs, 1)]));	       T1r = VCONJ(T1q);	       T20 = VADD(T1p, T1r);	       T6 = LD(&(Rp[WS(rs, 4)]), ms, &(Rp[0]));	       T7 = LD(&(Rm[WS(rs, 11)]), -ms, &(Rm[WS(rs, 1)]));	       T8 = VCONJ(T7);	       T9 = VSUB(T6, T8);	       T1U = VADD(T6, T8);	       Tc = LD(&(Rp[WS(rs, 12)]), ms, &(Rp[0]));	       Ta = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)]));	       Tb = VCONJ(Ta);	       Td = VSUB(Tb, Tc);	       T1V = VADD(Tb, Tc);	       T1W = VSUB(T1U, T1V);	       T21 = VSUB(T1Z, T20);	       T5 = VSUB(T2, T4);	       Te = VMUL(LDK(KP707106781), VADD(T9, Td));	       Tf = VSUB(T5, Te);	       T2c = VADD(T5, Te);	       T1o = VMUL(LDK(KP707106781), VSUB(T9, Td));	       T1s = VSUB(T1p, T1r);	       T1t = VSUB(T1o, T1s);	       T2r = VADD(T1s, T1o);	       T3R = VADD(T1Z, T20);	       T3S = VADD(T1U, T1V);	       T3T = VSUB(T3R, T3S);	       T4m = VADD(T3R, T3S);	       Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));	       Th = LD(&(Rm[WS(rs, 13)]), -ms, &(Rm[WS(rs, 1)]));	       Ti = VCONJ(Th);	       Tj = VSUB(Tg, Ti);	       T1N = VADD(Tg, Ti);	       Tv = LD(&(Rp[WS(rs, 14)]), ms, &(Rp[0]));	       Tt = LD(&(Rm[WS(rs, 1)]), -ms, 
&(Rm[WS(rs, 1)]));	       Tu = VCONJ(Tt);	       Tw = VSUB(Tu, Tv);	       T1Q = VADD(Tu, Tv);	       Tk = LD(&(Rp[WS(rs, 10)]), ms, &(Rp[0]));	       Tl = LD(&(Rm[WS(rs, 5)]), -ms, &(Rm[WS(rs, 1)]));	       Tm = VCONJ(Tl);	       Tn = VSUB(Tk, Tm);	       T1O = VADD(Tk, Tm);	       Tp = LD(&(Rp[WS(rs, 6)]), ms, &(Rp[0]));	       Tq = LD(&(Rm[WS(rs, 9)]), -ms, &(Rm[WS(rs, 1)]));	       Tr = VCONJ(Tq);	       Ts = VSUB(Tp, Tr);	       T1R = VADD(Tp, Tr);	       To = VFMA(LDK(KP382683432), Tj, VMUL(LDK(KP923879532), Tn));	       Tx = VFNMS(LDK(KP382683432), Tw, VMUL(LDK(KP923879532), Ts));	       Ty = VSUB(To, Tx);	       T2q = VADD(To, Tx);	       T3N = VADD(T1N, T1O);	       T3O = VADD(T1Q, T1R);	       T3P = VSUB(T3N, T3O);	       T4n = VADD(T3N, T3O);	       T1l = VFNMS(LDK(KP382683432), Tn, VMUL(LDK(KP923879532), Tj));	       T1m = VFMA(LDK(KP923879532), Tw, VMUL(LDK(KP382683432), Ts));	       T1n = VSUB(T1l, T1m);	       T2d = VADD(T1l, T1m);	       T1P = VSUB(T1N, T1O);	       T1S = VSUB(T1Q, T1R);	       T1T = VMUL(LDK(KP707106781), VSUB(T1P, T1S));	       T22 = VMUL(LDK(KP707106781), VADD(T1P, T1S));	  }	  {	       V TD, T1B, TR, T1y, TH, T1C, TM, T1z, TA, TC, TB, TO, TQ, TP, TG;	       V TF, TE, TJ, TL, TK, T1A, T1D, T3G, T3H, TN, T2f, TT, T2g, TI, TS;	       V TY, T1I, T1c, T1F, T12, T1J, T17, T1G, TV, TX, TW, T1b, T1a, T19, T11;	       V T10, TZ, T14, T16, T15, T1H, T1K, T3J, T3K, T18, T2i, T1e, T2j, T13, T1d;	       TA = LD(&(Rp[WS(rs, 5)]), ms, &(Rp[WS(rs, 1)]));	       TB = LD(&(Rm[WS(rs, 10)]), -ms, &(Rm[0]));	       TC = VCONJ(TB);	       TD = VSUB(TA, TC);	       T1B = VADD(TA, TC);	       TO = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));	       TP = LD(&(Rm[WS(rs, 14)]), -ms, &(Rm[0]));	       TQ = VCONJ(TP);	       TR = VSUB(TO, TQ);	       T1y = VADD(TO, TQ);	       TG = LD(&(Rp[WS(rs, 13)]), ms, &(Rp[WS(rs, 1)]));	       TE = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));	       TF = VCONJ(TE);	       TH = VSUB(TF, TG);	       T1C = VADD(TF, TG);	       TJ 
= LD(&(Rp[WS(rs, 9)]), ms, &(Rp[WS(rs, 1)]));	       TK = LD(&(Rm[WS(rs, 6)]), -ms, &(Rm[0]));	       TL = VCONJ(TK);	       TM = VSUB(TJ, TL);	       T1z = VADD(TJ, TL);	       T1A = VSUB(T1y, T1z);	       T1D = VSUB(T1B, T1C);	       T1E = VFNMS(LDK(KP382683432), T1D, VMUL(LDK(KP923879532), T1A));	       T24 = VFMA(LDK(KP382683432), T1A, VMUL(LDK(KP923879532), T1D));	       T3G = VADD(T1y, T1z);	       T3H = VADD(T1B, T1C);	       T3I = VSUB(T3G, T3H);	       T4p = VADD(T3G, T3H);	       TI = VMUL(LDK(KP707106781), VSUB(TD, TH));	       TN = VSUB(TI, TM);	       T2f = VADD(TM, TI);	       TS = VMUL(LDK(KP707106781), VADD(TD, TH));	       TT = VSUB(TR, TS);	       T2g = VADD(TR, TS);	       TU = VFMA(LDK(KP831469612), TN, VMUL(LDK(KP555570233), TT));	       T2n = VFNMS(LDK(KP195090322), T2f, VMUL(LDK(KP980785280), T2g));	       T1i = VFNMS(LDK(KP555570233), TN, VMUL(LDK(KP831469612), TT));

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -