⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 q1fv_8.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
	       T2i = VMUL(LDK(KP707106781), VADD(T2e, T2h));	       T2D = VBYI(VSUB(T2z, T2y));	       T2k = VMUL(LDK(KP707106781), VSUB(T2h, T2e));	       T2A = VADD(T2y, T2z);	  }	  {	       V T3P, T49, T3S, T4a;	       {		    V T3N, T3O, T3Q, T3R;		    T3N = LD(&(x[WS(vs, 7) + WS(rs, 1)]), ms, &(x[WS(vs, 7) + WS(rs, 1)]));		    T3O = LD(&(x[WS(vs, 7) + WS(rs, 5)]), ms, &(x[WS(vs, 7) + WS(rs, 1)]));		    T3P = VSUB(T3N, T3O);		    T49 = VADD(T3N, T3O);		    T3Q = LD(&(x[WS(vs, 7) + WS(rs, 7)]), ms, &(x[WS(vs, 7) + WS(rs, 1)]));		    T3R = LD(&(x[WS(vs, 7) + WS(rs, 3)]), ms, &(x[WS(vs, 7) + WS(rs, 1)]));		    T3S = VSUB(T3Q, T3R);		    T4a = VADD(T3Q, T3R);	       }	       T3T = VMUL(LDK(KP707106781), VADD(T3P, T3S));	       T4e = VBYI(VSUB(T4a, T49));	       T3V = VMUL(LDK(KP707106781), VSUB(T3S, T3P));	       T4b = VADD(T49, T4a);	  }	  {	       V TD, TX, TG, TY;	       {		    V TB, TC, TE, TF;		    TB = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));		    TC = LD(&(x[WS(vs, 1) + WS(rs, 5)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));		    TD = VSUB(TB, TC);		    TX = VADD(TB, TC);		    TE = LD(&(x[WS(vs, 1) + WS(rs, 7)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));		    TF = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));		    TG = VSUB(TE, TF);		    TY = VADD(TE, TF);	       }	       TH = VMUL(LDK(KP707106781), VADD(TD, TG));	       T12 = VBYI(VSUB(TY, TX));	       TJ = VMUL(LDK(KP707106781), VSUB(TG, TD));	       TZ = VADD(TX, TY);	  }	  {	       V T1a, T1u, T1d, T1v;	       {		    V T18, T19, T1b, T1c;		    T18 = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)]));		    T19 = LD(&(x[WS(vs, 2) + WS(rs, 5)]), ms, &(x[WS(vs, 2) + WS(rs, 1)]));		    T1a = VSUB(T18, T19);		    T1u = VADD(T18, T19);		    T1b = LD(&(x[WS(vs, 2) + WS(rs, 7)]), ms, &(x[WS(vs, 2) + WS(rs, 1)]));		    T1c = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)]));		    T1d = VSUB(T1b, T1c);		    T1v = VADD(T1b, T1c);	       }	       T1e = 
VMUL(LDK(KP707106781), VADD(T1a, T1d));	       T1z = VBYI(VSUB(T1v, T1u));	       T1g = VMUL(LDK(KP707106781), VSUB(T1d, T1a));	       T1w = VADD(T1u, T1v);	  }	  {	       V T2L, T35, T2O, T36;	       {		    V T2J, T2K, T2M, T2N;		    T2J = LD(&(x[WS(vs, 5) + WS(rs, 1)]), ms, &(x[WS(vs, 5) + WS(rs, 1)]));		    T2K = LD(&(x[WS(vs, 5) + WS(rs, 5)]), ms, &(x[WS(vs, 5) + WS(rs, 1)]));		    T2L = VSUB(T2J, T2K);		    T35 = VADD(T2J, T2K);		    T2M = LD(&(x[WS(vs, 5) + WS(rs, 7)]), ms, &(x[WS(vs, 5) + WS(rs, 1)]));		    T2N = LD(&(x[WS(vs, 5) + WS(rs, 3)]), ms, &(x[WS(vs, 5) + WS(rs, 1)]));		    T2O = VSUB(T2M, T2N);		    T36 = VADD(T2M, T2N);	       }	       T2P = VMUL(LDK(KP707106781), VADD(T2L, T2O));	       T3a = VBYI(VSUB(T36, T35));	       T2R = VMUL(LDK(KP707106781), VSUB(T2O, T2L));	       T37 = VADD(T35, T36);	  }	  {	       V T3i, T3C, T3l, T3D;	       {		    V T3g, T3h, T3j, T3k;		    T3g = LD(&(x[WS(vs, 6) + WS(rs, 1)]), ms, &(x[WS(vs, 6) + WS(rs, 1)]));		    T3h = LD(&(x[WS(vs, 6) + WS(rs, 5)]), ms, &(x[WS(vs, 6) + WS(rs, 1)]));		    T3i = VSUB(T3g, T3h);		    T3C = VADD(T3g, T3h);		    T3j = LD(&(x[WS(vs, 6) + WS(rs, 7)]), ms, &(x[WS(vs, 6) + WS(rs, 1)]));		    T3k = LD(&(x[WS(vs, 6) + WS(rs, 3)]), ms, &(x[WS(vs, 6) + WS(rs, 1)]));		    T3l = VSUB(T3j, T3k);		    T3D = VADD(T3j, T3k);	       }	       T3m = VMUL(LDK(KP707106781), VADD(T3i, T3l));	       T3H = VBYI(VSUB(T3D, T3C));	       T3o = VMUL(LDK(KP707106781), VSUB(T3l, T3i));	       T3E = VADD(T3C, T3D);	  }	  ST(&(x[0]), VADD(Tp, Ts), ms, &(x[0]));	  ST(&(x[WS(rs, 2)]), VADD(T1t, T1w), ms, &(x[0]));	  ST(&(x[WS(rs, 5)]), VADD(T34, T37), ms, &(x[WS(rs, 1)]));	  ST(&(x[WS(rs, 7)]), VADD(T48, T4b), ms, &(x[WS(rs, 1)]));	  ST(&(x[WS(rs, 6)]), VADD(T3B, T3E), ms, &(x[0]));	  ST(&(x[WS(rs, 4)]), VADD(T2x, T2A), ms, &(x[0]));	  {	       V Tt, T4c, T2B, T24;	       ST(&(x[WS(rs, 3)]), VADD(T20, T23), ms, &(x[WS(rs, 1)]));	       ST(&(x[WS(rs, 1)]), VADD(TW, TZ), ms, &(x[WS(rs, 1)]));	       Tt = 
BYTWJ(&(W[TWVL * 6]), VSUB(Tp, Ts));	       ST(&(x[WS(vs, 4)]), Tt, ms, &(x[WS(vs, 4)]));	       T4c = BYTWJ(&(W[TWVL * 6]), VSUB(T48, T4b));	       ST(&(x[WS(vs, 4) + WS(rs, 7)]), T4c, ms, &(x[WS(vs, 4) + WS(rs, 1)]));	       T2B = BYTWJ(&(W[TWVL * 6]), VSUB(T2x, T2A));	       ST(&(x[WS(vs, 4) + WS(rs, 4)]), T2B, ms, &(x[WS(vs, 4)]));	       T24 = BYTWJ(&(W[TWVL * 6]), VSUB(T20, T23));	       ST(&(x[WS(vs, 4) + WS(rs, 3)]), T24, ms, &(x[WS(vs, 4) + WS(rs, 1)]));	  }	  {	       V T10, T1x, T3F, T38, T1A, Tw;	       T10 = BYTWJ(&(W[TWVL * 6]), VSUB(TW, TZ));	       ST(&(x[WS(vs, 4) + WS(rs, 1)]), T10, ms, &(x[WS(vs, 4) + WS(rs, 1)]));	       T1x = BYTWJ(&(W[TWVL * 6]), VSUB(T1t, T1w));	       ST(&(x[WS(vs, 4) + WS(rs, 2)]), T1x, ms, &(x[WS(vs, 4)]));	       T3F = BYTWJ(&(W[TWVL * 6]), VSUB(T3B, T3E));	       ST(&(x[WS(vs, 4) + WS(rs, 6)]), T3F, ms, &(x[WS(vs, 4)]));	       T38 = BYTWJ(&(W[TWVL * 6]), VSUB(T34, T37));	       ST(&(x[WS(vs, 4) + WS(rs, 5)]), T38, ms, &(x[WS(vs, 4) + WS(rs, 1)]));	       T1A = BYTWJ(&(W[TWVL * 10]), VSUB(T1y, T1z));	       ST(&(x[WS(vs, 6) + WS(rs, 2)]), T1A, ms, &(x[WS(vs, 6)]));	       Tw = BYTWJ(&(W[TWVL * 10]), VSUB(Tu, Tv));	       ST(&(x[WS(vs, 6)]), Tw, ms, &(x[WS(vs, 6)]));	  }	  {	       V T2E, T3I, T13, T27, T3b, T4f;	       T2E = BYTWJ(&(W[TWVL * 10]), VSUB(T2C, T2D));	       ST(&(x[WS(vs, 6) + WS(rs, 4)]), T2E, ms, &(x[WS(vs, 6)]));	       T3I = BYTWJ(&(W[TWVL * 10]), VSUB(T3G, T3H));	       ST(&(x[WS(vs, 6) + WS(rs, 6)]), T3I, ms, &(x[WS(vs, 6)]));	       T13 = BYTWJ(&(W[TWVL * 10]), VSUB(T11, T12));	       ST(&(x[WS(vs, 6) + WS(rs, 1)]), T13, ms, &(x[WS(vs, 6) + WS(rs, 1)]));	       T27 = BYTWJ(&(W[TWVL * 10]), VSUB(T25, T26));	       ST(&(x[WS(vs, 6) + WS(rs, 3)]), T27, ms, &(x[WS(vs, 6) + WS(rs, 1)]));	       T3b = BYTWJ(&(W[TWVL * 10]), VSUB(T39, T3a));	       ST(&(x[WS(vs, 6) + WS(rs, 5)]), T3b, ms, &(x[WS(vs, 6) + WS(rs, 1)]));	       T4f = BYTWJ(&(W[TWVL * 10]), VSUB(T4d, T4e));	       ST(&(x[WS(vs, 6) + WS(rs, 7)]), 
T4f, ms, &(x[WS(vs, 6) + WS(rs, 1)]));	  }	  {	       V Tx, T1B, T3c, T4g, T3J, T2F;	       Tx = BYTWJ(&(W[TWVL * 2]), VADD(Tu, Tv));	       ST(&(x[WS(vs, 2)]), Tx, ms, &(x[WS(vs, 2)]));	       T1B = BYTWJ(&(W[TWVL * 2]), VADD(T1y, T1z));	       ST(&(x[WS(vs, 2) + WS(rs, 2)]), T1B, ms, &(x[WS(vs, 2)]));	       T3c = BYTWJ(&(W[TWVL * 2]), VADD(T39, T3a));	       ST(&(x[WS(vs, 2) + WS(rs, 5)]), T3c, ms, &(x[WS(vs, 2) + WS(rs, 1)]));	       T4g = BYTWJ(&(W[TWVL * 2]), VADD(T4d, T4e));	       ST(&(x[WS(vs, 2) + WS(rs, 7)]), T4g, ms, &(x[WS(vs, 2) + WS(rs, 1)]));	       T3J = BYTWJ(&(W[TWVL * 2]), VADD(T3G, T3H));	       ST(&(x[WS(vs, 2) + WS(rs, 6)]), T3J, ms, &(x[WS(vs, 2)]));	       T2F = BYTWJ(&(W[TWVL * 2]), VADD(T2C, T2D));	       ST(&(x[WS(vs, 2) + WS(rs, 4)]), T2F, ms, &(x[WS(vs, 2)]));	  }	  T28 = BYTWJ(&(W[TWVL * 2]), VADD(T25, T26));	  ST(&(x[WS(vs, 2) + WS(rs, 3)]), T28, ms, &(x[WS(vs, 2) + WS(rs, 1)]));	  T14 = BYTWJ(&(W[TWVL * 2]), VADD(T11, T12));	  ST(&(x[WS(vs, 2) + WS(rs, 1)]), T14, ms, &(x[WS(vs, 2) + WS(rs, 1)]));	  {	       V Th, Ti, Tb, Tg;	       Tb = VADD(T3, Ta);	       Tg = VBYI(VSUB(Tc, Tf));	       Th = BYTWJ(&(W[TWVL * 12]), VSUB(Tb, Tg));	       Ti = BYTWJ(&(W[0]), VADD(Tb, Tg));	       ST(&(x[WS(vs, 7)]), Th, ms, &(x[WS(vs, 7)]));	       ST(&(x[WS(vs, 1)]), Ti, ms, &(x[WS(vs, 1)]));	  }	  {	       V T40, T41, T3U, T3Z;	       T3U = VADD(T3M, T3T);	       T3Z = VBYI(VSUB(T3V, T3Y));	       T40 = BYTWJ(&(W[TWVL * 12]), VSUB(T3U, T3Z));	       T41 = BYTWJ(&(W[0]), VADD(T3U, T3Z));	       ST(&(x[WS(vs, 7) + WS(rs, 7)]), T40, ms, &(x[WS(vs, 7) + WS(rs, 1)]));	       ST(&(x[WS(vs, 1) + WS(rs, 7)]), T41, ms, &(x[WS(vs, 1) + WS(rs, 1)]));	  }	  {	       V T2p, T2q, T2j, T2o;	       T2j = VADD(T2b, T2i);	       T2o = VBYI(VSUB(T2k, T2n));	       T2p = BYTWJ(&(W[TWVL * 12]), VSUB(T2j, T2o));	       T2q = BYTWJ(&(W[0]), VADD(T2j, T2o));	       ST(&(x[WS(vs, 7) + WS(rs, 4)]), T2p, ms, &(x[WS(vs, 7)]));	       ST(&(x[WS(vs, 1) + WS(rs, 4)]), T2q, ms, 
&(x[WS(vs, 1)]));	  }	  {	       V T1S, T1T, T1M, T1R;	       T1M = VADD(T1E, T1L);	       T1R = VBYI(VSUB(T1N, T1Q));	       T1S = BYTWJ(&(W[TWVL * 12]), VSUB(T1M, T1R));	       T1T = BYTWJ(&(W[0]), VADD(T1M, T1R));	       ST(&(x[WS(vs, 7) + WS(rs, 3)]), T1S, ms, &(x[WS(vs, 7) + WS(rs, 1)]));	       ST(&(x[WS(vs, 1) + WS(rs, 3)]), T1T, ms, &(x[WS(vs, 1) + WS(rs, 1)]));	  }	  {	       V TO, TP, TI, TN;	       TI = VADD(TA, TH);	       TN = VBYI(VSUB(TJ, TM));	       TO = BYTWJ(&(W[TWVL * 12]), VSUB(TI, TN));	       TP = BYTWJ(&(W[0]), VADD(TI, TN));	       ST(&(x[WS(vs, 7) + WS(rs, 1)]), TO, ms, &(x[WS(vs, 7) + WS(rs, 1)]));	       ST(&(x[WS(vs, 1) + WS(rs, 1)]), TP, ms, &(x[WS(vs, 1) + WS(rs, 1)]));	  }	  {	       V T1l, T1m, T1f, T1k;	       T1f = VADD(T17, T1e);	       T1k = VBYI(VSUB(T1g, T1j));	       T1l = BYTWJ(&(W[TWVL * 12]), VSUB(T1f, T1k));	       T1m = BYTWJ(&(W[0]), VADD(T1f, T1k));	       ST(&(x[WS(vs, 7) + WS(rs, 2)]), T1l, ms, &(x[WS(vs, 7)]));	       ST(&(x[WS(vs, 1) + WS(rs, 2)]), T1m, ms, &(x[WS(vs, 1)]));	  }	  {	       V T3t, T3u, T3n, T3s;	       T3n = VADD(T3f, T3m);	       T3s = VBYI(VSUB(T3o, T3r));	       T3t = BYTWJ(&(W[TWVL * 12]), VSUB(T3n, T3s));	       T3u = BYTWJ(&(W[0]), VADD(T3n, T3s));	       ST(&(x[WS(vs, 7) + WS(rs, 6)]), T3t, ms, &(x[WS(vs, 7)]));	       ST(&(x[WS(vs, 1) + WS(rs, 6)]), T3u, ms, &(x[WS(vs, 1)]));	  }	  {	       V T2W, T2X, T2Q, T2V;	       T2Q = VADD(T2I, T2P);	       T2V = VBYI(VSUB(T2R, T2U));	       T2W = BYTWJ(&(W[TWVL * 12]), VSUB(T2Q, T2V));	       T2X = BYTWJ(&(W[0]), VADD(T2Q, T2V));	       ST(&(x[WS(vs, 7) + WS(rs, 5)]), T2W, ms, &(x[WS(vs, 7) + WS(rs, 1)]));	       ST(&(x[WS(vs, 1) + WS(rs, 5)]), T2X, ms, &(x[WS(vs, 1) + WS(rs, 1)]));	  }	  {	       V T1p, T1q, T1n, T1o;	       T1n = VSUB(T17, T1e);	       T1o = VBYI(VADD(T1j, T1g));	       T1p = BYTWJ(&(W[TWVL * 8]), VSUB(T1n, T1o));	       T1q = BYTWJ(&(W[TWVL * 4]), VADD(T1n, T1o));	       ST(&(x[WS(vs, 5) + WS(rs, 2)]), T1p, ms, &(x[WS(vs, 5)]));	  
     ST(&(x[WS(vs, 3) + WS(rs, 2)]), T1q, ms, &(x[WS(vs, 3)]));	  }	  {	       V Tl, Tm, Tj, Tk;	       Tj = VSUB(T3, Ta);	       Tk = VBYI(VADD(Tf, Tc));	       Tl = BYTWJ(&(W[TWVL * 8]), VSUB(Tj, Tk));	       Tm = BYTWJ(&(W[TWVL * 4]), VADD(Tj, Tk));	       ST(&(x[WS(vs, 5)]), Tl, ms, &(x[WS(vs, 5)]));	       ST(&(x[WS(vs, 3)]), Tm, ms, &(x[WS(vs, 3)]));	  }	  {	       V T2t, T2u, T2r, T2s;	       T2r = VSUB(T2b, T2i);	       T2s = VBYI(VADD(T2n, T2k));	       T2t = BYTWJ(&(W[TWVL * 8]), VSUB(T2r, T2s));	       T2u = BYTWJ(&(W[TWVL * 4]), VADD(T2r, T2s));	       ST(&(x[WS(vs, 5) + WS(rs, 4)]), T2t, ms, &(x[WS(vs, 5)]));	       ST(&(x[WS(vs, 3) + WS(rs, 4)]), T2u, ms, &(x[WS(vs, 3)]));	  }	  {	       V T3x, T3y, T3v, T3w;	       T3v = VSUB(T3f, T3m);	       T3w = VBYI(VADD(T3r, T3o));	       T3x = BYTWJ(&(W[TWVL * 8]), VSUB(T3v, T3w));	       T3y = BYTWJ(&(W[TWVL * 4]), VADD(T3v, T3w));	       ST(&(x[WS(vs, 5) + WS(rs, 6)]), T3x, ms, &(x[WS(vs, 5)]));	       ST(&(x[WS(vs, 3) + WS(rs, 6)]), T3y, ms, &(x[WS(vs, 3)]));	  }	  {	       V TS, TT, TQ, TR;	       TQ = VSUB(TA, TH);	       TR = VBYI(VADD(TM, TJ));	       TS = BYTWJ(&(W[TWVL * 8]), VSUB(TQ, TR));	       TT = BYTWJ(&(W[TWVL * 4]), VADD(TQ, TR));	       ST(&(x[WS(vs, 5) + WS(rs, 1)]), TS, ms, &(x[WS(vs, 5) + WS(rs, 1)]));	       ST(&(x[WS(vs, 3) + WS(rs, 1)]), TT, ms, &(x[WS(vs, 3) + WS(rs, 1)]));	  }	  {	       V T1W, T1X, T1U, T1V;	       T1U = VSUB(T1E, T1L);	       T1V = VBYI(VADD(T1Q, T1N));	       T1W = BYTWJ(&(W[TWVL * 8]), VSUB(T1U, T1V));	       T1X = BYTWJ(&(W[TWVL * 4]), VADD(T1U, T1V));	       ST(&(x[WS(vs, 5) + WS(rs, 3)]), T1W, ms, &(x[WS(vs, 5) + WS(rs, 1)]));	       ST(&(x[WS(vs, 3) + WS(rs, 3)]), T1X, ms, &(x[WS(vs, 3) + WS(rs, 1)]));	  }	  {	       V T30, T31, T2Y, T2Z;	       T2Y = VSUB(T2I, T2P);	       T2Z = VBYI(VADD(T2U, T2R));	       T30 = BYTWJ(&(W[TWVL * 8]), VSUB(T2Y, T2Z));	       T31 = BYTWJ(&(W[TWVL * 4]), VADD(T2Y, T2Z));	       ST(&(x[WS(vs, 5) + WS(rs, 5)]), T30, ms, 
&(x[WS(vs, 5) + WS(rs, 1)]));
	       ST(&(x[WS(vs, 3) + WS(rs, 5)]), T31, ms, &(x[WS(vs, 3) + WS(rs, 1)]));
	  }
	  /*
	   * Final block of the function body: forms T42 = T3M - T3T and
	   * T43 = VBYI(T3Y + T3V), passes T42 - T43 and T42 + T43 through BYTWJ
	   * with W[TWVL * 8] and W[TWVL * 4] respectively, and stores the two
	   * results at x[WS(vs, 5) + WS(rs, 7)] and x[WS(vs, 3) + WS(rs, 7)].
	   * NOTE(review): VBYI/BYTWJ/ST are macros defined outside this excerpt;
	   * presumably multiply-by-i, twiddle multiplication and vector store --
	   * confirm against the SIMD header.
	   */
	  {
	       V T44, T45, T42, T43;
	       T42 = VSUB(T3M, T3T);
	       T43 = VBYI(VADD(T3Y, T3V));
	       T44 = BYTWJ(&(W[TWVL * 8]), VSUB(T42, T43));
	       T45 = BYTWJ(&(W[TWVL * 4]), VADD(T42, T43));
	       ST(&(x[WS(vs, 5) + WS(rs, 7)]), T44, ms, &(x[WS(vs, 5) + WS(rs, 1)]));
	       ST(&(x[WS(vs, 3) + WS(rs, 7)]), T45, ms, &(x[WS(vs, 3) + WS(rs, 1)]));
	  }
     }
}

/*
 * Twiddle instruction table referenced by the descriptor below: one
 * VTW(0, k) entry for each k = 1..7, closed by a {TW_NEXT, VL, 0} entry.
 * NOTE(review): VTW, TW_NEXT and VL are defined elsewhere; presumably each
 * VTW entry requests one vector twiddle factor and TW_NEXT marks the end of
 * the list -- confirm against the tw_instr declaration.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1),
     VTW(0, 2),
     VTW(0, 3),
     VTW(0, 4),
     VTW(0, 5),
     VTW(0, 6),
     VTW(0, 7),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor handed to the planner together with q1fv_8: it carries the
 * value 8, the codelet name string "q1fv_8", the twiddle table above, a
 * pointer to GENUS, the record {264, 128, 0, 0} and three trailing zeros.
 * NOTE(review): the leading 8 is presumably the radix and {264, 128, 0, 0}
 * presumably an operation-count record; the ct_desc layout is not visible
 * here -- confirm against its declaration.
 */
static const ct_desc desc = { 8, "q1fv_8", twinstr, &GENUS, {264, 128, 0, 0}, 0, 0, 0 };

/*
 * Registration entry point: passes the q1fv_8 codelet and its descriptor to
 * X(kdft_difsq_register) for the given planner p.
 */
void X(codelet_q1fv_8) (planner *p) {
     X(kdft_difsq_register) (p, q1fv_8, &desc);
}
#endif				/* HAVE_FMA */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -