⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 q1_8.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
					E T5V, T5I, T5R, T5U, T5T, T5W;					{					     E T2W, T31, T2V, T2Y, T5S, T32, T2X;					     T2W = T1D - T1K;					     T31 = T2Z - T30;					     iio[WS(vs, 2) + WS(rs, 5)] = FNMS(T8W, T8X, T8V);					     rio[WS(vs, 2) + WS(rs, 5)] = FMA(T8T, T8X, T8Y);					     T2V = W[6];					     T2Y = W[7];					     T5P = T5L - T5O;					     T5V = T5L + T5O;					     T5S = T5H + T5G;					     T5I = T5G - T5H;					     T32 = T2V * T31;					     T2X = T2V * T2W;					     T5R = W[2];					     T5U = W[3];					     iio[WS(vs, 4) + WS(rs, 1)] = FNMS(T2Y, T2W, T32);					     rio[WS(vs, 4) + WS(rs, 1)] = FMA(T2Y, T31, T2X);					     T5T = T5R * T5S;					     T5W = T5U * T5S;					}					{					     E T3R, T3W, T40, T3V;					     iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T5U, T5V, T5T);					     rio[WS(vs, 2) + WS(rs, 3)] = FMA(T5R, T5V, T5W);					     T3R = W[8];					     T3W = W[9];					     T40 = T3R * T3Z;					     T3V = T3R * T3U;					     T5D = W[10];					     T5K = W[11];					     iio[WS(vs, 5) + WS(rs, 2)] = FNMS(T3W, T3U, T40);					     rio[WS(vs, 5) + WS(rs, 2)] = FMA(T3W, T3Z, T3V);					     T5J = T5D * T5I;					     T5Q = T5K * T5I;					}				   }				   {					E T73, T76, T78, T75, T9V;					iio[WS(vs, 6) + WS(rs, 3)] = FNMS(T5K, T5P, T5J);					rio[WS(vs, 6) + WS(rs, 3)] = FMA(T5D, T5P, T5Q);					T73 = W[0];					T76 = W[1];					T78 = T73 * T77;					T75 = T73 * T74;					T9V = W[8];					Ta0 = W[9];					iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T76, T74, T78);					rio[WS(vs, 1) + WS(rs, 4)] = FMA(T76, T77, T75);					Ta4 = T9V * Ta3;					T9Z = T9V * T9Y;				   }				   {					E T79, T7g, T7f, T7m, T8Z;					iio[WS(vs, 5) + WS(rs, 6)] = FNMS(Ta0, T9Y, Ta4);					rio[WS(vs, 5) + WS(rs, 6)] = FMA(Ta0, Ta3, T9Z);					T79 = W[10];					T7g = W[11];					T90 = T7H - T7O;					T95 = T93 - T94;					T7f = T79 * T7e;					T7m = T7g * T7e;					T8Z = W[6];					T92 = W[7];					iio[WS(vs, 6) + WS(rs, 4)] = FNMS(T7g, T7l, T7f);					rio[WS(vs, 6) + WS(rs, 4)] = FMA(T79, T7l, T7m);					T96 = T8Z * T95;					T91 = T8Z * 
T90;				   }			      }			 }			 {			      E T8A, T8D, T8C, T8E, T8B;			      {				   E T4s, T4x, T4u, T4y, T4t;				   {					E T4p, T4m, T5s, T5w, T5r;					{					     E T4j, T4c, T47, T4e, T4d, T4k, T5n;					     T4p = T4f + T4i;					     T4j = T4f - T4i;					     T4c = T4a - T4b;					     T4m = T4b + T4a;					     iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T92, T90, T96);					     rio[WS(vs, 4) + WS(rs, 5)] = FMA(T92, T95, T91);					     T47 = W[10];					     T4e = W[11];					     T4d = T47 * T4c;					     T4k = T4e * T4c;					     T5n = W[8];					     T5s = W[9];					     iio[WS(vs, 6) + WS(rs, 2)] = FNMS(T4e, T4j, T4d);					     rio[WS(vs, 6) + WS(rs, 2)] = FMA(T47, T4j, T4k);					     T5w = T5n * T5v;					     T5r = T5n * T5q;					}					{					     E T4l, T4o, T4n, T4q, T4r;					     iio[WS(vs, 5) + WS(rs, 3)] = FNMS(T5s, T5q, T5w);					     rio[WS(vs, 5) + WS(rs, 3)] = FMA(T5s, T5v, T5r);					     T4l = W[2];					     T4o = W[3];					     T4s = T39 - T3g;					     T4x = T4v - T4w;					     T4n = T4l * T4m;					     T4q = T4o * T4m;					     T4r = W[6];					     T4u = W[7];					     iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T4o, T4p, T4n);					     rio[WS(vs, 2) + WS(rs, 2)] = FMA(T4l, T4p, T4q);					     T4y = T4r * T4x;					     T4t = T4r * T4s;					}				   }				   {					E T8F, T8M, T8L, T8S;					{					     E T7u, T7z, T7t, T7w, T7A, T7v;					     T7u = T6b - T6i;					     T7z = T7x - T7y;					     iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T4u, T4s, T4y);					     rio[WS(vs, 4) + WS(rs, 2)] = FMA(T4u, T4x, T4t);					     T7t = W[6];					     T7w = W[7];					     T7A = T7t * T7z;					     T7v = T7t * T7u;					     T8F = W[10];					     T8M = W[11];					     iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T7w, T7u, T7A);					     rio[WS(vs, 4) + WS(rs, 4)] = FMA(T7w, T7z, T7v);					     T8L = T8F * T8K;					     T8S = T8M * T8K;					}					{					     E T8s, T8x, T8p, T8u, T8y, T8t, T8z;					     T8A = FMA(KP707106781, T8r, T8q);					     T8s = FNMS(KP707106781, T8r, T8q);					     T8x = 
FNMS(KP707106781, T8w, T8v);					     T8D = FMA(KP707106781, T8w, T8v);					     iio[WS(vs, 6) + WS(rs, 5)] = FNMS(T8M, T8R, T8L);					     rio[WS(vs, 6) + WS(rs, 5)] = FMA(T8F, T8R, T8S);					     T8p = W[8];					     T8u = W[9];					     T8y = T8p * T8x;					     T8t = T8p * T8s;					     T8z = W[0];					     T8C = W[1];					     iio[WS(vs, 5) + WS(rs, 5)] = FNMS(T8u, T8s, T8y);					     rio[WS(vs, 5) + WS(rs, 5)] = FMA(T8u, T8x, T8t);					     T8E = T8z * T8D;					     T8B = T8z * T8A;					}				   }			      }			      {				   E T3y, T3J, T3h, T3A, T3z, T3K;				   {					E T54, T5f, T4N, T56, T55, T5g;					{					     E Tw, TH, Tf, Ty, Tx, TI;					     {						  E TN, TJ, TM, TL, TO, TK;						  TK = FMA(KP707106781, Tv, Tk);						  Tw = FNMS(KP707106781, Tv, Tk);						  iio[WS(vs, 1) + WS(rs, 5)] = FNMS(T8C, T8A, T8E);						  rio[WS(vs, 1) + WS(rs, 5)] = FMA(T8C, T8D, T8B);						  TH = FNMS(KP707106781, TG, TD);						  TN = FMA(KP707106781, TG, TD);						  TJ = W[4];						  TM = W[5];						  Tf = W[12];						  TL = TJ * TK;						  TO = TM * TK;						  Ty = W[13];						  Tx = Tf * Tw;						  iio[WS(vs, 3)] = FNMS(TM, TN, TL);						  rio[WS(vs, 3)] = FMA(TJ, TN, TO);					     }					     TI = Ty * Tw;					     iio[WS(vs, 7)] = FNMS(Ty, TH, Tx);					     {						  E T5h, T5l, T5k, T5j, T5m, T5i;						  T5i = FMA(KP707106781, T53, T4S);						  T54 = FNMS(KP707106781, T53, T4S);						  rio[WS(vs, 7)] = FMA(Tf, TH, TI);						  T5h = W[4];						  T5f = FNMS(KP707106781, T5e, T5b);						  T5l = FMA(KP707106781, T5e, T5b);						  T5k = W[5];						  T5j = T5h * T5i;						  T4N = W[12];						  T5m = T5k * T5i;						  T56 = W[13];						  iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T5k, T5l, T5j);						  T55 = T4N * T54;						  rio[WS(vs, 3) + WS(rs, 3)] = FMA(T5h, T5l, T5m);					     }					}					T5g = T56 * T54;					{					     E T22, T2d, T1L, T24, T23, T2e;					     {						  E T2j, T2f, T2i, T2h, T2k, T2g;						  iio[WS(vs, 7) + WS(rs, 3)] = FNMS(T56, T5f, T55);						  T22 = 
FNMS(KP707106781, T21, T1Q);						  T2g = FMA(KP707106781, T21, T1Q);						  rio[WS(vs, 7) + WS(rs, 3)] = FMA(T4N, T5f, T5g);						  T2d = FNMS(KP707106781, T2c, T29);						  T2j = FMA(KP707106781, T2c, T29);						  T2f = W[4];						  T2i = W[5];						  T1L = W[12];						  T2h = T2f * T2g;						  T2k = T2i * T2g;						  T24 = W[13];						  T23 = T1L * T22;						  iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T2i, T2j, T2h);						  rio[WS(vs, 3) + WS(rs, 1)] = FMA(T2f, T2j, T2k);					     }					     T2e = T24 * T22;					     iio[WS(vs, 7) + WS(rs, 1)] = FNMS(T24, T2d, T23);					     {						  E T3L, T3P, T3O, T3N, T3Q, T3M;						  T3M = FMA(KP707106781, T3x, T3m);						  T3y = FNMS(KP707106781, T3x, T3m);						  rio[WS(vs, 7) + WS(rs, 1)] = FMA(T1L, T2d, T2e);						  T3L = W[4];						  T3J = FNMS(KP707106781, T3I, T3F);						  T3P = FMA(KP707106781, T3I, T3F);						  T3O = W[5];						  T3N = T3L * T3M;						  T3h = W[12];						  T3Q = T3O * T3M;						  T3A = W[13];						  iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T3O, T3P, T3N);						  T3z = T3h * T3y;						  rio[WS(vs, 3) + WS(rs, 2)] = FMA(T3L, T3P, T3Q);					     }					}				   }				   T3K = T3A * T3y;				   {					E Tb8, Tbj, TaR, Tba, Tb9, Tbk;					{					     E T6A, T6L, T6j, T6C, T6B, T6M;					     {						  E T6R, T6N, T6Q, T6P, T6S, T6O;						  iio[WS(vs, 7) + WS(rs, 2)] = FNMS(T3A, T3J, T3z);						  T6A = FNMS(KP707106781, T6z, T6o);						  T6O = FMA(KP707106781, T6z, T6o);						  rio[WS(vs, 7) + WS(rs, 2)] = FMA(T3h, T3J, T3K);						  T6L = FNMS(KP707106781, T6K, T6H);						  T6R = FMA(KP707106781, T6K, T6H);						  T6N = W[4];						  T6Q = W[5];						  T6j = W[12];						  T6P = T6N * T6O;						  T6S = T6Q * T6O;						  T6C = W[13];						  T6B = T6j * T6A;						  iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T6Q, T6R, T6P);						  rio[WS(vs, 3) + WS(rs, 4)] = FMA(T6N, T6R, T6S);					     }					     T6M = T6C * T6A;					     iio[WS(vs, 7) + WS(rs, 4)] = FNMS(T6C, T6L, T6B);					     {						  E Tbl, Tbp, Tbo, Tbn, Tbq, Tbm;						  Tbm = 
FMA(KP707106781, Tb7, TaW);						  Tb8 = FNMS(KP707106781, Tb7, TaW);						  rio[WS(vs, 7) + WS(rs, 4)] = FMA(T6j, T6L, T6M);						  Tbl = W[4];						  Tbj = FNMS(KP707106781, Tbi, Tbf);						  Tbp = FMA(KP707106781, Tbi, Tbf);						  Tbo = W[5];						  Tbn = Tbl * Tbm;						  TaR = W[12];						  Tbq = Tbo * Tbm;						  Tba = W[13];						  iio[WS(vs, 3) + WS(rs, 7)] = FNMS(Tbo, Tbp, Tbn);						  Tb9 = TaR * Tb8;						  rio[WS(vs, 3) + WS(rs, 7)] = FMA(Tbl, Tbp, Tbq);					     }					}					Tbk = Tba * Tb8;					{					     E T86, T8h, T7P, T88, T87, T8i;					     {						  E T8n, T8j, T8m, T8l, T8o, T8k;						  iio[WS(vs, 7) + WS(rs, 7)] = FNMS(Tba, Tbj, Tb9);						  T86 = FNMS(KP707106781, T85, T7U);						  T8k = FMA(KP707106781, T85, T7U);						  rio[WS(vs, 7) + WS(rs, 7)] = FMA(TaR, Tbj, Tbk);						  T8h = FNMS(KP707106781, T8g, T8d);						  T8n = FMA(KP707106781, T8g, T8d);						  T8j = W[4];						  T8m = W[5];						  T7P = W[12];						  T8l = T8j * T8k;						  T8o = T8m * T8k;						  T88 = W[13];						  T87 = T7P * T86;						  iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T8m, T8n, T8l);						  rio[WS(vs, 3) + WS(rs, 5)] = FMA(T8j, T8n, T8o);					     }					     T8i = T88 * T86;					     iio[WS(vs, 7) + WS(rs, 5)] = FNMS(T88, T8h, T87);					     {						  E T9P, T9T, T9S, T9R, T9U, T9Q;						  T9Q = FMA(KP707106781, T9B, T9q);						  T9C = FNMS(KP707106781, T9B, T9q);						  rio[WS(vs, 7) + WS(rs, 5)] = FMA(T7P, T8h, T8i);						  T9P = W[4];						  T9N = FNMS(KP707106781, T9M, T9J);						  T9T = FMA(KP707106781, T9M, T9J);						  T9S = W[5];						  T9R = T9P * T9Q;						  T9l = W[12];						  T9U = T9S * T9Q;						  T9E = W[13];						  iio[WS(vs, 3) + WS(rs, 6)] = FNMS(T9S, T9T, T9R);						  T9D = T9l * T9C;						  rio[WS(vs, 3) + WS(rs, 6)] = FMA(T9P, T9T, T9U);					     }					}				   }			      }			 }		    }	       }	  }	  T9O = T9E * T9C;	  iio[WS(vs, 7) + WS(rs, 6)] = FNMS(T9E, T9N, T9D);	  rio[WS(vs, 7) + WS(rs, 6)] = FMA(T9l, T9N, T9O);     }}static const tw_instr twinstr[] 
= {     /* Entries of the twinstr table that desc refers to: one TW_FULL entry {TW_FULL, 0, 8} followed by one TW_NEXT entry {TW_NEXT, 1, 0}. NOTE(review): presumably TW_FULL requests the full twiddle set for size 8 and TW_NEXT closes the list; the non-FMA q1_8 loop further down advances W by 14 per iteration, which looks consistent with that -- confirm against the tw_instr definition. */{TW_FULL, 0, 8},     {TW_NEXT, 1, 0}};/* Descriptor handed to the registration call below. Its fields are, in order: 8, the name string "q1_8", the twinstr table, &GENUS, the group {352, 112, 176, 0}, and three zeros. NOTE(review): the group is presumably the operation count (adds, muls, fmas, other) of the HAVE_FMA variant -- confirm against the ct_desc declaration. */static const ct_desc desc = { 8, "q1_8", twinstr, &GENUS, {352, 112, 176, 0}, 0, 0, 0 };/* Registration entry point: passes planner p, the q1_8 kernel and the address of desc to X(kdft_difsq_register). Returns nothing. */void X(codelet_q1_8) (planner *p) {     X(kdft_difsq_register) (p, q1_8, &desc);}#else				/* HAVE_FMA *//* Generated by: ../../../genfft/gen_twidsq -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h *//* * This function contains 528 FP additions, 256 FP multiplications, * (or, 416 additions, 144 multiplications, 112 fused multiply/add), * 142 stack variables, 1 constants, and 256 memory accesses */#include "q.h"static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms){     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {	  E T7, T14, T1g, Tk, TC, TQ, T10, TM, T1w, T2p, T2z, T1H, T1M, T1W, T2j;	  E T1V, T7R, T8O, T90, T84, T8m, T8A, T8K, T8w, T9g, Ta9, Taj, T9r, T9w, T9G;	  E Ta3, T9F, Te, T17, T1h, Tp, Tu, TE, T11, TD, T1p, T2m, T2y, T1C, T1U;	  E T28, T2i, T24, T7Y, T8R, T91, T89, T8e, T8o, T8L, T8n, T99, Ta6, Tai, T9m;	  E T9E, T9S, Ta2, T9O, T2H, T3E, T3Q, T2U, T3c, T3q, T3A, T3m, T46, T4Z, T59;	  E T4h, T4m, T4w, T4T, T4v, T5h, T6e, T6q, T5u, T5M, T60, T6a, T5W, T6G, T7z;	  E T7J, T6R, T6W, T76, T7t, T75, T2O, T3H, T3R, T2Z, T34, T3e, T3B, T3d, T3Z;	  E T4W, T58, T4c, T4u, T4I, T4S, T4E, T5o, T6h, T6r, T5z, T5E, T5O, T6b, T5N;	  E T6z, T7w, T7I, T6M, T74, T7i, T7s, T7e;	  {	       E T3, Ty, Tj, TY, T6, Tg, TB, TZ;	       {		    E T1, T2, Th, Ti;		    T1 = rio[0];		    T2 = rio[WS(rs, 4)];		    T3 = T1 + T2;		    Ty = T1 - T2;		    Th = iio[0];		    Ti = iio[WS(rs, 4)];		    Tj = Th - Ti;		    TY = Th + Ti;	       }	       {		    E T4, T5, Tz, TA;		    T4 = rio[WS(rs, 2)];		    T5 = rio[WS(rs, 6)];		    T6 = T4 + T5;		    Tg = T4 - T5;		    Tz = iio[WS(rs, 2)];		    TA = 
iio[WS(rs, 6)];		    TB = Tz - TA;		    TZ = Tz + TA;	       }	       T7 = T3 + T6;	       T14 = T3 - T6;	       T1g = TY + TZ;	       Tk = Tg + Tj;	       TC = Ty - TB;	       TQ = Tj - Tg;	       T10 = TY - TZ;	       TM = Ty + TB;	  }	  {	       E T1s, T1I, T1L, T2n, T1v, T1D, T1G, T2o;	       {		    E T1q, T1r, T1J, T1K;		    T1q = rio[WS(vs, 1) + WS(rs, 1)];		    T1r = rio[WS(vs, 1) + WS(rs, 5)];		    T1s = T1q + T1r;		    T1I = T1q - T1r;		    T1J = iio[WS(vs, 1) + WS(rs, 1)];		    T1K = iio[WS(vs, 1) + WS(rs, 5)];		    T1L = T1J - T1K;		    T2n = T1J + T1K;	       }	       {		    E T1t, T1u, T1E, T1F;		    T1t = rio[WS(vs, 1) + WS(rs, 7)];		    T1u = rio[WS(vs, 1) + WS(rs, 3)];		    T1v = T1t + T1u;		    T1D = T1t - T1u;		    T1E = iio[WS(vs, 1) + WS(rs, 7)];		    T1F = iio[WS(vs, 1) + WS(rs, 3)];		    T1G = T1E - T1F;		    T2o = T1E + T1F;	       }	       T1w = T1s + T1v;	       T2p = T2n - T2o;	       T2z = T2n + T2o;	       T1H = T1D - T1G;	       T1M = T1I + T1L;	       T1W = T1D + T1G;	       T2j = T1v - T1s;	       T1V = T1L - T1I;	  }	  {	       E T7N, T8i, T83, T8I, T7Q, T80, T8l, T8J;	       {		    E T7L, T7M, T81, T82;		    T7L = rio[WS(vs, 6)];		    T7M = rio[WS(vs, 6) + WS(rs, 4)];		    T7N = T7L + T7M;		    T8i = T7L - T7M;		    T81 = iio[WS(vs, 6)];		    T82 = iio[WS(vs, 6) + WS(rs, 4)];		    T83 = T81 - T82;		    T8I = T81 + T82;	       }	       {		    E T7O, T7P, T8j, T8k;		    T7O = rio[WS(vs, 6) + WS(rs, 2)];		    T7P = rio[WS(vs, 6) + WS(rs, 6)];		    T7Q = T7O + T7P;		    T80 = T7O - T7P;		    T8j = iio[WS(vs, 6) + WS(rs, 2)];		    T8k = iio[WS(vs, 6) + WS(rs, 6)];		    T8l = T8j - T8k;		    T8J = T8j + T8k;	       }	       T7R = T7N + T7Q;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -