⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 q1_5.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 2 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:41:37 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twidsq -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 5 -name q1_5 -include q.h *//* * This function contains 200 FP additions, 170 FP multiplications, * (or, 70 additions, 40 multiplications, 130 fused multiply/add), * 104 stack variables, 4 constants, and 100 memory accesses */#include "q.h"static void q1_5(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms){     DK(KP951056516, +0.951056516295153572116439333379382143405698634);     DK(KP559016994, +0.559016994374947424102293417182819058860154590);     DK(KP250000000, +0.250000000000000000000000000000000000000000000);     DK(KP618033988, +0.618033988749894848204586834365638117720309180);     INT m;     for (m = mb, W = W + (mb * 8); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {	  E T1x, T1w, T1v;	  {	       E T1, Tn, TM, Tw, Tb, T8, Ta, TV, Tq, Ts, TH, Tj, Tr, T1h, T1q;	       E T1G, T12, T15, T1P, T14, T1k, T1m, T1B, T1d, T1l, T2b, T2k, T2A, T1W, T1Z;	       E T3Z, T1Y, T2e, T2g, T2v, T27, T2f, T3D, T42, T44, T4j, T3V, T43, T2J, T48;	       E T4o, T3K, T3N, T35, T3M, T2V, T3e, T3u, T2Q, T2T, T37, T30, T2S, T2W;	       {		    E T1Q, T2j, T1V, T1R;		    {			 E Tp, Ti, Td, Te;			 {			      E T5, T6, T2, T3, T7, Tv;			      T1 = rio[0];			      T5 = rio[WS(rs, 2)];			      T6 = rio[WS(rs, 3)];			      T2 = rio[WS(rs, 1)];			      T3 = rio[WS(rs, 4)];			      Tn = iio[0];			      T7 = T5 + T6;			      Tv = T5 - T6;			      {				   E T4, Tu, Tg, Th;				   T4 = T2 + T3;				   Tu = T2 - T3;				   Tg = iio[WS(rs, 2)];				   Th = iio[WS(rs, 3)];				   TM = FNMS(KP618033988, Tu, Tv);				   Tw = FMA(KP618033988, Tv, Tu);				   Tb = T4 - T7;				   T8 = T4 + T7;				   Tp = Tg + Th;				   Ti = Tg - Th;				   Ta = FNMS(KP250000000, T8, T1);				   Td = iio[WS(rs, 1)];				   Te = iio[WS(rs, 4)];			      }			 }			 {			      E TW, T1p, T11, TX;			      TV = rio[WS(vs, 1)];			      {				   E TZ, T10, Tf, To;				   TZ = rio[WS(vs, 1) + WS(rs, 2)];				   T10 = rio[WS(vs, 1) + WS(rs, 3)];				   Tf = Td - Te;				   To = Td + Te;				   TW = rio[WS(vs, 1) + WS(rs, 1)];				   T1p = TZ - T10;				   T11 = TZ + T10;				   Tq = To + Tp;				   Ts = To - Tp;				   TH = FNMS(KP618033988, Tf, Ti);				   Tj = FMA(KP618033988, Ti, Tf);				   Tr = FNMS(KP250000000, Tq, Tn);				   TX = rio[WS(vs, 1) + WS(rs, 4)];			      }			      {				   E T17, T1j, T1c, T18;				   T1h = iio[WS(vs, 1)];				   {					E T1a, T1b, TY, T1o;					T1a = iio[WS(vs, 1) + WS(rs, 2)];					T1b = iio[WS(vs, 1) + WS(rs, 3)];					TY = TW + TX;					T1o = TW - TX;					T17 = iio[WS(vs, 1) + WS(rs, 1)];					T1j = T1a + T1b;					T1c = T1a - T1b;					T1q = FMA(KP618033988, T1p, T1o);					T1G = FNMS(KP618033988, T1o, T1p);					T12 = TY + T11;					T15 = TY - T11;					T18 = iio[WS(vs, 1) + WS(rs, 4)];				   }				   T1P = rio[WS(vs, 2)];				   T14 = FNMS(KP250000000, T12, TV);				   {					E T1T, T1i, T19, T1U;					T1T = rio[WS(vs, 2) + WS(rs, 2)];					T1i = T17 + T18;					T19 = T17 - T18;					T1U = rio[WS(vs, 2) + WS(rs, 3)];					T1Q = rio[WS(vs, 2) + WS(rs, 1)];					T1k = T1i + T1j;					T1m = T1i - T1j;					T1B = FNMS(KP618033988, T19, T1c);					T1d = FMA(KP618033988, T1c, T19);					T2j = T1T - T1U;					T1V = T1T + T1U;					T1l = FNMS(KP250000000, T1k, T1h);					T1R = rio[WS(vs, 2) + WS(rs, 4)];				   }			      }			 }		    }		    {			 E T3P, T41, T3U, T3Q;			 {			      E T21, T2d, T26, T22;			      T2b = iio[WS(vs, 2)];			      {				   E T24, T25, T1S, T2i;				   T24 = iio[WS(vs, 2) + WS(rs, 2)];				   T25 = iio[WS(vs, 2) + WS(rs, 3)];				   T1S = T1Q + T1R;				   T2i = T1Q - T1R;				   T21 = iio[WS(vs, 2) + WS(rs, 1)];				   T2d = T24 + T25;				   T26 = T24 - T25;				   T2k = FMA(KP618033988, T2j, T2i);				   T2A = FNMS(KP618033988, T2i, T2j);				   T1W = T1S + T1V;				   T1Z = T1S - T1V;				   T22 = iio[WS(vs, 2) + WS(rs, 4)];			      }			      T3Z = iio[WS(vs, 4)];			      T1Y = FNMS(KP250000000, T1W, T1P);			      {				   E T3S, T2c, T23, T3T;				   T3S = iio[WS(vs, 4) + WS(rs, 2)];				   T2c = T21 + T22;				   T23 = T21 - T22;				   T3T = iio[WS(vs, 4) + WS(rs, 3)];				   T3P = iio[WS(vs, 4) + WS(rs, 1)];				   T2e = T2c + T2d;				   T2g = T2c - T2d;				   T2v = FNMS(KP618033988, T23, T26);				   T27 = FMA(KP618033988, T26, T23);				   T41 = T3S + T3T;				   T3U = T3S - T3T;				   T2f = FNMS(KP250000000, T2e, T2b);				   T3Q = iio[WS(vs, 4) + WS(rs, 4)];			      }			 }			 {			      E T3E, T47, T3J, T3F;			      T3D = rio[WS(vs, 4)];			      {				   E T3H, T3I, T3R, T40;				   T3H = rio[WS(vs, 4) + WS(rs, 2)];				   T3I = rio[WS(vs, 4) + WS(rs, 3)];				   T3R = T3P - T3Q;				   T40 = T3P + T3Q;				   T3E = rio[WS(vs, 4) + WS(rs, 1)];				   T47 = T3H - T3I;				   T3J = T3H + T3I;				   T42 = T40 + T41;				   T44 = T40 - T41;				   T4j = FNMS(KP618033988, T3R, T3U);				   T3V = FMA(KP618033988, T3U, T3R);				   T43 = FNMS(KP250000000, T42, T3Z);				   T3F = rio[WS(vs, 4) + WS(rs, 4)];			      }			      {				   E T2K, T3d, T2P, T2L;				   T2J = rio[WS(vs, 3)];				   {					E T2N, T2O, T3G, T46;					T2N = rio[WS(vs, 3) + WS(rs, 2)];					T2O = rio[WS(vs, 3) + WS(rs, 3)];					T3G = T3E + T3F;					T46 = T3E - T3F;					T2K = rio[WS(vs, 3) + WS(rs, 1)];					T3d = T2N - T2O;					T2P = T2N + T2O;					T48 = FMA(KP618033988, T47, T46);					T4o = FNMS(KP618033988, T46, T47);					T3K = T3G + T3J;					T3N = T3G - T3J;					T2L = rio[WS(vs, 3) + WS(rs, 4)];				   }				   T35 = iio[WS(vs, 3)];				   T3M = FNMS(KP250000000, T3K, T3D);				   {					E T2Y, T3c, T2M, T2Z;					T2Y = iio[WS(vs, 3) + WS(rs, 2)];					T3c = T2K - T2L;					T2M = T2K + T2L;					T2Z = iio[WS(vs, 3) + WS(rs, 3)];					T2V = iio[WS(vs, 3) + WS(rs, 1)];					T3e = FMA(KP618033988, T3d, T3c);					T3u = FNMS(KP618033988, T3c, T3d);					T2Q = T2M + T2P;					T2T = T2M - T2P;					T37 = T2Y + T2Z;					T30 = T2Y - T2Z;					T2S = FNMS(KP250000000, T2Q, T2J);					T2W = iio[WS(vs, 3) + WS(rs, 4)];				   }			      }			 }		    }	       }	       {		    E T3a, T31, T3p, T39, T2X, T36, T38;		    rio[0] = T1 + T8;		    iio[0] = Tn + Tq;		    rio[WS(rs, 1)] = TV + T12;		    T2X = T2V - T2W;		    T36 = T2V + T2W;		    iio[WS(rs, 1)] = T1h + T1k;		    rio[WS(rs, 2)] = T1P + T1W;		    T3a = T36 - T37;		    T38 = T36 + T37;		    T31 = FMA(KP618033988, T30, T2X);		    T3p = FNMS(KP618033988, T2X, T30);		    T39 = FNMS(KP250000000, T38, T35);		    iio[WS(rs, 2)] = T2b + T2e;		    iio[WS(rs, 4)] = T3Z + T42;		    rio[WS(rs, 4)] = T3D + T3K;		    rio[WS(rs, 3)] = T2J + T2Q;		    iio[WS(rs, 3)] = T35 + T38;		    {			 E T3O, T45, T2r, T2q, T2p, TT, TS, TR;			 {			      E TG, TL, TD, TC, TB, Tc, Tt;			      TG = FNMS(KP559016994, Tb, Ta);			      Tc = FMA(KP559016994, Tb, Ta);			      Tt = FMA(KP559016994, Ts, Tr);			      TL = FNMS(KP559016994, Ts, Tr);			      {				   E T9, Tm, Tk, TA, Tx;				   T9 = W[0];				   Tm = W[1];				   Tk = FMA(KP951056516, Tj, Tc);				   TA = FNMS(KP951056516, Tj, Tc);				   Tx = FNMS(KP951056516, Tw, Tt);				   TD = FMA(KP951056516, Tw, Tt);				   {					E Tz, Tl, Ty, TE;					Tz = W[6];					Tl = T9 * Tk;					TC = W[7];					Ty = T9 * Tx;					TE = Tz * TD;					TB = Tz * TA;					rio[WS(vs, 1)] = FMA(Tm, Tx, Tl);					iio[WS(vs, 1)] = FNMS(Tm, Tk, Ty);					iio[WS(vs, 4)] = FNMS(TC, TA, TE);				   }			      }			      rio[WS(vs, 4)] = FMA(TC, TD, TB);			      {				   E TF, TK, TI, TQ, TN;				   TF = W[2];				   TK = W[3];				   TI = FNMS(KP951056516, TH, TG);				   TQ = FMA(KP951056516, TH, TG);				   TN = FMA(KP951056516, TM, TL);				   TT = FNMS(KP951056516, TM, TL);				   {					E TP, TJ, TO, TU;					TP = W[4];					TJ = TF * TI;					TS = W[5];					TO = TF * TN;					TU = TP * TT;					TR = TP * TQ;					rio[WS(vs, 2)] = FMA(TK, TN, TJ);					iio[WS(vs, 2)] = FNMS(TK, TI, TO);					iio[WS(vs, 3)] = FNMS(TS, TQ, TU);				   }			      }			 }			 rio[WS(vs, 3)] = FMA(TS, TT, TR);			 {			      E T20, T2h, T2H, T2G, T2F, T2u, T2z;			      T20 = FMA(KP559016994, T1Z, T1Y);			      T2u = FNMS(KP559016994, T1Z, T1Y);			      T2z = FNMS(KP559016994, T2g, T2f);			      T2h = FMA(KP559016994, T2g, T2f);			      {				   E T2t, T2y, T2w, T2E, T2B;				   T2t = W[2];				   T2y = W[3];				   T2w = FNMS(KP951056516, T2v, T2u);				   T2E = FMA(KP951056516, T2v, T2u);				   T2B = FMA(KP951056516, T2A, T2z);				   T2H = FNMS(KP951056516, T2A, T2z);				   {					E T2D, T2x, T2C, T2I;					T2D = W[4];					T2x = T2t * T2w;					T2G = W[5];					T2C = T2t * T2B;					T2I = T2D * T2H;					T2F = T2D * T2E;					rio[WS(vs, 2) + WS(rs, 2)] = FMA(T2y, T2B, T2x);					iio[WS(vs, 2) + WS(rs, 2)] = FNMS(T2y, T2w, T2C);					iio[WS(vs, 3) + WS(rs, 2)] = FNMS(T2G, T2E, T2I);				   }			      }			      rio[WS(vs, 3) + WS(rs, 2)] = FMA(T2G, T2H, T2F);			      {				   E T4v, T4u, T4t, T4i, T4n;				   T3O = FMA(KP559016994, T3N, T3M);				   T4i = FNMS(KP559016994, T3N, T3M);				   T4n = FNMS(KP559016994, T44, T43);				   T45 = FMA(KP559016994, T44, T43);				   {					E T4h, T4m, T4k, T4s, T4p;					T4h = W[2];					T4m = W[3];					T4k = FNMS(KP951056516, T4j, T4i);					T4s = FMA(KP951056516, T4j, T4i);					T4p = FMA(KP951056516, T4o, T4n);					T4v = FNMS(KP951056516, T4o, T4n);					{					     E T4r, T4l, T4q, T4w;					     T4r = W[4];					     T4l = T4h * T4k;					     T4u = W[5];					     T4q = T4h * T4p;					     T4w = T4r * T4v;					     T4t = T4r * T4s;					     rio[WS(vs, 2) + WS(rs, 4)] = FMA(T4m, T4p, T4l);					     iio[WS(vs, 2) + WS(rs, 4)] = FNMS(T4m, T4k, T4q);					     iio[WS(vs, 3) + WS(rs, 4)] = FNMS(T4u, T4s, T4w);					}				   }				   rio[WS(vs, 3) + WS(rs, 4)] = FMA(T4u, T4v, T4t);				   {					E T1X, T2a, T28, T2o, T2l;					T1X = W[0];					T2a = W[1];					T28 = FMA(KP951056516, T27, T20);					T2o = FNMS(KP951056516, T27, T20);					T2l = FNMS(KP951056516, T2k, T2h);					T2r = FMA(KP951056516, T2k, T2h);					{					     E T2n, T29, T2m, T2s;					     T2n = W[6];					     T29 = T1X * T28;					     T2q = W[7];					     T2m = T1X * T2l;					     T2s = T2n * T2r;					     T2p = T2n * T2o;					     rio[WS(vs, 1) + WS(rs, 2)] = FMA(T2a, T2l, T29);					     iio[WS(vs, 1) + WS(rs, 2)] = FNMS(T2a, T28, T2m);					     iio[WS(vs, 4) + WS(rs, 2)] = FNMS(T2q, T2o, T2s);					}				   }			      }			 }			 rio[WS(vs, 4) + WS(rs, 2)] = FMA(T2q, T2r, T2p);			 {			      E T3B, T3A, T3z, T4f, T4e, T4d;			      {				   E T3o, T3t, T3l, T3k, T3j, T2U, T3b;				   T3o = FNMS(KP559016994, T2T, T2S);				   T2U = FMA(KP559016994, T2T, T2S);				   T3b = FMA(KP559016994, T3a, T39);				   T3t = FNMS(KP559016994, T3a, T39);				   {					E T2R, T34, T32, T3i, T3f;					T2R = W[0];					T34 = W[1];					T32 = FMA(KP951056516, T31, T2U);					T3i = FNMS(KP951056516, T31, T2U);					T3f = FNMS(KP951056516, T3e, T3b);					T3l = FMA(KP951056516, T3e, T3b);					{					     E T3h, T33, T3g, T3m;					     T3h = W[6];					     T33 = T2R * T32;					     T3k = W[7];					     T3g = T2R * T3f;					     T3m = T3h * T3l;					     T3j = T3h * T3i;					     rio[WS(vs, 1) + WS(rs, 3)] = FMA(T34, T3f, T33);					     iio[WS(vs, 1) + WS(rs, 3)] = FNMS(T34, T32, T3g);					     iio[WS(vs, 4) + WS(rs, 3)] = FNMS(T3k, T3i, T3m);					}				   }				   rio[WS(vs, 4) + WS(rs, 3)] = FMA(T3k, T3l, T3j);				   {					E T3n, T3s, T3q, T3y, T3v;					T3n = W[2];					T3s = W[3];					T3q = FNMS(KP951056516, T3p, T3o);					T3y = FMA(KP951056516, T3p, T3o);					T3v = FMA(KP951056516, T3u, T3t);					T3B = FNMS(KP951056516, T3u, T3t);					{					     E T3x, T3r, T3w, T3C;					     T3x = W[4];					     T3r = T3n * T3q;					     T3A = W[5];					     T3w = T3n * T3v;					     T3C = T3x * T3B;					     T3z = T3x * T3y;					     rio[WS(vs, 2) + WS(rs, 3)] = FMA(T3s, T3v, T3r);					     iio[WS(vs, 2) + WS(rs, 3)] = FNMS(T3s, T3q, T3w);					     iio[WS(vs, 3) + WS(rs, 3)] = FNMS(T3A, T3y, T3C);					}				   }			      }			      rio[WS(vs, 3) + WS(rs, 3)] = FMA(T3A, T3B, T3z);			      {				   E T3L, T3Y, T3W, T4c, T49;				   T3L = W[0];				   T3Y = W[1];				   T3W = FMA(KP951056516, T3V, T3O);				   T4c = FNMS(KP951056516, T3V, T3O);				   T49 = FNMS(KP951056516, T48, T45);				   T4f = FMA(KP951056516, T48, T45);				   {					E T4b, T3X, T4a, T4g;					T4b = W[6];					T3X = T3L * T3W;					T4e = W[7];					T4a = T3L * T49;					T4g = T4b * T4f;					T4d = T4b * T4c;					rio[WS(vs, 1) + WS(rs, 4)] = FMA(T3Y, T49, T3X);					iio[WS(vs, 1) + WS(rs, 4)] = FNMS(T3Y, T3W, T4a);					iio[WS(vs, 4) + WS(rs, 4)] = FNMS(T4e, T4c, T4g);				   }			      }			      rio[WS(vs, 4) + WS(rs, 4)] = FMA(T4e, T4f, T4d);			      {				   E T16, T1n, T1N, T1M, T1L, T1A, T1F;				   T16 = FMA(KP559016994, T15, T14);				   T1A = FNMS(KP559016994, T15, T14);				   T1F = FNMS(KP559016994, T1m, T1l);				   T1n = FMA(KP559016994, T1m, T1l);				   {					E T1z, T1E, T1C, T1K, T1H;					T1z = W[2];					T1E = W[3];					T1C = FNMS(KP951056516, T1B, T1A);					T1K = FMA(KP951056516, T1B, T1A);					T1H = FMA(KP951056516, T1G, T1F);					T1N = FNMS(KP951056516, T1G, T1F);					{					     E T1J, T1D, T1I, T1O;					     T1J = W[4];					     T1D = T1z * T1C;					     T1M = W[5];					     T1I = T1z * T1H;					     T1O = T1J * T1N;					     T1L = T1J * T1K;					     rio[WS(vs, 2) + WS(rs, 1)] = FMA(T1E, T1H, T1D);					     iio[WS(vs, 2) + WS(rs, 1)] = FNMS(T1E, T1C, T1I);					     iio[WS(vs, 3) + WS(rs, 1)] = FNMS(T1M, T1K, T1O);					}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -