⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 q1_6.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:41:37 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twidsq -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 6 -name q1_6 -include q.h *//* * This function contains 276 FP additions, 192 FP multiplications, * (or, 144 additions, 60 multiplications, 132 fused multiply/add), * 129 stack variables, 2 constants, and 144 memory accesses */#include "q.h"static void q1_6(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms){     DK(KP866025403, +0.866025403784438646763723170752936183471402627);     DK(KP500000000, +0.500000000000000000000000000000000000000000000);     INT m;     for (m = mb, W = W + (mb * 10); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 10, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) {	  E T4c, T4f, T4e, T4g, T4d;	  {	       E T3, Tw, Ta, TW, Tg, TG, TM, TT, TU, TP, Tn, T17, TV, TJ, Tv;	       E T1A, T1e, T20, T1k, T1K, T1Q, T1X, T1Y, T1T, T1r, T1Z, T1N, T1z, T31, T32;	       E T2X, T2v, T2b, T33, T2R, T2D, T2E, T2i, T34, T3f, T2o, T2O, T2U, T3I, T3m;	       E T48, T3s, T3S, T3Y, T45, T46, T41, T3z, T4j, T47, T3V, T3H, T4M, T4q, T5c;	       E T4w, T4W, T52, T59, T5a, T55, T4D, T5b, T4Z, T4L, T6d, T5r, T6e, T69, T5H;	       E T5w, T5n, T6f, T63, T5P, T5s, T5o, T5p;	       {		    E T2f, T2k, T2g, T2c, T2d;		    {			 E T1b, T1g, T1c, T18, T19;			 {			      E T4, Tc, Te, T9, T5;			      {				   E T1, T2, T7, T8;				   T1 = rio[0];				   T2 = rio[WS(rs, 3)];				   T7 = rio[WS(rs, 4)];				   T8 = rio[WS(rs, 1)];				   T4 = rio[WS(rs, 2)];				   Tc = T1 - T2;				   T3 = T1 + T2;				   Te = T7 - T8;				   T9 = T7 + T8;				   T5 = rio[WS(rs, 5)];			      }			      {				   E TN, Tj, Tk, Tl, Tt, Th, Ti;				   Th = iio[WS(rs, 2)];				   Ti = iio[WS(rs, 5)];				   {					E Tr, Ts, Td, T6, Tf;					Tr = iio[0];					Td = T4 - T5;					T6 = T4 + T5;					TN = Th + Ti;					Tj = Th - Ti;					Tf = Td + Te;					Tw = Te - Td;					Ta = T6 + T9;					TW = T9 - T6;					Tg = FNMS(KP500000000, Tf, Tc);					TG = Tc + Tf;					Ts = iio[WS(rs, 3)];					TM = FNMS(KP500000000, Ta, T3);					Tk = iio[WS(rs, 4)];					Tl = iio[WS(rs, 1)];					Tt = Tr - Ts;					TT = Tr + Ts;				   }				   {					E T15, TO, Tm, T16, Tu;					T15 = rio[WS(vs, 1)];					TO = Tk + Tl;					Tm = Tk - Tl;					T16 = rio[WS(vs, 1) + WS(rs, 3)];					T1b = rio[WS(vs, 1) + WS(rs, 4)];					TU = TN + TO;					TP = TN - TO;					Tu = Tj + Tm;					Tn = Tj - Tm;					T1g = T15 - T16;					T17 = T15 + T16;					TV = FNMS(KP500000000, TU, TT);					TJ = Tt + Tu;					Tv = FNMS(KP500000000, Tu, Tt);					T1c = rio[WS(vs, 1) + WS(rs, 1)];					T18 = rio[WS(vs, 1) + WS(rs, 2)];					T19 = rio[WS(vs, 1) + WS(rs, 5)];				   }			      }			 }			 {			      E T1v, T1R, T1n, T1w, T1o, T1p;			      {				   E T1l, T1i, T1d, T1h, T1a, T1m, T1j;				   T1l = iio[WS(vs, 1) + WS(rs, 2)];				   T1i = T1b - T1c;				   T1d = T1b + T1c;				   T1h = T18 - T19;				   T1a = T18 + T19;				   T1m = iio[WS(vs, 1) + WS(rs, 5)];				   T1v = iio[WS(vs, 1)];				   T1j = T1h + T1i;				   T1A = T1i - T1h;				   T1e = T1a + T1d;				   T20 = T1d - T1a;				   T1R = T1l + T1m;				   T1n = T1l - T1m;				   T1k = FNMS(KP500000000, T1j, T1g);				   T1K = T1g + T1j;				   T1Q = FNMS(KP500000000, T1e, T17);				   T1w = iio[WS(vs, 1) + WS(rs, 3)];				   T1o = iio[WS(vs, 1) + WS(rs, 4)];				   T1p = iio[WS(vs, 1) + WS(rs, 1)];			      }			      {				   E T2z, T2V, T2r, T2A, T2s, T2t;				   {					E T2p, T1x, T1S, T1q, T2q, T1y;					T2p = iio[WS(vs, 2) + WS(rs, 2)];					T1X = T1v + T1w;					T1x = T1v - T1w;					T1S = T1o + T1p;					T1q = T1o - T1p;					T2q = iio[WS(vs, 2) + WS(rs, 5)];					T2z = iio[WS(vs, 2)];					T1Y = T1R + T1S;					T1T = T1R - T1S;					T1y = T1n + T1q;					T1r = T1n - T1q;					T2V = T2p + T2q;					T2r = T2p - T2q;					T1Z = FNMS(KP500000000, T1Y, T1X);					T1N = T1x + T1y;					T1z = FNMS(KP500000000, T1y, T1x);					T2A = iio[WS(vs, 2) + WS(rs, 3)];					T2s = iio[WS(vs, 2) + WS(rs, 4)];					T2t = iio[WS(vs, 2) + WS(rs, 1)];				   }				   {					E T29, T2B, T2W, T2u, T2a, T2C;					T29 = rio[WS(vs, 2)];					T31 = T2z + T2A;					T2B = T2z - T2A;					T2W = T2s + T2t;					T2u = T2s - T2t;					T2a = rio[WS(vs, 2) + WS(rs, 3)];					T2f = rio[WS(vs, 2) + WS(rs, 4)];					T32 = T2V + T2W;					T2X = T2V - T2W;					T2C = T2r + T2u;					T2v = T2r - T2u;					T2k = T29 - T2a;					T2b = T29 + T2a;					T33 = FNMS(KP500000000, T32, T31);					T2R = T2B + T2C;					T2D = FNMS(KP500000000, T2C, T2B);					T2g = rio[WS(vs, 2) + WS(rs, 1)];					T2c = rio[WS(vs, 2) + WS(rs, 2)];					T2d = rio[WS(vs, 2) + WS(rs, 5)];				   }			      }			 }		    }		    {			 E T4n, T4s, T4o, T4k, T4l;			 {			      E T3j, T3o, T3k, T3g, T3h;			      {				   E T3d, T2m, T2h, T2l, T2e, T3e, T2n;				   T3d = rio[WS(vs, 3)];				   T2m = T2f - T2g;				   T2h = T2f + T2g;				   T2l = T2c - T2d;				   T2e = T2c + T2d;				   T3e = rio[WS(vs, 3) + WS(rs, 3)];				   T3j = rio[WS(vs, 3) + WS(rs, 4)];				   T2n = T2l + T2m;				   T2E = T2m - T2l;				   T2i = T2e + T2h;				   T34 = T2h - T2e;				   T3o = T3d - T3e;				   T3f = T3d + T3e;				   T2o = FNMS(KP500000000, T2n, T2k);				   T2O = T2k + T2n;				   T2U = FNMS(KP500000000, T2i, T2b);				   T3k = rio[WS(vs, 3) + WS(rs, 1)];				   T3g = rio[WS(vs, 3) + WS(rs, 2)];				   T3h = rio[WS(vs, 3) + WS(rs, 5)];			      }			      {				   E T3D, T3Z, T3v, T3E, T3w, T3x;				   {					E T3t, T3q, T3l, T3p, T3i, T3u, T3r;					T3t = iio[WS(vs, 3) + WS(rs, 2)];					T3q = T3j - T3k;					T3l = T3j + T3k;					T3p = T3g - T3h;					T3i = T3g + T3h;					T3u = iio[WS(vs, 3) + WS(rs, 5)];					T3D = iio[WS(vs, 3)];					T3r = T3p + T3q;					T3I = T3q - T3p;					T3m = T3i + T3l;					T48 = T3l - T3i;					T3Z = T3t + T3u;					T3v = T3t - T3u;					T3s = FNMS(KP500000000, T3r, T3o);					T3S = T3o + T3r;					T3Y = FNMS(KP500000000, T3m, T3f);					T3E = iio[WS(vs, 3) + WS(rs, 3)];					T3w = iio[WS(vs, 3) + WS(rs, 4)];					T3x = iio[WS(vs, 3) + WS(rs, 1)];				   }				   {					E T4h, T3F, T40, T3y, T4i, T3G;					T4h = rio[WS(vs, 4)];					T45 = T3D + T3E;					T3F = T3D - T3E;					T40 = T3w + T3x;					T3y = T3w - T3x;					T4i = rio[WS(vs, 4) + WS(rs, 3)];					T4n = rio[WS(vs, 4) + WS(rs, 4)];					T46 = T3Z + T40;					T41 = T3Z - T40;					T3G = T3v + T3y;					T3z = T3v - T3y;					T4s = T4h - T4i;					T4j = T4h + T4i;					T47 = FNMS(KP500000000, T46, T45);					T3V = T3F + T3G;					T3H = FNMS(KP500000000, T3G, T3F);					T4o = rio[WS(vs, 4) + WS(rs, 1)];					T4k = rio[WS(vs, 4) + WS(rs, 2)];					T4l = rio[WS(vs, 4) + WS(rs, 5)];				   }			      }			 }			 {			      E T4H, T53, T4z, T4I, T4A, T4B;			      {				   E T4x, T4u, T4p, T4t, T4m, T4y, T4v;				   T4x = iio[WS(vs, 4) + WS(rs, 2)];				   T4u = T4n - T4o;				   T4p = T4n + T4o;				   T4t = T4k - T4l;				   T4m = T4k + T4l;				   T4y = iio[WS(vs, 4) + WS(rs, 5)];				   T4H = iio[WS(vs, 4)];				   T4v = T4t + T4u;				   T4M = T4u - T4t;				   T4q = T4m + T4p;				   T5c = T4p - T4m;				   T53 = T4x + T4y;				   T4z = T4x - T4y;				   T4w = FNMS(KP500000000, T4v, T4s);				   T4W = T4s + T4v;				   T52 = FNMS(KP500000000, T4q, T4j);				   T4I = iio[WS(vs, 4) + WS(rs, 3)];				   T4A = iio[WS(vs, 4) + WS(rs, 4)];				   T4B = iio[WS(vs, 4) + WS(rs, 1)];			      }			      {				   E T5L, T67, T5D, T5M, T5E, T5F;				   {					E T5B, T4J, T54, T4C, T5C, T4K;					T5B = iio[WS(vs, 5) + WS(rs, 2)];					T59 = T4H + T4I;					T4J = T4H - T4I;					T54 = T4A + T4B;					T4C = T4A - T4B;					T5C = iio[WS(vs, 5) + WS(rs, 5)];					T5L = iio[WS(vs, 5)];					T5a = T53 + T54;					T55 = T53 - T54;					T4K = T4z + T4C;					T4D = T4z - T4C;					T67 = T5B + T5C;					T5D = T5B - T5C;					T5b = FNMS(KP500000000, T5a, T59);					T4Z = T4J + T4K;					T4L = FNMS(KP500000000, T4K, T4J);					T5M = iio[WS(vs, 5) + WS(rs, 3)];					T5E = iio[WS(vs, 5) + WS(rs, 4)];					T5F = iio[WS(vs, 5) + WS(rs, 1)];				   }				   {					E T5l, T5N, T68, T5G, T5m, T5O;					T5l = rio[WS(vs, 5)];					T6d = T5L + T5M;					T5N = T5L - T5M;					T68 = T5E + T5F;					T5G = T5E - T5F;					T5m = rio[WS(vs, 5) + WS(rs, 3)];					T5r = rio[WS(vs, 5) + WS(rs, 4)];					T6e = T67 + T68;					T69 = T67 - T68;					T5O = T5D + T5G;					T5H = T5D - T5G;					T5w = T5l - T5m;					T5n = T5l + T5m;					T6f = FNMS(KP500000000, T6e, T6d);					T63 = T5N + T5O;					T5P = FNMS(KP500000000, T5O, T5N);					T5s = rio[WS(vs, 5) + WS(rs, 1)];					T5o = rio[WS(vs, 5) + WS(rs, 2)];					T5p = rio[WS(vs, 5) + WS(rs, 5)];				   }			      }			 }		    }	       }	       {		    E T6a, T6h, T5I, T5R, T65, T6c;		    {			 E T5Q, T5u, T6g, T5A, T60, T66;			 {			      E T5y, T5t, T5x, T5q, T5z;			      rio[0] = T3 + Ta;			      T5y = T5r - T5s;			      T5t = T5r + T5s;			      T5x = T5o - T5p;			      T5q = T5o + T5p;			      iio[0] = TT + TU;			      rio[WS(rs, 1)] = T17 + T1e;			      T5z = T5x + T5y;			      T5Q = T5y - T5x;			      T5u = T5q + T5t;			      T6g = T5t - T5q;			      T5A = FNMS(KP500000000, T5z, T5w);			      T60 = T5w + T5z;			      iio[WS(rs, 1)] = T1X + T1Y;			      T66 = FNMS(KP500000000, T5u, T5n);			      rio[WS(rs, 2)] = T2b + T2i;			 }			 iio[WS(rs, 2)] = T31 + T32;			 iio[WS(rs, 4)] = T59 + T5a;			 rio[WS(rs, 4)] = T4j + T4q;			 rio[WS(rs, 3)] = T3f + T3m;			 iio[WS(rs, 3)] = T45 + T46;			 {			      E TA, TD, TQ, T10, T13, TX, TZ, T12;			      rio[WS(rs, 5)] = T5n + T5u;			      iio[WS(rs, 5)] = T6d + T6e;			      {				   E To, Tx, Tb, Tq;				   TA = FNMS(KP866025403, Tn, Tg);				   To = FMA(KP866025403, Tn, Tg);				   Tx = FMA(KP866025403, Tw, Tv);				   TD = FNMS(KP866025403, Tw, Tv);				   Tb = W[0];				   Tq = W[1];				   {					E TI, TK, TH, Ty, Tp, TF;					Ty = Tb * Tx;					Tp = Tb * To;					TF = W[4];					TI = W[5];					iio[WS(vs, 1)] = FNMS(Tq, To, Ty);					rio[WS(vs, 1)] = FMA(Tq, Tx, Tp);					TK = TF * TJ;					TH = TF * TG;					TQ = FNMS(KP866025403, TP, TM);					T10 = FMA(KP866025403, TP, TM);					T13 = FMA(KP866025403, TW, TV);					TX = FNMS(KP866025403, TW, TV);					iio[WS(vs, 3)] = FNMS(TI, TG, TK);					rio[WS(vs, 3)] = FMA(TI, TJ, TH);					TZ = W[6];					T12 = W[7];				   }			      }			      {				   E TC, TE, TB, TL, TS;				   {					E T62, T64, T61, T14, T11, T5Z;					T14 = TZ * T13;					T11 = TZ * T10;					T5Z = W[4];					T62 = W[5];					iio[WS(vs, 4)] = FNMS(T12, T10, T14);					rio[WS(vs, 4)] = FMA(T12, T13, T11);					T64 = T5Z * T63;					T61 = T5Z * T60;					{					     E T6k, T6n, T6j, T6m, T6o, T6l, Tz;					     T6a = FNMS(KP866025403, T69, T66);					     T6k = FMA(KP866025403, T69, T66);					     T6n = FMA(KP866025403, T6g, T6f);					     T6h = FNMS(KP866025403, T6g, T6f);					     iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T62, T60, T64);					     rio[WS(vs, 3) + WS(rs, 5)] = FMA(T62, T63, T61);					     T6j = W[6];					     T6m = W[7];					     T6o = T6j * T6n;					     T6l = T6j * T6k;					     Tz = W[8];					     TC = W[9];					     iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T6m, T6k, T6o);					     rio[WS(vs, 4) + WS(rs, 5)] = FMA(T6m, T6n, T6l);					     TE = Tz * TD;					     TB = Tz * TA;					}				   }				   iio[WS(vs, 5)] = FNMS(TC, TA, TE);				   rio[WS(vs, 5)] = FMA(TC, TD, TB);				   TL = W[2];				   TS = W[3];				   {					E T5U, T5X, T5W, T5Y, T5V, TY, TR, T5T;					T5I = FMA(KP866025403, T5H, T5A);					T5U = FNMS(KP866025403, T5H, T5A);					T5X = FNMS(KP866025403, T5Q, T5P);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -