⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cfdft2_20.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:04:17 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 20 -dit -name hc2cfdft2_20 -include hc2cf.h *//* * This function contains 316 FP additions, 238 FP multiplications, * (or, 176 additions, 98 multiplications, 140 fused multiply/add), * 180 stack variables, 5 constants, and 80 memory accesses */#include "hc2cf.h"static void hc2cfdft2_20(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP951056516, +0.951056516295153572116439333379382143405698634);     DK(KP559016994, +0.559016994374947424102293417182819058860154590);     DK(KP250000000, +0.250000000000000000000000000000000000000000000);     DK(KP500000000, +0.500000000000000000000000000000000000000000000);     DK(KP618033988, +0.618033988749894848204586834365638117720309180);     INT m;     for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) {	  E T5h, T5C, T5E, T5y, T5w, T5x, T5D, T5z;	  {	       E Tm, Tq, Tn, T1, T6, Tg, Tp, Tb, T1i, TU, Tr, TW, Tx, T2B, T1A;	       E T1u, T2y, T33, T26, T1o, T30, T22, TD, T1Q, T2a, T2e, T2V, T2R, TG, T1V;	       E TV, TH, TN, T2t, T12, T2p;	       {		    E Tw, To, T29, T1h, T1n, T2d, TC, T2U;		    Tm = W[0];		    Tq = W[3];		    Tn = W[2];		    T1 = W[6];		    T6 = W[7];		    Tw = Tm * Tq;		    To = Tm * Tn;		    T29 = Tm * T1;		    T1h = Tn * T1;		    T1n = Tn * T6;		    T2d = Tm * T6;		    Tg = W[5];		    Tp = W[1];		    Tb = W[4];		    {			 E T21, T25, T1t, T1z;			 T1i = FMA(Tq, T6, T1h);			 T25 = Tm * Tg;			 T1z = Tn * Tg;			 TU = FMA(Tp, Tq, To);			 Tr = FNMS(Tp, Tq, To);			 TW = FNMS(Tp, Tn, Tw);			 Tx = FMA(Tp, Tn, Tw);			 T1t = Tn * Tb;			 T21 = Tm * Tb;			 T2B = FMA(Tq, Tb, T1z);			 T1A = FNMS(Tq, Tb, T1z);			 TC = Tr * Tb;			 T1u = FMA(Tq, Tg, T1t);			 T2y = FNMS(Tq, Tg, T1t);			 T33 = FMA(Tp, Tb, T25);			 T26 = FNMS(Tp, Tb, T25);			 T1o = FNMS(Tq, T1, T1n);			 T30 = FNMS(Tp, Tg, T21);			 T22 = FMA(Tp, Tg, T21);		    }		    TD = FMA(Tx, Tg, TC);		    T1Q = FNMS(Tx, Tg, TC);		    T2a = FMA(Tp, T6, T29);		    T2e = FNMS(Tp, T1, T2d);		    T2U = Tr * T6;		    {			 E T2Q, TE, TM, TF;			 T2Q = Tr * T1;			 TF = Tr * Tg;			 T2V = FNMS(Tx, T1, T2U);			 T2R = FMA(Tx, T6, T2Q);			 TG = FNMS(Tx, Tb, TF);			 T1V = FMA(Tx, Tb, TF);			 TE = TD * T1;			 TM = TD * T6;			 TV = TU * Tb;			 TH = FMA(TG, T6, TE);			 TN = FNMS(TG, T1, TM);			 T2t = TU * T1;			 T12 = TU * Tg;			 T2p = TU * T6;		    }	       }	       {		    E T36, T3Q, T5f, T4D, T5g, T2Y, T4E, T3P, T5R, T5k, T39, TT, T3T, T3m, T49;		    E T4X, T5T, T5r, T3c, T2i, T3W, T3B, T4o, T4U, T5U, T5u, T3d, T2J, T3X, T3I;		    E T4v, T4V, T5Q, T5n, T3a, T1G, T3U, T3t, T4g, T4Y;		    {			 E T13, T2m, T2q, T2u, T2f, T9, T2O, TA, T2c, T4k, T3i, T5, T2Z, T1e, T2G;			 E T1O, T2W, TQ, T2C, T1Y, T3v, T27, Tj, T1l, T2v, T3g, T1m, T1D, T2n, T1x;			 E T2k, T3E, T4c, T2l, T1y, T10, T31, T16, T34, T32, T11, T4B, T3p, T4A, T1T;			 E T3n, T1b, T2A, T4q, T1U, Te, Tf, T24, T4i, T1r, T4a, T3C, T2s, T43, Tv;			 E T3L, T2N, T45, TL, T3N, T2T, T2E, T1K;			 {			      E T2j, TX, T1B, T1C;			      {				   E T1c, T1d, T1M, T1N;				   {					E T2, T3, T7, T8;					T7 = Rp[WS(rs, 9)];					T8 = Rm[WS(rs, 9)];					T2 = Ip[WS(rs, 9)];					T2j = FMA(TW, Tg, TV);					TX = FNMS(TW, Tg, TV);					T13 = FMA(TW, Tb, T12);					T2m = FNMS(TW, Tb, T12);					T2q = FNMS(TW, T1, T2p);					T2u = FMA(TW, T6, T2t);					T2f = T7 + T8;					T9 = T7 - T8;					T3 = Im[WS(rs, 9)];					{					     E Ty, Tz, T2b, T4;					     Ty = Rp[WS(rs, 2)];					     Tz = Rm[WS(rs, 2)];					     T1c = Ip[0];					     T2b = T2 - T3;					     T4 = T2 + T3;					     T2O = Ty - Tz;					     TA = Ty + Tz;					     T2c = T2a * T2b;					     T4k = T2e * T2b;					     T3i = T6 * T4;					     T5 = T1 * T4;					     T1d = Im[0];					     T1M = Rp[WS(rs, 1)];					     T1N = Rm[WS(rs, 1)];					}				   }				   {					E TO, TP, T1W, T1X;					TO = Rp[WS(rs, 7)];					T2Z = T1c - T1d;					T1e = T1c + T1d;					T2G = T1M + T1N;					T1O = T1M - T1N;					TP = Rm[WS(rs, 7)];					T1W = Rm[WS(rs, 6)];					T1X = Rp[WS(rs, 6)];					{					     E Th, Ti, T1j, T1k;					     Th = Rm[WS(rs, 4)];					     T2W = TO - TP;					     TQ = TO + TP;					     T2C = T1X + T1W;					     T1Y = T1W - T1X;					     Ti = Rp[WS(rs, 4)];					     T1j = Ip[WS(rs, 8)];					     T1k = Im[WS(rs, 8)];					     T3v = T1Q * T1Y;					     T27 = Ti + Th;					     Tj = Th - Ti;					     T1l = T1j - T1k;					     T2v = T1j + T1k;					     T1B = Rp[WS(rs, 3)];					     T3g = Tb * Tj;					     T1m = T1i * T1l;					     T1C = Rm[WS(rs, 3)];					}				   }			      }			      {				   E T18, T19, T1R, T1S;				   {					E TY, TZ, T1v, T1w, T14, T15;					T1v = Ip[WS(rs, 3)];					T1w = Im[WS(rs, 3)];					TY = Ip[WS(rs, 5)];					T1D = T1B + T1C;					T2n = T1B - T1C;					T1x = T1v - T1w;					T2k = T1v + T1w;					T3E = T2j * T2n;					T4c = T1u * T1D;					T2l = T2j * T2k;					T1y = T1u * T1x;					TZ = Im[WS(rs, 5)];					T14 = Rp[WS(rs, 5)];					T15 = Rm[WS(rs, 5)];					T18 = Rm[0];					T10 = TY + TZ;					T31 = TY - TZ;					T16 = T14 - T15;					T34 = T14 + T15;					T32 = T30 * T31;					T11 = TX * T10;					T4B = T30 * T34;					T3p = TX * T16;					T19 = Rp[0];					T1R = Ip[WS(rs, 6)];					T1S = Im[WS(rs, 6)];				   }				   {					E T2r, T23, T1p, T1q;					{					     E Tc, T1a, T2z, Td;					     Tc = Ip[WS(rs, 4)];					     T1a = T18 - T19;					     T4A = T19 + T18;					     T1T = T1R + T1S;					     T2z = T1R - T1S;					     Td = Im[WS(rs, 4)];					     T3n = Tm * T1a;					     T1b = Tp * T1a;					     T2A = T2y * T2z;					     T4q = T2B * T2z;					     T1U = T1Q * T1T;					     T23 = Tc - Td;					     Te = Tc + Td;					}					T1p = Rp[WS(rs, 8)];					T1q = Rm[WS(rs, 8)];					Tf = Tb * Te;					T24 = T22 * T23;					T4i = T26 * T23;					T1r = T1p + T1q;					T2r = T1q - T1p;					{					     E T2M, Tu, Ts, Tt;					     Ts = Ip[WS(rs, 2)];					     Tt = Im[WS(rs, 2)];					     T4a = T1i * T1r;					     T3C = T2u * T2r;					     T2s = T2q * T2r;					     T2M = Ts + Tt;					     Tu = Ts - Tt;					     {						  E T2S, TK, TI, TJ, T1I, T1J;						  TI = Ip[WS(rs, 7)];						  TJ = Im[WS(rs, 7)];						  T43 = Tx * Tu;						  Tv = Tr * Tu;						  T3L = TG * T2M;						  T2N = TD * T2M;						  T2S = TI + TJ;						  TK = TI - TJ;						  T1I = Ip[WS(rs, 1)];						  T1J = Im[WS(rs, 1)];						  T45 = TN * TK;						  TL = TH * TK;						  T3N = T2V * T2S;						  T2T = T2R * T2S;						  T2E = T1I - T1J;						  T1K = T1I + T1J;					     }					}				   }			      }			 }			 {			      E T3x, T1L, T2F, T4s, T2P, T2X, T3M, T3O, T35, T4C;			      T35 = FNMS(T33, T34, T32);			      T4C = FMA(T33, T31, T4B);			      T3x = Tq * T1K;			      T1L = Tn * T1K;			      T2F = TU * T2E;			      T4s = TW * T2E;			      T36 = T2Z - T35;			      T3Q = T35 + T2Z;			      T5f = T4A + T4C;			      T4D = T4A - T4C;			      T2P = FNMS(TG, T2O, T2N);			      T2X = FNMS(T2V, T2W, T2T);			      T3M = FMA(TD, T2O, T3L);			      T3O = FMA(T2R, T2W, T3N);			      {				   E TB, T5j, Tl, T5i, T47, TR, T3h, T3j;				   {					E Ta, Tk, T44, T46;					Ta = FNMS(T6, T9, T5);					T5g = T2P + T2X;					T2Y = T2P - T2X;					T4E = T3O - T3M;					T3P = T3M + T3O;					Tk = FMA(Tg, Tj, Tf);					T44 = FMA(Tr, TA, T43);					T46 = FMA(TH, TQ, T45);					TB = FNMS(Tx, TA, Tv);					T5j = Tk + Ta;					Tl = Ta - Tk;					T5i = T44 + T46;					T47 = T44 - T46;					TR = FNMS(TN, TQ, TL);					T3h = FNMS(Tg, Te, T3g);					T3j = FMA(T1, T9, T3i);				   }				   {					E T3l, T48, T3k, TS;					T5R = T5i - T5j;					T5k = T5i + T5j;					T3l = TB + TR;					TS = TB - TR;					T48 = T3h + T3j;					T3k = T3h - T3j;					T39 = TS + Tl;					TT = Tl - TS;					T3T = T3l + T3k;					T3m = T3k - T3l;					T49 = T47 + T48;					T4X = T47 - T48;				   }			      }			      {				   E T28, T5q, T20, T5p, T4m, T2g, T3w, T3y;				   {					E T1P, T1Z, T4j, T4l;					T1P = FNMS(Tq, T1O, T1L);					T1Z = FMA(T1V, T1Y, T1U);					T4j = FMA(T22, T27, T4i);					T4l = FMA(T2a, T2f, T4k);					T28 = FNMS(T26, T27, T24);					T5q = T1Z + T1P;					T20 = T1P - T1Z;					T5p = T4j + T4l;					T4m = T4j - T4l;					T2g = FNMS(T2e, T2f, T2c);					T3w = FNMS(T1V, T1T, T3v);					T3y = FMA(Tn, T1O, T3x);				   }				   {					E T3A, T4n, T3z, T2h;					T5T = T5p - T5q;					T5r = T5p + T5q;					T3A = T28 + T2g;					T2h = T28 - T2g;					T4n = T3w + T3y;					T3z = T3w - T3y;					T3c = T2h + T20;					T2i = T20 - T2h;					T3W = T3A + T3z;					T3B = T3z - T3A;					T4o = T4m + T4n;					T4U = T4m - T4n;				   }			      }			      {				   E T2D, T5s, T2x, T5t, T4u, T2H, T3D, T3F;				   {					E T2o, T2w, T4r, T4t;					T2o = FNMS(T2m, T2n, T2l);					T2w = FMA(T2u, T2v, T2s);					T4r = FMA(T2y, T2C, T4q);					T4t = FMA(TU, T2G, T4s);					T2D = FNMS(T2B, T2C, T2A);					T5s = T2w + T2o;					T2x = T2o - T2w;					T5t = T4r + T4t;					T4u = T4r - T4t;					T2H = FNMS(TW, T2G, T2F);					T3D = FNMS(T2q, T2v, T3C);					T3F = FMA(T2m, T2k, T3E);				   }				   {					E T3H, T4p, T3G, T2I;					T5U = T5t - T5s;					T5u = T5s + T5t;					T3H = T2D + T2H;					T2I = T2D - T2H;					T4p = T3D + T3F;					T3G = T3D - T3F;					T3d = T2x + T2I;					T2J = T2x - T2I;					T3X = T3G + T3H;					T3I = T3G - T3H;					T4v = T4p + T4u;					T4V = T4u - T4p;				   }			      }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -