⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cfdft2_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:04:03 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -twiddle-log3 -precompute-twiddles -n 32 -dit -name hc2cfdft2_32 -include hc2cf.h *//* * This function contains 552 FP additions, 414 FP multiplications, * (or, 300 additions, 162 multiplications, 252 fused multiply/add), * 196 stack variables, 8 constants, and 128 memory accesses */#include "hc2cf.h"static void hc2cfdft2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     DK(KP500000000, +0.500000000000000000000000000000000000000000000);     INT m;     for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) {	  E Tax, TaA;	  {	       E T1, Th, T2, T5, Ti, Ty, T1t, T3, Tb, Tj, TY, TK, Tl, T4, Tk;	       T1 = W[0];	       Th = W[4];	       T2 = W[2];	       T5 = W[3];	       Ti = W[6];	       Ty = T1 * Th;	       T1t = T2 * Th;	       T3 = T1 * T2;	       Tb = T1 * T5;	       Tj = Th * Ti;	       TY = T2 * Ti;	       TK = T1 * Ti;	       Tl = W[7];	       T4 = W[1];	       Tk = W[5];	       {		    E T3j, T7Z, T5b, T93, T6B, T8V, T4d, T8J, T8r, T6e, T8l, T1T, T8C, T54, T8i;		    E T5O, T94, T31, T8K, T6w, T8U, T3Y, T80, T5g, T8B, T69, T8h, T1s, T8q, T4T;		    E T8k, T5J, Tx, T8a, T5y, T8d, T4s, T5Y, T8v, T8E, T2k, T82, T6l, T3z, T83;		    E T5m, T8X, T8O, T2F, T86, T6q, T3M, T85, T5r, T8Y, T8R, TW, T8e, T8x, T4B;		    E T5D, T8b, T63, T8w;		    {			 E TL, T2l, T1c, Tc, T1a, T6, Tm, T2v, Tz, T2q, TR, Ts, T2A, TF, T1H;			 E T1g, T1d, T1F, T34, T3F, T3B, T32, T3w, T3s, T4p, T4l, T2f, T29, T4K, T4S;			 E T5G, T5I;			 {			      E TZ, T2R, T2H, T15, T2W, T2M, T4I, T4E, T3V, T3S, T4Q, T4M, T1n, T1h, T4X;			      E T53, T5L, T5N, T5d, T5f;			      {				   E T1u, T1A, T51, T4Y, T28, T25, T44, T40, T1O, T1I, T3b, T35, T4b, T3i, T45;				   E T38, T39, T58, T49, T3e, T41;				   {					E T3g, T3h, T36, T37, TQ;					T3g = Ip[0];					TZ = FNMS(T5, Tl, TY);					T2R = FMA(T5, Tl, TY);					TQ = T1 * Tl;					{					     E T14, Tr, T1z, TE;					     T14 = T2 * Tl;					     Tr = Th * Tl;					     TL = FMA(T4, Tl, TK);					     T2l = FNMS(T4, Tl, TK);					     T1c = FMA(T4, T2, Tb);					     Tc = FNMS(T4, T2, Tb);					     T1a = FNMS(T4, T5, T3);					     T6 = FMA(T4, T5, T3);					     Tm = FMA(Tk, Tl, Tj);					     T2v = FNMS(T5, Tk, T1t);					     T1u = FMA(T5, Tk, T1t);					     Tz = FNMS(T4, Tk, Ty);					     T2H = FMA(T4, Tk, Ty);					     T1z = T2 * Tk;					     TE = T1 * Tk;					     T2q = FMA(T4, Ti, TQ);					     TR = FNMS(T4, Ti, TQ);					     T15 = FMA(T5, Ti, T14);					     T2W = FNMS(T5, Ti, T14);					     Ts = FNMS(Tk, Ti, Tr);					     {						  E T1f, T4H, T4D, T1b;						  T1f = T1a * Tk;						  T4H = T1a * Tl;						  T4D = T1a * Ti;						  T1b = T1a * Th;						  {						       E T27, T3E, T3A, T24;						       T27 = T6 * Tk;						       T3E = T6 * Tl;						       T3A = T6 * Ti;						       T24 = T6 * Th;						       {							    E T3v, T3r, T4P, T4L;							    T3v = T1u * Tl;							    T3r = T1u * Ti;							    T4P = T2v * Tl;							    T4L = T2v * Ti;							    {								 E T4o, T4k, T43, T3Z;								 T4o = T2H * Tl;								 T4k = T2H * Ti;								 T43 = Tz * Tl;								 T3Z = Tz * Ti;								 T1A = FNMS(T5, Th, T1z);								 T2A = FMA(T5, Th, T1z);								 T2M = FNMS(T4, Th, TE);								 TF = FMA(T4, Th, TE);								 T1H = FNMS(T1c, Th, T1f);								 T1g = FMA(T1c, Th, T1f);								 T51 = FNMS(T1c, Ti, T4H);								 T4I = FMA(T1c, Ti, T4H);								 T4Y = FMA(T1c, Tl, T4D);								 T4E = FNMS(T1c, Tl, T4D);								 T1d = FNMS(T1c, Tk, T1b);								 T1F = FMA(T1c, Tk, T1b);								 T34 = FMA(Tc, Th, T27);								 T28 = FNMS(Tc, Th, T27);								 T3V = FNMS(Tc, Ti, T3E);								 T3F = FMA(Tc, Ti, T3E);								 T3S = FMA(Tc, Tl, T3A);								 T3B = FNMS(Tc, Tl, T3A);								 T25 = FMA(Tc, Tk, T24);								 T32 = FNMS(Tc, Tk, T24);								 T3w = FNMS(T1A, Ti, T3v);								 T3s = FMA(T1A, Tl, T3r);								 T4Q = FNMS(T2A, Ti, T4P);								 T4M = FMA(T2A, Tl, T4L);								 T4p = FNMS(T2M, Ti, T4o);								 T4l = FMA(T2M, Tl, T4k);								 T44 = FNMS(TF, Ti, T43);								 T40 = FMA(TF, Tl, T3Z);								 {								      E T1m, T1e, T1N, T1G;								      T1m = T1d * Tl;								      T1e = T1d * Ti;								      T1N = T1F * Tl;								      T1G = T1F * Ti;								      {									   E T2e, T26, T3a, T33;									   T2e = T25 * Tl;									   T26 = T25 * Ti;									   T3a = T32 * Tl;									   T33 = T32 * Ti;									   T1n = FNMS(T1g, Ti, T1m);									   T1h = FMA(T1g, Tl, T1e);									   T1O = FNMS(T1H, Ti, T1N);									   T1I = FMA(T1H, Tl, T1G);									   T2f = FNMS(T28, Ti, T2e);									   T29 = FMA(T28, Tl, T26);									   T3b = FNMS(T34, Ti, T3a);									   T35 = FMA(T34, Tl, T33);									   T3h = Im[0];								      }								 }							    }						       }						  }					     }					}					T36 = Ip[WS(rs, 8)];					T37 = Im[WS(rs, 8)];					{					     E T47, T48, T3c, T3d;					     T47 = Rm[0];					     T4b = T3g + T3h;					     T3i = T3g - T3h;					     T45 = T36 + T37;					     T38 = T36 - T37;					     T48 = Rp[0];					     T3c = Rp[WS(rs, 8)];					     T3d = Rm[WS(rs, 8)];					     T39 = T35 * T38;					     T58 = T48 + T47;					     T49 = T47 - T48;					     T3e = T3c + T3d;					     T41 = T3d - T3c;					}				   }				   {					E T4W, T1x, T1y, T6a, T4U, T1D, T1P, T4V, T5K, T52, T1L, T1Q;					{					     E T1B, T1C, T1J, T1K;					     {						  E T1v, T6A, T4c, T5a, T6y, T46, T1w, T6z, T4a;						  T1v = Ip[WS(rs, 3)];						  T6z = T4 * T49;						  T4a = T1 * T49;						  {						       E T3f, T59, T6x, T42;						       T3f = FNMS(T3b, T3e, T39);						       T59 = T35 * T3e;						       T6x = T44 * T41;						       T42 = T40 * T41;						       T6A = FMA(T1, T4b, T6z);						       T4c = FNMS(T4, T4b, T4a);						       T3j = T3f + T3i;						       T7Z = T3i - T3f;						       T5a = FMA(T3b, T38, T59);						       T6y = FMA(T40, T45, T6x);						       T46 = FNMS(T44, T45, T42);						       T1w = Im[WS(rs, 3)];						  }						  T5b = T58 + T5a;						  T93 = T58 - T5a;						  T6B = T6y + T6A;						  T8V = T6A - T6y;						  T4d = T46 + T4c;						  T8J = T4c - T46;						  T4W = T1v + T1w;						  T1x = T1v - T1w;					     }					     T1B = Rp[WS(rs, 3)];					     T1C = Rm[WS(rs, 3)];					     T1y = T1u * T1x;					     T6a = T25 * T4W;					     T1J = Ip[WS(rs, 11)];					     T4U = T1B - T1C;					     T1D = T1B + T1C;					     T1K = Im[WS(rs, 11)];					     T1P = Rp[WS(rs, 11)];					     T4V = T25 * T4U;					     T5K = T1u * T1D;					     T52 = T1J + T1K;					     T1L = T1J - T1K;					     T1Q = Rm[WS(rs, 11)];					}					{					     E T1E, T6c, T1M, T4Z, T1R, T6b;					     T1E = FNMS(T1A, T1D, T1y);					     T6c = T4Y * T52;					     T1M = T1I * T1L;					     T4Z = T1P - T1Q;					     T1R = T1P + T1Q;					     T6b = FNMS(T28, T4U, T6a);					     {						  E T5M, T6d, T50, T1S;						  T4X = FMA(T28, T4W, T4V);						  T6d = FNMS(T51, T4Z, T6c);						  T50 = T4Y * T4Z;						  T1S = FNMS(T1O, T1R, T1M);						  T5M = T1I * T1R;						  T8r = T6d - T6b;						  T6e = T6b + T6d;						  T8l = T1E - T1S;						  T1T = T1E + T1S;						  T53 = FMA(T51, T52, T50);						  T5L = FMA(T1A, T1x, T5K);						  T5N = FMA(T1O, T1L, T5M);					     }					}				   }			      }			      {				   E T3Q, T2K, T2P, T2L, T6s, T3P, T5c, T3W, T2U, T2X, T2Y, T2V;				   {					E T2I, T2J, T2N, T2O, T2S, T3O, T2T;					T2I = Ip[WS(rs, 4)];					T8C = T53 - T4X;					T54 = T4X + T53;					T8i = T5L - T5N;					T5O = T5L + T5N;					T2J = Im[WS(rs, 4)];					T2N = Rp[WS(rs, 4)];					T2O = Rm[WS(rs, 4)];					T2S = Ip[WS(rs, 12)];					T3Q = T2I + T2J;					T2K = T2I - T2J;					T3O = T2O - T2N;					T2P = T2N + T2O;					T2T = Im[WS(rs, 12)];					T2L = T2H * T2K;					T6s = Tk * T3O;					T3P = Th * T3O;					T5c = T2H * T2P;					T3W = T2S + T2T;					T2U = T2S - T2T;					T2X = Rp[WS(rs, 12)];					T2Y = Rm[WS(rs, 12)];					T2V = T2R * T2U;				   }				   {					E T2Q, T6t, T3T, T2Z, T3R, T6u, T3U;					T2Q = FNMS(T2M, T2P, T2L);					T6t = FMA(Th, T3Q, T6s);					T3T = T2Y - T2X;					T2Z = T2X + T2Y;					T3R = FNMS(Tk, T3Q, T3P);					T5d = FMA(T2M, T2K, T5c);					T6u = T3V * T3T;					T3U = T3S * T3T;					{					     E T30, T5e, T6v, T3X;					     T30 = FNMS(T2W, T2Z, T2V);					     T5e = T2R * T2Z;					     T6v = FMA(T3S, T3W, T6u);					     T3X = FNMS(T3V, T3W, T3U);					     T94 = T2Q - T30;					     T31 = T2Q + T30;					     T8K = T6t - T6v;					     T6w = T6t + T6v;					     T8U = T3R - T3X;					     T3Y = T3R + T3X;					     T5f = FMA(T2W, T2U, T5e);					}				   }			      }			      {				   E T4J, T12, T65, T13, T4F, T18, T1o, T4G, T5F, T4R, T1k, T1p;				   {					E T16, T17, T10, T11, T1i, T1j;					T10 = Ip[WS(rs, 15)];					T11 = Im[WS(rs, 15)];					T16 = Rp[WS(rs, 15)];					T80 = T5d - T5f;					T5g = T5d + T5f;					T4J = T10 + T11;					T12 = T10 - T11;					T17 = Rm[WS(rs, 15)];					T1i = Ip[WS(rs, 7)];					T65 = T4E * T4J;					T13 = TZ * T12;					T4F = T16 - T17;					T18 = T16 + T17;					T1j = Im[WS(rs, 7)];					T1o = Rp[WS(rs, 7)];					T4G = T4E * T4F;					T5F = TZ * T18;					T4R = T1i + T1j;					T1k = T1i - T1j;					T1p = Rm[WS(rs, 7)];				   }				   {					E T19, T67, T1l, T4N, T1q, T66;					T19 = FNMS(T15, T18, T13);					T67 = T4M * T4R;					T1l = T1h * T1k;					T4N = T1o - T1p;					T1q = T1o + T1p;					T66 = FNMS(T4I, T4F, T65);					{					     E T5H, T68, T4O, T1r;					     T4K = FMA(T4I, T4J, T4G);					     T68 = FNMS(T4Q, T4N, T67);					     T4O = T4M * T4N;					     T1r = FNMS(T1n, T1q, T1l);					     T5H = T1h * T1q;					     T8B = T66 - T68;					     T69 = T66 + T68;					     T8h = T19 - T1r;					     T1s = T19 + T1r;					     T4S = FMA(T4Q, T4R, T4O);					     T5G = FMA(T15, T12, T5F);					     T5I = FMA(T1n, T1k, T5H);					}				   }			      }			 }			 {			      E T2c, T3x, T2d, T23, T5j, T3q, T2i, T3t, T6i, T8t, T5V, T5X;			      {				   E Tn, T4i, T9, T4g, Tf, T5U, Ta, T4h, T5u, To, Tt, Tu;				   {					E T7, T8, Td, Te;					T7 = Ip[WS(rs, 1)];					T8q = T4S - T4K;					T4T = T4K + T4S;					T8k = T5G - T5I;					T5J = T5G + T5I;					T8 = Im[WS(rs, 1)];					Td = Rp[WS(rs, 1)];					Te = Rm[WS(rs, 1)];					Tn = Ip[WS(rs, 9)];					T4i = T7 + T8;					T9 = T7 - T8;					T4g = Td - Te;					Tf = Td + Te;					T5U = T2 * T4i;					Ta = T6 * T9;					T4h = T2 * T4g;					T5u = T6 * Tf;					To = Im[WS(rs, 9)];					Tt = Rp[WS(rs, 9)];					Tu = Rm[WS(rs, 9)];				   }				   {					E Tg, T4q, Tp, T4m, Tv, T5W, Tq, T4n, T5w;					Tg = FNMS(Tc, Tf, Ta);					T4q = Tn + To;					Tp = Tn - To;					T4m = Tt - Tu;					Tv = Tt + Tu;					T5W = T4l * T4q;					Tq = Tm * Tp;					T4n = T4l * T4m;					T5w = Tm * Tv;					{					     E T5v, Tw, T4j, T5x, T4r;					     T5v = FMA(Tc, T9, T5u);					     Tw = FNMS(Ts, Tv, Tq);					     T4j = FMA(T5, T4i, T4h);					     T5x = FMA(Ts, Tp, T5w);					     T4r = FMA(T4p, T4q, T4n);					     Tx = Tg + Tw;					     T8a = Tg - Tw;					     T5y = T5v + T5x;					     T8d = T5v - T5x;					     T4s = T4j + T4r;					     T8t = T4r - T4j;					     T5V = FNMS(T5, T4g, T5U);					     T5X = FNMS(T4p, T4m, T5W);					}				   }			      }			      {				   E T3p, T1Y, T1Z, T22, T2g, T6h, T3o, T5i, T2h;				   {					E T20, T21, T1W, T1X, T8u, T2a, T2b, T3n;					T1W = Ip[WS(rs, 2)];					T1X = Im[WS(rs, 2)];					T8u = T5V - T5X;					T5Y = T5V + T5X;					T20 = Rp[WS(rs, 2)];					T3p = T1W + T1X;					T1Y = T1W - T1X;					T8v = T8t - T8u;					T8E = T8u + T8t;					T21 = Rm[WS(rs, 2)];					T1Z = T1a * T1Y;					T2a = Ip[WS(rs, 10)];					T2b = Im[WS(rs, 10)];					T3n = T21 - T20;					T22 = T20 + T21;					T2g = Rp[WS(rs, 10)];					T2c = T2a - T2b;					T3x = T2a + T2b;					T6h = T1H * T3n;					T3o = T1F * T3n;					T5i = T1a * T22;					T2d = T29 * T2c;					T2h = Rm[WS(rs, 10)];				   }				   T23 = FNMS(T1c, T22, T1Z);				   T5j = FMA(T1c, T1Y, T5i);				   T3q = FNMS(T1H, T3p, T3o);				   T2i = T2g + T2h;				   T3t = T2h - T2g;				   T6i = FMA(T1F, T3p, T6h);			      }			      {				   E T2y, T3K, T2z, T2u, T5o, T3H, T2D, T3I, T6n;				   {					E T3G, T2o, T2p, T2t, T6m, T3D, T5n, T2B, T2C;					{					     E T2r, T2s, T2m, T2n, T3C, T2w, T2x;					     {						  E T8N, T8M, T6j, T3u, T2j;						  T2m = Ip[WS(rs, 14)];						  T6j = T3w * T3t;						  T3u = T3s * T3t;						  T2j = FNMS(T2f, T2i, T2d);						  {						       E T5k, T6k, T3y, T5l;						       T5k = T29 * T2i;						       T6k = FMA(T3s, T3x, T6j);						       T3y = FNMS(T3w, T3x, T3u);						       T2k = T23 + T2j;						       T82 = T23 - T2j;						       T5l = FMA(T2f, T2c, T5k);						       T6l = T6i + T6k;						       T8N = T6i - T6k;						       T3z = T3q + T3y;						       T8M = T3q - T3y;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -