⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cfdft_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:03:24 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include hc2cf.h *//* * This function contains 498 FP additions, 324 FP multiplications, * (or, 300 additions, 126 multiplications, 198 fused multiply/add), * 172 stack variables, 8 constants, and 128 memory accesses */#include "hc2cf.h"static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     DK(KP500000000, +0.500000000000000000000000000000000000000000000);     INT m;     for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {	  E T9X, Ta0;	  {	       E T3B, T89, T61, T8l, T2F, T7p, T8t, T4B, T7I, T5e, T7L, T1n, T7R, T5E, T82;	       E T4u, T3m, T8k, T5W, T8a, T2r, T8u, T4G, T7q, T59, T7K, T7H, T12, T5z, T81;	       E T7Q, T4h, T4Y, T7D, T7A, Tl, T5o, T3Q, T84, T7V, T2V, T4M, T7t, T7s, T1K;	       E T5L, T8e, T8n, T38, T7v, T4R, T7w, T25, T5Q, T8h, T8o, T3V, T3S, T5p, T3T;	       E T41, Tz, T3Y, TE, TA, T51, T5r, T3Z, Tv, T50, TB, T3U, T40;	       {		    E T49, T46, T5v, T47, T4f, TV, T4c, T10, TW, T57, T5x, T4d, TR, T56, TX;		    E T48, T4e;		    {			 E T4m, T4j, T5A, T4k, T4s, T1g, T4p, T1l, T1h, T5c, T5C, T4q, T1c, T5b, T1i;			 E T4l, T4r;			 {			      E T2E, T4y, T2B, T4A;			      {				   E T3y, T3z, T3t, T5Z, T3x, T2v, T3r, T3q, T3n, T2A, T3o, T2s;				   {					E T2C, T2D, T3w, T3u, T3v;					T2C = Ip[0];					T2D = Im[0];					T3u = Rm[0];					T3v = Rp[0];					T3y = W[1];					T3z = T2C + T2D;					T2E = T2C - T2D;					T4y = T3v + T3u;					T3w = T3u - T3v;					T3t = W[0];					{					     E T2y, T2z, T2t, T2u;					     T2t = Ip[WS(rs, 8)];					     T2u = Im[WS(rs, 8)];					     T5Z = T3y * T3w;					     T3x = T3t * T3w;					     T2y = Rp[WS(rs, 8)];					     T2v = T2t - T2u;					     T3r = T2t + T2u;					     T2z = Rm[WS(rs, 8)];					     T3q = W[33];					     T3n = W[32];					     T2A = T2y + T2z;					     T3o = T2z - T2y;					     T2s = W[30];					}				   }				   {					E T3A, T5X, T4z, T2w, T3s, T3p, T5Y, T60, T2x;					T3A = FNMS(T3y, T3z, T3x);					T3p = T3n * T3o;					T5X = T3q * T3o;					T4z = T2s * T2A;					T2w = T2s * T2v;					T3s = FNMS(T3q, T3r, T3p);					T5Y = FMA(T3n, T3r, T5X);					T60 = FMA(T3t, T3z, T5Z);					T2x = W[31];					T3B = T3s + T3A;					T89 = T3A - T3s;					T61 = T5Y + T60;					T8l = T60 - T5Y;					T2B = FNMS(T2x, T2A, T2w);					T4A = FMA(T2x, T2v, T4z);				   }			      }			      {				   E T16, T1b, T17, T5a, T1d, T4o, T18;				   {					E T19, T1a, T13, T4i, T14, T15;					T14 = Ip[WS(rs, 3)];					T15 = Im[WS(rs, 3)];					T2F = T2B + T2E;					T7p = T2E - T2B;					T8t = T4y - T4A;					T4B = T4y + T4A;					T4m = T14 + T15;					T16 = T14 - T15;					T19 = Rp[WS(rs, 3)];					T1a = Rm[WS(rs, 3)];					T13 = W[10];					T4i = W[12];					{					     E T1e, T1f, T1j, T1k;					     T1e = Ip[WS(rs, 11)];					     T4j = T19 - T1a;					     T1b = T19 + T1a;					     T17 = T13 * T16;					     T5A = T4i * T4m;					     T4k = T4i * T4j;					     T5a = T13 * T1b;					     T1f = Im[WS(rs, 11)];					     T1j = Rp[WS(rs, 11)];					     T1k = Rm[WS(rs, 11)];					     T1d = W[42];					     T4s = T1e + T1f;					     T1g = T1e - T1f;					     T4p = T1j - T1k;					     T1l = T1j + T1k;					     T4o = W[44];					     T1h = T1d * T1g;					}				   }				   T18 = W[11];				   T5c = T1d * T1l;				   T5C = T4o * T4s;				   T4q = T4o * T4p;				   T1c = FNMS(T18, T1b, T17);				   T5b = FMA(T18, T16, T5a);				   T1i = W[43];				   T4l = W[13];				   T4r = W[45];			      }			 }			 {			      E T4D, T2g, T2q, T4F;			      {				   E T3d, T3e, T2a, T2f, T3a, T5S, T3c, T4C, T2b, T3j, T2k, T3k, T2p, T3h, T3g;				   E T2h, T5U, T3b, T27;				   {					E T28, T29, T2d, T2e, T5d, T1m;					T28 = Ip[WS(rs, 4)];					T5d = FMA(T1i, T1g, T5c);					T1m = FNMS(T1i, T1l, T1h);					{					     E T5B, T4n, T5D, T4t;					     T5B = FNMS(T4l, T4j, T5A);					     T4n = FMA(T4l, T4m, T4k);					     T5D = FNMS(T4r, T4p, T5C);					     T4t = FMA(T4r, T4s, T4q);					     T7I = T5b - T5d;					     T5e = T5b + T5d;					     T7L = T1c - T1m;					     T1n = T1c + T1m;					     T7R = T5D - T5B;					     T5E = T5B + T5D;					     T82 = T4t - T4n;					     T4u = T4n + T4t;					     T29 = Im[WS(rs, 4)];					}					T2d = Rp[WS(rs, 4)];					T2e = Rm[WS(rs, 4)];					T3d = W[17];					T3e = T28 + T29;					T2a = T28 - T29;					T3b = T2e - T2d;					T2f = T2d + T2e;					T3a = W[16];					T27 = W[14];					T5S = T3d * T3b;				   }				   {					E T2i, T2j, T2n, T2o;					T2i = Ip[WS(rs, 12)];					T3c = T3a * T3b;					T4C = T27 * T2f;					T2b = T27 * T2a;					T2j = Im[WS(rs, 12)];					T2n = Rp[WS(rs, 12)];					T2o = Rm[WS(rs, 12)];					T3j = W[49];					T2k = T2i - T2j;					T3k = T2i + T2j;					T2p = T2n + T2o;					T3h = T2o - T2n;					T3g = W[48];					T2h = W[46];					T5U = T3j * T3h;				   }				   {					E T3f, T3i, T4E, T2l;					T3f = FNMS(T3d, T3e, T3c);					T3i = T3g * T3h;					T4E = T2h * T2p;					T2l = T2h * T2k;					{					     E T5T, T3l, T5V, T2c, T2m;					     T5T = FMA(T3a, T3e, T5S);					     T3l = FNMS(T3j, T3k, T3i);					     T5V = FMA(T3g, T3k, T5U);					     T2c = W[15];					     T2m = W[47];					     T3m = T3f + T3l;					     T8k = T3f - T3l;					     T5W = T5T + T5V;					     T8a = T5T - T5V;					     T4D = FMA(T2c, T2a, T4C);					     T2g = FNMS(T2c, T2f, T2b);					     T2q = FNMS(T2m, T2p, T2l);					     T4F = FMA(T2m, T2k, T4E);					}				   }			      }			      {				   E TL, TQ, TM, T55, TS, T4b, TN;				   {					E TO, TP, TI, T45, TJ, TK;					TJ = Ip[WS(rs, 15)];					TK = Im[WS(rs, 15)];					T2r = T2g + T2q;					T8u = T2g - T2q;					T4G = T4D + T4F;					T7q = T4D - T4F;					T49 = TJ + TK;					TL = TJ - TK;					TO = Rp[WS(rs, 15)];					TP = Rm[WS(rs, 15)];					TI = W[58];					T45 = W[60];					{					     E TT, TU, TY, TZ;					     TT = Ip[WS(rs, 7)];					     T46 = TO - TP;					     TQ = TO + TP;					     TM = TI * TL;					     T5v = T45 * T49;					     T47 = T45 * T46;					     T55 = TI * TQ;					     TU = Im[WS(rs, 7)];					     TY = Rp[WS(rs, 7)];					     TZ = Rm[WS(rs, 7)];					     TS = W[26];					     T4f = TT + TU;					     TV = TT - TU;					     T4c = TY - TZ;					     T10 = TY + TZ;					     T4b = W[28];					     TW = TS * TV;					}				   }				   TN = W[59];				   T57 = TS * T10;				   T5x = T4b * T4f;				   T4d = T4b * T4c;				   TR = FNMS(TN, TQ, TM);				   T56 = FMA(TN, TL, T55);				   TX = W[27];				   T48 = W[61];				   T4e = W[29];			      }			 }		    }		    {			 E T8c, T8d, T8f, T8g;			 {			      E T3I, T3F, T5k, T3G, T3O, Te, T3L, Tj, Tf, T4W, T5m, T3M, Ta, T4V, Tg;			      E T3H, T3N;			      {				   E T4, T9, T5, T4U, Tb, T3K, T1, T3E, T6;				   {					E T2, T3, T7, T8, T58, T11;					T2 = Ip[WS(rs, 1)];					T58 = FMA(TX, TV, T57);					T11 = FNMS(TX, T10, TW);					{					     E T5w, T4a, T5y, T4g;					     T5w = FNMS(T48, T46, T5v);					     T4a = FMA(T48, T49, T47);					     T5y = FNMS(T4e, T4c, T5x);					     T4g = FMA(T4e, T4f, T4d);					     T59 = T56 + T58;					     T7K = T56 - T58;					     T7H = TR - T11;					     T12 = TR + T11;					     T5z = T5w + T5y;					     T81 = T5w - T5y;					     T7Q = T4g - T4a;					     T4h = T4a + T4g;					     T3 = Im[WS(rs, 1)];					}					T7 = Rp[WS(rs, 1)];					T8 = Rm[WS(rs, 1)];					T1 = W[2];					T3I = T2 + T3;					T4 = T2 - T3;					T3F = T7 - T8;					T9 = T7 + T8;					T3E = W[4];					T5 = T1 * T4;				   }				   {					E Tc, Td, Th, Ti;					Tc = Ip[WS(rs, 9)];					T4U = T1 * T9;					T5k = T3E * T3I;					T3G = T3E * T3F;					Td = Im[WS(rs, 9)];					Th = Rp[WS(rs, 9)];					Ti = Rm[WS(rs, 9)];					Tb = W[34];					T3O = Tc + Td;					Te = Tc - Td;					T3L = Th - Ti;					Tj = Th + Ti;					T3K = W[36];					Tf = Tb * Te;				   }				   T6 = W[3];				   T4W = Tb * Tj;				   T5m = T3K * T3O;				   T3M = T3K * T3L;				   Ta = FNMS(T6, T9, T5);				   T4V = FMA(T6, T4, T4U);				   Tg = W[35];				   T3H = W[5];				   T3N = W[37];			      }			      {				   E T1t, T2N, T2M, T2J, T1y, T2L, T5H, T4I, T1u, T2S, T1D, T2T, T1I, T2Q, T2P;				   E T1A, T5J;				   {					E T2K, T1q, T1w, T1x;					{					     E T1r, T7U, T7T, T1s, T4X, Tk;					     T1r = Ip[WS(rs, 2)];					     T4X = FMA(Tg, Te, T4W);					     Tk = FNMS(Tg, Tj, Tf);					     {						  E T5l, T3J, T5n, T3P;						  T5l = FNMS(T3H, T3F, T5k);						  T3J = FMA(T3H, T3I, T3G);						  T5n = FNMS(T3N, T3L, T5m);						  T3P = FMA(T3N, T3O, T3M);						  T4Y = T4V + T4X;						  T7D = T4V - T4X;						  T7A = Ta - Tk;						  Tl = Ta + Tk;						  T7U = T5l - T5n;						  T5o = T5l + T5n;						  T7T = T3P - T3J;						  T3Q = T3J + T3P;						  T1s = Im[WS(rs, 2)];					     }					     T1w = Rp[WS(rs, 2)];					     T84 = T7U + T7T;					     T7V = T7T - T7U;					     T1t = T1r - T1s;					     T2N = T1r + T1s;					     T1x = Rm[WS(rs, 2)];					}					T2M = W[9];					T2J = W[8];					T1y = T1w + T1x;					T2K = T1x - T1w;					T1q = W[6];					{					     E T1B, T1C, T1G, T1H;					     T1B = Ip[WS(rs, 10)];					     T2L = T2J * T2K;					     T5H = T2M * T2K;					     T4I = T1q * T1y;					     T1u = T1q * T1t;					     T1C = Im[WS(rs, 10)];					     T1G = Rp[WS(rs, 10)];					     T1H = Rm[WS(rs, 10)];					     T2S = W[41];					     T1D = T1B - T1C;					     T2T = T1B + T1C;					     T1I = T1G + T1H;					     T2Q = T1H - T1G;					     T2P = W[40];					     T1A = W[38];					     T5J = T2S * T2Q;					}				   }				   {					E T2R, T4K, T1E, T1z, T4J, T1F, T1v, T2O, T2U;					T1v = W[7];					T2R = T2P * T2Q;					T4K = T1A * T1I;					T1E = T1A * T1D;					T1z = FNMS(T1v, T1y, T1u);					T4J = FMA(T1v, T1t, T4I);					T1F = W[39];					T2O = FNMS(T2M, T2N, T2L);					T2U = FNMS(T2S, T2T, T2R);					{					     E T5I, T4L, T1J, T5K;					     T5I = FMA(T2J, T2N, T5H);					     T4L = FMA(T1F, T1D, T4K);					     T1J = FNMS(T1F, T1I, T1E);					     T8c = T2O - T2U;					     T2V = T2O + T2U;					     T5K = FMA(T2P, T2T, T5J);					     T4M = T4J + T4L;					     T7t = T4J - T4L;					     T7s = T1z - T1J;					     T1K = T1z + T1J;					     T8d = T5I - T5K;					     T5L = T5I + T5K;					}				   }			      }			 }			 {			      E T2Z, T30, T1O, T1T, T2W, T5M, T2Y, T4N, T1P, T35, T1Y, T36, T23, T33, T32;			      E T1V, T5O, T2X, T1L;			      {				   E T1M, T1N, T1R, T1S;				   T1M = Ip[WS(rs, 14)];				   T8e = T8c - T8d;				   T8n = T8c + T8d;				   T1N = Im[WS(rs, 14)];				   T1R = Rp[WS(rs, 14)];				   T1S = Rm[WS(rs, 14)];				   T2Z = W[57];				   T30 = T1M + T1N;				   T1O = T1M - T1N;				   T2X = T1S - T1R;				   T1T = T1R + T1S;				   T2W = W[56];				   T1L = W[54];				   T5M = T2Z * T2X;			      }			      {				   E T1W, T1X, T21, T22;				   T1W = Ip[WS(rs, 6)];				   T2Y = T2W * T2X;				   T4N = T1L * T1T;				   T1P = T1L * T1O;				   T1X = Im[WS(rs, 6)];				   T21 = Rp[WS(rs, 6)];				   T22 = Rm[WS(rs, 6)];				   T35 = W[25];				   T1Y = T1W - T1X;				   T36 = T1W + T1X;				   T23 = T21 + T22;				   T33 = T22 - T21;				   T32 = W[24];				   T1V = W[22];				   T5O = T35 * T33;			      }			      {				   E T34, T4P, T1Z, T1U, T4O, T20, T1Q, T31, T37;				   T1Q = W[55];				   T34 = T32 * T33;				   T4P = T1V * T23;				   T1Z = T1V * T1Y;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -