⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cf_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:01:46 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2c -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cf_32 -include hc2cf.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 236 additions, 62 multiplications, 198 fused multiply/add), * 135 stack variables, 7 constants, and 128 memory accesses */#include "hc2cf.h"static void hc2cf_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) {	  E T90, T8Z;	  {	       E T8x, T87, T8, T3w, T83, T3B, T8y, Tl, T6F, Tz, T3J, T5T, T6G, TM, T3Q;	       E T5U, T3Z, T5Y, T7D, T6L, T5X, T46, T6M, T1f, T4e, T61, T7E, T6R, T6O, T1G;	       E T60, T4l, T54, T6c, T79, T7N, T32, T7b, T6f, T5r, T4v, T65, T6X, T7I, T29;	       E T70, T68, T4S, T5s, T5b, T7O, T7e, T76, T3t, T5t, T5i, T4H, T2y, T4B, T71;	       E T2m, T4w, T4F, T2s;	       {		    E T3X, T1d, T44, T6J, T11, T3T, T3V, T17, T5h, T5c;		    {			 E Ta, Td, Tg, T3x, Tb, Tj, Tf, Tc, Ti;			 {			      E T1, T86, T3, T6, T2, T5;			      T1 = Rp[0];			      T86 = Rm[0];			      T3 = Rp[WS(rs, 8)];			      T6 = Rm[WS(rs, 8)];			      T2 = W[30];			      T5 = W[31];			      {				   E T84, T4, T9, T85, T7;				   Ta = Rp[WS(rs, 4)];				   Td = Rm[WS(rs, 4)];				   T84 = T2 * T6;				   T4 = T2 * T3;				   T9 = W[14];				   Tg = Rp[WS(rs, 12)];				   T85 = FNMS(T5, T3, T84);				   T7 = FMA(T5, T6, T4);				   T3x = T9 * Td;				   Tb = T9 * Ta;				   T8x = T86 - T85;				   T87 = T85 + T86;				   T8 = T1 + T7;				   T3w = T1 - T7;				   Tj = Rm[WS(rs, 12)];				   Tf = W[46];			      }			      Tc = W[15];			      Ti = W[47];			 }			 {			      E Tu, Tx, T3F, Ts, Tw, T3G, Tv;			      {				   E To, Tr, Tp, T3E, Tq, Tt;				   {					E T3y, Te, T3A, Tk, T3z, Th, Tn;					To = Rp[WS(rs, 2)];					T3z = Tf * Tj;					Th = Tf * Tg;					T3y = FNMS(Tc, Ta, T3x);					Te = FMA(Tc, Td, Tb);					T3A = FNMS(Ti, Tg, T3z);					Tk = FMA(Ti, Tj, Th);					Tr = Rm[WS(rs, 2)];					Tn = W[6];					T83 = T3y + T3A;					T3B = T3y - T3A;					T8y = Te - Tk;					Tl = Te + Tk;					Tp = Tn * To;					T3E = Tn * Tr;				   }				   Tq = W[7];				   Tu = Rp[WS(rs, 10)];				   Tx = Rm[WS(rs, 10)];				   Tt = W[38];				   T3F = FNMS(Tq, To, T3E);				   Ts = FMA(Tq, Tr, Tp);				   Tw = W[39];				   T3G = Tt * Tx;				   Tv = Tt * Tu;			      }			      {				   E T3M, TF, TH, TK, TG, TJ, TE, TD, TC;				   {					E TB, T3H, Ty, TA, T3I, T3D, T3L;					TB = Rp[WS(rs, 14)];					TE = Rm[WS(rs, 14)];					T3H = FNMS(Tw, Tu, T3G);					Ty = FMA(Tw, Tx, Tv);					TA = W[54];					TD = W[55];					T6F = T3F + T3H;					T3I = T3F - T3H;					Tz = Ts + Ty;					T3D = Ts - Ty;					T3L = TA * TE;					TC = TA * TB;					T3J = T3D + T3I;					T5T = T3I - T3D;					T3M = FNMS(TD, TB, T3L);				   }				   TF = FMA(TD, TE, TC);				   TH = Rp[WS(rs, 6)];				   TK = Rm[WS(rs, 6)];				   TG = W[22];				   TJ = W[23];				   {					E TU, T41, T13, T16, T43, T10, T12, T15, T3U, T14;					{					     E T19, T1c, T18, T1b, T3P, T3K;					     {						  E TQ, TT, T3N, TI, TP, TS;						  TQ = Rp[WS(rs, 1)];						  TT = Rm[WS(rs, 1)];						  T3N = TG * TK;						  TI = TG * TH;						  TP = W[2];						  TS = W[3];						  {						       E T3O, TL, T40, TR;						       T3O = FNMS(TJ, TH, T3N);						       TL = FMA(TJ, TK, TI);						       T40 = TP * TT;						       TR = TP * TQ;						       T6G = T3M + T3O;						       T3P = T3M - T3O;						       TM = TF + TL;						       T3K = TF - TL;						       TU = FMA(TS, TT, TR);						       T41 = FNMS(TS, TQ, T40);						  }					     }					     T3Q = T3K - T3P;					     T5U = T3K + T3P;					     T19 = Rp[WS(rs, 13)];					     T1c = Rm[WS(rs, 13)];					     T18 = W[50];					     T1b = W[51];					     {						  E TW, TZ, TY, T42, TX, T3W, T1a, TV;						  TW = Rp[WS(rs, 9)];						  TZ = Rm[WS(rs, 9)];						  T3W = T18 * T1c;						  T1a = T18 * T19;						  TV = W[34];						  TY = W[35];						  T3X = FNMS(T1b, T19, T3W);						  T1d = FMA(T1b, T1c, T1a);						  T42 = TV * TZ;						  TX = TV * TW;						  T13 = Rp[WS(rs, 5)];						  T16 = Rm[WS(rs, 5)];						  T43 = FNMS(TY, TW, T42);						  T10 = FMA(TY, TZ, TX);						  T12 = W[18];						  T15 = W[19];					     }					}					T44 = T41 - T43;					T6J = T41 + T43;					T11 = TU + T10;					T3T = TU - T10;					T3U = T12 * T16;					T14 = T12 * T13;					T3V = FNMS(T15, T13, T3U);					T17 = FMA(T15, T16, T14);				   }			      }			 }		    }		    {			 E T4g, T1l, T4c, T1E, T1u, T1x, T1w, T4i, T1r, T49, T1v;			 {			      E T1A, T1D, T1C, T4b, T1B;			      {				   E T1h, T1k, T1g, T1j, T4f, T1i, T1z;				   T1h = Rp[WS(rs, 15)];				   T1k = Rm[WS(rs, 15)];				   {					E T6K, T3Y, T1e, T45;					T6K = T3V + T3X;					T3Y = T3V - T3X;					T1e = T17 + T1d;					T45 = T17 - T1d;					T3Z = T3T + T3Y;					T5Y = T3T - T3Y;					T7D = T6J + T6K;					T6L = T6J - T6K;					T5X = T44 + T45;					T46 = T44 - T45;					T6M = T11 - T1e;					T1f = T11 + T1e;					T1g = W[58];				   }				   T1j = W[59];				   T1A = Rp[WS(rs, 11)];				   T1D = Rm[WS(rs, 11)];				   T4f = T1g * T1k;				   T1i = T1g * T1h;				   T1z = W[42];				   T1C = W[43];				   T4g = FNMS(T1j, T1h, T4f);				   T1l = FMA(T1j, T1k, T1i);				   T4b = T1z * T1D;				   T1B = T1z * T1A;			      }			      {				   E T1n, T1q, T1m, T1p, T4h, T1o, T1t;				   T1n = Rp[WS(rs, 7)];				   T1q = Rm[WS(rs, 7)];				   T4c = FNMS(T1C, T1A, T4b);				   T1E = FMA(T1C, T1D, T1B);				   T1m = W[26];				   T1p = W[27];				   T1u = Rp[WS(rs, 3)];				   T1x = Rm[WS(rs, 3)];				   T4h = T1m * T1q;				   T1o = T1m * T1n;				   T1t = W[10];				   T1w = W[11];				   T4i = FNMS(T1p, T1n, T4h);				   T1r = FMA(T1p, T1q, T1o);				   T49 = T1t * T1x;				   T1v = T1t * T1u;			      }			 }			 {			      E T4j, T6P, T1s, T48, T4a, T1y;			      T4j = T4g - T4i;			      T6P = T4g + T4i;			      T1s = T1l + T1r;			      T48 = T1l - T1r;			      T4a = FNMS(T1w, T1u, T49);			      T1y = FMA(T1w, T1x, T1v);			      {				   E T6Q, T4d, T4k, T1F;				   T6Q = T4a + T4c;				   T4d = T4a - T4c;				   T4k = T1y - T1E;				   T1F = T1y + T1E;				   T4e = T48 + T4d;				   T61 = T48 - T4d;				   T7E = T6P + T6Q;				   T6R = T6P - T6Q;				   T6O = T1s - T1F;				   T1G = T1s + T1F;				   T60 = T4j + T4k;				   T4l = T4j - T4k;			      }			 }		    }		    {			 E T5m, T2H, T52, T30, T2Q, T2T, T2S, T5o, T2N, T4Z, T2R;			 {			      E T2W, T2Z, T2Y, T51, T2X;			      {				   E T2D, T2G, T2C, T2F, T5l, T2E, T2V;				   T2D = Ip[WS(rs, 15)];				   T2G = Im[WS(rs, 15)];				   T2C = W[60];				   T2F = W[61];				   T2W = Ip[WS(rs, 11)];				   T2Z = Im[WS(rs, 11)];				   T5l = T2C * T2G;				   T2E = T2C * T2D;				   T2V = W[44];				   T2Y = W[45];				   T5m = FNMS(T2F, T2D, T5l);				   T2H = FMA(T2F, T2G, T2E);				   T51 = T2V * T2Z;				   T2X = T2V * T2W;			      }			      {				   E T2J, T2M, T2I, T2L, T5n, T2K, T2P;				   T2J = Ip[WS(rs, 7)];				   T2M = Im[WS(rs, 7)];				   T52 = FNMS(T2Y, T2W, T51);				   T30 = FMA(T2Y, T2Z, T2X);				   T2I = W[28];				   T2L = W[29];				   T2Q = Ip[WS(rs, 3)];				   T2T = Im[WS(rs, 3)];				   T5n = T2I * T2M;				   T2K = T2I * T2J;				   T2P = W[12];				   T2S = W[13];				   T5o = FNMS(T2L, T2J, T5n);				   T2N = FMA(T2L, T2M, T2K);				   T4Z = T2P * T2T;				   T2R = T2P * T2Q;			      }			 }			 {			      E T5p, T77, T2O, T4Y, T50, T2U;			      T5p = T5m - T5o;			      T77 = T5m + T5o;			      T2O = T2H + T2N;			      T4Y = T2H - T2N;			      T50 = FNMS(T2S, T2Q, T4Z);			      T2U = FMA(T2S, T2T, T2R);			      {				   E T78, T53, T5q, T31;				   T78 = T50 + T52;				   T53 = T50 - T52;				   T5q = T30 - T2U;				   T31 = T2U + T30;				   T54 = T4Y + T53;				   T6c = T4Y - T53;				   T79 = T77 - T78;				   T7N = T77 + T78;				   T32 = T2O + T31;				   T7b = T2O - T31;				   T6f = T5q - T5p;				   T5r = T5p + T5q;			      }			 }		    }		    {			 E T4N, T1O, T4t, T27, T1X, T20, T1Z, T4P, T1U, T4q, T1Y;			 {			      E T23, T26, T25, T4s, T24;			      {				   E T1K, T1N, T1J, T1M, T4M, T1L, T22;				   T1K = Ip[0];				   T1N = Im[0];				   T1J = W[0];				   T1M = W[1];				   T23 = Ip[WS(rs, 12)];				   T26 = Im[WS(rs, 12)];				   T4M = T1J * T1N;				   T1L = T1J * T1K;				   T22 = W[48];				   T25 = W[49];				   T4N = FNMS(T1M, T1K, T4M);				   T1O = FMA(T1M, T1N, T1L);				   T4s = T22 * T26;				   T24 = T22 * T23;			      }			      {				   E T1Q, T1T, T1P, T1S, T4O, T1R, T1W;				   T1Q = Ip[WS(rs, 8)];				   T1T = Im[WS(rs, 8)];				   T4t = FNMS(T25, T23, T4s);				   T27 = FMA(T25, T26, T24);				   T1P = W[32];				   T1S = W[33];				   T1X = Ip[WS(rs, 4)];				   T20 = Im[WS(rs, 4)];				   T4O = T1P * T1T;				   T1R = T1P * T1Q;				   T1W = W[16];				   T1Z = W[17];				   T4P = FNMS(T1S, T1Q, T4O);				   T1U = FMA(T1S, T1T, T1R);				   T4q = T1W * T20;				   T1Y = T1W * T1X;			      }			 }			 {			      E T4Q, T6V, T1V, T4p, T4r, T21;			      T4Q = T4N - T4P;			      T6V = T4N + T4P;			      T1V = T1O + T1U;			      T4p = T1O - T1U;			      T4r = FNMS(T1Z, T1X, T4q);			      T21 = FMA(T1Z, T20, T1Y);			      {				   E T6W, T4u, T4R, T28;				   T6W = T4r + T4t;				   T4u = T4r - T4t;				   T4R = T21 - T27;				   T28 = T21 + T27;				   T4v = T4p + T4u;				   T65 = T4p - T4u;				   T6X = T6V - T6W;				   T7I = T6V + T6W;				   T29 = T1V + T28;				   T70 = T1V - T28;				   T68 = T4Q + T4R;				   T4S = T4Q - T4R;			      }			 }		    }		    {			 E T57, T38, T5g, T3r, T3h, T3k, T3j, T59, T3e, T5d, T3i;			 {			      E T3n, T3q, T3p, T5f, T3o;			      {				   E T34, T37, T33, T36, T56, T35, T3m;				   T34 = Ip[WS(rs, 1)];				   T37 = Im[WS(rs, 1)];				   T33 = W[4];				   T36 = W[5];				   T3n = Ip[WS(rs, 5)];				   T3q = Im[WS(rs, 5)];				   T56 = T33 * T37;				   T35 = T33 * T34;				   T3m = W[20];				   T3p = W[21];				   T57 = FNMS(T36, T34, T56);				   T38 = FMA(T36, T37, T35);				   T5f = T3m * T3q;				   T3o = T3m * T3n;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -