⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hc2cb2_32.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 4 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:11:27 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2c -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -twiddle-log3 -precompute-twiddles -n 32 -dif -name hc2cb2_32 -include hc2cb.h *//* * This function contains 488 FP additions, 350 FP multiplications, * (or, 236 additions, 98 multiplications, 252 fused multiply/add), * 204 stack variables, 7 constants, and 128 memory accesses */#include "hc2cb.h"static void hc2cb2_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + ((mb - 1) * 8); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 8, MAKE_VOLATILE_STRIDE(rs)) {	  E T5u, T6b, T6e, T5I, T66, T60, T5U, T5R, T67, T5L, T61, T5x, T5A, T5D, T5O;	  E T62, T5V, T5P;	  {	       E T11, T14, T12, T37, T17, T1b, T39, T15, T7C, T8P, T8S, T7I, T98, T7e, T78;	       E T8V, T3d, T3x, T3a, T3v, T9s, T3G, T4p, T5X, T16, T9m, T3y, T4b, T3C, T4g;	       E T5Z, T1a, T4r, T3J, T2O, T1c, T4W, T4s, T3Y, T3K, T3l, T3e, T3i, T3q, T8K;	       E T8E, T8m, T7S, T5k, T5e;	       {		    E T13, T3c, T38, T3F, T7B, T9l, T77, T7d, T9r, T7H;		    T11 = W[2];		    T14 = W[3];		    T12 = W[4];		    T37 = W[0];		    T17 = W[6];		    T1b = W[7];		    T13 = T11 * T12;		    T3c = T37 * T14;		    T38 = T37 * T11;		    T3F = T37 * T12;		    T7B = T11 * T17;		    T9l = T12 * T17;		    T77 = T37 * T17;		    T7d = T37 * T1b;		    T9r = T12 * T1b;		    T7H = T11 * T1b;		    T39 = W[1];		    T15 = W[5];		    {			 E T3I, T19, T5d, T3b, T18, T2N;			 T7C = FMA(T14, T1b, T7B);			 T8P = FNMS(T14, T1b, T7B);			 T8S = FMA(T14, T17, T7H);			 T7I = FNMS(T14, T17, T7H);			 T98 = FNMS(T39, T17, T7d);			 T7e = FMA(T39, T17, T7d);			 T78 = FNMS(T39, T1b, T77);			 T8V = FMA(T39, T1b, T77);			 T3d = FMA(T39, T11, T3c);			 T3x = FNMS(T39, T11, T3c);			 T3a = FNMS(T39, T14, T38);			 T3v = FMA(T39, T14, T38);			 T9s = FNMS(T15, T17, T9r);			 T3G = FNMS(T39, T15, T3F);			 T4p = FMA(T39, T15, T3F);			 T5X = FNMS(T14, T15, T13);			 T16 = FMA(T14, T15, T13);			 T3I = T37 * T15;			 T19 = T11 * T15;			 T5d = T3v * T12;			 T3b = T3a * T12;			 T9m = FMA(T15, T1b, T9l);			 {			      E T3w, T3B, T5t, T5H;			      T3w = T3v * T17;			      T3B = T3v * T1b;			      T5t = T3a * T17;			      T5H = T3a * T1b;			      T3y = FNMS(T3x, T1b, T3w);			      T4b = FMA(T3x, T1b, T3w);			      T3C = FMA(T3x, T17, T3B);			      T4g = FNMS(T3x, T17, T3B);			      T5u = FMA(T3d, T1b, T5t);			      T6b = FNMS(T3d, T1b, T5t);			      T6e = FMA(T3d, T17, T5H);			      T5I = FNMS(T3d, T17, T5H);			      T18 = T16 * T17;			      T2N = T16 * T1b;			      T5Z = FMA(T14, T12, T19);			      T1a = FNMS(T14, T12, T19);			 }			 {			      E T3H, T3X, T4q, T4V, T5Y, T65;			      T4q = T4p * T17;			      T4V = T4p * T1b;			      T4r = FNMS(T39, T12, T3I);			      T3J = FMA(T39, T12, T3I);			      T2O = FNMS(T1a, T17, T2N);			      T1c = FMA(T1a, T1b, T18);			      T3H = T3G * T17;			      T4W = FNMS(T4r, T17, T4V);			      T4s = FMA(T4r, T1b, T4q);			      T3X = T3G * T1b;			      T5Y = T5X * T17;			      T65 = T5X * T1b;			      T3Y = FNMS(T3J, T17, T3X);			      T3K = FMA(T3J, T1b, T3H);			      {				   E T8J, T8D, T3h, T5j, T8l, T7R;				   T3h = T3a * T15;				   T66 = FNMS(T5Z, T17, T65);				   T60 = FMA(T5Z, T1b, T5Y);				   T3l = FNMS(T3d, T15, T3b);				   T3e = FMA(T3d, T15, T3b);				   T3i = FNMS(T3d, T12, T3h);				   T3q = FMA(T3d, T12, T3h);				   T8J = T3l * T1b;				   T8D = T3l * T17;				   T5j = T3v * T15;				   T8l = T3e * T1b;				   T7R = T3e * T17;				   T8K = FNMS(T3q, T17, T8J);				   T8E = FMA(T3q, T1b, T8D);				   T8m = FNMS(T3i, T17, T8l);				   T7S = FMA(T3i, T1b, T7R);				   T5U = FNMS(T3x, T12, T5j);				   T5k = FMA(T3x, T12, T5j);				   T5e = FNMS(T3x, T15, T5d);				   T5R = FMA(T3x, T15, T5d);			      }			 }		    }	       }	       {		    E T6O, T6i, T7s, T7o, T6j, Tf, T8W, T7V, T99, T8p, T3L, T1t, T3Z, T2X, T5J;		    E T4Z, T7t, T6W, T5v, T4v, TZ, T7x, T91, T9d, T28, T3S, T3R, T2h, T5B, T4Q;		    E T8v, T8a, T5C, T4N, T6Z, T6J, TK, T7w, T3P, T2z, T9c, T94, T3O, T2I, T5y;		    E T4J, T8u, T8h, T5z, T4G, T6Y, T6A, T6P, Tu, T9a, T82, T8X, T8s, T4y, T40;		    E T1Q, T3M, T30, T4B, T5w, T52, T7u, T6q;		    {			 E T6B, T6I, T4M, T4L, T4t, T4u, T6s, T6z;			 {			      E T1d, T3, T6Q, T2S, T2P, T6, T6R, T1g, Td, T6U, T1i, Ta, T2V, T1r, T6T;			      E T1l;			      {				   E T2Q, T2R, T4, T5, T1, T2, T1e, T1f;				   T1 = Rp[0];				   T2 = Rm[WS(rs, 15)];				   {					E T6N, T6h, T7r, T7n;					T6N = T5R * T1b;					T6h = T5R * T17;					T7r = T5e * T1b;					T7n = T5e * T17;					T6O = FNMS(T5U, T17, T6N);					T6i = FMA(T5U, T1b, T6h);					T7s = FNMS(T5k, T17, T7r);					T7o = FMA(T5k, T1b, T7n);					T1d = T1 - T2;					T3 = T1 + T2;				   }				   T2Q = Ip[0];				   T2R = Im[WS(rs, 15)];				   T4 = Rp[WS(rs, 8)];				   T5 = Rm[WS(rs, 7)];				   T1e = Ip[WS(rs, 8)];				   T6Q = T2Q - T2R;				   T2S = T2Q + T2R;				   T2P = T4 - T5;				   T6 = T4 + T5;				   T1f = Im[WS(rs, 7)];				   {					E T1o, T1n, T1p, Tb, Tc;					Tb = Rm[WS(rs, 3)];					Tc = Rp[WS(rs, 12)];					T1o = Ip[WS(rs, 12)];					T6R = T1e - T1f;					T1g = T1e + T1f;					T1n = Tb - Tc;					Td = Tb + Tc;					T1p = Im[WS(rs, 3)];					{					     E T1j, T1k, T8, T9, T1q;					     T8 = Rp[WS(rs, 4)];					     T9 = Rm[WS(rs, 11)];					     T1q = T1o + T1p;					     T6U = T1o - T1p;					     T1j = Ip[WS(rs, 4)];					     T1i = T8 - T9;					     Ta = T8 + T9;					     T1k = Im[WS(rs, 11)];					     T2V = T1n + T1q;					     T1r = T1n - T1q;					     T6T = T1j - T1k;					     T1l = T1j + T1k;					}				   }			      }			      {				   E T2U, T6V, T6S, T1h, T1s, T4Y, T4X, T2T, T2W;				   {					E T7T, T8o, T1m, T7U, T7, Te, T8n;					T7T = T3 - T6;					T7 = T3 + T6;					Te = Ta + Td;					T8o = Ta - Td;					T1m = T1i - T1l;					T2U = T1i + T1l;					T6j = T7 - Te;					Tf = T7 + Te;					T7U = T6U - T6T;					T6V = T6T + T6U;					T6S = T6Q + T6R;					T8n = T6Q - T6R;					T4t = T1d + T1g;					T1h = T1d - T1g;					T8W = T7T + T7U;					T7V = T7T - T7U;					T99 = T8o + T8n;					T8p = T8n - T8o;					T1s = T1m + T1r;					T4Y = T1m - T1r;				   }				   T4X = T2S - T2P;				   T2T = T2P + T2S;				   T2W = T2U - T2V;				   T4u = T2U + T2V;				   T3L = FMA(KP707106781, T1s, T1h);				   T1t = FNMS(KP707106781, T1s, T1h);				   T3Z = FMA(KP707106781, T2W, T2T);				   T2X = FNMS(KP707106781, T2W, T2T);				   T5J = FNMS(KP707106781, T4Y, T4X);				   T4Z = FMA(KP707106781, T4Y, T4X);				   T7t = T6S + T6V;				   T6W = T6S - T6V;			      }			 }			 {			      E T29, T1S, T1V, T87, TR, T2c, T84, T6E, TU, T23, T6F, T22, TX, T24, T2e;			      E T21;			      {				   E TO, TN, TP, TL, TM;				   TL = Rm[0];				   TM = Rp[WS(rs, 15)];				   TO = Rp[WS(rs, 7)];				   T5v = FMA(KP707106781, T4u, T4t);				   T4v = FNMS(KP707106781, T4u, T4t);				   TN = TL + TM;				   T29 = TL - TM;				   TP = Rm[WS(rs, 8)];				   {					E T6C, T6D, T1X, T20;					{					     E T2a, T2b, T1T, T1U, TQ;					     T1T = Ip[WS(rs, 15)];					     T1U = Im[0];					     TQ = TO + TP;					     T1S = TO - TP;					     T2a = Ip[WS(rs, 7)];					     T6C = T1T - T1U;					     T1V = T1T + T1U;					     T2b = Im[WS(rs, 8)];					     T87 = TN - TQ;					     TR = TN + TQ;					     T2c = T2a + T2b;					     T6D = T2a - T2b;					}					{					     E T1Y, T1Z, TS, TT, TV, TW;					     TS = Rp[WS(rs, 3)];					     TT = Rm[WS(rs, 12)];					     T84 = T6C - T6D;					     T6E = T6C + T6D;					     T1Y = Ip[WS(rs, 3)];					     T1X = TS - TT;					     TU = TS + TT;					     T1Z = Im[WS(rs, 12)];					     TV = Rm[WS(rs, 4)];					     TW = Rp[WS(rs, 11)];					     T23 = Ip[WS(rs, 11)];					     T6F = T1Y - T1Z;					     T20 = T1Y + T1Z;					     T22 = TV - TW;					     TX = TV + TW;					     T24 = Im[WS(rs, 4)];					}					T2e = T1X - T20;					T21 = T1X + T20;				   }			      }			      {				   E TY, T85, T25, T6G;				   TY = TU + TX;				   T85 = TU - TX;				   T25 = T23 + T24;				   T6G = T23 - T24;				   {					E T4O, T1W, T2f, T8Z, T86, T89, T90, T27, T88, T26, T6H, T4P, T2d, T2g;					T4O = T1S + T1V;					T1W = T1S - T1V;					TZ = TR + TY;					T6B = TR - TY;					T88 = T6G - T6F;					T6H = T6F + T6G;					T26 = T22 + T25;					T2f = T22 - T25;					T6I = T6E - T6H;					T7x = T6E + T6H;					T8Z = T85 + T84;					T86 = T84 - T85;					T89 = T87 - T88;					T90 = T87 + T88;					T27 = T21 - T26;					T4M = T21 + T26;					T4L = T29 + T2c;					T2d = T29 - T2c;					T2g = T2e + T2f;					T4P = T2e - T2f;					T91 = FNMS(KP414213562, T90, T8Z);					T9d = FMA(KP414213562, T8Z, T90);					T28 = FNMS(KP707106781, T27, T1W);					T3S = FMA(KP707106781, T27, T1W);					T3R = FMA(KP707106781, T2g, T2d);					T2h = FNMS(KP707106781, T2g, T2d);					T5B = FMA(KP707106781, T4P, T4O);					T4Q = FNMS(KP707106781, T4P, T4O);					T8v = FNMS(KP414213562, T86, T89);					T8a = FMA(KP414213562, T89, T86);				   }			      }			 }			 {			      E T2A, T2j, TC, T8e, T2m, T2D, T6v, T8b, TF, T6w, T2F, T2s, T2t, TI, T6x;			      E T2w, TJ, T8c;			      {				   E Tw, Tx, Tz, TA, T6t, T6u;				   Tw = Rp[WS(rs, 1)];				   T5C = FMA(KP707106781, T4M, T4L);				   T4N = FNMS(KP707106781, T4M, T4L);				   T6Z = T6I - T6B;				   T6J = T6B + T6I;				   Tx = Rm[WS(rs, 14)];				   Tz = Rp[WS(rs, 9)];				   TA = Rm[WS(rs, 6)];				   {					E T2k, Ty, TB, T2l, T2B, T2C;					T2k = Ip[WS(rs, 1)];					T2A = Tw - Tx;					Ty = Tw + Tx;					T2j = Tz - TA;					TB = Tz + TA;					T2l = Im[WS(rs, 14)];					T2B = Ip[WS(rs, 9)];					T2C = Im[WS(rs, 6)];					TC = Ty + TB;					T8e = Ty - TB;					T2m = T2k + T2l;					T6t = T2k - T2l;					T6u = T2B - T2C;					T2D = T2B + T2C;				   }				   {					E TG, T2o, T2r, TH, T2u, T2v;					{					     E TD, TE, T2p, T2q;					     TD = Rp[WS(rs, 5)];					     T6v = T6t + T6u;					     T8b = T6t - T6u;					     TE = Rm[WS(rs, 10)];					     T2p = Ip[WS(rs, 5)];					     T2q = Im[WS(rs, 10)];					     TG = Rm[WS(rs, 2)];					     T2o = TD - TE;					     TF = TD + TE;					     T6w = T2p - T2q;					     T2r = T2p + T2q;					     TH = Rp[WS(rs, 13)];					     T2u = Ip[WS(rs, 13)];					     T2v = Im[WS(rs, 2)];					}					T2F = T2o - T2r;					T2s = T2o + T2r;					T2t = TG - TH;					TI = TG + TH;					T6x = T2u - T2v;					T2w = T2u + T2v;				   }			      }			      TJ = TF + TI;			      T8c = TF - TI;			      {				   E T8f, T6y, T2x, T2G;				   T8f = T6x - T6w;				   T6y = T6w + T6x;				   T2x = T2t + T2w;				   T2G = T2t - T2w;				   {					E T4H, T2n, T2y, T4F, T8d, T92, T93, T8g;					T6s = TC - TJ;					TK = TC + TJ;					T7w = T6v + T6y;					T6z = T6v - T6y;					T4H = T2m - T2j;					T2n = T2j + T2m;					T2y = T2s - T2x;					T4F = T2s + T2x;					T8d = T8b - T8c;					T92 = T8c + T8b;					T93 = T8e + T8f;					T8g = T8e - T8f;					{					     E T4E, T2E, T2H, T4I;					     T4E = T2A + T2D;					     T2E = T2A - T2D;					     T3P = FMA(KP707106781, T2y, T2n);					     T2z = FNMS(KP707106781, T2y, T2n);					     T9c = FNMS(KP414213562, T92, T93);					     T94 = FMA(KP414213562, T93, T92);					     T2H = T2F + T2G;					     T4I = T2G - T2F;					     T3O = FMA(KP707106781, T2H, T2E);					     T2I = FNMS(KP707106781, T2H, T2E);					     T5y = FMA(KP707106781, T4I, T4H);					     T4J = FNMS(KP707106781, T4I, T4H);					     T8u = FMA(KP414213562, T8d, T8g);					     T8h = FNMS(KP414213562, T8g, T8d);					     T5z = FMA(KP707106781, T4F, T4E);					     T4G = FNMS(KP707106781, T4F, T4E);					}				   }			      }			 }			 {			      E T4w, T1J, T7Z, Tm, T6p, T80, T4x, T1O, T1z, Tp, T1A, T6k, T1x, T1u, Ts;			      E T1B;			      {				   E T1K, Ti, T1L, T6n, T1I, T1F, Tl, T1M;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -