⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 r2cf_64.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 3 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:56:00 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_r2cf -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 64 -name r2cf_64 -include r2cf.h *//* * This function contains 394 FP additions, 196 FP multiplications, * (or, 198 additions, 0 multiplications, 196 fused multiply/add), * 133 stack variables, 15 constants, and 128 memory accesses */#include "r2cf.h"static void r2cf_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs){     DK(KP773010453, +0.773010453362736960810906609758469800971041293);     DK(KP995184726, +0.995184726672196886244836953109479921575474869);     DK(KP098491403, +0.098491403357164253077197521291327432293052451);     DK(KP820678790, +0.820678790828660330972281985331011598767386482);     DK(KP956940335, +0.956940335732208864935797886980269969482849206);     DK(KP881921264, +0.881921264348355029712756863660388349508442621);     DK(KP534511135, +0.534511135950791641089685961295362908582039528);     DK(KP303346683, +0.303346683607342391675883946941299872384187453);     DK(KP980785280, +0.980785280403230449126182236134239036973933731);     DK(KP198912367, +0.198912367379658006911597622644676228597850501);     DK(KP831469612, +0.831469612302545237078788377617905756738560812);     DK(KP668178637, +0.668178637919298919997757686523080761552472251);     DK(KP923879532, +0.923879532511286756128183189396788286822416626);     DK(KP414213562, +0.414213562373095048801688724209698078569671875);     DK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT i;     for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) {	  E T5n, T5o;	  {	       E T11, T2j, T4P, T5P, T3D, T5p, T3d, Tf, T1k, T1H, T5D, T4l, T5A, T4a, T3i;	       E T2U, T1R, T2e, T5K, T4G, T5H, T4v, T3l, T31, T5s, T42, T5t, T3Z, T2n, T1b;	       E T3f, TZ, T5v, T3T, T5w, T3Q, T2m, T18, T3e, TK, T3K, T5Q, T4S, T5q, T14;	       E T2k, T3p, Tu, T4w, T1U, T5E, T4h, T5B, T4o, T3j, T2X, T1I, T1z, T1Z, T4A;	       E T24, T4x, T1X, T20;	       {		    E TN, T3V, TS, TX, T3X, TQ, T40, TT;		    {			 E T1g, T46, T1B, T1G, T47, T1j, T4j, T1C;			 {			      E T4, T3z, T3, T3B, Td, T5, T8, T9;			      {				   E T1, T2, Tb, Tc;				   T1 = R0[0];				   T2 = R0[WS(rs, 16)];				   Tb = R0[WS(rs, 28)];				   Tc = R0[WS(rs, 12)];				   T4 = R0[WS(rs, 8)];				   T3z = T1 - T2;				   T3 = T1 + T2;				   T3B = Tb - Tc;				   Td = Tb + Tc;				   T5 = R0[WS(rs, 24)];				   T8 = R0[WS(rs, 4)];				   T9 = R0[WS(rs, 20)];			      }			      {				   E T1E, T1F, T1h, T1i;				   {					E T1e, T4N, T6, T3A, Ta, T1f;					T1e = R1[0];					T4N = T4 - T5;					T6 = T4 + T5;					T3A = T8 - T9;					Ta = T8 + T9;					T1f = R1[WS(rs, 16)];					{					     E T7, T3C, T4O, Te;					     T11 = T3 - T6;					     T7 = T3 + T6;					     T3C = T3A + T3B;					     T4O = T3B - T3A;					     T2j = Td - Ta;					     Te = Ta + Td;					     T4P = FNMS(KP707106781, T4O, T4N);					     T5P = FMA(KP707106781, T4O, T4N);					     T3D = FMA(KP707106781, T3C, T3z);					     T5p = FNMS(KP707106781, T3C, T3z);					     T3d = T7 - Te;					     Tf = T7 + Te;					     T1g = T1e + T1f;					     T46 = T1e - T1f;					}				   }				   T1E = R1[WS(rs, 4)];				   T1F = R1[WS(rs, 20)];				   T1h = R1[WS(rs, 8)];				   T1i = R1[WS(rs, 24)];				   T1B = R1[WS(rs, 28)];				   T1G = T1E + T1F;				   T47 = T1E - T1F;				   T1j = T1h + T1i;				   T4j = T1h - T1i;				   T1C = R1[WS(rs, 12)];			      }			 }			 {			      E T1N, T4r, T28, T2d, T4s, T1Q, T4E, T29;			      {				   E T2b, T2c, T1O, T1P;				   {					E T2S, T48, T1D, T1L, T1M, T4k, T49, T2T;					T1L = R1[WS(rs, 31)];					T1M = R1[WS(rs, 15)];					T2S = T1g + T1j;					T1k = T1g - T1j;					T48 = T1B - T1C;					T1D = T1B + T1C;					T1N = T1L + T1M;					T4r = T1L - T1M;					T4k = T47 - T48;					T49 = T47 + T48;					T2T = T1G + T1D;					T1H = T1D - T1G;					T5D = FNMS(KP707106781, T4k, T4j);					T4l = FMA(KP707106781, T4k, T4j);					T5A = FNMS(KP707106781, T49, T46);					T4a = FMA(KP707106781, T49, T46);					T3i = T2S - T2T;					T2U = T2S + T2T;					T2b = R1[WS(rs, 3)];					T2c = R1[WS(rs, 19)];				   }				   T1O = R1[WS(rs, 7)];				   T1P = R1[WS(rs, 23)];				   T28 = R1[WS(rs, 27)];				   T2d = T2b + T2c;				   T4s = T2b - T2c;				   T1Q = T1O + T1P;				   T4E = T1P - T1O;				   T29 = R1[WS(rs, 11)];			      }			      {				   E TV, TW, TO, TP;				   {					E T2Z, T4t, T2a, TL, TM, T4F, T4u, T30;					TL = R0[WS(rs, 31)];					TM = R0[WS(rs, 15)];					T2Z = T1N + T1Q;					T1R = T1N - T1Q;					T4t = T28 - T29;					T2a = T28 + T29;					TN = TL + TM;					T3V = TL - TM;					T4F = T4t - T4s;					T4u = T4s + T4t;					T30 = T2d + T2a;					T2e = T2a - T2d;					T5K = FNMS(KP707106781, T4F, T4E);					T4G = FMA(KP707106781, T4F, T4E);					T5H = FNMS(KP707106781, T4u, T4r);					T4v = FMA(KP707106781, T4u, T4r);					T3l = T2Z - T30;					T31 = T2Z + T30;					TV = R0[WS(rs, 27)];					TW = R0[WS(rs, 11)];				   }				   TO = R0[WS(rs, 7)];				   TP = R0[WS(rs, 23)];				   TS = R0[WS(rs, 3)];				   TX = TV + TW;				   T3X = TV - TW;				   TQ = TO + TP;				   T40 = TO - TP;				   TT = R0[WS(rs, 19)];			      }			 }		    }		    {			 E Ti, T3E, Tn, Ts, T3I, Tl, T3F, To;			 {			      E Ty, T3M, TD, TI, T3O, TB, T3R, TE;			      {				   E TG, TH, Tz, TA;				   {					E T19, TR, T3W, TU, Tw, Tx;					Tw = R0[WS(rs, 1)];					Tx = R0[WS(rs, 17)];					T19 = TN - TQ;					TR = TN + TQ;					T3W = TS - TT;					TU = TS + TT;					Ty = Tw + Tx;					T3M = Tw - Tx;					{					     E T41, T3Y, T1a, TY;					     T41 = T3W - T3X;					     T3Y = T3W + T3X;					     T1a = TX - TU;					     TY = TU + TX;					     T5s = FNMS(KP707106781, T41, T40);					     T42 = FMA(KP707106781, T41, T40);					     T5t = FNMS(KP707106781, T3Y, T3V);					     T3Z = FMA(KP707106781, T3Y, T3V);					     T2n = FMA(KP414213562, T19, T1a);					     T1b = FNMS(KP414213562, T1a, T19);					     T3f = TR - TY;					     TZ = TR + TY;					     TG = R0[WS(rs, 29)];					     TH = R0[WS(rs, 13)];					}				   }				   Tz = R0[WS(rs, 9)];				   TA = R0[WS(rs, 25)];				   TD = R0[WS(rs, 5)];				   TI = TG + TH;				   T3O = TG - TH;				   TB = Tz + TA;				   T3R = Tz - TA;				   TE = R0[WS(rs, 21)];			      }			      {				   E Tq, Tr, Tj, Tk;				   {					E T16, TC, T3N, TF, Tg, Th;					Tg = R0[WS(rs, 2)];					Th = R0[WS(rs, 18)];					T16 = Ty - TB;					TC = Ty + TB;					T3N = TD - TE;					TF = TD + TE;					Ti = Tg + Th;					T3E = Tg - Th;					{					     E T3S, T3P, T17, TJ;					     T3S = T3N - T3O;					     T3P = T3N + T3O;					     T17 = TI - TF;					     TJ = TF + TI;					     T5v = FNMS(KP707106781, T3S, T3R);					     T3T = FMA(KP707106781, T3S, T3R);					     T5w = FNMS(KP707106781, T3P, T3M);					     T3Q = FMA(KP707106781, T3P, T3M);					     T2m = FNMS(KP414213562, T16, T17);					     T18 = FMA(KP414213562, T17, T16);					     T3e = TC - TJ;					     TK = TC + TJ;					     Tq = R0[WS(rs, 6)];					     Tr = R0[WS(rs, 22)];					}				   }				   Tj = R0[WS(rs, 10)];				   Tk = R0[WS(rs, 26)];				   Tn = R0[WS(rs, 30)];				   Ts = Tq + Tr;				   T3I = Tq - Tr;				   Tl = Tj + Tk;				   T3F = Tj - Tk;				   To = R0[WS(rs, 14)];			      }			 }			 {			      E T1n, T4b, T1s, T4f, T1x, T4c, T1q, T1t;			      {				   E T1v, T1w, T1o, T1p;				   {					E T1l, T4Q, T3G, Tm, T12, Tp, T3H, T1m;					T1l = R1[WS(rs, 2)];					T4Q = FMA(KP414213562, T3E, T3F);					T3G = FNMS(KP414213562, T3F, T3E);					Tm = Ti + Tl;					T12 = Ti - Tl;					Tp = Tn + To;					T3H = Tn - To;					T1m = R1[WS(rs, 18)];					T1v = R1[WS(rs, 6)];					{					     E T4R, T3J, Tt, T13;					     T4R = FNMS(KP414213562, T3H, T3I);					     T3J = FMA(KP414213562, T3I, T3H);					     Tt = Tp + Ts;					     T13 = Tp - Ts;					     T1n = T1l + T1m;					     T4b = T1l - T1m;					     T3K = T3G + T3J;					     T5Q = T3J - T3G;					     T4S = T4Q + T4R;					     T5q = T4Q - T4R;					     T14 = T12 + T13;					     T2k = T13 - T12;					     T3p = Tt - Tm;					     Tu = Tm + Tt;					     T1w = R1[WS(rs, 22)];					}				   }				   T1o = R1[WS(rs, 10)];				   T1p = R1[WS(rs, 26)];				   T1s = R1[WS(rs, 30)];				   T4f = T1v - T1w;				   T1x = T1v + T1w;				   T4c = T1o - T1p;				   T1q = T1o + T1p;				   T1t = R1[WS(rs, 14)];			      }			      {				   E T22, T23, T1V, T1W;				   {					E T1S, T4d, T4m, T2V, T1r, T4e, T1u, T1T;					T1S = R1[WS(rs, 1)];					T4d = FNMS(KP414213562, T4c, T4b);					T4m = FMA(KP414213562, T4b, T4c);					T2V = T1n + T1q;					T1r = T1n - T1q;					T4e = T1s - T1t;					T1u = T1s + T1t;					T1T = R1[WS(rs, 17)];					T22 = R1[WS(rs, 5)];					{					     E T4g, T4n, T2W, T1y;					     T4g = FMA(KP414213562, T4f, T4e);					     T4n = FNMS(KP414213562, T4e, T4f);					     T2W = T1u + T1x;					     T1y = T1u - T1x;					     T4w = T1S - T1T;					     T1U = T1S + T1T;					     T5E = T4g - T4d;					     T4h = T4d + T4g;					     T5B = T4m - T4n;					     T4o = T4m + T4n;					     T3j = T2W - T2V;					     T2X = T2V + T2W;					     T1I = T1y - T1r;					     T1z = T1r + T1y;					     T23 = R1[WS(rs, 21)];					}				   }				   T1V = R1[WS(rs, 9)];				   T1W = R1[WS(rs, 25)];				   T1Z = R1[WS(rs, 29)];				   T4A = T23 - T22;				   T24 = T22 + T23;				   T4x = T1W - T1V;				   T1X = T1V + T1W;				   T20 = R1[WS(rs, 13)];			      }			 }		    }	       }	       {		    E T4C, T5L, T4J, T5I, T26, T2f, T3q, T3h, T3w, T3s, T3o, T3r, T3t;		    {			 E T2R, T37, T2Y, T3a, T39, T3m, T3b, T35, Tv, T10, T34, T3c, T3x, T3y;			 {			      E T4y, T4H, T32, T1Y, T4z, T21;			      T2R = Tf - Tu;			      Tv = Tf + Tu;			      T4y = FMA(KP414213562, T4x, T4w);			      T4H = FNMS(KP414213562, T4w, T4x);			      T32 = T1U + T1X;			      T1Y = T1U - T1X;			      T4z = T1Z - T20;			      T21 = T1Z + T20;			      T10 = TK + TZ;			      T37 = TZ - TK;			      T2Y = T2U - T2X;			      T3a = T2U + T2X;			      {				   E T4B, T4I, T33, T25;				   T4B = FNMS(KP414213562, T4A, T4z);				   T4I = FMA(KP414213562, T4z, T4A);				   T33 = T21 + T24;				   T25 = T21 - T24;				   T39 = Tv + T10;				   T4C = T4y + T4B;				   T5L = T4B - T4y;				   T4J = T4H + T4I;				   T5I = T4I - T4H;				   T34 = T32 + T33;				   T3m = T33 - T32;				   T26 = T1Y + T25;				   T2f = T25 - T1Y;			      }			 }			 Cr[WS(csr, 16)] = Tv - T10;			 T3b = T31 + T34;			 T35 = T31 - T34;			 Ci[WS(csi, 16)] = T3b - T3a;			 T3c = T3a + T3b;			 {			      E T3k, T3u, T3v, T3n, T36, T38, T3g;			      T3g = T3e + T3f;			      T3q = T3f - T3e;			      Cr[0] = T39 + T3c;			      Cr[WS(csr, 32)] = T39 - T3c;			      T36 = T2Y + T35;			      T38 = T35 - T2Y;			      T3x = FNMS(KP707106781, T3g, T3d);			      T3h = FMA(KP707106781, T3g, T3d);			      Ci[WS(csi, 8)] = FMA(KP707106781, T38, T37);			      Ci[WS(csi, 24)] = FMS(KP707106781, T38, T37);			      Cr[WS(csr, 8)] = FMA(KP707106781, T36, T2R);			      Cr[WS(csr, 24)] = FNMS(KP707106781, T36, T2R);			      T3k = FMA(KP414213562, T3j, T3i);			      T3u = FNMS(KP414213562, T3i, T3j);			      T3v = FMA(KP414213562, T3l, T3m);			      T3n = FNMS(KP414213562, T3m, T3l);			      T3y = T3v - T3u;			      T3w = T3u + T3v;			      T3s = T3n - T3k;			      T3o = T3k + T3n;			 }			 Cr[WS(csr, 12)] = FMA(KP923879532, T3y, T3x);			 Cr[WS(csr, 20)] = FNMS(KP923879532, T3y, T3x);		    }		    Cr[WS(csr, 4)] = FMA(KP923879532, T3o, T3h);		    Cr[WS(csr, 28)] = FNMS(KP923879532, T3o, T3h);		    T3r = FNMS(KP707106781, T3q, T3p);		    T3t = FMA(KP707106781, T3q, T3p);		    {			 E T27, T2g, T2v, T1d, T2r, T2p, T2s, T1K, T6l, T6m;			 {			      E T15, T2o, T2P, T2z, T2l, T1c, T1A, T1J, T2D, T2L, T2J, T2M, T2C, T2E, T2N;			      E T2F;			      {				   E T2H, T2I, T2x, T2y, T2A, T2B;				   T15 = FMA(KP707106781, T14, T11);				   T2x = FNMS(KP707106781, T14, T11);				   T2y = T2n - T2m;				   T2o = T2m + T2n;				   Ci[WS(csi, 4)] = FMA(KP923879532, T3w, T3t);				   Ci[WS(csi, 28)] = FMS(KP923879532, T3w, T3t);				   Ci[WS(csi, 20)] = FMA(KP923879532, T3s, T3r);				   Ci[WS(csi, 12)] = FMS(KP923879532, T3s, T3r);				   T2P = FNMS(KP923879532, T2y, T2x);				   T2z = FMA(KP923879532, T2y, T2x);				   T2l = FMA(KP707106781, T2k, T2j);				   T2H = FNMS(KP707106781, T2k, T2j);				   T2I = T1b - T18;				   T1c = T18 + T1b;				   T1A = FMA(KP707106781, T1z, T1k);				   T2A = FNMS(KP707106781, T1z, T1k);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -