⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t1fv_64.c

📁 快速fft变换
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:47:52 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twiddle_c -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 64 -name t1fv_64 -include t1f.h *//* * This function contains 519 FP additions, 384 FP multiplications, * (or, 261 additions, 126 multiplications, 258 fused multiply/add), * 187 stack variables, 15 constants, and 128 memory accesses */#include "t1f.h"static void t1fv_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){     DVK(KP773010453, +0.773010453362736960810906609758469800971041293);     DVK(KP995184726, +0.995184726672196886244836953109479921575474869);     DVK(KP820678790, +0.820678790828660330972281985331011598767386482);     DVK(KP098491403, +0.098491403357164253077197521291327432293052451);     DVK(KP956940335, +0.956940335732208864935797886980269969482849206);     DVK(KP881921264, +0.881921264348355029712756863660388349508442621);     DVK(KP303346683, +0.303346683607342391675883946941299872384187453);     DVK(KP534511135, +0.534511135950791641089685961295362908582039528);     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP668178637, +0.668178637919298919997757686523080761552472251);     DVK(KP198912367, +0.198912367379658006911597622644676228597850501);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP414213562, +0.414213562373095048801688724209698078569671875);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     R *x;     x = ri;     for (m = mb, W = W + (mb * ((TWVL / VL) * 126)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 126), MAKE_VOLATILE_STRIDE(rs)) {	  V T6L, T6M, T6O, T6P, T75, T6V, T5A, T6A, T72, T6K, T6t, T6D, T6w, T6B, T6h;	  V T6E;	  {	       V Ta, T3U, T3V, T37, T7a, T58, T7B, T6l, T1v, T24, T5Q, T7o, T5F, T7l, T43;	       V T4F, T2i, T2R, T6b, T7v, T60, T7s, T4a, T4I, T5u, T7h, T5x, T7g, T1i, T3a;	       V T4j, T4C, T7e, T5l, T7d, T5o, T3b, TV, T4B, T4m, T3X, T3Y, T6o, T7b, T5f;	       V T7C, Tx, T38, T2p, T61, T2n, T65, T2D, T7p, T5M, T7m, T5T, T4G, T46, T25;	       V T1S, T2q, T2u, T2w;	       {		    V T5q, T10, T5v, T15, T1b, T5s, T1c, T1e;		    {			 V T1V, T1p, T5B, T5O, T1u, T1X, T20, T21;			 {			      V T1, T2, T7, T5, T32, T34, T2X, T2Z;			      T1 = LD(&(x[0]), ms, &(x[0]));			      T2 = LD(&(x[WS(rs, 32)]), ms, &(x[0]));			      T7 = LD(&(x[WS(rs, 48)]), ms, &(x[0]));			      T5 = LD(&(x[WS(rs, 16)]), ms, &(x[0]));			      T32 = LD(&(x[WS(rs, 8)]), ms, &(x[0]));			      T34 = LD(&(x[WS(rs, 40)]), ms, &(x[0]));			      T2X = LD(&(x[WS(rs, 56)]), ms, &(x[0]));			      T2Z = LD(&(x[WS(rs, 24)]), ms, &(x[0]));			      {				   V T1m, T54, T6j, T36, T55, T31, T56, T1n, T1q, T1s, T4, T9;				   {					V T3, T8, T6, T33, T35, T2Y, T30, T1l;					T1l = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));					T3 = BYTWJ(&(W[TWVL * 62]), T2);					T8 = BYTWJ(&(W[TWVL * 94]), T7);					T6 = BYTWJ(&(W[TWVL * 30]), T5);					T33 = BYTWJ(&(W[TWVL * 14]), T32);					T35 = BYTWJ(&(W[TWVL * 78]), T34);					T2Y = BYTWJ(&(W[TWVL * 110]), T2X);					T30 = BYTWJ(&(W[TWVL * 46]), T2Z);					T1m = BYTWJ(&(W[0]), T1l);					T54 = VSUB(T1, T3);					T4 = VADD(T1, T3);					T6j = VSUB(T6, T8);					T9 = VADD(T6, T8);					T36 = VADD(T33, T35);					T55 = VSUB(T33, T35);					T31 = VADD(T2Y, T30);					T56 = VSUB(T2Y, T30);					T1n = LD(&(x[WS(rs, 33)]), ms, &(x[WS(rs, 1)]));				   }				   T1q = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));				   T1s = LD(&(x[WS(rs, 49)]), ms, &(x[WS(rs, 1)]));				   Ta = VSUB(T4, T9);				   T3U = VADD(T4, T9);				   {					V T57, T6k, T1o, T1r, T1t, T1W, T1U, T1Z;					T1U = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));					T3V = VADD(T36, T31);					T37 = VSUB(T31, T36);					T57 = VADD(T55, T56);					T6k = VSUB(T56, T55);					T1o = BYTWJ(&(W[TWVL * 64]), T1n);					T1r = BYTWJ(&(W[TWVL * 32]), T1q);					T1t = BYTWJ(&(W[TWVL * 96]), T1s);					T1V = BYTWJ(&(W[TWVL * 16]), T1U);					T1W = LD(&(x[WS(rs, 41)]), ms, &(x[WS(rs, 1)]));					T1Z = LD(&(x[WS(rs, 57)]), ms, &(x[WS(rs, 1)]));					T7a = VFNMS(LDK(KP707106781), T57, T54);					T58 = VFMA(LDK(KP707106781), T57, T54);					T7B = VFMA(LDK(KP707106781), T6k, T6j);					T6l = VFNMS(LDK(KP707106781), T6k, T6j);					T1p = VADD(T1m, T1o);					T5B = VSUB(T1m, T1o);					T5O = VSUB(T1r, T1t);					T1u = VADD(T1r, T1t);					T1X = BYTWJ(&(W[TWVL * 80]), T1W);					T20 = BYTWJ(&(W[TWVL * 112]), T1Z);					T21 = LD(&(x[WS(rs, 25)]), ms, &(x[WS(rs, 1)]));				   }			      }			 }			 {			      V T5W, T2N, T69, T2L, T5Y, T2P, T48, T2c, T2h;			      {				   V T41, T1Y, T5C, T22, T2d, T29, T2b, T2f, T28, T2a, T2H, T2J;				   T28 = LD(&(x[WS(rs, 63)]), ms, &(x[WS(rs, 1)]));				   T2a = LD(&(x[WS(rs, 31)]), ms, &(x[WS(rs, 1)]));				   T1v = VSUB(T1p, T1u);				   T41 = VADD(T1p, T1u);				   T1Y = VADD(T1V, T1X);				   T5C = VSUB(T1V, T1X);				   T22 = BYTWJ(&(W[TWVL * 48]), T21);				   T2d = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));				   T29 = BYTWJ(&(W[TWVL * 124]), T28);				   T2b = BYTWJ(&(W[TWVL * 60]), T2a);				   T2f = LD(&(x[WS(rs, 47)]), ms, &(x[WS(rs, 1)]));				   T2H = LD(&(x[WS(rs, 55)]), ms, &(x[WS(rs, 1)]));				   T2J = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)]));				   {					V T23, T5D, T2e, T2g, T2I, T2K, T2M;					T2M = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));					T23 = VADD(T20, T22);					T5D = VSUB(T20, T22);					T2e = BYTWJ(&(W[TWVL * 28]), T2d);					T2c = VADD(T29, T2b);					T5W = VSUB(T29, T2b);					T2g = BYTWJ(&(W[TWVL * 92]), T2f);					T2I = BYTWJ(&(W[TWVL * 108]), T2H);					T2K = BYTWJ(&(W[TWVL * 44]), T2J);					T2N = BYTWJ(&(W[TWVL * 12]), T2M);					{					     V T5E, T5P, T42, T2O;					     T5E = VADD(T5C, T5D);					     T5P = VSUB(T5C, T5D);					     T24 = VSUB(T1Y, T23);					     T42 = VADD(T1Y, T23);					     T69 = VSUB(T2g, T2e);					     T2h = VADD(T2e, T2g);					     T2O = LD(&(x[WS(rs, 39)]), ms, &(x[WS(rs, 1)]));					     T2L = VADD(T2I, T2K);					     T5Y = VSUB(T2I, T2K);					     T5Q = VFMA(LDK(KP707106781), T5P, T5O);					     T7o = VFNMS(LDK(KP707106781), T5P, T5O);					     T5F = VFMA(LDK(KP707106781), T5E, T5B);					     T7l = VFNMS(LDK(KP707106781), T5E, T5B);					     T43 = VADD(T41, T42);					     T4F = VSUB(T41, T42);					     T2P = BYTWJ(&(W[TWVL * 76]), T2O);					}				   }			      }			      T2i = VSUB(T2c, T2h);			      T48 = VADD(T2c, T2h);			      {				   V TW, TY, T11, T2Q, T5X, T13;				   TW = LD(&(x[WS(rs, 62)]), ms, &(x[0]));				   TY = LD(&(x[WS(rs, 30)]), ms, &(x[0]));				   T11 = LD(&(x[WS(rs, 14)]), ms, &(x[0]));				   T2Q = VADD(T2N, T2P);				   T5X = VSUB(T2N, T2P);				   T13 = LD(&(x[WS(rs, 46)]), ms, &(x[0]));				   {					V T12, T5Z, T6a, T49, T14, T18, T1a;					{					     V T17, T19, TX, TZ;					     T17 = LD(&(x[WS(rs, 54)]), ms, &(x[0]));					     T19 = LD(&(x[WS(rs, 22)]), ms, &(x[0]));					     TX = BYTWJ(&(W[TWVL * 122]), TW);					     TZ = BYTWJ(&(W[TWVL * 58]), TY);					     T12 = BYTWJ(&(W[TWVL * 26]), T11);					     T5Z = VADD(T5X, T5Y);					     T6a = VSUB(T5Y, T5X);					     T2R = VSUB(T2L, T2Q);					     T49 = VADD(T2Q, T2L);					     T14 = BYTWJ(&(W[TWVL * 90]), T13);					     T18 = BYTWJ(&(W[TWVL * 106]), T17);					     T5q = VSUB(TX, TZ);					     T10 = VADD(TX, TZ);					     T1a = BYTWJ(&(W[TWVL * 42]), T19);					}					T6b = VFMA(LDK(KP707106781), T6a, T69);					T7v = VFNMS(LDK(KP707106781), T6a, T69);					T60 = VFMA(LDK(KP707106781), T5Z, T5W);					T7s = VFNMS(LDK(KP707106781), T5Z, T5W);					T4a = VADD(T48, T49);					T4I = VSUB(T48, T49);					T5v = VSUB(T14, T12);					T15 = VADD(T12, T14);					T1b = VADD(T18, T1a);					T5s = VSUB(T18, T1a);				   }				   T1c = LD(&(x[WS(rs, 6)]), ms, &(x[0]));				   T1e = LD(&(x[WS(rs, 38)]), ms, &(x[0]));			      }			 }		    }		    {			 V Th, T59, Tf, Tv, T5d, Tj, Tm, To;			 {			      V T5h, TQ, T5m, T5i, TO, TS, TJ, T4k, TD, TI;			      {				   V T4h, T16, TB, T1d, T1f, TE, TG, TA, Tz, TK, TM, TC;				   Tz = LD(&(x[WS(rs, 2)]), ms, &(x[0]));				   T4h = VADD(T10, T15);				   T16 = VSUB(T10, T15);				   TB = LD(&(x[WS(rs, 34)]), ms, &(x[0]));				   T1d = BYTWJ(&(W[TWVL * 10]), T1c);				   T1f = BYTWJ(&(W[TWVL * 74]), T1e);				   TE = LD(&(x[WS(rs, 18)]), ms, &(x[0]));				   TG = LD(&(x[WS(rs, 50)]), ms, &(x[0]));				   TA = BYTWJ(&(W[TWVL * 2]), Tz);				   TK = LD(&(x[WS(rs, 10)]), ms, &(x[0]));				   TM = LD(&(x[WS(rs, 42)]), ms, &(x[0]));				   TC = BYTWJ(&(W[TWVL * 66]), TB);				   {					V T1g, T5r, TF, TH, TL, TN, TP;					TP = LD(&(x[WS(rs, 58)]), ms, &(x[0]));					T1g = VADD(T1d, T1f);					T5r = VSUB(T1d, T1f);					TF = BYTWJ(&(W[TWVL * 34]), TE);					TH = BYTWJ(&(W[TWVL * 98]), TG);					TL = BYTWJ(&(W[TWVL * 18]), TK);					TN = BYTWJ(&(W[TWVL * 82]), TM);					T5h = VSUB(TA, TC);					TD = VADD(TA, TC);					TQ = BYTWJ(&(W[TWVL * 114]), TP);					{					     V T5w, T5t, T4i, T1h, TR;					     T5w = VSUB(T5s, T5r);					     T5t = VADD(T5r, T5s);					     T4i = VADD(T1g, T1b);					     T1h = VSUB(T1b, T1g);					     T5m = VSUB(TF, TH);					     TI = VADD(TF, TH);					     T5i = VSUB(TL, TN);					     TO = VADD(TL, TN);					     TR = LD(&(x[WS(rs, 26)]), ms, &(x[0]));					     T5u = VFMA(LDK(KP707106781), T5t, T5q);					     T7h = VFNMS(LDK(KP707106781), T5t, T5q);					     T5x = VFMA(LDK(KP707106781), T5w, T5v);					     T7g = VFNMS(LDK(KP707106781), T5w, T5v);					     T1i = VFNMS(LDK(KP414213562), T1h, T16);					     T3a = VFMA(LDK(KP414213562), T16, T1h);					     T4j = VADD(T4h, T4i);					     T4C = VSUB(T4h, T4i);					     TS = BYTWJ(&(W[TWVL * 50]), TR);					}				   }			      }			      TJ = VSUB(TD, TI);			      T4k = VADD(TD, TI);			      {				   V Tb, Td, Tr, T5j, TT, Tt, Tg;				   Tb = LD(&(x[WS(rs, 4)]), ms, &(x[0]));				   Td = LD(&(x[WS(rs, 36)]), ms, &(x[0]));				   Tr = LD(&(x[WS(rs, 12)]), ms, &(x[0]));				   T5j = VSUB(TQ, TS);				   TT = VADD(TQ, TS);				   Tt = LD(&(x[WS(rs, 44)]), ms, &(x[0]));				   Tg = LD(&(x[WS(rs, 20)]), ms, &(x[0]));				   {					V Ti, Tc, Te, Ts;					Ti = LD(&(x[WS(rs, 52)]), ms, &(x[0]));					Tc = BYTWJ(&(W[TWVL * 6]), Tb);					Te = BYTWJ(&(W[TWVL * 70]), Td);					Ts = BYTWJ(&(W[TWVL * 22]), Tr);					{					     V T5k, T5n, TU, T4l, Tu;					     T5k = VADD(T5i, T5j);					     T5n = VSUB(T5i, T5j);					     TU = VSUB(TO, TT);					     T4l = VADD(TO, TT);					     Tu = BYTWJ(&(W[TWVL * 86]), Tt);					     Th = BYTWJ(&(W[TWVL * 38]), Tg);					     T59 = VSUB(Tc, Te);					     Tf = VADD(Tc, Te);					     T7e = VFNMS(LDK(KP707106781), T5k, T5h);					     T5l = VFMA(LDK(KP707106781), T5k, T5h);					     T7d = VFNMS(LDK(KP707106781), T5n, T5m);					     T5o = VFMA(LDK(KP707106781), T5n, T5m);					     T3b = VFMA(LDK(KP414213562), TJ, TU);					     TV = VFNMS(LDK(KP414213562), TU, TJ);					     T4B = VSUB(T4k, T4l);					     T4m = VADD(T4k, T4l);					     Tv = VADD(Ts, Tu);					     T5d = VSUB(Tu, Ts);					     Tj = BYTWJ(&(W[TWVL * 102]), Ti);					}				   }				   Tm = LD(&(x[WS(rs, 60)]), ms, &(x[0]));				   To = LD(&(x[WS(rs, 28)]), ms, &(x[0]));			      }			 }			 {			      V T5b, T6m, Tl, T1A, T5G, T1Q, T5K, T1C, T1D, T5e, T6n, Tw, T1H, T1J;			      {				   V T1w, T1y, T1M, T1O, Tq, T5c, T1B;				   T1w = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));				   T1y = LD(&(x[WS(rs, 37)]), ms, &(x[WS(rs, 1)]));				   T1M = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));				   T1O = LD(&(x[WS(rs, 45)]), ms, &(x[WS(rs, 1)]));				   T1B = LD(&(x[WS(rs, 21)]), ms, &(x[WS(rs, 1)]));				   {					V Tk, T5a, Tn, Tp;					Tk = VADD(Th, Tj);					T5a = VSUB(Th, Tj);					Tn = BYTWJ(&(W[TWVL * 118]), Tm);					Tp = BYTWJ(&(W[TWVL * 54]), To);					{					     V T1x, T1z, T1N, T1P;					     T1x = BYTWJ(&(W[TWVL * 8]), T1w);					     T1z = BYTWJ(&(W[TWVL * 72]), T1y);					     T1N = BYTWJ(&(W[TWVL * 24]), T1M);					     T1P = BYTWJ(&(W[TWVL * 88]), T1O);					     T5b = VFNMS(LDK(KP414213562), T5a, T59);					     T6m = VFMA(LDK(KP414213562), T59, T5a);					     T3X = VADD(Tf, Tk);					     Tl = VSUB(Tf, Tk);					     Tq = VADD(Tn, Tp);					     T5c = VSUB(Tn, Tp);					     T1A = VADD(T1x, T1z);					     T5G = VSUB(T1x, T1z);					     T1Q = VADD(T1N, T1P);					     T5K = VSUB(T1N, T1P);					     T1C = BYTWJ(&(W[TWVL * 40]), T1B);					}				   }				   T1D = LD(&(x[WS(rs, 53)]), ms, &(x[WS(rs, 1)]));				   T5e = VFNMS(LDK(KP414213562), T5d, T5c);				   T6n = VFMA(LDK(KP414213562), T5c, T5d);				   T3Y = VADD(Tq, Tv);				   Tw = VSUB(Tq, Tv);				   T1H = LD(&(x[WS(rs, 61)]), ms, &(x[WS(rs, 1)]));				   T1J = LD(&(x[WS(rs, 29)]), ms, &(x[WS(rs, 1)]));			      }			      {				   V T1I, T1K, T1F, T5H, T2k, T2l, T2z, T2B, T2j, T1E;				   T2j = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));				   T1E = BYTWJ(&(W[TWVL * 104]), T1D);				   T6o = VSUB(T6m, T6n);				   T7b = VADD(T6m, T6n);				   T5f = VADD(T5b, T5e);				   T7C = VSUB(T5e, T5b);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -