⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 t1sv_32.c

📁 用于DFT(离散傅里叶变换)计算的C语言库的最新版本,包含丰富的函数。
💻 C
📖 第 1 页 / 共 5 页
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Mon Feb  9 19:53:24 EST 2009 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twiddle -fma -reorder-insns -schedule-for-pipeline -simd -compact -variables 4 -pipeline-latency 8 -n 32 -name t1sv_32 -include ts.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 236 additions, 62 multiplications, 198 fused multiply/add), * 158 stack variables, 7 constants, and 128 memory accesses */#include "ts.h"static void t1sv_32(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);     DVK(KP668178637, +0.668178637919298919997757686523080761552472251);     DVK(KP198912367, +0.198912367379658006911597622644676228597850501);     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);     DVK(KP414213562, +0.414213562373095048801688724209698078569671875);     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);     INT m;     for (m = mb, W = W + (mb * 62); m < me; m = m + 
(2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 62), MAKE_VOLATILE_STRIDE(rs)) {	  V T8Z, T90;	  {	       V T87, T8x, T3w, T8, T3B, T83, Tl, T8y, T6F, Tz, T3J, T5T, T6G, TM, T3Q;	       V T5U, T46, T5Y, T7D, T6L, T5X, T3Z, T6M, T1f, T4l, T61, T7E, T6R, T60, T4e;	       V T6O, T1G, T5r, T6c, T78, T7N, T54, T6f, T32, T7b, T4S, T65, T6X, T7I, T4v;	       V T68, T29, T70, T4x, T2f, T5b, T5s, T7O, T7e, T5t, T5i, T79, T3t, T2h, T2k;	       V T2j, T2o, T2r, T4H, T2y, T2n, T2q, T4y, T2i;	       {		    V T3U, TU, TW, TZ, TY, T13, T16, T12, T15, T3V, TX, T44, T1d;		    {			 V T1, T86, T3, T6, T5, Ta, Td, Tg, Tj, Tf, T84, T4, Tc, Ti, T3x;			 V Tb, T2, T9;			 T1 = LD(&(ri[0]), ms, &(ri[0]));			 T86 = LD(&(ii[0]), ms, &(ii[0]));			 T3 = LD(&(ri[WS(rs, 16)]), ms, &(ri[0]));			 T6 = LD(&(ii[WS(rs, 16)]), ms, &(ii[0]));			 T2 = LDW(&(W[TWVL * 30]));			 T5 = LDW(&(W[TWVL * 31]));			 Ta = LD(&(ri[WS(rs, 8)]), ms, &(ri[0]));			 Td = LD(&(ii[WS(rs, 8)]), ms, &(ii[0]));			 T9 = LDW(&(W[TWVL * 14]));			 Tg = LD(&(ri[WS(rs, 24)]), ms, &(ri[0]));			 Tj = LD(&(ii[WS(rs, 24)]), ms, &(ii[0]));			 Tf = LDW(&(W[TWVL * 46]));			 T84 = VMUL(T2, T6);			 T4 = VMUL(T2, T3);			 Tc = LDW(&(W[TWVL * 15]));			 Ti = LDW(&(W[TWVL * 47]));			 T3x = VMUL(T9, Td);			 Tb = VMUL(T9, Ta);			 {			      V Tu, Tx, T3F, Ts, Tt, Tw;			      {				   V To, Tr, Tq, T3E, Tp;				   {					V T3y, Te, Tn, T3A, Tk;					{					     V T3z, Th, T85, T7;					     To = LD(&(ri[WS(rs, 4)]), ms, &(ri[0]));					     T3z = VMUL(Tf, Tj);					     Th = VMUL(Tf, Tg);					     T85 = VFNMS(T5, T3, T84);					     T7 = VFMA(T5, T6, T4);					     Tr = LD(&(ii[WS(rs, 4)]), ms, &(ii[0]));					     T3y = VFNMS(Tc, Ta, T3x);					     Te = VFMA(Tc, Td, Tb);					     Tn = LDW(&(W[TWVL * 6]));					     T3A = VFNMS(Ti, Tg, T3z);					     Tk = VFMA(Ti, Tj, Th);					     T87 = VADD(T85, T86);					     T8x = VSUB(T86, T85);					     T3w = VSUB(T1, T7);					     T8 = VADD(T1, T7);					}					Tq = LDW(&(W[TWVL * 7]));			
		T3E = VMUL(Tn, Tr);					Tp = VMUL(Tn, To);					T3B = VSUB(T3y, T3A);					T83 = VADD(T3y, T3A);					Tl = VADD(Te, Tk);					T8y = VSUB(Te, Tk);				   }				   Tu = LD(&(ri[WS(rs, 20)]), ms, &(ri[0]));				   Tx = LD(&(ii[WS(rs, 20)]), ms, &(ii[0]));				   T3F = VFNMS(Tq, To, T3E);				   Ts = VFMA(Tq, Tr, Tp);				   Tt = LDW(&(W[TWVL * 38]));				   Tw = LDW(&(W[TWVL * 39]));			      }			      {				   V TB, TE, TD, TH, TK, T3G, Tv, TG, TJ, T3L, TC, TA;				   TB = LD(&(ri[WS(rs, 28)]), ms, &(ri[0]));				   TE = LD(&(ii[WS(rs, 28)]), ms, &(ii[0]));				   TA = LDW(&(W[TWVL * 54]));				   TD = LDW(&(W[TWVL * 55]));				   TH = LD(&(ri[WS(rs, 12)]), ms, &(ri[0]));				   TK = LD(&(ii[WS(rs, 12)]), ms, &(ii[0]));				   T3G = VMUL(Tt, Tx);				   Tv = VMUL(Tt, Tu);				   TG = LDW(&(W[TWVL * 22]));				   TJ = LDW(&(W[TWVL * 23]));				   T3L = VMUL(TA, TE);				   TC = VMUL(TA, TB);				   {					V T19, T1c, T3P, T3K, T18, T1b, TV, T43, T1a;					{					     V TQ, TT, T3M, TF, TS, T3I, T3D, T3O, TL, T3T, TR;					     {						  V T3H, Ty, T3N, TI, TP;						  TQ = LD(&(ri[WS(rs, 2)]), ms, &(ri[0]));						  TT = LD(&(ii[WS(rs, 2)]), ms, &(ii[0]));						  T3H = VFNMS(Tw, Tu, T3G);						  Ty = VFMA(Tw, Tx, Tv);						  T3N = VMUL(TG, TK);						  TI = VMUL(TG, TH);						  T3M = VFNMS(TD, TB, T3L);						  TF = VFMA(TD, TE, TC);						  TP = LDW(&(W[TWVL * 2]));						  TS = LDW(&(W[TWVL * 3]));						  T6F = VADD(T3F, T3H);						  T3I = VSUB(T3F, T3H);						  Tz = VADD(Ts, Ty);						  T3D = VSUB(Ts, Ty);						  T3O = VFNMS(TJ, TH, T3N);						  TL = VFMA(TJ, TK, TI);						  T3T = VMUL(TP, TT);						  TR = VMUL(TP, TQ);					     }					     T19 = LD(&(ri[WS(rs, 26)]), ms, &(ri[0]));					     T1c = LD(&(ii[WS(rs, 26)]), ms, &(ii[0]));					     T3J = VADD(T3D, T3I);					     T5T = VSUB(T3I, T3D);					     T6G = VADD(T3M, T3O);					     T3P = VSUB(T3M, T3O);					     TM = VADD(TF, TL);					     T3K = VSUB(TF, TL);					     T3U = VFNMS(TS, TQ, T3T);					     TU = VFMA(TS, TT, TR);					     T18 = 
LDW(&(W[TWVL * 50]));					     T1b = LDW(&(W[TWVL * 51]));					}					TW = LD(&(ri[WS(rs, 18)]), ms, &(ri[0]));					TZ = LD(&(ii[WS(rs, 18)]), ms, &(ii[0]));					T3Q = VSUB(T3K, T3P);					T5U = VADD(T3K, T3P);					TV = LDW(&(W[TWVL * 34]));					TY = LDW(&(W[TWVL * 35]));					T43 = VMUL(T18, T1c);					T1a = VMUL(T18, T19);					T13 = LD(&(ri[WS(rs, 10)]), ms, &(ri[0]));					T16 = LD(&(ii[WS(rs, 10)]), ms, &(ii[0]));					T12 = LDW(&(W[TWVL * 18]));					T15 = LDW(&(W[TWVL * 19]));					T3V = VMUL(TV, TZ);					TX = VMUL(TV, TW);					T44 = VFNMS(T1b, T19, T43);					T1d = VFMA(T1b, T1c, T1a);				   }			      }			 }		    }		    {			 V T4Z, T2H, T2J, T2M, T2L, T2Q, T2T, T2P, T2S, T5p, T30, T50, T2K;			 {			      V T49, T1l, T1n, T1q, T1p, T1u, T1x, T4j, T1E, T1t, T1w, T4a, T1o;			      {				   V T1A, T1D, T1C, T4i, T1B, T1m;				   {					V T1h, T1k, T41, T14, T3W, T10, T1g, T1j;					T1h = LD(&(ri[WS(rs, 30)]), ms, &(ri[0]));					T1k = LD(&(ii[WS(rs, 30)]), ms, &(ii[0]));					T41 = VMUL(T12, T16);					T14 = VMUL(T12, T13);					T3W = VFNMS(TY, TW, T3V);					T10 = VFMA(TY, TZ, TX);					T1g = LDW(&(W[TWVL * 58]));					T1j = LDW(&(W[TWVL * 59]));					{					     V T6J, T3X, T11, T40, T48, T1i, T6K, T45, T1e, T3Y, T1z, T42, T17;					     T1A = LD(&(ri[WS(rs, 22)]), ms, &(ri[0]));					     T1D = LD(&(ii[WS(rs, 22)]), ms, &(ii[0]));					     T42 = VFNMS(T15, T13, T41);					     T17 = VFMA(T15, T16, T14);					     T6J = VADD(T3U, T3W);					     T3X = VSUB(T3U, T3W);					     T11 = VADD(TU, T10);					     T40 = VSUB(TU, T10);					     T48 = VMUL(T1g, T1k);					     T1i = VMUL(T1g, T1h);					     T6K = VADD(T42, T44);					     T45 = VSUB(T42, T44);					     T1e = VADD(T17, T1d);					     T3Y = VSUB(T17, T1d);					     T1z = LDW(&(W[TWVL * 42]));					     T1C = LDW(&(W[TWVL * 43]));					     T49 = VFNMS(T1j, T1h, T48);					     T1l = VFMA(T1j, T1k, T1i);					     T46 = VADD(T40, T45);					     T5Y = VSUB(T40, T45);					     T7D = VADD(T6J, T6K);					     T6L = VSUB(T6J, T6K);					     T5X 
= VADD(T3X, T3Y);					     T3Z = VSUB(T3X, T3Y);					     T6M = VSUB(T11, T1e);					     T1f = VADD(T11, T1e);					     T4i = VMUL(T1z, T1D);					     T1B = VMUL(T1z, T1A);					}				   }				   T1n = LD(&(ri[WS(rs, 14)]), ms, &(ri[0]));				   T1q = LD(&(ii[WS(rs, 14)]), ms, &(ii[0]));				   T1m = LDW(&(W[TWVL * 26]));				   T1p = LDW(&(W[TWVL * 27]));				   T1u = LD(&(ri[WS(rs, 6)]), ms, &(ri[0]));				   T1x = LD(&(ii[WS(rs, 6)]), ms, &(ii[0]));				   T4j = VFNMS(T1C, T1A, T4i);				   T1E = VFMA(T1C, T1D, T1B);				   T1t = LDW(&(W[TWVL * 10]));				   T1w = LDW(&(W[TWVL * 11]));				   T4a = VMUL(T1m, T1q);				   T1o = VMUL(T1m, T1n);			      }			      {				   V T2W, T2Z, T6P, T4c, T1s, T4f, T6Q, T4k, T1F, T4d, T2V, T2Y, T5o, T2X, T2I;				   {					V T2D, T2G, T2C, T2F, T4g, T1v, T4b, T1r;					T2D = LD(&(ri[WS(rs, 31)]), ms, &(ri[WS(rs, 1)]));					T2G = LD(&(ii[WS(rs, 31)]), ms, &(ii[WS(rs, 1)]));					T2C = LDW(&(W[TWVL * 60]));					T2F = LDW(&(W[TWVL * 61]));					T4g = VMUL(T1t, T1x);					T1v = VMUL(T1t, T1u);					T4b = VFNMS(T1p, T1n, T4a);					T1r = VFMA(T1p, T1q, T1o);					T2W = LD(&(ri[WS(rs, 23)]), ms, &(ri[WS(rs, 1)]));					T2Z = LD(&(ii[WS(rs, 23)]), ms, &(ii[WS(rs, 1)]));					{					     V T4Y, T2E, T4h, T1y;					     T4Y = VMUL(T2C, T2G);					     T2E = VMUL(T2C, T2D);					     T4h = VFNMS(T1w, T1u, T4g);					     T1y = VFMA(T1w, T1x, T1v);					     T6P = VADD(T49, T4b);					     T4c = VSUB(T49, T4b);					     T1s = VADD(T1l, T1r);					     T4f = VSUB(T1l, T1r);					     T4Z = VFNMS(T2F, T2D, T4Y);					     T2H = VFMA(T2F, T2G, T2E);					     T6Q = VADD(T4h, T4j);					     T4k = VSUB(T4h, T4j);					     T1F = VADD(T1y, T1E);					     T4d = VSUB(T1y, T1E);					     T2V = LDW(&(W[TWVL * 44]));					}					T2Y = LDW(&(W[TWVL * 45]));				   }				   T2J = LD(&(ri[WS(rs, 15)]), ms, &(ri[WS(rs, 1)]));				   T2M = LD(&(ii[WS(rs, 15)]), ms, &(ii[WS(rs, 1)]));				   T4l = VADD(T4f, T4k);				   T61 = VSUB(T4f, T4k);				   T7E = VADD(T6P, T6Q);				   T6R = VSUB(T6P, T6Q);			
	   T60 = VADD(T4c, T4d);				   T4e = VSUB(T4c, T4d);				   T6O = VSUB(T1s, T1F);				   T1G = VADD(T1s, T1F);				   T5o = VMUL(T2V, T2Z);				   T2X = VMUL(T2V, T2W);				   T2I = LDW(&(W[TWVL * 28]));				   T2L = LDW(&(W[TWVL * 29]));				   T2Q = LD(&(ri[WS(rs, 7)]), ms, &(ri[WS(rs, 1)]));				   T2T = LD(&(ii[WS(rs, 7)]), ms, &(ii[WS(rs, 1)]));				   T2P = LDW(&(W[TWVL * 12]));				   T2S = LDW(&(W[TWVL * 13]));				   T5p = VFNMS(T2Y, T2W, T5o);				   T30 = VFMA(T2Y, T2Z, T2X);				   T50 = VMUL(T2I, T2M);				   T2K = VMUL(T2I, T2J);			      }			 }			 {			      V T4q, T1O, T1Q, T1T, T1S, T1X, T20, T4Q, T27, T1W, T1Z, T4r, T1R;			      {				   V T23, T26, T25, T4P, T24, T1P;				   {					V T1K, T1N, T5m, T2R, T1J, T1M, T51, T2N;					T1K = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));					T1N = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));					T5m = VMUL(T2P, T2T);					T2R = VMUL(T2P, T2Q);					T1J = LDW(&(W[0]));					T1M = LDW(&(W[TWVL * 1]));					T51 = VFNMS(T2L, T2J, T50);					T2N = VFMA(T2L, T2M, T2K);					{					     V T76, T52, T2O, T5l, T77, T5q, T31, T53, T22;					     T23 = LD(&(ri[WS(rs, 25)]), ms, &(ri[WS(rs, 1)]));					     T26 = LD(&(ii[WS(rs, 25)]), ms, &(ii[WS(rs, 1)]));					     {						  V T5n, T2U, T4p, T1L;						  T5n = VFNMS(T2S, T2Q, T5m);						  T2U = VFMA(T2S, T2T, T2R);						  T4p = VMUL(T1J, T1N);						  T1L = VMUL(T1J, T1K);						  T76 = VADD(T4Z, T51);						  T52 = VSUB(T4Z, T51);						  T2O = VADD(T2H, T2N);						  T5l = VSUB(T2H, T2N);						  T77 = VADD(T5n, T5p);						  T5q = VSUB(T5n, T5p);						  T31 = VADD(T2U, T30);						  T53 = VSUB(T2U, T30);						  T4q = VFNMS(T1M, T1K, T4p);						  T1O = VFMA(T1M, T1N, T1L);						  T22 = LDW(&(W[TWVL * 48]));					     }					     T25 = LDW(&(W[TWVL * 49]));					     T5r = VADD(T5l, T5q);					     T6c = VSUB(T5l, T5q);					     T78 = VSUB(T76, T77);					     T7N = VADD(T76, T77);					     T54 = VSUB(T52, T53);					     T6f = VADD(T52, T53);					     T32 = VADD(T2O, T31);					     T7b = VSUB(T2O, T31);					   
  T4P = VMUL(T22, T26);					     T24 = VMUL(T22, T23);					}				   }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -