📄 r2cbiii_64.c
字号:
{ E T59, T2u, T5c, T2b, T2m, T3P, T3O, T2s, T2v; { E T5h, T2g, T6m, T5g, TR, TY, T6n; T59 = TN - TQ; TR = TN + TQ; TY = TU + TX; T5h = TU - TX; T2g = T2c - T2f; T2u = T2c + T2f; T6m = T5e + T5f; T5g = T5e - T5f; T6l = TR - TY; TZ = TR + TY; T6n = T5b + T5a; T5c = T5a - T5b; T3L = T27 + T2a; T2b = T27 - T2a; T5P = T5h + T5g; T5i = T5g - T5h; T6M = T6n + T6m; T6o = T6m - T6n; T2m = T2g + T2l; T3P = T2g - T2l; } T3O = T2r + T2q; T2s = T2q - T2r; T2v = T2t - T2u; T3M = T2u + T2t; T38 = FNMS(KP707106781, T2m, T2b); T2n = FMA(KP707106781, T2m, T2b); T37 = FNMS(KP707106781, T2v, T2s); T2w = FMA(KP707106781, T2v, T2s); T4l = FMA(KP707106781, T3P, T3O); T3Q = FNMS(KP707106781, T3P, T3O); T5Q = T59 - T5c; T5d = T59 + T5c; } } } { E T4m, T3N, T5t, T5L, T63, T4W, T5Y, T5X, T66, T5W, T67, T5S; { E T6T, T6S, T6W, T6P; { E T6L, T6O, T6Y, T6X, T6Z, Tv, T10, T70; T6L = Tf - Tu; Tv = Tf + Tu; T10 = TK + TZ; T6T = TK - TZ; T6O = T6M - T6N; T6Y = T6N + T6M; T4m = FMA(KP707106781, T3M, T3L); T3N = FNMS(KP707106781, T3M, T3L); T6X = Tv - T10; T6S = T6Q - T6R; T6Z = T6R + T6Q; R0[0] = KP2_000000000 * (Tv + T10); R0[WS(rs, 16)] = KP2_000000000 * (T6Z - T6Y); T70 = T6Y + T6Z; T6W = T6L - T6O; T6P = T6L + T6O; R0[WS(rs, 24)] = KP1_414213562 * (T70 - T6X); R0[WS(rs, 8)] = KP1_414213562 * (T6X + T70); } { E T6D, T6f, T6w, T6G, T6p, T6x, T6y, T6k, T6V, T6U; T6D = T6b - T6e; T6f = T6b + T6e; T6w = T6u - T6v; T6G = T6v + T6u; T6V = T6T + T6S; T6U = T6S - T6T; T6p = T6l + T6o; T6x = T6l - T6o; R0[WS(rs, 12)] = KP1_847759065 * (FMA(KP414213562, T6W, T6V)); R0[WS(rs, 28)] = -(KP1_847759065 * (FNMS(KP414213562, T6V, T6W))); R0[WS(rs, 20)] = KP1_847759065 * (FNMS(KP414213562, T6P, T6U)); R0[WS(rs, 4)] = KP1_847759065 * (FMA(KP414213562, T6U, T6P)); T6y = T6g + T6j; T6k = T6g - T6j; { E T5V, T5K, T5O, T5R; T5t = T5r - T5s; T5K = T5s + T5r; { E T6E, T6z, T6H, T6q; T6E = T6y + T6x; T6z = T6x - T6y; T6H = T6k - T6p; T6q = T6k + T6p; { E T6F, T6K, T6B, T6A; T6F = FNMS(KP707106781, T6E, T6D); T6K = 
FMA(KP707106781, T6E, T6D); T6B = FNMS(KP707106781, T6z, T6w); T6A = FMA(KP707106781, T6z, T6w); { E T6I, T6J, T6C, T6r; T6I = FNMS(KP707106781, T6H, T6G); T6J = FMA(KP707106781, T6H, T6G); T6C = FNMS(KP707106781, T6q, T6f); T6r = FMA(KP707106781, T6q, T6f); R0[WS(rs, 22)] = KP1_662939224 * (FNMS(KP668178637, T6F, T6I)); R0[WS(rs, 6)] = KP1_662939224 * (FMA(KP668178637, T6I, T6F)); R0[WS(rs, 30)] = -(KP1_961570560 * (FNMS(KP198912367, T6J, T6K))); R0[WS(rs, 14)] = KP1_961570560 * (FMA(KP198912367, T6K, T6J)); R0[WS(rs, 26)] = -(KP1_662939224 * (FNMS(KP668178637, T6B, T6C))); R0[WS(rs, 10)] = KP1_662939224 * (FMA(KP668178637, T6C, T6B)); R0[WS(rs, 18)] = KP1_961570560 * (FNMS(KP198912367, T6r, T6A)); R0[WS(rs, 2)] = KP1_961570560 * (FMA(KP198912367, T6A, T6r)); T5L = FNMS(KP707106781, T5K, T5J); T63 = FMA(KP707106781, T5K, T5J); } } } T5V = T4Q - T4V; T4W = T4Q + T4V; T5Y = FNMS(KP414213562, T5M, T5N); T5O = FMA(KP414213562, T5N, T5M); T5R = FNMS(KP414213562, T5Q, T5P); T5X = FMA(KP414213562, T5P, T5Q); T66 = FMA(KP707106781, T5V, T5U); T5W = FNMS(KP707106781, T5V, T5U); T67 = T5O + T5R; T5S = T5O - T5R; } } } { E T1h, T2L, T2I, T3h, T3p, T1E, T3n, T3s, T3b, T3k, T3e, T3o; { E T4X, T5B, T5v, T5w, T5E, T5u, T5F, T5k, T58, T5j; { E T68, T69, T62, T5T, T64, T5Z; T68 = FNMS(KP923879532, T67, T66); T69 = FMA(KP923879532, T67, T66); T62 = FNMS(KP923879532, T5S, T5L); T5T = FMA(KP923879532, T5S, T5L); T64 = T5Y + T5X; T5Z = T5X - T5Y; T4X = FMA(KP707106781, T4W, T4L); T5B = FNMS(KP707106781, T4W, T4L); { E T65, T6a, T61, T60; T65 = FNMS(KP923879532, T64, T63); T6a = FMA(KP923879532, T64, T63); T61 = FNMS(KP923879532, T5Z, T5W); T60 = FMA(KP923879532, T5Z, T5W); R0[WS(rs, 23)] = KP1_546020906 * (FNMS(KP820678790, T65, T68)); R0[WS(rs, 7)] = KP1_546020906 * (FMA(KP820678790, T68, T65)); R0[WS(rs, 31)] = -(KP1_990369453 * (FNMS(KP098491403, T69, T6a))); R0[WS(rs, 15)] = KP1_990369453 * (FMA(KP098491403, T6a, T69)); R0[WS(rs, 27)] = -(KP1_763842528 * (FNMS(KP534511135, T61, 
T62))); R0[WS(rs, 11)] = KP1_763842528 * (FMA(KP534511135, T62, T61)); R0[WS(rs, 19)] = KP1_913880671 * (FNMS(KP303346683, T5T, T60)); R0[WS(rs, 3)] = KP1_913880671 * (FMA(KP303346683, T60, T5T)); } } T5v = FNMS(KP414213562, T52, T57); T58 = FMA(KP414213562, T57, T52); T5j = FNMS(KP414213562, T5i, T5d); T5w = FMA(KP414213562, T5d, T5i); T5E = FNMS(KP707106781, T5t, T5q); T5u = FMA(KP707106781, T5t, T5q); T5F = T58 - T5j; T5k = T58 + T5j; { E T3l, T33, T3c, T3m, T3a, T3d; { E T39, T3f, T3g, T36; { E T31, T5G, T5H, T5A, T5l, T5C, T5x, T32; T1h = FMA(KP707106781, T1g, T15); T31 = FNMS(KP707106781, T1g, T15); T5G = FNMS(KP923879532, T5F, T5E); T5H = FMA(KP923879532, T5F, T5E); T5A = FNMS(KP923879532, T5k, T4X); T5l = FMA(KP923879532, T5k, T4X); T5C = T5w - T5v; T5x = T5v + T5w; T32 = T2K + T2J; T2L = T2J - T2K; T39 = FNMS(KP668178637, T38, T37); T3f = FMA(KP668178637, T37, T38); { E T5D, T5I, T5z, T5y; T5D = FNMS(KP923879532, T5C, T5B); T5I = FMA(KP923879532, T5C, T5B); T5z = FNMS(KP923879532, T5x, T5u); T5y = FMA(KP923879532, T5x, T5u); T3l = FMA(KP923879532, T32, T31); T33 = FNMS(KP923879532, T32, T31); R0[WS(rs, 21)] = KP1_763842528 * (FNMS(KP534511135, T5D, T5G)); R0[WS(rs, 5)] = KP1_763842528 * (FMA(KP534511135, T5G, T5D)); R0[WS(rs, 29)] = -(KP1_913880671 * (FNMS(KP303346683, T5H, T5I))); R0[WS(rs, 13)] = KP1_913880671 * (FMA(KP303346683, T5I, T5H)); R0[WS(rs, 25)] = -(KP1_546020906 * (FNMS(KP820678790, T5z, T5A))); R0[WS(rs, 9)] = KP1_546020906 * (FMA(KP820678790, T5A, T5z)); R0[WS(rs, 17)] = KP1_990369453 * (FNMS(KP098491403, T5l, T5y)); R0[WS(rs, 1)] = KP1_990369453 * (FMA(KP098491403, T5y, T5l)); T3g = FMA(KP668178637, T34, T35); T36 = FNMS(KP668178637, T35, T34); } } T2I = FNMS(KP707106781, T2H, T2E); T3c = FMA(KP707106781, T2H, T2E); T3m = T3g + T3f; T3h = T3f - T3g; T3p = T39 - T36; T3a = T36 + T39; T3d = T1s - T1D; T1E = T1s + T1D; } T3n = FNMS(KP831469612, T3m, T3l); T3s = FMA(KP831469612, T3m, T3l); T3b = FNMS(KP831469612, T3a, T33); T3k = 
FMA(KP831469612, T3a, T33); T3e = FMA(KP923879532, T3d, T3c); T3o = FNMS(KP923879532, T3d, T3c); } } { E T3v, T3Z, T3W, T4v, T4D, T3C, T4B, T4G, T4p, T4y, T4s, T4C; { E T4z, T4h, T4q, T4A, T4o, T4r; { E T4n, T4t, T4u, T4k, T4f, T4g; T3v = FNMS(KP707106781, T3u, T3t); T4f = FMA(KP707106781, T3u, T3t); T4g = T3Y + T3X; T3Z = T3X - T3Y; { E T3r, T3q, T3i, T3j; T3r = FNMS(KP831469612, T3p, T3o); T3q = FMA(KP831469612, T3p, T3o); T3i = FNMS(KP831469612, T3h, T3e); T3j = FMA(KP831469612, T3h, T3e); R1[WS(rs, 22)] = -(KP1_606415062 * (FMA(KP741650546, T3n, T3q))); R1[WS(rs, 6)] = KP1_606415062 * (FNMS(KP741650546, T3q, T3n)); R1[WS(rs, 30)] = -(KP1_978353019 * (FMA(KP148335987, T3r, T3s))); R1[WS(rs, 14)] = -(KP1_978353019 * (FNMS(KP148335987, T3s, T3r))); R1[WS(rs, 26)] = -(KP1_715457220 * (FMA(KP599376933, T3j, T3k))); R1[WS(rs, 10)] = -(KP1_715457220 * (FNMS(KP599376933, T3k, T3j))); R1[WS(rs, 18)] = -(KP1_940062506 * (FMA(KP250486960, T3b, T3i))); R1[WS(rs, 2)] = KP1_940062506 * (FNMS(KP250486960, T3i, T3b)); T4z = FMA(KP923879532, T4g, T4f); T4h = FNMS(KP923879532, T4g, T4f); } T4n = FNMS(KP198912367, T4m, T4l); T4t = FMA(KP198912367, T4l, T4m); T4u = FNMS(KP198912367, T4i, T4j); T4k = FMA(KP198912367, T4j, T4i); T3W = FNMS(KP707106781, T3V, T3U); T4q = FMA(KP707106781, T3V, T3U); T4A = T4u + T4t; T4v = T4t - T4u; T4D = T4k + T4n; T4o = T4k - T4n; T4r = T3y + T3B; T3C = T3y - T3B; } T4B = FNMS(KP980785280, T4A, T4z); T4G = FMA(KP980785280, T4A, T4z); T4p = FMA(KP980785280, T4o, T4h); T4y = FNMS(KP980785280, T4o, T4h); T4s = FNMS(KP923879532, T4r, T4q); T4C = FMA(KP923879532, T4r, T4q); } { E T2P, T2X, T2V, T30, T2z, T2S, T2M, T2W; { E T2T, T1F, T2U, T2y; { E T2x, T2N, T2O, T26; { E T4F, T4E, T4w, T4x; T4F = FMA(KP980785280, T4D, T4C); T4E = FNMS(KP980785280, T4D, T4C); T4w = FMA(KP980785280, T4v, T4s); T4x = FNMS(KP980785280, T4v, T4s); R1[WS(rs, 23)] = KP1_481902250 * (FNMS(KP906347169, T4B, T4E)); R1[WS(rs, 7)] = KP1_481902250 * (FMA(KP906347169, T4E, T4B)); 
R1[WS(rs, 31)] = -(KP1_997590912 * (FNMS(KP049126849, T4F, T4G))); R1[WS(rs, 15)] = KP1_997590912 * (FMA(KP049126849, T4G, T4F)); R1[WS(rs, 27)] = -(KP1_807978586 * (FNMS(KP472964775, T4x, T4y))); R1[WS(rs, 11)] = KP1_807978586 * (FMA(KP472964775, T4y, T4x)); R1[WS(rs, 19)] = KP1_883088130 * (FNMS(KP357805721, T4p, T4w)); R1[WS(rs, 3)] = KP1_883088130 * (FMA(KP357805721, T4w, T4p)); T2T = FNMS(KP923879532, T1E, T1h); T1F = FMA(KP923879532, T1E, T1h); } T2x = FNMS(KP198912367, T2w, T2n); T2N = FMA(KP198912367, T2n, T2w); T2O = FMA(KP198912367, T1W, T25); T26 = FNMS(KP198912367, T25, T1W); T2U = T2O + T2N; T2P = T2N - T2O; T2X = T26 - T2x; T2y = T26 + T2x; } T2V = FNMS(KP980785280, T2U, T2T); T30 = FMA(KP980785280, T2U, T2T); T2z = FMA(KP980785280, T2y, T1F); T2S = FNMS(KP980785280, T2y, T1F); T2M = FNMS(KP923879532, T2L, T2I); T2W = FMA(KP923879532, T2L, T2I); } { E T47, T3D, T48, T3S; { E T3K, T41, T42, T3R; { E T2Z, T2Y, T2Q, T2R; T2Z = FNMS(KP980785280, T2X, T2W); T2Y = FMA(KP980785280, T2X, T2W); T2Q = FNMS(KP980785280, T2P, T2M); T2R = FMA(KP980785280, T2P, T2M); R1[WS(rs, 20)] = -(KP1_807978586 * (FMA(KP472964775, T2V, T2Y))); R1[WS(rs, 4)] = KP1_807978586 * (FNMS(KP472964775, T2Y, T2V)); R1[WS(rs, 28)] = -(KP1_883088130 * (FMA(KP357805721, T2Z, T30))); R1[WS(rs, 12)] = -(KP1_883088130 * (FNMS(KP357805721, T30, T2Z))); R1[WS(rs, 24)] = -(KP1_481902250 * (FMA(KP906347169, T2R, T2S))); R1[WS(rs, 8)] = -(KP1_481902250 * (FNMS(KP906347169, T2S, T2R))); R1[WS(rs, 16)] = -(KP1_997590912 * (FMA(KP049126849, T2z, T2Q))); R1[0] = KP1_997590912 * (FNMS(KP049126849, T2Q, T2z)); T47 = FNMS(KP923879532, T3C, T3v); T3D = FMA(KP923879532, T3C, T3v); } T3K = FMA(KP668178637, T3J, T3G); T41 = FNMS(KP668178637, T3G, T3J); T42 = FMA(KP668178637, T3N, T3Q); T3R = FNMS(KP668178637, T3Q, T3N); T48 = T42 - T41; T43 = T41 + T42; T4b = T3K - T3R; T3S = T3K + T3R; } T49 = FNMS(KP831469612, T48, T47); T4e = FMA(KP831469612, T48, T47); T3T = FMA(KP831469612, T3S, T3D); T46 = 
FNMS(KP831469612, T3S, T3D); T40 = FMA(KP923879532, T3Z, T3W); T4a = FNMS(KP923879532, T3Z, T3W); } } } } } }
/*
 * Last output group of this function body: combines T3T, T40, T43, T46,
 * T49, T4a, T4b and T4e (all assigned earlier) and stores eight results
 * to R1[WS(rs, k)] for k = 21, 5, 29, 13, 25, 9, 17 and 1.
 */
{ E T4d, T4c, T44, T45; T4d = FMA(KP831469612, T4b, T4a); T4c = FNMS(KP831469612, T4b, T4a); T44 = FMA(KP831469612, T43, T40); T45 = FNMS(KP831469612, T43, T40); R1[WS(rs, 21)] = KP1_715457220 * (FNMS(KP599376933, T49, T4c)); R1[WS(rs, 5)] = KP1_715457220 * (FMA(KP599376933, T4c, T49)); R1[WS(rs, 29)] = -(KP1_940062506 * (FNMS(KP250486960, T4d, T4e))); R1[WS(rs, 13)] = KP1_940062506 * (FMA(KP250486960, T4e, T4d)); R1[WS(rs, 25)] = -(KP1_606415062 * (FNMS(KP741650546, T45, T46))); R1[WS(rs, 9)] = KP1_606415062 * (FMA(KP741650546, T46, T45)); R1[WS(rs, 17)] = KP1_978353019 * (FNMS(KP148335987, T3T, T44)); R1[WS(rs, 1)] = KP1_978353019 * (FMA(KP148335987, T44, T3T)); } }}
/*
 * Descriptor handed to the registration call below. Its initializer holds
 * the number 64, the name string "r2cbIII_64", the four-number group
 * {238, 64, 196, 0} and a pointer to GENUS.
 * NOTE(review): 64 is presumably the transform size and the group presumably
 * the operation counts (additions, multiplications, fused multiply/adds,
 * other) of this variant -- confirm against the kr2c_desc declaration, which
 * is not visible in this excerpt.
 */
static const kr2c_desc desc = { 64, "r2cbIII_64", {238, 64, 196, 0}, &GENUS };
/*
 * Registration entry point: passes the planner p, the r2cbIII_64 function and
 * the address of desc to X(kr2c_register).
 */
void X(codelet_r2cbIII_64) (planner *p) { X(kr2c_register) (p, r2cbIII_64, &desc);}
#else /* HAVE_FMA */
/* Generated by: ../../../genfft/gen_r2cb -compact -variables 4 -pipeline-latency 4 -sign 1 -n 64 -name r2cbIII_64 -dft-III -include r2cbIII.h */
/*
 * This function contains 434 FP additions, 208 FP multiplications,
 * (or, 342 additions, 116 multiplications, 92 fused multiply/add),
 * 130 stack variables, 39 constants, and 128 memory accesses
 */
#include "r2cbIII.h"
static void r2cbIII_64(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -