📄 r2cf_128.c
字号:
T6K = FNMS(KP472964775, T6J, T6G); } T71 = FNMS(KP773010453, T6Y, T6V); T6Z = FMA(KP773010453, T6Y, T6V); T74 = T72 - T73; T76 = T73 + T72; T70 = T6R - T6K; T6S = T6K + T6R; Ci[WS(csi, 55)] = FMA(KP903989293, T74, T71); Ci[WS(csi, 9)] = FMS(KP903989293, T74, T71); Cr[WS(csr, 9)] = FMA(KP903989293, T6S, T6D); Cr[WS(csr, 55)] = FNMS(KP903989293, T6S, T6D); Ci[WS(csi, 41)] = FMS(KP903989293, T70, T6Z); Ci[WS(csi, 23)] = FMA(KP903989293, T70, T6Z); } { E T7k, T7j, T7l, T7o, T79, T7g; T7p = FNMS(KP773010453, T78, T77); T79 = FMA(KP773010453, T78, T77); T7g = T7c + T7f; T7k = T7f - T7c; T7j = FNMS(KP773010453, T7i, T7h); T7l = FMA(KP773010453, T7i, T7h); Cr[WS(csr, 23)] = FMA(KP903989293, T76, T75); Cr[WS(csr, 41)] = FNMS(KP903989293, T76, T75); Cr[WS(csr, 7)] = FMA(KP941544065, T7g, T79); Cr[WS(csr, 57)] = FNMS(KP941544065, T7g, T79); T7o = T7m - T7n; T7q = T7m + T7n; Ci[WS(csi, 57)] = FMS(KP941544065, T7o, T7l); Ci[WS(csi, 7)] = FMA(KP941544065, T7o, T7l); Ci[WS(csi, 39)] = FMA(KP941544065, T7k, T7j); Ci[WS(csi, 25)] = FMS(KP941544065, T7k, T7j); } } } } { E T7t, T8A, T8x, T7A, T8r, T8k, T88, T81, Ta3, Ta4, T6r, T6s; { E T9L, T99, T9W, T9g, T9M, T9C, T9V, T9z, T9k, T9O, T9T, Ta0, T9H, T9v, T9m; { E T9B, T9c, T9f, T9A, T97, T98; T7t = FMA(KP923879532, T7s, T7r); T97 = FNMS(KP923879532, T7s, T7r); T98 = T8z - T8y; T8A = T8y + T8z; T9B = FNMS(KP534511135, T9a, T9b); T9c = FMA(KP534511135, T9b, T9a); Cr[WS(csr, 25)] = FNMS(KP941544065, T7q, T7p); Cr[WS(csr, 39)] = FMA(KP941544065, T7q, T7p); T9L = FMA(KP831469612, T98, T97); T99 = FNMS(KP831469612, T98, T97); T9f = FNMS(KP534511135, T9e, T9d); T9A = FMA(KP534511135, T9d, T9e); { E T9x, T9y, T9q, T9t; T8x = FMA(KP923879532, T8w, T8v); T9x = FNMS(KP923879532, T8w, T8v); T9W = T9c + T9f; T9g = T9c - T9f; T9M = T9B + T9A; T9C = T9A - T9B; T9y = T7z - T7w; T7A = T7w + T7z; T8r = T8p + T8q; T9q = T8p - T8q; T9t = T8j - T8g; T8k = T8g + T8j; { E T9R, T9r, T9S, T9u, T9j; T88 = T86 + T87; T9j = T87 - T86; T9V = FNMS(KP831469612, 
T9y, T9x); T9z = FMA(KP831469612, T9y, T9x); T9R = FMA(KP831469612, T9q, T9p); T9r = FNMS(KP831469612, T9q, T9p); T9S = FMA(KP831469612, T9t, T9s); T9u = FNMS(KP831469612, T9t, T9s); T9k = FNMS(KP831469612, T9j, T9i); T9O = FMA(KP831469612, T9j, T9i); T9T = FNMS(KP250486960, T9S, T9R); Ta0 = FMA(KP250486960, T9R, T9S); T9H = FNMS(KP599376933, T9r, T9u); T9v = FMA(KP599376933, T9u, T9r); T9m = T7X - T80; T81 = T7X + T80; } } } { E T9J, T9h, T9F, T9D, T9P, T9n; T9J = FNMS(KP881921264, T9g, T99); T9h = FMA(KP881921264, T9g, T99); T9F = FMA(KP881921264, T9C, T9z); T9D = FNMS(KP881921264, T9C, T9z); T9P = FMA(KP831469612, T9m, T9l); T9n = FNMS(KP831469612, T9m, T9l); { E T9Y, T9X, T9Z, Ta2; { E T9N, Ta1, T9G, T9o, T9U, T9Q; Ta3 = FNMS(KP881921264, T9M, T9L); T9N = FMA(KP881921264, T9M, T9L); T9Q = FNMS(KP250486960, T9P, T9O); Ta1 = FMA(KP250486960, T9O, T9P); T9G = FNMS(KP599376933, T9k, T9n); T9o = FMA(KP599376933, T9n, T9k); T9U = T9Q + T9T; T9Y = T9T - T9Q; T9X = FNMS(KP881921264, T9W, T9V); T9Z = FMA(KP881921264, T9W, T9V); { E T9K, T9I, T9E, T9w; T9K = T9G + T9H; T9I = T9G - T9H; T9E = T9v - T9o; T9w = T9o + T9v; Cr[WS(csr, 5)] = FMA(KP970031253, T9U, T9N); Cr[WS(csr, 59)] = FNMS(KP970031253, T9U, T9N); Cr[WS(csr, 21)] = FNMS(KP857728610, T9K, T9J); Cr[WS(csr, 43)] = FMA(KP857728610, T9K, T9J); Ci[WS(csi, 53)] = FMS(KP857728610, T9I, T9F); Ci[WS(csi, 11)] = FMA(KP857728610, T9I, T9F); Ci[WS(csi, 43)] = FMA(KP857728610, T9E, T9D); Ci[WS(csi, 21)] = FMS(KP857728610, T9E, T9D); Cr[WS(csr, 11)] = FMA(KP857728610, T9w, T9h); Cr[WS(csr, 53)] = FNMS(KP857728610, T9w, T9h); Ta2 = Ta0 - Ta1; Ta4 = Ta1 + Ta0; } } Ci[WS(csi, 59)] = FMA(KP970031253, Ta2, T9Z); Ci[WS(csi, 5)] = FMS(KP970031253, Ta2, T9Z); Ci[WS(csi, 37)] = FMS(KP970031253, T9Y, T9X); Ci[WS(csi, 27)] = FMA(KP970031253, T9Y, T9X); } } } { E T69, T2z, T6k, T3g, T6a, T60, T6j, T5X, T4i, T6c, T6h, T6p, T64, T5L; { E T5Y, T2U, T3f, T5Z; T5Y = FMA(KP098491403, T2M, T2T); T2U = FNMS(KP098491403, T2T, T2M); Cr[WS(csr, 
27)] = FMA(KP970031253, Ta4, Ta3); Cr[WS(csr, 37)] = FNMS(KP970031253, Ta4, Ta3); T69 = FNMS(KP980785280, T2y, T2f); T2z = FMA(KP980785280, T2y, T2f); T3f = FMA(KP098491403, T3e, T37); T5Z = FNMS(KP098491403, T37, T3e); T6k = T3f - T2U; T3g = T2U + T3f; T6a = T5Y - T5Z; T60 = T5Y + T5Z; { E T6f, T5x, T6g, T5K; T6j = FNMS(KP980785280, T5W, T5T); T5X = FMA(KP980785280, T5W, T5T); T6f = FNMS(KP980785280, T5w, T4X); T5x = FMA(KP980785280, T5w, T4X); T6g = FNMS(KP980785280, T5J, T5G); T5K = FMA(KP980785280, T5J, T5G); T4i = FMA(KP980785280, T4h, T3I); T6c = FNMS(KP980785280, T4h, T3I); T6h = FMA(KP906347169, T6g, T6f); T6p = FNMS(KP906347169, T6f, T6g); T64 = FMA(KP049126849, T5x, T5K); T5L = FNMS(KP049126849, T5K, T5x); } } { E T67, T3h, T63, T61, T6d, T4v; T67 = FNMS(KP995184726, T3g, T2z); T3h = FMA(KP995184726, T3g, T2z); T63 = FMA(KP995184726, T60, T5X); T61 = FNMS(KP995184726, T60, T5X); T6d = FNMS(KP980785280, T4u, T4r); T4v = FMA(KP980785280, T4u, T4r); { E T6m, T6l, T6n, T6q; { E T6b, T6o, T65, T4w, T6i, T6e; T6r = FNMS(KP995184726, T6a, T69); T6b = FMA(KP995184726, T6a, T69); T6e = FMA(KP906347169, T6d, T6c); T6o = FNMS(KP906347169, T6c, T6d); T65 = FMA(KP049126849, T4i, T4v); T4w = FNMS(KP049126849, T4v, T4i); T6i = T6e + T6h; T6m = T6h - T6e; T6l = FNMS(KP995184726, T6k, T6j); T6n = FMA(KP995184726, T6k, T6j); { E T68, T66, T62, T5M; T68 = T65 + T64; T66 = T64 - T65; T62 = T5L - T4w; T5M = T4w + T5L; Cr[WS(csr, 15)] = FMA(KP740951125, T6i, T6b); Cr[WS(csr, 49)] = FNMS(KP740951125, T6i, T6b); Cr[WS(csr, 31)] = FMA(KP998795456, T68, T67); Cr[WS(csr, 33)] = FNMS(KP998795456, T68, T67); Ci[WS(csi, 63)] = FMA(KP998795456, T66, T63); Ci[WS(csi, 1)] = FMS(KP998795456, T66, T63); Ci[WS(csi, 33)] = FMS(KP998795456, T62, T61); Ci[WS(csi, 31)] = FMA(KP998795456, T62, T61); Cr[WS(csr, 1)] = FMA(KP998795456, T5M, T3h); Cr[WS(csr, 63)] = FNMS(KP998795456, T5M, T3h); T6q = T6o - T6p; T6s = T6o + T6p; } } Ci[WS(csi, 49)] = FMS(KP740951125, T6q, T6n); Ci[WS(csi, 15)] = 
FMA(KP740951125, T6q, T6n); Ci[WS(csi, 47)] = FMA(KP740951125, T6m, T6l); Ci[WS(csi, 17)] = FMS(KP740951125, T6m, T6l); } } } { E T8N, T7B, T8Y, T7Q, T8O, T8E, T8X, T8B, T82, T8Q, T8V, T92, T8J, T8t; { E T8C, T7I, T7P, T8D; T8C = FNMS(KP303346683, T7E, T7H); T7I = FMA(KP303346683, T7H, T7E); Cr[WS(csr, 17)] = FNMS(KP740951125, T6s, T6r); Cr[WS(csr, 47)] = FMA(KP740951125, T6s, T6r); T8N = FNMS(KP831469612, T7A, T7t); T7B = FMA(KP831469612, T7A, T7t); T7P = FNMS(KP303346683, T7O, T7L); T8D = FMA(KP303346683, T7L, T7O); T8Y = T7P - T7I; T7Q = T7I + T7P; T8O = T8D - T8C; T8E = T8C + T8D; { E T8T, T8l, T8U, T8s; T8X = FNMS(KP831469612, T8A, T8x); T8B = FMA(KP831469612, T8A, T8x); T8T = FNMS(KP831469612, T8k, T8d); T8l = FMA(KP831469612, T8k, T8d); T8U = FNMS(KP831469612, T8r, T8o); T8s = FMA(KP831469612, T8r, T8o); T82 = FMA(KP831469612, T81, T7U); T8Q = FNMS(KP831469612, T81, T7U); T8V = FNMS(KP741650546, T8U, T8T); T92 = FMA(KP741650546, T8T, T8U); T8J = FNMS(KP148335987, T8l, T8s); T8t = FMA(KP148335987, T8s, T8l); } } { E T8L, T7R, T8H, T8F, T8R, T89; T8L = FNMS(KP956940335, T7Q, T7B); T7R = FMA(KP956940335, T7Q, T7B); T8H = FMA(KP956940335, T8E, T8B); T8F = FNMS(KP956940335, T8E, T8B); T8R = FNMS(KP831469612, T88, T85); T89 = FMA(KP831469612, T88, T85); { E T90, T8Z, T91, T94; { E T8P, T93, T8I, T8a, T8W, T8S; T95 = FNMS(KP956940335, T8O, T8N); T8P = FMA(KP956940335, T8O, T8N); T8S = FNMS(KP741650546, T8R, T8Q); T93 = FMA(KP741650546, T8Q, T8R); T8I = FNMS(KP148335987, T82, T89); T8a = FMA(KP148335987, T89, T82); T8W = T8S + T8V; T90 = T8V - T8S; T8Z = FMA(KP956940335, T8Y, T8X); T91 = FNMS(KP956940335, T8Y, T8X); { E T8M, T8K, T8G, T8u; T8M = T8I + T8J; T8K = T8I - T8J; T8G = T8t - T8a; T8u = T8a + T8t; Cr[WS(csr, 13)] = FMA(KP803207531, T8W, T8P); Cr[WS(csr, 51)] = FNMS(KP803207531, T8W, T8P); Cr[WS(csr, 29)] = FNMS(KP989176509, T8M, T8L); Cr[WS(csr, 35)] = FMA(KP989176509, T8M, T8L); Ci[WS(csi, 61)] = FMS(KP989176509, T8K, T8H); Ci[WS(csi, 3)] = 
FMA(KP989176509, T8K, T8H); Ci[WS(csi, 35)] = FMA(KP989176509, T8G, T8F); Ci[WS(csi, 29)] = FMS(KP989176509, T8G, T8F); Cr[WS(csr, 3)] = FMA(KP989176509, T8u, T7R); Cr[WS(csr, 61)] = FNMS(KP989176509, T8u, T7R); T94 = T92 - T93; T96 = T93 + T92; } } Ci[WS(csi, 51)] = FMA(KP803207531, T94, T91); Ci[WS(csi, 13)] = FMS(KP803207531, T94, T91); Ci[WS(csi, 45)] = FMS(KP803207531, T90, T8Z); Ci[WS(csi, 19)] = FMA(KP803207531, T90, T8Z); } } } } } } } Cr[WS(csr, 19)] = FMA(KP803207531, T96, T95); Cr[WS(csr, 45)] = FNMS(KP803207531, T96, T95); }}
/* NOTE(review): the two braces above appear to close the loop and the body of
 * the preceding r2cf_128 definition (the one built from FMA/FNMS/FMS calls),
 * whose opening lies before this part of the file -- confirm. */

/*
 * Descriptor handed to X(kr2c_register) together with r2cf_128 (see the
 * registration function below).  Fields, in order: 128, the codelet name
 * string, a four-number group {440, 0, 516, 0}, and the address of GENUS.
 * NOTE(review): 128 is presumably the transform size, and the four numbers
 * are presumably operation counts (adds, muls, fused multiply/adds, other)
 * for the FMA variant -- 440 + 516 matches the 956 additions quoted in the
 * statistics comment further down.  The field layout of kr2c_desc and the
 * definition of GENUS are not visible here; verify against them.
 */
static const kr2c_desc desc = { 128, "r2cf_128", {440, 0, 516, 0}, &GENUS };

/*
 * Registration entry point: passes the planner p, the r2cf_128 function and
 * the descriptor above to X(kr2c_register).  X() is a macro defined elsewhere
 * (presumably a name-mangling prefix -- confirm).
 */
void X(codelet_r2cf_128) (planner *p) { X(kr2c_register) (p, r2cf_128, &desc);}
/* Everything above this directive belongs to the HAVE_FMA branch; the matching
 * conditional opens before this part of the file. */
#else /* HAVE_FMA */
/* Generated by: ../../../genfft/gen_r2cf -compact -variables 4 -pipeline-latency 4 -n 128 -name r2cf_128 -include r2cf.h */
/*
 * This function contains 956 FP additions, 330 FP multiplications,
 * (or, 812 additions, 186 multiplications, 144 fused multiply/add),
 * 186 stack variables, 31 constants, and 256 memory accesses
 */
#include "r2cf.h"

/*
 * Non-FMA variant of r2cf_128 (machine-generated; do not edit by hand --
 * regenerate with the gen_r2cf command line quoted above).
 *
 * From the visible part of the body: the function loops v times, advancing
 * R0 and R1 by ivs and Cr and Ci by ovs after each iteration, and reads its
 * input through R0[WS(rs, k)].  rs, csr and csi are stride arguments used
 * via WS(); their exact semantics are defined outside this file.
 * NOTE(review): R0/R1 are presumably the two halves of the real input and
 * Cr/Ci the real/imaginary outputs -- confirm against r2cf.h.
 */
static void r2cf_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs)
{
     /* Constant table: each DK(name, value) pairs a KPddddddddd identifier
      * with a literal whose first nine fractional digits spell the name
      * (e.g. KP803207531 <-> +0.803207531...).  DK itself is a macro
      * defined elsewhere. */
     DK(KP803207531, +0.803207531480644909806676512963141923879569427);
     DK(KP595699304, +0.595699304492433343467036528829969889511926338);
     DK(KP146730474, +0.146730474455361751658850129646717819706215317);
     DK(KP989176509, +0.989176509964780973451673738016243063983689533);
     DK(KP740951125, +0.740951125354959091175616897495162729728955309);
     DK(KP671558954, +0.671558954847018400625376850427421803228750632);
     DK(KP049067674, +0.049067674327418014254954976942682658314745363);
     DK(KP998795456, +0.998795456205172392714771604759100694443203615);
     DK(KP242980179, +0.242980179903263889948274162077471118320990783);
     DK(KP970031253, +0.970031253194543992603984207286100251456865962);
     DK(KP514102744, +0.514102744193221726593693838968815772608049120);
     DK(KP857728610, +0.857728610000272069902269984284770137042490799);
     DK(KP336889853,
+0.336889853392220050689253212619147570477766780); DK(KP941544065, +0.941544065183020778412509402599502357185589796); DK(KP427555093, +0.427555093430282094320966856888798534304578629); DK(KP903989293, +0.903989293123443331586200297230537048710132025); DK(KP098017140, +0.098017140329560601994195563888641845861136673); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP634393284, +0.634393284163645498215171613225493370675687095); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP471396736, +0.471396736825997648556387625905254377657460319); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP290284677, +0.290284677254462367636192375817395274691476278); DK(KP555570233, +0.555570233019602224742830813948532874374937191); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP195090322, +0.195090322016128267848284868477022240927691618); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E TcD, TdU, T27, T7r, T5S, T8y, Tf, Ta5, Tu, Tbq, TcG, TdV, T2e, T8z, T5V; E T7s, TK, Ta6, TcK, TdX, T2o, T5X, T7w, T8B, TZ, Ta7, TcN, TdY, T2x, T5Y; E T7z, T8C, T1g, Taa, TcU, TeA, TcX, Tez, T1v, Tab, T2M, T6z, T7E, T9e, T7H; E T9d, T2T, T6A, T4X, T6L, Tdz, TeL, TdK, TeP, T5G, T6P, T8d, T9p, TaV, Tc3; E Tbi, Tc4, T8o, T9t, T3I, T6H, Tde, TeH, Tdp, TeF, T4r, T6F, T7U, T9l, Tao; E TbW, TaL, TbX, T85, T9j, T1L, Tad, Td3, Tew, Td6, Tex, T20, Tae, T37, T6x; E T7L, T9a, T7O, T9b, T3e, T6w, TbZ, Tc0, T3Z, T4s, Tds, TeI, T4g, T4t, T80; E T87, Tdl, TeE, T7X, T86, 
TaD, TaM, Tc6, Tc7, T5e, T5H, TdN, TeM, T5v, T5I; E T8j, T8q, TdG, TeO, T8g, T8p, Tba, Tbj; { E T3, T23, Td, T25, T6, T5R, Ta, T24; { E T1, T2, Tb, Tc; T1 = R0[0]; T2 = R0[WS(rs, 32)]; T3 = T1 + T2; T23 = T1 - T2; Tb = R0[WS(rs, 56)]; Tc = R0[WS(rs, 24)]; Td = Tb + Tc; T25 = Tb - Tc; } { E T4, T5, T8, T9; T4 = R0[WS(rs, 16)]; T5 = R0[WS(rs, 48)]; T6 = T4 + T5; T5R = T4 - T5; T8 = R0[WS(rs, 8)]; T9 = R0[WS(rs, 40)]; Ta = T8 + T9; T24 = T8 - T9; } TcD = T3 - T6; TdU = Td - Ta; { E T26, T5Q, T7, Te; T26 = KP707106781 * (T24 + T25); T27 = T23 + T26; T7r = T23 - T26; T5Q = KP707106781 * (T25 - T24); T5S = T5Q - T5R; T8y = T5R + T5Q; T7 = T3 + T6; Te = Ta + Td; Tf = T7 + Te; Ta5 = T7 - Te; } } { E Ti, T28, Ts, T2c, Tl, T29, Tp, T2b; { E Tg, Th, Tq, Tr; Tg = R0[WS(rs, 4)]; Th = R0[WS(rs, 36)];
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -