📄 n2bv_64.c
字号:
STM2(&(xo[124]), T7G, ovs, &(xo[0])); } { V T63, T64, T67, T6a; T63 = VSUB(T5N, T5U); T64 = VBYI(VSUB(T61, T5Y)); T7H = VSUB(T63, T64); STM2(&(xo[92]), T7H, ovs, &(xo[0])); T7I = VADD(T63, T64); STM2(&(xo[36]), T7I, ovs, &(xo[0])); T67 = VBYI(VSUB(T65, T66)); T6a = VSUB(T68, T69); T7J = VADD(T67, T6a); STM2(&(xo[60]), T7J, ovs, &(xo[0])); T7K = VSUB(T6a, T67); STM2(&(xo[68]), T7K, ovs, &(xo[0])); } } { V T7M, T7O, T7P, T7R; { V T11, T2C, T2v, T2D, T2e, T2z, T2s, T2A; { V Tr, T10, T2t, T2u; Tr = VSUB(Tb, Tq); T10 = VSUB(TI, TZ); T11 = VSUB(Tr, T10); T2C = VADD(Tr, T10); T2t = VFNMS(LDK(KP471396736), T1s, VMUL(LDK(KP881921264), T1B)); T2u = VFMA(LDK(KP471396736), T23, VMUL(LDK(KP881921264), T2c)); T2v = VSUB(T2t, T2u); T2D = VADD(T2t, T2u); } { V T1C, T2d, T2i, T2r; T1C = VFMA(LDK(KP881921264), T1s, VMUL(LDK(KP471396736), T1B)); T2d = VFNMS(LDK(KP471396736), T2c, VMUL(LDK(KP881921264), T23)); T2e = VSUB(T1C, T2d); T2z = VADD(T1C, T2d); T2i = VSUB(T2g, T2h); T2r = VSUB(T2l, T2q); T2s = VSUB(T2i, T2r); T2A = VADD(T2r, T2i); } { V T2f, T2w, T7L, T2F, T2G, T7N; T2f = VADD(T11, T2e); T2w = VBYI(VADD(T2s, T2v)); T7L = VSUB(T2f, T2w); STM2(&(xo[106]), T7L, ovs, &(xo[2])); STN2(&(xo[104]), T7A, T7L, ovs); T7M = VADD(T2f, T2w); STM2(&(xo[22]), T7M, ovs, &(xo[2])); T2F = VBYI(VADD(T2A, T2z)); T2G = VADD(T2C, T2D); T7N = VADD(T2F, T2G); STM2(&(xo[10]), T7N, ovs, &(xo[2])); STN2(&(xo[8]), T7C, T7N, ovs); T7O = VSUB(T2G, T2F); STM2(&(xo[118]), T7O, ovs, &(xo[2])); } { V T2x, T2y, T7Q, T2B, T2E, T7S; T2x = VSUB(T11, T2e); T2y = VBYI(VSUB(T2v, T2s)); T7P = VSUB(T2x, T2y); STM2(&(xo[86]), T7P, ovs, &(xo[2])); T7Q = VADD(T2x, T2y); STM2(&(xo[42]), T7Q, ovs, &(xo[2])); STN2(&(xo[40]), T7v, T7Q, ovs); T2B = VBYI(VSUB(T2z, T2A)); T2E = VSUB(T2C, T2D); T7R = VADD(T2B, T2E); STM2(&(xo[54]), T7R, ovs, &(xo[2])); T7S = VSUB(T2E, T2B); STM2(&(xo[74]), T7S, ovs, &(xo[2])); STN2(&(xo[72]), T7x, T7S, ovs); } } { V T3n, T3O, T3J, T3R, T3y, T3Q, T3G, T3N; { V T3f, T3m, T3H, T3I; T3f = 
VFNMS(LDK(KP098017140), T3e, VMUL(LDK(KP995184726), T3b)); T3m = VFMA(LDK(KP995184726), T3i, VMUL(LDK(KP098017140), T3l)); T3n = VSUB(T3f, T3m); T3O = VADD(T3f, T3m); T3H = VFMA(LDK(KP098017140), T3b, VMUL(LDK(KP995184726), T3e)); T3I = VFNMS(LDK(KP098017140), T3i, VMUL(LDK(KP995184726), T3l)); T3J = VSUB(T3H, T3I); T3R = VADD(T3H, T3I); } { V T3u, T3x, T3C, T3F; T3u = VADD(T3q, T3t); T3x = VADD(T3v, T3w); T3y = VSUB(T3u, T3x); T3Q = VADD(T3x, T3u); T3C = VADD(T3A, T3B); T3F = VADD(T3D, T3E); T3G = VSUB(T3C, T3F); T3N = VADD(T3C, T3F); } { V T3z, T3K, T7T, T7U; T3z = VBYI(VSUB(T3n, T3y)); T3K = VSUB(T3G, T3J); T7T = VADD(T3z, T3K); STM2(&(xo[34]), T7T, ovs, &(xo[2])); STN2(&(xo[32]), T7p, T7T, ovs); T7U = VSUB(T3K, T3z); STM2(&(xo[94]), T7U, ovs, &(xo[2])); STN2(&(xo[92]), T7H, T7U, ovs); } { V T3T, T3U, T7V, T7W; T3T = VSUB(T3N, T3O); T3U = VBYI(VSUB(T3R, T3Q)); T7V = VSUB(T3T, T3U); STM2(&(xo[66]), T7V, ovs, &(xo[2])); STN2(&(xo[64]), T7q, T7V, ovs); T7W = VADD(T3T, T3U); STM2(&(xo[62]), T7W, ovs, &(xo[2])); STN2(&(xo[60]), T7J, T7W, ovs); } { V T3L, T3M, T7X, T7Y; T3L = VBYI(VADD(T3y, T3n)); T3M = VADD(T3G, T3J); T7X = VADD(T3L, T3M); STM2(&(xo[30]), T7X, ovs, &(xo[2])); STN2(&(xo[28]), T7E, T7X, ovs); T7Y = VSUB(T3M, T3L); STM2(&(xo[98]), T7Y, ovs, &(xo[2])); STN2(&(xo[96]), T7n, T7Y, ovs); } { V T3P, T3S, T7Z, T80; T3P = VADD(T3N, T3O); T3S = VBYI(VADD(T3Q, T3R)); T7Z = VSUB(T3P, T3S); STM2(&(xo[126]), T7Z, ovs, &(xo[2])); STN2(&(xo[124]), T7G, T7Z, ovs); T80 = VADD(T3P, T3S); STM2(&(xo[2]), T80, ovs, &(xo[2])); STN2(&(xo[0]), T7o, T80, ovs); } } { V T81, T83, T86, T88; { V T4N, T5G, T5z, T5H, T5m, T5D, T5w, T5E; { V T4x, T4M, T5x, T5y; T4x = VSUB(T4p, T4w); T4M = VSUB(T4E, T4L); T4N = VSUB(T4x, T4M); T5G = VADD(T4x, T4M); T5x = VFNMS(LDK(KP555570233), T4Y, VMUL(LDK(KP831469612), T53)); T5y = VFMA(LDK(KP555570233), T5f, VMUL(LDK(KP831469612), T5k)); T5z = VSUB(T5x, T5y); T5H = VADD(T5x, T5y); } { V T54, T5l, T5q, T5v; T54 = VFMA(LDK(KP831469612), T4Y, 
VMUL(LDK(KP555570233), T53)); T5l = VFNMS(LDK(KP555570233), T5k, VMUL(LDK(KP831469612), T5f)); T5m = VSUB(T54, T5l); T5D = VADD(T54, T5l); T5q = VSUB(T5o, T5p); T5v = VSUB(T5r, T5u); T5w = VSUB(T5q, T5v); T5E = VADD(T5v, T5q); } { V T5n, T5A, T82, T5J, T5K, T84; T5n = VADD(T4N, T5m); T5A = VBYI(VADD(T5w, T5z)); T81 = VSUB(T5n, T5A); STM2(&(xo[108]), T81, ovs, &(xo[0])); T82 = VADD(T5n, T5A); STM2(&(xo[20]), T82, ovs, &(xo[0])); STN2(&(xo[20]), T82, T7M, ovs); T5J = VBYI(VADD(T5E, T5D)); T5K = VADD(T5G, T5H); T83 = VADD(T5J, T5K); STM2(&(xo[12]), T83, ovs, &(xo[0])); T84 = VSUB(T5K, T5J); STM2(&(xo[116]), T84, ovs, &(xo[0])); STN2(&(xo[116]), T84, T7O, ovs); } { V T5B, T5C, T85, T5F, T5I, T87; T5B = VSUB(T4N, T5m); T5C = VBYI(VSUB(T5z, T5w)); T85 = VSUB(T5B, T5C); STM2(&(xo[84]), T85, ovs, &(xo[0])); STN2(&(xo[84]), T85, T7P, ovs); T86 = VADD(T5B, T5C); STM2(&(xo[44]), T86, ovs, &(xo[0])); T5F = VBYI(VSUB(T5D, T5E)); T5I = VSUB(T5G, T5H); T87 = VADD(T5F, T5I); STM2(&(xo[52]), T87, ovs, &(xo[0])); STN2(&(xo[52]), T87, T7R, ovs); T88 = VSUB(T5I, T5F); STM2(&(xo[76]), T88, ovs, &(xo[0])); } } { V T2J, T34, T2X, T35, T2Q, T31, T2U, T32; { V T2H, T2I, T2V, T2W; T2H = VADD(Tb, Tq); T2I = VADD(T2g, T2h); T2J = VSUB(T2H, T2I); T34 = VADD(T2H, T2I); T2V = VFNMS(LDK(KP290284677), T2K, VMUL(LDK(KP956940335), T2L)); T2W = VFMA(LDK(KP290284677), T2N, VMUL(LDK(KP956940335), T2O)); T2X = VSUB(T2V, T2W); T35 = VADD(T2V, T2W); } { V T2M, T2P, T2S, T2T; T2M = VFMA(LDK(KP956940335), T2K, VMUL(LDK(KP290284677), T2L)); T2P = VFNMS(LDK(KP290284677), T2O, VMUL(LDK(KP956940335), T2N)); T2Q = VSUB(T2M, T2P); T31 = VADD(T2M, T2P); T2S = VADD(TI, TZ); T2T = VADD(T2q, T2l); T2U = VSUB(T2S, T2T); T32 = VADD(T2T, T2S); } { V T2R, T2Y, T89, T8a; T2R = VADD(T2J, T2Q); T2Y = VBYI(VADD(T2U, T2X)); T89 = VSUB(T2R, T2Y); STM2(&(xo[102]), T89, ovs, &(xo[2])); STN2(&(xo[100]), T7D, T89, ovs); T8a = VADD(T2R, T2Y); STM2(&(xo[26]), T8a, ovs, &(xo[2])); STN2(&(xo[24]), T7z, T8a, ovs); } { V T37, T38, T8b, 
T8c; T37 = VBYI(VADD(T32, T31)); T38 = VADD(T34, T35); T8b = VADD(T37, T38); STM2(&(xo[6]), T8b, ovs, &(xo[2])); STN2(&(xo[4]), T7F, T8b, ovs); T8c = VSUB(T38, T37); STM2(&(xo[122]), T8c, ovs, &(xo[2])); STN2(&(xo[120]), T7B, T8c, ovs); } { V T2Z, T30, T8d, T8e; T2Z = VSUB(T2J, T2Q); T30 = VBYI(VSUB(T2X, T2U)); T8d = VSUB(T2Z, T30); STM2(&(xo[90]), T8d, ovs, &(xo[2])); STN2(&(xo[88]), T7w, T8d, ovs); T8e = VADD(T2Z, T30); STM2(&(xo[38]), T8e, ovs, &(xo[2])); STN2(&(xo[36]), T7I, T8e, ovs); } { V T33, T36, T8f, T8g; T33 = VBYI(VSUB(T31, T32)); T36 = VSUB(T34, T35); T8f = VADD(T33, T36); STM2(&(xo[58]), T8f, ovs, &(xo[2])); STN2(&(xo[56]), T7y, T8f, ovs); T8g = VSUB(T36, T33); STM2(&(xo[70]), T8g, ovs, &(xo[2])); STN2(&(xo[68]), T7K, T8g, ovs); } } { V T41, T4g, T4b, T4j, T44, T4i, T48, T4f; { V T3X, T40, T49, T4a; T3X = VFNMS(LDK(KP634393284), T3W, VMUL(LDK(KP773010453), T3V)); T40 = VFMA(LDK(KP773010453), T3Y, VMUL(LDK(KP634393284), T3Z)); T41 = VSUB(T3X, T40); T4g = VADD(T3X, T40); T49 = VFMA(LDK(KP634393284), T3V, VMUL(LDK(KP773010453), T3W)); T4a = VFNMS(LDK(KP634393284), T3Y, VMUL(LDK(KP773010453), T3Z)); T4b = VSUB(T49, T4a); T4j = VADD(T49, T4a); } { V T42, T43, T46, T47; T42 = VSUB(T3D, T3E); T43 = VSUB(T3w, T3v); T44 = VSUB(T42, T43); T4i = VADD(T43, T42); T46 = VSUB(T3A, T3B); T47 = VSUB(T3q, T3t); T48 = VSUB(T46, T47); T4f = VADD(T46, T47); } { V T45, T4c, T8h, T8i; T45 = VBYI(VSUB(T41, T44)); T4c = VSUB(T48, T4b); T8h = VADD(T45, T4c); STM2(&(xo[46]), T8h, ovs, &(xo[2])); STN2(&(xo[44]), T86, T8h, ovs); T8i = VSUB(T4c, T45); STM2(&(xo[82]), T8i, ovs, &(xo[2])); STN2(&(xo[80]), T7s, T8i, ovs); } { V T4l, T4m, T8j, T8k; T4l = VSUB(T4f, T4g); T4m = VBYI(VSUB(T4j, T4i)); T8j = VSUB(T4l, T4m); STM2(&(xo[78]), T8j, ovs, &(xo[2])); STN2(&(xo[76]), T88, T8j, ovs); T8k = VADD(T4l, T4m); STM2(&(xo[50]), T8k, ovs, &(xo[2])); STN2(&(xo[48]), T7r, T8k, ovs); } { V T4d, T4e, T8l, T8m; T4d = VBYI(VADD(T44, T41)); T4e = VADD(T48, T4b); T8l = VADD(T4d, T4e); 
/*
 * The physical line below holds three things back to back:
 *
 *   1. The closing statements and braces of a function body that starts
 *      earlier in the file (presumably n2bv_64, the symbol registered
 *      below -- confirm against the full file).
 *
 *   2. `desc`, a file-local constant kdft_desc initialised with 64, the
 *      name string "n2bv_64", the tuple {404, 72, 52, 0}, &GENUS, and four
 *      trailing integers (0, 2, 0, 0).
 *      NOTE(review): the field meanings are not visible from here; 64 looks
 *      like the transform size and the tuple like operation counts --
 *      confirm against the kdft_desc declaration.
 *
 *   3. X(codelet_n2bv_64)(planner *p), whose only action is to call
 *      X(kdft_register)(p, n2bv_64, &desc), i.e. it hands the planner the
 *      n2bv_64 symbol together with the descriptor above.
 *
 * The trailing #endif is labelled HAVE_FMA and closes a conditional that is
 * opened outside this view.
 * NOTE(review): the #endif sits in the middle of the line here; a
 * preprocessor directive has to begin its own line, so the line breaks were
 * presumably lost during extraction -- restore them before compiling.
 */
STM2(&(xo[18]), T8l, ovs, &(xo[2])); STN2(&(xo[16]), T7t, T8l, ovs); T8m = VSUB(T4e, T4d); STM2(&(xo[110]), T8m, ovs, &(xo[2])); STN2(&(xo[108]), T81, T8m, ovs); } { V T4h, T4k, T8n, T8o; T4h = VADD(T4f, T4g); T4k = VBYI(VADD(T4i, T4j)); T8n = VSUB(T4h, T4k); STM2(&(xo[114]), T8n, ovs, &(xo[2])); STN2(&(xo[112]), T7u, T8n, ovs); T8o = VADD(T4h, T4k); STM2(&(xo[14]), T8o, ovs, &(xo[2])); STN2(&(xo[12]), T83, T8o, ovs); } } } } } }}static const kdft_desc desc = { 64, "n2bv_64", {404, 72, 52, 0}, &GENUS, 0, 2, 0, 0 };void X(codelet_n2bv_64) (planner *p) { X(kdft_register) (p, n2bv_64, &desc);}#endif /* HAVE_FMA */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -