📄 q1_6.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:41:37 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twidsq -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 6 -name q1_6 -include q.h *//* * This function contains 276 FP additions, 192 FP multiplications, * (or, 144 additions, 60 multiplications, 132 fused multiply/add), * 129 stack variables, 2 constants, and 144 memory accesses */#include "q.h"static void q1_6(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms){ DK(KP866025403, +0.866025403784438646763723170752936183471402627); DK(KP500000000, +0.500000000000000000000000000000000000000000000); INT m; for (m = mb, W = W + (mb * 10); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 10, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) { E T4c, T4f, T4e, T4g, T4d; { E T3, Tw, Ta, TW, Tg, TG, TM, TT, TU, TP, Tn, T17, TV, TJ, Tv; E T1A, T1e, T20, T1k, T1K, T1Q, T1X, T1Y, T1T, T1r, T1Z, T1N, T1z, T31, T32; E T2X, T2v, T2b, T33, T2R, T2D, T2E, T2i, T34, T3f, T2o, T2O, T2U, T3I, T3m; E T48, T3s, T3S, T3Y, T45, T46, T41, T3z, T4j, T47, T3V, T3H, T4M, T4q, T5c; E T4w, T4W, T52, T59, T5a, T55, T4D, T5b, T4Z, T4L, T6d, T5r, T6e, T69, T5H; E T5w, T5n, T6f, T63, T5P, T5s, T5o, T5p; { E T2f, T2k, T2g, T2c, T2d; { E T1b, T1g, T1c, T18, T19; { E T4, Tc, Te, T9, T5; { E T1, T2, T7, T8; T1 = rio[0]; T2 = rio[WS(rs, 3)]; T7 = rio[WS(rs, 4)]; T8 = rio[WS(rs, 1)]; T4 = rio[WS(rs, 2)]; Tc = T1 - T2; T3 = T1 + T2; Te = T7 - T8; T9 = T7 + T8; T5 = rio[WS(rs, 5)]; } { E TN, Tj, Tk, Tl, Tt, Th, Ti; Th = iio[WS(rs, 2)]; Ti = iio[WS(rs, 5)]; { E Tr, Ts, Td, T6, Tf; Tr = iio[0]; Td = T4 - T5; T6 = T4 + T5; TN = Th + Ti; Tj = Th - Ti; Tf = Td + Te; Tw = Te - Td; Ta = T6 + T9; TW = T9 - T6; Tg = FNMS(KP500000000, Tf, Tc); TG = Tc + Tf; Ts = iio[WS(rs, 3)]; TM = FNMS(KP500000000, Ta, T3); Tk = iio[WS(rs, 4)]; Tl = iio[WS(rs, 1)]; Tt = Tr - Ts; TT = Tr + Ts; } { E T15, TO, Tm, T16, Tu; T15 = rio[WS(vs, 1)]; TO = Tk + Tl; Tm = Tk - Tl; T16 = rio[WS(vs, 1) + WS(rs, 3)]; T1b = rio[WS(vs, 1) + WS(rs, 4)]; TU = TN + TO; TP = TN - TO; Tu = Tj + Tm; Tn = Tj - Tm; T1g = T15 - T16; T17 = T15 + T16; TV = FNMS(KP500000000, TU, TT); TJ = Tt + Tu; Tv = FNMS(KP500000000, Tu, Tt); T1c = rio[WS(vs, 1) + WS(rs, 1)]; T18 = rio[WS(vs, 1) + WS(rs, 2)]; T19 = rio[WS(vs, 1) + WS(rs, 5)]; } } } { E T1v, T1R, T1n, T1w, T1o, T1p; { E T1l, T1i, T1d, T1h, T1a, T1m, T1j; T1l = iio[WS(vs, 1) + WS(rs, 2)]; T1i = T1b - T1c; T1d = T1b + T1c; T1h = T18 - T19; T1a = T18 + T19; T1m = iio[WS(vs, 1) + WS(rs, 5)]; T1v = iio[WS(vs, 1)]; T1j = T1h + T1i; T1A = T1i - T1h; T1e = T1a + T1d; T20 = T1d - T1a; T1R = T1l + T1m; T1n = T1l - T1m; T1k = FNMS(KP500000000, T1j, T1g); T1K = T1g + T1j; T1Q = FNMS(KP500000000, T1e, T17); T1w = iio[WS(vs, 1) + WS(rs, 3)]; T1o = iio[WS(vs, 1) + WS(rs, 4)]; T1p = iio[WS(vs, 1) + WS(rs, 1)]; } { E T2z, T2V, T2r, T2A, T2s, T2t; { E T2p, T1x, T1S, T1q, T2q, T1y; T2p = iio[WS(vs, 2) + WS(rs, 2)]; T1X = T1v + T1w; T1x = T1v - T1w; T1S = T1o + T1p; T1q = T1o - T1p; T2q = iio[WS(vs, 2) + WS(rs, 5)]; T2z = iio[WS(vs, 2)]; T1Y = T1R + T1S; T1T = T1R - T1S; T1y = T1n + T1q; T1r = T1n - T1q; T2V = T2p + T2q; T2r = T2p - T2q; T1Z = FNMS(KP500000000, T1Y, T1X); T1N = T1x + T1y; T1z = FNMS(KP500000000, T1y, T1x); T2A = iio[WS(vs, 2) + WS(rs, 3)]; T2s = iio[WS(vs, 2) + WS(rs, 4)]; T2t = iio[WS(vs, 2) + WS(rs, 1)]; } { E T29, T2B, T2W, T2u, T2a, T2C; T29 = rio[WS(vs, 2)]; T31 = T2z + T2A; T2B = T2z - T2A; T2W = T2s + T2t; T2u = T2s - T2t; T2a = rio[WS(vs, 2) + WS(rs, 3)]; T2f = rio[WS(vs, 2) + WS(rs, 4)]; T32 = T2V + T2W; T2X = T2V - T2W; T2C = T2r + T2u; T2v = T2r - T2u; T2k = T29 - T2a; T2b = T29 + T2a; T33 = FNMS(KP500000000, T32, T31); T2R = T2B + T2C; T2D = FNMS(KP500000000, T2C, T2B); T2g = rio[WS(vs, 2) + WS(rs, 1)]; T2c = rio[WS(vs, 2) + WS(rs, 2)]; T2d = rio[WS(vs, 2) + WS(rs, 5)]; } } } } { E T4n, T4s, T4o, T4k, T4l; { E T3j, T3o, T3k, T3g, T3h; { E T3d, T2m, T2h, T2l, T2e, T3e, T2n; T3d = rio[WS(vs, 3)]; T2m = T2f - T2g; T2h = T2f + T2g; T2l = T2c - T2d; T2e = T2c + T2d; T3e = rio[WS(vs, 3) + WS(rs, 3)]; T3j = rio[WS(vs, 3) + WS(rs, 4)]; T2n = T2l + T2m; T2E = T2m - T2l; T2i = T2e + T2h; T34 = T2h - T2e; T3o = T3d - T3e; T3f = T3d + T3e; T2o = FNMS(KP500000000, T2n, T2k); T2O = T2k + T2n; T2U = FNMS(KP500000000, T2i, T2b); T3k = rio[WS(vs, 3) + WS(rs, 1)]; T3g = rio[WS(vs, 3) + WS(rs, 2)]; T3h = rio[WS(vs, 3) + WS(rs, 5)]; } { E T3D, T3Z, T3v, T3E, T3w, T3x; { E T3t, T3q, T3l, T3p, T3i, T3u, T3r; T3t = iio[WS(vs, 3) + WS(rs, 2)]; T3q = T3j - T3k; T3l = T3j + T3k; T3p = T3g - T3h; T3i = T3g + T3h; T3u = iio[WS(vs, 3) + WS(rs, 5)]; T3D = iio[WS(vs, 3)]; T3r = T3p + T3q; T3I = T3q - T3p; T3m = T3i + T3l; T48 = T3l - T3i; T3Z = T3t + T3u; T3v = T3t - T3u; T3s = FNMS(KP500000000, T3r, T3o); T3S = T3o + T3r; T3Y = FNMS(KP500000000, T3m, T3f); T3E = iio[WS(vs, 3) + WS(rs, 3)]; T3w = iio[WS(vs, 3) + WS(rs, 4)]; T3x = iio[WS(vs, 3) + WS(rs, 1)]; } { E T4h, T3F, T40, T3y, T4i, T3G; T4h = rio[WS(vs, 4)]; T45 = T3D + T3E; T3F = T3D - T3E; T40 = T3w + T3x; T3y = T3w - T3x; T4i = rio[WS(vs, 4) + WS(rs, 3)]; T4n = rio[WS(vs, 4) + WS(rs, 4)]; T46 = T3Z + T40; T41 = T3Z - T40; T3G = T3v + T3y; T3z = T3v - T3y; T4s = T4h - T4i; T4j = T4h + T4i; T47 = FNMS(KP500000000, T46, T45); T3V = T3F + T3G; T3H = FNMS(KP500000000, T3G, T3F); T4o = rio[WS(vs, 4) + WS(rs, 1)]; T4k = rio[WS(vs, 4) + WS(rs, 2)]; T4l = rio[WS(vs, 4) + WS(rs, 5)]; } } } { E T4H, T53, T4z, T4I, T4A, T4B; { E T4x, T4u, T4p, T4t, T4m, T4y, T4v; T4x = iio[WS(vs, 4) + WS(rs, 2)]; T4u = T4n - T4o; T4p = T4n + T4o; T4t = T4k - T4l; T4m = T4k + T4l; T4y = iio[WS(vs, 4) + WS(rs, 5)]; T4H = iio[WS(vs, 4)]; T4v = T4t + T4u; T4M = T4u - T4t; T4q = T4m + T4p; T5c = T4p - T4m; T53 = T4x + T4y; T4z = T4x - T4y; T4w = FNMS(KP500000000, T4v, T4s); T4W = T4s + T4v; T52 = FNMS(KP500000000, T4q, T4j); T4I = iio[WS(vs, 4) + WS(rs, 3)]; T4A = iio[WS(vs, 4) + WS(rs, 4)]; T4B = iio[WS(vs, 4) + WS(rs, 1)]; } { E T5L, T67, T5D, T5M, T5E, T5F; { E T5B, T4J, T54, T4C, T5C, T4K; T5B = iio[WS(vs, 5) + WS(rs, 2)]; T59 = T4H + T4I; T4J = T4H - T4I; T54 = T4A + T4B; T4C = T4A - T4B; T5C = iio[WS(vs, 5) + WS(rs, 5)]; T5L = iio[WS(vs, 5)]; T5a = T53 + T54; T55 = T53 - T54; T4K = T4z + T4C; T4D = T4z - T4C; T67 = T5B + T5C; T5D = T5B - T5C; T5b = FNMS(KP500000000, T5a, T59); T4Z = T4J + T4K; T4L = FNMS(KP500000000, T4K, T4J); T5M = iio[WS(vs, 5) + WS(rs, 3)]; T5E = iio[WS(vs, 5) + WS(rs, 4)]; T5F = iio[WS(vs, 5) + WS(rs, 1)]; } { E T5l, T5N, T68, T5G, T5m, T5O; T5l = rio[WS(vs, 5)]; T6d = T5L + T5M; T5N = T5L - T5M; T68 = T5E + T5F; T5G = T5E - T5F; T5m = rio[WS(vs, 5) + WS(rs, 3)]; T5r = rio[WS(vs, 5) + WS(rs, 4)]; T6e = T67 + T68; T69 = T67 - T68; T5O = T5D + T5G; T5H = T5D - T5G; T5w = T5l - T5m; T5n = T5l + T5m; T6f = FNMS(KP500000000, T6e, T6d); T63 = T5N + T5O; T5P = FNMS(KP500000000, T5O, T5N); T5s = rio[WS(vs, 5) + WS(rs, 1)]; T5o = rio[WS(vs, 5) + WS(rs, 2)]; T5p = rio[WS(vs, 5) + WS(rs, 5)]; } } } } } { E T6a, T6h, T5I, T5R, T65, T6c; { E T5Q, T5u, T6g, T5A, T60, T66; { E T5y, T5t, T5x, T5q, T5z; rio[0] = T3 + Ta; T5y = T5r - T5s; T5t = T5r + T5s; T5x = T5o - T5p; T5q = T5o + T5p; iio[0] = TT + TU; rio[WS(rs, 1)] = T17 + T1e; T5z = T5x + T5y; T5Q = T5y - T5x; T5u = T5q + T5t; T6g = T5t - T5q; T5A = FNMS(KP500000000, T5z, T5w); T60 = T5w + T5z; iio[WS(rs, 1)] = T1X + T1Y; T66 = FNMS(KP500000000, T5u, T5n); rio[WS(rs, 2)] = T2b + T2i; } iio[WS(rs, 2)] = T31 + T32; iio[WS(rs, 4)] = T59 + T5a; rio[WS(rs, 4)] = T4j + T4q; rio[WS(rs, 3)] = T3f + T3m; iio[WS(rs, 3)] = T45 + T46; { E TA, TD, TQ, T10, T13, TX, TZ, T12; rio[WS(rs, 5)] = T5n + T5u; iio[WS(rs, 5)] = T6d + T6e; { E To, Tx, Tb, Tq; TA = FNMS(KP866025403, Tn, Tg); To = FMA(KP866025403, Tn, Tg); Tx = FMA(KP866025403, Tw, Tv); TD = FNMS(KP866025403, Tw, Tv); Tb = W[0]; Tq = W[1]; { E TI, TK, TH, Ty, Tp, TF; Ty = Tb * Tx; Tp = Tb * To; TF = W[4]; TI = W[5]; iio[WS(vs, 1)] = FNMS(Tq, To, Ty); rio[WS(vs, 1)] = FMA(Tq, Tx, Tp); TK = TF * TJ; TH = TF * TG; TQ = FNMS(KP866025403, TP, TM); T10 = FMA(KP866025403, TP, TM); T13 = FMA(KP866025403, TW, TV); TX = FNMS(KP866025403, TW, TV); iio[WS(vs, 3)] = FNMS(TI, TG, TK); rio[WS(vs, 3)] = FMA(TI, TJ, TH); TZ = W[6]; T12 = W[7]; } } { E TC, TE, TB, TL, TS; { E T62, T64, T61, T14, T11, T5Z; T14 = TZ * T13; T11 = TZ * T10; T5Z = W[4]; T62 = W[5]; iio[WS(vs, 4)] = FNMS(T12, T10, T14); rio[WS(vs, 4)] = FMA(T12, T13, T11); T64 = T5Z * T63; T61 = T5Z * T60; { E T6k, T6n, T6j, T6m, T6o, T6l, Tz; T6a = FNMS(KP866025403, T69, T66); T6k = FMA(KP866025403, T69, T66); T6n = FMA(KP866025403, T6g, T6f); T6h = FNMS(KP866025403, T6g, T6f); iio[WS(vs, 3) + WS(rs, 5)] = FNMS(T62, T60, T64); rio[WS(vs, 3) + WS(rs, 5)] = FMA(T62, T63, T61); T6j = W[6]; T6m = W[7]; T6o = T6j * T6n; T6l = T6j * T6k; Tz = W[8]; TC = W[9]; iio[WS(vs, 4) + WS(rs, 5)] = FNMS(T6m, T6k, T6o); rio[WS(vs, 4) + WS(rs, 5)] = FMA(T6m, T6n, T6l); TE = Tz * TD; TB = Tz * TA; } } iio[WS(vs, 5)] = FNMS(TC, TA, TE); rio[WS(vs, 5)] = FMA(TC, TD, TB); TL = W[2]; TS = W[3]; { E T5U, T5X, T5W, T5Y, T5V, TY, TR, T5T; T5I = FMA(KP866025403, T5H, T5A); T5U = FNMS(KP866025403, T5H, T5A); T5X = FNMS(KP866025403, T5Q, T5P);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -