📄 q1_8.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:40:39 EST 2008 */#include "codelet-dft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_twidsq -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -reload-twiddle -dif -n 8 -name q1_8 -include q.h *//* * This function contains 528 FP additions, 288 FP multiplications, * (or, 352 additions, 112 multiplications, 176 fused multiply/add), * 190 stack variables, 1 constants, and 256 memory accesses */#include "q.h"static void q1_8(R *rio, R *iio, const R *W, stride rs, stride vs, INT mb, INT me, INT ms){ DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + (mb * 14); m < me; m = m + 1, rio = rio + ms, iio = iio + ms, W = W + 14, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(vs)) { E T9C, T9N, T9l, T9E, T9D, T9O; { E TV, Tk, T1d, T7, T18, T1t, TQ, TD, T5t, T4S, T5L, T4F, T5G, T61, T5o; E T5b, T6Z, T6o, T7h, T6b, T7c, T7x, T6U, T6H, Tbx, TaW, TbP, TaJ, TbK, Tc5; E Tbs, Tbf, T2r, T1Q, T2J, T1D, T2E, T2Z, T2m, T29, T3X, T3m, T4f, T39, T4a; E T4v, T3S, T3F, T8v, T7U, T8N, T7H, T8I, T93, T8q, T8d, Ta1, T9q, Taj, T9d; E Tae, Taz, T9W, T9J, Te, T19, T1u, T1g, Tv, TR, TG, TW, T5H, T4M, T5O; E T62, T5p, T53, T5u, T5e, T6i, T7d, T7y, T7k, T6z, T6V, T6K, T70, TbL, TaQ; E TbS, Tc6, Tbt, Tb7, Tby, Tbi, T1K, T2F, T30, T2M, T21, T2n, T2c, T2s, T4b; E T3g, T4i, T4w, T3T, T3x, T3Y, T3I, T7O, T8J, T94, T8Q, T85, T8r, T8g, T8w; E Tak, T9r, T9K, T9A, Taf, T9k, Tal, T9u; { E T9a, T9F, T99, Tac, T9p, T9b, T9G, T9H; { E TaG, Tbb, TaF, TbI, TaV, TaH, Tbc, Tbd; { E T4C, T57, T4B, T5E, T4R, T4D, T58, T59; { E T4, Tz, T3, T16, Tj, T5, TA, TB; { E T1, T2, Th, Ti; T1 = rio[0]; T2 = rio[WS(rs, 4)]; Th = iio[0]; Ti = iio[WS(rs, 4)]; T4 = rio[WS(rs, 2)]; Tz = T1 - T2; T3 = T1 + T2; T16 = Th + Ti; Tj = Th - Ti; T5 = rio[WS(rs, 6)]; TA = iio[WS(rs, 2)]; TB = iio[WS(rs, 6)]; } { E T4z, T4A, T4P, T4Q; T4z = rio[WS(vs, 3)]; { E Tg, T6, T17, TC; Tg = T4 - T5; T6 = T4 + T5; T17 = TA + TB; TC = TA - TB; TV = Tj - Tg; Tk = Tg + Tj; T1d = T3 - T6; T7 = T3 + T6; T18 = T16 - T17; T1t = T16 + T17; TQ = Tz + TC; TD = Tz - TC; T4A = rio[WS(vs, 3) + WS(rs, 4)]; } T4P = iio[WS(vs, 3)]; T4Q = iio[WS(vs, 3) + WS(rs, 4)]; T4C = rio[WS(vs, 3) + WS(rs, 2)]; T57 = T4z - T4A; T4B = T4z + T4A; T5E = T4P + T4Q; T4R = T4P - T4Q; T4D = rio[WS(vs, 3) + WS(rs, 6)]; T58 = iio[WS(vs, 3) + WS(rs, 2)]; T59 = iio[WS(vs, 3) + WS(rs, 6)]; } } { E T68, T6D, T67, T7a, T6n, T69, T6E, T6F; { E T65, T66, T6l, T6m; T65 = rio[WS(vs, 4)]; { E T4O, T4E, T5F, T5a; T4O = T4C - T4D; T4E = T4C + T4D; T5F = T58 + T59; T5a = T58 - T59; T5t = T4R - T4O; T4S = T4O + T4R; T5L = T4B - T4E; T4F = T4B + T4E; T5G = T5E - T5F; T61 = T5E + T5F; T5o = T57 + T5a; T5b = T57 - T5a; T66 = rio[WS(vs, 4) + WS(rs, 4)]; } T6l = iio[WS(vs, 4)]; T6m = iio[WS(vs, 4) + WS(rs, 4)]; T68 = rio[WS(vs, 4) + WS(rs, 2)]; T6D = T65 - T66; T67 = T65 + T66; T7a = T6l + T6m; T6n = T6l - T6m; T69 = rio[WS(vs, 4) + WS(rs, 6)]; T6E = iio[WS(vs, 4) + WS(rs, 2)]; T6F = iio[WS(vs, 4) + WS(rs, 6)]; } { E TaD, TaE, TaT, TaU; TaD = rio[WS(vs, 7)]; { E T6k, T6a, T7b, T6G; T6k = T68 - T69; T6a = T68 + T69; T7b = T6E + T6F; T6G = T6E - T6F; T6Z = T6n - T6k; T6o = T6k + T6n; T7h = T67 - T6a; T6b = T67 + T6a; T7c = T7a - T7b; T7x = T7a + T7b; T6U = T6D + T6G; T6H = T6D - T6G; TaE = rio[WS(vs, 7) + WS(rs, 4)]; } TaT = iio[WS(vs, 7)]; TaU = iio[WS(vs, 7) + WS(rs, 4)]; TaG = rio[WS(vs, 7) + WS(rs, 2)]; Tbb = TaD - TaE; TaF = TaD + TaE; TbI = TaT + TaU; TaV = TaT - TaU; TaH = rio[WS(vs, 7) + WS(rs, 6)]; Tbc = iio[WS(vs, 7) + WS(rs, 2)]; Tbd = iio[WS(vs, 7) + WS(rs, 6)]; } } } { E T36, T3B, T35, T48, T3l, T37, T3C, T3D; { E T1A, T25, T1z, T2C, T1P, T1B, T26, T27; { E T1x, T1y, T1N, T1O; T1x = rio[WS(vs, 1)]; { E TaS, TaI, TbJ, Tbe; TaS = TaG - TaH; TaI = TaG + TaH; TbJ = Tbc + Tbd; Tbe = Tbc - Tbd; Tbx = TaV - TaS; TaW = TaS + TaV; TbP = TaF - TaI; TaJ = TaF + TaI; TbK = TbI - TbJ; Tc5 = TbI + TbJ; Tbs = Tbb + Tbe; Tbf = Tbb - Tbe; T1y = rio[WS(vs, 1) + WS(rs, 4)]; } T1N = iio[WS(vs, 1)]; T1O = iio[WS(vs, 1) + WS(rs, 4)]; T1A = rio[WS(vs, 1) + WS(rs, 2)]; T25 = T1x - T1y; T1z = T1x + T1y; T2C = T1N + T1O; T1P = T1N - T1O; T1B = rio[WS(vs, 1) + WS(rs, 6)]; T26 = iio[WS(vs, 1) + WS(rs, 2)]; T27 = iio[WS(vs, 1) + WS(rs, 6)]; } { E T33, T34, T3j, T3k; T33 = rio[WS(vs, 2)]; { E T1M, T1C, T2D, T28; T1M = T1A - T1B; T1C = T1A + T1B; T2D = T26 + T27; T28 = T26 - T27; T2r = T1P - T1M; T1Q = T1M + T1P; T2J = T1z - T1C; T1D = T1z + T1C; T2E = T2C - T2D; T2Z = T2C + T2D; T2m = T25 + T28; T29 = T25 - T28; T34 = rio[WS(vs, 2) + WS(rs, 4)]; } T3j = iio[WS(vs, 2)]; T3k = iio[WS(vs, 2) + WS(rs, 4)]; T36 = rio[WS(vs, 2) + WS(rs, 2)]; T3B = T33 - T34; T35 = T33 + T34; T48 = T3j + T3k; T3l = T3j - T3k; T37 = rio[WS(vs, 2) + WS(rs, 6)]; T3C = iio[WS(vs, 2) + WS(rs, 2)]; T3D = iio[WS(vs, 2) + WS(rs, 6)]; } } { E T7E, T89, T7D, T8G, T7T, T7F, T8a, T8b; { E T7B, T7C, T7R, T7S; T7B = rio[WS(vs, 5)]; { E T3i, T38, T49, T3E; T3i = T36 - T37; T38 = T36 + T37; T49 = T3C + T3D; T3E = T3C - T3D; T3X = T3l - T3i; T3m = T3i + T3l; T4f = T35 - T38; T39 = T35 + T38; T4a = T48 - T49; T4v = T48 + T49; T3S = T3B + T3E; T3F = T3B - T3E; T7C = rio[WS(vs, 5) + WS(rs, 4)]; } T7R = iio[WS(vs, 5)]; T7S = iio[WS(vs, 5) + WS(rs, 4)]; T7E = rio[WS(vs, 5) + WS(rs, 2)]; T89 = T7B - T7C; T7D = T7B + T7C; T8G = T7R + T7S; T7T = T7R - T7S; T7F = rio[WS(vs, 5) + WS(rs, 6)]; T8a = iio[WS(vs, 5) + WS(rs, 2)]; T8b = iio[WS(vs, 5) + WS(rs, 6)]; } { E T97, T98, T9n, T9o; T97 = rio[WS(vs, 6)]; { E T7Q, T7G, T8H, T8c; T7Q = T7E - T7F; T7G = T7E + T7F; T8H = T8a + T8b; T8c = T8a - T8b; T8v = T7T - T7Q; T7U = T7Q + T7T; T8N = T7D - T7G; T7H = T7D + T7G; T8I = T8G - T8H; T93 = T8G + T8H; T8q = T89 + T8c; T8d = T89 - T8c; T98 = rio[WS(vs, 6) + WS(rs, 4)]; } T9n = iio[WS(vs, 6)]; T9o = iio[WS(vs, 6) + WS(rs, 4)]; T9a = rio[WS(vs, 6) + WS(rs, 2)]; T9F = T97 - T98; T99 = T97 + T98; Tac = T9n + T9o; T9p = T9n - T9o; T9b = rio[WS(vs, 6) + WS(rs, 6)]; T9G = iio[WS(vs, 6) + WS(rs, 2)]; T9H = iio[WS(vs, 6) + WS(rs, 6)]; } } } } { E TbQ, TaX, Tbg, Tb6, TbR, Tb0; { E T5M, T4T, T5c, T52, T5N, T4W; { E Tu, TE, TF, Tp; { E Tb, Tq, Ta, T1e, Tt, Tc, Tm, Tn; { E T8, T9, Tr, Ts; T8 = rio[WS(rs, 1)]; { E T9m, T9c, Tad, T9I; T9m = T9a - T9b; T9c = T9a + T9b; Tad = T9G + T9H; T9I = T9G - T9H; Ta1 = T9p - T9m; T9q = T9m + T9p; Taj = T99 - T9c; T9d = T99 + T9c; Tae = Tac - Tad; Taz = Tac + Tad; T9W = T9F + T9I; T9J = T9F - T9I; T9 = rio[WS(rs, 5)]; } Tr = iio[WS(rs, 1)]; Ts = iio[WS(rs, 5)]; Tb = rio[WS(rs, 7)]; Tq = T8 - T9; Ta = T8 + T9; T1e = Tr + Ts; Tt = Tr - Ts; Tc = rio[WS(rs, 3)]; Tm = iio[WS(rs, 7)]; Tn = iio[WS(rs, 3)]; } { E Tl, Td, T1f, To; Tu = Tq + Tt; TE = Tt - Tq; Tl = Tb - Tc; Td = Tb + Tc; T1f = Tm + Tn; To = Tm - Tn; Te = Ta + Td; T19 = Td - Ta; T1u = T1e + T1f; T1g = T1e - T1f; TF = Tl + To; Tp = Tl - To; } } { E T4I, T4Y, T4U, T51, T4L, T4V; { E T4Z, T50, T4G, T4H, T4J, T4K; T4G = rio[WS(vs, 3) + WS(rs, 1)]; T4H = rio[WS(vs, 3) + WS(rs, 5)]; Tv = Tp - Tu; TR = Tu + Tp; TG = TE - TF; TW = TE + TF; T4I = T4G + T4H; T4Y = T4G - T4H; T4Z = iio[WS(vs, 3) + WS(rs, 1)]; T50 = iio[WS(vs, 3) + WS(rs, 5)]; T4J = rio[WS(vs, 3) + WS(rs, 7)]; T4K = rio[WS(vs, 3) + WS(rs, 3)]; T4U = iio[WS(vs, 3) + WS(rs, 7)]; T51 = T4Z - T50; T5M = T4Z + T50; T4L = T4J + T4K; T4T = T4J - T4K; T4V = iio[WS(vs, 3) + WS(rs, 3)]; } T5c = T51 - T4Y; T52 = T4Y + T51; T5H = T4L - T4I; T4M = T4I + T4L; T5N = T4U + T4V; T4W = T4U - T4V; } } { E T7i, T6p, T6y, T6I, T6s, T7j; { E T6e, T6u, T6q, T6x, T6h, T6r; { E T6v, T6w, T6f, T6g; { E T4X, T5d, T6c, T6d; T6c = rio[WS(vs, 4) + WS(rs, 1)]; T6d = rio[WS(vs, 4) + WS(rs, 5)]; T5O = T5M - T5N; T62 = T5M + T5N; T4X = T4T - T4W; T5d = T4T + T4W; T6e = T6c + T6d; T6u = T6c - T6d; T5p = T52 + T4X; T53 = T4X - T52; T5u = T5c + T5d; T5e = T5c - T5d; T6v = iio[WS(vs, 4) + WS(rs, 1)]; T6w = iio[WS(vs, 4) + WS(rs, 5)]; } T6f = rio[WS(vs, 4) + WS(rs, 7)]; T6g = rio[WS(vs, 4) + WS(rs, 3)]; T6q = iio[WS(vs, 4) + WS(rs, 7)]; T7i = T6v + T6w; T6x = T6v - T6w; T6p = T6f - T6g; T6h = T6f + T6g; T6r = iio[WS(vs, 4) + WS(rs, 3)]; } T6y = T6u + T6x; T6I = T6x - T6u; T6i = T6e + T6h; T7d = T6h - T6e; T6s = T6q - T6r; T7j = T6q + T6r; } { E Tb2, TaM, TaY, Tb5, TaP, TaZ; { E Tb3, Tb4, TaN, TaO; { E T6J, T6t, TaK, TaL; TaK = rio[WS(vs, 7) + WS(rs, 1)]; TaL = rio[WS(vs, 7) + WS(rs, 5)]; T7y = T7i + T7j; T7k = T7i - T7j; T6J = T6p + T6s; T6t = T6p - T6s; Tb2 = TaK - TaL; TaM = TaK + TaL; T6z = T6t - T6y; T6V = T6y + T6t; T6K = T6I - T6J; T70 = T6I + T6J; Tb3 = iio[WS(vs, 7) + WS(rs, 1)]; Tb4 = iio[WS(vs, 7) + WS(rs, 5)]; } TaN = rio[WS(vs, 7) + WS(rs, 7)]; TaO = rio[WS(vs, 7) + WS(rs, 3)]; TaY = iio[WS(vs, 7) + WS(rs, 7)]; Tb5 = Tb3 - Tb4; TbQ = Tb3 + Tb4; TaP = TaN + TaO; TaX = TaN - TaO; TaZ = iio[WS(vs, 7) + WS(rs, 3)]; } Tbg = Tb5 - Tb2; Tb6 = Tb2 + Tb5; TbL = TaP - TaM; TaQ = TaM + TaP; TbR = TaY + TaZ; Tb0 = TaY - TaZ; } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -