📄 hc2cfdft_32.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:03:24 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2cdft -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 32 -dit -name hc2cfdft_32 -include hc2cf.h *//* * This function contains 498 FP additions, 324 FP multiplications, * (or, 300 additions, 126 multiplications, 198 fused multiply/add), * 172 stack variables, 8 constants, and 128 memory accesses */#include "hc2cf.h"static void hc2cfdft_32(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP500000000, +0.500000000000000000000000000000000000000000000); INT m; for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, Rp = Rp + ms, Ip = Ip + ms, Rm = Rm - ms, Im = Im - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) { E T9X, Ta0; { E T3B, T89, T61, T8l, T2F, T7p, T8t, T4B, T7I, T5e, T7L, T1n, T7R, T5E, T82; E T4u, T3m, T8k, T5W, T8a, T2r, T8u, T4G, T7q, T59, T7K, T7H, T12, T5z, T81; E T7Q, T4h, T4Y, T7D, T7A, Tl, T5o, T3Q, T84, T7V, T2V, T4M, T7t, T7s, T1K; E T5L, T8e, T8n, T38, T7v, T4R, T7w, T25, T5Q, T8h, T8o, T3V, T3S, T5p, T3T; E T41, Tz, T3Y, TE, TA, T51, T5r, T3Z, Tv, T50, TB, T3U, T40; { E T49, T46, T5v, T47, T4f, TV, T4c, T10, TW, T57, T5x, T4d, TR, T56, TX; E T48, T4e; { E T4m, T4j, T5A, T4k, T4s, T1g, T4p, T1l, T1h, T5c, T5C, T4q, T1c, T5b, T1i; E T4l, T4r; { E T2E, T4y, T2B, T4A; { E T3y, T3z, T3t, T5Z, T3x, T2v, T3r, T3q, T3n, T2A, T3o, T2s; { E T2C, T2D, T3w, T3u, T3v; T2C = Ip[0]; T2D = Im[0]; T3u = Rm[0]; T3v = Rp[0]; T3y = W[1]; T3z = T2C + T2D; T2E = T2C - T2D; T4y = T3v + T3u; T3w = T3u - T3v; T3t = W[0]; { E T2y, T2z, T2t, T2u; T2t = Ip[WS(rs, 8)]; T2u = Im[WS(rs, 8)]; T5Z = T3y * T3w; T3x = T3t * T3w; T2y = Rp[WS(rs, 8)]; T2v = T2t - T2u; T3r = T2t + T2u; T2z = Rm[WS(rs, 8)]; T3q = W[33]; T3n = W[32]; T2A = T2y + T2z; T3o = T2z - T2y; T2s = W[30]; } } { E T3A, T5X, T4z, T2w, T3s, T3p, T5Y, T60, T2x; T3A = FNMS(T3y, T3z, T3x); T3p = T3n * T3o; T5X = T3q * T3o; T4z = T2s * T2A; T2w = T2s * T2v; T3s = FNMS(T3q, T3r, T3p); T5Y = FMA(T3n, T3r, T5X); T60 = FMA(T3t, T3z, T5Z); T2x = W[31]; T3B = T3s + T3A; T89 = T3A - T3s; T61 = T5Y + T60; T8l = T60 - T5Y; T2B = FNMS(T2x, T2A, T2w); T4A = FMA(T2x, T2v, T4z); } } { E T16, T1b, T17, T5a, T1d, T4o, T18; { E T19, T1a, T13, T4i, T14, T15; T14 = Ip[WS(rs, 3)]; T15 = Im[WS(rs, 3)]; T2F = T2B + T2E; T7p = T2E - T2B; T8t = T4y - T4A; T4B = T4y + T4A; T4m = T14 + T15; T16 = T14 - T15; T19 = Rp[WS(rs, 3)]; T1a = Rm[WS(rs, 3)]; T13 = W[10]; T4i = W[12]; { E T1e, T1f, T1j, T1k; T1e = Ip[WS(rs, 11)]; T4j = T19 - T1a; T1b = T19 + T1a; T17 = T13 * T16; T5A = T4i * T4m; T4k = T4i * T4j; T5a = T13 * T1b; T1f = Im[WS(rs, 11)]; T1j = Rp[WS(rs, 11)]; T1k = Rm[WS(rs, 11)]; T1d = W[42]; T4s = T1e + T1f; T1g = T1e - T1f; T4p = T1j - T1k; T1l = T1j + T1k; T4o = W[44]; T1h = T1d * T1g; } } T18 = W[11]; T5c = T1d * T1l; T5C = T4o * T4s; T4q = T4o * T4p; T1c = FNMS(T18, T1b, T17); T5b = FMA(T18, T16, T5a); T1i = W[43]; T4l = W[13]; T4r = W[45]; } } { E T4D, T2g, T2q, T4F; { E T3d, T3e, T2a, T2f, T3a, T5S, T3c, T4C, T2b, T3j, T2k, T3k, T2p, T3h, T3g; E T2h, T5U, T3b, T27; { E T28, T29, T2d, T2e, T5d, T1m; T28 = Ip[WS(rs, 4)]; T5d = FMA(T1i, T1g, T5c); T1m = FNMS(T1i, T1l, T1h); { E T5B, T4n, T5D, T4t; T5B = FNMS(T4l, T4j, T5A); T4n = FMA(T4l, T4m, T4k); T5D = FNMS(T4r, T4p, T5C); T4t = FMA(T4r, T4s, T4q); T7I = T5b - T5d; T5e = T5b + T5d; T7L = T1c - T1m; T1n = T1c + T1m; T7R = T5D - T5B; T5E = T5B + T5D; T82 = T4t - T4n; T4u = T4n + T4t; T29 = Im[WS(rs, 4)]; } T2d = Rp[WS(rs, 4)]; T2e = Rm[WS(rs, 4)]; T3d = W[17]; T3e = T28 + T29; T2a = T28 - T29; T3b = T2e - T2d; T2f = T2d + T2e; T3a = W[16]; T27 = W[14]; T5S = T3d * T3b; } { E T2i, T2j, T2n, T2o; T2i = Ip[WS(rs, 12)]; T3c = T3a * T3b; T4C = T27 * T2f; T2b = T27 * T2a; T2j = Im[WS(rs, 12)]; T2n = Rp[WS(rs, 12)]; T2o = Rm[WS(rs, 12)]; T3j = W[49]; T2k = T2i - T2j; T3k = T2i + T2j; T2p = T2n + T2o; T3h = T2o - T2n; T3g = W[48]; T2h = W[46]; T5U = T3j * T3h; } { E T3f, T3i, T4E, T2l; T3f = FNMS(T3d, T3e, T3c); T3i = T3g * T3h; T4E = T2h * T2p; T2l = T2h * T2k; { E T5T, T3l, T5V, T2c, T2m; T5T = FMA(T3a, T3e, T5S); T3l = FNMS(T3j, T3k, T3i); T5V = FMA(T3g, T3k, T5U); T2c = W[15]; T2m = W[47]; T3m = T3f + T3l; T8k = T3f - T3l; T5W = T5T + T5V; T8a = T5T - T5V; T4D = FMA(T2c, T2a, T4C); T2g = FNMS(T2c, T2f, T2b); T2q = FNMS(T2m, T2p, T2l); T4F = FMA(T2m, T2k, T4E); } } } { E TL, TQ, TM, T55, TS, T4b, TN; { E TO, TP, TI, T45, TJ, TK; TJ = Ip[WS(rs, 15)]; TK = Im[WS(rs, 15)]; T2r = T2g + T2q; T8u = T2g - T2q; T4G = T4D + T4F; T7q = T4D - T4F; T49 = TJ + TK; TL = TJ - TK; TO = Rp[WS(rs, 15)]; TP = Rm[WS(rs, 15)]; TI = W[58]; T45 = W[60]; { E TT, TU, TY, TZ; TT = Ip[WS(rs, 7)]; T46 = TO - TP; TQ = TO + TP; TM = TI * TL; T5v = T45 * T49; T47 = T45 * T46; T55 = TI * TQ; TU = Im[WS(rs, 7)]; TY = Rp[WS(rs, 7)]; TZ = Rm[WS(rs, 7)]; TS = W[26]; T4f = TT + TU; TV = TT - TU; T4c = TY - TZ; T10 = TY + TZ; T4b = W[28]; TW = TS * TV; } } TN = W[59]; T57 = TS * T10; T5x = T4b * T4f; T4d = T4b * T4c; TR = FNMS(TN, TQ, TM); T56 = FMA(TN, TL, T55); TX = W[27]; T48 = W[61]; T4e = W[29]; } } } { E T8c, T8d, T8f, T8g; { E T3I, T3F, T5k, T3G, T3O, Te, T3L, Tj, Tf, T4W, T5m, T3M, Ta, T4V, Tg; E T3H, T3N; { E T4, T9, T5, T4U, Tb, T3K, T1, T3E, T6; { E T2, T3, T7, T8, T58, T11; T2 = Ip[WS(rs, 1)]; T58 = FMA(TX, TV, T57); T11 = FNMS(TX, T10, TW); { E T5w, T4a, T5y, T4g; T5w = FNMS(T48, T46, T5v); T4a = FMA(T48, T49, T47); T5y = FNMS(T4e, T4c, T5x); T4g = FMA(T4e, T4f, T4d); T59 = T56 + T58; T7K = T56 - T58; T7H = TR - T11; T12 = TR + T11; T5z = T5w + T5y; T81 = T5w - T5y; T7Q = T4g - T4a; T4h = T4a + T4g; T3 = Im[WS(rs, 1)]; } T7 = Rp[WS(rs, 1)]; T8 = Rm[WS(rs, 1)]; T1 = W[2]; T3I = T2 + T3; T4 = T2 - T3; T3F = T7 - T8; T9 = T7 + T8; T3E = W[4]; T5 = T1 * T4; } { E Tc, Td, Th, Ti; Tc = Ip[WS(rs, 9)]; T4U = T1 * T9; T5k = T3E * T3I; T3G = T3E * T3F; Td = Im[WS(rs, 9)]; Th = Rp[WS(rs, 9)]; Ti = Rm[WS(rs, 9)]; Tb = W[34]; T3O = Tc + Td; Te = Tc - Td; T3L = Th - Ti; Tj = Th + Ti; T3K = W[36]; Tf = Tb * Te; } T6 = W[3]; T4W = Tb * Tj; T5m = T3K * T3O; T3M = T3K * T3L; Ta = FNMS(T6, T9, T5); T4V = FMA(T6, T4, T4U); Tg = W[35]; T3H = W[5]; T3N = W[37]; } { E T1t, T2N, T2M, T2J, T1y, T2L, T5H, T4I, T1u, T2S, T1D, T2T, T1I, T2Q, T2P; E T1A, T5J; { E T2K, T1q, T1w, T1x; { E T1r, T7U, T7T, T1s, T4X, Tk; T1r = Ip[WS(rs, 2)]; T4X = FMA(Tg, Te, T4W); Tk = FNMS(Tg, Tj, Tf); { E T5l, T3J, T5n, T3P; T5l = FNMS(T3H, T3F, T5k); T3J = FMA(T3H, T3I, T3G); T5n = FNMS(T3N, T3L, T5m); T3P = FMA(T3N, T3O, T3M); T4Y = T4V + T4X; T7D = T4V - T4X; T7A = Ta - Tk; Tl = Ta + Tk; T7U = T5l - T5n; T5o = T5l + T5n; T7T = T3P - T3J; T3Q = T3J + T3P; T1s = Im[WS(rs, 2)]; } T1w = Rp[WS(rs, 2)]; T84 = T7U + T7T; T7V = T7T - T7U; T1t = T1r - T1s; T2N = T1r + T1s; T1x = Rm[WS(rs, 2)]; } T2M = W[9]; T2J = W[8]; T1y = T1w + T1x; T2K = T1x - T1w; T1q = W[6]; { E T1B, T1C, T1G, T1H; T1B = Ip[WS(rs, 10)]; T2L = T2J * T2K; T5H = T2M * T2K; T4I = T1q * T1y; T1u = T1q * T1t; T1C = Im[WS(rs, 10)]; T1G = Rp[WS(rs, 10)]; T1H = Rm[WS(rs, 10)]; T2S = W[41]; T1D = T1B - T1C; T2T = T1B + T1C; T1I = T1G + T1H; T2Q = T1H - T1G; T2P = W[40]; T1A = W[38]; T5J = T2S * T2Q; } } { E T2R, T4K, T1E, T1z, T4J, T1F, T1v, T2O, T2U; T1v = W[7]; T2R = T2P * T2Q; T4K = T1A * T1I; T1E = T1A * T1D; T1z = FNMS(T1v, T1y, T1u); T4J = FMA(T1v, T1t, T4I); T1F = W[39]; T2O = FNMS(T2M, T2N, T2L); T2U = FNMS(T2S, T2T, T2R); { E T5I, T4L, T1J, T5K; T5I = FMA(T2J, T2N, T5H); T4L = FMA(T1F, T1D, T4K); T1J = FNMS(T1F, T1I, T1E); T8c = T2O - T2U; T2V = T2O + T2U; T5K = FMA(T2P, T2T, T5J); T4M = T4J + T4L; T7t = T4J - T4L; T7s = T1z - T1J; T1K = T1z + T1J; T8d = T5I - T5K; T5L = T5I + T5K; } } } } { E T2Z, T30, T1O, T1T, T2W, T5M, T2Y, T4N, T1P, T35, T1Y, T36, T23, T33, T32; E T1V, T5O, T2X, T1L; { E T1M, T1N, T1R, T1S; T1M = Ip[WS(rs, 14)]; T8e = T8c - T8d; T8n = T8c + T8d; T1N = Im[WS(rs, 14)]; T1R = Rp[WS(rs, 14)]; T1S = Rm[WS(rs, 14)]; T2Z = W[57]; T30 = T1M + T1N; T1O = T1M - T1N; T2X = T1S - T1R; T1T = T1R + T1S; T2W = W[56]; T1L = W[54]; T5M = T2Z * T2X; } { E T1W, T1X, T21, T22; T1W = Ip[WS(rs, 6)]; T2Y = T2W * T2X; T4N = T1L * T1T; T1P = T1L * T1O; T1X = Im[WS(rs, 6)]; T21 = Rp[WS(rs, 6)]; T22 = Rm[WS(rs, 6)]; T35 = W[25]; T1Y = T1W - T1X; T36 = T1W + T1X; T23 = T21 + T22; T33 = T22 - T21; T32 = W[24]; T1V = W[22]; T5O = T35 * T33; } { E T34, T4P, T1Z, T1U, T4O, T20, T1Q, T31, T37; T1Q = W[55]; T34 = T32 * T33; T4P = T1V * T23; T1Z = T1V * T1Y;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -