📄 hb_32.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:06:33 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 236 additions, 62 multiplications, 198 fused multiply/add), * 135 stack variables, 7 constants, and 128 memory accesses */#include "hb.h"static void hb_32(R *cr, R *ci, const R *W, stride rs, INT mb, INT me, INT ms){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + ((mb - 1) * 62); m < me; m = m + 1, cr = cr + ms, ci = ci - ms, W = W + 62, MAKE_VOLATILE_STRIDE(rs)) { E T5o, T5r, T5q, T5n, T5s, T5p; { E T5K, Tf, T8k, T7k, T8x, T7N, T3i, T1i, T3v, T2L, T5f, T4v, T6T, T6m, T52; E T42, TZ, T6X, T1X, T3p, T8p, T8B, T3o, T26, T58, T4n, T7T, T7z, T59, T4k; E T6p, T6a, TK, T6W, T2o, T3m, T8s, T8A, T3l, T2x, T55, T4g, T7S, T7G, T56; E T4d, T6o, T61, T5Q, T5N, T6f, Tu, T8y, T7r, T8l, T7Q, T3w, T1F, T45, T48; E T3j, T2O, T53, T4y; { E T62, T69, T4j, T4i; { E T6l, T6i, T40, T41; { E T12, T3, T2D, T6, T6g, T2G, T6h, T15, Td, T6k, T1g, T2J, Ta, T17, T1a; E T6j; { E T2E, T2F, T13, T14; { E T1, T2, T4, T5; T1 = cr[0]; T2 = ci[WS(rs, 15)]; T4 = cr[WS(rs, 8)]; T5 = ci[WS(rs, 7)]; T2E = ci[WS(rs, 31)]; T12 = T1 - T2; T3 = T1 + T2; T2D = T4 - T5; T6 = T4 + T5; T2F = cr[WS(rs, 16)]; } T13 = ci[WS(rs, 23)]; T14 = cr[WS(rs, 24)]; { E Tb, Tc, T1d, T1e; Tb = ci[WS(rs, 3)]; T6g = T2E - T2F; T2G = T2E + T2F; T6h = T13 - T14; T15 = T13 + T14; Tc = cr[WS(rs, 12)]; T1d = ci[WS(rs, 19)]; T1e = cr[WS(rs, 28)]; { E T8, T1c, T1f, T9, T18, T19; T8 = cr[WS(rs, 4)]; Td = Tb + Tc; T1c = Tb - Tc; T6k = T1d - T1e; T1f = T1d + T1e; T9 = ci[WS(rs, 11)]; T18 = ci[WS(rs, 27)]; T19 = cr[WS(rs, 20)]; T1g = T1c - T1f; T2J = T1c + T1f; Ta = T8 + T9; T17 = T8 - T9; T1a = T18 + T19; T6j = T18 - T19; } } } { E T2I, T7M, T7L, T16, T1h, T4u, T4t, T2H, T2K; { E T7i, T7, T1b, Te, T7j; T7i = T3 - T6; T7 = T3 + T6; T2I = T17 + T1a; T1b = T17 - T1a; Te = Ta + Td; T7M = Ta - Td; T7j = T6k - T6j; T6l = T6j + T6k; T6i = T6g + T6h; T7L = T6g - T6h; T5K = T7 - Te; Tf = T7 + Te; T8k = T7i + T7j; T7k = T7i - T7j; T40 = T12 + T15; T16 = T12 - T15; T1h = T1b + T1g; T4u = T1b - T1g; } T4t = T2G - T2D; T2H = T2D + T2G; T8x = T7M + T7L; T7N = T7L - T7M; T3i = FMA(KP707106781, T1h, T16); T1i = FNMS(KP707106781, T1h, T16); T2K = T2I - T2J; T41 = T2I + T2J; T3v = FMA(KP707106781, T2K, T2H); T2L = FNMS(KP707106781, T2K, T2H); T5f = FNMS(KP707106781, T4u, T4t); T4v = FMA(KP707106781, T4u, T4t); } } { E T1Y, T1H, TR, T7w, T1K, T21, T65, T7t, TV, T1M, TU, T67, T1U, TW, T1N; E T1O; { E TL, TM, TO, TP, T63, T64; TL = ci[0]; T6T = T6i + T6l; T6m = T6i - T6l; T52 = FMA(KP707106781, T41, T40); T42 = FNMS(KP707106781, T41, T40); TM = cr[WS(rs, 15)]; TO = cr[WS(rs, 7)]; TP = ci[WS(rs, 8)]; { E T1I, TN, TQ, T1J, T1Z, T20; T1I = ci[WS(rs, 16)]; T1Y = TL - TM; TN = TL + TM; T1H = TO - TP; TQ = TO + TP; T1J = cr[WS(rs, 31)]; T1Z = ci[WS(rs, 24)]; T20 = cr[WS(rs, 23)]; TR = TN + TQ; T7w = TN - TQ; T1K = T1I + T1J; T63 = T1I - T1J; T64 = T1Z - T20; T21 = T1Z + T20; } { E TS, TT, T1S, T1T; TS = cr[WS(rs, 3)]; T65 = T63 + T64; T7t = T63 - T64; TT = ci[WS(rs, 12)]; T1S = ci[WS(rs, 20)]; T1T = cr[WS(rs, 27)]; TV = ci[WS(rs, 4)]; T1M = TS - TT; TU = TS + TT; T67 = T1S - T1T; T1U = T1S + T1T; TW = cr[WS(rs, 11)]; T1N = ci[WS(rs, 28)]; T1O = cr[WS(rs, 19)]; } } { E T4l, T1L, T24, T23, T8n, T7v, T1W, T8o, T7y, T4m, T22, T25; { E T1V, T7u, T7x, T1Q, T1R, TX; T4l = T1H + T1K; T1L = T1H - T1K; T1R = TV - TW; TX = TV + TW; { E T66, T1P, TY, T68; T66 = T1N - T1O; T1P = T1N + T1O; T24 = T1R - T1U; T1V = T1R + T1U; T7u = TU - TX; TY = TU + TX; T68 = T66 + T67; T7x = T67 - T66; T23 = T1M - T1P; T1Q = T1M + T1P; TZ = TR + TY; T62 = TR - TY; T69 = T65 - T68; T6X = T65 + T68; } T8n = T7u + T7t; T7v = T7t - T7u; T4j = T1Q + T1V; T1W = T1Q - T1V; T8o = T7w + T7x; T7y = T7w - T7x; } T4i = T1Y + T21; T22 = T1Y - T21; T25 = T23 + T24; T4m = T23 - T24; T1X = FNMS(KP707106781, T1W, T1L); T3p = FMA(KP707106781, T1W, T1L); T8p = FNMS(KP414213562, T8o, T8n); T8B = FMA(KP414213562, T8n, T8o); T3o = FMA(KP707106781, T25, T22); T26 = FNMS(KP707106781, T25, T22); T58 = FMA(KP707106781, T4m, T4l); T4n = FNMS(KP707106781, T4m, T4l); T7T = FNMS(KP414213562, T7v, T7y); T7z = FMA(KP414213562, T7y, T7v); } } } { E T5T, T60, T4c, T4b; { E T2p, T28, TC, T7D, T2b, T2s, T5W, T7A, TG, T2d, TF, T5Y, T2l, TH, T2e; E T2f; { E Tw, Tx, Tz, TA, T5U, T5V; Tw = cr[WS(rs, 1)]; T59 = FMA(KP707106781, T4j, T4i); T4k = FNMS(KP707106781, T4j, T4i); T6p = T69 - T62; T6a = T62 + T69; Tx = ci[WS(rs, 14)]; Tz = cr[WS(rs, 9)]; TA = ci[WS(rs, 6)]; { E T29, Ty, TB, T2a, T2q, T2r; T29 = ci[WS(rs, 30)]; T2p = Tw - Tx; Ty = Tw + Tx; T28 = Tz - TA; TB = Tz + TA; T2a = cr[WS(rs, 17)]; T2q = ci[WS(rs, 22)]; T2r = cr[WS(rs, 25)]; TC = Ty + TB; T7D = Ty - TB; T2b = T29 + T2a; T5U = T29 - T2a; T5V = T2q - T2r; T2s = T2q + T2r; } { E TD, TE, T2j, T2k; TD = cr[WS(rs, 5)]; T5W = T5U + T5V; T7A = T5U - T5V; TE = ci[WS(rs, 10)]; T2j = ci[WS(rs, 18)]; T2k = cr[WS(rs, 29)]; TG = ci[WS(rs, 2)]; T2d = TD - TE; TF = TD + TE; T5Y = T2j - T2k; T2l = T2j + T2k; TH = cr[WS(rs, 13)]; T2e = ci[WS(rs, 26)]; T2f = cr[WS(rs, 21)]; } } { E T4e, T2c, T2v, T2u, T8q, T7C, T2n, T8r, T7F, T4f, T2t, T2w; { E T2m, T7B, T7E, T2h, T2i, TI; T4e = T2b - T28; T2c = T28 + T2b; T2i = TG - TH; TI = TG + TH; { E T5X, T2g, TJ, T5Z; T5X = T2e - T2f; T2g = T2e + T2f; T2v = T2i - T2l; T2m = T2i + T2l; T7B = TF - TI; TJ = TF + TI; T5Z = T5X + T5Y; T7E = T5Y - T5X; T2u = T2d - T2g; T2h = T2d + T2g; TK = TC + TJ; T5T = TC - TJ; T60 = T5W - T5Z; T6W = T5W + T5Z; } T8q = T7B + T7A; T7C = T7A - T7B; T4c = T2h + T2m; T2n = T2h - T2m; T8r = T7D + T7E; T7F = T7D - T7E; } T4b = T2p + T2s; T2t = T2p - T2s; T2w = T2u + T2v; T4f = T2v - T2u; T2o = FNMS(KP707106781, T2n, T2c); T3m = FMA(KP707106781, T2n, T2c); T8s = FMA(KP414213562, T8r, T8q); T8A = FNMS(KP414213562, T8q, T8r); T3l = FMA(KP707106781, T2w, T2t); T2x = FNMS(KP707106781, T2w, T2t); T55 = FMA(KP707106781, T4f, T4e); T4g = FNMS(KP707106781, T4f, T4e); T7S = FMA(KP414213562, T7C, T7F); T7G = FNMS(KP414213562, T7F, T7C); } } { E T44, T1D, Tm, T7o, T7p, T43, T1y, T47, T1s, Tt, T7m, T7l, T46, T1n; { E Tj, T1z, Ti, T5P, T1C, Tk, T1v, T1w; { E Tg, Th, T1A, T1B; Tg = cr[WS(rs, 2)]; T56 = FMA(KP707106781, T4c, T4b); T4d = FNMS(KP707106781, T4c, T4b); T6o = T5T + T60; T61 = T5T - T60; Th = ci[WS(rs, 13)]; T1A = ci[WS(rs, 21)]; T1B = cr[WS(rs, 26)]; Tj = cr[WS(rs, 10)]; T1z = Tg - Th; Ti = Tg + Th; T5P = T1A - T1B; T1C = T1A + T1B; Tk = ci[WS(rs, 5)]; T1v = ci[WS(rs, 29)]; T1w = cr[WS(rs, 18)]; } { E T1u, Tl, T5O, T1x; T44 = T1z + T1C; T1D = T1z - T1C; T1u = Tj - Tk; Tl = Tj + Tk; T5O = T1v - T1w; T1x = T1v + T1w; Tm = Ti + Tl; T7o = Ti - Tl; T7p = T5O - T5P; T5Q = T5O + T5P; T43 = T1x - T1u; T1y = T1u + T1x; } } { E Tq, T1o, Tp, T5M, T1r, Tr, T1k, T1l; { E Tn, To, T1p, T1q; Tn = ci[WS(rs, 1)]; To = cr[WS(rs, 14)]; T1p = ci[WS(rs, 25)]; T1q = cr[WS(rs, 22)]; Tq = cr[WS(rs, 6)]; T1o = Tn - To; Tp = Tn + To; T5M = T1p - T1q; T1r = T1p + T1q; Tr = ci[WS(rs, 9)]; T1k = ci[WS(rs, 17)]; T1l = cr[WS(rs, 30)]; } { E T1j, Ts, T5L, T1m; T47 = T1o + T1r; T1s = T1o - T1r; T1j = Tq - Tr; Ts = Tq + Tr; T5L = T1k - T1l; T1m = T1k + T1l; Tt = Tp + Ts; T7m = Tp - Ts; T7l = T5L - T5M; T5N = T5L + T5M; T46 = T1j + T1m; T1n = T1j - T1m; } } { E T7P, T7O, T2N, T1t, T1E, T2M, T7n, T7q, T4w, T4x; T7P = T7m + T7l; T7n = T7l - T7m; T7q = T7o + T7p; T7O = T7o - T7p; T6f = Tm - Tt; Tu = Tm + Tt; T8y = T7q + T7n; T7r = T7n - T7q; T2N = FMA(KP414213562, T1n, T1s); T1t = FNMS(KP414213562, T1s, T1n); T1E = FMA(KP414213562, T1D, T1y); T2M = FNMS(KP414213562, T1y, T1D); T8l = T7O + T7P; T7Q = T7O - T7P; T3w = T1E + T1t; T1F = T1t - T1E; T45 = FNMS(KP414213562, T44, T43); T4w = FMA(KP414213562, T43, T44); T4x = FMA(KP414213562, T46, T47); T48 = FNMS(KP414213562, T47, T46); T3j = T2M + T2N; T2O = T2M - T2N; T53 = T4w + T4x; T4y = T4w - T4x; } } } } {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -