📄 hb_32.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:42:55 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_hc2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 32 -dif -name hb_32 -include hb.h *//* * This function contains 434 FP additions, 260 FP multiplications, * (or, 236 additions, 62 multiplications, 198 fused multiply/add), * 141 stack variables, and 128 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_hc2hc.ml,v 1.15 2006-01-05 03:04:27 stevenj Exp $ */#include "hb.h"static const R *hb_32(R *rio, R *iio, const R *W, stride ios, INT m, INT dist){ DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = m - 2; i > 0; i = i - 2, rio = rio + dist, iio = iio - dist, W = W + 62, MAKE_VOLATILE_STRIDE(ios)) { E T86, T81, T8b, T7Y, T7Z, T82, T80, T83, T88, T8c, T87; { E T3y, Tf, T8x, T7k, T8k, T7N, T2Q, T1a, T33, T2j, T6N, T50, T6A, T63, T4w; E T3U, TZ, T4q, T26, T2p, T8q, T7F, T37, T2Y, T6H, T5K, T8r, T7C, T3X, T3M; E T6G, T5R, Tu, T3R, T2m, T2R, T1t, T34, T8y, T7Q, T4x, T3B, T6O, T66, T8l; E T7r, T6B, T5f, TK, T4p, T5x, T1N, T2o, T8n, T7y, T5u, T36, T2V, T6E, T5r; E T8o, T7v, T3W, T3H, T5Q, T5N; { E T5Z, T62, T3T, T3S; { E T4Q, T3, T5Y, T2e, T5X, T6, T4R, T2h, Td, T18, T61, T4V, Ta, T4X, T4W; E T15; { E T4, T5, T2f, T2g; { E T1, T2, T2c, T2d; T1 = rio[0]; T2 = iio[-WS(ios, 16)]; T2c = iio[0]; T2d = rio[WS(ios, 16)]; T4 = rio[WS(ios, 8)]; T4Q = T1 - T2; T3 = T1 + T2; T5Y = T2c + T2d; T2e = T2c - T2d; T5 = iio[-WS(ios, 24)]; } T2f = iio[-WS(ios, 8)]; T2g = rio[WS(ios, 24)]; { E Tb, Tc, T16, T17; Tb = iio[-WS(ios, 28)]; T5X = T4 - T5; T6 = T4 + T5; T4R = T2f + T2g; T2h = T2f - T2g; Tc = rio[WS(ios, 12)]; T16 = iio[-WS(ios, 12)]; T17 = rio[WS(ios, 28)]; { E T8, T4T, T4U, T9, T13, T14; T8 = rio[WS(ios, 4)]; Td = Tb + Tc; T4T = Tb - Tc; T18 = T16 - T17; T4U = T17 + T16; T9 = iio[-WS(ios, 20)]; T13 = iio[-WS(ios, 4)]; T14 = rio[WS(ios, 20)]; T61 = T4T + T4U; T4V = T4T - T4U; Ta = T8 + T9; T4X = T8 - T9; T4W = T13 + T14; T15 = T13 - T14; } } } { E T12, T2b, T4Z, T4S, T7L, T7M, T19, T2i; { E T7, T60, Te, T7i, T7j, T4Y; T12 = T3 - T6; T7 = T3 + T6; T4Y = T4W - T4X; T60 = T4X + T4W; Te = Ta + Td; T2b = Td - Ta; T5Z = T5X + T5Y; T7i = T5Y - T5X; T7j = T4Y + T4V; T4Z = T4V - T4Y; T4S = T4Q - T4R; T7L = T4Q + T4R; T3y = T7 - Te; Tf = T7 + Te; T8x = FNMS(KP707106781, T7j, T7i); T7k = FMA(KP707106781, T7j, T7i); T7M = T60 + T61; T62 = T60 - T61; T19 = T15 - T18; T3T = T15 + T18; } T8k = FNMS(KP707106781, T7M, T7L); T7N = FMA(KP707106781, T7M, T7L); T2Q = T12 - T19; T1a = T12 + T19; T2i = T2e - T2h; T3S = T2e + T2h; T33 = T2i - T2b; T2j = T2b + T2i; T6N = FNMS(KP707106781, T4Z, T4S); T50 = FMA(KP707106781, T4Z, T4S); } } { E T5A, T5L, TR, T1O, T5M, T5B, T3J, T24, T5F, T5O, TY, T1X, T5I, T5P, T1V; E T3K; { E TL, TM, TO, TP, T20, T23; TL = iio[-WS(ios, 31)]; T6A = FNMS(KP707106781, T62, T5Z); T63 = FMA(KP707106781, T62, T5Z); T4w = T3T + T3S; T3U = T3S - T3T; TM = rio[WS(ios, 15)]; TO = rio[WS(ios, 7)]; TP = iio[-WS(ios, 23)]; { E T1Y, TN, TQ, T1Z, T21, T22; T1Y = iio[-WS(ios, 15)]; T5A = TL - TM; TN = TL + TM; T5L = TO - TP; TQ = TO + TP; T1Z = rio[WS(ios, 31)]; T21 = iio[-WS(ios, 7)]; T22 = rio[WS(ios, 23)]; TR = TN + TQ; T1O = TN - TQ; T5M = T1Z + T1Y; T20 = T1Y - T1Z; T23 = T21 - T22; T5B = T21 + T22; } { E TV, T5D, TU, T5E, T1R, TW, T1S, T1T; { E TS, TT, T1P, T1Q; TS = rio[WS(ios, 3)]; T3J = T20 + T23; T24 = T20 - T23; TT = iio[-WS(ios, 19)]; T1P = iio[-WS(ios, 3)]; T1Q = rio[WS(ios, 19)]; TV = iio[-WS(ios, 27)]; T5D = TS - TT; TU = TS + TT; T5E = T1P + T1Q; T1R = T1P - T1Q; TW = rio[WS(ios, 11)]; T1S = iio[-WS(ios, 11)]; T1T = rio[WS(ios, 27)]; } { E T5G, TX, T5H, T1U; T5F = T5D - T5E; T5O = T5D + T5E; T5G = TV - TW; TX = TV + TW; T5H = T1T + T1S; T1U = T1S - T1T; TY = TU + TX; T1X = TX - TU; T5I = T5G - T5H; T5P = T5G + T5H; T1V = T1R - T1U; T3K = T1R + T1U; } } } { E T3I, T2W, T2X, T3L, T5C, T7D, T7E, T1W, T25, T5J, T7B, T7A; T3I = TR - TY; TZ = TR + TY; T1W = T1O + T1V; T2W = T1O - T1V; T2X = T24 - T1X; T25 = T1X + T24; T4q = T3K + T3J; T3L = T3J - T3K; T5C = T5A - T5B; T7D = T5A + T5B; T7E = T5O + T5P; T5Q = T5O - T5P; T26 = FNMS(KP414213562, T25, T1W); T2p = FMA(KP414213562, T1W, T25); T8q = FNMS(KP707106781, T7E, T7D); T7F = FMA(KP707106781, T7E, T7D); T5J = T5F + T5I; T7B = T5F - T5I; T5N = T5L - T5M; T7A = T5L + T5M; T37 = FNMS(KP414213562, T2W, T2X); T2Y = FMA(KP414213562, T2X, T2W); T6H = FNMS(KP707106781, T5J, T5C); T5K = FMA(KP707106781, T5J, T5C); T8r = FNMS(KP707106781, T7B, T7A); T7C = FMA(KP707106781, T7B, T7A); T3X = T3I + T3L; T3M = T3I - T3L; } } } { E T7n, T7q, T57, T5e; { E T56, T7l, T1b, Tm, T7m, T53, T3z, T1i, T58, Tp, T1o, T5c, T1n, T5b, Ts; E T1p; { E T51, Ti, T1f, T55, T1e, T54, Tl, T1g; { E T1c, T1d, Tg, Th, Tj, Tk; Tg = rio[WS(ios, 2)]; Th = iio[-WS(ios, 18)]; T1c = iio[-WS(ios, 2)]; T6G = FNMS(KP707106781, T5Q, T5N); T5R = FMA(KP707106781, T5Q, T5N); T51 = Tg - Th; Ti = Tg + Th; T1d = rio[WS(ios, 18)]; Tj = rio[WS(ios, 10)]; Tk = iio[-WS(ios, 26)]; T1f = iio[-WS(ios, 10)]; T55 = T1c + T1d; T1e = T1c - T1d; T54 = Tj - Tk; Tl = Tj + Tk; T1g = rio[WS(ios, 26)]; } { E T1l, T1m, Tq, Tr; { E Tn, T1h, T52, To; Tn = iio[-WS(ios, 30)]; T56 = T54 + T55; T7l = T55 - T54; T1b = Ti - Tl; Tm = Ti + Tl; T1h = T1f - T1g; T52 = T1f + T1g; To = rio[WS(ios, 14)]; T1l = iio[-WS(ios, 14)]; T7m = T51 + T52; T53 = T51 - T52; T3z = T1e + T1h; T1i = T1e - T1h; T58 = Tn - To; Tp = Tn + To; T1m = rio[WS(ios, 30)]; } Tq = rio[WS(ios, 6)]; Tr = iio[-WS(ios, 22)]; T1o = iio[-WS(ios, 6)]; T5c = T1m + T1l; T1n = T1l - T1m; T5b = Tq - Tr; Ts = Tq + Tr; T1p = rio[WS(ios, 22)]; } } { E T5d, T5a, T3A, T64, T65; { E T2k, T1j, T7o, T1k, Tt, T1q, T59, T7p, T1r; T2k = T1i - T1b; T1j = T1b + T1i; T5d = T5b - T5c; T7o = T5b + T5c; T1k = Tp - Ts; Tt = Tp + Ts; T1q = T1o - T1p; T59 = T1o + T1p; T7p = T58 + T59; T5a = T58 - T59; T3A = T1n + T1q; T1r = T1n - T1q; { E T7O, T7P, T1s, T2l; T7n = FNMS(KP414213562, T7m, T7l); T7O = FMA(KP414213562, T7l, T7m); T7P = FMA(KP414213562, T7o, T7p); T7q = FNMS(KP414213562, T7p, T7o); T1s = T1k - T1r; T2l = T1k + T1r; Tu = Tm + Tt; T3R = Tt - Tm; T2m = T2k + T2l; T2R = T2l - T2k; T1t = T1j + T1s; T34 = T1j - T1s; T8y = T7O - T7P; T7Q = T7O + T7P; } } T57 = FNMS(KP414213562, T56, T53); T64 = FMA(KP414213562, T53, T56); T65 = FNMS(KP414213562, T5a, T5d); T5e = FMA(KP414213562, T5d, T5a); T4x = T3z + T3A; T3B = T3z - T3A; T6O = T64 - T65; T66 = T64 + T65; } } { E T5h, T5s, TC, T1v, T5t, T5i, T3E, T1L, T5p, T5v, TJ, T1E, T5w, T5m, T3F; E T1C; { E Tw, Tx, Tz, TA, T1H, T1K; Tw = rio[WS(ios, 1)]; T8l = T7n + T7q; T7r = T7n - T7q; T6B = T5e - T57; T5f = T57 + T5e; Tx = iio[-WS(ios, 17)]; Tz = rio[WS(ios, 9)]; TA = iio[-WS(ios, 25)]; { E T1F, Ty, TB, T1G, T1I, T1J; T1F = iio[-WS(ios, 1)]; T5h = Tw - Tx; Ty = Tw + Tx; T5s = Tz - TA; TB = Tz + TA; T1G = rio[WS(ios, 17)]; T1I = iio[-WS(ios, 9)]; T1J = rio[WS(ios, 25)]; TC = Ty + TB; T1v = Ty - TB; T5t = T1F + T1G; T1H = T1F - T1G; T1K = T1I - T1J; T5i = T1I + T1J; } { E TG, T5o, TF, T5n, T1y, TH, T1z, T1A; { E TD, TE, T1w, T1x; TD = rio[WS(ios, 5)]; T3E = T1H + T1K; T1L = T1H - T1K; TE = iio[-WS(ios, 21)]; T1w = iio[-WS(ios, 5)]; T1x = rio[WS(ios, 21)]; TG = iio[-WS(ios, 29)]; T5o = TD - TE; TF = TD + TE; T5n = T1w + T1x; T1y = T1w - T1x; TH = rio[WS(ios, 13)]; T1z = iio[-WS(ios, 13)]; T1A = rio[WS(ios, 29)]; } { E T5k, TI, T5l, T1B; T5p = T5n - T5o; T5v = T5o + T5n; T5k = TG - TH; TI = TG + TH; T5l = T1A + T1z; T1B = T1z - T1A; TJ = TF + TI; T1E = TI - TF; T5w = T5k + T5l; T5m = T5k - T5l; T3F = T1y + T1B; T1C = T1y - T1B; } } } { E T3D, T2U, T2T, T3G, T5j, T7w, T7x, T1M, T1D, T5q, T7u, T7t; T3D = TC - TJ; TK = TC + TJ; T1M = T1E + T1L; T2U = T1L - T1E; T2T = T1v - T1C; T1D = T1v + T1C; T4p = T3F + T3E; T3G = T3E - T3F; T5j = T5h - T5i; T7w = T5h + T5i; T7x = T5v + T5w; T5x = T5v - T5w; T1N = FMA(KP414213562, T1M, T1D); T2o = FNMS(KP414213562, T1D, T1M); T8n = FNMS(KP707106781, T7x, T7w); T7y = FMA(KP707106781, T7x, T7w); T5q = T5m - T5p; T7u = T5p + T5m; T5u = T5s + T5t; T7t = T5t - T5s; T36 = FMA(KP414213562, T2T, T2U); T2V = FNMS(KP414213562, T2U, T2T); T6E = FNMS(KP707106781, T5q, T5j); T5r = FMA(KP707106781, T5q, T5j); T8o = FNMS(KP707106781, T7u, T7t); T7v = FMA(KP707106781, T7u, T7t); T3W = T3G - T3D; T3H = T3D + T3G;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -