📄 r2hc_128.c
字号:
/* * Copyright (c) 2003, 2006 Matteo Frigo * Copyright (c) 2003, 2006 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Fri Jan 27 20:17:01 EST 2006 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_r2hc -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 128 -name r2hc_128 -include r2hc.h *//* * This function contains 956 FP additions, 516 FP multiplications, * (or, 440 additions, 0 multiplications, 516 fused multiply/add), * 229 stack variables, and 256 memory accesses *//* * Generator Id's : * $Id: algsimp.ml,v 1.8 2006-01-05 03:04:27 stevenj Exp $ * $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ * $Id: gen_r2hc.ml,v 1.17 2006-01-05 03:04:27 stevenj Exp $ */#include "r2hc.h"static void r2hc_128(const R *I, R *ro, R *io, stride is, stride ros, stride ios, INT v, INT ivs, INT ovs){ DK(KP989176509, +0.989176509964780973451673738016243063983689533); DK(KP803207531, +0.803207531480644909806676512963141923879569427); DK(KP148335987, +0.148335987538347428753676511486911367000625355); DK(KP741650546, +0.741650546272035369581266691172079863842265220); DK(KP998795456, +0.998795456205172392714771604759100694443203615); DK(KP740951125, +0.740951125354959091175616897495162729728955309); DK(KP049126849, +0.049126849769467254105343321271313617079695752); DK(KP906347169, +0.906347169019147157946142717268914412664134293); DK(KP857728610, +0.857728610000272069902269984284770137042490799); DK(KP970031253, +0.970031253194543992603984207286100251456865962); DK(KP599376933, +0.599376933681923766271389869014404232837890546); DK(KP250486960, +0.250486960191305461595702160124721208578685568); DK(KP941544065, +0.941544065183020778412509402599502357185589796); DK(KP903989293, +0.903989293123443331586200297230537048710132025); DK(KP472964775, +0.472964775891319928124438237972992463904131113); DK(KP357805721, +0.357805721314524104672487743774474392487532769); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = v; i > 0; i = i - 1, I = I + ivs, ro = ro + ovs, io = io + ovs, MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(ros), MAKE_VOLATILE_STRIDE(ios)) { E T95, T96; { E TcD, TdR, T5P, T8v, T27, T7r, Tf, Ta5, T7s, T5S, T8w, T2e, TdS, TcG, Tbn; E Tu, TcK, TdU, TK, Ta6, T7w, T8y, T2o, T5U, TcN, TdV, TZ, Ta7, T7z, T8z; E T2x, T5V, T1g, Taa, Tab, T1v, Tew, TcX, Tex, TcU, T6A, T2M, T9b, T7E, T9a; E T7H, T6z, T2T, TeO, TdK, TeL, Tdz, T9p, T8d, T6O, T5G, T6L, T4X, Tc3, TaV; E Tc4, Tbi, T9s, T8o, TeH, Tdp, TeE, Tde, T9i, T7U, T6H, T4r, T6E, T3I, TbW; E Tao, TbX, TaL, T9l, T85, T1L, Tad, Tae, T20, Tez, Td6, TeA, Td3, T6x, T37; E T9e, T7L, T9d, T7O, T6w, T3e, TbZ, T3Z, T4s, Tc0, TeF, Tds, T4t, T4g, T87; E T80, TeI, Tdl, T86, T7X, TaM, TaD, Tb2, Tc6, T8e, T8f, T5e, T5H, Tb9, Tc7; E TeM, TdN, T5I, T5v, T8q, T8j, TeP, TdG; { E T7G, T2S, T2P, T7F; { E T28, Ti, Tn, T2c, Ts, T29, Tl, To; { E T4, T23, T3, T25, Td, T5, T8, T9; { E T1, T2, Tb, Tc; T1 = I[0]; T2 = I[WS(is, 64)]; Tb = I[WS(is, 112)]; Tc = I[WS(is, 48)]; T4 = I[WS(is, 32)]; T23 = T1 - T2; T3 = T1 + T2; T25 = Tb - Tc; Td = Tb + Tc; T5 = I[WS(is, 96)]; T8 = I[WS(is, 16)]; T9 = I[WS(is, 80)]; } { E Tq, Tr, Tj, Tk; { E Tg, T5N, T6, T24, Ta, Th; Tg = I[WS(is, 8)]; T5N = T4 - T5; T6 = T4 + T5; T24 = T8 - T9; Ta = T8 + T9; Th = I[WS(is, 72)]; { E T7, T26, T5O, Te; TcD = T3 - T6; T7 = T3 + T6; T26 = T24 + T25; T5O = T25 - T24; TdR = Td - Ta; Te = Ta + Td; T5P = FNMS(KP707106781, T5O, T5N); T8v = FMA(KP707106781, T5O, T5N); T27 = FMA(KP707106781, T26, T23); T7r = FNMS(KP707106781, T26, T23); Tf = T7 + Te; Ta5 = T7 - Te; T28 = Tg - Th; Ti = Tg + Th; } } Tq = I[WS(is, 24)]; Tr = I[WS(is, 88)]; Tj = I[WS(is, 40)]; Tk = I[WS(is, 104)]; Tn = I[WS(is, 120)]; T2c = Tq - Tr; Ts = Tq + Tr; T29 = Tj - Tk; Tl = Tj + Tk; To = I[WS(is, 56)]; } } { E T2g, T2l, T2h, TF, TcI, TC, T2i, TI; { E Ty, TG, TB, TH; { E Tw, T5Q, T2a, TcE, Tm, T2b, Tp, Tx; Tw = I[WS(is, 4)]; T5Q = FMA(KP414213562, T28, T29); T2a = FNMS(KP414213562, T29, T28); TcE = Ti - Tl; Tm = Ti + Tl; T2b = Tn - To; Tp = Tn + To; Tx = I[WS(is, 68)]; { E Tz, TA, TD, TE; Tz = I[WS(is, 36)]; { E T5R, T2d, TcF, Tt; T5R = FNMS(KP414213562, T2b, T2c); T2d = FMA(KP414213562, T2c, T2b); TcF = Tp - Ts; Tt = Tp + Ts; T2g = Tw - Tx; Ty = Tw + Tx; T7s = T5Q - T5R; T5S = T5Q + T5R; T8w = T2d - T2a; T2e = T2a + T2d; TdS = TcF - TcE; TcG = TcE + TcF; Tbn = Tt - Tm; Tu = Tm + Tt; TA = I[WS(is, 100)]; } TD = I[WS(is, 20)]; TE = I[WS(is, 84)]; TG = I[WS(is, 116)]; T2l = Tz - TA; TB = Tz + TA; T2h = TD - TE; TF = TD + TE; TH = I[WS(is, 52)]; } } TcI = Ty - TB; TC = Ty + TB; T2i = TG - TH; TI = TG + TH; } { E T2p, T2u, T2q, TU, TcL, TR, T2r, TX; { E TN, TV, TQ, TW; { E T2k, T7u, T2n, T7v, TL, TM; TL = I[WS(is, 124)]; TM = I[WS(is, 60)]; { E TJ, TcJ, T2m, T2j; TJ = TF + TI; TcJ = TI - TF; T2m = T2h - T2i; T2j = T2h + T2i; TcK = FMA(KP414213562, TcJ, TcI); TdU = FNMS(KP414213562, TcI, TcJ); TK = TC + TJ; Ta6 = TC - TJ; T2k = FMA(KP707106781, T2j, T2g); T7u = FNMS(KP707106781, T2j, T2g); T2n = FMA(KP707106781, T2m, T2l); T7v = FNMS(KP707106781, T2m, T2l); T2p = TL - TM; TN = TL + TM; } T7w = FMA(KP668178637, T7v, T7u); T8y = FNMS(KP668178637, T7u, T7v); T2o = FNMS(KP198912367, T2n, T2k); T5U = FMA(KP198912367, T2k, T2n); { E TO, TP, TS, TT; TO = I[WS(is, 28)]; TP = I[WS(is, 92)]; TS = I[WS(is, 12)]; TT = I[WS(is, 76)]; TV = I[WS(is, 108)]; T2u = TO - TP; TQ = TO + TP; T2q = TS - TT; TU = TS + TT; TW = I[WS(is, 44)]; } } TcL = TN - TQ; TR = TN + TQ; T2r = TV - TW; TX = TV + TW; } { E T2A, T14, T2N, T17, T1b, T1e, T2D, T2O, T1r, T2I, T1q, T2Q, T2H, TcR, T1n; E T1s, T15, T16; { E T2t, T7x, T2w, T7y, T12, T13; T12 = I[WS(is, 2)]; T13 = I[WS(is, 66)]; { E TY, TcM, T2v, T2s; TY = TU + TX; TcM = TX - TU; T2v = T2q - T2r; T2s = T2q + T2r; TcN = FNMS(KP414213562, TcM, TcL); TdV = FMA(KP414213562, TcL, TcM); TZ = TR + TY; Ta7 = TR - TY; T2t = FMA(KP707106781, T2s, T2p); T7x = FNMS(KP707106781, T2s, T2p); T2w = FMA(KP707106781, T2v, T2u); T7y = FNMS(KP707106781, T2v, T2u); T2A = T12 - T13; T14 = T12 + T13; } T7z = FNMS(KP668178637, T7y, T7x); T8z = FMA(KP668178637, T7x, T7y); T2x = FMA(KP198912367, T2w, T2t); T5V = FNMS(KP198912367, T2t, T2w); T15 = I[WS(is, 34)]; T16 = I[WS(is, 98)]; } { E T1c, T2B, T1d, T19, T1a; T19 = I[WS(is, 18)]; T1a = I[WS(is, 82)]; T1c = I[WS(is, 114)]; T2N = T15 - T16; T17 = T15 + T16; T2B = T19 - T1a; T1b = T19 + T1a; T1d = I[WS(is, 50)]; { E T1k, T2F, T1j, T1l, T1h, T1i, T2C; T1h = I[WS(is, 10)]; T1i = I[WS(is, 74)]; T2C = T1c - T1d; T1e = T1c + T1d; T1k = I[WS(is, 42)]; T2F = T1h - T1i; T1j = T1h + T1i; T2D = T2B + T2C; T2O = T2B - T2C; T1l = I[WS(is, 106)]; { E T1o, T1p, T2G, T1m; T1o = I[WS(is, 122)]; T1p = I[WS(is, 58)]; T1r = I[WS(is, 26)]; T2G = T1k - T1l; T1m = T1k + T1l; T2I = T1o - T1p; T1q = T1o + T1p; T2Q = FMA(KP414213562, T2F, T2G); T2H = FNMS(KP414213562, T2G, T2F); TcR = T1j - T1m; T1n = T1j + T1m; T1s = I[WS(is, 90)]; } } } { E TcQ, TcV, T2K, T2R, T1u, TcT, TcW, TcS; { E T18, T1f, T1t, T2J; T18 = T14 + T17; TcQ = T14 - T17; TcV = T1e - T1b; T1f = T1b + T1e; T1t = T1r + T1s; T2J = T1r - T1s; T1g = T18 + T1f; Taa = T18 - T1f; T2K = FMA(KP414213562, T2J, T2I); T2R = FNMS(KP414213562, T2I, T2J); T1u = T1q + T1t; TcS = T1q - T1t; } TcT = TcR + TcS; TcW = TcS - TcR; { E T7C, T2E, T2L, T7D; T7C = FNMS(KP707106781, T2D, T2A); T2E = FMA(KP707106781, T2D, T2A); Tab = T1u - T1n; T1v = T1n + T1u; Tew = FNMS(KP707106781, TcW, TcV); TcX = FMA(KP707106781, TcW, TcV); Tex = FNMS(KP707106781, TcT, TcQ); TcU = FMA(KP707106781, TcT, TcQ); T2L = T2H + T2K; T7G = T2K - T2H; T7D = T2Q - T2R; T2S = T2Q + T2R; T2P = FMA(KP707106781, T2O, T2N); T7F = FNMS(KP707106781, T2O, T2N); T6A = FNMS(KP923879532, T2L, T2E); T2M = FMA(KP923879532, T2L, T2E); T9b = FNMS(KP923879532, T7D, T7C); T7E = FMA(KP923879532, T7D, T7C); } } } } } } { E T83, T84, T8m, T8n; { E TaP, T4z, TaQ, T5A, TaS, TaT, T4G, T5B, T4O, T5D, Tbh, Tdw, T4R, Tbc, T4S; E T4T; { E T4x, T4y, T5y, T5z; T4x = I[WS(is, 127)]; T9a = FNMS(KP923879532, T7G, T7F); T7H = FMA(KP923879532, T7G, T7F); T6z = FNMS(KP923879532, T2S, T2P); T2T = FMA(KP923879532, T2S, T2P); T4y = I[WS(is, 63)]; T5y = I[WS(is, 95)]; T5z = I[WS(is, 31)]; { E T4A, T4B, T4D, T4E; T4A = I[WS(is, 15)]; TaP = T4x + T4y; T4z = T4x - T4y; TaQ = T5z + T5y; T5A = T5y - T5z; T4B = I[WS(is, 79)]; T4D = I[WS(is, 111)]; T4E = I[WS(is, 47)]; { E T4K, Tbf, Tbg, T4N, T4P, T4Q; { E T4I, T4C, T4F, T4J, T4L, T4M; T4I = I[WS(is, 7)]; TaS = T4A + T4B; T4C = T4A - T4B; TaT = T4D + T4E; T4F = T4D - T4E; T4J = I[WS(is, 71)]; T4L = I[WS(is, 103)]; T4M = I[WS(is, 39)]; T4G = T4C + T4F; T5B = T4F - T4C; T4K = T4I - T4J; Tbf = T4I + T4J; Tbg = T4M + T4L; T4N = T4L - T4M; } T4P = I[WS(is, 119)]; T4Q = I[WS(is, 55)]; T4O = FMA(KP414213562, T4N, T4K); T5D = FNMS(KP414213562, T4K, T4N); Tbh = Tbf + Tbg; Tdw = Tbf - Tbg; T4R = T4P - T4Q; Tbc = T4P + T4Q; T4S = I[WS(is, 87)]; T4T = I[WS(is, 23)]; }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -