📄 r2cf_128.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 20:56:00 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_r2cf -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -n 128 -name r2cf_128 -include r2cf.h *//* * This function contains 956 FP additions, 516 FP multiplications, * (or, 440 additions, 0 multiplications, 516 fused multiply/add), * 229 stack variables, 31 constants, and 256 memory accesses */#include "r2cf.h"static void r2cf_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs){ DK(KP989176509, +0.989176509964780973451673738016243063983689533); DK(KP803207531, +0.803207531480644909806676512963141923879569427); DK(KP148335987, +0.148335987538347428753676511486911367000625355); DK(KP741650546, +0.741650546272035369581266691172079863842265220); DK(KP998795456, +0.998795456205172392714771604759100694443203615); DK(KP740951125, +0.740951125354959091175616897495162729728955309); DK(KP049126849, +0.049126849769467254105343321271313617079695752); DK(KP906347169, +0.906347169019147157946142717268914412664134293); DK(KP857728610, +0.857728610000272069902269984284770137042490799); DK(KP970031253, +0.970031253194543992603984207286100251456865962); DK(KP599376933, +0.599376933681923766271389869014404232837890546); DK(KP250486960, +0.250486960191305461595702160124721208578685568); DK(KP941544065, +0.941544065183020778412509402599502357185589796); DK(KP903989293, +0.903989293123443331586200297230537048710132025); DK(KP472964775, +0.472964775891319928124438237972992463904131113); DK(KP357805721, +0.357805721314524104672487743774474392487532769); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ivs, R1 = R1 + ivs, Cr = Cr + ovs, Ci = Ci + ovs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T95, T96; { E TcD, TdR, T5P, T8v, T27, T7r, Tf, Ta5, T7s, T5S, T8w, T2e, TdS, TcG, Tbn; E Tu, TcK, TdU, TK, Ta6, T7w, T8y, T2o, T5U, TcN, TdV, TZ, Ta7, T7z, T8z; E T2x, T5V, T1g, Taa, Tab, T1v, Tew, TcX, Tex, TcU, T6A, T2M, T9b, T7E, T9a; E T7H, T6z, T2T, TeO, TdK, TeL, Tdz, T9p, T8d, T6O, T5G, T6L, T4X, Tc3, TaV; E Tc4, Tbi, T9s, T8o, TeH, Tdp, TeE, Tde, T9i, T7U, T6H, T4r, T6E, T3I, TbW; E Tao, TbX, TaL, T9l, T85, T1L, Tad, Tae, T20, Tez, Td6, TeA, Td3, T6x, T37; E T9e, T7L, T9d, T7O, T6w, T3e, TbZ, T3Z, T4s, Tc0, TeF, Tds, T4t, T4g, T87; E T80, TeI, Tdl, T86, T7X, TaM, TaD, Tb2, Tc6, T8e, T8f, T5e, T5H, Tb9, Tc7; E TeM, TdN, T5I, T5v, T8q, T8j, TeP, TdG; { E T7G, T2S, T2P, T7F; { E T28, Ti, Tn, T2c, Ts, T29, Tl, To; { E T4, T23, T3, T25, Td, T5, T8, T9; { E T1, T2, Tb, Tc; T1 = R0[0]; T2 = R0[WS(rs, 32)]; Tb = R0[WS(rs, 56)]; Tc = R0[WS(rs, 24)]; T4 = R0[WS(rs, 16)]; T23 = T1 - T2; T3 = T1 + T2; T25 = Tb - Tc; Td = Tb + Tc; T5 = R0[WS(rs, 48)]; T8 = R0[WS(rs, 8)]; T9 = R0[WS(rs, 40)]; } { E Tq, Tr, Tj, Tk; { E Tg, T5N, T6, T24, Ta, Th; Tg = R0[WS(rs, 4)]; T5N = T4 - T5; T6 = T4 + T5; T24 = T8 - T9; Ta = T8 + T9; Th = R0[WS(rs, 36)]; { E T7, T26, T5O, Te; TcD = T3 - T6; T7 = T3 + T6; T26 = T24 + T25; T5O = T25 - T24; TdR = Td - Ta; Te = Ta + Td; T5P = FNMS(KP707106781, T5O, T5N); T8v = FMA(KP707106781, T5O, T5N); T27 = FMA(KP707106781, T26, T23); T7r = FNMS(KP707106781, T26, T23); Tf = T7 + Te; Ta5 = T7 - Te; T28 = Tg - Th; Ti = Tg + Th; } } Tq = R0[WS(rs, 12)]; Tr = R0[WS(rs, 44)]; Tj = R0[WS(rs, 20)]; Tk = R0[WS(rs, 52)]; Tn = R0[WS(rs, 60)]; T2c = Tq - Tr; Ts = Tq + Tr; T29 = Tj - Tk; Tl = Tj + Tk; To = R0[WS(rs, 28)]; } } { E T2g, T2l, T2h, TF, TcI, TC, T2i, TI; { E Ty, TG, TB, TH; { E Tw, T5Q, T2a, TcE, Tm, T2b, Tp, Tx; Tw = R0[WS(rs, 2)]; T5Q = FMA(KP414213562, T28, T29); T2a = FNMS(KP414213562, T29, T28); TcE = Ti - Tl; Tm = Ti + Tl; T2b = Tn - To; Tp = Tn + To; Tx = R0[WS(rs, 34)]; { E Tz, TA, TD, TE; Tz = R0[WS(rs, 18)]; { E T5R, T2d, TcF, Tt; T5R = FNMS(KP414213562, T2b, T2c); T2d = FMA(KP414213562, T2c, T2b); TcF = Tp - Ts; Tt = Tp + Ts; T2g = Tw - Tx; Ty = Tw + Tx; T7s = T5Q - T5R; T5S = T5Q + T5R; T8w = T2d - T2a; T2e = T2a + T2d; TdS = TcF - TcE; TcG = TcE + TcF; Tbn = Tt - Tm; Tu = Tm + Tt; TA = R0[WS(rs, 50)]; } TD = R0[WS(rs, 10)]; TE = R0[WS(rs, 42)]; TG = R0[WS(rs, 58)]; T2l = Tz - TA; TB = Tz + TA; T2h = TD - TE; TF = TD + TE; TH = R0[WS(rs, 26)]; } } TcI = Ty - TB; TC = Ty + TB; T2i = TG - TH; TI = TG + TH; } { E T2p, T2u, T2q, TU, TcL, TR, T2r, TX; { E TN, TV, TQ, TW; { E T2k, T7u, T2n, T7v, TL, TM; TL = R0[WS(rs, 62)]; TM = R0[WS(rs, 30)]; { E TJ, TcJ, T2m, T2j; TJ = TF + TI; TcJ = TI - TF; T2m = T2h - T2i; T2j = T2h + T2i; TcK = FMA(KP414213562, TcJ, TcI); TdU = FNMS(KP414213562, TcI, TcJ); TK = TC + TJ; Ta6 = TC - TJ; T2k = FMA(KP707106781, T2j, T2g); T7u = FNMS(KP707106781, T2j, T2g); T2n = FMA(KP707106781, T2m, T2l); T7v = FNMS(KP707106781, T2m, T2l); T2p = TL - TM; TN = TL + TM; } T7w = FMA(KP668178637, T7v, T7u); T8y = FNMS(KP668178637, T7u, T7v); T2o = FNMS(KP198912367, T2n, T2k); T5U = FMA(KP198912367, T2k, T2n); { E TO, TP, TS, TT; TO = R0[WS(rs, 14)]; TP = R0[WS(rs, 46)]; TS = R0[WS(rs, 6)]; TT = R0[WS(rs, 38)]; TV = R0[WS(rs, 54)]; T2u = TO - TP; TQ = TO + TP; T2q = TS - TT; TU = TS + TT; TW = R0[WS(rs, 22)]; } } TcL = TN - TQ; TR = TN + TQ; T2r = TV - TW; TX = TV + TW; } { E T2A, T14, T2N, T17, T1b, T1e, T2D, T2O, T1r, T2I, T1q, T2Q, T2H, TcR, T1n; E T1s, T15, T16; { E T2t, T7x, T2w, T7y, T12, T13; T12 = R0[WS(rs, 1)]; T13 = R0[WS(rs, 33)]; { E TY, TcM, T2v, T2s; TY = TU + TX; TcM = TX - TU; T2v = T2q - T2r; T2s = T2q + T2r; TcN = FNMS(KP414213562, TcM, TcL); TdV = FMA(KP414213562, TcL, TcM); TZ = TR + TY; Ta7 = TR - TY; T2t = FMA(KP707106781, T2s, T2p); T7x = FNMS(KP707106781, T2s, T2p); T2w = FMA(KP707106781, T2v, T2u); T7y = FNMS(KP707106781, T2v, T2u); T2A = T12 - T13; T14 = T12 + T13; } T7z = FNMS(KP668178637, T7y, T7x); T8z = FMA(KP668178637, T7x, T7y); T2x = FMA(KP198912367, T2w, T2t); T5V = FNMS(KP198912367, T2t, T2w); T15 = R0[WS(rs, 17)]; T16 = R0[WS(rs, 49)]; } { E T1c, T2B, T1d, T19, T1a; T19 = R0[WS(rs, 9)]; T1a = R0[WS(rs, 41)]; T1c = R0[WS(rs, 57)]; T2N = T15 - T16; T17 = T15 + T16; T2B = T19 - T1a; T1b = T19 + T1a; T1d = R0[WS(rs, 25)]; { E T1k, T2F, T1j, T1l, T1h, T1i, T2C; T1h = R0[WS(rs, 5)]; T1i = R0[WS(rs, 37)]; T2C = T1c - T1d; T1e = T1c + T1d; T1k = R0[WS(rs, 21)]; T2F = T1h - T1i; T1j = T1h + T1i; T2D = T2B + T2C; T2O = T2B - T2C; T1l = R0[WS(rs, 53)]; { E T1o, T1p, T2G, T1m; T1o = R0[WS(rs, 61)]; T1p = R0[WS(rs, 29)]; T1r = R0[WS(rs, 13)]; T2G = T1k - T1l; T1m = T1k + T1l; T2I = T1o - T1p; T1q = T1o + T1p; T2Q = FMA(KP414213562, T2F, T2G); T2H = FNMS(KP414213562, T2G, T2F); TcR = T1j - T1m; T1n = T1j + T1m; T1s = R0[WS(rs, 45)]; } } } { E TcQ, TcV, T2K, T2R, T1u, TcT, TcW, TcS; { E T18, T1f, T1t, T2J; T18 = T14 + T17; TcQ = T14 - T17; TcV = T1e - T1b; T1f = T1b + T1e; T1t = T1r + T1s; T2J = T1r - T1s; T1g = T18 + T1f; Taa = T18 - T1f; T2K = FMA(KP414213562, T2J, T2I); T2R = FNMS(KP414213562, T2I, T2J); T1u = T1q + T1t; TcS = T1q - T1t; } TcT = TcR + TcS; TcW = TcS - TcR; { E T7C, T2E, T2L, T7D; T7C = FNMS(KP707106781, T2D, T2A); T2E = FMA(KP707106781, T2D, T2A); Tab = T1u - T1n; T1v = T1n + T1u; Tew = FNMS(KP707106781, TcW, TcV); TcX = FMA(KP707106781, TcW, TcV); Tex = FNMS(KP707106781, TcT, TcQ); TcU = FMA(KP707106781, TcT, TcQ); T2L = T2H + T2K; T7G = T2K - T2H; T7D = T2Q - T2R; T2S = T2Q + T2R; T2P = FMA(KP707106781, T2O, T2N); T7F = FNMS(KP707106781, T2O, T2N); T6A = FNMS(KP923879532, T2L, T2E); T2M = FMA(KP923879532, T2L, T2E); T9b = FNMS(KP923879532, T7D, T7C); T7E = FMA(KP923879532, T7D, T7C); } } } } } } { E T83, T84, T8m, T8n; { E TaP, T4z, TaQ, T5A, TaS, TaT, T4G, T5B, T4O, T5D, Tbh, Tdw, T4R, Tbc, T4S; E T4T; { E T4x, T4y, T5y, T5z; T4x = R1[WS(rs, 63)]; T9a = FNMS(KP923879532, T7G, T7F); T7H = FMA(KP923879532, T7G, T7F); T6z = FNMS(KP923879532, T2S, T2P); T2T = FMA(KP923879532, T2S, T2P); T4y = R1[WS(rs, 31)]; T5y = R1[WS(rs, 47)]; T5z = R1[WS(rs, 15)]; { E T4A, T4B, T4D, T4E; T4A = R1[WS(rs, 7)]; TaP = T4x + T4y; T4z = T4x - T4y; TaQ = T5z + T5y; T5A = T5y - T5z; T4B = R1[WS(rs, 39)]; T4D = R1[WS(rs, 55)]; T4E = R1[WS(rs, 23)]; { E T4K, Tbf, Tbg, T4N, T4P, T4Q; { E T4I, T4C, T4F, T4J, T4L, T4M; T4I = R1[WS(rs, 3)]; TaS = T4A + T4B; T4C = T4A - T4B; TaT = T4D + T4E; T4F = T4D - T4E; T4J = R1[WS(rs, 35)]; T4L = R1[WS(rs, 51)]; T4M = R1[WS(rs, 19)]; T4G = T4C + T4F; T5B = T4F - T4C; T4K = T4I - T4J; Tbf = T4I + T4J; Tbg = T4M + T4L; T4N = T4L - T4M; } T4P = R1[WS(rs, 59)]; T4Q = R1[WS(rs, 27)]; T4O = FMA(KP414213562, T4N, T4K); T5D = FNMS(KP414213562, T4K, T4N); Tbh = Tbf + Tbg; Tdw = Tbf - Tbg; T4R = T4P - T4Q; Tbc = T4P + T4Q; T4S = R1[WS(rs, 43)]; T4T = R1[WS(rs, 11)]; } } } { E T4H, T8b, TaR, Tdv, TdI, TaU, T4U, Tbd, T5C; T4H = FMA(KP707106781, T4G, T4z); T8b = FNMS(KP707106781, T4G, T4z);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -