📄 r2cb_128.c
字号:
/* * Copyright (c) 2003, 2007-8 Matteo Frigo * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * *//* This file was automatically generated --- DO NOT EDIT *//* Generated on Sat Nov 15 21:05:56 EST 2008 */#include "codelet-rdft.h"#ifdef HAVE_FMA/* Generated by: ../../../genfft/gen_r2cb -fma -reorder-insns -schedule-for-pipeline -compact -variables 4 -pipeline-latency 4 -sign 1 -n 128 -name r2cb_128 -include r2cb.h *//* * This function contains 956 FP additions, 540 FP multiplications, * (or, 416 additions, 0 multiplications, 540 fused multiply/add), * 242 stack variables, 36 constants, and 256 memory accesses */#include "r2cb.h"static void r2cb_128(R *R0, R *R1, R *Cr, R *Ci, stride rs, stride csr, stride csi, INT v, INT ivs, INT ovs){ DK(KP1_715457220, +1.715457220000544139804539968569540274084981599); DK(KP1_606415062, +1.606415062961289819613353025926283847759138854); DK(KP599376933, +0.599376933681923766271389869014404232837890546); DK(KP741650546, +0.741650546272035369581266691172079863842265220); DK(KP1_978353019, +1.978353019929561946903347476032486127967379067); DK(KP1_940062506, +1.940062506389087985207968414572200502913731924); DK(KP148335987, +0.148335987538347428753676511486911367000625355); DK(KP250486960, +0.250486960191305461595702160124721208578685568); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP1_807978586, +1.807978586246886663172400594461074097420264050); DK(KP1_481902250, +1.481902250709918182351233794990325459457910619); DK(KP472964775, +0.472964775891319928124438237972992463904131113); DK(KP906347169, +0.906347169019147157946142717268914412664134293); DK(KP1_997590912, +1.997590912410344785429543209518201388886407229); DK(KP1_883088130, +1.883088130366041556825018805199004714371179592); DK(KP049126849, +0.049126849769467254105343321271313617079695752); DK(KP357805721, +0.357805721314524104672487743774474392487532769); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP1_763842528, +1.763842528696710059425513727320776699016885241); DK(KP1_913880671, +1.913880671464417729871595773960539938965698411); DK(KP534511135, +0.534511135950791641089685961295362908582039528); DK(KP303346683, +0.303346683607342391675883946941299872384187453); DK(KP1_662939224, +1.662939224605090474157576755235811513477121624); DK(KP668178637, +0.668178637919298919997757686523080761552472251); DK(KP1_990369453, +1.990369453344393772489673906218959843150949737); DK(KP1_546020906, +1.546020906725473921621813219516939601942082586); DK(KP098491403, +0.098491403357164253077197521291327432293052451); DK(KP820678790, +0.820678790828660330972281985331011598767386482); DK(KP1_961570560, +1.961570560806460898252364472268478073947867462); DK(KP198912367, +0.198912367379658006911597622644676228597850501); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP1_847759065, +1.847759065022573512256366378793576573644833252); DK(KP707106781, +0.707106781186547524400844362104849039284835938); DK(KP414213562, +0.414213562373095048801688724209698078569671875); DK(KP1_414213562, +1.414213562373095048801688724209698078569671875); DK(KP2_000000000, +2.000000000000000000000000000000000000000000000); INT i; for (i = v; i > 0; i = i - 1, R0 = R0 + ovs, R1 = R1 + ovs, Cr = Cr + ivs, Ci = Ci + ivs, MAKE_VOLATILE_STRIDE(rs), MAKE_VOLATILE_STRIDE(csr), MAKE_VOLATILE_STRIDE(csi)) { E T9H, T9I, T9X, T9Y; { E Tdr, T9, Tcl, Ta9, T6b, T2d, T91, T7j, Tg, Tds, Tcm, Tae, T92, T7m, T6c; E T2o, Tdu, Tw, Tco, Tap, TeM, Tdx, T6f, T2G, T6e, T2P, T94, T7t, Tcp, Tak; E T95, T7q, TdM, T1i, TcL, TbD, Tf0, Te6, T6q, T42, T6B, T5t, T9r, T8j, TcA; E TaY, T9g, T7S, TdA, TM, Tcv, TaN, TeP, TdI, T6i, T38, T6l, T3F, T9b, T7J; E Tcs, Taw, T98, T7y, T1N, TeW, T6x, T4H, Te8, TdV, T6w, T4Q, T9j, T86, TcO; E TcI, T9k, T83, TbI, Tbl, T22, TeV, Te0, Te9, T58, T6u, T6t, T5h, T9m, T8d; E TcP, TcF, T9n, T8a, TbJ, Tbw, Te3, T1x, TcB, TbG, Tf1, TdP, T6C, T4p, T6r; E T5w, T9h, T8m, TcM, Tb9, T9s, T7Z, TaB, TaG, TdF, T11, Tct, TaQ, TeQ, TdD; E T6m, T3v, T7B, T7E, T6j, T3I, T99, T7M; { E TaU, TaX, T7Q, T7R, Tbk, Tbf; { E Td, T2e, Tc, Tab, T2m, Te, T2f, T2g; { E T7h, T27, T2c, T7i; { E T4, T26, T29, T25, T3, T28, T8, T2a; T4 = Cr[WS(csr, 32)]; T26 = Ci[WS(csi, 32)]; { E T1, T2, T6, T7; T1 = Cr[0]; T2 = Cr[WS(csr, 64)]; T6 = Cr[WS(csr, 16)]; T7 = Cr[WS(csr, 48)]; T29 = Ci[WS(csi, 16)]; T25 = T1 - T2; T3 = T1 + T2; T28 = T6 - T7; T8 = T6 + T7; T2a = Ci[WS(csi, 48)]; } { E Ta7, T5, Ta8, T2b; Ta7 = FNMS(KP2_000000000, T4, T3); T5 = FMA(KP2_000000000, T4, T3); T7h = FMA(KP2_000000000, T26, T25); T27 = FNMS(KP2_000000000, T26, T25); Ta8 = T29 - T2a; T2b = T29 + T2a; Tdr = FNMS(KP2_000000000, T8, T5); T9 = FMA(KP2_000000000, T8, T5); Tcl = FMA(KP2_000000000, Ta8, Ta7); Ta9 = FNMS(KP2_000000000, Ta8, Ta7); T2c = T28 - T2b; T7i = T28 + T2b; } } { E Ta, Tb, T2k, T2l; Ta = Cr[WS(csr, 8)]; T6b = FNMS(KP1_414213562, T2c, T27); T2d = FMA(KP1_414213562, T2c, T27); T91 = FMA(KP1_414213562, T7i, T7h); T7j = FNMS(KP1_414213562, T7i, T7h); Tb = Cr[WS(csr, 56)]; T2k = Ci[WS(csi, 8)]; T2l = Ci[WS(csi, 56)]; Td = Cr[WS(csr, 40)]; T2e = Ta - Tb; Tc = Ta + Tb; Tab = T2k - T2l; T2m = T2k + T2l; Te = Cr[WS(csr, 24)]; T2f = Ci[WS(csi, 40)]; T2g = Ci[WS(csi, 24)]; } } { E Tag, Taj, T7o, T7p; { E T2q, Tk, Tam, T2K, T2H, Tn, Tan, T2t, Tu, Tah, T2E, T2N, Tr, T2v, T2y; E Tai; { E Tl, Tm, T2r, T2s; { E Ti, Tj, T2j, Tf, T2I, T2J; Ti = Cr[WS(csr, 4)]; T2j = Td - Te; Tf = Td + Te; { E Tac, T2h, T7k, T2n; Tac = T2f - T2g; T2h = T2f + T2g; T7k = T2m - T2j; T2n = T2j + T2m; { E Taa, Tad, T7l, T2i; Taa = Tc - Tf; Tg = Tc + Tf; Tad = Tab - Tac; Tds = Tac + Tab; T7l = T2e + T2h; T2i = T2e - T2h; Tcm = Taa + Tad; Tae = Taa - Tad; T92 = FMA(KP414213562, T7k, T7l); T7m = FNMS(KP414213562, T7l, T7k); T6c = FMA(KP414213562, T2i, T2n); T2o = FNMS(KP414213562, T2n, T2i); Tj = Cr[WS(csr, 60)]; } } T2I = Ci[WS(csi, 4)]; T2J = Ci[WS(csi, 60)]; Tl = Cr[WS(csr, 36)]; T2q = Ti - Tj; Tk = Ti + Tj; Tam = T2I - T2J; T2K = T2I + T2J; Tm = Cr[WS(csr, 28)]; } T2r = Ci[WS(csi, 36)]; T2s = Ci[WS(csi, 28)]; { E Ts, Tt, T2B, T2C; Ts = Cr[WS(csr, 12)]; T2H = Tl - Tm; Tn = Tl + Tm; Tan = T2r - T2s; T2t = T2r + T2s; Tt = Cr[WS(csr, 52)]; T2B = Ci[WS(csi, 12)]; T2C = Ci[WS(csi, 52)]; { E Tp, T2A, T2D, Tq, T2w, T2x; Tp = Cr[WS(csr, 20)]; Tu = Ts + Tt; T2A = Ts - Tt; Tah = T2C - T2B; T2D = T2B + T2C; Tq = Cr[WS(csr, 44)]; T2w = Ci[WS(csi, 20)]; T2x = Ci[WS(csi, 44)]; T2E = T2A - T2D; T2N = T2A + T2D; Tr = Tp + Tq; T2v = Tp - Tq; T2y = T2w + T2x; Tai = T2w - T2x; } } } { E T2M, Tdv, Tdw, T2u, T2F, T7s, T7r, T2L, T2O; { E To, T2z, Tv, Tal, Tao; Tag = Tk - Tn; To = Tk + Tn; T2M = T2v + T2y; T2z = T2v - T2y; Tv = Tr + Tu; Tal = Tr - Tu; Tao = Tam - Tan; Tdv = Tan + Tam; Tdu = To - Tv; Tw = To + Tv; Tco = Tao - Tal; Tap = Tal + Tao; Tdw = Tai + Tah; Taj = Tah - Tai; T7o = T2q + T2t; T2u = T2q - T2t; T2F = T2z + T2E; T7s = T2E - T2z; } T7r = T2K - T2H; T2L = T2H + T2K; TeM = Tdw + Tdv; Tdx = Tdv - Tdw; T6f = FNMS(KP707106781, T2F, T2u); T2G = FMA(KP707106781, T2F, T2u); T2O = T2M - T2N; T7p = T2M + T2N; T6e = FNMS(KP707106781, T2O, T2L); T2P = FMA(KP707106781, T2O, T2L); T94 = FMA(KP707106781, T7s, T7r); T7t = FNMS(KP707106781, T7s, T7r); } } { E T3M, T16, TbA, T5o, T5l, T19, TbB, T3P, T1g, TaV, T40, T5r, T1d, T3R, T3U; E TaW; { E T17, T18, T3N, T3O; { E T14, T15, T5m, T5n; T14 = Cr[WS(csr, 1)]; Tcp = Tag - Taj; Tak = Tag + Taj; T95 = FMA(KP707106781, T7p, T7o); T7q = FNMS(KP707106781, T7p, T7o); T15 = Cr[WS(csr, 63)]; T5m = Ci[WS(csi, 1)]; T5n = Ci[WS(csi, 63)]; T17 = Cr[WS(csr, 33)]; T3M = T14 - T15; T16 = T14 + T15; TbA = T5m - T5n; T5o = T5m + T5n; T18 = Cr[WS(csr, 31)]; } T3N = Ci[WS(csi, 33)]; T3O = Ci[WS(csi, 31)]; { E T1e, T1f, T3X, T3Y; T1e = Cr[WS(csr, 15)]; T5l = T17 - T18; T19 = T17 + T18; TbB = T3N - T3O; T3P = T3N + T3O; T1f = Cr[WS(csr, 49)]; T3X = Ci[WS(csi, 15)]; T3Y = Ci[WS(csi, 49)]; { E T1b, T3W, T3Z, T1c, T3S, T3T; T1b = Cr[WS(csr, 17)]; T1g = T1e + T1f; T3W = T1e - T1f; TaV = T3Y - T3X; T3Z = T3X + T3Y; T1c = Cr[WS(csr, 47)]; T3S = Ci[WS(csi, 17)]; T3T = Ci[WS(csi, 47)]; T40 = T3W - T3Z; T5r = T3W + T3Z; T1d = T1b + T1c; T3R = T1b - T1c; T3U = T3S + T3T; TaW = T3S - T3T; } } } { E T5q, Te4, Te5, T3Q, T41, T8i, T8h, T5p, T5s; { E T1a, T3V, T1h, Tbz, TbC; TaU = T16 - T19; T1a = T16 + T19; T5q = T3R + T3U; T3V = T3R - T3U; T1h = T1d + T1g; Tbz = T1d - T1g; TbC = TbA - TbB; Te4 = TbB + TbA; TdM = T1a - T1h; T1i = T1a + T1h; TcL = TbC - Tbz; TbD = Tbz + TbC; Te5 = TaW + TaV; TaX = TaV - TaW; T7Q = T3M + T3P; T3Q = T3M - T3P; T41 = T3V + T40; T8i = T40 - T3V; } T8h = T5o - T5l; T5p = T5l + T5o; Tf0 = Te5 + Te4; Te6 = Te4 - Te5; T6q = FNMS(KP707106781, T41, T3Q); T42 = FMA(KP707106781, T41, T3Q); T5s = T5q - T5r; T7R = T5q + T5r; T6B = FNMS(KP707106781, T5s, T5p); T5t = FMA(KP707106781, T5s, T5p); T9r = FMA(KP707106781, T8i, T8h); T8j = FNMS(KP707106781, T8i, T8h); } } } } { E Tas, Tav, T7w, T7x; { E T2S, TA, TaK, T3A, T3x, TD, TaL, T2V, TK, Tat, T36, T3D, TH, T2X, T30; E Tau; { E TB, TC, T2T, T2U; { E Ty, Tz, T3y, T3z; Ty = Cr[WS(csr, 2)]; TcA = TaU - TaX; TaY = TaU + TaX; T9g = FMA(KP707106781, T7R, T7Q); T7S = FNMS(KP707106781, T7R, T7Q); Tz = Cr[WS(csr, 62)]; T3y = Ci[WS(csi, 2)]; T3z = Ci[WS(csi, 62)]; TB = Cr[WS(csr, 34)]; T2S = Ty - Tz; TA = Ty + Tz; TaK = T3y - T3z; T3A = T3y + T3z; TC = Cr[WS(csr, 30)]; } T2T = Ci[WS(csi, 34)]; T2U = Ci[WS(csi, 30)]; { E TI, TJ, T33, T34; TI = Cr[WS(csr, 14)]; T3x = TB - TC; TD = TB + TC; TaL = T2T - T2U; T2V = T2T + T2U; TJ = Cr[WS(csr, 50)]; T33 = Ci[WS(csi, 14)]; T34 = Ci[WS(csi, 50)]; { E TF, T32, T35, TG, T2Y, T2Z; TF = Cr[WS(csr, 18)]; TK = TI + TJ; T32 = TI - TJ; Tat = T34 - T33; T35 = T33 + T34; TG = Cr[WS(csr, 46)]; T2Y = Ci[WS(csi, 18)]; T2Z = Ci[WS(csi, 46)]; T36 = T32 - T35; T3D = T32 + T35; TH = TF + TG; T2X = TF - TG; T30 = T2Y + T2Z; Tau = T2Y - T2Z; } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -