📄 t1_64.c
字号:
TcZ = FMA(KP707106781, TcY, TcX); Te9 = FNMS(KP707106781, TcY, TcX); { E Tdg, TdQ, Tdd, Tdj; { E Td5, TdL, TcT, TdM, Td0, Tdc; Teg = FNMS(KP707106781, Td4, Td3); Td5 = FMA(KP707106781, Td4, Td3); TdL = FMA(KP198912367, TcP, TcS); TcT = FNMS(KP198912367, TcS, TcP); TdM = FNMS(KP198912367, TcW, TcZ); Td0 = FMA(KP198912367, TcZ, TcW); Tdc = Td8 + Tdb; Tee = Tdb - Td8; Ted = FNMS(KP707106781, Tdf, Tde); Tdg = FMA(KP707106781, Tdf, Tde); TdN = TdL + TdM; Tk2 = TdM - TdL; TjW = TcT + Td0; Td1 = TcT - Td0; TdQ = FMA(KP923879532, Tdc, Td5); Tdd = FNMS(KP923879532, Tdc, Td5); Tdj = Tdh + Tdi; Teh = Tdh - Tdi; } { E Tek, Tel, TdP, Tdk; Tdz = FMA(KP707106781, Tdy, Tdx); Tek = FNMS(KP707106781, Tdy, Tdx); Tel = Tdu - Tdr; Tdv = Tdr + Tdu; Tdo = FMA(KP707106781, Tdn, Tdm); Ten = FNMS(KP707106781, Tdn, Tdm); TdP = FMA(KP923879532, Tdj, Tdg); Tdk = FNMS(KP923879532, Tdj, Tdg); TeF = FMA(KP923879532, Tel, Tek); Tem = FNMS(KP923879532, Tel, Tek); TdR = FMA(KP098491403, TdQ, TdP); TdX = FNMS(KP098491403, TdP, TdQ); TdH = FNMS(KP820678790, Tdd, Tdk); Tdl = FMA(KP820678790, Tdk, Tdd); Teo = TdA - TdB; TdC = TdA + TdB; } } } { E TeC, Tef, TeE, Tep, TeB, Tei; TeE = FMA(KP923879532, Teo, Ten); Tep = FNMS(KP923879532, Teo, Ten); TeC = FMA(KP923879532, Tee, Ted); Tef = FNMS(KP923879532, Tee, Ted); TeG = FNMS(KP303346683, TeF, TeE); TeK = FMA(KP303346683, TeE, TeF); Teu = FMA(KP534511135, Tem, Tep); Teq = FNMS(KP534511135, Tep, Tem); TeB = FMA(KP923879532, Teh, Teg); Tei = FNMS(KP923879532, Teh, Teg); Tex = FNMS(KP668178637, Te5, Te6); Te7 = FMA(KP668178637, Te6, Te5); TeD = FMA(KP303346683, TeC, TeB); TeJ = FNMS(KP303346683, TeB, TeC); Tet = FNMS(KP534511135, Tef, Tei); Tej = FMA(KP534511135, Tei, Tef); } { E TdT, Tdw, Tey, Tea, TdS, TdD; Tey = FMA(KP668178637, Te8, Te9); Tea = FNMS(KP668178637, Te9, Te8); TdT = FMA(KP923879532, Tdv, Tdo); Tdw = FNMS(KP923879532, Tdv, Tdo); Tka = Tey - Tex; Tez = Tex + Tey; Tkg = Te7 + Tea; Teb = Te7 - Tea; Te1 = FNMS(KP414213562, TcF, TcG); TcH = 
FMA(KP414213562, TcG, TcF); TdS = FMA(KP923879532, TdC, Tdz); TdD = FNMS(KP923879532, TdC, Tdz); Te0 = FNMS(KP707106781, TcD, TcA); TcE = FMA(KP707106781, TcD, TcA); Tk7 = FNMS(KP707106781, TjS, TjR); TjT = FMA(KP707106781, TjS, TjR); TdU = FNMS(KP098491403, TdT, TdS); TdY = FMA(KP098491403, TdS, TdT); TdI = FMA(KP820678790, Tdw, TdD); TdE = FNMS(KP820678790, TdD, Tdw); TcK = FNMS(KP414213562, TcJ, TcI); Te2 = FMA(KP414213562, TcI, TcJ); } } { E Tkf, Tk9, Tew, Te4, TdW, Tk0, TjZ, TdZ; { E Tk6, TdK, TjV, TdG, Tk5, TdJ; { E TdF, Td2, Tk3, Tk4, Tk1; Tk6 = Tdl + TdE; TdF = Tdl - TdE; { E Tk8, TcL, TjU, Te3, TcM; Tk8 = TcK - TcH; TcL = TcH + TcK; TjU = Te1 + Te2; Te3 = Te1 - Te2; Tkf = FNMS(KP923879532, Tk8, Tk7); Tk9 = FMA(KP923879532, Tk8, Tk7); TdK = FMA(KP923879532, TcL, TcE); TcM = FNMS(KP923879532, TcL, TcE); Tk1 = FNMS(KP923879532, TjU, TjT); TjV = FMA(KP923879532, TjU, TjT); Tew = FNMS(KP923879532, Te3, Te0); Te4 = FMA(KP923879532, Te3, Te0); Td2 = FMA(KP980785280, Td1, TcM); TdG = FNMS(KP980785280, Td1, TcM); } Tk5 = FNMS(KP980785280, Tk2, Tk1); Tk3 = FMA(KP980785280, Tk2, Tk1); Tk4 = TdI - TdH; TdJ = TdH + TdI; ri[WS(rs, 9)] = FMA(KP773010453, TdF, Td2); ri[WS(rs, 41)] = FNMS(KP773010453, TdF, Td2); ii[WS(rs, 41)] = FNMS(KP773010453, Tk4, Tk3); ii[WS(rs, 9)] = FMA(KP773010453, Tk4, Tk3); } { E TdO, TdV, TjX, TjY; TdW = FNMS(KP980785280, TdN, TdK); TdO = FMA(KP980785280, TdN, TdK); ri[WS(rs, 57)] = FMA(KP773010453, TdJ, TdG); ri[WS(rs, 25)] = FNMS(KP773010453, TdJ, TdG); ii[WS(rs, 57)] = FMA(KP773010453, Tk6, Tk5); ii[WS(rs, 25)] = FNMS(KP773010453, Tk6, Tk5); TdV = TdR + TdU; Tk0 = TdU - TdR; TjZ = FNMS(KP980785280, TjW, TjV); TjX = FMA(KP980785280, TjW, TjV); TjY = TdX + TdY; TdZ = TdX - TdY; ri[WS(rs, 1)] = FMA(KP995184726, TdV, TdO); ri[WS(rs, 33)] = FNMS(KP995184726, TdV, TdO); ii[WS(rs, 33)] = FNMS(KP995184726, TjY, TjX); ii[WS(rs, 1)] = FMA(KP995184726, TjY, TjX); } } { E Tes, Tke, Tkd, Tev; { E Tec, Ter, Tkb, Tkc; Tes = FNMS(KP831469612, Teb, Te4); Tec 
= FMA(KP831469612, Teb, Te4); ri[WS(rs, 17)] = FMA(KP995184726, TdZ, TdW); ri[WS(rs, 49)] = FNMS(KP995184726, TdZ, TdW); ii[WS(rs, 49)] = FNMS(KP995184726, Tk0, TjZ); ii[WS(rs, 17)] = FMA(KP995184726, Tk0, TjZ); Ter = Tej + Teq; Tke = Teq - Tej; Tkd = FNMS(KP831469612, Tka, Tk9); Tkb = FMA(KP831469612, Tka, Tk9); Tkc = Tet + Teu; Tev = Tet - Teu; ri[WS(rs, 5)] = FMA(KP881921264, Ter, Tec); ri[WS(rs, 37)] = FNMS(KP881921264, Ter, Tec); ii[WS(rs, 37)] = FNMS(KP881921264, Tkc, Tkb); ii[WS(rs, 5)] = FMA(KP881921264, Tkc, Tkb); } { E TeA, TeH, Tkh, Tki; TeI = FMA(KP831469612, Tez, Tew); TeA = FNMS(KP831469612, Tez, Tew); ri[WS(rs, 21)] = FMA(KP881921264, Tev, Tes); ri[WS(rs, 53)] = FNMS(KP881921264, Tev, Tes); ii[WS(rs, 53)] = FNMS(KP881921264, Tke, Tkd); ii[WS(rs, 21)] = FMA(KP881921264, Tke, Tkd); TeH = TeD - TeG; Tkk = TeD + TeG; Tkj = FMA(KP831469612, Tkg, Tkf); Tkh = FNMS(KP831469612, Tkg, Tkf); Tki = TeK - TeJ; TeL = TeJ + TeK; ri[WS(rs, 13)] = FMA(KP956940335, TeH, TeA); ri[WS(rs, 45)] = FNMS(KP956940335, TeH, TeA); ii[WS(rs, 45)] = FNMS(KP956940335, Tki, Tkh); ii[WS(rs, 13)] = FMA(KP956940335, Tki, Tkh); } } } } } } } ri[WS(rs, 61)] = FMA(KP956940335, TeL, TeI); ri[WS(rs, 29)] = FNMS(KP956940335, TeL, TeI); ii[WS(rs, 61)] = FMA(KP956940335, Tkk, Tkj); ii[WS(rs, 29)] = FNMS(KP956940335, Tkk, Tkj); }}/* Twiddle instruction table referenced by desc: a TW_FULL entry with arguments (0, 64) followed by a TW_NEXT entry with arguments (1, 0). NOTE(review): presumably TW_FULL requests the complete twiddle set for size 64 and TW_NEXT ends the list; confirm against the tw_instr definition, which is not visible here. */static const tw_instr twinstr[] = { {TW_FULL, 0, 64}, {TW_NEXT, 1, 0}};/* Codelet descriptor handed to the registration call: the value 64, the name t1_64, the twinstr table, the address of GENUS, the group (520, 126, 518, 0), and three trailing zeros. NOTE(review): the group looks like operation counts (additions, multiplications, fused multiply/adds, other); confirm against the ct_desc declaration. */static const ct_desc desc = { 64, "t1_64", twinstr, &GENUS, {520, 126, 518, 0}, 0, 0, 0 };/* Registers this codelet: passes the planner p, the t1_64 function and the address of desc to X(kdft_dit_register). */void X(codelet_t1_64) (planner *p) { X(kdft_dit_register) (p, t1_64, &desc);}#else /* HAVE_FMA *//* Generated by: ../../../genfft/gen_twiddle -compact -variables 4 -pipeline-latency 4 -n 64 -name t1_64 -include t.h *//* * This function contains 1038 FP additions, 500 FP multiplications, * (or, 808 additions, 270 multiplications, 230 fused multiply/add), * 176 stack variables, 15 constants, and 256 memory accesses */#include "t.h"static void t1_64(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){ 
DK(KP471396736, +0.471396736825997648556387625905254377657460319); DK(KP881921264, +0.881921264348355029712756863660388349508442621); DK(KP290284677, +0.290284677254462367636192375817395274691476278); DK(KP956940335, +0.956940335732208864935797886980269969482849206); DK(KP634393284, +0.634393284163645498215171613225493370675687095); DK(KP773010453, +0.773010453362736960810906609758469800971041293); DK(KP098017140, +0.098017140329560601994195563888641845861136673); DK(KP995184726, +0.995184726672196886244836953109479921575474869); DK(KP555570233, +0.555570233019602224742830813948532874374937191); DK(KP831469612, +0.831469612302545237078788377617905756738560812); DK(KP980785280, +0.980785280403230449126182236134239036973933731); DK(KP195090322, +0.195090322016128267848284868477022240927691618); DK(KP923879532, +0.923879532511286756128183189396788286822416626); DK(KP382683432, +0.382683432365089771728459984030398866761344562); DK(KP707106781, +0.707106781186547524400844362104849039284835938); INT m; for (m = mb, W = W + (mb * 126); m < me; m = m + 1, ri = ri + ms, ii = ii + ms, W = W + 126, MAKE_VOLATILE_STRIDE(rs)) { E Tj, TcL, ThT, Tin, T6b, Taz, TgT, Thn, TG, Thm, TcO, TgO, T6m, ThQ, TaC; E Tim, T14, Tfq, T6y, T9O, TaG, Tc0, TcU, TeE, T1r, Tfr, T6J, T9P, TaJ, Tc1; E TcZ, TeF, T1Q, T2d, Tfx, Tfu, Tfv, Tfw, T6Q, TaM, Tdb, TeJ, T71, TaQ, T7a; E TaN, Td6, TeI, T77, TaP, T2B, T2Y, Tfz, TfA, TfB, TfC, T7h, TaW, Tdm, TeM; E T7s, TaU, T7B, TaX, Tdh, TeL, T7y, TaT, T5j, TfR, Tec, Tf0, TfY, Tgy, T8D; E Tbl, T8O, Tbx, T9l, Tbm, TdV, TeX, T9i, Tbw, T3M, TfL, TdL, TeQ, TfI, Tgt; E T7K, Tb2, T7V, Tbe, T8s, Tb3, Tdu, TeT, T8p, Tbd, T4x, TfJ, TdE, TdM, TfO; E Tgu, T87, T8v, T8i, T8u, Tba, Tbg, Tdz, TdN, Tb7, Tbh, T64, TfZ, Te5, Ted; E TfU, Tgz, T90, T9o, T9b, T9n, Tbt, Tbz, Te0, Tee, Tbq, TbA; { E T1, TgR, T6, TgQ, Tc, T68, Th, T69; T1 = ri[0]; TgR = ii[0]; { E T3, T5, T2, T4; T3 = ri[WS(rs, 32)]; T5 = ii[WS(rs, 32)]; T2 = W[62]; T4 = W[63]; T6 = FMA(T2, T3, T4 * T5); TgQ = 
FNMS(T4, T3, T2 * T5); } { E T9, Tb, T8, Ta; T9 = ri[WS(rs, 16)]; Tb = ii[WS(rs, 16)]; T8 = W[30]; Ta = W[31]; Tc = FMA(T8, T9, Ta * Tb); T68 = FNMS(Ta, T9, T8 * Tb); } { E Te, Tg, Td, Tf; Te = ri[WS(rs, 48)]; Tg = ii[WS(rs, 48)]; Td = W[94]; Tf = W[95]; Th = FMA(Td, Te, Tf * Tg); T69 = FNMS(Tf, Te, Td * Tg); } { E T7, Ti, ThR, ThS; T7 = T1 + T6; Ti = Tc + Th; Tj = T7 + Ti; TcL = T7 - Ti; ThR = TgR - TgQ; ThS = Tc - Th; ThT = ThR - ThS; Tin = ThS + ThR; } { E T67, T6a, TgP, TgS; T67 = T1 - T6; T6a = T68 - T69; T6b = T67 - T6a; Taz = T67 + T6a; TgP = T68 + T69; TgS = TgQ + TgR; TgT = TgP + TgS; Thn = TgS - TgP; } } { E To, T6c, Tt, T6d, T6e, T6f, Tz, T6i, TE, T6j, T6h, T6k; { E Tl, Tn, Tk, Tm; Tl = ri[WS(rs, 8)]; Tn = ii[WS(rs, 8)]; Tk = W[14]; Tm = W[15]; To = FMA(Tk, Tl, Tm * Tn); T6c = FNMS(Tm, Tl, Tk * Tn); } { E Tq, Ts, Tp, Tr; Tq = ri[WS(rs, 40)]; Ts = ii[WS(rs, 40)]; Tp = W[78]; Tr = W[79]; Tt = FMA(Tp, Tq, Tr * Ts); T6d = FNMS(Tr, Tq, Tp * Ts); } T6e = T6c - T6d; T6f = To - Tt; { E Tw, Ty, Tv, Tx; Tw = ri[WS(rs, 56)]; Ty = ii[WS(rs, 56)]; Tv = W[110]; Tx = W[111]; Tz = FMA(Tv, Tw, Tx * Ty); T6i = FNMS(Tx, Tw, Tv * Ty); } { E TB, TD, TA, TC; TB = ri[WS(rs, 24)]; TD = ii[WS(rs, 24)]; TA = W[46]; TC = W[47]; TE = FMA(TA, TB, TC * TD); T6j = FNMS(TC, TB, TA * TD); } T6h = Tz - TE; T6k = T6i - T6j; { E Tu, TF, TcM, TcN; Tu = To + Tt; TF = Tz + TE; TG = Tu + TF; Thm = TF - Tu; TcM = T6c + T6d; TcN = T6i + T6j; TcO = TcM - TcN; TgO = TcM + TcN; } { E T6g, T6l, TaA, TaB; T6g = T6e - T6f; T6l = T6h + T6k; T6m = KP707106781 * (T6g - T6l); ThQ = KP707106781 * (T6g + T6l); TaA = T6f + T6e; TaB = T6h - T6k; TaC = KP707106781 * (TaA + TaB); Tim = KP707106781 * (TaB - TaA); } } { E TS, TcQ, T6q, T6t, T13, TcR, T6r, T6w, T6s, T6x; { E TM, T6o, TR, T6p; { E TJ, TL, TI, TK; TJ = ri[WS(rs, 4)]; TL = ii[WS(rs, 4)]; TI = W[6]; TK = W[7]; TM = FMA(TI, TJ, TK * TL); T6o = FNMS(TK, TJ, TI * TL); } { E TO, TQ, TN, TP; TO = ri[WS(rs, 36)]; TQ = ii[WS(rs, 36)]; TN = W[70]; TP = W[71]; 
TR = FMA(TN, TO, TP * TQ); T6p = FNMS(TP, TO, TN * TQ); } TS = TM + TR; TcQ = T6o + T6p; T6q = T6o - T6p; T6t = TM - TR; } { E TX, T6u, T12, T6v; { E TU, TW, TT, TV; TU = ri[WS(rs, 20)]; TW = ii[WS(rs, 20)]; TT = W[38]; TV = W[39]; TX = FMA(TT, TU, TV * TW); T6u = FNMS(TV, TU, TT * TW); } { E TZ, T11, TY, T10; TZ = ri[WS(rs, 52)]; T11 = ii[WS(rs, 52)]; TY = W[102]; T10 = W[103]; T12 = FMA(TY, TZ, T10 * T11); T6v = FNMS(T10, TZ, TY * T11); } T13 = TX + T12; TcR = T6u + T6v; T6r = TX - T12; T6w = T6u - T6v; } T14 = TS + T13; Tfq = TcQ + TcR; T6s = T6q + T6r; T6x = T6t - T6w; T6y = FNMS(KP923879532, T6x, KP382683432 * T6s); T9O = FMA(KP923879532, T6s, KP382683432 * T6x); { E TaE, TaF, TcS, TcT; TaE = T6q - T6r; TaF = T6t + T6w; TaG = FNMS(KP382683432, TaF, KP923879532 * TaE); Tc0 = FMA(KP382683432, TaE, KP923879532 * TaF); TcS = TcQ - TcR; TcT = TS - T13; TcU = TcS - TcT; TeE = TcT + TcS; } } { E T1f, TcW, T6B, T6E, T1q, TcX, T6C, T6H, T6D, T6I; { E T19, T6z, T1e, T6A; { E T16, T18, T15, T17; T16 = ri[WS(rs, 60)]; T18 = ii[WS(rs, 60)]; T15 = W[118]; T17 = W[119]; T19 = FMA(T15, T16, T17 * T18); T6z = FNMS(T17, T16, T15 * T18
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -