📄 t1bv_25.c
字号:
T3k = VFMA(LDK(KP603558818), T2H, T2I); T1G = VFMA(LDK(KP578046249), T1a, T1d); T1e = VFNMS(LDK(KP522847744), T1d, T1a); T28 = VFNMS(LDK(KP494780565), T1a, T1d); T2g = VFMA(LDK(KP447533225), T1d, T1a); { V T3U, T3S, T40, T3Y; T3U = VSUB(T3O, T3R); T3S = VADD(T3O, T3R); T40 = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), T3W, T3X)); T3Y = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), T3X, T3W)); { V T3s, T3l, T2N, T36; T3s = VFNMS(LDK(KP845997307), T3k, T3j); T3l = VFMA(LDK(KP845997307), T3k, T3j); T2N = VFNMS(LDK(KP772036680), T2M, T2J); T36 = VFMA(LDK(KP772036680), T2M, T2J); { V T30, T2S, T3d, T3z, T3T; T30 = VFNMS(LDK(KP772036680), T2R, T2Q); T2S = VFMA(LDK(KP772036680), T2R, T2Q); T3d = VFNMS(LDK(KP845997307), T3c, T3b); T3z = VFMA(LDK(KP845997307), T3c, T3b); ST(&(x[0]), VADD(T3S, T3L), ms, &(x[0])); T3T = VFNMS(LDK(KP250000000), T3S, T3L); { V T3C, T3p, T2O, T37; T3C = VFMA(LDK(KP906616052), T3o, T3l); T3p = VFNMS(LDK(KP906616052), T3o, T3l); T2O = VFMA(LDK(KP956723877), T2N, T2G); T37 = VFMA(LDK(KP522616830), T2V, T36); { V T31, T2W, T3u, T3h; T31 = VFNMS(LDK(KP522616830), T2G, T30); T2W = VFMA(LDK(KP945422727), T2V, T2S); T3u = VFNMS(LDK(KP923225144), T3g, T3d); T3h = VFMA(LDK(KP923225144), T3g, T3d); { V T3I, T3B, T3V, T3Z; T3I = VFNMS(LDK(KP669429328), T3z, T3A); T3B = VFMA(LDK(KP570584518), T3A, T3z); T3V = VFMA(LDK(KP559016994), T3U, T3T); T3Z = VFNMS(LDK(KP559016994), T3U, T3T); { V T3y, T3q, T2P, T38; T3y = VFMA(LDK(KP262346850), T3p, T2X); T3q = VMUL(LDK(KP998026728), VFNMS(LDK(KP952936919), T2X, T3p)); T2P = VFMA(LDK(KP992114701), T2O, T2z); T38 = VFNMS(LDK(KP690983005), T37, T2S); { V T32, T2Y, T3v, T3F; T32 = VFMA(LDK(KP763932022), T31, T2N); T2Y = VMUL(LDK(KP998026728), VFMA(LDK(KP952936919), T2X, T2W)); T3v = VFNMS(LDK(KP997675361), T3u, T3t); T3F = VFNMS(LDK(KP904508497), T3u, T3s); { V T3i, T3r, T3J, T3D; T3i = VFMA(LDK(KP949179823), T3h, T2z); T3r = VFNMS(LDK(KP237294955), T3h, T2z); T3J = VFNMS(LDK(KP669429328), T3C, T3I); T3D = 
VFMA(LDK(KP618033988), T3C, T3B); ST(&(x[WS(rs, 20)]), VFNMSI(T3Y, T3V), ms, &(x[0])); ST(&(x[WS(rs, 5)]), VFMAI(T3Y, T3V), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 15)]), VFMAI(T40, T3Z), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 10)]), VFNMSI(T40, T3Z), ms, &(x[0])); { V T39, T33, T3w, T3G; T39 = VFMA(LDK(KP855719849), T38, T35); T33 = VFNMS(LDK(KP855719849), T32, T2Z); ST(&(x[WS(rs, 3)]), VFMAI(T2Y, T2P), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 22)]), VFNMSI(T2Y, T2P), ms, &(x[0])); T3w = VFMA(LDK(KP560319534), T3v, T3s); T3G = VFNMS(LDK(KP681693190), T3F, T3t); ST(&(x[WS(rs, 2)]), VFMAI(T3q, T3i), ms, &(x[0])); ST(&(x[WS(rs, 23)]), VFNMSI(T3q, T3i), ms, &(x[WS(rs, 1)])); T3K = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T3J, T3y)); T3E = VMUL(LDK(KP951056516), VFNMS(LDK(KP949179823), T3D, T3y)); T3a = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T39, T2X)); T34 = VFMA(LDK(KP897376177), T33, T2z); T3x = VFNMS(LDK(KP949179823), T3w, T3r); T3H = VFNMS(LDK(KP860541664), T3G, T3r); T2t = VFNMS(LDK(KP912575812), T2b, T2a); T2c = VFMA(LDK(KP912575812), T2b, T2a); TU = VFMA(LDK(KP829049696), TT, Tz); T1T = VFNMS(LDK(KP829049696), TT, Tz); T1U = VFNMS(LDK(KP831864738), T1y, T1e); T1z = VFMA(LDK(KP831864738), T1y, T1e); } } } } } } } } } } } { V T2o, T2h, T29, T2u, T2v, T2p; T2o = VFNMS(LDK(KP958953096), T2g, T2f); T2h = VFMA(LDK(KP958953096), T2g, T2f); ST(&(x[WS(rs, 17)]), VFNMSI(T3a, T34), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 8)]), VFMAI(T3a, T34), ms, &(x[0])); ST(&(x[WS(rs, 13)]), VFMAI(T3E, T3x), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 12)]), VFNMSI(T3E, T3x), ms, &(x[0])); ST(&(x[WS(rs, 7)]), VFNMSI(T3K, T3H), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 18)]), VFMAI(T3K, T3H), ms, &(x[0])); T1V = VFMA(LDK(KP559154169), T1U, T1T); T22 = VFNMS(LDK(KP683113946), T1T, T1U); T29 = VFNMS(LDK(KP867381224), T28, T27); T2u = VFMA(LDK(KP867381224), T28, T27); T2l = VFMA(LDK(KP894834959), T2k, T2h); T2v = VFMA(LDK(KP447417479), T2k, T2u); T2d = VFNMS(LDK(KP809385824), T2c, T29); T2p = 
VFMA(LDK(KP447417479), T2c, T2o); T1Q = VFMA(LDK(KP831864738), T1H, T1G); T1I = VFNMS(LDK(KP831864738), T1H, T1G); T2w = VFNMS(LDK(KP763932022), T2v, T2h); T1A = VFMA(LDK(KP904730450), T1z, TU); T1F = VFNMS(LDK(KP904730450), T1z, TU); T2q = VFMA(LDK(KP690983005), T2p, T29); } } { V T2e, T1E, T1P, T2m; T2e = VFNMS(LDK(KP992114701), T2d, Tf); T1E = VFMA(LDK(KP916574801), T1D, T1C); T1P = VFNMS(LDK(KP916574801), T1D, T1C); T2m = VMUL(LDK(KP951056516), VFNMS(LDK(KP992114701), T2l, T1O)); { V T1J, T2r, T1R, T1W, T1Z, T2x; T2x = VFNMS(LDK(KP999544308), T2w, T2t); T1J = VFNMS(LDK(KP904730450), T1I, T1F); T25 = VFMA(LDK(KP968583161), T1A, Tf); T1B = VFNMS(LDK(KP242145790), T1A, Tf); T2r = VFNMS(LDK(KP999544308), T2q, T2n); T1R = VFMA(LDK(KP904730450), T1Q, T1P); T1W = VFNMS(LDK(KP904730450), T1Q, T1P); T1Z = VADD(T1E, T1F); ST(&(x[WS(rs, 21)]), VFMAI(T2m, T2e), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 4)]), VFNMSI(T2m, T2e), ms, &(x[0])); T2y = VMUL(LDK(KP951056516), VFNMS(LDK(KP803003575), T2x, T1O)); T1K = VFNMS(LDK(KP618033988), T1J, T1E); T2s = VFNMS(LDK(KP803003575), T2r, Tf); T23 = VFMA(LDK(KP617882369), T1W, T22); T1S = VFNMS(LDK(KP242145790), T1R, T1O); T26 = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1R, T1O)); T20 = VFNMS(LDK(KP683113946), T1Z, T1I); T1X = VFMA(LDK(KP559016994), T1W, T1V); } } } } } { V T1L, T24, T21, T1Y; T1L = VFNMS(LDK(KP876091699), T1K, T1B); ST(&(x[WS(rs, 16)]), VFMAI(T2y, T2s), ms, &(x[0])); ST(&(x[WS(rs, 9)]), VFNMSI(T2y, T2s), ms, &(x[WS(rs, 1)])); T24 = VMUL(LDK(KP951056516), VFNMS(LDK(KP876306680), T23, T1S)); ST(&(x[WS(rs, 24)]), VFNMSI(T26, T25), ms, &(x[0])); ST(&(x[WS(rs, 1)]), VFMAI(T26, T25), ms, &(x[WS(rs, 1)])); T21 = VFMA(LDK(KP792626838), T20, T1B); T1Y = VMUL(LDK(KP951056516), VFMA(LDK(KP968583161), T1X, T1S)); ST(&(x[WS(rs, 11)]), VFMAI(T24, T21), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 14)]), VFNMSI(T24, T21), ms, &(x[0])); ST(&(x[WS(rs, 19)]), VFNMSI(T1Y, T1L), ms, &(x[WS(rs, 1)])); ST(&(x[WS(rs, 6)]), VFMAI(T1Y, T1L), ms, 
&(x[0])); } }}
/* ^ Closes a definition that begins earlier in the file (not fully visible here). */

/*
 * Twiddle instruction table referenced by `desc` below.
 * Holds one VTW(0, k) entry for every k from 1 through 24, followed by a
 * closing {TW_NEXT, VL, 0} entry.
 */
static const tw_instr twinstr[] = {
     VTW(0, 1), VTW(0, 2), VTW(0, 3), VTW(0, 4), VTW(0, 5), VTW(0, 6),
     VTW(0, 7), VTW(0, 8), VTW(0, 9), VTW(0, 10), VTW(0, 11), VTW(0, 12),
     VTW(0, 13), VTW(0, 14), VTW(0, 15), VTW(0, 16), VTW(0, 17), VTW(0, 18),
     VTW(0, 19), VTW(0, 20), VTW(0, 21), VTW(0, 22), VTW(0, 23), VTW(0, 24),
     {TW_NEXT, VL, 0}};

/*
 * Descriptor passed to the registration call below. It bundles the
 * value 25, the name string "t1bv_25", the twiddle table above and &GENUS.
 * NOTE(review): 25 presumably is the transform size/radix (the generator
 * command line further down uses "-n 25"), {67, 60, 181, 0} presumably are
 * operation counts, and the three trailing zeros presumably are stride
 * fields -- confirm against the ct_desc declaration, which is not visible
 * in this file.
 */
static const ct_desc desc = { 25, "t1bv_25", twinstr, &GENUS, {67, 60, 181, 0}, 0, 0, 0 };

/*
 * Registration entry point: hands the planner `p`, the t1bv_25 routine and
 * its descriptor to X(kdft_dit_register). Nothing is returned.
 */
void X(codelet_t1bv_25) (planner *p) { X(kdft_dit_register) (p, t1bv_25, &desc);}
#else /* HAVE_FMA */
/* Alternative branch of the HAVE_FMA conditional; the matching #if is above this chunk. */

/* Generated by: ../../../genfft/gen_twiddle_c -simd -compact -variables 4 -pipeline-latency 8 -n 25 -name t1bv_25 -include t1b.h -sign 1 */

/*
 * This function contains 248 FP additions, 188 FP multiplications,
 * (or, 171 additions, 111 multiplications, 77 fused multiply/add),
 * 100 stack variables, 40 constants, and 50 memory accesses
 */
#include "t1b.h"

/* The definition below continues past this point in the file. */
static void t1bv_25(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms){
     /*
      * Named constants: each identifier spells out the leading digits of
      * its value (e.g. KP1_937166322 is +1.937166322...).
      */
     DVK(KP497379774, +0.497379774329709576484567492012895936835134813);
     DVK(KP968583161, +0.968583161128631119490168375464735813836012403);
     DVK(KP248689887, +0.248689887164854788242283746006447968417567406);
     DVK(KP1_937166322, +1.937166322257262238980336750929471627672024806);
     DVK(KP809016994, +0.809016994374947424102293417182819058860154590);
     DVK(KP309016994, +0.309016994374947424102293417182819058860154590);
     DVK(KP1_688655851, +1.688655851004030157097116127933363010763318483);
     DVK(KP535826794, +0.535826794978996618271308767867639978063575346);
     DVK(KP425779291, +0.425779291565072648862502445744251703979973042);
     DVK(KP1_809654104, +1.809654104932039055427337295865395187940827822);
     DVK(KP963507348, +0.963507348203430549974383005744259307057084020);
     DVK(KP876306680, +0.876306680043863587308115903922062583399064238);
     DVK(KP844327925, +0.844327925502015078548558063966681505381659241);
     DVK(KP1_071653589, +1.071653589957993236542617535735279956127150691);
     DVK(KP481753674,
+0.481753674101715274987191502872129653528542010); DVK(KP1_752613360, +1.752613360087727174616231807844125166798128477); DVK(KP851558583, +0.851558583130145297725004891488503407959946084); DVK(KP904827052, +0.904827052466019527713668647932697593970413911); DVK(KP125333233, +0.125333233564304245373118759816508793942918247); DVK(KP1_984229402, +1.984229402628955662099586085571557042906073418); DVK(KP1_457937254, +1.457937254842823046293460638110518222745143328); DVK(KP684547105, +0.684547105928688673732283357621209269889519233); DVK(KP637423989, +0.637423989748689710176712811676016195434917298); DVK(KP1_541026485, +1.541026485551578461606019272792355694543335344); DVK(KP062790519, +0.062790519529313376076178224565631133122484832); DVK(KP1_996053456, +1.996053456856543123904673613726901106673810439); DVK(KP770513242, +0.770513242775789230803009636396177847271667672); DVK(KP1_274847979, +1.274847979497379420353425623352032390869834596); DVK(KP125581039, +0.125581039058626752152356449131262266244969664); DVK(KP998026728, +0.998026728428271561952336806863450553336905220); DVK(KP992114701, +0.992114701314477831049793042785778521453036709); DVK(KP250666467, +0.250666467128608490746237519633017587885836494); DVK(KP728968627, +0.728968627421411523146730319055259111372571664); DVK(KP1_369094211, +1.369094211857377347464566715242418539779038465); DVK(KP293892626, +0.293892626146236564584352977319536384298826219); DVK(KP475528258, +0.475528258147576786058219666689691071702849317); DVK(KP951056516, +0.951056516295153572116439333379382143405698634); DVK(KP250000000, +0.250000000000000000000000000000000000000000000); DVK(KP587785252, +0.587785252292473129168705954639072768597652438); DVK(KP559016994, +0.559016994374947424102293417182819058860154590); INT m; R *x; x = ii; for (m = mb, W = W + (mb * ((TWVL / VL) * 48)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 48), MAKE_VOLATILE_STRIDE(rs)) { V T1A, T1z, T1R, T1S, T1B, T1C, T1Q, T2L, T1l, T2v, T1i, T3e, T2u, Tb, T2i; V 
Tj, T3b, T2h, Tv, T2k, TD, T3a, T2l, T11, T2s, TY, T3d, T2r; { V T1v, T1x, T1y, T1q, T1s, T1t, T1P; T1A = LD(&(x[0]), ms, &(x[0])); { V T1u, T1w, T1p, T1r; T1u = LD(&(x[WS(rs, 10)]), ms, &(x[0])); T1v = BYTW(&(W[TWVL * 18]), T1u); T1w = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)])); T1x = BYTW(&(W[TWVL * 28]), T1w); T1y = VADD(T1v, T1x); T1p = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)])); T1q = BYTW(&(W[TWVL * 8]), T1p); T1r = LD(&(x[WS(rs, 20)]), ms, &(x[0])); T1s = BYTW(&(W[TWVL * 38]), T1r); T1t = VADD(T1q, T1s); } T1z = VMUL(LDK(KP559016994), VSUB(T1t, T1y)); T1R = VSUB(T1v, T1x); T1S = VMUL(LDK(KP587785252), T1R); T1B = VADD(T1t, T1y); T1C = VFNMS(LDK(KP250000000), T1B, T1A); T1P = VSUB(T1q, T1s); T1Q = VMUL(LDK(KP951056516), T1P); T2L = VMUL(LDK(KP587785252), T1P); } { V T1f, T19, T1b, T1c, T14, T16, T17, T1e; T1e = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)])); T1f = BYTW(&(W[TWVL * 4]), T1e); { V T18, T1a, T13, T15; T18 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)])); T19 = BYTW(&(W[TWVL * 24]), T18); T1a = LD(&(x[WS(rs, 18)]), ms, &(x[0])); T1b = BYTW(&(W[TWVL * 34]), T1a); T1c = VADD(T19, T1b); T13 = LD(&(x[WS(rs, 8)]), ms, &(x[0])); T14 = BYTW(&(W[TWVL * 14]), T13); T15 = LD(&(x[WS(rs, 23)]), ms, &(x[WS(rs, 1)])); T16 = BYTW(&(W[TWVL * 44]), T15); T17 = VADD(T14, T16); } { V T1j, T1k, T1d, T1g, T1h; T1j = VSUB(T14, T16);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -