📄 s_tanhl.s
字号:
// Main path (tail) ///////////////////////////////////////////////////////////
// Final steps of the main-path evaluation. The visible code:
//   * folds the "polynomial tail" coefficients (fA25 .. fA4) pairwise using
//     fArgAbsNorm2, fArgAbsNorm4 and fArgAbsNorm11 into fRes4;
//   * accumulates the low-order part of A1*x+A0 in fRes1L and the
//     (A3*x+A2)*x^2 term in fRes2H/fRes2L;
//   * forms a high/low pair (fResH, fResL) and adds them into f8.
// p14/p15 are set above this excerpt and are declared mutually exclusive
// below; the saturation comments (1-tiny under p14, tiny-1 under p15) suggest
// p14 = positive argument, p15 = negative argument -- TODO confirm.
{ .mfi
      nop.m         0
      fms.s1        fRes1L = fA0H, f1, fRes1H // A1*x+A0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA23 = fA25, fArgAbsNorm2, fA23 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA12 = fA14, fArgAbsNorm2, fA12 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA19 = fA21, fArgAbsNorm2, fA19 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA8 = fA10, fArgAbsNorm2, fA8 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA15 = fA17, fArgAbsNorm2, fA15 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fms.s1        fArgAbsNorm11 = fArgAbsNorm11, fArgAbsNorm3, f0 // x^11
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fTT = fRes3L, fArgAbsNorm2, f0 // (A3*x+A2)*x^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA4 = fA6, fArgAbsNorm2, fA4 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes1L = fRes1L, f1, fTH2 // A1*x+A0
      nop.i         0
}
{ .mfi
      nop.m         0
      fms.s1        fArgAbsNorm4X = fArgAbsNorm4, fSignumX, f0 // x^4 * signum
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA19 = fA23, fArgAbsNorm4, fA19 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA8 = fA12, fArgAbsNorm4, fA8 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fTT = fRes3H, fArgAbsNorm2L, fTT // (A3*x+A2)*x^2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes1L = fRes1L, f1, fTL2 // A1*x+A0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA15 = fA19, fArgAbsNorm4, fA15 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA4 = fA8, fArgAbsNorm4, fA4 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes2H = fRes3H, fArgAbsNorm2, fTT // (A3*x+A2)*x^2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes1L = fRes1L, f1, fA0L // A1*x+A0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes4 = fA15, fArgAbsNorm11, fA4 // Result of
                                                     // polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fms.s1        fRes2L = fRes3H, fArgAbsNorm2, fRes2H // (A3*x+A2)*x^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fResH = fRes2H, f1, fRes1H // High result
      nop.i         0
};;
// The tail result times x^4*signum is added to (p14) or has the running
// low part subtracted from it (p15).
{ .mfi
      nop.m         0
(p14) fma.s1        fRes1L = fRes4, fArgAbsNorm4X, fRes1L // A1*x+A0
      nop.i         0
}
{ .mfi
      nop.m         0
(p15) fms.s1        fRes1L = fRes4, fArgAbsNorm4X, fRes1L // A1*x+A0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes2L = fRes2L, f1, fTT // (A3*x+A2)*x^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fms.s1        fResL = fRes1H, f1, fResH // Low result
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s0        fRes1L = fRes2L, fSignumX, fRes1L // Low result
                  // .s0 - for symmetry issue resolving at +/-inf rounding mode
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fResL = fResL, f1, fRes2H // Low result
      nop.i         0
};;
{ .mfi
      nop.m         0
(p14) fma.s0        fResL = fRes1L, f1, fResL // Low result
                  // .s0 - for symmetry issue resolving at +/-inf rounding mode
      nop.i         0
}
{ .mfi
      nop.m         0
(p15) fms.s0        fResL = fRes1L, f1, fResL // Low result
                  // .s0 - for symmetry issue resolving at +/-inf rounding mode
      nop.i         0
};;
// Final sum: f8 = fResL + fResH (p14) or fResL - fResH (p15), then return.
.pred.rel "mutex",p14,p15
{ .mfi
      nop.m         0
(p14) fma.s0        f8 = fResL, f1, fResH // Add high and low results
      nop.i         0
}
{ .mfb
      nop.m         0
(p15) fms.s0        f8 = fResL, f1, fResH // Add high and low results
      br.ret.sptk   b0 // Main path return
};;

// saturation path ////////////////////////////////////////////////////////////
// Returns f8 = 1*1 - fTiny under p14, or fTiny - 1*1 under p15.
// fTiny is defined outside this excerpt.
_saturation:
.pred.rel "mutex",p14,p15
{ .mfi
      nop.m         0
(p14) fms.s0        f8 = f1, f1, fTiny // Saturation result r = 1-tiny
      nop.i         0
};;
{ .mfb
      nop.m         0
(p15) fnma.s0       f8 = f1, f1, fTiny // Saturation result r = tiny-1
      br.ret.sptk   b0 // Saturation path return
};;

// 0, denormals and special IEEE numbers path /////////////////////////////////
// Classifies the argument in f8 and returns early for the easy cases:
//   p6 (infinity)   -> f8 takes its own sign and the magnitude of f1
//   p7 (NaN / zero) -> f8 = f8*1 + f8
// Anything that falls through (denormals, per the path title) is normalized
// and returned as r - r^2 (p14) or r + r^2 (p15).
tanhl_spec:
{ .mfi
      nop.m         0
      fclass.m      p6,p0 = f8, 0x23 // To filter infinities
                                     // 0x23 = @pos|@neg|@inf
      nop.i         0
};;
{ .mfi
      nop.m         0
      fclass.m      p7,p0 = f8, 0xC7 // To filter NaNs & Zeros
                                 // 0xC7 = @pos|@neg|@zero|@qnan|@snan
      nop.i         0
};;
{ .mfb
      nop.m         0
(p6)  fmerge.s      f8 = f8, f1 // +/-1 for INF args
(p6)  br.ret.spnt   b0 // exit for x = INF
};;
{ .mfb
      nop.m         0
(p7)  fma.s0        f8 = f8, f1, f8 // +/-0 for 0 args
                                    // and NaNs for NaNs
(p7)  br.ret.spnt   b0 // exit for x = NaN or +/-0
};;
{ .mfi
      nop.m         0
      fnorm.s0      f8 = f8 // Normalize arg
      nop.i         0
};;
.pred.rel "mutex",p14,p15
{ .mfi
      nop.m         0
(p14) fnma.s0       f8 = f8, f8, f8 // res = r-r^2
      nop.i         0
}
{ .mfb
      nop.m         0
(p15) fma.s0        f8 = f8, f8, f8 // res = r+r^2
      br.ret.sptk   b0 // 0, denormals, IEEE specials return
};;

// 0 < |x| < 1/8 path
///////////////////////////////////////////////////////////////////////////////
// Small-argument path. Loads seven coefficients (A15, A13, ..., A3) through
// two pointers starting at rDataPtr+0x11e0 and rDataPtr+0x11f0, each
// post-incremented by 32 bytes, and evaluates, with s = fArgSqr:
//
//   fRes = A3 + A5*s + A7*s^2 + A9*s^3 + A11*s^4 + A13*s^5 + A15*s^6
//   f8   = fRes * fArgCube + f8
//
// fArgSqr and fArgCube are computed outside this excerpt (presumably x^2 and
// x^3 of the argument held in f8 -- TODO confirm).
_0_to_1o8:
{ .mmi
      adds          rAddr1 = 0x11e0, rDataPtr // Ptr 1 to coeffs
      adds          rAddr2 = 0x11f0, rDataPtr // Ptr 2 to coeffs
      nop.i         0
};;
{ .mmi
      ldfe          fA15 = [rAddr1], 32 // Load A15
      ldfe          fA13 = [rAddr2], 32 // Load A13
      nop.i         0
};;
{ .mmi
      ldfe          fA11 = [rAddr1], 32 // Load A11
      ldfe          fA9 = [rAddr2], 32 // Load A9
      nop.i         0
};;
{ .mmi
      ldfe          fA7 = [rAddr1], 32 // Load A7
      ldfe          fA5 = [rAddr2] // Load A5
      nop.i         0
};;
{ .mfi
      ldfe          fA3 = [rAddr1] // Load A3
      fma.s1        fA11 = fA13, fArgSqr, fA11 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fArgFour = fArgSqr, fArgSqr, f0 // a^4
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA3 = fA5, fArgSqr, fA3 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fA7 = fA9, fArgSqr, fA7 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA11 = fA15, fArgFour, fA11 // Polynomial tail
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fA3 = fA7, fArgFour, fA3 // Polynomial tail
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fArgEight = fArgFour, fArgFour, f0 // a^8
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fRes = fA11, fArgEight, fA3 // Polynomial tail result
      nop.i         0
};;
{ .mfb
      nop.m         0
      fma.s0        f8 = fRes, fArgCube, f8 // (Polynomial tail)*x^3
      br.ret.sptk   b0 // [0;1/8] interval return
};;
GLOBAL_LIBM_END(tanhl)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -