📄 s_erfl.s
字号:
nop.m 0 fma.s1 fRes1L = fRes1L, f1, fTL2 // A1*x+A0 nop.i 0};;{ .mfi nop.m 0 fma.s1 fA15 = fA19, fArgAbsNorm4, fA15 // Polynomial tail nop.i 0}{ .mfi nop.m 0 fma.s1 fA4 = fA8, fArgAbsNorm4, fA4 // Polynomial tail nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes2H = fRes3H, fArgAbsNorm2, fTT // (A3*x+A2)*x^2 nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes1L = fRes1L, f1, fA0L // A1*x+A0 nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes4 = fA15, fArgAbsNorm11, fA4 // Result of // polynomial tail nop.i 0};;{ .mfi nop.m 0 fms.s1 fRes2L = fRes3H, fArgAbsNorm2, fRes2H // (A3*x+A2)*x^2 nop.i 0}{ .mfi nop.m 0 fma.s1 fResH = fRes2H, f1, fRes1H // High result nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes1L = fRes4, fArgAbsNorm4, fRes1L // A1*x+A0 nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes2L = fRes2L, f1, fTT // (A3*x+A2)*x^2 nop.i 0}{ .mfi nop.m 0 fms.s1 fResL = fRes1H, f1, fResH // Low result nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes1L = fRes1L, f1, fRes2L // Low result nop.i 0}{ .mfi nop.m 0 fma.s1 fResL = fResL, f1, fRes2H // Low result nop.i 0};;{ .mfi nop.m 0(p15) fneg fResH = fResH // Invert high result if arg is neg. 
nop.i 0};;{ .mfi nop.m 0 fma.s1 fResL = fResL, f1, fRes1L // Low result nop.i 0};;.pred.rel "mutex",p14,p15{ .mfi nop.m 0(p14) fma.s0 f8 = fResH, f1, fResL // Add high and low results nop.i 0}{ .mfb nop.m 0(p15) fms.s0 f8 = fResH, f1, fResL // Add high and low results br.ret.sptk b0 // Main path return};;// saturation path ////////////////////////////////////////////////////////////_saturation:.pred.rel "mutex",p14,p15{ .mfi nop.m 0(p14) fms.s0 f8 = f1, f1, fTiny // Saturation result r = 1-tiny nop.i 0};;{ .mfb nop.m 0(p15) fnma.s0 f8 = f1, f1, fTiny // Saturation result r = tiny-1 br.ret.sptk b0 // Saturation path return};;// 0, denormals and special IEEE numbers path /////////////////////////////////erfl_spec:{ .mfi addl rDataPtr = 0xBE0, rDataPtr // Ptr to denormals coeffs fclass.m p6,p0 = f8, 0x23 // To filter infinities // 0x23 = @pos|@neg|@inf nop.i 0};;{ .mfi ldfpd fA1H, fA1L = [rDataPtr] // Load denormals coeffs A1H, A1L fclass.m p7,p0 = f8, 0xC7 // To filter NaNs & Zeros // 0xC7 = @pos|@neg|@zero|@qnan|@snan nop.i 0};;{ .mfb nop.m 0(p6) fmerge.s f8 = f8, f1 // +/-1 for INF args (p6) br.ret.spnt b0 // exit for x = INF};;{ .mfb nop.m 0(p7) fma.s0 f8 = f8, f1, f8 // +/-0 for 0 args // and NaNs for NaNs(p7) br.ret.spnt b0 // exit for x = NaN or +/-0};;{ .mfi nop.m 0 fnorm.s0 f8 = f8 // Normalize arg nop.i 0};;{ .mfi nop.m 0 fms.s1 fRes1H = f8, fA1H, f0 // HighRes nop.i 0}{ .mfi nop.m 0 fms.s1 fRes1L = f8, fA1L, f0 // LowRes nop.i 0};;{ .mfi nop.m 0 fms.s1 fRes1Hd = f8, fA1H, fRes1H // HighRes delta nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes = fRes1L, f1, fRes1Hd // LowRes+HighRes delta nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes = f8, f8, fRes // r=x^2+r nop.i 0};;{ .mfb nop.m 0 fma.s0 f8 = fRes, f1, fRes1H // res = r+ResHigh br.ret.sptk b0 // 0, denormals, specials return};;// 0 < |x| < 1/8 path /////////////////////////////////////////////////////////_0_to_1o8:{ .mmi adds rAddr1 = 0xB60, rDataPtr // Ptr 1 to coeffs adds rAddr2 = 0xB80, rDataPtr // Ptr 2 to coeffs nop.i 
0};;{ .mmi ldfpd fA1H, fA1L = [rAddr1], 16 // Load A1High, A1Low ldfe fA13 = [rAddr2], 16 // Load A13 nop.i 0};;{ .mmi ldfe fA15 = [rAddr1], 48 // Load A15 ldfe fA11 = [rAddr2], 32 // Load A11 nop.i 0};;{ .mmi ldfe fA9 = [rAddr1], 32 // Load A9 ldfe fA7 = [rAddr2], 32 // Load A7 nop.i 0};;{ .mmi ldfe fA5 = [rAddr1] // Load A5 ldfe fA3 = [rAddr2] // Load A3 nop.i 0};;{ .mfi nop.m 0 fms.s1 fRes1H = f8, fA1H, f0 // x*(A1H+A1L) nop.i 0}{ .mfi nop.m 0 fms.s1 fRes1L = f8, fA1L, f0 // x*(A1H+A1L) nop.i 0};;{ .mfi nop.m 0 fma.s1 fA11 = fA13, fArgSqr, fA11 // Polynomial tail nop.i 0}{ .mfi nop.m 0 fma.s1 fArgFour = fArgSqr, fArgSqr, f0 // a^4 nop.i 0};;{ .mfi nop.m 0 fma.s1 fA3 = fA5, fArgSqr, fA3 // Polynomial tail nop.i 0}{ .mfi nop.m 0 fma.s1 fA7 = fA9, fArgSqr, fA7 // Polynomial tail nop.i 0};;{ .mfi nop.m 0 fms.s1 fRes1Hd = f8, fA1H, fRes1H // x*(A1H+A1L) delta nop.i 0};;{ .mfi nop.m 0 fma.s1 fA11 = fA15, fArgFour, fA11 // Polynomial tail nop.i 0};;{ .mfi nop.m 0 fma.s1 fA3 = fA7, fArgFour, fA3 // Polynomial tail nop.i 0}{ .mfi nop.m 0 fma.s1 fArgEight = fArgFour, fArgFour, f0 // a^8 nop.i 0};;{ .mfi nop.m 0 fma.s1 f8 = fRes1L, f1, fRes1Hd // x*(A1H+A1L) nop.i 0};;{ .mfi nop.m 0 fma.s1 fRes = fA11, fArgEight, fA3 //Polynomial tail result nop.i 0};;{ .mfi nop.m 0 fma.s1 f8 = fRes, fArgCube, f8 // (Polynomial tail)*x^3 nop.i 0};;{ .mfb nop.m 0 fma.s0 f8 = f8, f1, fRes1H // (Polynomial tail)*x^3 + // + x*(A1H+A1L) br.ret.sptk b0 // [0;1/8] interval return};; GLOBAL_LIBM_END(erfl)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -