📄 e_atanh.s
字号:
};;

// NR method: iteration #3
{ .mfi
      adds          RcpTablePtr = 0xB0, DataPtr
      fnma.s1       fRcp3 = fRcp2, fOneMx, f1            // t = 1 - r2*x
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fY4Rcp = fRcp2, fOnePx, f0           // fY4Rcp = r2*(1 + x)
      nop.i         0
};;

// polynomial approximation & final reconstruction
{ .mfi
      nop.m         0
      frcpa.s1      fRcp, p0 = f1, fY4Rcp
      nop.i         0
}
{ .mfi
      nop.m         0
      // y = r2 * (1 + x) + r2 * (1 + x) * t = (1 + x) * (r2 + r2*(1 - r2*x))
      fma.s1        fY = fY4Rcp, fRcp3, fY4Rcp
      nop.i         0
};;
{ .mmi
      getf.exp      rSExpb = fY4Rcp                      // biased exponent and sign
;;
      getf.sig      rSig = fY4Rcp                        // significand
      nop.i         0
};;
{ .mfi
      nop.m         0
      fms.s1        fR = fY, fRcp, f1                    // fR = fY * fRcp - 1
      nop.i         0
};;
{ .mmi
      and           rExpb = rSExpb, rExpbMask
;;
      sub           rN = rExpb, rBias                    // exponent
      extr.u        rInd = rSig,55,8                     // Extract 8 bits
};;
{ .mmi
      setf.sig      fN4Cvt = rN
      shladd        RcpTablePtr = rInd, 4, RcpTablePtr   // table base + 16*index
      nop.i         0
};;
{ .mfi
      ldfe          fLogT = [RcpTablePtr]
      fma.s1        fR2 = fR, fR, f0                     // r^2
      nop.i         0
}
// NOTE(review): the next bundle carries no explicit template, unlike every
// other bundle here; its slots are M, F, I, so presumably the assembler
// selects an MFI template -- confirm before adding ".mfi".
{
      nop.m         0
      fma.s1        fP54 = fP5, fR, fP4                  // P5*r + P4
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fP32 = fP3, fR, fP2                  // P3*r + P2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fR3 = fR2, fR, f0                    // r^3
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fP10 = fP1, fR2, fR                  // P1*r^2 + r
      nop.i         0
};;
{ .mfi
      nop.m         0
      fcvt.xf       fN = fN4Cvt
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        fP54 = fP54, fR2, fP32               // (P5*r + P4)*r^2 + P3*r + P2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fLogT_N = fN, fLog2, fLogT           // N*Log2 + LogT
      nop.i         0
}
{ .mfi
      nop.m         0
      // ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      fma.s1        fP54 = fP54, fR3, fP10
      nop.i         0
};;

// Exactly one of p10/p11 is expected to be set here (declared mutex below);
// both writes target f8.
.pred.rel "mutex",p11,p10
{ .mfi
      nop.m         0
      // 0.5*(((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r) + 0.5*(N*Log2 + T)
(p11) fnma.d.s0     f8 = fP54, fP1, fLogT_N
      nop.i         0
}
{ .mfb
      nop.m         0
      // -0.5*(((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r) - 0.5*(N*Log2 + T)
(p10) fms.d.s0      f8 = fP54, fP1, fLogT_N
      br.ret.sptk   b0                                   // Exit for 0.25 <= |x| < 1.0
};;

// Here if 0 < |x| < 0.25
atanh_near_zero:
{ .mfi
      ldfe          fC4 = [Data2Ptr], 16
      fma.s1        fP98 = fC9, fX2, fC8                 // C9*x^2 + C8
      nop.i         0
}
{ .mfi
      ldfe          fC1 = [Data3Ptr], 16
      fma.s1        fP76 = fC7, fX2, fC6                 // C7*x^2 + C6
      nop.i         0
};;
{ .mfi
      ldfe          fC3 = [Data2Ptr], 16
      fma.s1        fX8 = fX4, fX4, f0                   // x^8
      nop.i         0
}
{ .mfi
      ldfe          fC0 = [Data3Ptr], 16
      nop.f         0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fP98 = fP98, fX4, fP76               // C9*x^6 + C8*x^4 + C7*x^2 + C6
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fP54 = fC5, fX2, fC4                 // C5*x^2 + C4
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fP32 = fC3, fX2, fC2                 // C3*x^2 + C2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fP10 = fC1, fX2, fC0                 // C1*x^2 + C0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        fP54 = fP54, fX4, fP32               // C5*x^6 + C4*x^4 + C3*x^2 + C2
      nop.i         0
};;
{ .mfi
      nop.m         0
      // C9*x^14 + C8*x^12 + C7*x^10 + C6*x^8 + C5*x^6 + C4*x^4 + C3*x^2 + C2
      fma.s1        fP98 = fP98, fX8, fP54
      nop.i         0
};;
{ .mfi
      nop.m         0
      // C9*x^18 + C8*x^16 + C7*x^14 + C6*x^12 + C5*x^10 + C4*x^8 + C3*x^6 +
      // C2*x^4 + C1*x^2 + C0
      fma.s1        fP98 = fP98, fX4, fP10
      nop.i         0
};;
{ .mfb
      nop.m         0
      // C9*x^21 + C8*x^19 + C7*x^17 + C6*x^15 + C5*x^13 + C4*x^11 + C3*x^9 +
      // C2*x^7 + C1*x^5 + C0*x^3 + x
      fma.d.s0      f8 = fP98, fX3, fNormX
      br.ret.sptk   b0                                   // Exit for 0 < |x| < 0.25
};;

ATANH_UNORM:
// Here if x=unorm
{ .mfi
      getf.exp      rArgSExpb = fNormX                   // Recompute if x unorm
      fclass.m      p0,p13 = fNormX, 0x0b                // Test x denorm
      nop.i         0
};;
{ .mfb
      nop.m         0
      fcmp.eq.s0    p7,p0 = f8, f0                       // Dummy to set denormal flag
(p13) br.cond.sptk  ATANH_COMMON                         // Continue if x unorm and not denorm
};;

// Fall through only for a true denormal: the result is x +/- x^2,
// selected by the mutually exclusive predicates p10 / p11.
.pred.rel "mutex",p10,p11
{ .mfi
      nop.m         0
(p10) fnma.d.s0     f8 = f8,f8,f8                        // Result x-x^2 if x=-denorm
      nop.i         0
}
{ .mfb
      nop.m         0
(p11) fma.d.s0      f8 = f8,f8,f8                        // Result x+x^2 if x=+denorm
      br.ret.spnt   b0                                   // Exit if denorm
};;

// Here if |x| >= 1.0
atanh_ge_one:
{ .mfi
      alloc         r32 = ar.pfs,1,3,4,0
      fmerge.s      fAbsX = f0, f8                       // Form |x|
      nop.i         0
};;
{ .mfi
      nop.m         0
      fmerge.s      f10 = f8, f8                         // Save input for error call
      nop.i         0
};;
{ .mfi
      nop.m         0
      fcmp.eq.s1    p6,p7 = fAbsX, f1                    // Test for |x| = 1.0
      nop.i         0
};;

// Set error tag and result, and raise invalid flag if |x| > 1.0
{ .mfi
(p7)  mov           atanh_GR_tag = 131
(p7)  frcpa.s0      f8, p0 = f0, f0                      // Get QNaN, and raise invalid
      nop.i         0
};;

// Set error tag and result, and raise Z flag if |x| = 1.0
{ .mfi
      nop.m         0
(p6)  frcpa.s0      fRcp, p0 = f1, f0                    // Get inf, and raise Z flag
      nop.i         0
};;
{ .mfb
(p6)  mov           atanh_GR_tag = 132
(p6)  fmerge.s      f8 = f8, fRcp                        // result is +-inf
      br.cond.sptk  __libm_error_region                  // Exit if |x| >= 1.0
};;

GLOBAL_LIBM_END(atanh)


// __libm_error_region: error exit reached from atanh_ge_one (|x| >= 1.0).
// Creates a 64-byte stack frame and stores, relative to the new sp:
//   sp+16 : the saved input argument (f10)
//   sp+32 : f1, written to the second-parameter slot
//   sp+48 : the provisional result (f8)
// It then calls __libm_error_support, reloads f8 from sp+48, restores
// sp, b0, gp and ar.pfs, and returns.
// NOTE(review): GR_Parameter_*, GR_SAVE_* and atanh_GR_tag are register
// aliases defined outside this span; presumably the parameter aliases map
// to the outgoing argument registers -- confirm against their definitions.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
      add           GR_Parameter_Y=-32,sp                // Parameter 2 value
      nop.f         0
.save   ar.pfs,GR_SAVE_PFS
      mov           GR_SAVE_PFS=ar.pfs                   // Save ar.pfs
}
{ .mfi
.fframe 64
      add           sp=-64,sp                            // Create new stack
      nop.f         0
      mov           GR_SAVE_GP=gp                        // Save gp
};;
{ .mmi
      stfd          [GR_Parameter_Y] = f1,16             // STORE Parameter 2 on stack
      add           GR_Parameter_X = 16,sp               // Parameter 1 address
.save   b0, GR_SAVE_B0
      mov           GR_SAVE_B0=b0                        // Save b0
};;
.body
{ .mib
      stfd          [GR_Parameter_X] = f10               // STORE Parameter 1 on stack
      add           GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
      nop.b         0
}
{ .mib
      stfd          [GR_Parameter_Y] = f8                // STORE Parameter 3 on stack
      add           GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk  b0=__libm_error_support#             // Call error handling function
};;
{ .mmi
      add           GR_Parameter_RESULT = 48,sp          // Recompute the result address after the call
      nop.m         0
      nop.i         0
};;
{ .mmi
      ldfd          f8 = [GR_Parameter_RESULT]           // Get return result off stack
.restore sp
      add           sp = 64,sp                           // Restore stack pointer
      mov           b0 = GR_SAVE_B0                      // Restore return address
};;
{ .mib
      mov           gp = GR_SAVE_GP                      // Restore gp
      mov           ar.pfs = GR_SAVE_PFS                 // Restore ar.pfs
      br.ret.sptk   b0                                   // Return
};;

LOCAL_LIBM_END(__libm_error_region)
.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -