📄 s_expm1l.s
字号:
nop.m 999 mov FR_Y_hi = FR_T // Assume Y_hi = T nop.i 999};;{ .mfi nop.m 999 fma.s1 FR_poly = FR_rsq, FR_poly, FR_r // poly = rsq * poly + r nop.i 999};;{ .mfi nop.m 999 fma.s1 FR_Wp1_T_scale = FR_W, FR_T_scale, FR_T_scale // (W+1)*T*scale nop.i 999}{ .mfi nop.m 999 fma.s1 FR_W_T_scale = FR_W, FR_T_scale, f0 // W*T*scale nop.i 999};;{ .mfi nop.m 999(p9) fsub.s1 FR_Y_hi = f0, FR_2_mk // If expm1, if K < -10 set Y_hi nop.i 999}{ .mfi nop.m 999(p10) fsub.s1 FR_Y_hi = FR_T, FR_2_mk // If expm1, if |K|<=10 set Y_hi nop.i 999};;{ .mfi nop.m 999 fma.s1 FR_result_lo = FR_Wp1_T_scale, FR_poly, FR_W_T_scale nop.i 999};;.pred.rel "mutex",p8,p9// If K > 10 adjust result_lo = result_lo - scale * 2^-k// If |K| <= 10 adjust result_lo = result_lo + scale * T{ .mfi nop.m 999(p8) fnma.s1 FR_result_lo = FR_scale, FR_2_mk, FR_result_lo // If K > 10 nop.i 999}{ .mfi nop.m 999(p9) fma.s1 FR_result_lo = FR_T_scale, f1, FR_result_lo // If |K| <= 10 nop.i 999};;{ .mfi nop.m 999 fmpy.s0 FR_tmp = FR_A1, FR_A1 // Dummy op to set inexact nop.i 999}{ .mfb nop.m 999(p15) fma.s0 f8 = FR_Y_hi, FR_scale, FR_result_lo // Safe result(p15) br.ret.sptk b0 // Safe exit for normal path};;// Here if unsafe, will only be here for exp with K < big_expo_neg{ .mfb nop.m 999 fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result br.cond.sptk EXP_POSSIBLE_UNDERFLOW // Branch to unsafe code};; EXP_SMALL: // Here if 2^-60 < |x| < 2^-m, m=12 for exp, m=7 for expm1{ .mfi(p7) ldfe FR_Q3 = [GR_ad_Q],16 // Get Q3 for small path, if expm1(p6) fma.s1 FR_p65 = FR_P6, FR_r, FR_P5 // If exp, p65 = P6 * r + P5 nop.i 999}{ .mfi mov GR_minus_one = -1(p7) fma.s1 FR_q98 = FR_Q9, FR_r, FR_Q8 // If expm1, q98 = Q9 * r + Q8 nop.i 999};;{ .mfi(p7) ldfe FR_Q2 = [GR_ad_Q],16 // Get Q2 for small path, if expm1(p7) fma.s1 FR_q65 = FR_Q6, FR_r, FR_Q5 // If expm1, q65 = Q6 * r + Q5 nop.i 999};;{ .mfi setf.sig FR_tmp = GR_minus_one // Create value to force inexact(p6) fma.s1 FR_p21 = FR_P2, FR_r, FR_P1 // If exp, p21 = P2 * r + P1 nop.i 999}{ .mfi(p7) ldfe FR_Q1 = [GR_ad_Q],16 // Get Q1 for small path, if expm1(p7) fma.s1 FR_q43 = FR_Q4, FR_r, FR_Q3 // If expm1, q43 = Q4 * r + Q3 nop.i 999};;{ .mfi nop.m 999(p6) fma.s1 FR_p654 = FR_p65, FR_r, FR_P4 // If exp, p654 = p65 * r + P4 nop.i 999}{ .mfi nop.m 999(p7) fma.s1 FR_q987 = FR_q98, FR_r, FR_Q7 // If expm1, q987 = q98 * r + Q7 nop.i 999};;{ .mfi nop.m 999(p7) fma.s1 FR_q21 = FR_Q2, FR_r, FR_Q1 // If expm1, q21 = Q2 * r + Q1 nop.i 999};;{ .mfi nop.m 999(p6) fma.s1 FR_p210 = FR_p21, FR_rsq, FR_r // If exp, p210 = p21 * r + P0 nop.i 999}{ .mfi nop.m 999(p7) fma.s1 FR_q6543 = FR_q65, FR_rsq, FR_q43 // If expm1, q6543 = q65*r2+q43 nop.i 999};;{ .mfi nop.m 999(p6) fma.s1 FR_p6543 = FR_p654, FR_r, FR_P3 // If exp, p6543 = p654 * r + P3 nop.i 999}{ .mfi nop.m 999(p7) fma.s1 FR_q9876543 = FR_q987, FR_r4, FR_q6543 // If expm1, q9876543 = ... nop.i 999};;{ .mfi nop.m 999(p6) fma.s1 FR_Y_lo = FR_p6543, FR_r4, FR_p210 // If exp, form Y_lo nop.i 999};;{ .mfi nop.m 999(p7) fma.s1 FR_Y_lo = FR_q9876543, FR_rsq, FR_q21 // If expm1, form Y_lo nop.i 999};;{ .mfi nop.m 999 fmpy.s0 FR_tmp = FR_tmp, FR_tmp // Dummy op to set inexact nop.i 999};;.pred.rel "mutex",p6,p7{ .mfi nop.m 999(p6) fma.s0 f8 = FR_Y_lo, f1, f1 // If exp, result = 1 + Y_lo nop.i 999}{ .mfb nop.m 999(p7) fma.s0 f8 = FR_Y_lo, FR_rsq, FR_norm_x // If expm1, result = Y_lo*r2+x br.ret.sptk b0 // Exit for 2^-60 <= |x| < 2^-m // m=12 for exp, m=7 for expm1};;EXP_VERY_SMALL: //// Here if 0 < |x| < 2^-60// If exp, result = 1.0 + x// If expm1, result = x +x*x/2, but have to check for possible underflow//{ .mfi(p7) mov GR_exp_underflow = -16381 // Exponent for possible underflow(p6) fadd.s0 f8 = f1, FR_norm_x // If exp, result = 1+x nop.i 999}{ .mfi nop.m 999(p7) fmpy.s1 FR_result_lo = FR_half_x, FR_norm_x // If expm1 result_lo = x*x/2 nop.i 999};;{ .mfi(p7) cmp.lt.unc p0, p8 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small(p7) mov FR_Y_hi = FR_norm_x // If expm1, Y_hi = x(p7) cmp.lt p0, p15 = GR_exp_x, GR_exp_underflow // Unsafe if expm1 x small};;{ .mfb nop.m 999(p8) fma.s0 f8 = FR_norm_x, f1, FR_result_lo // If expm1, result=x+x*x/2(p15) br.ret.sptk b0 // If Safe, exit};;// Here if expm1 and 0 < |x| < 2^-16381; may be possible underflow{ .mfb nop.m 999 fma.s0 FR_RESULT = FR_Y_hi, FR_scale, FR_result_lo // Prelim result br.cond.sptk EXP_POSSIBLE_UNDERFLOW // Branch to unsafe code};;EXP_CERTAIN_UNDERFLOW_ZERO:// Here if x < zero_uflow_x// For exp, set result to tiny+0.0 and set I, U, and branch to error handling// For expm1, set result to tiny-1.0 and set I, and exit{ .mmi alloc GR_SAVE_PFS = ar.pfs,0,3,4,0 nop.m 999 mov GR_one = 1};;{ .mmi setf.exp FR_small = GR_one // Form small value nop.m 999(p6) mov GR_Parameter_TAG = 13 // Error tag for exp underflow};;{ .mfi nop.m 999 fmerge.s FR_X = f8,f8 // Save x for error call nop.i 999};;.pred.rel "mutex",p6,p7{ .mfb nop.m 999(p6) fma.s0 FR_RESULT = FR_small, FR_small, f0 // If exp, set I,U, tiny result(p6) br.cond.sptk __libm_error_region // If exp, go to error handling}{ .mfb nop.m 999(p7) fms.s0 f8 = FR_small, FR_small, f1 // If expm1, set I, result -1.0(p7) br.ret.sptk b0 // If expm1, exit};; EXP_OVERFLOW:// Here if x >= min_oflow_x{ .mmi alloc GR_SAVE_PFS = ar.pfs,0,3,4,0 mov GR_huge_exp = 0x1fffe nop.i 999}{ .mfi mov GR_huge_signif = -0x1 nop.f 999(p6) mov GR_Parameter_TAG = 12 // Error tag for exp overflow};;{ .mmf setf.exp FR_huge_exp = GR_huge_exp // Create huge value setf.sig FR_huge_signif = GR_huge_signif // Create huge value fmerge.s FR_X = f8,f8 // Save x for error call};;{ .mfi nop.m 999 fmerge.se FR_huge = FR_huge_exp, FR_huge_signif(p7) mov GR_Parameter_TAG = 39 // Error tag for expm1 overflow};;{ .mfb nop.m 999 fma.s0 FR_RESULT = FR_huge, FR_huge, FR_huge // Force I, O, and Inf br.cond.sptk __libm_error_region // Branch to error handling};;EXP_POSSIBLE_UNDERFLOW:// Here if exp and zero_uflow_x < x < about -11356 [where k < -16381]// Here if expm1 and |x| < 2^-16381{ .mfi alloc GR_SAVE_PFS = ar.pfs,0,3,4,0 fsetc.s2 0x7F,0x41 // Set FTZ and disable traps nop.i 999};;{ .mfi nop.m 999 fma.s2 FR_ftz = FR_Y_hi, FR_scale, FR_result_lo // Result with FTZ nop.i 999};;{ .mfi nop.m 999 fsetc.s2 0x7F,0x40 // Disable traps (set s2 default) nop.i 999};;{ .mfi nop.m 999(p6) fclass.m.unc p11, p0 = FR_ftz, 0x00F // If exp, FTZ result denorm or zero? nop.i 999};;{ .mfb(p11) mov GR_Parameter_TAG = 13 // exp underflow fmerge.s FR_X = f8,f8 // Save x for error call(p11) br.cond.spnt __libm_error_region // Branch on exp underflow};;{ .mfb nop.m 999 mov f8 = FR_RESULT // Was safe after all br.ret.sptk b0};;EXP_64_SPECIAL: // Here if x natval, nan, inf, zero// If x natval, +inf, or if expm1 and x zero, just return x.// The other cases must be tested for, and results set.// These cases do not generate exceptions.{ .mfi nop.m 999 fclass.m p8, p0 = f8, 0x0c3 // Is x nan? nop.i 999};;{ .mfi nop.m 999(p6) fclass.m.unc p13, p0 = f8, 0x007 // If exp, is x zero? nop.i 999};;{ .mfi nop.m 999(p6) fclass.m.unc p11, p0 = f8, 0x022 // If exp, is x -inf? nop.i 999}{ .mfi nop.m 999(p8) fadd.s0 f8 = f8, f1 // If x nan, result quietized x nop.i 999};;{ .mfi nop.m 999(p7) fclass.m.unc p10, p0 = f8, 0x022 // If expm1, is x -inf? nop.i 999}{ .mfi nop.m 999(p13) fadd.s0 f8 = f0, f1 // If exp and x zero, result 1.0 nop.i 999};;{ .mfi nop.m 999(p11) mov f8 = f0 // If exp and x -inf, result 0 nop.i 999};;{ .mfb nop.m 999(p10) fsub.s1 f8 = f0, f1 // If expm1, x -inf, result -1.0 br.ret.sptk b0 // Exit special cases};;EXP_64_UNSUPPORTED: // Here if x unsupported type{ .mfb nop.m 999 fmpy.s0 f8 = f8, f0 // Return nan br.ret.sptk b0};;GLOBAL_IEEE754_END(expl)LOCAL_LIBM_ENTRY(__libm_error_region).prologue{ .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value nop.f 0.save ar.pfs,GR_SAVE_PFS mov GR_SAVE_PFS=ar.pfs // Save ar.pfs}{ .mfi.fframe 64 add sp=-64,sp // Create new stack nop.f 0 mov GR_SAVE_GP=gp // Save gp};;{ .mmi stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack add GR_Parameter_X = 16,sp // Parameter 1 address.save b0, GR_SAVE_B0 mov GR_SAVE_B0=b0 // Save b0};;.body{ .mib stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y nop.b 0 // Parameter 3 address}{ .mib stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack add GR_Parameter_Y = -16,GR_Parameter_Y br.call.sptk b0=__libm_error_support# // Call error handling function};;{ .mmi add GR_Parameter_RESULT = 48,sp nop.m 0 nop.i 0};;{ .mmi ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack.restore sp add sp = 64,sp // Restore stack pointer mov b0 = GR_SAVE_B0 // Restore return address};;{ .mib mov gp = GR_SAVE_GP // Restore gp mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs br.ret.sptk b0 // Return};;LOCAL_LIBM_END(__libm_error_region#).type __libm_error_support#,@function.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -