📄 e_atanhl.s
字号:
// ---------------------------------------------------------------------------
// NOTE(review): this excerpt begins part-way through an instruction bundle of
// atanhl.  The opening "{ .<template>" and the first slot of that bundle are
// outside the visible chunk, so the first closing brace below has no visible
// opening partner.  Line structure has been restored to one statement per
// line so that "//" comments no longer swallow the instructions after them.
// ---------------------------------------------------------------------------
      fmax.s1 FR_AA = FR_X,f1               // for S_lo, form AA = max(X,1.0)
      nop.i 0
}
{ .mfi
      shladd GR_ad_tbl_1 = GR_Index1,4,GR_ad_tbl_1 // point to G_1
      nop.f 0
      mov GR_Bias = 0x0FFFF                 // exponent bias
};;

{ .mfi
      ldfps FR_G,FR_H = [GR_ad_tbl_1],8     // load G_1, H_1
      fmerge.se FR_S_hi = f1,FR_Z           // form |x+1|
      nop.i 0
};;

{ .mfi
      getf.exp GR_N = FR_Z                  // get N = exponent of x+1
      nop.f 0
      nop.i 0
}
{ .mfi
      ldfd FR_h = [GR_ad_tbl_1]             // load h_1
      fnma.s1 FR_R1 = FR_B_lo,FR_X,FR_R1    // r1 = r1 - b_lo*x
      nop.i 0
};;

{ .mfi
      ldfe FR_log2_hi = [GR_ad_q],16        // load log2_hi
      nop.f 0
      pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15  // get bits 30-15 of X_0 * Z_1
};;

//
// For performance, don't use result of pmpyshr2.u for 4 cycles.
// The following bundles load constants through GR_ad_q (post-incremented by
// 16 each time) and do work that does not depend on GR_X_1.
//
{ .mfi
      ldfe FR_log2_lo = [GR_ad_q],16        // load log2_lo
      nop.f 0
      sub GR_N = GR_N,GR_Bias               // remove exponent bias from N
};;

{ .mfi
      ldfe FR_Q4 = [GR_ad_q],16             // load Q4
      fms.s1 FR_S_lo = FR_AA,f1,FR_Z        // form S_lo = AA - Z
      sub GR_minus_N = GR_Bias,GR_N         // form exponent of 2^(-N)
};;

{ .mmf
      ldfe FR_Q3 = [GR_ad_q],16             // load Q3
      // put integer N into rightmost significand
      setf.sig FR_float_N = GR_N
      fmin.s1 FR_BB = FR_X,f1               // for S_lo, form BB = min(X,1.0)
};;

{ .mfi
      ldfe FR_Q2 = [GR_ad_q],16             // load Q2
      nop.f 0
      extr.u GR_Index2 = GR_X_1,6,4         // extract bits 6-9 of X_1
};;

{ .mmi
      ldfe FR_Q1 = [GR_ad_q]                // load Q1 (last constant, no post-increment)
      shladd GR_ad_z_2 = GR_Index2,2,GR_ad_z_2 // point to Z_2
      nop.i 0
};;

{ .mmi
      ld4 GR_Z_2 = [GR_ad_z_2]              // load Z_2
      shladd GR_ad_tbl_2 = GR_Index2,4,GR_ad_tbl_2 // point to G_2
      nop.i 0
};;

{ .mfi
      ldfps FR_G2,FR_H2 = [GR_ad_tbl_2],8   // load G_2, H_2
      nop.f 0
      nop.i 0
};;

{ .mfi
      ldfd FR_h2 = [GR_ad_tbl_2]            // load h_2
      fma.s1 FR_S_lo = FR_S_lo,f1,FR_BB     // S_lo = S_lo + BB
      nop.i 0
}
{ .mfi
      setf.exp FR_2_to_minus_N = GR_minus_N // form 2^(-N)
      fma.s1 FR_X_lo = FR_R1,FR_Y3,f0       // x_lo = r1*y3
      nop.i 0
};;

{ .mfi
      nop.m 0
      nop.f 0
      pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15  // get bits 30-15 of X_1 * Z_2
};;

//
// For performance, don't use result of pmpyshr2.u for 4 cycles.
// The nop bundles below only provide that spacing.
//
{ .mfi
      add GR_ad2_tbl_3 = 8,GR_ad_tbl_3      // second pointer, 8 bytes past GR_ad_tbl_3
                                            // (used for the h_3 load further down)
      nop.f 0
      nop.i 0
}
{ .mfi
      nop.m 0
      nop.f 0
      nop.i 0
};;

{ .mfi
      nop.m 0
      nop.f 0
      nop.i 0
};;

{ .mfi
      nop.m 0
      nop.f 0
      nop.i 0
};;

//
// Now GR_X_2 can be used
//
{ .mfi
      nop.m 0
      nop.f 0
      extr.u GR_Index3 = GR_X_2,1,5         // extract bits 1-5 of X_2
}
{ .mfi
      nop.m 0
      fma.s1 FR_S_lo = FR_S_lo,f1,FR_X_lo   // S_lo = S_lo + Arg_lo
      nop.i 0
};;

{ .mfi
      shladd GR_ad_tbl_3 = GR_Index3,4,GR_ad_tbl_3   // point to G_3
      fcvt.xf FR_float_N = FR_float_N       // convert integer N to floating point
      nop.i 0
}
{ .mfi
      shladd GR_ad2_tbl_3 = GR_Index3,4,GR_ad2_tbl_3 // point to h_3
      fma.s1 FR_Q1 = FR_Q1,FR_Half,f0       // sign(arg)*Q1/2
      nop.i 0
};;

{ .mmi
      ldfps FR_G3,FR_H3 = [GR_ad_tbl_3],8   // load G_3, H_3
      ldfd FR_h3 = [GR_ad2_tbl_3]           // load h_3
      nop.i 0
};;

//
// Combine the three table stages: G is a product, H and h are sums.
//
{ .mfi
      nop.m 0
      fmpy.s1 FR_G = FR_G,FR_G2             // G = G_1 * G_2
      nop.i 0
}
{ .mfi
      nop.m 0
      fadd.s1 FR_H = FR_H,FR_H2             // H = H_1 + H_2
      nop.i 0
};;

{ .mfi
      nop.m 0
      fadd.s1 FR_h = FR_h,FR_h2             // h = h_1 + h_2
      nop.i 0
};;

{ .mfi
      nop.m 0
      // S_lo = S_lo * 2^(-N)
      fma.s1 FR_S_lo = FR_S_lo,FR_2_to_minus_N,f0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fmpy.s1 FR_G = FR_G,FR_G3             // G = (G_1 * G_2) * G_3
      nop.i 0
}
{ .mfi
      nop.m 0
      fadd.s1 FR_H = FR_H,FR_H3             // H = (H_1 + H_2) + H_3
      nop.i 0
};;

{ .mfi
      nop.m 0
      fadd.s1 FR_h = FR_h,FR_h3             // h = (h_1 + h_2) + h_3
      nop.i 0
};;

//
// Reduced argument r and the high/low parts of the result.
//
{ .mfi
      nop.m 0
      fms.s1 FR_r = FR_G,FR_S_hi,f1         // r = G * S_hi - 1
      nop.i 0
}
{ .mfi
      nop.m 0
      // Y_hi = N * log2_hi + H
      fma.s1 FR_Y_hi = FR_float_N,FR_log2_hi,FR_H
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_h = FR_float_N,FR_log2_lo,FR_h // h = N * log2_lo + h
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_r = FR_G,FR_S_lo,FR_r       // r = G * S_lo + (G * S_hi - 1)
      nop.i 0
};;

//
// Polynomial in r with coefficients Q1..Q4.
//
{ .mfi
      nop.m 0
      fma.s1 FR_poly_lo = FR_r,FR_Q4,FR_Q3  // poly_lo = r * Q4 + Q3
      nop.i 0
}
{ .mfi
      nop.m 0
      fmpy.s1 FR_rsq = FR_r,FR_r            // rsq = r * r
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_05r = FR_r,FR_Half,f0       // sign(arg)*r/2
      nop.i 0
};;

{ .mfi
      nop.m 0
      // poly_lo = poly_lo * r + Q2
      fma.s1 FR_poly_lo = FR_poly_lo,FR_r,FR_Q2
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_rcub = FR_rsq,FR_r,f0       // rcub = r^3
      nop.i 0
};;

{ .mfi
      nop.m 0
      // poly_hi = sign(arg)*(Q1*r^2 + r)/2
      fma.s1 FR_poly_hi = FR_Q1,FR_rsq,FR_05r
      nop.i 0
};;

{ .mfi
      nop.m 0
      // poly_lo = poly_lo*r^3 + h
      fma.s1 FR_poly_lo = FR_poly_lo,FR_rcub,FR_h
      nop.i 0
};;

{ .mfi
      nop.m 0
      // Y_lo = poly_hi + poly_lo/2
      fma.s0 FR_Y_lo = FR_poly_lo,FR_Half,FR_poly_hi
      nop.i 0
};;

{ .mfb
      nop.m 0
      // Result = arctanh(x) = Y_hi/2 + Y_lo
      fma.s0 f8 = FR_Y_hi,FR_Half,FR_Y_lo
      br.ret.sptk b0                        // return, result in f8
};;

// ---------------------------------------------------------------------------
// Taylor's series
//
// Evaluates  f8 = f8 + x3 * P  where P is built from C3..C17 as
//   P = ((C17*x2 + C15)*x4 + (C13*x2 + C11))*x8
//       + ((C9*x2 + C7)*x4 + (C5*x2 + C3))
// FR_C17 and FR_C9 are overwritten as accumulators.
// NOTE(review): FR_x2 is set before this label, outside the visible chunk;
// presumably x^2 - confirm against the code that branches here.
// ---------------------------------------------------------------------------
atanhl_near_zero:
{ .mfi
      nop.m 0
      fma.s1 FR_x3 = FR_x2,f8,f0            // x3 = x2 * x
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_x4 = FR_x2,FR_x2,f0         // x4 = x2 * x2
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_C17 = FR_C17,FR_x2,FR_C15   // C17*x2 + C15
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_C13 = FR_C13,FR_x2,FR_C11   // C13*x2 + C11
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_C9 = FR_C9,FR_x2,FR_C7      // C9*x2 + C7
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_C5 = FR_C5,FR_x2,FR_C3      // C5*x2 + C3
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_x8 = FR_x4,FR_x4,f0         // x8 = x4 * x4
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_C17 = FR_C17,FR_x4,FR_C13   // upper half: (..C17..)*x4 + (..C13..)
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_C9 = FR_C9,FR_x4,FR_C5      // lower half: (..C9..)*x4 + (..C5..)
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_C17 = FR_C17,FR_x8,FR_C9    // P = upper*x8 + lower
      nop.i 0
};;

{ .mfb
      nop.m 0
      fma.s0 f8 = FR_C17,FR_x3,f8           // result = P*x3 + x
      br.ret.sptk b0                        // return, result in f8
};;

// ---------------------------------------------------------------------------
// |x| = 1.0: build an infinity carrying the sign of x, then go through the
// shared error path with tag 130.
// ---------------------------------------------------------------------------
atanhl_eq_one:
{ .mfi
      nop.m 0
      frcpa.s0 FR_Rcp,p0 = f1,f0            // get inf, and raise Z flag
      nop.i 0
}
{ .mfi
      nop.m 0
      fmerge.s FR_Arg_X = f8, f8            // copy the argument for the error handler
      nop.i 0
};;

{ .mfb
      mov GR_Parameter_TAG = 130
      fmerge.s FR_RESULT = f8,FR_Rcp        // result is +-inf
      br.cond.sptk __libm_error_region      // exit if |x| = 1.0
};;

// ---------------------------------------------------------------------------
// |x| > 1.0: produce a QNaN, then go through the shared error path with
// tag 129.
// ---------------------------------------------------------------------------
atanhl_gt_one:
{ .mfi
      nop.m 0
      fmerge.s FR_Arg_X = f8, f8            // copy the argument for the error handler
      nop.i 0
};;

{ .mfb
      mov GR_Parameter_TAG = 129
      frcpa.s0 FR_RESULT,p0 = f0,f0         // get QNaN, and raise invalid
      br.cond.sptk __libm_error_region      // exit if |x| > 1.0
};;

GLOBAL_LIBM_END(atanhl)


// ---------------------------------------------------------------------------
// __libm_error_region
//
// Shared error exit.  Spills the argument(s) and the provisional result to a
// fresh 64-byte stack frame, calls __libm_error_support, reloads the result
// into f8 and returns to the original caller.
//
// In:   FR_Arg_X, FR_Arg_Y, FR_RESULT, GR_Parameter_TAG
//       (the two entry paths visible in this file set FR_Arg_X, FR_RESULT and
//        GR_Parameter_TAG only; FR_Arg_Y is stored as-is)
// Out:  f8 = value read back from the result slot after the call
//
// Frame layout relative to the new sp (sp_new = sp_old - 64):
//       sp_new + 16   FR_Arg_X    (GR_Parameter_X)
//       sp_new + 32   FR_Arg_Y    (GR_Parameter_Y at the time of the call)
//       sp_new + 48   FR_RESULT   (GR_Parameter_RESULT)
//
// ar.pfs, gp and b0 are saved in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0 across
// the call and restored before returning.
// ---------------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
      add GR_Parameter_Y=-32,sp             // Parameter 2 value (old sp - 32 = new sp + 32)
      nop.f 0
.save ar.pfs,GR_SAVE_PFS
      mov GR_SAVE_PFS=ar.pfs                // Save ar.pfs
}
{ .mfi
.fframe 64
      add sp=-64,sp                         // Create new stack
      nop.f 0
      mov GR_SAVE_GP=gp                     // Save gp
};;

{ .mmi
      stfe [GR_Parameter_Y] = FR_Arg_Y,16   // Save Parameter 2 on stack;
                                            // post-increment leaves pointer at sp + 48
      add GR_Parameter_X = 16,sp            // Parameter 1 address
.save b0,GR_SAVE_B0
      mov GR_SAVE_B0=b0                     // Save b0
};;

.body
{ .mib
      stfe [GR_Parameter_X] = FR_Arg_X      // Store Parameter 1 on stack
      add GR_Parameter_RESULT = 0,GR_Parameter_Y
      nop.b 0                               // Parameter 3 address
}
// No stop between these two bundles: the store below reads GR_Parameter_Y
// (sp + 48) in the same instruction group in which the add rewinds it.
{ .mib
      stfe [GR_Parameter_Y] = FR_RESULT     // Store Parameter 3 on stack
      add GR_Parameter_Y = -16,GR_Parameter_Y // back to Parameter 2 address (sp + 32)
      br.call.sptk b0=__libm_error_support# // Call error handling function
};;

{ .mmi
      nop.m 0
      nop.m 0
      add GR_Parameter_RESULT = 48,sp       // recompute result slot address after the call
};;

{ .mmi
      ldfe f8 = [GR_Parameter_RESULT]       // Get return result off stack
.restore sp
      add sp = 64,sp                        // Restore stack pointer
      mov b0 = GR_SAVE_B0                   // Restore return address
};;

{ .mib
      mov gp = GR_SAVE_GP                   // Restore gp
      mov ar.pfs = GR_SAVE_PFS              // Restore ar.pfs
      br.ret.sptk b0                        // Return
};;

LOCAL_LIBM_END(__libm_error_region#)

.type __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -