⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 e_atanhl.s

📁 glibc 库, 不仅可以学习使用库函数,还可以学习函数的具体实现,是提高功力的好资料
💻 S
📖 第 1 页 / 共 3 页
字号:
      fmax.s1       FR_AA = FR_X,f1 // for S_lo,form AA = max(X,1.0)      nop.i         0}{ .mfi      shladd        GR_ad_tbl_1 = GR_Index1,4,GR_ad_tbl_1 // point to G_1      nop.f         0      mov           GR_Bias = 0x0FFFF // exponent bias};;{ .mfi      ldfps         FR_G,FR_H = [GR_ad_tbl_1],8  // load G_1,H_1      fmerge.se     FR_S_hi =  f1,FR_Z // form |x+1|      nop.i         0};;{ .mfi      getf.exp      GR_N =  FR_Z // get N = exponent of x+1      nop.f         0      nop.i         0}{ .mfi      ldfd          FR_h = [GR_ad_tbl_1] // load h_1      fnma.s1       FR_R1 = FR_B_lo,FR_X,FR_R1 // r1 = r1-b_lo*x      nop.i         0};;{ .mfi      ldfe          FR_log2_hi = [GR_ad_q],16 // load log2_hi      nop.f         0      pmpyshr2.u    GR_X_1 = GR_X_0,GR_Z_1,15 // get bits 30-15 of X_0 * Z_1};;////    For performance,don't use result of pmpyshr2.u for 4 cycles.//{ .mfi      ldfe          FR_log2_lo = [GR_ad_q],16 // load log2_lo      nop.f         0      sub           GR_N = GR_N,GR_Bias };;{ .mfi      ldfe          FR_Q4 = [GR_ad_q],16  // load Q4      fms.s1        FR_S_lo = FR_AA,f1,FR_Z // form S_lo = AA - Z       sub           GR_minus_N = GR_Bias,GR_N // form exponent of 2^(-N)};;{ .mmf      ldfe          FR_Q3 = [GR_ad_q],16 // load Q3      // put integer N into rightmost significand      setf.sig      FR_float_N = GR_N      fmin.s1       FR_BB = FR_X,f1 // for S_lo,form BB = min(X,1.0)};;{ .mfi      ldfe          FR_Q2 = [GR_ad_q],16 // load Q2      nop.f         0      extr.u        GR_Index2 = GR_X_1,6,4 // extract bits 6-9 of X_1 };;{ .mmi      ldfe          FR_Q1 = [GR_ad_q] // load Q1      shladd        GR_ad_z_2 = GR_Index2,2,GR_ad_z_2 // point to Z_2      nop.i         0};;{ .mmi      ld4           GR_Z_2 = [GR_ad_z_2] // load Z_2      shladd        GR_ad_tbl_2 = GR_Index2,4,GR_ad_tbl_2 // point to G_2      nop.i         0};;{ .mfi      ldfps         FR_G2,FR_H2 = [GR_ad_tbl_2],8 // load G_2,H_2      nop.f         0      nop.i         0};;{ 
.mfi      ldfd          FR_h2 = [GR_ad_tbl_2] // load h_2      fma.s1        FR_S_lo = FR_S_lo,f1,FR_BB // S_lo = S_lo + BB      nop.i         0}{ .mfi      setf.exp      FR_2_to_minus_N = GR_minus_N // form 2^(-N)      fma.s1        FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3      nop.i         0};;{ .mfi      nop.m         0      nop.f         0      pmpyshr2.u    GR_X_2 = GR_X_1,GR_Z_2,15 // get bits 30-15 of X_1 * Z_2};;////    For performance,don't use result of pmpyshr2.u for 4 cycles//{ .mfi      add           GR_ad2_tbl_3 = 8,GR_ad_tbl_3      nop.f         0      nop.i         0}{ .mfi      nop.m         0      nop.f         0       nop.i         0};;{ .mfi      nop.m         0      nop.f         0       nop.i         0};;{ .mfi      nop.m         0      nop.f         0       nop.i         0};;////    Now GR_X_2 can be used//{ .mfi      nop.m         0      nop.f         0      extr.u        GR_Index3 = GR_X_2,1,5 // extract bits 1-5 of X_2}{ .mfi      nop.m         0      fma.s1        FR_S_lo = FR_S_lo,f1,FR_X_lo // S_lo = S_lo + Arg_lo      nop.i         0};;{ .mfi      shladd        GR_ad_tbl_3 = GR_Index3,4,GR_ad_tbl_3 // point to G_3      fcvt.xf       FR_float_N = FR_float_N      nop.i         0}{ .mfi      shladd        GR_ad2_tbl_3 = GR_Index3,4,GR_ad2_tbl_3 // point to h_3      fma.s1        FR_Q1 = FR_Q1,FR_Half,f0 // sign(arg)*Q1/2      nop.i         0};;{ .mmi      ldfps         FR_G3,FR_H3 = [GR_ad_tbl_3],8 // load G_3,H_3      ldfd          FR_h3 = [GR_ad2_tbl_3] // load h_3      nop.i         0};;{ .mfi      nop.m         0      fmpy.s1       FR_G = FR_G,FR_G2 // G = G_1 * G_2      nop.i         0}{ .mfi      nop.m         0      fadd.s1       FR_H = FR_H,FR_H2 // H = H_1 + H_2      nop.i         0};;{ .mfi      nop.m         0      fadd.s1       FR_h = FR_h,FR_h2 // h = h_1 + h_2      nop.i         0};;{ .mfi      nop.m         0      // S_lo = S_lo * 2^(-N)      fma.s1        FR_S_lo = FR_S_lo,FR_2_to_minus_N,f0      nop.i         0};;{ .mfi  
    nop.m         0      fmpy.s1       FR_G = FR_G,FR_G3 // G = (G_1 * G_2) * G_3      nop.i         0}{ .mfi      nop.m         0      fadd.s1       FR_H = FR_H,FR_H3 // H = (H_1 + H_2) + H_3      nop.i         0};;{ .mfi      nop.m         0      fadd.s1       FR_h = FR_h,FR_h3 // h = (h_1 + h_2) + h_3      nop.i         0};;{ .mfi      nop.m         0      fms.s1        FR_r = FR_G,FR_S_hi,f1 // r = G * S_hi - 1      nop.i         0}{ .mfi      nop.m         0      // Y_hi = N * log2_hi + H      fma.s1        FR_Y_hi = FR_float_N,FR_log2_hi,FR_H      nop.i         0};;{ .mfi      nop.m         0      fma.s1        FR_h = FR_float_N,FR_log2_lo,FR_h // h = N * log2_lo + h      nop.i         0};;{ .mfi      nop.m         0      fma.s1        FR_r = FR_G,FR_S_lo,FR_r // r = G * S_lo + (G * S_hi - 1)      nop.i         0};;{ .mfi      nop.m         0      fma.s1        FR_poly_lo = FR_r,FR_Q4,FR_Q3 // poly_lo = r * Q4 + Q3      nop.i         0}{ .mfi      nop.m         0      fmpy.s1       FR_rsq = FR_r,FR_r // rsq = r * r      nop.i         0};;{ .mfi      nop.m         0      fma.s1        FR_05r = FR_r,FR_Half,f0 // sign(arg)*r/2      nop.i         0};;{ .mfi      nop.m         0      // poly_lo = poly_lo * r + Q2      fma.s1        FR_poly_lo = FR_poly_lo,FR_r,FR_Q2      nop.i         0}{ .mfi      nop.m         0      fma.s1        FR_rcub = FR_rsq,FR_r,f0 // rcub = r^3      nop.i         0};;{ .mfi      nop.m         0      // poly_hi = sing(arg)*(Q1*r^2 + r)/2      fma.s1        FR_poly_hi = FR_Q1,FR_rsq,FR_05r      nop.i         0};;{ .mfi      nop.m         0      // poly_lo = poly_lo*r^3 + h      fma.s1        FR_poly_lo = FR_poly_lo,FR_rcub,FR_h      nop.i         0};;{ .mfi      nop.m         0      // Y_lo = poly_hi + poly_lo/2      fma.s0        FR_Y_lo = FR_poly_lo,FR_Half,FR_poly_hi      nop.i         0};;{ .mfb      nop.m         0     // Result = arctanh(x) = Y_hi/2 + Y_lo      fma.s0        f8 = FR_Y_hi,FR_Half,FR_Y_lo      br.ret.sptk   b0};;// 
// Taylor's series
//
// atanhl_near_zero: polynomial path.
//   With t = FR_x2 and x = f8 on entry, the code below evaluates
//     f8 = x + (t*x) * (C3 + C5*t + C7*t^2 + C9*t^3
//                          + C11*t^4 + C13*t^5 + C15*t^6 + C17*t^7)
//   The coefficient pairs are first combined with t, then with t^2 (FR_x4),
//   then with t^4 (FR_x8), which keeps the dependency chain short.
//   NOTE(review): FR_x2 and FR_C3..FR_C17 are set before this chunk;
//   FR_x2 is presumably x*x - confirm against the earlier part of the file.
atanhl_near_zero:
{ .mfi
      nop.m         0
      fma.s1        FR_x3 = FR_x2,f8,f0 // x3 = x2 * f8
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_x4 = FR_x2,FR_x2,f0 // x4 = x2 * x2
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        FR_C17 = FR_C17,FR_x2,FR_C15 // C17*x2 + C15
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_C13 = FR_C13,FR_x2,FR_C11 // C13*x2 + C11
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        FR_C9 = FR_C9,FR_x2,FR_C7 // C9*x2 + C7
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_C5 = FR_C5,FR_x2,FR_C3 // C5*x2 + C3
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        FR_x8 = FR_x4,FR_x4,f0 // x8 = x4 * x4
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        FR_C17 = FR_C17,FR_x4,FR_C13 // upper half of the polynomial
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        FR_C9 = FR_C9,FR_x4,FR_C5 // lower half of the polynomial
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        FR_C17 = FR_C17,FR_x8,FR_C9 // upper*x8 + lower
      nop.i         0
};;

{ .mfb
      nop.m         0
      fma.s0        f8 = FR_C17,FR_x3,f8 // result = poly*x3 + f8
      br.ret.sptk   b0
};;

// atanhl_eq_one: |x| = 1.0.
//   Builds a signed infinity (sign taken from f8), copies the argument into
//   FR_Arg_X, sets GR_Parameter_TAG = 130 and branches to
//   __libm_error_region.
atanhl_eq_one:
{ .mfi
      nop.m         0
      frcpa.s0      FR_Rcp,p0 = f1,f0 // get inf,and raise Z flag
      nop.i         0
}
{ .mfi
      nop.m         0
      fmerge.s      FR_Arg_X = f8, f8 // keep a copy of the argument
      nop.i         0
};;

{ .mfb
      mov           GR_Parameter_TAG = 130
      fmerge.s      FR_RESULT = f8,FR_Rcp // result is +-inf
      br.cond.sptk  __libm_error_region // exit if |x| = 1.0
};;

// atanhl_gt_one: |x| > 1.0.
//   Produces a QNaN result, copies the argument into FR_Arg_X, sets
//   GR_Parameter_TAG = 129 and branches to __libm_error_region.
atanhl_gt_one:
{ .mfi
      nop.m         0
      fmerge.s      FR_Arg_X = f8, f8 // keep a copy of the argument
      nop.i         0
};;

{ .mfb
      mov           GR_Parameter_TAG = 129
      frcpa.s0      FR_RESULT,p0 = f0,f0 // get QNaN,and raise invalid
      br.cond.sptk  __libm_error_region // exit if |x| > 1.0
};;

GLOBAL_LIBM_END(atanhl)

// __libm_error_region: common exit used by the error paths above.
//
//   On entry:  FR_Arg_X, FR_Arg_Y, FR_RESULT and GR_Parameter_TAG have been
//              set by the branching code.
//   Action:    allocates a 64-byte frame, spills the three floating-point
//              values to it, and calls __libm_error_support with
//              GR_Parameter_X / GR_Parameter_Y / GR_Parameter_RESULT
//              holding the addresses of those slots.
//   On exit:   f8 is reloaded from the result slot, sp/b0/gp/ar.pfs are
//              restored, and control returns through b0.
//
//   Frame layout after "add sp=-64,sp" (offsets from the new sp):
//     sp+16 : FR_Arg_X   (GR_Parameter_X)
//     sp+32 : FR_Arg_Y   (GR_Parameter_Y at the call)
//     sp+48 : FR_RESULT  (GR_Parameter_RESULT)
//
//   NOTE(review): f8 is reloaded from sp+48 after the call, so the callee
//   presumably may rewrite the result slot - confirm against
//   __libm_error_support.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
        add sp=-64,sp                           // Create new stack
        nop.f 0
        mov GR_SAVE_GP=gp                       // Save gp
};;

{ .mmi
        stfe [GR_Parameter_Y] = FR_Arg_Y,16     // Save Parameter 2 on stack
        add GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0,GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;

.body
{ .mib
        stfe [GR_Parameter_X] = FR_Arg_X        // Store Parameter 1 on stack
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
        nop.b 0
}
{ .mib
        stfe [GR_Parameter_Y] = FR_RESULT       // Store Parameter 3 on stack
        add   GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 slot
        br.call.sptk b0=__libm_error_support#  // Call error handling function
};;

{ .mmi
        nop.m 0
        nop.m 0
        add   GR_Parameter_RESULT = 48,sp      // Recompute result slot address
};;

{ .mmi
        ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
.restore sp
        add   sp = 64,sp                       // Restore stack pointer
        mov   b0 = GR_SAVE_B0                  // Restore return address
};;

{ .mib
        mov   gp = GR_SAVE_GP                  // Restore gp
        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
        br.ret.sptk     b0                     // Return
};;

LOCAL_LIBM_END(__libm_error_region#)

.type   __libm_error_support#,@function
.global __libm_error_support#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -