⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 s_cosl.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 4 页
字号:
};;{ .mmi(p9)  ldfe FR_S_4 = [GR_ad_se], -16    // Load S_4 if i_1=0(p10) ldfe FR_C_4 = [GR_ad_ce], -16    // Load C_4 if i_1=1      nop.i 999};;SINCOSL_SMALL_R_0:// Entry point for 2^-3 < |x| < pi/4.pred.rel "mutex",p9,p10SINCOSL_SMALL_R_1:// Entry point for pi/4 < |x| < 2^24 and |r| < 2^-3.pred.rel "mutex",p9,p10{ .mfi(p9)  ldfe FR_S_3 = [GR_ad_se], -16    // Load S_3 if i_1=0      fma.s1 FR_Z = FR_rsq, FR_rsq, f0 // Z = rsq * rsq      nop.i 999}{ .mfi(p10) ldfe FR_C_3 = [GR_ad_ce], -16    // Load C_3 if i_1=1(p10) fnma.s1 FR_c = FR_c, FR_r, f0    // c = -c * r if i_1=0      nop.i 999};;{ .mmf(p9)  ldfe FR_S_2 = [GR_ad_se], -16    // Load S_2 if i_1=0(p10) ldfe FR_C_2 = [GR_ad_ce], -16    // Load C_2 if i_1=1(p10) fmerge.s FR_r = f1, f1};;{ .mmi(p9)  ldfe FR_S_1 = [GR_ad_se], -16    // Load S_1 if i_1=0(p10) ldfe FR_C_1 = [GR_ad_ce], -16    // Load C_1 if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_Z = FR_Z, FR_r, f0     // Z = Z * r if i_1=0      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly_lo = FR_rsq, FR_S_5, FR_S_4 // poly_lo=rsq*S_5+S_4 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4 // poly_lo=rsq*C_5+C_4 if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1 // poly_hi=rsq*S_2+S_1 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1 // poly_hi=rsq*C_2+C_1 if i_1=1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_Z = FR_Z, FR_rsq, f0             // Z = Z * rsq      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3 // p_lo=p_lo*rsq+S_3, i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3 // p_lo=p_lo*rsq+C_3, i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s0 FR_inexact = FR_S_4, FR_S_4, f0     // Dummy op to set inexact      tbit.z p11,p12 = GR_N_Inc, 1     // p11 if i_0=0, N mod 4 = 0,2                                 
      // p12 if i_0=1, N mod 4 = 1,3}{ .mfi      nop.m 999(p10) fma.s0 FR_inexact = FR_C_1, FR_C_1, f0     // Dummy op to set inexact      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0     // p_hi=p_hi*rsq if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0     // p_hi=p_hi*rsq if i_1=1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_poly = FR_Z, FR_poly_lo, FR_c        // poly=Z*poly_lo+c      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0       // p_hi=r*p_hi if i_1=0      nop.i 999};;{ .mfi      nop.m 999(p12) fms.s1 FR_r = f0, f1, FR_r                     // r = -r if i_0=1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_poly = FR_poly, f1, FR_poly_hi       // poly=poly+poly_hi      nop.i 999};;////      if (i_0 == 0) Result = r + poly//      if (i_0 != 0) Result = r - poly//{ .mfi      nop.m 999(p11) fma.s0 FR_Result = FR_r, f1, FR_poly      nop.i 999}{ .mfb      nop.m 999(p12) fms.s0 FR_Result = FR_r, f1, FR_poly      br.ret.sptk   b0                               // Exit for |r| < 2^-3};;SINCOSL_NORMAL_R://// Here if 2^-3 <= |r| < pi/4// THIS IS THE MAIN PATH//// Enter with r, c, and N_Inc having been computed//{ .mfi      ldfe FR_PP_6 = [GR_ad_pp], 16    // Load PP_6      fma.s1 FR_rsq = FR_r, FR_r, f0   // rsq = r * r      tbit.z p9,p10 = GR_N_Inc, 0      // p9  if i_1=0, N mod 4 = 0,1                                       // p10 if i_1=1, N mod 4 = 2,3}{ .mfi      ldfe FR_QQ_6 = [GR_ad_qq], 16    // Load QQ_6      nop.f 999      nop.i 999};;{ .mmi(p9)  ldfe FR_PP_5 = [GR_ad_pp], 16    // Load PP_5 if i_1=0(p10) ldfe FR_QQ_5 = [GR_ad_qq], 16    // Load QQ_5 if i_1=1      nop.i 999};;SINCOSL_NORMAL_R_0:// Entry for 2^-3 < |x| < pi/4.pred.rel "mutex",p9,p10{ .mmf(p9)  ldfe FR_C_1 = [GR_ad_pp], 16     // Load C_1 if i_1=0(p10) ldfe FR_S_1 = [GR_ad_qq], 16     // Load S_1 if i_1=1      frcpa.s1 FR_r_hi, p6 = f1, FR_r  // r_hi = 
frcpa(r)};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 // poly = rsq*PP_8+PP_7 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 // poly = rsq*QQ_8+QQ_7 if i_1=1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_r_cubed = FR_r, FR_rsq, f0  // rcubed = r * rsq      nop.i 999};;SINCOSL_NORMAL_R_1:// Entry for pi/4 <= |x| < 2^24.pred.rel "mutex",p9,p10{ .mmf(p9)  ldfe FR_PP_1 = [GR_ad_pp], 16             // Load PP_1_hi if i_1=0(p10) ldfe FR_QQ_1 = [GR_ad_qq], 16             // Load QQ_1    if i_1=1      frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi        // r_hi = frpca(frcpa(r))};;{ .mfi(p9)  ldfe FR_PP_4 = [GR_ad_pp], 16             // Load PP_4 if i_1=0(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6 // poly = rsq*poly+PP_6 if i_1=0      nop.i 999}{ .mfi(p10) ldfe FR_QQ_4 = [GR_ad_qq], 16             // Load QQ_4 if i_1=1(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6 // poly = rsq*poly+QQ_6 if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_corr = FR_C_1, FR_rsq, f0       // corr = C_1 * rsq if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r // corr = S_1 * r^3 + r if i_1=1      nop.i 999};;{ .mfi(p9)  ldfe FR_PP_3 = [GR_ad_pp], 16             // Load PP_3 if i_1=0      fma.s1 FR_r_hi_sq = FR_r_hi, FR_r_hi, f0  // r_hi_sq = r_hi * r_hi      nop.i 999}{ .mfi(p10) ldfe FR_QQ_3 = [GR_ad_qq], 16             // Load QQ_3 if i_1=1      fms.s1 FR_r_lo = FR_r, f1, FR_r_hi        // r_lo = r - r_hi      nop.i 999};;{ .mfi(p9)  ldfe FR_PP_2 = [GR_ad_pp], 16             // Load PP_2 if i_1=0(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5 // poly = rsq*poly+PP_5 if i_1=0      nop.i 999}{ .mfi(p10) ldfe FR_QQ_2 = [GR_ad_qq], 16             // Load QQ_2 if i_1=1(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5 // poly = rsq*poly+QQ_5 if i_1=1      nop.i 999};;{ .mfi(p9)  ldfe FR_PP_1_lo = [GR_ad_pp], 16          // Load PP_1_lo if i_1=0(p9)  fma.s1 FR_corr = 
FR_corr, FR_c, FR_c      // corr = corr * c + c if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fnma.s1 FR_corr = FR_corr, FR_c, f0       // corr = -corr * c if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_r_hi_sq // U_lo = r*r_hi+r_hi_sq, i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_U_lo = FR_r_hi, f1, FR_r        // U_lo = r_hi + r if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_hi = FR_r_hi, FR_r_hi_sq, f0  // U_hi = r_hi*r_hi_sq if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_U_hi = FR_QQ_1, FR_r_hi_sq, f1  // U_hi = QQ_1*r_hi_sq+1, i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4 // poly = poly*rsq+PP_4 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4 // poly = poly*rsq+QQ_4 if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo      // U_lo = r * r + U_lo if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0     // U_lo = r_lo * U_lo if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0     // U_hi = PP_1 * U_hi if i_1=0      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3 // poly = poly*rsq+PP_3 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3 // poly = poly*rsq+QQ_3 if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0     // U_lo = r_lo * U_lo if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0      // U_lo = QQ_1 * U_lo if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_hi = FR_r, f1, FR_U_hi        // U_hi = r + U_hi if i_1=0      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2 // poly = poly*rsq+PP_2 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly = FR_rsq, FR_poly, 
FR_QQ_2 // poly = poly*rsq+QQ_2 if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_U_lo = FR_PP_1, FR_U_lo, f0     // U_lo = PP_1 * U_lo if i_1=0      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo // poly =poly*rsq+PP1lo i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0      // poly = poly*rsq if i_1=1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_V = FR_U_lo, f1, FR_corr        // V = U_lo + corr      tbit.z p11,p12 = GR_N_Inc, 1              // p11 if i_0=0, N mod 4 = 0,2                                                // p12 if i_0=1, N mod 4 = 1,3};;{ .mfi      nop.m 999(p9)  fma.s0 FR_inexact = FR_PP_5, FR_PP_4, f0  // Dummy op to set inexact      nop.i 999}{ .mfi      nop.m 999(p10) fma.s0 FR_inexact = FR_QQ_5, FR_QQ_5, f0  // Dummy op to set inexact      nop.i 999};;{ .mfi      nop.m 999(p9)  fma.s1 FR_poly = FR_r_cubed, FR_poly, f0  // poly = poly*r^3 if i_1=0      nop.i 999}{ .mfi      nop.m 999(p10) fma.s1 FR_poly = FR_rsq, FR_poly, f0      // poly = poly*rsq if i_1=1      nop.i 999};;{ .mfi      nop.m 999(p11) fma.s1 FR_tmp_result = f0, f1, f1// tmp_result=+1.0 if i_0=0      nop.i 999}{ .mfi      nop.m 999(p12) fms.s1 FR_tmp_result = f0, f1, f1// tmp_result=-1.0 if i_0=1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_V = FR_poly, f1, FR_V           // V = poly + V      nop.i 999};;// If i_0 = 0  Result =  U_hi + V// If i_0 = 1  Result = -U_hi - V{ .mfi        nop.m 999(p11)        fma.s0 FR_Result = FR_tmp_result, FR_U_hi, FR_V        nop.i 999}{ .mfb        nop.m 999(p12)        fms.s0 FR_Result = FR_tmp_result, FR_U_hi, FR_V        br.ret.sptk   b0                     // Exit for 2^-3 <= |r| < pi/4};;SINCOSL_ZERO:// Here if x = 0{ .mfi      cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos      nop.f 999      nop.i 999};;{ .mfi      nop.m 999(p7)  fmerge.s FR_Result = FR_Input_X, FR_Input_X // If sin, result = input      nop.i 999}{ .mfb      nop.m 999(p6)  fma.s0 
FR_Result = f1, f1, f0    // If cos, result=1.0      br.ret.sptk   b0                  // Exit for x=0};;SINCOSL_DENORMAL:{ .mmb      getf.exp GR_signexp_x = FR_norm_x   // Get sign and exponent of x      nop.m 999      br.cond.sptk  SINCOSL_COMMON        // Return to common code};;SINCOSL_SPECIAL:{ .mfb        nop.m 999////      Path for Arg = +/- QNaN, SNaN, Inf//      Invalid can be raised. SNaNs//      become QNaNs//        fmpy.s0 FR_Result = FR_Input_X, f0        br.ret.sptk   b0 ;;}GLOBAL_IEEE754_END(cosl)// *******************************************************************// *******************************************************************// *******************************************************************////     Special Code to handle very large argument case.//     Call int __libm_pi_by_2_reduce(x,r,c) for |arguments| >= 2**63//     The interface is custom://       On input://         (Arg or x) is in f8//       On output://         r is in f8//         c is in f9//         N is in r8//     Be sure to allocate at least 2 GP registers as output registers for//     __libm_pi_by_2_reduce.  This routine uses r59-60. These are used as//     scratch registers within the __libm_pi_by_2_reduce routine (for speed).////     We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127.  
//     We use this to eliminate save/restore of key fp registers in this calling
//     function.
//
// *******************************************************************
// *******************************************************************
// *******************************************************************

//
//     __libm_callout / SINCOSL_ARG_TOO_LARGE
//
//     Out-of-line path that hands argument reduction to
//     __libm_pi_by_2_reduce and then re-enters the polynomial code.
//
//     Steps performed here:
//       1. Save ar.pfs, gp and b0 in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
//          (with matching .save unwind annotations for ar.pfs and b0).
//       2. Build the constants 2^-3 and -(2^-3) from GR_exp_2_to_m3 and
//          GR_exp_m2_to_m3.
//       3. Call __libm_pi_by_2_reduce.
//       4. Form GR_N_Inc = GR_Sin_or_Cos + r8 and restore b0, gp, ar.pfs.
//       5. Branch to SINCOSL_SMALL_R when -(2^-3) < r < 2^-3, otherwise to
//          SINCOSL_NORMAL_R.
//
//     This block never returns by itself; both exits are branches to
//     labels defined outside this block.
//
LOCAL_LIBM_ENTRY(__libm_callout)
SINCOSL_ARG_TOO_LARGE:
.prologue
{ .mfi
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
};;

{ .mmi
        setf.exp FR_Two_to_M3 = GR_exp_2_to_m3  // Form 2^-3
        mov GR_SAVE_GP=gp                       // Save gp
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;

.body
//
//     Call argument reduction with x in f8
//     Returns with N in r8, r in f8, c in f9
//     Assumes f71-127 are preserved across the call
//
{ .mib
        setf.exp FR_Neg_Two_to_M3 = GR_exp_m2_to_m3 // Form -(2^-3)
        nop.i 0
        br.call.sptk b0=__libm_pi_by_2_reduce#
};;

//
//     Back from the reduction routine: fold the returned N (r8) into
//     N_Inc and start the range test on r while the saved state is
//     being restored.
//
{ .mfi
        add   GR_N_Inc = GR_Sin_or_Cos,r8      // N_Inc = Sin_or_Cos + N
        fcmp.lt.unc.s1        p6, p0 = FR_r, FR_Two_to_M3 // p6 = (r < 2^-3)
        mov   b0 = GR_SAVE_B0                  // Restore return address
};;

{ .mfi
        mov   gp = GR_SAVE_GP                  // Restore gp
        // Second half of the range test, only evaluated when the first
        // half held: p6 = (r > -(2^-3)), so p6 ends up meaning |r| < 2^-3.
(p6)    fcmp.gt.unc.s1        p6, p0 = FR_r, FR_Neg_Two_to_M3
        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
};;

{ .mbb
        nop.m 999
(p6)    br.cond.spnt SINCOSL_SMALL_R     // Branch if |r|< 2^-3 for |x| >= 2^63
        br.cond.sptk SINCOSL_NORMAL_R    // Branch if |r|>=2^-3 for |x| >= 2^63
};;

LOCAL_LIBM_END(__libm_callout)

// External argument-reduction routine called above.
.type   __libm_pi_by_2_reduce#,@function
.global __libm_pi_by_2_reduce#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -