⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 libm_sincos_large.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 4 页
字号:
}{ .mfi      nop.m 999(p9)    fma.s1 FR_poly_hi = FR_rsq, FR_S_2, FR_S_1      nop.i 999}{ .mfi      nop.m 999////      if (i_1 == 0)://      poly_lo = FR_rsq * S_5 + S_4//      poly_hi = FR_rsq * S_2 + S_1//(p10)   fma.s1 FR_poly_lo = FR_rsq, FR_C_5, FR_C_4      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1 == 0)://      Z = Z * r  for only one of the small r cases - not there//      in original implementation notes.//(p9)    fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_S_3      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly_hi = FR_rsq, FR_C_2, FR_C_1      nop.i 999}{ .mfi      nop.m 999(p10)   fma.d.s1 FR_C_1 = FR_C_1, FR_C_1, f0      nop.i 999 ;;}{ .mfi      nop.m 999(p9)    fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0      nop.i 999}{ .mfi      nop.m 999////      poly_lo = FR_rsq * poly_lo + S_3//      poly_hi = FR_rsq * poly_hi//(p10)   fma.s1 FR_poly_lo = FR_rsq, FR_poly_lo, FR_C_3      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, f0      nop.i 999 ;;}{ .mfi      nop.m 999////  if (i_1 == 0): dummy fmpy's to flag inexact//  r = 1//(p9)    fma.s1 FR_poly_hi = FR_r, FR_poly_hi, f0      nop.i 999}{ .mfi      nop.m 999////  poly_hi = r * poly_hi//        fma.s1  FR_poly = FR_Z, FR_poly_lo, FR_c      nop.i 999 ;;}{ .mfi      nop.m 999(p12)   fms.s1  FR_r = f0, f1, FR_r      nop.i 999 ;;}{ .mfi      nop.m 999////      poly_hi = Z * poly_lo + c//  if i_0 == 1: r = -r//        fma.s1  FR_poly = FR_poly, f1, FR_poly_hi      nop.i 999 ;;}{ .mfi      nop.m 999(p12)   fms.s1 FR_Input_X = FR_r, f1, FR_poly      nop.i 999}{ .mfb      nop.m 999////      poly = poly + poly_hi//(p11)   fma.s1 FR_Input_X = FR_r, f1, FR_poly////      if (i_0 == 0) Result = r + poly//      if (i_0 != 0) Result = r - poly//       br.ret.sptk   b0 ;;}SINCOS_NORMAL_R:{ .mii      nop.m 999        extr.u  GR_i_1 = GR_N_Inc, 0, 1 ;;////      Set table_ptr1 and table_ptr2 to base address of//      constant table.        
cmp.eq.unc p9, p10 = 0x0, GR_i_1 ;;}{ .mfi      nop.m 999        fma.s1  FR_rsq = FR_r, FR_r, f0        extr.u  GR_i_0 = GR_N_Inc, 1, 1 ;;}{ .mfi      nop.m 999        frcpa.s1 FR_r_hi, p6 = f1, FR_r        cmp.eq.unc p11, p12 = 0x0, GR_i_0};;// ******************************************************************// ******************************************************************// ******************************************************************////      r and c have been computed.//      We known whether this is the sine or cosine routine.//      Make sure ftz mode is set - should be automatic when using wre//      Get [i_0,i_1] - two lsb of N_fix_gr alone.//{ .mmi      nop.m 999      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp      nop.i 999};;{ .mmi      ld8 GR_Table_Base = [GR_Table_Base]      nop.m 999      nop.i 999};;{ .mfi(p10)   add GR_Table_Base = 384, GR_Table_Base//(p12)   fms.s1 FR_Input_X = f0, f1, f1(p12)   fms.s1 FR_prelim = f0, f1, f1(p9)    add GR_Table_Base = 224, GR_Table_Base ;;}{ .mmf      nop.m 999(p10)   ldfe FR_QQ_8 = [GR_Table_Base], 16////      if (i_1==0) poly = poly * FR_rsq + PP_1_lo//      else        poly = FR_rsq * poly////(p11)   fma.s1 FR_Input_X = f0, f1, f1 ;;(p11)   fma.s1 FR_prelim = f0, f1, f1 ;;}{ .mmf(p10)   ldfe FR_QQ_7 = [GR_Table_Base], 16////  Adjust table pointers based on i_0//      Compute rsq = r * r//(p9)    ldfe FR_PP_8 = [GR_Table_Base], 16        fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 ;;}{ .mmf(p9)    ldfe FR_PP_7 = [GR_Table_Base], 16(p10)   ldfe FR_QQ_6 = [GR_Table_Base], 16////      Load PP_8 and QQ_8; PP_7 and QQ_7//        frcpa.s1 FR_r_hi, p6 = f1, FR_r_hi ;;}////      if (i_1==0) poly =   PP_7 + FR_rsq * PP_8.//      else        poly =   QQ_7 + FR_rsq * QQ_8.//{ .mmb(p9)    ldfe FR_PP_6 = [GR_Table_Base], 16(p10)   ldfe FR_QQ_5 = [GR_Table_Base], 16      nop.b 999 ;;}{ .mmb(p9)    ldfe FR_PP_5 = [GR_Table_Base], 16(p10)   ldfe FR_S_1 = [GR_Table_Base], 16      nop.b 999 ;;}{ .mmb(p10)   
ldfe FR_QQ_1 = [GR_Table_Base], 16(p9)    ldfe FR_C_1 = [GR_Table_Base], 16      nop.b 999 ;;}{ .mmi(p10)   ldfe FR_QQ_4 = [GR_Table_Base], 16 ;;(p9)    ldfe FR_PP_1 = [GR_Table_Base], 16      nop.i 999 ;;}{ .mmf(p10)   ldfe FR_QQ_3 = [GR_Table_Base], 16////      if (i_1=0) corr = corr + c*c//      else       corr = corr * c//(p9)    ldfe FR_PP_4 = [GR_Table_Base], 16(p10)   fma.s1 FR_poly = FR_rsq, FR_QQ_8, FR_QQ_7 ;;}////      if (i_1=0) poly = rsq * poly + PP_5//      else       poly = rsq * poly + QQ_5//      Load PP_4 or QQ_4//{ .mmf(p9)    ldfe FR_PP_3 = [GR_Table_Base], 16(p10)   ldfe FR_QQ_2 = [GR_Table_Base], 16////      r_hi =   frcpa(frcpa(r)).//      r_cube = r * FR_rsq.//(p9)    fma.s1 FR_poly = FR_rsq, FR_PP_8, FR_PP_7 ;;}////      Do dummy multiplies so inexact is always set.//{ .mfi(p9)    ldfe FR_PP_2 = [GR_Table_Base], 16////      r_lo = r - r_hi//(p9)    fma.s1 FR_U_lo = FR_r_hi, FR_r_hi, f0      nop.i 999 ;;}{ .mmf      nop.m 999(p9)    ldfe FR_PP_1_lo = [GR_Table_Base], 16(p10)   fma.s1 FR_corr = FR_S_1, FR_r_cubed, FR_r}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_6      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1=0) U_lo = r_hi * r_hi//      else       U_lo = r_hi + r//(p9)    fma.s1 FR_corr = FR_C_1, FR_rsq, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1=0) corr = C_1 * rsq//      else       corr = S_1 * r_cubed + r//(p9)    fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_6      nop.i 999}{ .mfi      nop.m 999(p10)   fma.s1 FR_U_lo = FR_r_hi, f1, FR_r      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1=0) U_hi = r_hi + U_hi//      else       U_hi = QQ_1 * U_hi + 1//(p9)    fma.s1 FR_U_lo = FR_r, FR_r_hi, FR_U_lo      nop.i 999}{ .mfi      nop.m 999////      U_hi = r_hi * r_hi//        fms.s1 FR_r_lo = FR_r, f1, FR_r_hi      nop.i 999 ;;}{ .mfi      nop.m 999////      Load PP_1, PP_6, PP_5, and C_1//      Load QQ_1, QQ_6, QQ_5, and S_1//        fma.s1 FR_U_hi = FR_r_hi, FR_r_hi, f0      nop.i 999 ;;}{ 
.mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_5      nop.i 999}{ .mfi      nop.m 999(p10)   fnma.s1 FR_corr = FR_corr, FR_c, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1=0) U_lo = r * r_hi + U_lo//      else       U_lo = r_lo * U_lo//(p9)    fma.s1 FR_corr = FR_corr, FR_c, FR_c      nop.i 999 ;;}{ .mfi      nop.m 999(p9)    fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_5      nop.i 999}{ .mfi      nop.m 999////      if (i_1 =0) U_hi = r + U_hi//      if (i_1 =0) U_lo = r_lo * U_lo////(p9)    fma.d.s1 FR_PP_5 = FR_PP_5, FR_PP_4, f0      nop.i 999 ;;}{ .mfi      nop.m 999(p9)    fma.s1 FR_U_lo = FR_r, FR_r, FR_U_lo      nop.i 999}{ .mfi      nop.m 999(p10)   fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1=0) poly = poly * rsq + PP_6//      else       poly = poly * rsq + QQ_6//(p9)    fma.s1 FR_U_hi = FR_r_hi, FR_U_hi, f0      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_4      nop.i 999}{ .mfi      nop.m 999(p10)   fma.s1 FR_U_hi = FR_QQ_1, FR_U_hi, f1      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.d.s1 FR_QQ_5 = FR_QQ_5, FR_QQ_5, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1!=0) U_hi = PP_1 * U_hi//      if (i_1!=0) U_lo = r * r  + U_lo//      Load PP_3 or QQ_3//(p9)    fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_4      nop.i 999 ;;}{ .mfi      nop.m 999(p9)    fma.s1 FR_U_lo = FR_r_lo, FR_U_lo, f0      nop.i 999}{ .mfi      nop.m 999(p10)   fma.s1 FR_U_lo = FR_QQ_1,FR_U_lo, f0      nop.i 999 ;;}{ .mfi      nop.m 999(p9)    fma.s1 FR_U_hi = FR_PP_1, FR_U_hi, f0      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_3      nop.i 999 ;;}{ .mfi      nop.m 999////      Load PP_2, QQ_2//(p9)    fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_3      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1==0) poly = FR_rsq * poly  + PP_3//      else        poly = FR_rsq * poly  + QQ_3//      Load PP_1_lo//(p9)    fma.s1 FR_U_lo 
= FR_PP_1, FR_U_lo, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1 =0) poly = poly * rsq + pp_r4//      else        poly = poly * rsq + qq_r4//(p9)    fma.s1 FR_U_hi = FR_r, f1, FR_U_hi      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_QQ_2      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1==0) U_lo =  PP_1_hi * U_lo//      else        U_lo =  QQ_1 * U_lo//(p9)    fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_2      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_0==0)  Result = 1//      else         Result = -1//        fma.s1 FR_V = FR_U_lo, f1, FR_corr      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1==0) poly =  FR_rsq * poly + PP_2//      else poly =  FR_rsq * poly + QQ_2//(p9)    fma.s1 FR_poly = FR_rsq, FR_poly, FR_PP_1_lo      nop.i 999 ;;}{ .mfi      nop.m 999(p10)   fma.s1 FR_poly = FR_rsq, FR_poly, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      V = U_lo + corr//(p9)    fma.s1 FR_poly = FR_r_cubed, FR_poly, f0      nop.i 999 ;;}{ .mfi      nop.m 999////      if (i_1==0) poly = r_cube * poly//      else        poly = FR_rsq * poly//        fma.s1  FR_V = FR_poly, f1, FR_V      nop.i 999 ;;}{ .mfi      nop.m 999//(p12)   fms.s1 FR_Input_X = FR_Input_X, FR_U_hi, FR_V(p12)   fms.s1 FR_Input_X = FR_prelim, FR_U_hi, FR_V      nop.i 999}{ .mfb      nop.m 999////      V = V + poly////(p11)   fma.s1 FR_Input_X = FR_Input_X, FR_U_hi, FR_V(p11)   fma.s1 FR_Input_X = FR_prelim, FR_U_hi, FR_V////      if (i_0==0) Result = Result * U_hi + V//      else        Result = Result * U_hi - V//       br.ret.sptk   b0 ;;}////      If cosine, FR_Input_X = 1//      If sine, FR_Input_X = +/-Zero (Input FR_Input_X)//      Results are exact, no exceptions//SINCOS_ZERO:{ .mmb        cmp.eq.unc p6, p7 = 0x1, GR_Sin_or_Cos      nop.m 999      nop.b 999 ;;}{ .mfi      nop.m 999(p7)    fmerge.s FR_Input_X = FR_Input_X, FR_Input_X      nop.i 
999}{ .mfb      nop.m 999(p6)    fmerge.s FR_Input_X = f1, f1       br.ret.sptk   b0 ;;}SINCOS_SPECIAL:////      Path for Arg = +/- QNaN, SNaN, Inf//      Invalid can be raised. SNaNs//      become QNaNs//{ .mfb      nop.m 999        fmpy.s1 FR_Input_X = FR_Input_X, f0        br.ret.sptk   b0 ;;}GLOBAL_LIBM_END(__libm_cos_large)// *******************************************************************// *******************************************************************// *******************************************************************////     Special Code to handle very large argument case.//     Call int __libm_pi_by_2_reduce(x,r,c) for |arguments| >= 2**63//     The interface is custom://       On input://         (Arg or x) is in f8//       On output://         r is in f8//         c is in f9//         N is in r8//     Be sure to allocate at least 2 GP registers as output registers for//     __libm_pi_by_2_reduce.  This routine uses r49-50. These are used as//     scratch registers within the __libm_pi_by_2_reduce routine (for speed).////     We know also that __libm_pi_by_2_reduce preserves f10-15, f71-127.  
//     We use this to eliminate save/restore of key fp registers in this calling
//     function.
//
// *******************************************************************
// *******************************************************************
// *******************************************************************
//
//     __libm_callout_2 / SINCOS_ARG_TOO_LARGE
//
//     Out-of-line path that hands the argument to __libm_pi_by_2_reduce and
//     then dispatches on the size of the reduced argument r:
//       1. Save ar.pfs, gp and b0 in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
//          (the br.call below overwrites b0).
//       2. Load the two comparison thresholds FR_Two_to_M3 and
//          FR_Neg_Two_to_M3 from the table addressed by GR_Table_Base1.
//       3. Call __libm_pi_by_2_reduce.
//       4. GR_N_Inc = GR_Sin_or_Cos + r8 (r8 holds N on return).
//       5. Restore b0, gp and ar.pfs.
//       6. Branch to SINCOS_SMALL_R when
//          FR_Neg_Two_to_M3 < FR_r < FR_Two_to_M3, otherwise to
//          SINCOS_NORMAL_R.
//
//     This entry does not return by itself; both exits are branches to
//     labels defined outside this procedure.
//
//     NOTE(review): the threshold registers are named FR_Two_to_M3 /
//     FR_Neg_Two_to_M3, which suggests +/-2^-3, while the branch comments
//     below say 1/4 -- confirm against the constant table.
//     NOTE(review): FR_r is presumably an alias for f8 (where the reduction
//     routine returns r) -- the alias definition is not visible here.
//
LOCAL_LIBM_ENTRY(__libm_callout_2)
SINCOS_ARG_TOO_LARGE:

.prologue
//      Readjust Table ptr
{ .mfi
        adds  GR_Table_Base1 = -16, GR_Table_Base1
        nop.f 999
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
};;

{ .mmi
        ldfs FR_Two_to_M3 = [GR_Table_Base1],4  // Load threshold, advance ptr by 4
        mov GR_SAVE_GP=gp                       // Save gp
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;

.body
//
//     Call argument reduction with x in f8
//     Returns with N in r8, r in f8, c in f9
//     Assumes f71-127 are preserved across the call
//
{ .mib
        ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1],0
        nop.i 0
        br.call.sptk b0=__libm_pi_by_2_reduce#
};;

//
//     p6 = (r < FR_Two_to_M3); N_Inc = Sin_or_Cos + N
//
{ .mfi
        add   GR_N_Inc = GR_Sin_or_Cos,r8
        fcmp.lt.unc.s1  p6, p0 = FR_r, FR_Two_to_M3
        mov   b0 = GR_SAVE_B0                  // Restore return address
};;

//
//     Only when the first compare set p6: p6 = (r > FR_Neg_Two_to_M3).
//     The .unc form clears p6 when the qualifying predicate is false, so
//     afterwards p6 is set only if both comparisons held.
//
{ .mfi
        mov   gp = GR_SAVE_GP                  // Restore gp
(p6)    fcmp.gt.unc.s1  p6, p0 = FR_r, FR_Neg_Two_to_M3
        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
};;

{ .mbb
        nop.m 999
(p6)    br.cond.spnt SINCOS_SMALL_R            // Branch if |r| < 1/4
        br.cond.sptk SINCOS_NORMAL_R ;;        // Branch if 1/4 <= |r| < pi/4
}

LOCAL_LIBM_END(__libm_callout_2)

// External argument-reduction routine called above.
.type   __libm_pi_by_2_reduce#,@function
.global __libm_pi_by_2_reduce#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -