⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 libm_sincosl.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 5 页
字号:
//   A := U_hi + V_hi//   if |U_hi| >= |V_hi| then//      a := (U_hi - A) + V_hi//   else//      a := (V_hi - A) + U_hi//   endif//   ...order in computing "a" must be observed. This branch is//   ...best implemented by predicates.//   ...A + a  is U_hi + V_hi accurately. Moreover, "a" is//   ...much smaller than A: |a| <= (1/2)ulp(A).////   ...Just need to calculate   s + A + a + t//   C_hi := s + A    t := t + a//   C_lo := (s - C_hi) + A//   C_lo := C_lo + t////   ...Final steps for reduction//   r := C_hi + C_lo//   c := (C_hi - r) + C_lo////   ...At this point, we have r and c//   ...And all we need is a couple of terms of the corresponding//   ...Taylor series.////   If i_1 = 0//      poly := c + r*FR_rsq*(S_1 + FR_rsq*S_2)//      result := r//   Else//      poly := FR_rsq*(C_1 + FR_rsq*C_2)//      result := 1//   Endif////   If i_0 = 1, result := -result////   Last operation. Perform in user-set rounding mode////   result := (i_0 == 0?     result + poly ://                            result - poly )//   Return////   Large Arguments: For arguments above 2**63, a Payne-Hanek//   style argument reduction is used and pi_by_2 reduce is called.//RODATA.align 64LOCAL_OBJECT_START(FSINCOSL_CONSTANTS)sincosl_table_p://data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2//data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0//data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1//data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2//data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3//data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1//data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2data8 0xA2F9836E4E44152A, 0x00003FFE // Inv_pi_by_2data8 0xC84D32B0CE81B9F1, 0x00004016 // P_0data8 0xC90FDAA22168C235, 0x00003FFF // P_1data8 0xECE675D1FC8F8CBB, 0x0000BFBD // P_2data8 0xB7ED8FBBACC19C60, 0x0000BF7C // P_3data8 0x8D848E89DBD171A1, 0x0000BFBF // d_1data8 0xD5394C3618A66F8E, 0x0000BF7C // d_2LOCAL_OBJECT_END(FSINCOSL_CONSTANTS)LOCAL_OBJECT_START(sincosl_table_d)//data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4//data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0data8 0xC90FDAA22168C234, 0x00003FFE // pi_by_4data8 0xA397E5046EC6B45A, 0x00003FE7 // Inv_P_0data4 0x3E000000, 0xBE000000         // 2^-3 and -2^-3data4 0x2F000000, 0xAF000000         // 2^-33 and -2^-33data4 0x9E000000, 0x00000000         // -2^-67data4 0x00000000, 0x00000000         // padLOCAL_OBJECT_END(sincosl_table_d)LOCAL_OBJECT_START(sincosl_table_pp)//data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8//data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7//data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6//data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5//data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1//data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hi//data4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4//data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3//data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2//data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lodata8 0xCC8ABEBCA21C0BC9, 0x00003FCE // PP_8data8 0xD7468A05720221DA, 0x0000BFD6 // PP_7data8 0xB092382F640AD517, 0x00003FDE // PP_6data8 0xD7322B47D1EB75A4, 0x0000BFE5 // PP_5data8 0xFFFFFFFFFFFFFFFE, 0x0000BFFD // C_1data8 0xAAAA000000000000, 0x0000BFFC // PP_1_hidata8 0xB8EF1D2ABAF69EEA, 0x00003FEC // PP_4data8 0xD00D00D00D03BB69, 0x0000BFF2 // PP_3data8 0x8888888888888962, 0x00003FF8 // PP_2data8 0xAAAAAAAAAAAB0000, 0x0000BFEC // PP_1_loLOCAL_OBJECT_END(sincosl_table_pp)LOCAL_OBJECT_START(sincosl_table_qq)//data4 0xC2B0FE52, 0xD56232EF, 0x00003FD2 // QQ_8//data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA // QQ_7//data4 0x9C716658, 0x8F76C650, 0x00003FE2 // QQ_6//data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9 // QQ_5//data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC // S_1//data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1//data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4//data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3//data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2data8 0xD56232EFC2B0FE52, 0x00003FD2 // QQ_8data8 0xC9C99ABA2B48DCA6, 0x0000BFDA // QQ_7data8 0x8F76C6509C716658, 0x00003FE2 // QQ_6data8 0x93F27DBAFDA8D0FC, 0x0000BFE9 // QQ_5data8 0xAAAAAAAAAAAAAAAA, 0x0000BFFC // S_1data8 0x8000000000000000, 0x0000BFFE // QQ_1data8 0xD00D00D00C6E5041, 0x00003FEF // QQ_4data8 0xB60B60B60B607F60, 0x0000BFF5 // QQ_3data8 0xAAAAAAAAAAAAAA9B, 0x00003FFA // QQ_2LOCAL_OBJECT_END(sincosl_table_qq)LOCAL_OBJECT_START(sincosl_table_c)//data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1//data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2//data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3//data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4//data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5data8 0xFFFFFFFFFFFFFFFE, 0x0000BFFD // C_1data8 0xAAAAAAAAAAAA719F, 0x00003FFA // C_2data8 0xB60B60B60356F994, 0x0000BFF5 // C_3data8 0xD00CFFD5B2385EA9, 0x00003FEF // C_4data8 0x93E4BD18292A14CD, 0x0000BFE9 // C_5LOCAL_OBJECT_END(sincosl_table_c)LOCAL_OBJECT_START(sincosl_table_s)//data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1//data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2//data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3//data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4//data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5data8 0xAAAAAAAAAAAAAAAA, 0x0000BFFC // S_1data8 0x88888888888868DB, 0x00003FF8 // S_2data8 0xD00D00D0055EFD4B, 0x0000BFF2 // S_3data8 0xB8EF1C5D839730B9, 0x00003FEC // S_4data8 0xD71EA3A4E5B3F492, 0x0000BFE5 // S_5data4 0x38800000, 0xB8800000         // two**-14 and -two**-14LOCAL_OBJECT_END(sincosl_table_s)FR_Input_X        = f8FR_Result         = f8FR_ResultS        = f9FR_ResultC        = f8FR_r              = f8FR_c              = f9FR_norm_x         = f9FR_inv_pi_2to63   = f10FR_rshf_2to64     = f11FR_2tom64         = f12FR_rshf           = f13FR_N_float_signif = f14FR_abs_x          = f15FR_r6             = f32FR_r7             = f33FR_Pi_by_4        = f34FR_Two_to_M14     = f35FR_Neg_Two_to_M14 = f36FR_Two_to_M33     = f37FR_Neg_Two_to_M33 = f38FR_Neg_Two_to_M67 = f39FR_Inv_pi_by_2    = f40FR_N_float        = f41FR_N_fix          = f42FR_P_1            = f43FR_P_2            = f44FR_P_3            = f45FR_s              = f46FR_w              = f47FR_Z              = f50FR_A              = f51FR_a              = f52FR_t              = f53FR_U_1            = f54FR_U_2            = f55FR_C_1            = f56FR_C_2            = f57FR_C_3            = f58FR_C_4            = f59FR_C_5            = f60FR_S_1            = f61FR_S_2            = f62FR_S_3            = f63FR_S_4            = f64FR_S_5            = f65FR_r_hi           = f68FR_r_lo           = f69FR_rsq            = f70FR_r_cubed        = f71FR_C_hi           = f72FR_N_0            = f73FR_d_1            = f74FR_V_hi           = f75FR_V_lo           = f76FR_U_hi           = f77FR_U_lo           = f78FR_U_hiabs        = f79FR_V_hiabs        = f80FR_PP_8           = f81FR_QQ_8           = f101FR_PP_7           = f82FR_QQ_7           = f102FR_PP_6           = f83FR_QQ_6           = f103FR_PP_5           = f84FR_QQ_5           = f104FR_PP_4           = f85FR_QQ_4           = f105FR_PP_3           = f86FR_QQ_3           = f106FR_PP_2           = f87FR_QQ_2           = f107FR_QQ_1           = f108FR_r_hi_sq        = f88FR_N_0_fix        = f89FR_Inv_P_0        = f90FR_d_2            = f93FR_P_0            = f95FR_C_lo           = f96FR_PP_1           = f97FR_PP_1_lo        = f98FR_ArgPrime       = f99FR_inexact        = f100FR_Neg_Two_to_M3  = f109FR_Two_to_M3      = f110FR_poly_hiS       = f66FR_poly_hiC       = f112FR_poly_loS       = f67FR_poly_loC       = f113FR_polyS          = f92FR_polyC          = f114FR_cS             = FR_cFR_cC             = f115FR_corrS          = f91FR_corrC          = f116FR_U_hiC          = f117FR_U_loC          = f118FR_VS             = f75FR_VC             = f119FR_FirstS         = f120FR_FirstC         = f121FR_U_hiS          = FR_U_hiFR_U_loS          = FR_U_loFR_Tmp            = f94sincos_pResSin = r34sincos_pResCos = r35GR_exp_m2_to_m3= r36GR_N_Inc       = r37GR_Cis         = r38GR_signexp_x   = r40GR_exp_x       = r40GR_exp_mask    = r41GR_exp_2_to_63 = r42GR_exp_2_to_m3 = r43GR_exp_2_to_24 = r44GR_N_SignS     = r45GR_N_SignC     = r46GR_N_SinCos    = r47GR_sig_inv_pi  = r48GR_rshf_2to64  = r49GR_exp_2tom64  = r50GR_rshf        = r51GR_ad_p        = r52GR_ad_d        = r53GR_ad_pp       = r54GR_ad_qq       = r55GR_ad_c        = r56GR_ad_s        = r57GR_ad_ce       = r58GR_ad_se       = r59GR_ad_m14      = r60GR_ad_s1       = r61// For unwind supportGR_SAVE_B0     = r39GR_SAVE_GP     = r40GR_SAVE_PFS    = r41.section .textGLOBAL_IEEE754_ENTRY(sincosl){ .mlx  ///////////////////////////// 1 /////////////////      alloc r32 = ar.pfs,3,27,2,0      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi}{ .mlx      mov GR_N_Inc = 0x0      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)};;{ .mfi ///////////////////////////// 2 /////////////////      addl           GR_ad_p   = @ltoff(FSINCOSL_CONSTANTS#), gp      fclass.m p6, p0 =  FR_Input_X, 0x1E3 // Test x natval, nan, inf      mov GR_exp_2_to_m3 = 0xffff - 3      // Exponent of 2^-3}{ .mfb      mov GR_Cis = 0x0      fnorm.s1 FR_norm_x = FR_Input_X      // Normalize x    br.cond.sptk _COMMON_SINCOSL};;GLOBAL_IEEE754_END(sincosl)GLOBAL_LIBM_ENTRY(__libm_sincosl){ .mlx  ///////////////////////////// 1 /////////////////      alloc r32 = ar.pfs,3,27,2,0      movl GR_sig_inv_pi = 0xa2f9836e4e44152a // significand of 1/pi}{ .mlx      mov GR_N_Inc = 0x0      movl GR_rshf_2to64 = 0x47e8000000000000 // 1.1000 2^(63+64)};;{ .mfi ///////////////////////////// 2 /////////////////      addl           GR_ad_p   = @ltoff(FSINCOSL_CONSTANTS#), gp      fclass.m p6, p0 =  FR_Input_X, 0x1E3 // Test x natval, nan, inf      mov GR_exp_2_to_m3 = 0xffff - 3      // Exponent of 2^-3}{ .mfb      mov GR_Cis = 0x1      fnorm.s1 FR_norm_x = FR_Input_X      // Normalize x      nop.b 0};;_COMMON_SINCOSL:{ .mfi ///////////////////////////// 3 /////////////////      setf.sig FR_inv_pi_2to63 = GR_sig_inv_pi // Form 1/pi * 2^63      nop.f 0      mov GR_exp_2tom64 = 0xffff - 64      // Scaling constant to compute N}{ .mlx      setf.d FR_rshf_2to64 = GR_rshf_2to64    // Form const 1.1000 * 2^(63+64)      movl GR_rshf = 0x43e8000000000000       // Form const 1.1000 * 2^63};;{ .mfi ///////////////////////////// 4 /////////////////      ld8 GR_ad_p = [GR_ad_p]              // Point to Inv_pi_by_2      fclass.m p7, p0 = FR_Input_X, 0x0b   // Test x denormal      nop.i 0};;{ .mfi    ///////////////////////////// 5 /////////////////      getf.exp GR_signexp_x = FR_Input_X   // Get sign and exponent of x      fclass.m p10, p0 = FR_Input_X, 0x007 // Test x zero      nop.i 0}{ .mib      mov GR_exp_mask = 0x1ffff            // Exponent mask      nop.i 0(p6)  br.cond.spnt SINCOSL_SPECIAL         // Branch if x natval, nan, inf};;{ .mfi ///////////////////////////// 6 /////////////////      setf.exp FR_2tom64 = GR_exp_2tom64   // Form 2^-64 for scaling N_float      nop.f 0      add GR_ad_d = 0x70, GR_ad_p          // Point to constant table d}{ .mib      setf.d FR_rshf = GR_rshf         // Form right shift const 1.1000 * 2^63      mov  GR_exp_m2_to_m3 = 0x2fffc       // Form -(2^-3)(p7)  br.cond.spnt SINCOSL_DENORMAL        // Branch if x denormal};;SINCOSL_COMMON2:{ .mfi ///////////////////////////// 7 /////////////////      and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x      fclass.nm p8, p0 = FR_Input_X, 0x1FF // Test x unsupported type      mov GR_exp_2_to_63 = 0xffff + 63     // Exponent of 2^63}{ .mib      add GR_ad_pp = 0x40, GR_ad_d         // Point to constant table pp      mov GR_exp_2_to_24 = 0xffff + 24     // Exponent of 2^24(p10) br.cond.spnt SINCOSL_ZERO            // Branch if x zero};;{ .mfi ///////////////////////////// 8 /////////////////      ldfe FR_Inv_pi_by_2 = [GR_ad_p], 16  // Load 2/pi      fcmp.eq.s0 p15, p0 = FR_Input_X, f0  // Dummy to set denormal      add GR_ad_qq = 0xa0, GR_ad_pp        // Point to constant table qq}{ .mfi      ldfe FR_Pi_by_4 = [GR_ad_d], 16      // Load pi/4 for range test      nop.f 0      cmp.ge p10,p0 = GR_exp_x, GR_exp_2_to_63   // Is |x| >= 2^63};;{ .mfi ///////////////////////////// 9 /////////////////      ldfe FR_P_0 = [GR_ad_p], 16          // Load P_0 for pi/4 <= |x| < 2^63      fmerge.s FR_abs_x = f1, FR_norm_x    // |x|      add GR_ad_c = 0x90, GR_ad_qq         // Point to constant table c}{ .mfi      ldfe FR_Inv_P_0 = [GR_ad_d], 16      // Load 1/P_0 for pi/4 <= |x| < 2^63      nop.f 0      cmp.ge p7,p0 = GR_exp_x, GR_exp_2_to_24   // Is |x| >= 2^24};;{ .mfi ///////////////////////////// 10 /////////////////      ldfe FR_P_1 = [GR_ad_p], 16          // Load P_1 for pi/4 <= |x| < 2^63      nop.f 0      add GR_ad_s = 0x50, GR_ad_c          // Point to constant table s}{ .mfi      ldfe FR_PP_8 = [GR_ad_pp], 16        // Load PP_8 for 2^-3 < |r| < pi/4      nop.f 0      nop.i 0};;{ .mfi ///////////////////////////// 11 /////////////////      ldfe FR_P_2 = [GR_ad_p], 16          // Load P_2 for pi/4 <= |x| < 2^63      nop.f 0      add GR_ad_ce = 0x40, GR_ad_c         // Point to end of constant table c}{ .mfi      ldfe FR_QQ_8 = [GR_ad_qq], 16        // Load QQ_8 for 2^-3 < |r| < pi/4      nop.f 0      nop.i 0};;{ .mfi ///////////////////////////// 12 /////////////////      ldfe FR_QQ_7 = [GR_ad_qq], 16        // Load QQ_7 for 2^-3 < |r| < pi/4      fma.s1  FR_N_float_signif = FR_Input_X, FR_inv_pi_2to63, FR_rshf_2to64      add GR_ad_se = 0x40, GR_ad_s         // Point to end of constant table s}{ .mib      ldfe FR_PP_7 = [GR_ad_pp], 16        // Load PP_7 for 2^-3 < |r| < pi/4      mov GR_ad_s1 = GR_ad_s               // Save pointer to S_1(p10) br.cond.spnt SINCOSL_ARG_TOO_LARGE   // Branch if |x| >= 2^63                                           // Use Payne-Hanek Reduction};;{ .mfi ///////////////////////////// 13 /////////////////      ldfe FR_P_3 = [GR_ad_p], 16          // Load P_3 for pi/4 <= |x| < 2^63      fmerge.se FR_r = FR_norm_x, FR_norm_x // r = x, in case |x| < pi/4      add GR_ad_m14 = 0x50, GR_ad_s        // Point to constant table m14}{ .mfb      ldfps FR_Two_to_M3, FR_Neg_Two_to_M3 = [GR_ad_d], 8      fma.s1 FR_rsq = FR_norm_x, FR_norm_x, f0 // rsq = x*x, in case |x| < pi/4(p7)  br.cond.spnt SINCOSL_LARGER_ARG      // Branch if 2^24 <= |x| < 2^63                                           // Use pre-reduction};;{ .mmf ///////////////////////////// 14 /////////////////      ldfe FR_PP_6 = [GR_ad_pp], 16       // Load PP_6 for normal path      ldfe FR_QQ_6 = [GR_ad_qq], 16       // Load QQ_6 for normal path      fmerge.se FR_c = f0, f0             // c = 0 in case |x| < pi/4};;{ .mmf ///////////////////////////// 15 /////////////////      ldfe FR_PP_5 = [GR_ad_pp], 16       // Load PP_5 for normal path      ldfe FR_QQ_5 = [GR_ad_qq], 16       // Load QQ_5 for normal path      nop.f 0};;// Here if 0 < |x| < 2^24{ .mfi ///////////////////////////// 17 /////////////////      ldfe FR_S_5 = [GR_ad_se], -16       // Load S_5 if i_1=0      fcmp.lt.s1  p6, p7 = FR_abs_x, FR_Pi_by_4  // Test |x| < pi/4      nop.i 0}{ .mfi      ldfe FR_C_5 = [GR_ad_ce], -16       // Load C_5 if i_1=1      fms.s1 FR_N_float = FR_N_float_signif, FR_2tom64, FR_rshf      nop.i 0};;{ .mmi ///////////////////////////// 18 /////////////////      ldfe FR_S_4 = [GR_ad_se], -16       // Load S_4 if i_1=0      ldfe FR_C_4 = [GR_ad_ce], -16       // Load C_4 if i_1=1      nop.i 0};;////     N  = Arg * 2/pi//     Check if Arg < pi/4//

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -