📄 s_cos.s

📁 Glibc 2.3.2源代码(解压后有100多M)
💻 S
📖 第 1 页 / 共 5 页
字号:
////   * If Arg is +-0, +-inf, NaN, NaT, go to Step 10 for special//     handling.//   * If |Arg| < 2^24, go to Step 2 for reduction of moderate//     arguments. This is the most likely case.//   * If |Arg| < 2^63, go to Step 8 for pre-reduction of large//     arguments.//   * If |Arg| >= 2^63, go to Step 10 for special handling.////  Step 2. Reduction of moderate arguments.////  If |Arg| < pi/4 	...quick branch//     N_fix := N_inc	(integer)//     r     := Arg//     c     := 0.0//     Branch to Step 4, Case_1_complete//  Else 		...cf. argument reduction//     N     := Arg * two_by_PI	(fp)//     N_fix := fcvt.fx( N )	(int)//     N     := fcvt.xf( N_fix )//     N_fix := N_fix + N_inc//     s     := Arg - N * P_1	(first piece of pi/2)//     w     := -N * P_2	(second piece of pi/2)////     If |s| >= 2^(-33)//        go to Step 3, Case_1_reduce//     Else//        go to Step 7, Case_2_reduce//     Endif//  Endif////  Step 3. Case_1_reduce.////  r := s + w//  c := (s - r) + w	...observe order////  Step 4. Case_1_complete////  ...At this point, the reduced argument alpha is//  ...accurately represented as r + c.//  If |r| < 2^(-3), go to Step 6, small_r.////  Step 5. Normal_r.////  Let [i_0 i_1] by the 2 lsb of N_fix.//  FR_rsq  := r * r//  r_hi := frcpa( frcpa( r ) )//  r_lo := r - r_hi////  If i_1 = 0, then//    poly := r*FR_rsq*(PP_1_lo + FR_rsq*(PP_2 + ... FR_rsq*PP_8))//    U_hi := r + PP_1_hi*r_hi*r_hi*r_hi	...any order//    U_lo := PP_1_hi*r_lo*(r*r + r*r_hi + r_hi*r_hi)//    correction := c + c*C_1*FR_rsq		...any order//  Else//    poly := FR_rsq*FR_rsq*(QQ_2 + FR_rsq*(QQ_3 + ... + FR_rsq*QQ_8))//    U_hi := 1 + QQ_1 * r_hi * r_hi		...any order//    U_lo := QQ_1 * r_lo * (r + r_hi)//    correction := -c*(r + S_1*FR_rsq*r)	...any order//  Endif////  V := poly + (U_lo + correction)	...observe order////  result := (i_0 == 0?   1.0 : -1.0)////  Last instruction in user-set rounding mode////  result := (i_0 == 0?   result*U_hi + V ://                        result*U_hi - V)////  Return////  Step 6. Small_r.////  ...Use flush to zero mode without causing exception//    Let [i_0 i_1] be the two lsb of N_fix.////  FR_rsq := r * r////  If i_1 = 0 then//     z := FR_rsq*FR_rsq; z := FR_rsq*z *r//     poly_lo := S_3 + FR_rsq*(S_4 + FR_rsq*S_5)//     poly_hi := r*FR_rsq*(S_1 + FR_rsq*S_2)//     correction := c//     result := r//  Else//     z := FR_rsq*FR_rsq; z := FR_rsq*z//     poly_lo := C_3 + FR_rsq*(C_4 + FR_rsq*C_5)//     poly_hi := FR_rsq*(C_1 + FR_rsq*C_2)//     correction := -c*r//     result := 1//  Endif////  poly := poly_hi + (z * poly_lo + correction)////  If i_0 = 1, result := -result////  Last operation. Perform in user-set rounding mode////  result := (i_0 == 0?     result + poly ://                          result - poly )//  Return////  Step 7. Case_2_reduce.////  ...Refer to the write up for argument reduction for//  ...rationale. The reduction algorithm below is taken from//  ...argument reduction description and integrated this.////  w := N*P_3//  U_1 := N*P_2 + w		...FMA//  U_2 := (N*P_2 - U_1) + w	...2 FMA//  ...U_1 + U_2 is  N*(P_2+P_3) accurately////  r := s - U_1//  c := ( (s - r) - U_1 ) - U_2////  ...The mathematical sum r + c approximates the reduced//  ...argument accurately. Note that although compared to//  ...Case 1, this case requires much more work to reduce//  ...the argument, the subsequent calculation needed for//  ...any of the trigonometric function is very little because//  ...|alpha| < 1.01*2^(-33) and thus two terms of the//  ...Taylor series expansion suffices.////  If i_1 = 0 then//     poly := c + S_1 * r * r * r	...any order//     result := r//  Else//     poly := -2^(-67)//     result := 1.0//  Endif////  If i_0 = 1, result := -result////  Last operation. Perform in user-set rounding mode////  result := (i_0 == 0?     result + poly ://                           result - poly )////  Return//////  Step 8. Pre-reduction of large arguments.////  ...Again, the following reduction procedure was described//  ...in the separate write up for argument reduction, which//  ...is tightly integrated here.//  N_0 := Arg * Inv_P_0//  N_0_fix := fcvt.fx( N_0 )//  N_0 := fcvt.xf( N_0_fix)//  Arg' := Arg - N_0 * P_0//  w := N_0 * d_1//  N := Arg' * two_by_PI//  N_fix := fcvt.fx( N )//  N := fcvt.xf( N_fix )//  N_fix := N_fix + N_inc////  s := Arg' - N * P_1//  w := w - N * P_2////  If |s| >= 2^(-14)//     go to Step 3//  Else//     go to Step 9//  Endif////  Step 9. Case_4_reduce.////    ...first obtain N_0*d_1 and -N*P_2 accurately//   U_hi := N_0 * d_1		V_hi := -N*P_2//   U_lo := N_0 * d_1 - U_hi	V_lo := -N*P_2 - U_hi	...FMAs////   ...compute the contribution from N_0*d_1 and -N*P_3//   w := -N*P_3//   w := w + N_0*d_2//   t := U_lo + V_lo + w		...any order////   ...at this point, the mathematical value//   ...s + U_hi + V_hi  + t approximates the true reduced argument//   ...accurately. Just need to compute this accurately.////   ...Calculate U_hi + V_hi accurately://   A := U_hi + V_hi//   if |U_hi| >= |V_hi| then//      a := (U_hi - A) + V_hi//   else//      a := (V_hi - A) + U_hi//   endif//   ...order in computing "a" must be observed. This branch is//   ...best implemented by predicates.//   ...A + a  is U_hi + V_hi accurately. Moreover, "a" is//   ...much smaller than A: |a| <= (1/2)ulp(A).////   ...Just need to calculate   s + A + a + t//   C_hi := s + A		t := t + a//   C_lo := (s - C_hi) + A//   C_lo := C_lo + t////   ...Final steps for reduction//   r := C_hi + C_lo//   c := (C_hi - r) + C_lo////   ...At this point, we have r and c//   ...And all we need is a couple of terms of the corresponding//   ...Taylor series.////   If i_1 = 0//      poly := c + r*FR_rsq*(S_1 + FR_rsq*S_2)//      result := r//   Else//      poly := FR_rsq*(C_1 + FR_rsq*C_2)//      result := 1//   Endif////   If i_0 = 1, result := -result////   Last operation. Perform in user-set rounding mode////   result := (i_0 == 0?     result + poly ://                            result - poly )//   Return////   Large Arguments: For arguments above 2**63, a Payne-Hanek//   style argument reduction is used and pi_by_2 reduce is called.//#ifdef _LIBC.rodata#else.data#endif.align 64FSINCOS_CONSTANTS:ASM_TYPE_DIRECTIVE(FSINCOS_CONSTANTS,@object)data4 0x4B800000, 0xCB800000, 0x00000000,0x00000000 // two**24, -two**24data4 0x4E44152A, 0xA2F9836E, 0x00003FFE,0x00000000 // Inv_pi_by_2data4 0xCE81B9F1, 0xC84D32B0, 0x00004016,0x00000000 // P_0data4 0x2168C235, 0xC90FDAA2, 0x00003FFF,0x00000000 // P_1data4 0xFC8F8CBB, 0xECE675D1, 0x0000BFBD,0x00000000 // P_2data4 0xACC19C60, 0xB7ED8FBB, 0x0000BF7C,0x00000000 // P_3data4 0x5F000000, 0xDF000000, 0x00000000,0x00000000 // two_to_63, -two_to_63data4 0x6EC6B45A, 0xA397E504, 0x00003FE7,0x00000000 // Inv_P_0data4 0xDBD171A1, 0x8D848E89, 0x0000BFBF,0x00000000 // d_1data4 0x18A66F8E, 0xD5394C36, 0x0000BF7C,0x00000000 // d_2data4 0x2168C234, 0xC90FDAA2, 0x00003FFE,0x00000000 // pi_by_4data4 0x2168C234, 0xC90FDAA2, 0x0000BFFE,0x00000000 // neg_pi_by_4data4 0x3E000000, 0xBE000000, 0x00000000,0x00000000 // two**-3, -two**-3data4 0x2F000000, 0xAF000000, 0x9E000000,0x00000000 // two**-33, -two**-33, -two**-67data4 0xA21C0BC9, 0xCC8ABEBC, 0x00003FCE,0x00000000 // PP_8data4 0x720221DA, 0xD7468A05, 0x0000BFD6,0x00000000 // PP_7data4 0x640AD517, 0xB092382F, 0x00003FDE,0x00000000 // PP_6data4 0xD1EB75A4, 0xD7322B47, 0x0000BFE5,0x00000000 // PP_5data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1data4 0x00000000, 0xAAAA0000, 0x0000BFFC,0x00000000 // PP_1_hidata4 0xBAF69EEA, 0xB8EF1D2A, 0x00003FEC,0x00000000 // PP_4data4 0x0D03BB69, 0xD00D00D0, 0x0000BFF2,0x00000000 // PP_3data4 0x88888962, 0x88888888, 0x00003FF8,0x00000000 // PP_2data4 0xAAAB0000, 0xAAAAAAAA, 0x0000BFEC,0x00000000 // PP_1_lodata4 0xC2B0FE52, 0xD56232EF, 0x00003FD2,0x00000000 // QQ_8data4 0x2B48DCA6, 0xC9C99ABA, 0x0000BFDA,0x00000000 // QQ_7data4 0x9C716658, 0x8F76C650, 0x00003FE2,0x00000000 // QQ_6data4 0xFDA8D0FC, 0x93F27DBA, 0x0000BFE9,0x00000000 // QQ_5data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1data4 0x00000000, 0x80000000, 0x0000BFFE,0x00000000 // QQ_1data4 0x0C6E5041, 0xD00D00D0, 0x00003FEF,0x00000000 // QQ_4data4 0x0B607F60, 0xB60B60B6, 0x0000BFF5,0x00000000 // QQ_3data4 0xAAAAAA9B, 0xAAAAAAAA, 0x00003FFA,0x00000000 // QQ_2data4 0xFFFFFFFE, 0xFFFFFFFF, 0x0000BFFD,0x00000000 // C_1data4 0xAAAA719F, 0xAAAAAAAA, 0x00003FFA,0x00000000 // C_2data4 0x0356F994, 0xB60B60B6, 0x0000BFF5,0x00000000 // C_3data4 0xB2385EA9, 0xD00CFFD5, 0x00003FEF,0x00000000 // C_4data4 0x292A14CD, 0x93E4BD18, 0x0000BFE9,0x00000000 // C_5data4 0xAAAAAAAA, 0xAAAAAAAA, 0x0000BFFC,0x00000000 // S_1data4 0x888868DB, 0x88888888, 0x00003FF8,0x00000000 // S_2data4 0x055EFD4B, 0xD00D00D0, 0x0000BFF2,0x00000000 // S_3data4 0x839730B9, 0xB8EF1C5D, 0x00003FEC,0x00000000 // S_4data4 0xE5B3F492, 0xD71EA3A4, 0x0000BFE5,0x00000000 // S_5data4 0x38800000, 0xB8800000, 0x00000000            // two**-14, -two**-14ASM_SIZE_DIRECTIVE(FSINCOS_CONSTANTS)FR_Input_X        = f8FR_Neg_Two_to_M3  = f32FR_Two_to_63      = f32FR_Two_to_24      = f33FR_Pi_by_4        = f33FR_Two_to_M14     = f34FR_Two_to_M33     = f35FR_Neg_Two_to_24  = f36FR_Neg_Pi_by_4    = f36FR_Neg_Two_to_M14 = f37FR_Neg_Two_to_M33 = f38FR_Neg_Two_to_M67 = f39FR_Inv_pi_by_2    = f40FR_N_float        = f41FR_N_fix          = f42FR_P_1            = f43FR_P_2            = f44FR_P_3            = f45FR_s              = f46FR_w              = f47FR_c              = f48FR_r              = f49FR_Z              = f50FR_A              = f51FR_a              = f52FR_t              = f53FR_U_1            = f54FR_U_2            = f55FR_C_1            = f56FR_C_2            = f57FR_C_3            = f58FR_C_4            = f59FR_C_5            = f60FR_S_1            = f61FR_S_2            = f62FR_S_3            = f63FR_S_4            = f64FR_S_5            = f65FR_poly_hi        = f66FR_poly_lo        = f67FR_r_hi           = f68FR_r_lo           = f69FR_rsq            = f70FR_r_cubed        = f71FR_C_hi           = f72FR_N_0            = f73FR_d_1            = f74FR_V              = f75FR_V_hi           = f75FR_V_lo           = f76FR_U_hi           = f77FR_U_lo           = f78FR_U_hiabs        = f79FR_V_hiabs        = f80FR_PP_8           = f81FR_QQ_8           = f81FR_PP_7           = f82FR_QQ_7           = f82FR_PP_6           = f83FR_QQ_6           = f83FR_PP_5           = f84FR_QQ_5           = f84FR_PP_4           = f85FR_QQ_4           = f85FR_PP_3           = f86FR_QQ_3           = f86FR_PP_2           = f87FR_QQ_2           = f87FR_QQ_1           = f88FR_N_0_fix        = f89FR_Inv_P_0        = f90FR_corr           = f91FR_poly           = f92FR_d_2            = f93FR_Two_to_M3      = f94FR_Neg_Two_to_63  = f94FR_P_0            = f95FR_C_lo           = f96FR_PP_1           = f97FR_PP_1_lo        = f98FR_ArgPrime       = f99GR_Table_Base  = r32GR_Table_Base1 = r33GR_i_0         = r34GR_i_1         = r35GR_N_Inc       = r36GR_Sin_or_Cos  = r37GR_SAVE_B0     = r39GR_SAVE_GP     = r40GR_SAVE_PFS    = r41.section .text.proc __libm_sin_double_dbx#.align 64__libm_sin_double_dbx:{ .mlxalloc GR_Table_Base = ar.pfs,0,12,2,0       movl GR_Sin_or_Cos = 0x0 ;;}{ .mmi      nop.m 999      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp      nop.i 999};;{ .mmi      ld8 GR_Table_Base = [GR_Table_Base]      nop.m 999      nop.i 999};;{ .mib      nop.m 999      nop.i 999       br.cond.sptk L(SINCOS_CONTINUE) ;;}.endp __libm_sin_double_dbx#ASM_SIZE_DIRECTIVE(__libm_sin_double_dbx).section .text.proc __libm_cos_double_dbx#__libm_cos_double_dbx:{ .mlxalloc GR_Table_Base= ar.pfs,0,12,2,0       movl GR_Sin_or_Cos = 0x1 ;;}{ .mmi      nop.m 999      addl           GR_Table_Base   = @ltoff(FSINCOS_CONSTANTS#), gp      nop.i 999};;{ .mmi      ld8 GR_Table_Base = [GR_Table_Base]      nop.m 999      nop.i 999};;////     Load Table Address//L(SINCOS_CONTINUE):{ .mmi       add GR_Table_Base1 = 96, GR_Table_Base       ldfs	FR_Two_to_24 = [GR_Table_Base], 4       nop.i 999};;{ .mmi      nop.m 999////     Load 2**24, load 2**63.//       ldfs	FR_Neg_Two_to_24 = [GR_Table_Base], 12       mov   r41 = ar.pfs ;;}{ .mfi       ldfs	FR_Two_to_63 = [GR_Table_Base1], 4////     Check for unnormals - unsupported operands. We do not want//     to generate denormal exception//     Check for NatVals, QNaNs, SNaNs, +/-Infs//     Check for EM unsupporteds//     Check for Zero//       fclass.m.unc  p6, p8 =  FR_Input_X, 0x1E3       mov   r40 = gp ;;}{ .mfi      nop.m 999       fclass.nm.unc p8, p0 =  FR_Input_X, 0x1FF// GR_Sin_or_Cos denotes       mov   r39 = b0}{ .mfb       ldfs	FR_Neg_Two_to_63 = [GR_Table_Base1], 12       fclass.m.unc p10, p0 = FR_Input_X, 0x007(p6)   br.cond.spnt L(SINCOS_SPECIAL) ;;}{ .mib      nop.m 999      nop.i 999(p8)   br.cond.spnt L(SINCOS_SPECIAL) ;;}{ .mib      nop.m 999      nop.i 999////     Branch if +/- NaN, Inf.//     Load -2**24, load -2**63.//(p10)  br.cond.spnt L(SINCOS_ZERO) ;;}{ .mmb       ldfe	FR_Inv_pi_by_2 = [GR_Table_Base], 16       ldfe	FR_Inv_P_0 = [GR_Table_Base1], 16      nop.b 999 ;;}{ .mmb      nop.m 999       ldfe		FR_d_1 = [GR_Table_Base1], 16      nop.b 999 ;;}////     Raise possible denormal operand flag with useful fcmp//     Is x <= -2**63//     Load Inv_P_0 for pre-reduction//     Load Inv_pi_by_2//{ .mmb       ldfe		FR_P_0 = [GR_Table_Base], 16       ldfe	FR_d_2 = [GR_Table_Base1], 16      nop.b 999 ;;}////     Load P_0//     Load d_1//     Is x >= 2**63//     Is x <= -2**24?//{ .mmi       ldfe	FR_P_1 = [GR_Table_Base], 16 ;;////     Load P_1//     Load d_2//     Is x >= 2**24?//       ldfe	FR_P_2 = [GR_Table_Base], 16      nop.i 999 ;;}{ .mmf      nop.m 999       ldfe	FR_P_3 = [GR_Table_Base], 16       fcmp.le.unc.s1	p7, p8 = FR_Input_X, FR_Neg_Two_to_24}{ .mfi      nop.m 999////     Branch if +/- zero.//     Decide about the paths to take://     If -2**24 < FR_Input_X < 2**24 - CASE 1 OR 2//     OTHERWISE - CASE 3 OR 4//       fcmp.le.unc.s0	p10, p11 = FR_Input_X, FR_Neg_Two_to_63      nop.i 999 ;;}{ .mfi      nop.m 999(p8)   fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24      nop.i 999}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -