📄 libm_reduce.s

📁 Glibc 2.3.2源代码(解压后有100多M)
💻 S
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
//    If Tmp_C >= sigma_C then//      C_hi := Tmp_C;//      C_lo := x*p_1 - C_hi ...fma, exact//    Else//      C_hi := fadd.fpsr3(sigma_C, Tmp_C) - sigma_C//      C_lo := x*p_1 - C_hi ...fma, exact//    End If//    If Tmp_B >= sigma_B then//      B_hi := Tmp_B;//      B_lo := x*p_2 - B_hi ...fma, exact//    Else//      B_hi := fadd.fpsr3(sigma_B, Tmp_B) - sigma_B//      B_lo := x*p_2 - B_hi ...fma, exact//    End If//    If Tmp_A >= sigma_A then//      A_hi := Tmp_A;//      A_lo := x*p_3 - A_hi ...fma, exact//    Else//      A_hi := fadd.fpsr3(sigma_A, Tmp_A) - sigma_A//      Exact, regardless ...of rounding direction//      A_lo := x*p_3 - A_hi ...fma, exact//    Endif(p0)  fmpy.s3 FR_Tmp_C = FR_X,FR_p_1	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2	nop.i 999}{ .mlx	nop.m 999(p0)  movl GR_Temp = 0x0000000000000400}{ .mlx	nop.m 999(p0)  movl GR_TEMP3 = 0x000000000000FF3F ;;}{ .mmf	nop.m 999(p0)  setf.exp FR_ScaleP4 = GR_TEMP3(p0)  fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3 ;;}{ .mlx	nop.m 999(p0)  movl GR_TEMP4 = 0x0000000000010045 ;;}{ .mmf	nop.m 999(p0)  setf.exp FR_Tmp2_C = GR_TEMP4(p0)  fmpy.s3 FR_Tmp_B = FR_X,FR_p_2 ;;}{ .mfi	nop.m 999(p0)  fcmp.ge.unc.s1 p12,  p9 = FR_Tmp_C,FR_sigma_C	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fmpy.s3 FR_Tmp_A = FR_X,FR_p_3	nop.i 999 ;;}{ .mfi	nop.m 999(p12) mov FR_C_hi = FR_Tmp_C	nop.i 999 ;;}{ .mfi(p0)  addl           GR_BASE   = @ltoff(Constants_Bits_of_pi_by_2#), gp(p9)  fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C	nop.i 999};;//   End If//   Step 3. Get reduced argument//   If sgn_x == 0 (that is original x is positive)//      D_hi := Pi_by_2_hi//      D_lo := Pi_by_2_lo//      Load from table//   Else//      D_hi := neg_Pi_by_2_hi//      D_lo := neg_Pi_by_2_lo//      Load from table//   End If{ .mmi      ld8 GR_BASE = [GR_BASE]      nop.m 999      nop.i 999};;{ .mfi(p0) ldfe FR_D_hi = [GR_BASE],16(p0)  fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4	nop.i 999 ;;}{ .mfi(p0) ldfe FR_D_lo = [GR_BASE],0(p0)  fcmp.ge.unc.s1 p13, p10 = FR_Tmp_B,FR_sigma_B	nop.i 999 ;;}{ .mfi	nop.m 999(p13) mov FR_B_hi = FR_Tmp_B	nop.i 999}{ .mfi	nop.m 999(p12) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p10) fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B	nop.i 999}{ .mfi	nop.m 999(p9)  fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fcmp.ge.unc.s1 p14, p11 = FR_Tmp_A,FR_sigma_A	nop.i 999 ;;}{ .mfi	nop.m 999(p14) mov FR_A_hi = FR_Tmp_A	nop.i 999 ;;}{ .mfi	nop.m 999(p11) fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A	nop.i 999 ;;}{ .mfi	nop.m 999(p9)  fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi(p0)  cmp.eq.unc p12,p9 = 0x1,GR_sgn_x}{ .mfi	nop.m 999(p13) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B	nop.i 999}{ .mfi	nop.m 999//    Note that C_hi is of integer value. We need only the//    last few bits. Thus we can ensure C_hi is never a big//    integer, freeing us from overflow worry.//    Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);//    Tmp_C is the upper portion of C_hi(p0)  fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C	nop.i 999 ;;}{ .mfi	nop.m 999(p14) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi	nop.i 999}{ .mfi	nop.m 999(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A	nop.i 999 ;;}{ .mfi	nop.m 999//    *******************//    Step 2. Get N and f//    *******************//    We have all the components to obtain//    S_0, S_1, S_2, S_3 and thus N and f. We start by adding//    C_lo and B_hi. This sum together with C_hi estimates//    N and f well.//    A := fadd.fpsr3( B_hi, C_lo )//    B := max( B_hi, C_lo )//    b := min( B_hi, C_lo )(p0)  fadd.s3 FR_A = FR_B_hi,FR_C_lo	nop.i 999}{ .mfi	nop.m 999(p10) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fmax.s1 FR_B = FR_B_hi,FR_C_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fmin.s1 FR_b = FR_B_hi,FR_C_lo	nop.i 999}{ .mfi	nop.m 999(p11) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi	nop.i 999 ;;}{ .mfi	nop.m 999//    N := round_to_nearest_integer_value( A );(p0)  fcvt.fx.s1 FR_N = FR_A	nop.i 999 ;;}{ .mfi	nop.m 999//    C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7(p0)  fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C	nop.i 999 ;;}{ .mfi	nop.m 999//    a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).(p0)  fsub.s1 FR_a = FR_B,FR_A	nop.i 999 ;;}{ .mfi	nop.m 999//    f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.(p0)  fnorm.s1 FR_N = FR_N	nop.i 999}{ .mfi	nop.m 999(p0)  fadd.s1 FR_a = FR_a,FR_b	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fsub.s1 FR_f = FR_A,FR_N	nop.i 999}{ .mfi	nop.m 999//    N := convert to integer format( C_hi + N );//    M := P_0 * x_lo;//    N := N + M;(p0)  fadd.s1 FR_N = FR_N,FR_C_hi	nop.i 999 ;;}{ .mfi	nop.m 999//    f = f + a	Exact because a is 0 or 2^(-64);//    the msb of the sum is <= 1/2 and lsb >= 2^(-64).(p0)  fadd.s1 FR_f = FR_f,FR_a	nop.i 999}{ .mfi	nop.m 999////    Create 2**(-33)//(p0)  fcvt.fx.s1 FR_N = FR_N	nop.i 999 ;;}{ .mfi	nop.m 999(p0)  fabs FR_f_abs = FR_f	nop.i 999 ;;}{ .mfi(p0)  getf.sig GR_N = FR_N	nop.f 999	nop.i 999 ;;}{ .mii	nop.m 999	nop.i 999 ;;(p0)  add GR_N = GR_N,GR_M ;;}//    If sgn_x == 1 (that is original x was negative)//       N := 2^10 - N//       this maintains N to be non-negative, but still//       equivalent to the (negated N) mod 4.//    End If{ .mii(p12) sub GR_N = GR_Temp,GR_N(p0) cmp.eq.unc p12,p9 = 0x0,GR_sgn_x ;;	nop.i 999}{ .mfi	nop.m 999(p0)  fcmp.ge.unc.s1 p13, p10 = FR_f_abs,FR_TWOM33	nop.i 999 ;;}{ .mfi	nop.m 999(p9) fsub.s1 FR_D_hi = f0, FR_D_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p10)    fadd.s3 FR_A = FR_A_hi,FR_B_lo	nop.i 999}{ .mfi	nop.m 999(p13)    fadd.s1 FR_g = FR_A_hi,FR_B_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p10)    fmax.s1 FR_B = FR_A_hi,FR_B_lo	nop.i 999}{ .mfi	nop.m 999(p9) fsub.s1 FR_D_lo = f0, FR_D_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p10)    fmin.s1 FR_b = FR_A_hi,FR_B_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p0) fsetc.s3 0x7F,0x40	nop.i 999}{ .mlx	nop.m 999(p10)    movl GR_Temp = 0x000000000000FFCD ;;}{ .mmf	nop.m 999(p10)    setf.exp FR_TWOM50 = GR_Temp(p10)    fadd.s1 FR_f_hi = FR_A,FR_f ;;}{ .mfi	nop.m 999//       a := (B - A) + b	Exact.//       Note that a is either 0 or 2^(-128).//       f_hi := A + f;//       f_lo := (f - f_hi) + A//       f_lo=f-f_hi is exact because either |f| >= |A|, in which//       case f-f_hi is clearly exact; or otherwise, 0<|f|<|A|//       means msb(f) <= msb(A) = 2^(-64) => |f| = 2^(-64).//       If f = 2^(-64), f-f_hi involves cancellation and is//       exact. If f = -2^(-64), then A + f is exact. Hence//       f-f_hi is -A exactly, giving f_lo = 0.//       f_lo := f_lo + a;(p10)    fsub.s1 FR_a = FR_B,FR_A	nop.i 999}{ .mfi	nop.m 999(p13)    fadd.s1 FR_s_hi = FR_f,FR_g	nop.i 999 ;;}{ .mlx	nop.m 999//    If |f| >= 2^(-33)//       Case 1//       CASE := 1//       g := A_hi + B_lo;//       s_hi := f + g;//       s_lo := (f - s_hi) + g;(p13)    movl GR_CASE = 0x1 ;;}{ .mlx	nop.m 999//   Else//       Case 2//       CASE := 2//       A := fadd.fpsr3( A_hi, B_lo )//       B := max( A_hi, B_lo )//       b := min( A_hi, B_lo )(p10)    movl GR_CASE = 0x2}{ .mfi	nop.m 999(p10)    fsub.s1 FR_f_lo = FR_f,FR_f_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p10)    fadd.s1 FR_a = FR_a,FR_b	nop.i 999}{ .mfi	nop.m 999(p13)    fsub.s1 FR_s_lo = FR_f,FR_s_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p13)    fadd.s1 FR_s_lo = FR_s_lo,FR_g	nop.i 999 ;;}{ .mfi	nop.m 999(p10)    fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50	nop.i 999 ;;}{ .mfi	nop.m 999////       Create 2**(-50)(p10)    fadd.s1 FR_f_lo = FR_f_lo,FR_A	nop.i 999 ;;}{ .mfi	nop.m 999//       If |f| >= 2^(-50) then//          s_hi := f_hi;//          s_lo := f_lo;//       Else//          f_lo := (f_lo + A_lo) + x*p_4//          s_hi := f_hi + f_lo//          s_lo := (f_hi - s_hi) + f_lo//       End If(p14)  mov FR_s_hi = FR_f_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p10)    fadd.s1 FR_f_lo = FR_f_lo,FR_a	nop.i 999 ;;}{ .mfi	nop.m 999(p14)  mov FR_s_lo = FR_f_lo	nop.i 999}{ .mfi	nop.m 999(p11)  fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p11)  fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p11)  fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo	nop.i 999 ;;}{ .mfi	nop.m 999//   r_hi :=  s_hi*D_hi//   r_lo :=  s_hi*D_hi - r_hi  with fma//   r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi(p0) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi	nop.i 999}{ .mfi	nop.m 999(p11)  fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi	nop.i 999 ;;}{ .mfi	nop.m 999(p0) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi	nop.i 999}{ .mfi	nop.m 999(p11)  fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo	nop.i 999 ;;}{ .mmi	nop.m 999 ;;//   Return  N, r_hi, r_lo//   We do not return CASE(p0) stfe [GR_Address_of_Outputs] = FR_r_hi,16	nop.i 999 ;;}{ .mfi	nop.m 999(p0) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo	nop.i 999 ;;}{ .mfi	nop.m 999(p0) fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo	nop.i 999 ;;}{ .mmi	nop.m 999 ;;(p0) stfe [GR_Address_of_Outputs] = FR_r_lo,-16	nop.i 999}{ .mib	nop.m 999	nop.i 999(p0) br.ret.sptk   b0 ;;}.endp __libm_pi_by_2_reduceASM_SIZE_DIRECTIVE(__libm_pi_by_2_reduce)
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -