📄 libm_reduce.s
字号:
// If Tmp_C >= sigma_C then// C_hi := Tmp_C;// C_lo := x*p_1 - C_hi ...fma, exact// Else// C_hi := fadd.fpsr3(sigma_C, Tmp_C) - sigma_C// C_lo := x*p_1 - C_hi ...fma, exact// End If// If Tmp_B >= sigma_B then// B_hi := Tmp_B;// B_lo := x*p_2 - B_hi ...fma, exact// Else// B_hi := fadd.fpsr3(sigma_B, Tmp_B) - sigma_B// B_lo := x*p_2 - B_hi ...fma, exact// End If// If Tmp_A >= sigma_A then// A_hi := Tmp_A;// A_lo := x*p_3 - A_hi ...fma, exact// Else// A_hi := fadd.fpsr3(sigma_A, Tmp_A) - sigma_A// Exact, regardless ...of rounding direction// A_lo := x*p_3 - A_hi ...fma, exact// Endif(p0) fmpy.s3 FR_Tmp_C = FR_X,FR_p_1 nop.i 999 ;;}{ .mfi nop.m 999(p0) fmpy.s1 FR_p_2 = FR_p_2,FR_ScaleP2 nop.i 999}{ .mlx nop.m 999(p0) movl GR_Temp = 0x0000000000000400}{ .mlx nop.m 999(p0) movl GR_TEMP3 = 0x000000000000FF3F ;;}{ .mmf nop.m 999(p0) setf.exp FR_ScaleP4 = GR_TEMP3(p0) fmpy.s1 FR_p_3 = FR_p_3,FR_ScaleP3 ;;}{ .mlx nop.m 999(p0) movl GR_TEMP4 = 0x0000000000010045 ;;}{ .mmf nop.m 999(p0) setf.exp FR_Tmp2_C = GR_TEMP4(p0) fmpy.s3 FR_Tmp_B = FR_X,FR_p_2 ;;}{ .mfi nop.m 999(p0) fcmp.ge.unc.s1 p12, p9 = FR_Tmp_C,FR_sigma_C nop.i 999 ;;}{ .mfi nop.m 999(p0) fmpy.s3 FR_Tmp_A = FR_X,FR_p_3 nop.i 999 ;;}{ .mfi nop.m 999(p12) mov FR_C_hi = FR_Tmp_C nop.i 999 ;;}{ .mfi(p0) addl GR_BASE = @ltoff(Constants_Bits_of_pi_by_2#), gp(p9) fadd.s3 FR_C_hi = FR_sigma_C,FR_Tmp_C nop.i 999};;// End If// Step 3. Get reduced argument// If sgn_x == 0 (that is original x is positive)// D_hi := Pi_by_2_hi// D_lo := Pi_by_2_lo// Load from table// Else// D_hi := neg_Pi_by_2_hi// D_lo := neg_Pi_by_2_lo// Load from table// End If{ .mmi ld8 GR_BASE = [GR_BASE] nop.m 999 nop.i 999};;{ .mfi(p0) ldfe FR_D_hi = [GR_BASE],16(p0) fmpy.s1 FR_p_4 = FR_p_4,FR_ScaleP4 nop.i 999 ;;}{ .mfi(p0) ldfe FR_D_lo = [GR_BASE],0(p0) fcmp.ge.unc.s1 p13, p10 = FR_Tmp_B,FR_sigma_B nop.i 999 ;;}{ .mfi nop.m 999(p13) mov FR_B_hi = FR_Tmp_B nop.i 999}{ .mfi nop.m 999(p12) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi nop.i 999 ;;}{ .mfi nop.m 999(p10) fadd.s3 FR_B_hi = FR_sigma_B,FR_Tmp_B nop.i 999}{ .mfi nop.m 999(p9) fsub.s1 FR_C_hi = FR_C_hi,FR_sigma_C nop.i 999 ;;}{ .mfi nop.m 999(p0) fcmp.ge.unc.s1 p14, p11 = FR_Tmp_A,FR_sigma_A nop.i 999 ;;}{ .mfi nop.m 999(p14) mov FR_A_hi = FR_Tmp_A nop.i 999 ;;}{ .mfi nop.m 999(p11) fadd.s3 FR_A_hi = FR_sigma_A,FR_Tmp_A nop.i 999 ;;}{ .mfi nop.m 999(p9) fms.s1 FR_C_lo = FR_X,FR_p_1,FR_C_hi(p0) cmp.eq.unc p12,p9 = 0x1,GR_sgn_x}{ .mfi nop.m 999(p13) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi nop.i 999 ;;}{ .mfi nop.m 999(p10) fsub.s1 FR_B_hi = FR_B_hi,FR_sigma_B nop.i 999}{ .mfi nop.m 999// Note that C_hi is of integer value. We need only the// last few bits. Thus we can ensure C_hi is never a big// integer, freeing us from overflow worry.// Tmp_C := fadd.fpsr3( C_hi, 2^(70) ) - 2^(70);// Tmp_C is the upper portion of C_hi(p0) fadd.s3 FR_Tmp_C = FR_C_hi,FR_Tmp2_C nop.i 999 ;;}{ .mfi nop.m 999(p14) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi nop.i 999}{ .mfi nop.m 999(p11) fsub.s1 FR_A_hi = FR_A_hi,FR_sigma_A nop.i 999 ;;}{ .mfi nop.m 999// *******************// Step 2. Get N and f// *******************// We have all the components to obtain// S_0, S_1, S_2, S_3 and thus N and f. We start by adding// C_lo and B_hi. This sum together with C_hi estimates// N and f well.// A := fadd.fpsr3( B_hi, C_lo )// B := max( B_hi, C_lo )// b := min( B_hi, C_lo )(p0) fadd.s3 FR_A = FR_B_hi,FR_C_lo nop.i 999}{ .mfi nop.m 999(p10) fms.s1 FR_B_lo = FR_X,FR_p_2,FR_B_hi nop.i 999 ;;}{ .mfi nop.m 999(p0) fsub.s1 FR_Tmp_C = FR_Tmp_C,FR_Tmp2_C nop.i 999 ;;}{ .mfi nop.m 999(p0) fmax.s1 FR_B = FR_B_hi,FR_C_lo nop.i 999 ;;}{ .mfi nop.m 999(p0) fmin.s1 FR_b = FR_B_hi,FR_C_lo nop.i 999}{ .mfi nop.m 999(p11) fms.s1 FR_A_lo = FR_X,FR_p_3,FR_A_hi nop.i 999 ;;}{ .mfi nop.m 999// N := round_to_nearest_integer_value( A );(p0) fcvt.fx.s1 FR_N = FR_A nop.i 999 ;;}{ .mfi nop.m 999// C_hi := C_hi - Tmp_C ...0 <= C_hi < 2^7(p0) fsub.s1 FR_C_hi = FR_C_hi,FR_Tmp_C nop.i 999 ;;}{ .mfi nop.m 999// a := (B - A) + b: Exact - note that a is either 0 or 2^(-64).(p0) fsub.s1 FR_a = FR_B,FR_A nop.i 999 ;;}{ .mfi nop.m 999// f := A - N; Exact because lsb(A) >= 2^(-64) and |f| <= 1/2.(p0) fnorm.s1 FR_N = FR_N nop.i 999}{ .mfi nop.m 999(p0) fadd.s1 FR_a = FR_a,FR_b nop.i 999 ;;}{ .mfi nop.m 999(p0) fsub.s1 FR_f = FR_A,FR_N nop.i 999}{ .mfi nop.m 999// N := convert to integer format( C_hi + N );// M := P_0 * x_lo;// N := N + M;(p0) fadd.s1 FR_N = FR_N,FR_C_hi nop.i 999 ;;}{ .mfi nop.m 999// f = f + a Exact because a is 0 or 2^(-64);// the msb of the sum is <= 1/2 and lsb >= 2^(-64).(p0) fadd.s1 FR_f = FR_f,FR_a nop.i 999}{ .mfi nop.m 999//// Create 2**(-33)//(p0) fcvt.fx.s1 FR_N = FR_N nop.i 999 ;;}{ .mfi nop.m 999(p0) fabs FR_f_abs = FR_f nop.i 999 ;;}{ .mfi(p0) getf.sig GR_N = FR_N nop.f 999 nop.i 999 ;;}{ .mii nop.m 999 nop.i 999 ;;(p0) add GR_N = GR_N,GR_M ;;}// If sgn_x == 1 (that is original x was negative)// N := 2^10 - N// this maintains N to be non-negative, but still// equivalent to the (negated N) mod 4.// End If{ .mii(p12) sub GR_N = GR_Temp,GR_N(p0) cmp.eq.unc p12,p9 = 0x0,GR_sgn_x ;; nop.i 999}{ .mfi nop.m 999(p0) fcmp.ge.unc.s1 p13, p10 = FR_f_abs,FR_TWOM33 nop.i 999 ;;}{ .mfi nop.m 999(p9) fsub.s1 FR_D_hi = f0, FR_D_hi nop.i 999 ;;}{ .mfi nop.m 999(p10) fadd.s3 FR_A = FR_A_hi,FR_B_lo nop.i 999}{ .mfi nop.m 999(p13) fadd.s1 FR_g = FR_A_hi,FR_B_lo nop.i 999 ;;}{ .mfi nop.m 999(p10) fmax.s1 FR_B = FR_A_hi,FR_B_lo nop.i 999}{ .mfi nop.m 999(p9) fsub.s1 FR_D_lo = f0, FR_D_lo nop.i 999 ;;}{ .mfi nop.m 999(p10) fmin.s1 FR_b = FR_A_hi,FR_B_lo nop.i 999 ;;}{ .mfi nop.m 999(p0) fsetc.s3 0x7F,0x40 nop.i 999}{ .mlx nop.m 999(p10) movl GR_Temp = 0x000000000000FFCD ;;}{ .mmf nop.m 999(p10) setf.exp FR_TWOM50 = GR_Temp(p10) fadd.s1 FR_f_hi = FR_A,FR_f ;;}{ .mfi nop.m 999// a := (B - A) + b Exact.// Note that a is either 0 or 2^(-128).// f_hi := A + f;// f_lo := (f - f_hi) + A// f_lo=f-f_hi is exact because either |f| >= |A|, in which// case f-f_hi is clearly exact; or otherwise, 0<|f|<|A|// means msb(f) <= msb(A) = 2^(-64) => |f| = 2^(-64).// If f = 2^(-64), f-f_hi involves cancellation and is// exact. If f = -2^(-64), then A + f is exact. Hence// f-f_hi is -A exactly, giving f_lo = 0.// f_lo := f_lo + a;(p10) fsub.s1 FR_a = FR_B,FR_A nop.i 999}{ .mfi nop.m 999(p13) fadd.s1 FR_s_hi = FR_f,FR_g nop.i 999 ;;}{ .mlx nop.m 999// If |f| >= 2^(-33)// Case 1// CASE := 1// g := A_hi + B_lo;// s_hi := f + g;// s_lo := (f - s_hi) + g;(p13) movl GR_CASE = 0x1 ;;}{ .mlx nop.m 999// Else// Case 2// CASE := 2// A := fadd.fpsr3( A_hi, B_lo )// B := max( A_hi, B_lo )// b := min( A_hi, B_lo )(p10) movl GR_CASE = 0x2}{ .mfi nop.m 999(p10) fsub.s1 FR_f_lo = FR_f,FR_f_hi nop.i 999 ;;}{ .mfi nop.m 999(p10) fadd.s1 FR_a = FR_a,FR_b nop.i 999}{ .mfi nop.m 999(p13) fsub.s1 FR_s_lo = FR_f,FR_s_hi nop.i 999 ;;}{ .mfi nop.m 999(p13) fadd.s1 FR_s_lo = FR_s_lo,FR_g nop.i 999 ;;}{ .mfi nop.m 999(p10) fcmp.ge.unc.s1 p14, p11 = FR_f_abs,FR_TWOM50 nop.i 999 ;;}{ .mfi nop.m 999//// Create 2**(-50)(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_A nop.i 999 ;;}{ .mfi nop.m 999// If |f| >= 2^(-50) then// s_hi := f_hi;// s_lo := f_lo;// Else// f_lo := (f_lo + A_lo) + x*p_4// s_hi := f_hi + f_lo// s_lo := (f_hi - s_hi) + f_lo// End If(p14) mov FR_s_hi = FR_f_hi nop.i 999 ;;}{ .mfi nop.m 999(p10) fadd.s1 FR_f_lo = FR_f_lo,FR_a nop.i 999 ;;}{ .mfi nop.m 999(p14) mov FR_s_lo = FR_f_lo nop.i 999}{ .mfi nop.m 999(p11) fadd.s1 FR_f_lo = FR_f_lo,FR_A_lo nop.i 999 ;;}{ .mfi nop.m 999(p11) fma.s1 FR_f_lo = FR_X,FR_p_4,FR_f_lo nop.i 999 ;;}{ .mfi nop.m 999(p11) fadd.s1 FR_s_hi = FR_f_hi,FR_f_lo nop.i 999 ;;}{ .mfi nop.m 999// r_hi := s_hi*D_hi// r_lo := s_hi*D_hi - r_hi with fma// r_lo := (s_hi*D_lo + r_lo) + s_lo*D_hi(p0) fmpy.s1 FR_r_hi = FR_s_hi,FR_D_hi nop.i 999}{ .mfi nop.m 999(p11) fsub.s1 FR_s_lo = FR_f_hi,FR_s_hi nop.i 999 ;;}{ .mfi nop.m 999(p0) fms.s1 FR_r_lo = FR_s_hi,FR_D_hi,FR_r_hi nop.i 999}{ .mfi nop.m 999(p11) fadd.s1 FR_s_lo = FR_s_lo,FR_f_lo nop.i 999 ;;}{ .mmi nop.m 999 ;;// Return N, r_hi, r_lo// We do not return CASE(p0) stfe [GR_Address_of_Outputs] = FR_r_hi,16 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_r_lo = FR_s_hi,FR_D_lo,FR_r_lo nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_r_lo = FR_s_lo,FR_D_hi,FR_r_lo nop.i 999 ;;}{ .mmi nop.m 999 ;;(p0) stfe [GR_Address_of_Outputs] = FR_r_lo,-16 nop.i 999}{ .mib nop.m 999 nop.i 999(p0) br.ret.sptk b0 ;;}.endp __libm_pi_by_2_reduceASM_SIZE_DIRECTIVE(__libm_pi_by_2_reduce)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -