📄 libm_sincosl.s
字号:
//
// Overview of this span (interior of the sincosl routine; the routine's
// entry code and the SINCOSL_SMALL_R_* / SINCOSL_NORMAL_R_* targets are
// not part of this span):
//
//   * Bundles 19-21 finish the |x| < pi/4 dispatch and branch away to
//     SINCOSL_SMALL_R_0 or SINCOSL_NORMAL_R_0.
//   * The fall-through path handles pi/4 <= |x| < 2^24: it forms
//     s = Arg - N*P_1, w = N*P_2, r = s - w, and branches to
//     SINCOSL_SMALL_R_1 / SINCOSL_NORMAL_R_1 when |s| >= 2^-33.
//   * SINCOSL_S_TINY is reached by fall-through when |s| < 2^-33; it
//     builds both results directly and returns.
//   * SINCOSL_LARGER_ARG starts the reduction for 2^24 <= |x| < 2^63 and
//     continues past the end of this span.
//
// f0 and f1 are the architectural constants +0.0 and +1.0, so
// "fma d = a, f1, b" is an add, "fms d = a, f1, b" is a subtract, and
// "fms d = f1, f0, a" is a negate.
//
//     Case 2: Convert integer N_fix back to normalized floating-point value.
//     Case 1: p8 is only affected when p6 is set
//
//
// Grab the integer part of N and call it N_fix
//
{ .mfi ///////////////////////////// 19 /////////////////
(p7)  ldfps FR_Two_to_M33, FR_Neg_Two_to_M33 = [GR_ad_d], 8
(p6)  fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 // r^3 if |x| < pi/4
(p6)  mov GR_N_Inc = 0x0 // N_IncS if |x| < pi/4
};;

//     If |x| < pi/4, r = x and c = 0
//     If |x| < pi/4, is x < 2**(-3).
//     r = Arg
//     c = 0
{ .mmi ///////////////////////////// 20 /////////////////
(p7)  getf.sig GR_N_Inc = FR_N_float_signif
      nop.m 0
(p6)  cmp.lt.unc p8,p0 = GR_exp_x, GR_exp_2_to_m3 // Is |x| < 2^-3
};;

//
//     If |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.
//     If |x| >= pi/4,
//     Create the right N for |x| < pi/4 and otherwise
//     Case 2: Place integer part of N in GP register
//
{ .mbb ///////////////////////////// 21 /////////////////
      nop.m 0
(p8)  br.cond.spnt SINCOSL_SMALL_R_0 // Branch if 0 < |x| < 2^-3
(p6)  br.cond.spnt SINCOSL_NORMAL_R_0 // Branch if 2^-3 <= |x| < pi/4
};;

// Here if pi/4 <= |x| < 2^24
{ .mfi
      ldfs FR_Neg_Two_to_M67 = [GR_ad_d], 8 // Load -2^-67
      fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X // s = -N * P_1 + Arg
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_w = FR_N_float, FR_P_2, f0 // w = N * P_2
      nop.i 0
};;

{ .mfi
      nop.m 0
      fms.s1 FR_r = FR_s, f1, FR_w // r = s - w, assume |s| >= 2^-33
      nop.i 0
};;

// Two-step range test on s: the first compare sets p7 when s < 2^-33;
// the second, run only under p7, keeps p7 only if s is also > -2^-33.
{ .mfi
      nop.m 0
      fcmp.lt.s1 p7, p6 = FR_s, FR_Two_to_M33
      nop.i 0
};;

{ .mfi
      nop.m 0
(p7)  fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33 // p6 if |s| >= 2^-33, else p7
      nop.i 0
};;

{ .mfi
      nop.m 0
      fms.s1 FR_c = FR_s, f1, FR_r // c = s - r, for |s| >= 2^-33
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_rsq = FR_r, FR_r, f0 // rsq = r * r, for |s| >= 2^-33
      nop.i 0
};;

{ .mfi
      nop.m 0
(p7)  fma.s1 FR_w = FR_N_float, FR_P_3, f0 // small s: w = N * P_3
      nop.i 0
};;

{ .mmf
      ldfe FR_C_1 = [GR_ad_pp], 16 // Load C_1 if i_1=0
      ldfe FR_S_1 = [GR_ad_qq], 16 // Load S_1 if i_1=1
      frcpa.s1 FR_r_hi, p15 = f1, FR_r // r_hi = frcpa(r)
};;

{ .mfi
      nop.m 0
(p6)  fcmp.lt.unc.s1 p8, p13 = FR_r, FR_Two_to_M3 // If big s, test r with 2^-3
      nop.i 0
};;

{ .mfi
      nop.m 0
(p7)  fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w // small s: U_1 = N * P_2 + w
      nop.i 0
};;

//
//     For big s: r = s - w: No further reduction is necessary
//     For small s: w = N * P_3 (change sign) More reduction
//
{ .mfi
      nop.m 0
(p8)  fcmp.gt.s1 p8, p13 = FR_r, FR_Neg_Two_to_M3 // If big s, p8 if |r| < 2^-3
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_polyS = FR_rsq, FR_PP_8, FR_PP_7 // poly = rsq*PP_8+PP_7
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_polyC = FR_rsq, FR_QQ_8, FR_QQ_7 // poly = rsq*QQ_8+QQ_7
      nop.i 0
};;

{ .mfi
      nop.m 0
(p7)  fms.s1 FR_r = FR_s, f1, FR_U_1 // small s: r = s - U_1
      nop.i 0
};;

{ .mfi
      nop.m 0
(p6)  fma.s1 FR_r_cubed = FR_r, FR_rsq, f0 // rcubed = r * rsq
      nop.i 0
};;

{ .mfi
//
//     For big s: Is |r| < 2**(-3)?
//     For big s: c = S - r
//     For small s: U_1 = N * P_2 + w
//
//     If p8 is set, prepare to branch to Small_R.
//     If p9 is set, prepare to branch to Normal_R.
//     NOTE(review): the Normal_R branch below is predicated on p13, not p9.
//     For big s, r is complete here.
//
//
//     For big s: c = c + w (w has not been negated.)
//     NOTE(review): the instruction below is an fms, i.e. it computes c - w.
//     For small s: r = S - U_1
//
      nop.m 0
(p6)  fms.s1 FR_c = FR_c, f1, FR_w
      nop.i 0
}
{ .mbb
      nop.m 0
(p8)  br.cond.spnt SINCOSL_SMALL_R_1 // Branch if |s|>=2^-33, |r| < 2^-3,
                                     // and pi/4 <= |x| < 2^24
(p13) br.cond.sptk SINCOSL_NORMAL_R_1 // Branch if |s|>=2^-33, |r| >= 2^-3,
                                      // and pi/4 <= |x| < 2^24
};;

SINCOSL_S_TINY:
//
// Here if |s| < 2^-33, and pi/4 <= |x| < 2^24
//
// Predicate plan for this section:
//   p8  / p12 : bit 0 of GR_N_Inc is clear / set.
//   Under p8 : p10/p11 = bit 1 of GR_N_SignC clear/set,
//              p8 /p9  = bit 1 of GR_N_SignS clear/set.
//   Under p12: p14/p15 = bit 1 of GR_N_SignC clear/set,
//              p12/p13 = bit 1 of GR_N_SignS clear/set.
// The .unc compares clear both targets when their qualifying predicate is
// false, so only one of the two groups is live at a time.
//
{ .mfi
      and GR_N_SinCos = 0x1, GR_N_Inc // N_SinCos = low bit of N_Inc
      fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1
      tbit.z p8,p12 = GR_N_Inc, 0 // p8 if bit 0 clear, else p12
};;

//
//     For small s: U_2 = N * P_2 - U_1
//     S_1 stored constant - grab the one stored with the
//     coefficients.
//
{ .mfi
      ldfe FR_S_1 = [GR_ad_s1], 16
      fma.s1 FR_polyC = f0, f1, FR_Neg_Two_to_M67 // polyC = -2^-67
      sub GR_N_SignS = GR_N_Inc, GR_N_SinCos // N_SignS = N_Inc - N_SinCos
}
{ .mfi
      add GR_N_SignC = GR_N_Inc, GR_N_SinCos // N_SignC = N_Inc + N_SinCos
      nop.f 0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fms.s1 FR_s = FR_s, f1, FR_r // s = s - r
(p8)  tbit.z.unc p10,p11 = GR_N_SignC, 1
}
{ .mfi
      nop.m 0
      fma.s1 FR_rsq = FR_r, FR_r, f0 // rsq = r * r (r is rescaled below)
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_U_2 = FR_U_2, f1, FR_w // U_2 = U_2 + w
(p8)  tbit.z.unc p8,p9 = GR_N_SignS, 1
};;

{ .mfi
      nop.m 0
      fmerge.se FR_FirstS = FR_r, FR_r // FirstS = r
(p12) tbit.z.unc p14,p15 = GR_N_SignC, 1
}
{ .mfi
      nop.m 0
      fma.s1 FR_FirstC = f0, f1, f1 // FirstC = 1.0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fms.s1 FR_c = FR_s, f1, FR_U_1 // c = s - U_1
(p12) tbit.z.unc p12,p13 = GR_N_SignS, 1
};;

{ .mfi
      nop.m 0
      fma.s1 FR_r = FR_S_1, FR_r, f0 // r = S_1 * r
      nop.i 0
};;

// NOTE(review): the product written to FR_S_1 here is not read again in
// the visible code; presumably this s0 operation is issued for its effect
// on the user status flags - confirm against the full source.
{ .mfi
      nop.m 0
      fma.s0 FR_S_1 = FR_S_1, FR_S_1, f0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fms.s1 FR_c = FR_c, f1, FR_U_2 // c = c - U_2
      nop.i 0
};;

// Negate the leading sine term when bit 1 of the selected sign word is set.
.pred.rel "mutex",p9,p15
{ .mfi
      nop.m 0
(p9)  fms.s0 FR_FirstS = f1, f0, FR_FirstS // FirstS = -FirstS
      nop.i 0
}
{ .mfi
      nop.m 0
(p15) fms.s0 FR_FirstS = f1, f0, FR_FirstS // FirstS = -FirstS
      nop.i 0
};;

// Same for the leading cosine term.
.pred.rel "mutex",p11,p13
{ .mfi
      nop.m 0
(p11) fms.s0 FR_FirstC = f1, f0, FR_FirstC // FirstC = -FirstC
      nop.i 0
}
{ .mfi
      nop.m 0
(p13) fms.s0 FR_FirstC = f1, f0, FR_FirstC // FirstC = -FirstC
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_polyS = FR_r, FR_rsq, FR_c // polyS = (S_1*r)*rsq + c
      nop.i 0
};;

// Bit 0 of N_Inc clear: ResultS comes from FirstS/polyS and ResultC from
// FirstC/polyC; the correction is added or subtracted to match the sign
// already applied to the leading term.
.pred.rel "mutex",p8,p9
{ .mfi
      nop.m 0
(p8)  fma.s0 FR_ResultS = FR_FirstS, f1, FR_polyS
      nop.i 0
}
{ .mfi
      nop.m 0
(p9)  fms.s0 FR_ResultS = FR_FirstS, f1, FR_polyS
      nop.i 0
};;

.pred.rel "mutex",p10,p11
{ .mfi
      nop.m 0
(p10) fma.s0 FR_ResultC = FR_FirstC, f1, FR_polyC
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fms.s0 FR_ResultC = FR_FirstC, f1, FR_polyC
      nop.i 0
};;

// Bit 0 of N_Inc set: the roles swap - ResultS comes from FirstC/polyC
// and ResultC from FirstS/polyS.
.pred.rel "mutex",p12,p13
{ .mfi
      nop.m 0
(p12) fma.s0 FR_ResultS = FR_FirstC, f1, FR_polyC
      nop.i 0
}
{ .mfi
      nop.m 0
(p13) fms.s0 FR_ResultS = FR_FirstC, f1, FR_polyC
      nop.i 0
};;

.pred.rel "mutex",p14,p15
{ .mfi
      nop.m 0
(p14) fma.s0 FR_ResultC = FR_FirstS, f1, FR_polyS
      nop.i 0
}
// Return immediately, without storing through the result pointers, when
// GR_Cis == 1.  NOTE(review): GR_Cis is set outside this span; presumably
// it marks an entry point that returns its results in registers - confirm.
{ .mfb
      cmp.eq p10, p0 = 0x1, GR_Cis
(p15) fms.s0 FR_ResultC = FR_FirstS, f1, FR_polyS
(p10) br.ret.sptk b0
};;

{ .mmb // exit for sincosl
      stfe [sincos_pResSin] = FR_ResultS // store ResultS via sincos_pResSin
      stfe [sincos_pResCos] = FR_ResultC // store ResultC via sincos_pResCos
      br.ret.sptk b0
};;

SINCOSL_LARGER_ARG:
//
// Here if 2^24 <= |x| < 2^63
//
// Not reachable by fall-through (the bundle above returns); the branch
// that enters here is not part of this span.
//
{ .mfi
      ldfe FR_d_1 = [GR_ad_p], 16 // Load d_1 for |x| >= 2^24 path
      fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0 // N_0 = Arg * Inv_P_0
      nop.i 0
};;

{ .mmi
      ldfps FR_Two_to_M14, FR_Neg_Two_to_M14 = [GR_ad_m14]
      nop.m 0
      nop.i 0
};;

{ .mfi
      ldfe FR_d_2 = [GR_ad_p], 16 // Load d_2 for |x| >= 2^24 path
      nop.f 0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fcvt.fx.s1 FR_N_0_fix = FR_N_0 // N_0_fix = integer part of N_0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fcvt.xf FR_N_0 = FR_N_0_fix // Make N_0 the integer part
      nop.i 0
};;

{ .mfi
      nop.m 0
      fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X // Arg'=-N_0*P_0+Arg
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_w = FR_N_0, FR_d_1, f0 // w = N_0 * d_1
      nop.i 0
};;

{ .mfi
      nop.m 0
      fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0 // N = A' * 2/pi
      nop.i 0
};;

{ .mfi
      nop.m 0
      fcvt.fx.s1 FR_N_fix = FR_N_float // N_fix is the integer part
      nop.i 0
};;

{ .mfi
      nop.m 0
      fcvt.xf FR_N_float = FR_N_fix // N_float = N_fix converted back to FP
      nop.i 0
};;

{ .mfi
      getf.sig GR_N_Inc = FR_N_fix // N is the integer part of
                                   // the reduced-reduced argument
      nop.f 0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime // s = -N*P_1 + Arg'
      nop.i 0
}
{ .mfi
      nop.m 0
      fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w // w = -N*P_2 + w
      nop.i 0
};;

//
//     For |s| > 2**(-14) r = S + w (r complete)
//     Else U_hi = N_0 * d_1
//
// Same two-step range test as on the < 2^24 path, here against 2^-14.
{ .mfi
      nop.m 0
      fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14
      nop.i 0
};;

{ .mfi
      nop.m 0
(p9)  fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14 // p9 if |s| < 2^-14
      nop.i 0
};;

//
//     Either S <= -2**(-14) or S >= 2**(-14)
//     or -2**(-14) < s < 2**(-14)
//
{ .mfi
      nop.m 0
(p9)  fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0 // small s: V_hi = N * P_2
      nop.i 0
}
{ .mfi
      nop.m 0
(p9)  fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0 // small s: U_hi = N_0 * d_1
      nop.i 0
};;

{ .mfi
      nop.m 0
(p8)  fma.s1 FR_r = FR_s, f1, FR_w // big s: r = s + w
      nop.i 0
}
{ .mfi
      nop.m 0
(p9)  fma.s1 FR_w = FR_N_float, FR_P_3, f0 // small s: w = N * P_3
      nop.i 0
};;

//
//     We need abs of both U_hi and V_hi - don't
//     worry about switched sign of V_hi.
//
//     Big s: finish up c = (S - r) + w (c complete)
//     Case 4: A = U_hi + V_hi
//     Note: Worry about switched sign of V_hi, so subtract instead of add.
//
{ .mfi
      nop.m 0
(p9)  fms.s1 FR_A = FR_U_hi, f1, FR_V_hi // A = U_hi - V_hi
      nop.i 0
}
{ .mfi
      nop.m 0
(p9)  fnma.s1 FR_V_lo = FR_N_float, FR_P_2, FR_V_hi // V_lo = -N*P_2 + V_hi
      nop.i 0
};;

{ .mfi
      nop.m 0
(p9)  fmerge.s FR_V_hiabs = f0, FR_V_hi // V_hiabs = V_hi with f0's (+) sign
      nop.i 0
}
{ .mfi
      nop.m 0
(p9)  fms.s1 FR_U_lo = FR_N_0, FR_d_1, FR_U_hi // For small s: U_lo=N_0*d_1-U_hi
      nop.i 0
};;

//
//     For big s: Is |r| < 2**(-3)
//     For big s: if p12 set, prepare to branch to Small_R.
//     For big s: If p13 set, prepare to branch to Normal_R.
//
{ .mfi
      nop.m 0
(p9)  fmerge.s FR_U_hiabs = f0, FR_U_hi // U_hiabs = U_hi with f0's (+) sign
      nop.i 0
}
{ .mfi
      nop.m 0
(p8)  fms.s1 FR_c = FR_s, f1, FR_r // For big s: c = S - r
      nop.i 0
};;

//
//     For small S: V_hi = N * P_2
//                  w = N * P_3
//     Note the product does not include the (-) as in the writeup
//     so (-) missing for V_hi and w.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -