📄 s_cos.s
字号:
{ .mfi ldfe FR_Pi_by_4 = [GR_Table_Base1], 16(p11) fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63 nop.i 999 ;;}{ .mmi ldfe FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;; ldfs FR_Two_to_M3 = [GR_Table_Base1], 4 nop.i 999 ;;}{ .mib ldfs FR_Neg_Two_to_M3 = [GR_Table_Base1], 12 nop.i 999//// Load P_2// Load P_3// Load pi_by_4// Load neg_pi_by_4// Load 2**(-3)// Load -2**(-3).//(p10) br.cond.spnt L(SINCOS_ARG_TOO_LARGE) ;;}{ .mib nop.m 999 nop.i 999//// Branch out if x >= 2**63. Use Payne-Hanek Reduction//(p7) br.cond.spnt L(SINCOS_LARGER_ARG) ;;}{ .mfi nop.m 999//// Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.// fma.s1 FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0 nop.i 999 ;;}{ .mfi nop.m 999 fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4 nop.i 999 ;;}{ .mfi nop.m 999//// Select the case when |Arg| < pi/4// Else Select the case when |Arg| >= pi/4// fcvt.fx.s1 FR_N_fix = FR_N_float nop.i 999 ;;}{ .mfi nop.m 999//// N = Arg * 2/pi// Check if Arg < pi/4//(p6) fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4 nop.i 999 ;;}//// Case 2: Convert integer N_fix back to normalized floating-point value.// Case 1: p8 is only affected when p6 is set//{ .mfi(p7) ldfs FR_Two_to_M33 = [GR_Table_Base1], 4//// Grab the integer part of N and call it N_fix//(p6) fmerge.se FR_r = FR_Input_X, FR_Input_X// If |x| < pi/4, r = x and c = 0// lf |x| < pi/4, is x < 2**(-3).// r = Arg// c = 0(p6) mov GR_N_Inc = GR_Sin_or_Cos ;;}{ .mmf nop.m 999(p7) ldfs FR_Neg_Two_to_M33 = [GR_Table_Base1], 4(p6) fmerge.se FR_c = f0, f0}{ .mfi nop.m 999(p6) fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3 nop.i 999 ;;}{ .mfi nop.m 999//// lf |x| < pi/4, is -2**(-3)< x < 2**(-3) - set p8.// If |x| >= pi/4,// Create the right N for |x| < pi/4 and otherwise// Case 2: Place integer part of N in GP register//(p7) fcvt.xf FR_N_float = FR_N_fix nop.i 999 ;;}{ .mmf nop.m 999(p7) getf.sig GR_N_Inc = FR_N_fix(p8) fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;}{ .mib nop.m 999 nop.i 999//// Load 2**(-33), -2**(-33)//(p8) br.cond.spnt L(SINCOS_SMALL_R) ;;}{ .mib nop.m 999 nop.i 999(p6) br.cond.sptk L(SINCOS_NORMAL_R) ;;}//// if |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.////// In this branch, |x| >= pi/4.//{ .mfi ldfs FR_Neg_Two_to_M67 = [GR_Table_Base1], 8//// Load -2**(-67)// fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X//// w = N * P_2// s = -N * P_1 + Arg// add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos}{ .mfi nop.m 999 fma.s1 FR_w = FR_N_float, FR_P_2, f0 nop.i 999 ;;}{ .mfi nop.m 999//// Adjust N_fix by N_inc to determine whether sine or// cosine is being calculated// fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33 nop.i 999 ;;}{ .mfi nop.m 999(p7) fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33 nop.i 999 ;;}{ .mfi nop.m 999// Remember x >= pi/4.// Is s <= -2**(-33) or s >= 2**(-33) (p6)// or -2**(-33) < s < 2**(-33) (p7)(p6) fms.s1 FR_r = FR_s, f1, FR_w nop.i 999}{ .mfi nop.m 999(p7) fma.s1 FR_w = FR_N_float, FR_P_3, f0 nop.i 999 ;;}{ .mfi nop.m 999(p7) fma.s1 FR_U_1 = FR_N_float, FR_P_2, FR_w nop.i 999}{ .mfi nop.m 999(p6) fms.s1 FR_c = FR_s, f1, FR_r nop.i 999 ;;}{ .mfi nop.m 999//// For big s: r = s - w: No futher reduction is necessary// For small s: w = N * P_3 (change sign) More reduction//(p6) fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3 nop.i 999 ;;}{ .mfi nop.m 999(p8) fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3 nop.i 999 ;;}{ .mfi nop.m 999(p7) fms.s1 FR_r = FR_s, f1, FR_U_1 nop.i 999}{ .mfb nop.m 999//// For big s: Is |r| < 2**(-3)?// For big s: c = S - r// For small s: U_1 = N * P_2 + w//// If p8 is set, prepare to branch to Small_R.// If p9 is set, prepare to branch to Normal_R.// For big s, r is complete here.//(p6) fms.s1 FR_c = FR_c, f1, FR_w//// For big s: c = c + w (w has not been negated.)// For small s: r = S - U_1//(p8) br.cond.spnt L(SINCOS_SMALL_R) ;;}{ .mib nop.m 999 nop.i 999(p9) br.cond.sptk L(SINCOS_NORMAL_R) ;;}{ .mfi(p7) add GR_Table_Base1 = 224, GR_Table_Base1//// Branch to SINCOS_SMALL_R or SINCOS_NORMAL_R//(p7) fms.s1 FR_U_2 = FR_N_float, FR_P_2, FR_U_1//// c = S - U_1// r = S_1 * r////(p7) extr.u GR_i_1 = GR_N_Inc, 0, 1}{ .mmi nop.m 999 ;;//// Get [i_0,i_1] - two lsb of N_fix_gr.// Do dummy fmpy so inexact is always set.//(p7) cmp.eq.unc p9, p10 = 0x0, GR_i_1(p7) extr.u GR_i_0 = GR_N_Inc, 1, 1 ;;}//// For small s: U_2 = N * P_2 - U_1// S_1 stored constant - grab the one stored with the// coefficients.//{ .mfi(p7) ldfe FR_S_1 = [GR_Table_Base1], 16//// Check if i_1 and i_0 != 0//(p10) fma.s1 FR_poly = f0, f1, FR_Neg_Two_to_M67(p7) cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;}{ .mfi nop.m 999(p7) fms.s1 FR_s = FR_s, f1, FR_r nop.i 999}{ .mfi nop.m 999//// S = S - r// U_2 = U_2 + w// load S_1//(p7) fma.s1 FR_rsq = FR_r, FR_r, f0 nop.i 999 ;;}{ .mfi nop.m 999(p7) fma.s1 FR_U_2 = FR_U_2, f1, FR_w nop.i 999}{ .mfi nop.m 999(p7) fmerge.se FR_Input_X = FR_r, FR_r nop.i 999 ;;}{ .mfi nop.m 999(p10) fma.s1 FR_Input_X = f0, f1, f1 nop.i 999 ;;}{ .mfi nop.m 999//// FR_rsq = r * r// Save r as the result.//(p7) fms.s1 FR_c = FR_s, f1, FR_U_1 nop.i 999 ;;}{ .mfi nop.m 999//// if ( i_1 ==0) poly = c + S_1*r*r*r// else Result = 1//(p12) fnma.s1 FR_Input_X = FR_Input_X, f1, f0 nop.i 999}{ .mfi nop.m 999(p7) fma.s1 FR_r = FR_S_1, FR_r, f0 nop.i 999 ;;}{ .mfi nop.m 999(p7) fma.d.s0 FR_S_1 = FR_S_1, FR_S_1, f0 nop.i 999 ;;}{ .mfi nop.m 999//// If i_1 != 0, poly = 2**(-67)//(p7) fms.s1 FR_c = FR_c, f1, FR_U_2 nop.i 999 ;;}{ .mfi nop.m 999//// c = c - U_2//(p9) fma.s1 FR_poly = FR_r, FR_rsq, FR_c nop.i 999 ;;}{ .mfi nop.m 999//// i_0 != 0, so Result = -Result//(p11) fma.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly nop.i 999 ;;}{ .mfb nop.m 999(p12) fms.d.s0 FR_Input_X = FR_Input_X, f1, FR_poly//// if (i_0 == 0), Result = Result + poly// else Result = Result - poly// br.ret.sptk b0 ;;}L(SINCOS_LARGER_ARG):{ .mfi nop.m 999 fma.s1 FR_N_0 = FR_Input_X, FR_Inv_P_0, f0 nop.i 999};;// This path for argument > 2*24// Adjust table_ptr1 to beginning of table.//{ .mmi nop.m 999 addl GR_Table_Base = @ltoff(FSINCOS_CONSTANTS#), gp nop.i 999};;{ .mmi ld8 GR_Table_Base = [GR_Table_Base] nop.m 999 nop.i 999};;//// Point to 2*-14// N_0 = Arg * Inv_P_0//{ .mmi add GR_Table_Base = 688, GR_Table_Base ;; ldfs FR_Two_to_M14 = [GR_Table_Base], 4 nop.i 999 ;;}{ .mfi ldfs FR_Neg_Two_to_M14 = [GR_Table_Base], 0 nop.f 999 nop.i 999 ;;}{ .mfi nop.m 999//// Load values 2**(-14) and -2**(-14)// fcvt.fx.s1 FR_N_0_fix = FR_N_0 nop.i 999 ;;}{ .mfi nop.m 999//// N_0_fix = integer part of N_0// fcvt.xf FR_N_0 = FR_N_0_fix nop.i 999 ;;}{ .mfi nop.m 999//// Make N_0 the integer part// fnma.s1 FR_ArgPrime = FR_N_0, FR_P_0, FR_Input_X nop.i 999}{ .mfi nop.m 999 fma.s1 FR_w = FR_N_0, FR_d_1, f0 nop.i 999 ;;}{ .mfi nop.m 999//// Arg' = -N_0 * P_0 + Arg// w = N_0 * d_1// fma.s1 FR_N_float = FR_ArgPrime, FR_Inv_pi_by_2, f0 nop.i 999 ;;}{ .mfi nop.m 999//// N = A' * 2/pi// fcvt.fx.s1 FR_N_fix = FR_N_float nop.i 999 ;;}{ .mfi nop.m 999//// N_fix is the integer part// fcvt.xf FR_N_float = FR_N_fix nop.i 999 ;;}{ .mfi getf.sig GR_N_Inc = FR_N_fix nop.f 999 nop.i 999 ;;}{ .mii nop.m 999 nop.i 999 ;; add GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos ;;}{ .mfi nop.m 999//// N is the integer part of the reduced-reduced argument.// Put the integer in a GP register// fnma.s1 FR_s = FR_N_float, FR_P_1, FR_ArgPrime nop.i 999}{ .mfi nop.m 999 fnma.s1 FR_w = FR_N_float, FR_P_2, FR_w nop.i 999 ;;}{ .mfi nop.m 999//// s = -N*P_1 + Arg'// w = -N*P_2 + w// N_fix_gr = N_fix_gr + N_inc// fcmp.lt.unc.s1 p9, p8 = FR_s, FR_Two_to_M14 nop.i 999 ;;}{ .mfi nop.m 999(p9) fcmp.gt.s1 p9, p8 = FR_s, FR_Neg_Two_to_M14 nop.i 999 ;;}{ .mfi nop.m 999//// For |s| > 2**(-14) r = S + w (r complete)// Else U_hi = N_0 * d_1//(p9) fma.s1 FR_V_hi = FR_N_float, FR_P_2, f0 nop.i 999}{ .mfi nop.m 999(p9) fma.s1 FR_U_hi = FR_N_0, FR_d_1, f0 nop.i 999 ;;}{ .mfi nop.m 999//// Either S <= -2**(-14) or S >= 2**(-14)// or -2**(-14) < s < 2**(-14)//
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -