📄 s_cosl.s
字号:
// ---------------------------------------------------------------------
// Register aliases.
//
// Several aliases deliberately name the same physical register
// (FR_V/FR_V_hi = f75, FR_PP_n/FR_QQ_n = f81..f87, and
// FR_Two_to_M3/FR_Neg_Two_to_63 = f94).  In the code below
// FR_Neg_Two_to_63 is consumed by its fcmp before FR_Two_to_M3 is
// loaded, so the f94 sharing is safe in this excerpt.
//
// Symbols used below but not defined in this excerpt (FR_Input_X,
// FR_N_float, FR_N_fix, FR_P_1..FR_P_3, FR_Two_to_24, L(),
// ASM_SIZE_DIRECTIVE, FSINCOSL_CONSTANTS, ...) are expected to come
// from elsewhere in the file or its includes.
// ---------------------------------------------------------------------
FR_s             = f46
FR_w             = f47
FR_c             = f48
FR_r             = f49
FR_Z             = f50
FR_A             = f51
FR_a             = f52
FR_t             = f53
FR_U_1           = f54
FR_U_2           = f55
FR_C_1           = f56
FR_C_2           = f57
FR_C_3           = f58
FR_C_4           = f59
FR_C_5           = f60
FR_S_1           = f61
FR_S_2           = f62
FR_S_3           = f63
FR_S_4           = f64
FR_S_5           = f65
FR_poly_hi       = f66
FR_poly_lo       = f67
FR_r_hi          = f68
FR_r_lo          = f69
FR_rsq           = f70
FR_r_cubed       = f71
FR_C_hi          = f72
FR_N_0           = f73
FR_d_1           = f74
FR_V             = f75
FR_V_hi          = f75
FR_V_lo          = f76
FR_U_hi          = f77
FR_U_lo          = f78
FR_U_hiabs       = f79
FR_V_hiabs       = f80
FR_PP_8          = f81
FR_QQ_8          = f81
FR_PP_7          = f82
FR_QQ_7          = f82
FR_PP_6          = f83
FR_QQ_6          = f83
FR_PP_5          = f84
FR_QQ_5          = f84
FR_PP_4          = f85
FR_QQ_4          = f85
FR_PP_3          = f86
FR_QQ_3          = f86
FR_PP_2          = f87
FR_QQ_2          = f87
FR_QQ_1          = f88
FR_N_0_fix       = f89
FR_Inv_P_0       = f90
FR_corr          = f91
FR_poly          = f92
FR_d_2           = f93
FR_Two_to_M3     = f94
FR_Neg_Two_to_63 = f94
FR_P_0           = f95
FR_C_lo          = f96
FR_PP_1          = f97
FR_PP_1_lo       = f98
FR_ArgPrime      = f99

GR_Table_Base    = r32
GR_Table_Base1   = r33
GR_i_0           = r34
GR_i_1           = r35
GR_N_Inc         = r36
GR_Sin_or_Cos    = r37

// Added for unwind support
GR_SAVE_B0       = r39
GR_SAVE_GP       = r40
GR_SAVE_PFS      = r41

.global sinl#
.global cosl#
#ifdef _LIBC
.global __sinl#
.global __cosl#
#endif

// ---------------------------------------------------------------------
// sinl entry point.
//
// Sets GR_Sin_or_Cos = 0, loads the address of FSINCOSL_CONSTANTS
// through the linkage table (gp-relative), and branches into the body
// shared with cosl at L(SINCOSL_CONTINUE).
// ---------------------------------------------------------------------
.section .text
.proc  sinl#
#ifdef _LIBC
.proc  __sinl#
#endif
.align 64 
sinl:
#ifdef _LIBC
__sinl:
#endif
{ .mlx
        alloc   GR_Table_Base = ar.pfs,0,12,2,0
(p0)    movl    GR_Sin_or_Cos = 0x0 ;;
}
{ .mmi
        nop.m   999
(p0)    addl    GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
        nop.i   999
}
;;
{ .mmb
        ld8     GR_Table_Base = [GR_Table_Base]
        nop.m   999
(p0)    br.cond.sptk L(SINCOSL_CONTINUE) ;;
}
;;
.endp sinl#
ASM_SIZE_DIRECTIVE(sinl#)

// ---------------------------------------------------------------------
// cosl entry point.
//
// Same as sinl except GR_Sin_or_Cos = 1.  GR_Sin_or_Cos is later added
// to the integer multiple N (see GR_N_Inc), which is how the shared
// body distinguishes the two functions.  Falls through into
// L(SINCOSL_CONTINUE).
// ---------------------------------------------------------------------
.section .text
.proc  cosl#
cosl:
#ifdef _LIBC
.proc  __cosl#
__cosl:
#endif
{ .mlx
        alloc   GR_Table_Base= ar.pfs,0,12,2,0
(p0)    movl    GR_Sin_or_Cos = 0x1 ;;
}
;;
{ .mmi
        nop.m   999
(p0)    addl    GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
        nop.i   999
}
;;
{ .mmb
        ld8     GR_Table_Base = [GR_Table_Base]
        nop.m   999
        nop.b   999
}
;;

//
//     Shared body.  GR_Table_Base holds the table address; a second
//     pointer GR_Table_Base1 is set 96 bytes further in.  Both are
//     advanced by post-increment loads from here on.
//
L(SINCOSL_CONTINUE): 
{ .mmi
(p0)    add     GR_Table_Base1 = 96, GR_Table_Base
(p0)    ldfs    FR_Two_to_24 = [GR_Table_Base], 4
//     Save the return branch register in r39 (aliased as GR_SAVE_B0).
(p0)    mov     r39 = b0 
;;
}
{ .mmi
        nop.m   0
//
//     Load the 2**24 / 2**63 thresholds and their negatives.
//
(p0)    ldfs    FR_Neg_Two_to_24 = [GR_Table_Base], 12
        nop.i   0
}
{ .mfi
(p0)    ldfs    FR_Two_to_63 = [GR_Table_Base1], 4
//
//     Classify the input before doing arithmetic on it, so that no
//     denormal exception is generated for unsupported operands:
//       p6  : NatVal, QNaN, SNaN, +/-Inf        (mask 0x1E3)
//       p8  : matches no class - EM unsupported (mask 0x1FF, .nm)
//       p10 : +/-Zero                           (mask 0x007)
//
(p0)    fclass.m.unc p6, p0 = FR_Input_X, 0x1E3
        nop.i   0
}
;;
{ .mmf
        nop.m   999
(p0)    ldfs    FR_Neg_Two_to_63 = [GR_Table_Base1], 12
(p0)    fclass.nm.unc p8, p0 = FR_Input_X, 0x1FF
}
{ .mfb
        nop.m   999
(p0)    fclass.m.unc p10, p0 = FR_Input_X, 0x007
//     Branch if NatVal, NaN or +/-Inf.
(p6)    br.cond.spnt L(SINCOSL_SPECIAL) ;;
}
{ .mib
        nop.m   999
        nop.i   999
//     Branch if unsupported operand.
(p8)    br.cond.spnt L(SINCOSL_SPECIAL) ;;
}
{ .mib
        nop.m   999
        nop.i   999
//     Branch if +/- zero.
(p10)   br.cond.spnt L(SINCOSL_ZERO) ;;
}
//
//     Load Inv_pi_by_2 and Inv_P_0 (the latter is for pre-reduction).
//
{ .mmb
(p0)    ldfe    FR_Inv_pi_by_2 = [GR_Table_Base], 16
(p0)    ldfe    FR_Inv_P_0 = [GR_Table_Base1], 16
        nop.b   999 ;;
}
//
//     Load d_1 and P_0.
//
{ .mmb
(p0)    ldfe    FR_d_1 = [GR_Table_Base1], 16
(p0)    ldfe    FR_P_0 = [GR_Table_Base], 16
        nop.b   999 ;;
}
//
//     Load d_2 and P_1.
//
{ .mmb
(p0)    ldfe    FR_d_2 = [GR_Table_Base1], 16
(p0)    ldfe    FR_P_1 = [GR_Table_Base], 16
        nop.b   999 ;;
}
//
//     Load P_2.
//     Is x <= -2**24?  (p7 = yes, p8 = no)
//
{ .mfi
(p0)    ldfe    FR_P_2 = [GR_Table_Base], 16
(p0)    fcmp.le.unc.s1 p7, p8 = FR_Input_X, FR_Neg_Two_to_24
        nop.i   999 ;;
}
//
//     Load P_3.
//
{ .mbb
(p0)    ldfe    FR_P_3 = [GR_Table_Base], 16
        nop.b   999
        nop.b   999 ;;
}
//
//     If x > -2**24: is x >= 2**24?  After this, p7 means
//     x <= -2**24 or x >= 2**24.
//
{ .mfi
        nop.m   999
(p8)    fcmp.ge.s1 p7, p0 = FR_Input_X, FR_Two_to_24
        nop.i   999
}
//
//     Load pi_by_4.
//     Is x <= -2**63?  (p10 = yes, p11 = no).  This compare uses
//     status field s0; per the original comments it is the "useful
//     fcmp" that raises a possible denormal-operand flag.
//
//     Path selection (from the original comments):
//       -2**24 < x < 2**24  - CASE 1 OR 2
//       otherwise           - CASE 3 OR 4
//
{ .mfi
(p0)    ldfe    FR_Pi_by_4 = [GR_Table_Base1], 16
(p0)    fcmp.le.unc.s0 p10, p11 = FR_Input_X, FR_Neg_Two_to_63
        nop.i   999 ;;
}
//
//     Load neg_pi_by_4, then 2**(-3).  FR_Two_to_M3 shares f94 with
//     FR_Neg_Two_to_63, whose only use in this excerpt is the fcmp above.
//
{ .mmi
(p0)    ldfe    FR_Neg_Pi_by_4 = [GR_Table_Base1], 16 ;;
(p0)    ldfs    FR_Two_to_M3 = [GR_Table_Base1], 4
        nop.i   999
}
//
//     If x > -2**63: is x >= 2**63?  After this, p10 means
//     x <= -2**63 or x >= 2**63.
//
{ .mfi
        nop.m   999
(p11)   fcmp.ge.s1 p10, p0 = FR_Input_X, FR_Two_to_63
        nop.i   999 ;;
}
//
//     Load -2**(-3).
//     Branch out if |x| >= 2**63.
//
{ .mib
(p0)    ldfs    FR_Neg_Two_to_M3 = [GR_Table_Base1], 12
        nop.i   999
(p10)   br.cond.spnt L(SINCOSL_ARG_TOO_LARGE) ;;
}
//
//     Branch if Arg <= -2**24 or Arg >= 2**24 and use pre-reduction.
//
{ .mib
        nop.m   999
        nop.i   999
(p7)    br.cond.spnt L(SINCOSL_LARGER_ARG) ;;
}
//
//     N = Arg * 2/pi
//
{ .mfi
        nop.m   999
(p0)    fma.s1  FR_N_float = FR_Input_X, FR_Inv_pi_by_2, f0
        nop.i   999 ;;
}
//
//     Check if Arg < pi/4  (p6 = yes, p7 = no)
//
{ .mfi
        nop.m   999
(p0)    fcmp.lt.unc.s1 p6, p7 = FR_Input_X, FR_Pi_by_4
        nop.i   999 ;;
}
//
//     Grab the integer part of N and call it N_fix
//
{ .mfi
        nop.m   999
(p0)    fcvt.fx.s1 FR_N_fix = FR_N_float
        nop.i   999 ;;
}
//
//     If Arg < pi/4, also check Arg > -pi/4.  After this:
//       p6 : |Arg| <  pi/4
//       p7 : |Arg| >= pi/4
//
{ .mfi
        nop.m   999
(p6)    fcmp.gt.s1 p6, p7 = FR_Input_X, FR_Neg_Pi_by_4
        nop.i   999 ;;
}
//
//     |x| >= pi/4 (p7): load 2**(-33).
//     |x| <  pi/4 (p6): r = Arg, and N_Inc is just the sin/cos selector.
//
{ .mfi
(p7)    ldfs    FR_Two_to_M33 = [GR_Table_Base1], 4
(p6)    fmerge.se FR_r = FR_Input_X, FR_Input_X
(p6)    mov     GR_N_Inc = GR_Sin_or_Cos ;;
}
//
//     |x| >= pi/4 (p7): load -2**(-33).
//     |x| <  pi/4 (p6): c = 0
//
{ .mmf
        nop.m   999
(p7)    ldfs    FR_Neg_Two_to_M33 = [GR_Table_Base1], 4
(p6)    fmerge.se FR_c = f0, f0
}
//
//     If |x| < pi/4, is x < 2**(-3)?  (p8 = yes, p9 = no; both are
//     cleared when p6 is not set.)
//
{ .mfi
        nop.m   999
(p6)    fcmp.lt.unc.s1 p8, p9 = FR_Input_X, FR_Two_to_M3
        nop.i   999 ;;
}
//
//     Case 2 (|x| >= pi/4): Convert integer N_fix back to normalized
//     floating-point value.
//
{ .mfi
        nop.m   999
(p7)    fcvt.xf FR_N_float = FR_N_fix
        nop.i   999 ;;
}
//
//     Case 2: Place integer part of N in a general register.
//     Case 1: if x < 2**(-3), is x > -2**(-3)?  p8 then means
//     -2**(-3) < x < 2**(-3).
//
{ .mmf
        nop.m   999
(p7)    getf.sig GR_N_Inc = FR_N_fix
(p8)    fcmp.gt.s1 p8, p0 = FR_Input_X, FR_Neg_Two_to_M3 ;;
}
//
//     If |x| < pi/4, branch based on |x| < 2**(-3) or otherwise.
//
{ .mib
        nop.m   999
        nop.i   999
(p8)    br.cond.spnt L(SINCOSL_SMALL_R) ;;
}
{ .mib
        nop.m   999
        nop.i   999
(p6)    br.cond.sptk L(SINCOSL_NORMAL_R) ;;
}
//
//     In this branch, |x| >= pi/4.
//
//     Load -2**(-67)
//     s = -N * P_1 + Arg
//     Adjust N by the sin/cos selector to determine whether sine or
//     cosine is being calculated.
// 
{ .mfi
(p0)    ldfs    FR_Neg_Two_to_M67 = [GR_Table_Base1], 8
(p0)    fnma.s1 FR_s = FR_N_float, FR_P_1, FR_Input_X
(p0)    add     GR_N_Inc = GR_N_Inc, GR_Sin_or_Cos
}
//
//     w = N * P_2
//
{ .mfi
        nop.m   999
(p0)    fma.s1  FR_w = FR_N_float, FR_P_2, f0
        nop.i   999 ;;
}
//
//     Is s <= -2**(-33) or s >= 2**(-33)   ("big s",   p6)
//     or -2**(-33) < s < 2**(-33)          ("small s", p7)
//
{ .mfi
        nop.m   999
(p0)    fcmp.lt.unc.s1 p7, p6 = FR_s, FR_Two_to_M33
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p7)    fcmp.gt.s1 p7, p6 = FR_s, FR_Neg_Two_to_M33
        nop.i   999 ;;
}
//
//     For big s: r = s - w: No further reduction is necessary 
//
{ .mfi
        nop.m   999
(p6)    fms.s1  FR_r = FR_s, f1, FR_w
        nop.i   999
}
//
//     For small s: w = N * P_3; more reduction follows.
//
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_w = FR_N_float, FR_P_3, f0
        nop.i   999 ;;
}
//
//     For small s: U_1 = N * P_2 + w
//
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_U_1 = FR_N_float, FR_P_2, FR_w
        nop.i   999
}
//
//     For big s: c = s - r
//
{ .mfi
        nop.m   999
(p6)    fms.s1  FR_c = FR_s, f1, FR_r
        nop.i   999 ;;
}
//
//     For big s: Is |r| < 2**(-3)?
//     If p8 is set, prepare to branch to Small_R.
//     If p9 is set, prepare to branch to Normal_R.
//     (Both are cleared on the small-s path because of .unc.)
//
{ .mfi
        nop.m   999
(p6)    fcmp.lt.unc.s1 p8, p9 = FR_r, FR_Two_to_M3
        nop.i   999 ;;
}
{ .mfi
        nop.m   999
(p8)    fcmp.gt.s1 p8, p9 = FR_r, FR_Neg_Two_to_M3
        nop.i   999 ;;
}
//
//     For small s: r = s - U_1
//
{ .mfi
        nop.m   999
(p7)    fms.s1  FR_r = FR_s, f1, FR_U_1
        nop.i   999
}
//
//     For big s: c = c - w.  r is complete here.
//     Branch to SINCOSL_SMALL_R or SINCOSL_NORMAL_R.
//
{ .mfb
        nop.m   999
(p6)    fms.s1  FR_c = FR_c, f1, FR_w
(p8)    br.cond.spnt L(SINCOSL_SMALL_R) ;;
}
{ .mib
        nop.m   999
        nop.i   999
(p9)    br.cond.sptk L(SINCOSL_NORMAL_R) ;;
}
//
//     Only the small-s path (p7) reaches this point.
//
//     Advance the table pointer by 224 bytes; per the original
//     comments this reaches the S_1 constant stored with the
//     coefficients.
//     U_2 = N * P_2 - U_1
//     i_1 = bit 0 of N_Inc
//
{ .mfi
(p7)    add     GR_Table_Base1 = 224, GR_Table_Base1
(p7)    fms.s1  FR_U_2 = FR_N_float, FR_P_2, FR_U_1
(p7)    extr.u  GR_i_1 = GR_N_Inc, 0, 1 ;;
}
//
//     Get [i_0,i_1] - the two least-significant bits of N_Inc.
//       p9  : i_1 == 0      p10 : i_1 != 0
//     i_0 = bit 1 of N_Inc
//
{ .mmi
        nop.m   999
(p7)    cmp.eq.unc p9, p10 = 0x0, GR_i_1
(p7)    extr.u  GR_i_0 = GR_N_Inc, 1, 1 ;;
}
//
//     Load S_1.
//     If i_1 != 0, poly = FR_Neg_Two_to_M67.
//       p11 : i_0 == 0      p12 : i_0 != 0
// 
{ .mfi
(p7)    ldfe    FR_S_1 = [GR_Table_Base1], 16
(p10)   fma.s1  FR_poly = f0, f1, FR_Neg_Two_to_M67
(p7)    cmp.eq.unc p11, p12 = 0x0, GR_i_0 ;;
}
//
//     s = s - r
//
{ .mfi
        nop.m   999
(p7)    fms.s1  FR_s = FR_s, f1, FR_r
        nop.i   999
}
//
//     FR_rsq = r * r
//
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_rsq = FR_r, FR_r, f0
        nop.i   999 ;;
}
//
//     U_2 = U_2 + w
//
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_U_2 = FR_U_2, f1, FR_w
        nop.i   999
}
//
//     Save r as the result.
//
{ .mfi
        nop.m   999
(p7)    fmerge.se FR_Input_X = FR_r, FR_r
        nop.i   999 ;;
}
//
//     If i_1 != 0, Result = 1 instead.
//
{ .mfi
        nop.m   999
(p10)   fma.s1  FR_Input_X = f0, f1, f1
        nop.i   999 ;;
}
//
//     c = s - U_1
//
{ .mfi
        nop.m   999
(p7)    fms.s1  FR_c = FR_s, f1, FR_U_1
        nop.i   999 ;;
}
//
//     i_0 != 0, so Result = -Result
//
{ .mfi
        nop.m   999
(p12)   fnma.s1 FR_Input_X = FR_Input_X, f1, f0
        nop.i   999
}
//
//     r = S_1 * r
//
{ .mfi
        nop.m   999
(p7)    fma.s1  FR_r = FR_S_1, FR_r, f0
        nop.i   999 ;;
}
//
//     Dummy multiply in status field s0; per the original comments
//     this is done so that inexact is always set.
//
{ .mfi
        nop.m   999
(p7)    fma.s0  FR_S_1 = FR_S_1, FR_S_1, f0
        nop.i   999 ;;
}
//
//     c = c - U_2
//
{ .mfi
        nop.m   999
(p7)    fms.s1  FR_c = FR_c, f1, FR_U_2
        nop.i   999 ;;
}
// 
//     If i_1 == 0, poly = c + S_1*r*r*r
//     (FR_r already holds S_1*r, FR_rsq holds r*r.)
// 
{ .mfi
        nop.m   999
(p9)    fma.s1  FR_poly = FR_r, FR_rsq, FR_c
        nop.i   999 ;;
}
//
//     if (i_0 == 0),  Result = Result + poly
//     else            Result = Result - poly
//
{ .mfi
        nop.m   999
(p11)   fma.s0  FR_Input_X = FR_Input_X, f1, FR_poly
        nop.i   999 ;;
}
{ .mfb
        nop.m   999
(p12)   fms.s0  FR_Input_X = FR_Input_X, f1, FR_poly
(p0)    br.ret.sptk b0 ;;
}

//
//     This path is for |argument| >= 2**24 (reached via p7 above).
//     N_0 = Arg * Inv_P_0
//
L(SINCOSL_LARGER_ARG): 
{ .mfi
        nop.m   999
(p0)    fma.s1  FR_N_0 = FR_Input_X, FR_Inv_P_0, f0
        nop.i   999
}
;;
//
//     Reset GR_Table_Base to the beginning of the constants table.
//
{ .mmi
        nop.m   999
(p0)    addl    GR_Table_Base = @ltoff(FSINCOSL_CONSTANTS#), gp
        nop.i   999
}
;;
{ .mmi
        ld8     GR_Table_Base = [GR_Table_Base]
        nop.m   999
        nop.i   999
}
;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -