📄 w_tgammal.s
字号:
GR_c_13                 = r58

// Floating Point Registers
FR_c_PosOverflow        = f123
FR_c_XN                 = f124

//=======================================================
// Polynomial part registers

// General Purpose Registers
GR_p_Table              = r59
GR_p_XN                 = r33
GR_p_Table2             = r34
GR_p_Int                = r35
GR_p_Offset             = r36
GR_p_Offset2            = r38
GR_p_X_Sgnd             = GR_l_signif_Z // = r37
GR_p_Exp                = r61
GR_p_Bias               = r62
GR_p_0p75               = r63

// Floating Point Registers
// NOTE(review): several names below map to the same physical register
// (e.g. FR_p_A20 and FR_p_XR8 are both f37, FR_p_Temp2L and FR_p_0p25 are
// both f73). Presumably their live ranges do not overlap - confirm before
// reordering any code that uses them.
FR_p_AbsX               = FR_l_AbsX // = f127
FR_p_IXN                = FR_n_IXN  // = f126
FR_p_XN                 = f32
FR_p_0p5                = f33
FR_p_1p5                = f34
FR_p_AbsXM1             = f35
FR_p_2                  = f36

FR_p_A20                = f37
FR_p_A19                = f38
FR_p_A18                = f39
FR_p_A17                = f40
FR_p_A16                = f41
FR_p_A15                = f42
FR_p_A14                = f43
FR_p_A13                = f44
FR_p_A12                = f45
FR_p_A11                = f46
FR_p_A10                = f47
FR_p_A9                 = f48
FR_p_A8                 = f49
FR_p_A7                 = f50
FR_p_A6                 = f51
FR_p_A5H                = f52
FR_p_A5L                = f53
FR_p_A4H                = f54
FR_p_A4L                = f55
FR_p_A3H                = f56
FR_p_A3L                = f57
FR_p_A2H                = f58
FR_p_A2L                = f59
FR_p_A1H                = f60
FR_p_A1L                = f61
FR_p_A0H                = f62
FR_p_A0L                = f63

FR_p_XR                 = f64
FR_p_XR2                = f65
FR_p_XR2L               = f52

FR_p_XR3                = f58
FR_p_XR3L               = f38

FR_p_XR4                = f42
FR_p_XR6                = f40
FR_p_XR8                = f37

FR_p_Poly5H             = f66
FR_p_Poly5L             = f67
FR_p_Poly4H             = f53
FR_p_Poly4L             = f44
FR_p_Poly3H             = f41
FR_p_Poly3L             = f47
FR_p_Poly2H             = f68
FR_p_Poly2L             = f54
FR_p_Poly1H             = f55
FR_p_Poly1L             = f46
FR_p_Poly0H             = f39
FR_p_Poly0L             = f43

FR_p_Temp5H             = f69
FR_p_Temp5L             = f70
FR_p_Temp4H             = f71
FR_p_Temp4L             = f60
FR_p_Temp2H             = f72
FR_p_Temp2L             = f73
FR_p_Temp1H             = f59
FR_p_Temp1L             = f61
FR_p_Temp0H             = f49
FR_p_Temp0L             = f48
FR_p_PolyTail           = f45
FR_p_OddPoly0H          = f56
FR_p_OddPoly0L          = f51

FR_p_0p25               = f73

//=======================================================
// Negative polynomial part registers

// General Purpose Registers
GR_r_sin_Table          = r47
GR_r_sin_Table2         = r60

// Floating Point Registers
FR_r_IXNS               = FR_n_IXNS
FR_r_IXN                = FR_n_IXN
FR_r_AbsX               = FR_l_AbsX

FR_r_A9                 = f74
FR_r_A8                 = f75
FR_r_A7                 = f76
FR_r_A6                 = f77
FR_r_A5                 = f78
FR_r_A4                 = f79
FR_r_A3                 = f80
FR_r_A2H                = f81
FR_r_A2L                = f82
FR_r_A1H                = f83
FR_r_A1L                = f84

FR_r_XNS                = f85
FR_r_XS                 = f86
FR_r_XS2                = f87
FR_r_XS2L               = f88
FR_r_XS4                = f89
FR_r_XS7                = f90
FR_r_XS8                = f91

FR_r_Tail               = f92

FR_r_TT                 = f93
FR_r_TH                 = f94
FR_r_TL                 = f95

FR_r_ResH               = f96
FR_r_ResL               = f97

FR_r_Res3H              = f98
FR_r_Res3L              = f99

FR_r_Res1H              = f100
FR_r_Res1L              = f101

FR_r_Y0                 = f102
FR_r_Q0                 = f103
FR_r_E0                 = f104
FR_r_E2                 = f105
FR_r_E1                 = f106
FR_r_Y1                 = f107
FR_r_E3                 = f108
FR_r_Y2                 = f109
FR_r_R0                 = f110
FR_r_E4                 = f111
FR_r_ZH                 = f112
FR_r_Y3                 = f113
FR_r_R1                 = f114
FR_r_ZHN                = f115
FR_r_ZL                 = f115
FR_r_NegOne             = f116

// The FR_z_* names below are bound to the same physical registers as the
// identically-suffixed FR_r_* names above (f102 .. f115).
FR_z_Y0                 = f102
FR_z_Q0                 = f103
FR_z_E0                 = f104
FR_z_E2                 = f105
FR_z_E1                 = f106
FR_z_Y1                 = f107
FR_z_E3                 = f108
FR_z_Y2                 = f109
FR_z_R0                 = f110
FR_z_E4                 = f111
FR_z_ZH                 = f112
FR_z_Y3                 = f113
FR_z_R1                 = f114
FR_z_ZL                 = f115

// General Purpose Registers
GR_SAVE_PFS             = r32
GR_DenOverflow          = r33
GR_u_XN                 = r34

GR_SAVE_B0              = r35
GR_SAVE_GP              = r36
GR_SAVE_SP              = r37

// Floating Point Registers
FR_u_IXN                = f34

// ERROR HANDLER REGISTERS
GR_Parameter_X          = r64
GR_Parameter_Y          = r65
GR_Parameter_RESULT     = r66
GR_Parameter_TAG        = r67

FR_RESULT               = f8
FR_X                    = f32
FR_Y                    = f1

.section .text
//=======================================================
// tgammal entry sequence (only the first part of the routine is shown
// here; the fall-through path continues after the last bundle below).
//
// In:   f8 = X (the code reads its argument from f8 throughout).
// Frame: alloc requests 0 inputs, 32 locals (r32-r63), 4 outputs
//        (r64-r67) and 0 rotating registers.
//
// Dispatch performed by the visible code, in program order:
//   p6  : fclass.m mask 0x1EF matched          -> tgammal_spec
//   p10 : |X| < 1                              -> tgamma_lt_1
//   p11 : bit pattern of |X| below that of 13.0-> tgamma_lt_13
//   p8  : X < 0 and exponent(|X|) >= 0x1003E   -> tgammal_singularity
//   otherwise falls through.
// While testing, the code also starts loading table pointers and the
// first table entries used by the later paths.
//=======================================================
GLOBAL_LIBM_ENTRY(tgammal)
{ .mfi
      alloc         r32                 = ar.pfs,0,32,4,0
      fabs          FR_l_AbsX           = f8 // Get absolute value of X
      addl          GR_n_sin_Table      = @ltoff(Constants_Tgammal_sin), gp
}
{ .mfi
      addl          GR_l_Log_Table=@ltoff(Constants_Tgammal_log_80_Z_G_H_h1#),gp
      nop.f         0
      addl          GR_l_Stirling_Table = @ltoff(Constants_Tgammal_stirling), gp
};;

{ .mfi
      getf.sig      GR_l_signif_Z       = f8 // Significand of X
      fcvt.fx.s1    FR_n_IXNS           = f8 // Convert to fixed point
      addl          GR_c_Table          = @ltoff(Constants_Tgammal_common), gp
}
{ .mfi
      ld8           GR_l_Log_Table      = [GR_l_Log_Table]
      nop.f         0
      addl          GR_p_Table          = @ltoff(Constants_Tgammal_poly), gp
};;

{ .mfi
      ld8           GR_n_sin_Table      = [GR_n_sin_Table]
      fclass.m      p6,p0               = f8,0x1EF // Check x for NaN, 0, INF, denorm
                                                   // NatVal.
      // 0x1003E = 0xFFFF (exponent bias, see GR_l_BIAS) + 63, i.e. the
      // biased exponent of 2^63 used by the singularity test further down.
      addl          GR_c_NegSingularity = 0x1003E, r0
}
{ .mlx
      ld8           GR_l_Stirling_Table = [GR_l_Stirling_Table]
      movl          GR_c_13             = 0x402A000000000000 // 13.0
};;

{ .mfi
      getf.d        GR_c_X              = f8 // Double prec. X to general register
      frcpa.s1      FR_z_Y0,p0          = f1,f8 // y = frcpa(x) (for negatives)
      extr.u        GR_l_Index1         = GR_l_signif_Z, 59, 4 // = High 4 bits of Z
}
{ .mlx
      ld8           GR_c_Table          = [GR_c_Table]
      movl          GR_c_SignBit        = 0x8000000000000000 // High bit (sign)
};;

{ .mfi
      ld8           GR_p_Table          = [GR_p_Table]
      fcmp.lt.s1    p15, p14            = f8,f0 // p14 - positive arg, p15 - negative
      shl           GR_l_Index1         = GR_l_Index1,5 // Adjust Index1 ptr (x32)
}
{ .mfb
      adds          GR_c_NegUnderflow   = 1765, r0
      nop.f         0
(p6)  br.cond.spnt  tgammal_spec // Spec. values processing branch ////////////
                                 // (0s, INFs, NANs, NatVals, denormals) //////
};;

{ .mfi
      ldfpd         FR_l_CH,FR_l_CL= [GR_l_Stirling_Table], 16 // Load CH, CL
      fcvt.fx.trunc.s1 FR_n_IXN         = FR_l_AbsX // Abs arg to int by trunc
      extr.u        GR_l_X_0            = GR_l_signif_Z, 49, 15 // High 15 bit of Z
}
{ .mfi
      add           GR_l_Index1         = GR_l_Index1,GR_l_Log_Table // Add offset
      fma.s1        FR_p_2              = f1, f1, f1 // 2.0
      andcm         GR_c_X              = GR_c_X, GR_c_SignBit // Remove sign
};;

{ .mfi
      addl          GR_l_Log_Table = @ltoff(Constants_Tgammal_log_80_Z_G_H_h2#), gp
      fcmp.lt.s1    p10, p0             = FR_l_AbsX, f1 // If |X|<1 then p10 = 1
      nop.i         0
}
{ .mlx
      ld2           GR_l_Z_1            = [GR_l_Index1],4 // load Z_1 from Index1
      movl          GR_l_BIAS           = 0x000000000000FFFF // Bias for exponent
};;

{ .mfi
      ld8           GR_l_Log_Table      = [GR_l_Log_Table]
      frcpa.s1      FR_l_Y0, p0         = f1, FR_l_AbsX // y = frcpa(x)
      nop.i         0
}
{ .mfi
      ldfs          FR_l_G_1            = [GR_l_Index1],4 // Load G_1
      fsub.s1       FR_l_W              = FR_l_AbsX, f1 // W = |X|-1
      nop.i         0
};;

{ .mfi
      getf.exp      GR_l_N_Unbiased= FR_l_AbsX // exponent of |X|
      fmerge.se     FR_l_S              = f1, FR_l_AbsX // S = merging of X and 1.0
      // Unsigned integer compare of the double-format bit patterns: GR_c_X
      // had its sign bit cleared above and GR_c_13 holds the pattern of 13.0.
      cmp.gtu       p11, p0             = GR_c_13, GR_c_X // If 1 <= |X| < 13
                                                          // then p11 = 1
}
{ .mfb
      ldfs          FR_l_H_1            = [GR_l_Index1],8 // Load H_1
      fcvt.xf       FR_n_XNS            = FR_n_IXNS // Convert to FP repr. of int X
(p10) br.cond.spnt  tgamma_lt_1 // Branch to |X| < 1 path ///////////////////
};;

{ .mfi
      ldfpd         FR_n_A2H, FR_n_A2L  = [GR_n_sin_Table], 16
      nop.f         0
      pmpyshr2.u    GR_l_X_1            = GR_l_X_0,GR_l_Z_1,15 // Adjust Index2 (x32)
}
{ .mfb
      ldfe          FR_l_B2             = [GR_l_Stirling_Table], 16
      nop.f         0
(p11) br.cond.spnt  tgamma_lt_13 // Branch to 1 <= |X| < 13 path ///////////////
};;

{ .mfi
      ldfe          FR_l_h_1            = [GR_l_Index1],0
      nop.f         0
      sub           GR_l_N              = GR_l_N_Unbiased, GR_l_BIAS // N - BIAS
}
{ .mib
      ldfpd         FR_l_B4,FR_l_B6= [GR_l_Stirling_Table], 16 // Load C
      // Only evaluated for negative X (p15); .unc clears p8 otherwise.
(p15) cmp.geu.unc   p8,p0               = GR_l_N_Unbiased, GR_c_NegSingularity
(p8)  br.cond.spnt  tgammal_singularity // Singularity for arg < to -2^63 //////
};;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -