📄 s_log1pl.s
字号:
;;

// ---------------------------------------------------------------------
// Tail of log1pl (IA-64).  The function entry, argument classification
// and first table lookup precede this excerpt and are not shown here.
//
// Predicates established below and used through the common exit:
//   p7 : |x| <  2^-80   small path, returns x - x*x immediately
//   p8 : |x| <  2^-7    near1 path, polynomial in w (P1..P8)
//   p9 : |x| >= 2^-7    regular path, table-driven G/H/h reduction
//                       followed by a polynomial in r (Q1..Q4)
// The near1 and regular paths are interleaved bundle by bundle and
// both finish in the single fadd.s0 that forms f8 = Y_lo + Y_hi.
// ---------------------------------------------------------------------

{ .mmi
      ld4 GR_Z_2 = [GR_ad_z_2]                       // Load Z_2
      shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2
      and GR_M = GR_exp_mask, GR_M                   // Get exponent of w = x
}
;;

{ .mmi
      ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8           // Load G_2, H_2
      cmp.lt p8, p9 = GR_M, GR_exp_2tom7             // Test |x| < 2^-7
      cmp.lt p7, p0 = GR_M, GR_exp_2tom80            // Test |x| < 2^-80
}
;;

// Small path is separate code
//  p7 is for the small path: |x| < 2^-80
// near1 and regular paths are merged.
//  p8 is for the near1 path: |x| < 2^-7
//  p9 is for regular path:   |x| >= 2^-7

{ .mfi
      ldfd FR_h2 = [GR_ad_tbl_2]                     // Load h_2
      nop.f 999
      nop.i 999
}
{ .mfb
(p9)  setf.exp FR_2_to_minus_N = GR_minus_N          // Form 2^(-N)
(p7)  fnma.s0 f8 = FR_X_Prime, FR_X_Prime, FR_X_Prime // Result x - x*x
(p7)  br.ret.spnt b0                                 // Branch if |x| < 2^-80
}
;;

{ .mmi
(p8)  ldfe FR_P7 = [GR_ad_p],16                      // Load P_7 for near1 path
(p8)  ldfe FR_P4 = [GR_ad_p2],16                     // Load P_4 for near1 path
(p9)  pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15           // Get bits 30-15 of X_1 * Z_2
}
;;

//
// For performance, don't use result of pmpyshr2.u for 4 cycles.
//
{ .mmf
(p8)  ldfe FR_P6 = [GR_ad_p],16                      // Load P_6 for near1 path
(p8)  ldfe FR_P3 = [GR_ad_p2],16                     // Load P_3 for near1 path
(p9)  fma.s1 FR_S_lo = FR_S_lo, f1, FR_BB            // S_lo = S_lo + BB
}
;;

{ .mmf
(p8)  ldfe FR_P5 = [GR_ad_p],16                      // Load P_5 for near1 path
(p8)  ldfe FR_P2 = [GR_ad_p2],16                     // Load P_2 for near1 path
(p8)  fmpy.s1 FR_wsq = FR_W, FR_W                    // wsq = w * w for near1 path
}
;;

{ .mmi
(p8)  ldfe FR_P1 = [GR_ad_p2],16 ;;                  // Load P_1 for near1 path
      nop.m 999
(p9)  extr.u GR_Index3 = GR_X_2, 1, 5                // Extract bits 1-5 of X_2
}
;;

{ .mfi
(p9)  shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3
(p9)  fcvt.xf FR_float_N = FR_float_N
      nop.i 999
}
;;

{ .mfi
(p9)  ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8           // Load G_3, H_3
      nop.f 999
      nop.i 999
}
;;

{ .mfi
(p9)  ldfd FR_h3 = [GR_ad_tbl_3]                     // Load h_3
(p9)  fmpy.s1 FR_G = FR_G, FR_G2                     // G = G_1 * G_2
      nop.i 999
}
{ .mfi
      nop.m 999
(p9)  fadd.s1 FR_H = FR_H, FR_H2                     // H = H_1 + H_2
      nop.i 999
}
;;

{ .mmf
      nop.m 999
      nop.m 999
(p9)  fadd.s1 FR_h = FR_h, FR_h2                     // h = h_1 + h_2
}
;;

{ .mfi
      nop.m 999
(p8)  fmpy.s1 FR_w4 = FR_wsq, FR_wsq                 // w4 = w^4 for near1 path
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fma.s1 FR_p87 = FR_W, FR_P8, FR_P7             // p87 = w * P8 + P7
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_S_lo = FR_S_lo, FR_2_to_minus_N, f0  // S_lo = S_lo * 2^(-N)
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fma.s1 FR_p43 = FR_W, FR_P4, FR_P3             // p43 = w * P4 + P3
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fmpy.s1 FR_G = FR_G, FR_G3                     // G = (G_1 * G_2) * G_3
      nop.i 999
}
{ .mfi
      nop.m 999
(p9)  fadd.s1 FR_H = FR_H, FR_H3                     // H = (H_1 + H_2) + H_3
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fadd.s1 FR_h = FR_h, FR_h3                     // h = (h_1 + h_2) + h_3
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fmpy.s1 FR_w6 = FR_w4, FR_wsq                  // w6 = w^6 for near1 path
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p8)  fma.s1 FR_p432 = FR_W, FR_p43, FR_P2           // p432 = w * p43 + P2
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fma.s1 FR_p876 = FR_W, FR_p87, FR_P6           // p876 = w * p87 + P6
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fms.s1 FR_r = FR_G, FR_S_hi, f1                // r = G * S_hi - 1
      nop.i 999
}
{ .mfi
      nop.m 999
(p9)  fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H  // Y_hi = N * log2_hi + H
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h     // h = N * log2_lo + h
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_r = FR_G, FR_S_lo, FR_r              // r = G * S_lo + (G * S_hi - 1)
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p8)  fma.s1 FR_p4321 = FR_W, FR_p432, FR_P1         // p4321 = w * p432 + P1
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fma.s1 FR_p8765 = FR_W, FR_p876, FR_P5         // p8765 = w * p876 + P5
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3         // poly_lo = r * Q4 + Q3
      nop.i 999
}
{ .mfi
      nop.m 999
(p9)  fmpy.s1 FR_rsq = FR_r, FR_r                    // rsq = r * r
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p8)  fma.s1 FR_Y_lo = FR_wsq, FR_p4321, f0          // Y_lo = wsq * p4321
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fma.s1 FR_Y_hi = FR_W, f1, f0                  // Y_hi = w for near1 path
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2    // poly_lo = poly_lo * r + Q2
      nop.i 999
}
{ .mfi
      nop.m 999
(p9)  fma.s1 FR_rcub = FR_rsq, FR_r, f0              // rcub = r^3
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p8)  fma.s1 FR_Y_lo = FR_w6, FR_p8765,FR_Y_lo       // Y_lo = w6 * p8765 + w2 * p4321
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r        // poly_hi = Q1 * rsq + r
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h  // poly_lo = poly_lo*r^3 + h
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p9)  fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo       // Y_lo = poly_hi + poly_lo
      nop.i 999
}
;;

// Remainder of code is common for near1 and regular paths
{ .mfb
      nop.m 999
      fadd.s0 f8 = FR_Y_lo,FR_Y_hi                   // Result=Y_lo+Y_hi
      br.ret.sptk b0                                 // Common exit for 2^-80 < x < inf
}
;;

// Here if x=-1
LOG1P_EQ_Minus_1:
//
// If x=-1 raise divide by zero and return -inf
//
{ .mfi
      mov GR_Parameter_TAG = 138
      fsub.s1 FR_Output_X_tmp = f0, f1               // tmp = 0 - 1
      nop.i 999
}
;;

{ .mfb
      nop.m 999
      frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 // tmp / 0
      br.cond.sptk __libm_error_region
}
;;

LOG1P_special:
{ .mfi
      nop.m 999
      fclass.m.unc p8, p0 = FR_Input_X, 0x1E1        // Test for natval, nan, +inf
      nop.i 999
}
;;

//
// For SNaN raise invalid and return QNaN.
// For QNaN raise invalid and return QNaN.
// For +Inf return +Inf.
//
{ .mfb
      nop.m 999
(p8)  fmpy.s0 f8 = FR_Input_X, f1
(p8)  br.ret.sptk b0                                 // Return for natval, nan, +inf
}
;;

//
// For -Inf raise invalid and return QNaN.
//
{ .mfb
      mov GR_Parameter_TAG = 139
      fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0
      br.cond.sptk __libm_error_region
}
;;

LOG1P_unsupported:
//
// Return generated NaN or other value.
//
{ .mfb
      nop.m 999
      fmpy.s0 f8 = FR_Input_X, f0
      br.ret.sptk b0
}
;;

// Here if -inf < x < -1
LOG1P_LT_Minus_1:
//
// Deal with x < -1 in a special way - raise
// invalid and produce QNaN indefinite.
//
{ .mfb
      mov GR_Parameter_TAG = 139
      frcpa.s0 FR_Output_X_tmp, p8 = f0, f0          // 0 / 0
      br.cond.sptk __libm_error_region
}
;;

GLOBAL_IEEE754_END(log1pl)

// ---------------------------------------------------------------------
// __libm_error_region
//
// Local stub reached by br.cond from the special-case paths above with
// GR_Parameter_TAG already set.  It opens a 64-byte stack frame, spills
// three floating-point values into it, calls __libm_error_support, and
// returns whatever that routine left in the result slot via f8.
//
// Frame layout relative to the new sp (as built by the code below):
//   sp+16 : FR_X       (GR_Parameter_X points here)
//   sp+32 : FR_Y       (GR_Parameter_Y points here at the call)
//   sp+48 : FR_RESULT  (GR_Parameter_RESULT points here; reloaded to f8)
//
// ar.pfs, gp and b0 are saved in GR_SAVE_PFS / GR_SAVE_GP / GR_SAVE_B0
// across the call and restored before returning.
// ---------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
      add GR_Parameter_Y=-32,sp                      // Parameter 2 value
      nop.f 0
.save ar.pfs,GR_SAVE_PFS
      mov GR_SAVE_PFS=ar.pfs                         // Save ar.pfs
}
{ .mfi
.fframe 64
      add sp=-64,sp                                  // Create new stack
      nop.f 0
      mov GR_SAVE_GP=gp                              // Save gp
}
;;

{ .mmi
      stfe [GR_Parameter_Y] = FR_Y,16                // Save Parameter 2 on stack
      add GR_Parameter_X = 16,sp                     // Parameter 1 address
.save b0, GR_SAVE_B0
      mov GR_SAVE_B0=b0                              // Save b0
}
;;

.body
{ .mib
      stfe [GR_Parameter_X] = FR_X                   // Store Parameter 1 on stack
      add GR_Parameter_RESULT = 0,GR_Parameter_Y
      nop.b 0                                        // Parameter 3 address
}
{ .mib
      stfe [GR_Parameter_Y] = FR_RESULT              // Store Parameter 3 on stack
      add GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk b0=__libm_error_support#          // Call error handling function
}
;;

{ .mmi
      nop.m 999
      nop.m 999
      add GR_Parameter_RESULT = 48,sp
}
;;

{ .mmi
      ldfe f8 = [GR_Parameter_RESULT]                // Get return result off stack
.restore sp
      add sp = 64,sp                                 // Restore stack pointer
      mov b0 = GR_SAVE_B0                            // Restore return address
}
;;

{ .mib
      mov gp = GR_SAVE_GP                            // Restore gp
      mov ar.pfs = GR_SAVE_PFS                       // Restore ar.pfs
      br.ret.sptk b0                                 // Return
}
;;

LOCAL_LIBM_END(__libm_error_region#)

.type __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -