📄 e_acoshl.s
字号:
};;{ .mfi ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1 fmerge.se FR_S_hi = f1,FR_XLog_Hi // Form |x| nop.i 0};;{ .mmi getf.exp GR_N = FR_XLog_Hi // Get N = exponent of x+1 ldfd FR_h = [GR_ad_tbl_1] // Load h_1 nop.i 0};;{ .mfi ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi nop.f 0 pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // Get bits 30-15 of X_0 * Z_1};;{ .mmi ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo sub GR_N = GR_N, GR_Bias mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80};;{ .mfi ldfe FR_Q4 = [GR_ad_q],16 // Load Q4 nop.f 0 sub GR_minus_N = GR_Bias, GR_N // Form exponent of 2^(-N)};;{ .mmf ldfe FR_Q3 = [GR_ad_q],16 // Load Q3 setf.sig FR_float_N = GR_N // Put integer N into rightmost sign nop.f 0};;{ .mmi ldfe FR_Q2 = [GR_ad_q],16 // Load Q2 nop.m 0 extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1 };;{ .mmi ldfe FR_Q1 = [GR_ad_q] // Load Q1 shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2 nop.i 0};;{ .mmi ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2 shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2 nop.i 0};;{ .mmi ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2 nop.m 0 nop.i 0};;{ .mmf ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2 setf.exp FR_2_to_minus_N = GR_minus_N // Form 2^(-N) nop.f 0};;{ .mfi nop.m 0 nop.f 0 pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1*Z_2};;// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!) 
// BECAUSE OF POSSIBLE 10 CLOCKS STALL!// (Just nops added - nothing to do here){ .mfi nop.m 0 nop.f 0 nop.i 0};;{ .mfi nop.m 0 nop.f 0 nop.i 0};;{ .mfi nop.m 0 nop.f 0 nop.i 0};;{ .mfi nop.m 0 nop.f 0 extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2};;{ .mfi shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3 fcvt.xf FR_float_N = FR_float_N nop.i 0};;{ .mfi ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3 nop.f 0 nop.i 0};;{ .mfi ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3 fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2 nop.i 0}{ .mfi nop.m 0 fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2 nop.i 0};;{ .mmf nop.m 0 nop.m 0 fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2};;{ .mfi nop.m 0 fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2)*G_3 nop.i 0}{ .mfi nop.m 0 fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2)+H_3 nop.i 0};;{ .mfi nop.m 0 fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3 nop.i 0};;{ .mfi nop.m 0 fms.s1 FR_r = FR_G, FR_S_hi, f1 // r = G * S_hi - 1 nop.i 0}{ .mfi nop.m 0 fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi=N*log2_hi+H nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N*log2_lo+h nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 // poly_lo = r * Q4 + Q3 nop.i 0}{ .mfi nop.m 0 fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo=poly_lo*r+Q2 nop.i 0}{ .mfi nop.m 0 fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r // poly_hi = Q1*rsq + r nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h//poly_lo=poly_lo*r^3+h nop.i 0};;{ .mfi nop.m 0 fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo nop.i 0};;{ .mfb nop.m 0 fadd.s0 FR_Res = FR_Y_lo,FR_Y_hi // Result=Y_lo+Y_hi br.ret.sptk b0 // Common exit};;// NEAR ONE INTERVALnear_1:{ .mfi nop.m 0 frsqrta.s1 FR_Rcp, p0 = FR_2XM1 // Rcp = 1/x reciprocal appr. 
&SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_PV6 = FR_PP5, FR_XM1, FR_PP4 // pv6 = P5*xm1+P4 $POLY$ nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_QV6 = FR_QQ5, FR_XM1, FR_QQ4 // qv6 = Q5*xm1+Q4 $POLY$ nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_PV4 = FR_PP3, FR_XM1, FR_PP2 // pv4 = P3*xm1+P2 $POLY$ nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_QV4 = FR_QQ3, FR_XM1, FR_QQ2 // qv4 = Q3*xm1+Q2 $POLY$ nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_XM12 = FR_XM1, FR_XM1, f0 // xm1^2 = xm1 * xm1 $POLY$ nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_PV2 = FR_PP1, FR_XM1, FR_PP0 // pv2 = P1*xm1+P0 $POLY$ nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_QV2 = FR_QQ1, FR_XM1, FR_QQ0 // qv2 = Q1*xm1+Q0 $POLY$ nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT& nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_PV3 = FR_XM12, FR_PV6, FR_PV4//pv3=pv6*xm1^2+pv4 $POLY$ nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_QV3 = FR_XM12, FR_QV6, FR_QV4//qv3=qv6*xm1^2+qv4 $POLY$ nop.i 0 };;{ .mfi nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_PP = FR_XM12, FR_PV3, FR_PV2 //pp=pv3*xm1^2+pv2 $POLY$ nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_QQ = FR_XM12, FR_QV3, FR_QV2 //qq=qv3*xm1^2+qv2 $POLY$ nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& nop.i 0 };;{ .mfi nop.m 0 frcpa.s1 FR_Y0,p0 = f1,FR_QQ // y = frcpa(b) #DIV# nop.i 0 }{ .mfi nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g*h &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_Q0 = FR_PP,FR_Y0,f0 // q = a*y #DIV# nop.i 0 }{ .mfi nop.m 0 fnma.s1 FR_E0 = FR_Y0,FR_QQ,f1 // e = 1 - b*y #DIV# nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT& nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_E2 = FR_E0,FR_E0,FR_E0 // e2 = e+e^2 #DIV# nop.i 0 }{ .mfi 
nop.m 0 fma.s1 FR_E1 = FR_E0,FR_E0,f0 // e1 = e^2 #DIV# nop.i 0 };;{ .mfi nop.m 0 fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT& nop.i 0 }{ .mfi nop.m 0 fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_Y1 = FR_Y0,FR_E2,FR_Y0 // y1 = y+y*e2 #DIV# nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_E3 = FR_E1,FR_E1,FR_E0 // e3 = e+e1^2 #DIV# nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_GG = FR_DD, FR_HH, FR_GG // g = d * h + g &SQRT& nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_Y2 = FR_Y1,FR_E3,FR_Y0 // y2 = y+y1*e3 #DIV# nop.i 0 }{ .mfi nop.m 0 fnma.s1 FR_R0 = FR_QQ,FR_Q0,FR_PP // r = a-b*q #DIV# nop.i 0 };;{ .mfi nop.m 0 fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT& nop.i 0 };;{ .mfi nop.m 0 fnma.s1 FR_E4 = FR_QQ,FR_Y2,f1 // e4 = 1-b*y2 #DIV# nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_X_Hi = FR_R0,FR_Y2,FR_Q0 // x = q+r*y2 #DIV# nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_GL = FR_DD, FR_HH, f0 // gl = d * h &SQRT& nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_Y3 = FR_Y2,FR_E4,FR_Y2 // y3 = y2+y2*e4 #DIV# nop.i 0 }{ .mfi nop.m 0 fnma.s1 FR_R1 = FR_QQ,FR_X_Hi,FR_PP // r1 = a-b*x #DIV# nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_HH = FR_GG, FR_X_Hi, f0 // hh = gg * x_hi nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_LH = FR_GL, FR_X_Hi, f0 // lh = gl * x_hi nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3 #DIV# nop.i 0 };;{ .mfi nop.m 0 fma.s1 FR_LL = FR_GL, FR_X_lo, f0 // ll = gl*x_lo nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_HL = FR_GG, FR_X_lo, f0 // hl = gg * x_lo nop.i 0 };;{ .mfi nop.m 0 fms.s1 FR_Res = FR_GL, f1, FR_LL // res = gl + ll nop.i 0 };;{ .mfi nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_LH // res = res + lh nop.i 0 };;{ .mfi nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_HL // res = res + hl nop.i 0 };;{ .mfi nop.m 0 fms.s1 FR_Res = FR_Res, f1, FR_HH // res = res + hh nop.i 0 };;{ .mfb nop.m 0 fma.s0 FR_Res = FR_Res, f1, FR_GG // result = res + gg br.ret.sptk b0 // 
// Exit for near 1 path
};;
// NEAR ONE INTERVAL END

//----------------------------------------------------------------------
// acoshl_lt_pone
//   Exit taken when x < 1.0 (see the branch comment below).
//   Copies FR_Arg into FR_Arg_X, forms the result with frcpa.s0 on
//   f0,f0 (per the original comment: yields QNaN and raises invalid),
//   loads 135 into GR_Parameter_TAG and branches to __libm_error_region.
//   NOTE(review): FR_Arg_Y is not written on this path although the
//   error region stores it to the stack -- presumably ignored for a
//   one-argument function; confirm against __libm_error_support.
//----------------------------------------------------------------------
acoshl_lt_pone:
{ .mfi
      nop.m 0
      fmerge.s          FR_Arg_X = FR_Arg, FR_Arg       // Copy the argument for the error handler
      nop.i 0
};;
{ .mfb
      mov               GR_Parameter_TAG = 135          // Tag for this error case (presumably read by __libm_error_support)
      frcpa.s0          FR_Res,p0 = f0,f0               // get QNaN,and raise invalid
      br.cond.sptk      __libm_error_region             // exit if x < 1.0
};;

GLOBAL_LIBM_END(acoshl)

//----------------------------------------------------------------------
// __libm_error_region
//   Common error exit. Reached by a branch (not a call), so the b0 that
//   is saved and restored here is the one that was live on entry.
//
//   In:    FR_Arg_X, FR_Arg_Y  values stored as parameters 1 and 2
//          FR_Res              value stored as parameter 3 (result)
//          GR_Parameter_TAG    set by the branching code; not modified here
//   Out:   f8                  reloaded from the result slot after the call
//   Stack: allocates a 64-byte frame (.fframe 64) and releases it before
//          returning.
//   Saves: ar.pfs, gp and b0 in GR_SAVE_PFS, GR_SAVE_GP and GR_SAVE_B0,
//          and restores them after the call.
//   Calls: __libm_error_support with GR_Parameter_X, GR_Parameter_Y and
//          GR_Parameter_RESULT holding the addresses of the three slots.
//----------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
      add               GR_Parameter_Y = -32,sp         // Parameter 2 value
      nop.f 0
.save ar.pfs,GR_SAVE_PFS
      mov               GR_SAVE_PFS = ar.pfs            // Save ar.pfs
}
{ .mfi
.fframe 64
      add               sp = -64,sp                     // Create new stack
      nop.f 0
      mov               GR_SAVE_GP = gp                 // Save gp
};;
{ .mmi
      stfe              [GR_Parameter_Y] = FR_Arg_Y,16  // Parameter 2 to stack (pointer post-incremented by 16)
      add               GR_Parameter_X = 16,sp          // Parameter 1 address
.save b0,GR_SAVE_B0
      mov               GR_SAVE_B0 = b0                 // Save b0
};;
.body
{ .mib
      stfe              [GR_Parameter_X] = FR_Arg_X     // Parameter 1 to stack
      add               GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
      nop.b 0
}
{ .mib
      stfe              [GR_Parameter_Y] = FR_Res       // Parameter 3 to stack
      add               GR_Parameter_Y = -16,GR_Parameter_Y // Undo the +16 post-increment from the first stfe
      br.call.sptk      b0 = __libm_error_support#      // Error handling function
};;
{ .mmi
      nop.m 0
      nop.m 0
      add               GR_Parameter_RESULT = 48,sp     // Recompute the result-slot address from sp after the call
};;
{ .mmi
      ldfe              f8 = [GR_Parameter_RESULT]      // Get return res
.restore sp
      add               sp = 64,sp                      // Restore stack pointer
      mov               b0 = GR_SAVE_B0                 // Restore return address
};;
{ .mib
      mov               gp = GR_SAVE_GP                 // Restore gp
      mov               ar.pfs = GR_SAVE_PFS            // Restore ar.pfs
      br.ret.sptk       b0                              // Return
};;

LOCAL_LIBM_END(__libm_error_region#)

.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -