📄 e_acosh.s
字号:
// NOTE(review): this chunk begins inside a bundle of acosh(); the bundle's
// opening brace/template and its first slots lie above the visible range.
// Symbolic names such as log_*, acosh_*, NR1, NR2 and GR_* are aliases
// defined elsewhere in this file; per the comments below NR1 is expected to
// hold 0.5 and NR2 to hold 3 -- confirm against their definitions.
// Layout rule: one instruction, directive or label per line, because a "//"
// comment runs to end of line and would otherwise hide whatever follows it.
      extr.u        log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits
};;

{ .mmi
      //table_address3 = index*16 + table_address3
      shladd        log_table_address3 = log_GR_index,4,log_table_address3
;;
      ldfe          log_T = [log_table_address3]
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_rsq = log_r, log_r, f0 //r^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_rp_p4 = log_P5, log_r, log_P4 //P5*r + P4
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
      nop.i         0
};;

{ .mfi
      nop.m         0
      //convert N to the floating-point format log_Nfloat
      fcvt.xf       log_Nfloat = log_int_Nfloat
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_rcube = log_rsq, log_r, f0 //r^3
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r
      nop.i         0
};;

{ .mfi
      nop.m         0
      //(P5*r + P4)*r^2 + P3*r + P2
      fma.s1        log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T
      nop.i         0
}
{ .mfi
      nop.m         0
      //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      fma.s1        log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
      nop.i         0
};;

{ .mfb
      nop.m         0
      // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      fadd.d.s0     f8 = log_T_plus_Nlog2, log_r2P_r
      br.ret.sptk   b0 // Exit main path, path 3: 1.0005 <= x < 2^63
};;

// Here if path 2, 1.0 < x < 1.0005
// Three Newton-Raphson refinement steps run interleaved with a Horner
// evaluation of the C4..C0 polynomial; the result is their product.
ACOSH_NEAR_ONE:
// The first NR iteration
{ .mfi
      ldfe          log_C1 = [log_table_address2],16
      fma.s1        acosh_w_iter1 = acosh_w_rs,log_w,f0 //t*w
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        acosh_w_1 = f8,log_C4,log_C3 //x*C4 + C3
      nop.i         0
};;

{ .mfi
      ldfe          log_C0 = [log_table_address2],16
      fma.s1        acosh_w_iter2 = acosh_w_rs,NR1,f0 //t*0.5
      nop.i         0
}
{ .mfi
      nop.m         0
      fnma.s1       acosh_w_iter1 = acosh_w_iter1,acosh_w_rs,NR2 //3-t*t*w
      nop.i         0
};;

{ .mfi
      nop.m         0
      //(3-t*t*w)*t*0.5
      fma.s1        acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        acosh_w_1 = acosh_w_1,log_w,log_C2 //(x*C4 + C3)*(x-1) + C2
      nop.i         0
};;

// The second NR iteration
{ .mfi
      nop.m         0
      fma.s1        acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w
      nop.i         0
}
{ .mfi
      nop.m         0
      //((x*C4 + C3)*(x-1) + C2)*(x-1) + C1
      fma.s1        acosh_w_1 = acosh_w_1,log_w,log_C1
      nop.i         0
};;

// Both instructions of the next group read the previous acosh_w_iter2;
// the write in the second bundle only becomes visible after the stop.
{ .mfi
      nop.m         0
      fnma.s1       acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        acosh_w_iter2 = acosh_w_iter2,NR1,f0
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0
      nop.i         0
}
{ .mfi
      nop.m         0
      //(((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0
      fma.s1        acosh_w_1 = acosh_w_1,log_w,log_C0
      nop.i         0
};;

// The third NR iteration
{ .mfi
      nop.m         0
      fma.s1        acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w
      nop.i         0
};;

{ .mfi
      nop.m         0
      fnma.s1       acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        acosh_w_iter2 = acosh_w_iter2,NR1,f0
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0
      nop.i         0
};;

{ .mfi
      nop.m         0
      // refined estimate times log_w
      fma.s1        acosh_w_sqrt = acosh_w_iter2,log_w,f0
      nop.i         0
};;

{ .mfb
      nop.m         0
      // result = polynomial * acosh_w_sqrt, rounded to double
      fma.d.s0      f8 = acosh_w_1,acosh_w_sqrt,f0
      br.ret.sptk   b0 // Exit path 2, 1.0 < x < 1.0005
};;

// Here if path 4, x >= 2^63
// Same table-plus-polynomial log evaluation as the main path, applied to
// log_arg.
LOG_COMMON1:
{ .mfi
      ldfpd         acosh_comp,log_P5 = [NR_table_address],16
      frcpa.s1      log_C,p0 = f1,log_arg
      nop.i         0
};;

{ .mmi
      getf.exp      log_GR_signexp_f8 = log_arg
      ldfpd         log_P4,log_P3 = [NR_table_address],16
      nop.i         0
};;

{ .mmi
      getf.sig      log_GR_significand_f8 = log_arg
      ldfpd         log_P2,log_P1 = [NR_table_address],16
      nop.i         0
};;

{ .mfi
      adds          log_table_address3 = 0x70, NR_table_address
      nop.f         0
      //significant bit destruction
      and           log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
};;

{ .mmf
      ldfe          log2 = [NR_table_address],16
      //BIAS subtraction
      sub           log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
      fms.s1        log_r = log_C,log_arg,f1 // C = frcpa(x); r = C * x - 1
};;

{ .mfi
      setf.sig      log_int_Nfloat = log_GR_true_exp_f8
      nop.f         0
      extr.u        log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits
};;

{ .mmi
      //table_address3 = index*16 + table_address3
      shladd        log_table_address3 = log_GR_index,4,log_table_address3
;;
      ldfe          log_T = [log_table_address3]
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_rsq = log_r, log_r, f0 //r^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_rp_p4 = log_P5, log_r, log_P4 //P5*r + P4
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_rcube = log_rsq, log_r, f0 //r^3
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r
      nop.i         0
};;

{ .mfi
      nop.m         0
      //convert N to the floating-point format log_Nfloat
      fcvt.xf       log_Nfloat = log_int_Nfloat
      nop.i         0
}
{ .mfi
      nop.m         0
      //(P5*r + P4)*r^2 + P3*r + P2
      fma.s1        log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
      nop.i         0
};;

{ .mfi
      nop.m         0
      fma.s1        log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T
      nop.i         0
}
{ .mfi
      nop.m         0
      //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      fma.s1        log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
      nop.i         0
};;

{ .mfb
      nop.m         0
      // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
      fadd.d.s0     f8 = log_T_plus_Nlog2, log_r2P_r
      br.ret.sptk   b0 // Exit path 4, x >= 2^63
};;

// Here if path 7, x < 1.0
// Keep a copy of the argument in f10, form frcpa(0/0) in f8 as the value
// handed to the error stub, set the tag and branch to the stub.
ACOSH_LESS_ONE:
{ .mfi
      alloc         r32 = ar.pfs,1,3,4,0 // 1 input, 3 locals, 4 outputs
      fmerge.s      f10 = f8,f8          // f10 = copy of x
      nop.i         0
};;

{ .mfb
      // 136: presumably the error code consumed by __libm_error_support
      mov           acosh_GR_tag = 136
      frcpa.s0      f8,p0 = f0,f0
      br.cond.sptk  __libm_error_region
};;

GLOBAL_LIBM_END(acosh)

// __libm_error_region: local stub that spills three doubles to a fresh
// 64-byte frame, calls __libm_error_support and returns the value the
// callee leaves in the result slot.
//   sp+16 : parameter 1 (f10)
//   sp+32 : parameter 2 (f1)
//   sp+48 : parameter 3 / result (f8 in, f8 reloaded after the call)
// ar.pfs, gp and b0 are saved in GR_SAVE_* around the call.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
        add sp=-64,sp                           // Create new stack
        nop.f 0
        mov GR_SAVE_GP=gp                       // Save gp
};;

{ .mmi
        stfd [GR_Parameter_Y] = f1,16           // STORE Parameter 2 on stack
        add GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;

.body
{ .mib
        stfd [GR_Parameter_X] = f10             // STORE Parameter 1 on stack
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
        nop.b 0
}
{ .mib
        stfd [GR_Parameter_Y] = f8              // STORE Parameter 3 on stack
        add   GR_Parameter_Y = -16,GR_Parameter_Y
        br.call.sptk b0=__libm_error_support#   // Call error handling function
};;

{ .mmi
        add   GR_Parameter_RESULT = 48,sp       // Recompute result slot address
        nop.m 0
        nop.i 0
};;

{ .mmi
        ldfd  f8 = [GR_Parameter_RESULT]        // Get return result off stack
.restore sp
        add   sp = 64,sp                        // Restore stack pointer
        mov   b0 = GR_SAVE_B0                   // Restore return address
};;

{ .mib
        mov   gp = GR_SAVE_GP                   // Restore gp
        mov   ar.pfs = GR_SAVE_PFS              // Restore ar.pfs
        br.ret.sptk     b0                      // Return
};;

LOCAL_LIBM_END(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -