📄 e_acosl.s
字号:
// NOTE(review): this chunk starts in the middle of a bundle of acosl; the
// opening "{ .mfi" of the first bundle and everything before it (register
// aliases, constants, earlier steps) lie outside the visible source.
// The instructions below are the visible tail of the sequence that forms
// U ~ sqrt(B) (U is named in Step 1.3 below).
      nop.m 999
(p0)  frsqrta.s1     acos_y0,p8  = acos_B
      nop.i 999
}

{ .mfi
      nop.m 999
(p0)  fms.s1         acos_1mA    = f1,f1, acos_A
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Bb     = acos_1mB,f1, acos_ABS_NORM_f8
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Hh     = acos_HALF, acos_B, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_t1     = acos_y0, acos_y0, f0
      nop.i 999
}

{ .mfi
      nop.m 999
(p0)  fms.s1         acos_Aa     = acos_1mA,f1, acos_ABS_NORM_f8
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_t2     = acos_t1, acos_Hh, acos_HALF
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_y1     = acos_t2, acos_y0, acos_y0
      nop.i 999
}

// Step 1.2:
/////////////////////////
// Get V = sqrt(A)
/////////////////////////
// The V sequence reuses acos_y0, acos_t1..acos_t5, acos_y1, acos_y2,
// acos_Hh, acos_S, acos_H and acos_Dd, and is interleaved with the
// remainder of the U sequence, so the order of the bundles matters.

{ .mfi
      nop.m 999
(p0)  frsqrta.s1     acos_y0,p8  = acos_A
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_t3     = acos_y1, acos_Hh, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_t1     = acos_y0, acos_y0, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_t4     = acos_t3, acos_y1, acos_HALF
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_y2     = acos_t4, acos_y1, acos_y1
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_S      = acos_B, acos_y2, f0
      nop.i 999
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_H      = acos_y2, acos_HALF, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_t5     = acos_Hh, acos_y2, f0
      nop.i 999
}

// acos_Hh is re-targeted from HALF*B to HALF*A here.
{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Hh     = acos_HALF, acos_A, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_Dd     = acos_S, acos_S, acos_B
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_t2     = acos_t1, acos_Hh, acos_HALF
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_U      = acos_Dd, acos_H, acos_S
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_y1     = acos_t2, acos_y0, acos_y0
      nop.i 999 ;;
}

// acos_2U = U + U
{ .mfi
      nop.m 999
(p0)  fma.s1         acos_2U     = acos_U, f1, acos_U
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_t3     = acos_y1, acos_Hh, f0
      nop.i 999
}

// Step 1.3:
// sqrt(A + a) = V + v
// sqrt(B + b) = U + u

/////////////////////////
// Get u
/////////////////////////

// acos_BmUU   = B - UU
// acos_BmUUpb = (B - UU) + b

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_BmUU   = acos_U, acos_U, acos_B
      nop.i 999 ;;
}

// f9 = U (presumably an argument register for __libm_atan2_reg, which is
// called from __libm_callout below; its register contract is not visible here)
{ .mfi
      nop.m 999
(p0)  fmerge.se      f9          = acos_U, acos_U
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_t4     = acos_t3, acos_y1, acos_HALF
      nop.i 999 ;;
}

// acos_1d2U = frcpa(2U)

{ .mfi
      nop.m 999
(p0)  frcpa.s1       acos_1d2U,p9 = f1, acos_2U
      nop.i 999
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_BmUUpb = acos_BmUU, f1, acos_Bb
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_y2     = acos_t4, acos_y1, acos_y1
      nop.i 999 ;;
}

// acos_Uu = ((B - UU) + b) * frcpa(2U)

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Uu     = acos_BmUUpb, acos_1d2U, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_S      = acos_A, acos_y2, f0
      nop.i 999
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_H      = acos_y2, acos_HALF, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_t5     = acos_Hh, acos_y2, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_Dd     = acos_S, acos_S, acos_A
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_V      = acos_Dd, acos_H, acos_S
      nop.i 999 ;;
}

// acos_2V = V + V
{ .mfi
      nop.m 999
(p0)  fma.s1         acos_2V     = acos_V, f1, acos_V
      nop.i 999
}

// Step 3
/////////////////////////
// Calculate the correction, acos_corr
/////////////////////////
// acos_corr = U*v - (V*u)
// (the final subtraction is issued in __libm_callout, in the same bundle
//  as the call to __libm_atan2_reg)

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Vu     = acos_V,acos_Uu, f0
      nop.i 999 ;;
}

/////////////////////////
// Get v
/////////////////////////
// acos_AmVV   = A - VV
// acos_AmVVpa = (A - VV) + a

{ .mfi
      nop.m 999
(p0)  fnma.s1        acos_AmVV   = acos_V, acos_V, acos_A
      nop.i 999 ;;
}

// f8 = V (presumably the other argument register for __libm_atan2_reg)
{ .mfi
      nop.m 999
(p0)  fmerge.se      f8          = acos_V, acos_V
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_AmVVpa = acos_AmVV, f1, acos_Aa
      nop.i 999 ;;
}

// acos_1d2V = frcpa(2V)

{ .mfi
      nop.m 999
(p0)  frcpa.s1       acos_1d2V,p9 = f1, acos_2V
      nop.i 999 ;;
}

// acos_Vv = ((A - VV) + a) * frcpa(2V)

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Vv     = acos_AmVVpa, acos_1d2V, f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_Uv     = acos_U,acos_Vv, f0
      nop.i 999 ;;
}

// No branch precedes .endp: execution continues from the last bundle of
// acosl into __libm_callout.
.endp acosl#
ASM_SIZE_DIRECTIVE(acosl#)


// __libm_callout
// Saves ar.pfs, gp and b0, calls __libm_atan2_reg, restores the saved
// registers, and then combines acos_Z_hi, acos_s_lo and acos_Z_lo
// (presumably produced by __libm_atan2_reg -- confirm against that routine)
// with acos_corr and the pi constants into the final result in f8.
.proc __libm_callout
__libm_callout:
.prologue
{ .mfi
      nop.m 0
      nop.f 0
.save ar.pfs,GR_SAVE_PFS
      mov            GR_SAVE_PFS = ar.pfs
}
;;

{ .mfi
      mov            GR_SAVE_GP  = gp
      nop.f 0
.save b0, GR_SAVE_B0
      mov            GR_SAVE_B0  = b0
}

.body
// acos_corr = U*v - V*u, issued together with the call
{ .mfb
      nop.m 999
(p0)  fms.s1         acos_corr   = acos_Uv,f1, acos_Vu
(p0)  br.call.sptk.many  b0 = __libm_atan2_reg# ;;
}

// p6 ==> X is negative
// p7 ==> X is positive
// We know that |X| >= 1/4

{ .mfi
(p0)  mov            gp          = GR_SAVE_GP
(p0)  fcmp.lt.unc    p6,p7       = acos_X , f0
(p0)  mov            b0          = GR_SAVE_B0 ;;
}

// acos_2_Z_hi    = 2 * acos_Z_hi
// acos_s_lo_Z_lo = s_lo * Z_lo

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_2_Z_hi    = acos_Z_hi, f1, acos_Z_hi
(p0)  mov            ar.pfs         = GR_SAVE_PFS
}

{ .mfi
      nop.m 999
(p0)  fma.s1         acos_s_lo_Z_lo = acos_s_lo, acos_Z_lo, f0
      nop.i 999 ;;
}

// 2 is a constant needed later (1*1 + 1)
{ .mfi
      nop.m 999
(p0)  fma.s1         acos_2         = f1,f1,f1
      nop.i 999 ;;
}

// X >= 1/4
// acos_result_lo = 2(s_lo * Z_lo) + corr      (fma: acos_corr is added)
// f8             = (2*Z_hi) + (2(s_lo * Z_lo) + corr)

{ .mfi
      nop.m 999
(p7)  fma.s1         acos_result_lo = acos_s_lo_Z_lo, acos_2, acos_corr
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p7)  fma.s0         f8             = acos_2_Z_hi, f1, acos_result_lo
      nop.i 999
}

// X <= -1/4
// acos_result_lo = (pi_lo - corr)
// (acos_Ww is added to it further down)

{ .mfi
      nop.m 999
(p6)  fms.s1         acos_result_lo = acos_pi_lo, f1, acos_corr
      nop.i 999 ;;
}

// acos_W = pi_hi - 2 * Z_hi

{ .mfi
      nop.m 999
(p6)  fnma.s1        acos_W         = acos_2, acos_Z_hi, acos_pi_hi
      nop.i 999 ;;
}

// acos_Ww = pi_hi - W
// acos_Ww = (pi_hi - W) - (2 * Z_hi)          (fms: acos_2_Z_hi is subtracted)

{ .mfi
      nop.m 999
(p6)  fms.s1         acos_Ww        = acos_pi_hi, f1, acos_W
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p6)  fms.s1         acos_Ww        = acos_Ww, f1, acos_2_Z_hi
      nop.i 999 ;;
}

// acos_result_lo = (pi_lo - corr) + acos_Ww

{ .mfi
      nop.m 999
(p6)  fma.s1         acos_result_lo = acos_result_lo, f1, acos_Ww
      nop.i 999 ;;
}

// acos_Z_lo = ((pi_lo - corr) + acos_Ww) - 2 * (s_lo * Z_lo)

{ .mfi
      nop.m 999
(p6)  fnma.s1        acos_Z_lo      = acos_s_lo_Z_lo, acos_2, acos_result_lo
      nop.i 999 ;;
}

// f8 = W + Z_lo for negative X; the return itself is unconditional.
{ .mfb
      nop.m 999
(p6)  fma.s0         f8             = acos_W, f1, acos_Z_lo
(p0)  br.ret.sptk    b0 ;;
}
.endp __libm_callout
ASM_SIZE_DIRECTIVE(__libm_callout)


// SPECIAL
// Two entry labels for the non-regular inputs:
//   L(ACOS_NAN)          -- returns f8*1 + 0 computed in status field s0.
//   L(ACOS_ERROR_RETURN) -- returns directly for |X| == 1 and for NaN;
//                           any other input sets up tag 57 and a 0/0 value in
//                           f10 and runs on into __libm_error_region.
.proc SPECIAL
SPECIAL:
L(ACOS_NAN):
{ .mfb
      nop.m 999
(p0)  fma.s0         f8 = f8,f1,f0
(p0)  br.ret.sptk    b0 ;;
}

L(ACOS_ERROR_RETURN):
// Save ar.pfs, b0, and gp; restore on exit
// (done in __libm_error_region below)

// fclass mask used for the NaN test further down:
// qnan snan inf norm unorm 0 -+
// 1    1    0   0    0     0 11 = 0xc3

// Coming in as X = +- 1
// What should we return?
// If X is -1, return acos_pi_hi + acos_pi_lo
// If X is +1, return +0 (sign of X merged with the magnitude of f0)

// p6 <== |X| == 1
{ .mfi
      nop.m 999
(p0)  fcmp.eq.unc    p6,p7 = acos_ABS_NORM_f8,f1
      nop.i 999 ;;
}

// Only when |X| == 1:  p8 <== X < 0,  p9 <== X >= 0
{ .mfi
      nop.m 999
(p6)  fcmp.lt.unc    p8,p9 = f8,f0
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p8)  fma.s0         f8 = acos_pi_hi, f1, acos_pi_lo
      nop.i 999
}

{ .mfb
      nop.m 999
(p9)  fmerge.s       f8 = f8,f0
(p6)  br.ret.spnt    b0 ;;
}

// If X is a NAN, leave

{ .mfi
      nop.m 999
(p0)  fclass.m.unc   p12,p0 = f8, 0xc3
      nop.i 999 ;;
}

{ .mfb
      nop.m 999
(p12) fma.s0         f8 = f8,f1,f0
(p12) br.ret.spnt    b0 ;;
}

// Remaining inputs: error tag 57, and f10 = frcpa(0/0), which is stored as
// "Parameter 3" by __libm_error_region.
{ .mfi
(p0)  mov            GR_Parameter_TAG = 57
(p0)  frcpa          f10, p6 = f0, f0
      nop.i 999
};;
// No branch here: execution continues into __libm_error_region.
.endp SPECIAL
ASM_SIZE_DIRECTIVE(SPECIAL)


// __libm_error_region
// Builds a 64-byte stack frame holding three floating-point values
// (f1, f8 and f10), calls __libm_error_support, reloads f8 from the frame,
// then restores sp, b0, gp and ar.pfs and returns.
.proc __libm_error_region
__libm_error_region:
.prologue

// (1)
{ .mfi
      add            GR_Parameter_Y = -32,sp        // Parameter 2 value
      nop.f 0
.save ar.pfs,GR_SAVE_PFS
      mov            GR_SAVE_PFS = ar.pfs           // Save ar.pfs
}
{ .mfi
.fframe 64
      add            sp = -64,sp                    // Create new stack
      nop.f 0
      mov            GR_SAVE_GP = gp                // Save gp
};;

// (2)
{ .mmi
      stfe           [GR_Parameter_Y] = f1,16       // Store Parameter 2 on stack
      add            GR_Parameter_X = 16,sp         // Parameter 1 address
.save b0, GR_SAVE_B0
      mov            GR_SAVE_B0 = b0                // Save b0
};;

.body
// (3)
{ .mib
      stfe           [GR_Parameter_X] = f8          // Store Parameter 1 on stack
      add            GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address
      nop.b 0
}
{ .mib
      stfe           [GR_Parameter_Y] = f10         // Store Parameter 3 on stack
      add            GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk   b0 = __libm_error_support#     // Call error handling function
};;
{ .mmi
      nop.m 0
      nop.m 0
      add            GR_Parameter_RESULT = 48,sp
};;

// (4)
{ .mmi
      ldfe           f8 = [GR_Parameter_RESULT]     // Get return result off stack
.restore sp
      add            sp = 64,sp                     // Restore stack pointer
      mov            b0 = GR_SAVE_B0                // Restore return address
};;
{ .mib
      mov            gp = GR_SAVE_GP                // Restore gp
      mov            ar.pfs = GR_SAVE_PFS           // Restore ar.pfs
      br.ret.sptk    b0                             // Return
};;

.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#

.type   __libm_atan2_reg#,@function
.global __libm_atan2_reg#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -