📄 e_cosh.s
字号:
// ======================================================================
// Tail of cosh (the entry and argument reduction are above this point)
// followed by the shared error stub __libm_error_region.
//
// What the visible code does:
//   * forms two table addresses from r36, r37 and r40
//         AD_mJ = r37 + ((r40 - r36)  << 5)
//         AD_J  = r37 + ((r36 + 0x20) << 5)
//     and loads a (hi: ldfe, lo: ldfs) pair from each 32-byte entry;
//   * builds the scale factors cosh_FR_spos / cosh_FR_sneg with fmerge.se;
//   * either finishes on the main path (result in f8) or, when p7 is set
//     by the compare of cosh_FR_X against f9, continues at COSH_BY_EXP.
//
// NOTE(review): the meaning of f9, r34, r36, r37 and r40 on entry to this
// fragment is established earlier in the function - confirm there.
//
// Bundle templates, stops (;;) and predicates are significant; several
// bundles deliberately share an instruction group.  Do not reorder.
// ======================================================================

{ .mfi
(p0)   sub                 GR_mJ        = r40, r36
(p0)   fmerge.se           cosh_FR_spos = cosh_FR_N_temp1, f1
(p0)   adds                GR_J         = 0x20, r36 ;;
}

{ .mii
       nop.m 999
(p0)   shl                 GR_mJ        = GR_mJ, 5 ;;
(p0)   add                 AD_mJ        = r37, GR_mJ ;;
}

// Each table entry is read as: ldfe at offset 0, ldfs at offset 16.
{ .mmi
       nop.m 999
(p0)   ldfe                cosh_FR_Tmjhi = [AD_mJ],16
(p0)   shl                 GR_J          = GR_J, 5 ;;
}

// p7 = !(cosh_FR_X < f9); p7 selects the COSH_BY_EXP path below.
{ .mfi
(p0)   ldfs                cosh_FR_Tmjlo = [AD_mJ],16
(p0)   fcmp.lt.unc.s1      p6,p7         = cosh_FR_X,f9
(p0)   add                 AD_J          = r37, GR_J ;;
}

{ .mmi
(p0)   ldfe                cosh_FR_Tjhi  = [AD_J],16 ;;
(p0)   ldfs                cosh_FR_Tjlo  = [AD_J],16
       nop.i 999 ;;
}

{ .mfb
       nop.m 999
(p0)   fmerge.se           cosh_FR_sneg  = cosh_FR_N_temp2, f1
(p7)   br.cond.spnt        L(COSH_BY_EXP) ;;
}

// ******************************************************
// If NOT branch to EXP
// ******************************************************
// Calculate C_hi
// ******************************************************
// cosh_FR_C_hi_temp = cosh_FR_sneg * cosh_FR_Tmjhi
// cosh_FR_C_hi      = cosh_FR_spos * cosh_FR_Tjhi + (cosh_FR_sneg * cosh_FR_Tmjhi)

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_C_hi_temp = cosh_FR_sneg, cosh_FR_Tmjhi, f0
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_C_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi_temp
       nop.i 999
}

// ******************************************************
// Calculate S_hi
// ******************************************************
// cosh_FR_S_hi_temp1 = cosh_FR_sneg * cosh_FR_Tmjhi
// cosh_FR_S_hi       = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_S_hi_temp1

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_S_hi_temp1 = cosh_FR_sneg, cosh_FR_Tmjhi, f0
       nop.i 999 ;;
}

// ******************************************************
// Calculate C_lo
// ******************************************************
// cosh_FR_C_lo_temp1 = cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi
// cosh_FR_C_lo_temp2 = cosh_FR_sneg * cosh_FR_Tmjhi + (cosh_FR_spos * cosh_FR_Tjhi - cosh_FR_C_hi)
// cosh_FR_C_lo_temp1 = cosh_FR_sneg * cosh_FR_Tmjlo          (register reused)
// cosh_FR_C_lo_temp3 = cosh_FR_spos * cosh_FR_Tjlo + (cosh_FR_sneg * cosh_FR_Tmjlo)
// cosh_FR_C_lo       = cosh_FR_C_lo_temp3 + cosh_FR_C_lo_temp2

{ .mfi
       nop.m 999
(p0)   fms.s1              cosh_FR_C_lo_temp1 = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_C_hi
       nop.i 999
}

{ .mfi
       nop.m 999
(p0)   fms.s1              cosh_FR_S_hi = cosh_FR_spos, cosh_FR_Tjhi, cosh_FR_S_hi_temp1
       nop.i 999 ;;
}

// The next two bundles are in one instruction group: the first READS
// cosh_FR_C_lo_temp1 and the second OVERWRITES it.  Keep this order and
// do not insert a stop between them unless both are reviewed together.
{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_C_lo_temp2 = cosh_FR_sneg, cosh_FR_Tmjhi, cosh_FR_C_lo_temp1
       nop.i 999
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_C_lo_temp1 = cosh_FR_sneg, cosh_FR_Tmjlo, f0
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_C_lo_temp3 = cosh_FR_spos, cosh_FR_Tjlo, cosh_FR_C_lo_temp1
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_C_lo = cosh_FR_C_lo_temp3, f1, cosh_FR_C_lo_temp2
       nop.i 999 ;;
}

// ******************************************************
// cosh_FR_Y_lo_temp = cosh_FR_C_hi * cosh_FR_peven + cosh_FR_C_lo
// cosh_FR_Y_lo      = cosh_FR_S_hi * cosh_FR_podd  + cosh_FR_Y_lo_temp
// cosh_FR_COSH      = Y_hi + Y_lo      (Y_hi is cosh_FR_C_hi here)

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_Y_lo_temp = cosh_FR_C_hi, cosh_FR_peven, cosh_FR_C_lo
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_Y_lo = cosh_FR_S_hi, cosh_FR_podd, cosh_FR_Y_lo_temp
       nop.i 999 ;;
}

// Final rounding to double in the user's status field (s0); result in f8.
{ .mfb
       nop.m 999
(p0)   fma.d.s0            f8 = cosh_FR_C_hi, f1, cosh_FR_Y_lo
(p0)   br.ret.sptk         b0 ;;
}


L(COSH_BY_EXP):

// When p7 is true,  we know that an overflow is not going to happen
// When p7 is false, we must check for possible overflow
// p7 is the over_SAFE flag (recomputed by the cmp.gt.unc below)
// f44 = Scale * (Y_hi + Y_lo)
//     = cosh_FR_spos * (cosh_FR_Tjhi + cosh_FR_Y_lo)

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_Y_lo_temp = cosh_FR_peven, f1, cosh_FR_podd
       nop.i 999
}

// Now we are in EXP. This is the only path where an overflow is possible
// but not for certain. So this is the only path where over_SAFE has any use.
// r34 still has N-1
// There is a danger of double-extended overflow   if N-1 > 16382 = 0x3ffe
// There is a danger of double overflow            if N-1 > 0x3fe = 1022

{ .mlx
       nop.m 999
(p0)   movl                r32 = 0x00000000000003fe ;;
}

// p7 = !(r34 > r32), i.e. set when N-1 <= 0x3fe.
{ .mfi
(p0)   cmp.gt.unc          p0,p7 = r34, r32
       nop.f 999
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_Y_lo = cosh_FR_Tjhi, cosh_FR_Y_lo_temp, cosh_FR_Tjlo
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_COSH_temp = cosh_FR_Y_lo, f1, cosh_FR_Tjhi
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.d.s0            f44 = cosh_FR_spos, cosh_FR_COSH_temp, f0
       nop.i 999 ;;
}

// If over_SAFE is set, return
{ .mfb
       nop.m 999
(p7)   fmerge.s            f8 = f44,f44
(p7)   br.ret.sptk         b0 ;;
}

// Else see if we overflowed
// S0 user supplied status
// S2 user supplied status + WRE + TD  (Overflows)
// If WRE is set then an overflow will not occur in EXP.
// The input value that would cause a register (WRE) value to overflow is about 2^15
// and this input would go into the HUGE path.
// Answer with WRE is in f43.

{ .mfi
       nop.m 999
(p0)   fsetc.s2            0x7F,0x42
       nop.i 999;;
}

{ .mfi
       nop.m 999
(p0)   fma.d.s2            f43 = cosh_FR_spos, cosh_FR_COSH_temp, f0
       nop.i 999 ;;
}

// 103FF => 103FF -FFFF = 400(true)
// 400 + 3FF = 7FF, which is 1 more than the exponent of the largest
// double (7FE). So 0 103FF 8000000000000000 is one ulp more than
// largest double in register bias
// Now  set p8 if the answer with WRE is greater than or equal this value
// Also set p9 if the answer with WRE is less than or equal to negative this value

{ .mlx
       nop.m 999
(p0)   movl                r32 = 0x00000000000103ff ;;
}

// NOTE(review): the second fsetc uses 0x40 instead of 0x42; presumably this
// drops WRE again while leaving TD set - confirm against the fsetc encoding.
{ .mmf
       nop.m 999
(p0)   setf.exp            f41 = r32
(p0)   fsetc.s2            0x7F,0x40 ;;
}

{ .mfi
       nop.m 999
(p0)   fcmp.ge.unc.s1      p8, p0 = f43, f41
       nop.i 999
}

// f42 = -f41 (negative threshold for the p9 test)
{ .mfi
       nop.m 999
(p0)   fmerge.ns           f42 = f41, f41
       nop.i 999 ;;
}

// The error tag for overflow is 64
{ .mii
       nop.m 999
       nop.i 999 ;;
(p8)   mov                 r47 = 64 ;;
}

{ .mfb
       nop.m 999
(p0)   fcmp.le.unc.s1      p9, p0 = f43, f42
(p8)   br.cond.spnt        __libm_error_region ;;
}

{ .mii
       nop.m 999
       nop.i 999 ;;
(p9)   mov                 r47 = 64
}

{ .mib
       nop.m 999
       nop.i 999
(p9)   br.cond.spnt        __libm_error_region ;;
}

// No overflow detected: return the s0 result computed above.
{ .mfb
       nop.m 999
(p0)   fmerge.s            f8 = f44,f44
(p0)   br.ret.sptk         b0 ;;
}


// for COSH_HUGE, put 24000 in exponent; take sign from input; add 1
// (0x15dbf - 0xffff = 0x5dc0 = 24000)
// SAFE: SAFE is always 0 for HUGE
// This path sets the overflow tag (r47 = 64) and then FALLS THROUGH into
// __libm_error_region, which immediately follows.

L(COSH_HUGE):

{ .mlx
       nop.m 999
(p0)   movl                r32 = 0x0000000000015dbf ;;
}

{ .mfi
(p0)   setf.exp            f9 = r32
       nop.f 999
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.s1              cosh_FR_hi_lo = f1, f9, f1
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fma.d.s0            f44 = f9, cosh_FR_hi_lo, f0
(p0)   mov                 r47 = 64
};;

.endp cosh#
ASM_SIZE_DIRECTIVE(cosh#)


// ----------------------------------------------------------------------
// __libm_error_region
//
// Reached by branch from COSH_BY_EXP (p8/p9) or by fall-through from
// COSH_HUGE.  On entry, as used below:
//     f8  = value stored as Parameter 1
//     f44 = value stored in the result slot
//     r47 = error tag (64 on every visible path into this stub)
// Builds a 64-byte frame, calls __libm_error_support#, reloads f8 from
// the result slot and returns to the caller of cosh.
//
// Frame layout after "add sp=-64,sp" (offsets from the new sp):
//     sp+16  Parameter 1 (f8)
//     sp+32  Parameter 2 (f0)
//     sp+48  result      (f44 in, f8 out)
// ----------------------------------------------------------------------

// Stack operations when calling error support.
//       (1)               (2)                          (3) (call)              (4)
//   sp   -> +          psp -> +                     psp -> +                   sp -> +
//           |                 |                            |                         |
//           |                 | <- GR_Y               R3 ->| <- GR_RESULT            | -> f8
//           |                 |                            |                         |
//           | <-GR_Y      Y2->|                       Y2 ->| <- GR_Y                 |
//           |                 |                            |                         |
//           |                 | <- GR_X               X1 ->|                         |
//           |                 |                            |                         |
//  sp-64 -> +          sp ->  +                     sp ->  +                         +
//    save ar.pfs          save b0                                               restore gp
//    save gp                                                                    restore ar.pfs

.proc __libm_error_region
__libm_error_region:
.prologue

// (1)
{ .mfi
        add   GR_Parameter_Y=-32,sp             // Parameter 2 address (old sp - 32)
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
        add sp=-64,sp                           // Create new stack
        nop.f 0
        mov GR_SAVE_GP=gp                       // Save gp
};;

// (2)
{ .mmi
        stfd [GR_Parameter_Y] = f0,16           // STORE Parameter 2 on stack; GR_Y now -> sp+48
        add GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;

.body
// (3)
{ .mib
        stfd [GR_Parameter_X] = f8              // STORE Parameter 1 on stack
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
        nop.b 0
}
{ .mib
        stfd [GR_Parameter_Y] = f44             // STORE Parameter 3 on stack
        add   GR_Parameter_Y = -16,GR_Parameter_Y     // GR_Y back to Parameter 2 (sp+32)
        br.call.sptk b0=__libm_error_support#   // Call error handling function
};;
{ .mmi
        nop.m 0
        nop.m 0
        add   GR_Parameter_RESULT = 48,sp       // Recompute result address after the call
};;

// (4)
{ .mmi
        ldfd  f8 = [GR_Parameter_RESULT]        // Get return result off stack
.restore sp
        add   sp = 64,sp                        // Restore stack pointer
        mov   b0 = GR_SAVE_B0                   // Restore return address
};;
{ .mib
        mov   gp = GR_SAVE_GP                   // Restore gp
        mov   ar.pfs = GR_SAVE_PFS              // Restore ar.pfs
        br.ret.sptk     b0                      // Return
};;

.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -