📄 e_coshf.s
字号:
nop.i 999
};;

// ******************************************************
// Small-|x| polynomial path:
//   coshf(x) ~= 1 + x2*p_odd + p_even
// where coshf_FR_X holds ax, x2 = ax*ax and x4 = x2*x2.
// The six coefficients P1..P6 are loaded as consecutive 16-byte
// entries through r34 (post-incremented by 16 on every load).
// ******************************************************

// Calculate coshf_FR_X2 = ax*ax and coshf_FR_X4 = ax*ax*ax*ax
{ .mmf
      nop.m 999
(p0)  ldfe coshf_FR_P1 = [r34],16
(p0)  fma.s1 coshf_FR_X2 = coshf_FR_X, coshf_FR_X, f0 ;;
}

{ .mmi
(p0)  ldfe coshf_FR_P2 = [r34],16 ;;
(p0)  ldfe coshf_FR_P3 = [r34],16
      nop.i 999 ;;
}

{ .mmi
(p0)  ldfe coshf_FR_P4 = [r34],16 ;;
(p0)  ldfe coshf_FR_P5 = [r34],16
      nop.i 999 ;;
}

{ .mfi
(p0)  ldfe coshf_FR_P6 = [r34],16
(p0)  fma.s1 coshf_FR_X4 = coshf_FR_X2, coshf_FR_X2, f0
      nop.i 999 ;;
}

// Calculate coshf_FR_podd = x4 *(x4 * P_5 + P_3) + P_1
{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_poly_podd_temp1 = coshf_FR_X4, coshf_FR_P5, coshf_FR_P3
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_podd = coshf_FR_X4, coshf_FR_poly_podd_temp1, coshf_FR_P1
      nop.i 999
}

// Calculate coshf_FR_peven = p_even = x4 *(x4 * (x4 * P_6 + P_4) + P_2)
{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_poly_peven_temp1 = coshf_FR_X4, coshf_FR_P6, coshf_FR_P4
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_poly_peven_temp2 = coshf_FR_X4, coshf_FR_poly_peven_temp1, coshf_FR_P2
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_peven = coshf_FR_X4, coshf_FR_poly_peven_temp2, f0
      nop.i 999 ;;
}

// Y_lo = x2*p_odd + p_even
// Calculate f8 = Y_hi + Y_lo, where Y_hi is formed as f1*f1 (i.e. 1.0).
// The final fma uses the .s completer and status field s0 and then
// returns to the caller through b0.
{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_Y_lo = coshf_FR_X2, coshf_FR_podd, coshf_FR_peven
      nop.i 999 ;;
}

{ .mfb
      nop.m 999
(p0)  fma.s.s0 f8 = f1, f1, coshf_FR_Y_lo
(p0)  br.ret.sptk b0 ;;
}


L(COSH_BY_TBL):

// Now that we are at TBL; so far all we know is that |x| >= 0.25.
// The first two steps are the same for TBL and EXP, but if |x| is HUGE
// we want to leave now.
//
// The threshold actually loaded below is the single-precision one:
//   go to HUGE if |x| >= 2^7; 0x10006 (register-biased) is e = 7 (true).
// For reference, the thresholds quoted for the other precisions are
//   Double:  |x| >= 2^10, 0x10009 (register-biased) is e = 10 (true)
//   and      |x| >= 2^14, 0x1000d (register-biased) is e = 14 (true);
// neither of those constants is used in this code.

{ .mlx
      nop.m 999
(p0)  movl r32 = 0x0000000000010006 ;;
}

{ .mfi
(p0)  setf.exp f9 = r32
      nop.f 999
      nop.i 999 ;;
}

// p6 = (coshf_FR_X >= 2^7), p7 = complement
{ .mfi
      nop.m 999
(p0)  fcmp.ge.unc p6,p7 = coshf_FR_X,f9
      nop.i 999 ;;
}

{ .mib
      nop.m 999
      nop.i 999
(p6)  br.cond.spnt L(COSH_HUGE) ;;
}

// Integer register roles in the code below:
// r32 = 1
// r34 = N-1
// r35 = N
// r36 = j
// r37 = N+1 (see the review note where r37 is reloaded below)

// TBL can never overflow
// coshf(x) = coshf(B+R)
//          = coshf(B) coshf(R) + sinh(B) sinh(R)
// coshf(R) can be approximated by 1 + p_even
// sinh(R) can be approximated by p_odd

// ******************************************************
// STEP 1 (TBL and EXP)
// ******************************************************
// Get the following constants from single_coshf_arg_reduction:
// coshf_FR_Inv_log2by64
// coshf_FR_log2by64_hi
// coshf_FR_log2by64_lo

{ .mmi
(p0)  adds r32 = 0x1,r0
(p0)  addl r34 = @ltoff(single_coshf_arg_reduction), gp
      nop.i 999
}
;;

// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
// put them in an exponent.
// coshf_FR_spos = 2^(N-1) and coshf_FR_sneg = 2^(-N-1)
// r38 = 0xfffe = 0xffff - 1
// r39 = 0xffff + (N-1) = 0xffff +N -1
// r40 = 0xffff - (N +1) = 0xffff -N -1

{ .mlx
      ld8 r34 = [r34]
(p0)  movl r38 = 0x000000000000fffe ;;
}

{ .mmi
(p0)  ldfe coshf_FR_Inv_log2by64 = [r34],16 ;;
(p0)  ldfe coshf_FR_log2by64_hi = [r34],16
      nop.i 999 ;;
}

{ .mbb
(p0)  ldfe coshf_FR_log2by64_lo = [r34],16
      nop.b 999
      nop.b 999 ;;
}

// Get the A coefficients (coshf_FR_A1, coshf_FR_A2, coshf_FR_A3) from
// single_coshf_ab_table; the loads are interleaved with the
// computation of M and R below.

{ .mmi
      nop.m 999
(p0)  addl r34 = @ltoff(single_coshf_ab_table), gp
      nop.i 999
}
;;

{ .mmi
      ld8 r34 = [r34]
      nop.m 999
      nop.i 999
}
;;

// Calculate M and keep it as integer and floating point.
// M = round-to-integer(x*Inv_log2by64) = round-to-integer(ax/(log2/64))
// NOTE(review): the conversion is fcvt.fx without the .trunc completer,
// so M is rounded under status field s1 rather than truncated -- confirm
// against the architecture manual.
// Put the significand of M in r35
// and the floating point representation of M in coshf_FR_M

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_M = coshf_FR_X, coshf_FR_Inv_log2by64, f0
      nop.i 999
}

{ .mfi
(p0)  ldfe coshf_FR_A1 = [r34],16
      nop.f 999
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fcvt.fx.s1 coshf_FR_M_temp = coshf_FR_M
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnorm.s1 coshf_FR_M = coshf_FR_M_temp
      nop.i 999 ;;
}

{ .mfi
(p0)  getf.sig r35 = coshf_FR_M_temp
      nop.f 999
      nop.i 999 ;;
}

// M is still in r35. Calculate j. j is the signed extension of the
// six lsb of M. It has a range of -32 thru 31.
// r35 = M
// r36 = j (here: the raw six low bits; sign extension follows below)

{ .mii
      nop.m 999
      nop.i 999 ;;
(p0)  and r36 = 0x3f, r35 ;;
}

// Calculate R
// coshf_FR_R_temp = x - M*log2by64_hi
// coshf_FR_R      = R = (x - M*log2by64_hi) - M*log2by64_lo

{ .mfi
      nop.m 999
(p0)  fnma.s1 coshf_FR_R_temp = coshf_FR_M, coshf_FR_log2by64_hi, coshf_FR_X
      nop.i 999
}

{ .mfi
(p0)  ldfe coshf_FR_A2 = [r34],16
      nop.f 999
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fnma.s1 coshf_FR_R = coshf_FR_M, coshf_FR_log2by64_lo, coshf_FR_R_temp
      nop.i 999
}

// Finish the A coefficients and get the B coefficients
// (coshf_FR_B1, coshf_FR_B2, coshf_FR_B3), which follow A3 in the table.

{ .mmi
(p0)  ldfe coshf_FR_A3 = [r34],16 ;;
(p0)  ldfe coshf_FR_B1 = [r34],16
      nop.i 999 ;;
}

{ .mmi
(p0)  ldfe coshf_FR_B2 = [r34],16 ;;
(p0)  ldfe coshf_FR_B3 = [r34],16
      nop.i 999 ;;
}

// Sign-extend the 6-bit j: shift it into the top of a byte, sign-extend
// that byte (sxt1), then shift back down by 2 (the shr is in the next
// bundle).
{ .mii
      nop.m 999
(p0)  shl r34 = r36, 0x2 ;;
(p0)  sxt1 r37 = r34 ;;
}

// ******************************************************
// STEP 2 (TBL and EXP)
// ******************************************************
// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
// coshf_FR_Rcub = R*R*R
// coshf_FR_Rsq  = R*R
// coshf_FR_R    = R <== from above

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_Rsq = coshf_FR_R, coshf_FR_R, f0
(p0)  shr r36 = r37, 0x2 ;;
}

// r34 = M-j = r35 - r36
// r35 = N = (M-j)/64

{ .mii
(p0)  sub r34 = r35, r36
      nop.i 999 ;;
(p0)  shr r35 = r34, 0x6 ;;
}

// r40 = 0xfffe - N, r37 = N + 1, r39 = 0xfffe + N
{ .mii
(p0)  sub r40 = r38, r35
(p0)  adds r37 = 0x1, r35
(p0)  add r39 = r38, r35 ;;
}

// Get the address of the J table (single_coshf_j_table) into r37.
// The T(j) / T(-j) hi and lo values are fetched from it later.
// r34 = N - 1 (r32 still holds 1 here).
// NOTE(review): r37 = N+1 computed in the previous bundle is overwritten
// here by the table address before any use visible in this section.

{ .mmi
(p0)  sub r34 = r35, r32
(p0)  addl r37 = @ltoff(single_coshf_j_table), gp
      nop.i 999
}
;;

{ .mfi
      ld8 r37 = [r37]
(p0)  fma.s1 coshf_FR_Rcub = coshf_FR_Rsq, coshf_FR_R, f0
      nop.i 999
}

// ******************************************************
// STEP 3 Now decide if we need to branch to EXP
// ******************************************************

// Put 32 in f9; p6 true if x < 32
// 0x10004 (register-biased) is e = 5 (true), i.e. 2^5 = 32.
// The setf.exp that moves it into f9 is further below.

{ .mlx
      nop.m 999
(p0)  movl r32 = 0x0000000000010004 ;;
}

// Calculate p_even
// coshf_FR_peven_temp1 = B_2 + Rsq *B_3
// coshf_FR_peven_temp2 = B_1 + Rsq * (B_2 + Rsq *B_3)
// coshf_FR_peven       = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_peven_temp1 = coshf_FR_Rsq, coshf_FR_B3, coshf_FR_B2
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_peven_temp2 = coshf_FR_Rsq, coshf_FR_peven_temp1, coshf_FR_B1
      nop.i 999
}

// Calculate p_odd
// coshf_FR_podd_temp1 = A_2 + Rsq *A_3
// coshf_FR_podd_temp2 = A_1 + Rsq * (A_2 + Rsq *A_3)
// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_podd_temp1 = coshf_FR_Rsq, coshf_FR_A3, coshf_FR_A2
      nop.i 999 ;;
}

// coshf_FR_N_temp1 gets biased exponent r39 = 0xffff + (N-1)
{ .mfi
(p0)  setf.exp coshf_FR_N_temp1 = r39
      nop.f 999
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_peven = coshf_FR_Rsq, coshf_FR_peven_temp2, f0
      nop.i 999
}

{ .mfi
      nop.m 999
(p0)  fma.s1 coshf_FR_podd_temp2 = coshf_FR_Rsq, coshf_FR_podd_temp1, coshf_FR_A1
      nop.i 999 ;;
}

{ .mfi
(p0)  setf.exp f9 = r32
      nop.f 999
      nop.i 999 ;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -