📄 s_expm1f.s
字号:
(p0) fclass.m.unc p6, p8 = f8, 0x1E7 nop.i 0 ;;}{ .mfi nop.m 999(p0) fclass.nm.unc p9, p0 = f8, 0x1FF nop.i 0 }{ .mfi nop.m 999(p0) mov f36 = f1 nop.i 999 ;;}// // Identify NatVals, NaNs, Infs, and Zeros. // Identify EM unsupporteds. // Save special input registers //// Create FR_X_cor = 0.0 // GR_Flag = 0 // GR_Expo_Range = 0 (r32) for single precision // FR_Scale = 1.0//{ .mfb nop.m 999(p0) mov f32 = f0 (p6) br.cond.spnt EXPF_64_SPECIAL ;; }{ .mib nop.m 999 nop.i 999(p9) br.cond.spnt EXPF_64_UNSUPPORTED ;; }// // Branch out for special input values // { .mfi(p0) cmp.ne.unc p12, p13 = 0x01, r33(p0) fcmp.lt.unc.s0 p9,p0 = f8, f0 (p0) cmp.eq.unc p15, p0 = r0, r0 }// // Raise possible denormal operand exception // Normalize x // // This function computes expf( x + x_cor) // Input FR 1: FR_X // Input FR 2: FR_X_cor // Input GR 1: GR_Flag // Input GR 2: GR_Expo_Range // Output FR 3: FR_Y_hi // Output FR 4: FR_Y_lo // Output FR 5: FR_Scale // Output PR 1: PR_Safe //// Prepare to load constants// Set Safe = True//{ .mmi(p0) addl r34 = @ltoff(Constants_exp_64_Arg#),gp (p0) addl r40 = @ltoff(Constants_exp_64_W1#),gp (p0) addl r41 = @ltoff(Constants_exp_64_W2#),gp };;{ .mmi ld8 r34 = [r34] ld8 r40 = [r40](p0) addl r50 = @ltoff(Constants_exp_64_T1#), gp};;{ .mmi ld8 r41 = [r41](p0) ldfe f37 = [r34],16(p0) addl r51 = @ltoff(Constants_exp_64_T2#), gp};;//// N = fcvt.fx(float_N)// Set p14 if -6 > expo_X ////// Bias = 0x0FFFF// expo_X = expo_X and Mask //{ .mmi ld8 r50 = [r50](p0) ldfe f40 = [r34],16 nop.i 999};;{ .mlx nop.m 999(p0) movl r58 = 0x0FFFF };;//// Load W2_ptr// Branch to SMALL is expo_X < -6////// float_N = X * L_Inv// expo_X = exponent of X// Mask = 0x1FFFF//{ .mmi ld8 r51 = [r51](p0) ldfe f41 = [r34],16 //// float_N = X * L_Inv// expo_X = exponent of X// Mask = 0x1FFFF// nop.i 0};;{ .mlx(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp(p0) movl r39 = 0x1FFFF };;{ .mmi ld8 r34 = [r34](p0) getf.exp r37 = f9 nop.i 999};;{ .mii nop.m 999 nop.i 999 (p0) and r37 = r37, r39 ;; }{ .mmi(p0) sub r37 = r37, r58 ;; (p0) cmp.gt.unc p14, p0 = -6, r37 (p0) cmp.lt.unc p10, p0 = 14, r37 ;; }{ .mfi nop.m 999//// Load L_inv // Set p12 true for Flag = 0 (exp)// Set p13 true for Flag = 1 (expm1)//(p0) fmpy.s1 f38 = f9, f37 nop.i 999 ;;}{ .mfb nop.m 999//// Load L_hi// expo_X = expo_X - Bias// get W1_ptr //(p0) fcvt.fx.s1 f39 = f38(p14) br.cond.spnt EXPF_SMALL ;; }{ .mib nop.m 999 nop.i 999(p10) br.cond.spnt EXPF_HUGE ;; }{ .mmi(p0) shladd r34 = r32,4,r34 (p0) addl r35 = @ltoff(Constants_exp_64_A#),gp nop.i 999};;{ .mmi ld8 r35 = [r35] nop.m 999 nop.i 999};;//// Load T_1,T_2//{ .mmb(p0) ldfe f51 = [r35],16 (p0) ld8 r45 = [r34],8 nop.b 999 ;;}// // Set Safe = True if k >= big_expo_neg // Set Safe = False if k < big_expo_neg // { .mmb(p0) ldfe f49 = [r35],16 (p0) ld8 r48 = [r34],0 nop.b 999 ;;}{ .mfi nop.m 999//// Branch to HUGE is expo_X > 14 //(p0) fcvt.xf f38 = f39 nop.i 999 ;;}{ .mfi(p0) getf.sig r52 = f39 nop.f 999 nop.i 999 ;;}{ .mii nop.m 999(p0) extr.u r43 = r52, 6, 6 ;; //// r = r - float_N * L_lo// K = extr(N_fix,12,52)//(p0) shladd r40 = r43,3,r40 ;; }{ .mfi(p0) shladd r50 = r43,2,r50 (p0) fnma.s1 f42 = f40, f38, f9 //// float_N = float(N)// N_fix = signficand N //(p0) extr.u r42 = r52, 0, 6 }{ .mmi(p0) ldfd f43 = [r40],0 ;; (p0) shladd r41 = r42,3,r41 (p0) shladd r51 = r42,2,r51 }//// W_1_p1 = 1 + W_1//{ .mmi(p0) ldfs f44 = [r50],0 ;; (p0) ldfd f45 = [r41],0 //// M_2 = extr(N_fix,0,6)// M_1 = extr(N_fix,6,6)// r = X - float_N * L_hi//(p0) extr r44 = r52, 12, 52 }{ .mmi(p0) ldfs f46 = [r51],0 ;; (p0) sub r46 = r58, r44 (p0) cmp.gt.unc p8, p15 = r44, r45 }// // W = W_1 + W_1_p1*W_2 // Load A_2 // Bias_m_K = Bias - K//{ .mii(p0) ldfe f40 = [r35],16 //// load A_1// poly = A_2 + r*A_3 // rsq = r * r // neg_2_mK = exponent of Bias_m_k//(p0) add r47 = r58, r44 ;; // // Set Safe = True if k <= big_expo_pos // Set Safe = False if k > big_expo_pos // Load A_3// (p15) cmp.lt p8,p15 = r44,r48 ;;}{ .mmf(p0) setf.exp f61 = r46 // // Bias_p + K = Bias + K// T = T_1 * T_2// (p0) setf.exp f36 = r47 (p0) fnma.s1 f42 = f41, f38, f42 ;; }{ .mfi nop.m 999//// Load W_1,W_2// Load big_exp_pos, load big_exp_neg//(p0) fadd.s1 f47 = f43, f1 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 f52 = f42, f51, f49 nop.i 999}{ .mfi nop.m 999(p0) fmpy.s1 f48 = f42, f42 nop.i 999 ;;}{ .mfi nop.m 999(p0) fmpy.s1 f53 = f44, f46 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 f54 = f45, f47, f43 nop.i 999}{ .mfi nop.m 999(p0) fneg f61 = f61 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 f52 = f42, f52, f40 nop.i 999 ;;}{ .mfi nop.m 999(p0) fadd.s1 f55 = f54, f1 nop.i 999}{ .mfi nop.m 999//// W + Wp1 * poly // (p0) mov f34 = f53 nop.i 999 ;;}{ .mfi nop.m 999//// A_1 + r * poly // Scale = setf_expf(Bias_p_k) //(p0) fma.s1 f52 = f48, f52, f42 nop.i 999 ;;}{ .mfi nop.m 999//// poly = r + rsq(A_1 + r*poly) // Wp1 = 1 + W// neg_2_mK = -neg_2_mK//(p0) fma.s1 f35 = f55, f52, f54 nop.i 999 ;;}{ .mfb nop.m 999(p0) fmpy.s1 f35 = f35, f53 // // Y_hi = T// Y_lo = T * (W + Wp1*poly)//(p12) br.cond.sptk EXPF_MAIN ;; }//// Branch if expf(x) // Continue for expf(x-1)//{ .mii(p0) cmp.lt.unc p12, p13 = 10, r44 nop.i 999 ;;//// Set p12 if 10 < K, Else p13 //(p13) cmp.gt.unc p13, p14 = -10, r44 ;; }//// K > 10: Y_lo = Y_lo + neg_2_mK// K <=10: Set p13 if -10 > K, Else set p14 //{ .mfi(p13) cmp.eq p15, p0 = r0, r0 (p14) fadd.s1 f34 = f61, f34 nop.i 999 ;;}{ .mfi nop.m 999(p12) fadd.s1 f35 = f35, f61 nop.i 999 ;;}{ .mfi nop.m 999(p13) fadd.s1 f35 = f35, f34 nop.i 999}{ .mfb nop.m 999//// K <= 10 and K < -10, Set Safe = True// K <= 10 and K < 10, Y_lo = Y_hi + Y_lo // K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk // (p13) mov f34 = f61 (p0) br.cond.sptk EXPF_MAIN ;; }EXPF_SMALL: { .mmi(p12) addl r35 = @ltoff(Constants_exp_64_P#), gp(p0) addl r34 = @ltoff(Constants_exp_64_Exponents#), gp nop.i 999};;{ .mmi(p12) ld8 r35 = [r35] ld8 r34 = [r34] nop.i 999};;{ .mmi(p13) addl r35 = @ltoff(Constants_exp_64_Q#), gp nop.m 999 nop.i 999};;//// Return// K <= 10 and K < 10, Y_hi = neg_2_mk//// /*******************************************************/// /*********** Branch EXP_SMALL *************************/// /*******************************************************/{ .mfi(p13) ld8 r35 = [r35](p0) mov f42 = f9 (p0) add r34 = 0x48,r34 };;//// Flag = 0// r4 = rsq * rsq//{ .mfi(p0) ld8 r49 =[r34],0 nop.f 999 nop.i 999 ;;}{ .mii nop.m 999 nop.i 999 ;;//// Flag = 1//(p0) cmp.lt.unc p14, p0 = r37, r49 ;; }{ .mfi nop.m 999//// r = X//(p0) fmpy.s1 f48 = f42, f42 nop.i 999 ;;}{ .mfb nop.m 999//// rsq = r * r//(p0) fmpy.s1 f50 = f48, f48 //// Is input very small?//(p14) br.cond.spnt EXPF_VERY_SMALL ;; }//// Flag_not1: Y_hi = 1.0// Flag is 1: r6 = rsq * r4//{ .mfi(p12) ldfe f52 = [r35],16 (p12) mov f34 = f1 (p0) add r53 = 0x1,r0 ;; }{ .mfi(p13) ldfe f51 = [r35],16 //// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo//(p13) mov f34 = f9 nop.i 999 ;;}{ .mmf(p12) ldfe f53 = [r35],16 //// For Flag_not_1, Y_hi = X// Scale = 1// Create 0x000...01//(p0) setf.sig f37 = r53 (p0) mov f36 = f1 ;; }{ .mmi(p13) ldfe f52 = [r35],16 ;; (p12) ldfe f54 = [r35],16 nop.i 999 ;;}{ .mfi(p13) ldfe f53 = [r35],16 (p13) fmpy.s1 f58 = f48, f50 nop.i 999 ;;}//// Flag_not1: poly_lo = P_5 + r*P_6// Flag_1: poly_lo = Q_6 + r*Q_7//{ .mmi(p13) ldfe f54 = [r35],16 ;; (p12) ldfe f55 = [r35],16 nop.i 999 ;;}{ .mmi(p12) ldfe f56 = [r35],16 ;; (p13) ldfe f55 = [r35],16 nop.i 999 ;;}{ .mmi(p12) ldfe f57 = [r35],0 ;; (p13) ldfe f56 = [r35],16
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -