📄 s_expm1l.s
字号:
// =====================================================================
// expm1l / expl entry points and the beginning of their shared code
// path (IA-64 assembly with explicit bundles; the file also uses C
// preprocessor directives and macros).
//
// Both entry points allocate the same register frame and then meet at
// exp_continue.  They differ only in the flag they establish:
//     expm1l : r33 = 1, p7 = true
//     expl   : r33 = 0, p7 = false
// After exp_continue, r33 is compared against 1 once to derive
//     p12 = (r33 != 1)      p13 = (r33 == 1)
// and those two predicates select the per-function work further down.
//
// The argument is read from f8; a normalized copy is kept in f9.
//
// Referenced here but not defined in this part of the file:
//     labels : EXPL_MAIN, EXPL_HUGE, EXPL_VERY_SMALL,
//              EXPL_64_SPECIAL, EXPL_64_UNSUPPORTED
//     tables : Constants_exp_64_Arg, _W1, _W2, _T1, _T2, _A, _P, _Q,
//              _Exponents
//
// The bare `//` comment groups with no "NOTE" prefix are inherited and
// kept verbatim in their original positions.  Several of them describe
// an instruction that sits elsewhere in the sequence (for example the
// "Set p12 true for Flag = 0" group sits next to an fmpy, not next to
// the cmp that writes p12), so read them as a running pseudo-code log
// rather than as per-line annotations.
//
// NOTE(review): the `.proc expm1l#` that should pair with
// `.endp expm1l` below is not visible in this chunk -- confirm that it
// precedes this point.
// =====================================================================

.global expm1l#
.align 64
expm1l:
#ifdef _LIBC
.global __expm1l#
__expm1l:
#endif
{ .mii
      alloc r32 = ar.pfs,0,30,4,0
(p0)  add r33 = 1, r0
(p0)  cmp.eq.unc p7, p0 = r0, r0
}
{ .mbb
      nop.m 999
(p0)  br.cond.sptk exp_continue
      nop.b 999 ;;
}
//
// Set p7 true for expm1
// Set Flag = r33 = 1 for expm1
//

.endp expm1l
ASM_SIZE_DIRECTIVE(expm1l)
#ifdef _LIBC
libm_hidden_def (__expm1l)
#endif

.section .text
.proc expl#
.global expl#
.align 64
expl:
#ifdef _LIBC
.global __ieee754_expl#
__ieee754_expl:
#endif
{ .mii
      alloc r32 = ar.pfs,0,30,4,0
(p0)  add r33 = r0, r0
(p0)  cmp.eq.unc p0, p7 = r0, r0 ;;
}

// ---------------------------------------------------------------------
// NOTE: common path, reached by fall-through from expl and by branch
// from expm1l.
// NOTE(review): r32 received ar.pfs from the alloc above and is
// overwritten with the constant 2 here -- confirm the saved ar.pfs value
// is not needed later.
// ---------------------------------------------------------------------
exp_continue:
{ .mfi
(p0)  add r32 = 2,r0
(p0)  fnorm.s1 f9 = f8
      nop.i 0
}
{ .mfi
(p0)  nop.m 0
//
// Set p7 false for exp
// Set Flag = r33 = 0 for exp
//
(p0)  fclass.m.unc p6, p8 = f8, 0x1E7
      nop.i 0;;
}
{ .mfi
      nop.m 999
(p0)  fclass.nm.unc p9, p0 = f8, 0x1FF
      nop.i 0
}
{ .mfi
      nop.m 999
(p0)  mov f36 = f1
      nop.i 999 ;;
}
{ .mfb
      nop.m 999
//
// Identify NatVals, NaNs, Infs, and Zeros.
// Identify EM unsupporteds.
// Save special input registers
(p0)  mov f32 = f0
//
// Create FR_X_cor = 0.0
// GR_Flag = 0
// GR_Expo_Range = 2 (r32) for double-extended precision
// FR_Scale = 1.0
//
(p6)  br.cond.spnt EXPL_64_SPECIAL ;;
}
{ .mib
      nop.m 999
      nop.i 999
(p9)  br.cond.spnt EXPL_64_UNSUPPORTED ;;
}

// NOTE: p12 = (r33 != 1), p13 = (r33 == 1); p15 is set true here.
{ .mfi
(p0)  cmp.ne.unc p12, p13 = 0x01, r33
//
// Branch out for special input values
//
(p0)  fcmp.lt.unc.s0 p9,p0 = f8, f0
(p0)  cmp.eq.unc p15, p0 = r0, r0
}
{ .mmi
      nop.m 999
//
// Raise possible denormal operand exception
// Normalize x
//
// This function computes expl( x + x_cor)
// Input FR 1: FR_X
// Input FR 2: FR_X_cor
// Input GR 1: GR_Flag
// Input GR 2: GR_Expo_Range
// Output FR 3: FR_Y_hi
// Output FR 4: FR_Y_lo
// Output FR 5: FR_Scale
// Output PR 1: PR_Safe
(p0)  addl r34 = @ltoff(Constants_exp_64_Arg#),gp
(p0)  addl r40 = @ltoff(Constants_exp_64_W1#),gp
};;
//
// Prepare to load constants
// Set Safe = True
//
{ .mmi
      ld8 r34 = [r34]
      ld8 r40 = [r40]
(p0)  addl r41 = @ltoff(Constants_exp_64_W2#),gp
};;
{ .mmi
(p0)  ldfe f37 = [r34],16
(p0)  ld8 r41 = [r41]
      ;;
}
//
// N = fcvt.fx(float_N)
// Set p14 if -6 > expo_X
//
//
// Bias = 0x0FFFF
// expo_X = expo_X and Mask
//
{ .mmi
(p0)  ldfe f40 = [r34],16
      nop.m 999
//
// Load L_lo
// Set p10 if 14 < expo_X
//
(p0)  addl r50 = @ltoff(Constants_exp_64_T1#),gp
}
{ .mmi
      nop.m 999
      nop.m 999
(p0)  addl r51 = @ltoff(Constants_exp_64_T2#),gp ;;
}
//
// Load W2_ptr
// Branch to SMALL is expo_X < -6
//
{.mmi
(p0)  ld8 r50 = [r50]
(p0)  ld8 r51 = [r51]
};;
{ .mlx
(p0)  ldfe f41 = [r34],16
//
// float_N = X * L_Inv
// expo_X = exponent of X
// Mask = 0x1FFFF
//
(p0)  movl r58 = 0x0FFFF
}
{ .mlx
      nop.m 999
(p0)  movl r39 = 0x1FFFF ;;
}

// ---------------------------------------------------------------------
// NOTE: range check on the input's exponent.
//   r37 = (getf.exp(f9) & 0x1FFFF) - 0x0FFFF
//   p14 = (-6 > r37)  -> branch to EXPL_SMALL
//   p10 = (14 < r37)  -> branch to EXPL_HUGE
// ---------------------------------------------------------------------
{ .mmi
(p0)  getf.exp r37 = f9
      nop.m 999
(p0)  addl r34 = @ltoff(Constants_exp_64_Exponents#),gp ;;
}
{ .mii
(p0)  ld8 r34 = [r34]
      nop.i 999
(p0)  and r37 = r37, r39 ;;
}
{ .mmi
(p0)  sub r37 = r37, r58 ;;
(p0)  cmp.gt.unc p14, p0 = -6, r37
(p0)  cmp.lt.unc p10, p0 = 14, r37 ;;
}
{ .mfi
(p0)  nop.m 0
//
// Load L_inv
// Set p12 true for Flag = 0 (exp)
// Set p13 true for Flag = 1 (expm1)
//
(p0)  fmpy.s1 f38 = f9, f37
      nop.i 999 ;;
}
{ .mfb
      nop.m 999
//
// Load L_hi
// expo_X = expo_X - Bias
// get W1_ptr
//
(p0)  fcvt.fx.s1 f39 = f38
(p14) br.cond.spnt EXPL_SMALL ;;
}
{ .mib
      nop.m 999
      nop.i 999
(p10) br.cond.spnt EXPL_HUGE ;;
}

// NOTE: r34 = Constants_exp_64_Exponents + 16*r32 (r32 == 2 from
// exp_continue); r45 and r48 are then loaded from consecutive 8-byte
// slots at that address.
{ .mmi
(p0)  shladd r34 = r32,4,r34
      nop.m 999
(p0)  addl r35 = @ltoff(Constants_exp_64_A#),gp ;;
}
//
// Load T_1,T_2
//
{ .mmi
      nop.m 999
      ld8 r35 =[r35]
      nop.i 99
};;
{ .mmb
(p0)  ldfe f51 = [r35],16
(p0)  ld8 r45 = [r34],8
      nop.b 999 ;;
}
//
// Set Safe = True if k >= big_expo_neg
// Set Safe = False if k < big_expo_neg
//
{ .mmb
(p0)  ldfe f49 = [r35],16
(p0)  ld8 r48 = [r34],0
      nop.b 999 ;;
}
{ .mfi
      nop.m 999
//
// Branch to HUGE is expo_X > 14
//
(p0)  fcvt.xf f38 = f39
      nop.i 999 ;;
}

// ---------------------------------------------------------------------
// NOTE: split the integer in r52 (significand of f39) into fields and
// index the lookup tables with them:
//   r43 = bits 6..11  -> r40 += 8*r43, r50 += 4*r43
//   r42 = bits 0..5   -> r41 += 8*r42, r51 += 4*r42
//   r44 = extr (signed) of 52 bits starting at bit 12
// ---------------------------------------------------------------------
{ .mfi
(p0)  getf.sig r52 = f39
      nop.f 999
      nop.i 999 ;;
}
{ .mii
      nop.m 999
(p0)  extr.u r43 = r52, 6, 6 ;;
//
// r = r - float_N * L_lo
// K = extr(N_fix,12,52)
//
(p0)  shladd r40 = r43,3,r40 ;;
}
{ .mfi
(p0)  shladd r50 = r43,2,r50
(p0)  fnma.s1 f42 = f40, f38, f9
//
// float_N = float(N)
// N_fix = signficand N
//
(p0)  extr.u r42 = r52, 0, 6
}
{ .mmi
(p0)  ldfd f43 = [r40],0 ;;
(p0)  shladd r41 = r42,3,r41
(p0)  shladd r51 = r42,2,r51
}
//
// W_1_p1 = 1 + W_1
//
{ .mmi
(p0)  ldfs f44 = [r50],0 ;;
(p0)  ldfd f45 = [r41],0
//
// M_2 = extr(N_fix,0,6)
// M_1 = extr(N_fix,6,6)
// r = X - float_N * L_hi
//
(p0)  extr r44 = r52, 12, 52
}
{ .mmi
(p0)  ldfs f46 = [r51],0 ;;
(p0)  sub r46 = r58, r44
(p0)  cmp.gt.unc p8, p15 = r44, r45
}
//
// W = W_1 + W_1_p1*W_2
// Load A_2
// Bias_m_K = Bias - K
//
{ .mii
(p0)  ldfe f40 = [r35],16
//
// load A_1
// poly = A_2 + r*A_3
// rsq = r * r
// neg_2_mK = exponent of Bias_m_k
//
(p0)  add r47 = r58, r44 ;;
//
// Set Safe = True if k <= big_expo_pos
// Set Safe = False if k > big_expo_pos
// Load A_3
//
(p15) cmp.lt p8,p15 = r44,r48 ;;
}
{ .mmf
(p0)  setf.exp f61 = r46
//
// Bias_p + K = Bias + K
// T = T_1 * T_2
//
(p0)  setf.exp f36 = r47
(p0)  fnma.s1 f42 = f41, f38, f42 ;;
}
{ .mfi
      nop.m 999
//
// Load W_1,W_2
// Load big_exp_pos, load big_exp_neg
//
(p0)  fadd.s1 f47 = f43, f1
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p0)  fma.s1 f52 = f42, f51, f49
      nop.i 999
}
{ .mfi
      nop.m 999
(p0)  fmpy.s1 f48 = f42, f42
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p0)  fmpy.s1 f53 = f44, f46
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p0)  fma.s1 f54 = f45, f47, f43
      nop.i 999
}
{ .mfi
      nop.m 999
(p0)  fneg f61 = f61
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p0)  fma.s1 f52 = f42, f52, f40
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p0)  fadd.s1 f55 = f54, f1
      nop.i 999
}
{ .mfi
      nop.m 999
//
// W + Wp1 * poly
//
(p0)  mov f34 = f53
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
//
// A_1 + r * poly
// Scale = setf_expl(Bias_p_k)
//
(p0)  fma.s1 f52 = f48, f52, f42
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
//
// poly = r + rsq(A_1 + r*poly)
// Wp1 = 1 + W
// neg_2_mK = -neg_2_mK
//
(p0)  fma.s1 f35 = f55, f52, f54
      nop.i 999 ;;
}

// NOTE: p12 (r33 != 1, i.e. the expl entry) leaves for EXPL_MAIN here.
// Only the r33 == 1 (expm1l) case continues into the bundles below,
// where p12/p13/p14 are rewritten from comparisons of r44 against
// 10 and -10.
{ .mfb
      nop.m 999
(p0)  fmpy.s1 f35 = f35, f53
//
// Y_hi = T
// Y_lo = T * (W + Wp1*poly)
//
(p12) br.cond.sptk EXPL_MAIN ;;
}
//
// Branch if expl(x)
// Continue for expl(x-1)
//
{ .mii
(p0)  cmp.lt.unc p12, p13 = 10, r44
      nop.i 999 ;;
//
// Set p12 if 10 < K, Else p13
//
(p13) cmp.gt.unc p13, p14 = -10, r44 ;;
}
//
// K > 10: Y_lo = Y_lo + neg_2_mK
// K <=10: Set p13 if -10 > K, Else set p14
//
{ .mfi
(p13) cmp.eq p15, p0 = r0, r0
(p14) fadd.s1 f34 = f61, f34
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p12) fadd.s1 f35 = f35, f61
      nop.i 999 ;;
}
{ .mfi
      nop.m 999
(p13) fadd.s1 f35 = f35, f34
      nop.i 999
}
{ .mfb
      nop.m 999
//
// K <= 10 and K < -10, Set Safe = True
// K <= 10 and K < 10, Y_lo = Y_hi + Y_lo
// K <= 10 and K > =-10, Y_hi = Y_hi + neg_2_mk
//
(p13) mov f34 = f61
(p0)  br.cond.sptk EXPL_MAIN ;;
}

// ---------------------------------------------------------------------
// NOTE: EXPL_SMALL is entered when p14 was set by the exponent range
// check (-6 > r37).  p12/p13 still hold the r33 test at this point:
// p12 loads its coefficients through Constants_exp_64_P, p13 through
// Constants_exp_64_Q.  r49 is read from offset 0x48 of
// Constants_exp_64_Exponents, and r37 < r49 diverts to EXPL_VERY_SMALL.
// ---------------------------------------------------------------------
EXPL_SMALL:
{ .mmi
      nop.m 999
(p0)  addl r34 = @ltoff(Constants_exp_64_Exponents#),gp
(p12) addl r35 = @ltoff(Constants_exp_64_P#),gp ;;
}
.pred.rel "mutex",p12,p13
{ .mmi
(p12) ld8 r35=[r35]
      nop.m 999
(p13) addl r35 = @ltoff(Constants_exp_64_Q#),gp
};;
{ .mmi
(p13) ld8 r35=[r35]
(p0)  ld8 r34=[r34]
      nop.i 999
};;
{ .mfi
(p0)  add r34 = 0x48,r34
//
// Return
// K <= 10 and K < 10, Y_hi = neg_2_mk
//
// /*******************************************************/
// /*********** Branch EXPL_SMALL ************************/
// /*******************************************************/
(p0)  mov f42 = f9
      nop.i 999 ;;
}
//
// Flag = 0
// r4 = rsq * rsq
//
{ .mfi
(p0)  ld8 r49 =[r34],0
      nop.f 999
      nop.i 999 ;;
}
{ .mii
      nop.m 999
      nop.i 999 ;;
//
// Flag = 1
//
(p0)  cmp.lt.unc p14, p0 = r37, r49 ;;
}
{ .mfi
      nop.m 999
//
// r = X
//
(p0)  fmpy.s1 f48 = f42, f42
      nop.i 999 ;;
}
{ .mfb
      nop.m 999
//
// rsq = r * r
//
(p0)  fmpy.s1 f50 = f48, f48
//
// Is input very small?
//
(p14) br.cond.spnt EXPL_VERY_SMALL ;;
}
//
// Flag_not1: Y_hi = 1.0
// Flag is 1: r6 = rsq * r4
//
{ .mfi
(p12) ldfe f52 = [r35],16
(p12) mov f34 = f1
(p0)  add r53 = 0x1,r0 ;;
}
{ .mfi
(p13) ldfe f51 = [r35],16
//
// Flag_not_1: Y_lo = poly_hi + r4 * poly_lo
//
(p13) mov f34 = f9
      nop.i 999 ;;
}
{ .mmf
(p12) ldfe f53 = [r35],16
//
// For Flag_not_1, Y_hi = X
// Scale = 1
// Create 0x000...01
//
(p0)  setf.sig f37 = r53
(p0)  mov f36 = f1 ;;
}
// NOTE(review): this copy of the file ends here, part-way through the
// EXPL_SMALL path; no `.endp expl` is visible.  Nothing beyond this
// bundle has been reconstructed.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -