⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 e_powl.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 5 页
字号:
////     Point to Table of W1s//     Point to Table of W2s//{ .mmi      add GR_W1_ptr   = 0x2b0, GR_table_base    // Constants_exp_64_W1      add GR_W2_ptr   = 0x4b0, GR_table_base    // Constants_exp_64_W2      cmp.le p6,p0= GR_Delta_Exp,GR_Special_Exp};;// Form two constants we need//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128//  1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand{ .mfi      setf.sig  FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63      nop.f 999      and GR_Delta_Exp=GR_Delta_Exp,GR_exp_mask  // Get exponent of y-1}{ .mlx      setf.d  FR_RSHF_2TO51 = GR_rshf_2to51    // Form const 1.1000 * 2^(63+51)      movl GR_rshf = 0x43e8000000000000        // 1.10000 2^63 for right shift};;{ .mfi      nop.m 999      fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo // logx_lo is Y_lo      cmp.eq  p15, p0=  r0, r0                 // Set p15, assume safe};;{ .mmi      setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N      setf.d  FR_RSHF = GR_rshf          // Form right shift const 1.1000 * 2^63      add GR_Table_Ptr1   = 0x50, GR_table_base // Constants_exp_64_P for                                                // EXPL_SMALL path};;{ .mmi      ldfe FR_P_6 = [GR_Table_Ptr1],16          // Load P_6 for EXPL_SMALL path;;      ldfe FR_P_5 = [GR_Table_Ptr1],16          // Load P_5 for EXPL_SMALL path      nop.i 999};;{ .mfi      ldfe FR_P_4 = [GR_Table_Ptr1],16          // Load P_4 for EXPL_SMALL path      fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo  // logx_hi ix Y_hi      nop.i 999};;{ .mmi      ldfe FR_P_3 = [GR_Table_Ptr1],16          // Load P_3 for EXPL_SMALL path;;      ldfe FR_P_2 = [GR_Table_Ptr1],16          // Load P_2 for EXPL_SMALL path      nop.i 999};;// N = X * Inv_log2_by_2^12// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.{ .mfi      ldfe FR_P_1 = [GR_Table_Ptr1]             // Load P_1 for EXPL_SMALL path      fma.s1 FR_N = FR_X, FR_INV_LN2_2TO63, FR_RSHF_2TO51      nop.i 999}{ .mfb      nop.m 999      fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi  // P_hi is X(p6)  br.cond.spnt POWL_Y_ALMOST_1              // Branch if |y-1| < 2^-50};;{ .mmi      getf.exp GR_Expo_X = FR_X      add GR_T1_ptr   = 0x0b0, GR_table_base    // Constants_exp_64_T1      add GR_T2_ptr   = 0x1b0, GR_table_base    // Constants_exp_64_T2};;// float_N = round_int(N)// The signficand of N contains the rounded integer part of X * 2^12/ln2,// as a twos complement number in the lower bits (that is, it may be negative).// That twos complement number (called N) is put into GR_N_fix.// Since N is scaled by 2^51, it must be multiplied by 2^-51// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.// Thus, float_N contains the floating point version of N{ .mfi      add  GR_Table_Ptr   = 0x20, GR_table_base    // Constants_exp_64_A      fms.s1 FR_float_N = FR_N, FR_2TOM51, FR_RSHF // Form float_N      nop.i 999}//     Create low part of Y(ln(x)_hi + ln(x)_lo) as P_lo{ .mfi      mov GR_Big_Pos_Exp = 0x3ffe               // 16382, largest safe exponent      fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo      mov GR_Big_Neg_Exp = -0x3ffd              // -16381 smallest safe exponent};;{ .mfi      nop.m 999      fmpy.s1 FR_rsq = FR_X, FR_X               // rsq = X*X for EXPL_SMALL path      mov GR_vsm_expo = -70                     // Exponent for very small path}{ .mfi      nop.m 999      fma.s1 FR_poly_lo = FR_P_6, FR_X, FR_P_5  // poly_lo for EXPL_SMALL path      add GR_temp = 0x1,r0                      // For tiny signif if small path};;////      If expo_X < -6 goto exp_small//{ .mmi      getf.sig GR_N_fix = FR_N      ldfe FR_A_3 = [GR_Table_Ptr],16         // Load A_3      and GR_Expo_X = GR_Expo_X, GR_exp_mask  // Get exponent of X};;{ .mfi      ldfe FR_A_2 = [GR_Table_Ptr],16         // Load A_2      nop.f 999      sub GR_Expo_X = GR_Expo_X, GR_exp_bias  // Get true exponent of X};;////     If -6 > Expo_X, set P9 and branch//{ .mfb      cmp.gt  p9, p0  =  -6, GR_Expo_X      fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X // r = X - L_hi * float_N(p9)  br.cond.spnt EXPL_SMALL                  // Branch if |X| < 2^-6};;////     If 14 <= Expo_X, set P10//{ .mib      cmp.le  p10, p0 =  14, GR_Expo_X      nop.i 999(p10) br.cond.spnt EXPL_HUGE                   // Branch if |X| >= 2^14};;////      Load single T1//      Load single T2//      W_1_p1 = W_1 + 1//{ .mmi      nop.m 999      nop.m 999      extr.u GR_M1 = GR_N_fix, 6, 6            // Extract index M_1};;////      k = extr.u(N_fix,0,6)//{ .mmi      shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr     // Point to W1      shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr     // Point to T1      extr.u GR_M2 = GR_N_fix, 0, 6            // Extract index M_2};;// N_fix is only correct up to 50 bits because of our right shift technique.// Actually in the normal path we will have restricted K to about 14 bits.// Somewhat arbitrarily we extract 32 bits.{ .mmi      ldfd  FR_W1 = [GR_W1_ptr]      shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr     // Point to W2      extr GR_k = GR_N_fix, 12, 32             // Extract k};;{ .mfi      ldfs  FR_T1 = [GR_T1_ptr]      fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r      shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr     // Point to T2}{ .mfi      add GR_exp_bias_p_k = GR_exp_bias, GR_k      nop.f 999      cmp.gt  p14,p15 = GR_k,GR_Big_Pos_Exp};;////      if k < big_neg_exp, set p14 and Safe=False//{ .mmi      ldfs  FR_T2 = [GR_T2_ptr](p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp      nop.i 999};;{ .mmi      setf.exp FR_Scale = GR_exp_bias_p_k      ldfd  FR_W2 = [GR_W2_ptr]      nop.i 999};;{ .mfi      ldfe FR_A_1 = [GR_Table_Ptr],16      fadd.s1 FR_r = FR_r, FR_X_cor      nop.i 999};;{ .mfi      nop.m 999      fadd.s1 FR_W_1_p1 = FR_W1, f1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2      nop.i 999}{ .mfi      nop.m 999      fmpy.s1 FR_rsq = FR_r, FR_r      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 FR_T = FR_T1, FR_T2      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_TMP1 = FR_Scale, FR_Sgn, f0      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_poly = FR_r, FR_poly, FR_A_1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_TMP2 = FR_T, f1, f0            // TMP2 = Y_hi = T      nop.i 999};;{ .mfi      nop.m 999      fadd.s1 FR_Wp1 = FR_W, f1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_poly = FR_rsq, FR_poly,FR_r      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_Tscale = FR_T, FR_TMP1, f0    // Scale * Sgn * T      nop.i 999}{ .mfi      nop.m 999      fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W      nop.i 999};;{ .mfb      nop.m 999      fmpy.s1 FR_TMP3 = FR_Y_lo, FR_Tscale      br.cond.sptk POWL_64_SHARED};;EXPL_SMALL:// Here if |ylogx| < 2^-6////     Begin creating lsb to perturb final result//{ .mfi      setf.sig FR_temp = GR_temp      fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_4      cmp.lt  p12, p0 =  GR_Expo_X, GR_vsm_expo   // Test |ylogx| < 2^-70}{ .mfi      nop.m 999      fma.s1 FR_poly_hi = FR_P_2, FR_X, FR_P_1      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 FR_TMP2 = f1, f1      nop.i 999}{ .mfi      nop.m 999      fmpy.s1 FR_TMP1 = FR_Sgn, f1      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 FR_r4 = FR_rsq, FR_rsq(p12) cmp.eq  p15, p0 =  r0, r0                   // Set safe if |ylogx| < 2^-70}{ .mfb      nop.m 999(p12) fmpy.s1 FR_TMP3 = FR_Sgn, FR_X(p12) br.cond.spnt POWL_64_SHARED                 // Branch if |ylogx| < 2^-70};;{ .mfi      nop.m 999      fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_3      nop.i 999}{ .mfi      nop.m 999      fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_X      nop.i 999};;{ .mfi      nop.m 999      fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 FR_TMP3 = FR_Y_lo, FR_TMP1      // Add sign info      nop.i 999};;////     Toggle on last bit of Y_lo//     Set lsb of Y_lo to 1//{ .mfi      nop.m 999      for FR_temp = FR_Y_lo,FR_temp      nop.i 999};;{ .mfb      nop.m 999      fmerge.se FR_TMP3 = FR_TMP3,FR_temp      br.cond.sptk POWL_64_SHARED};;EXPL_HUGE:// Here if |ylogx| >= 2^14{ .mfi      mov GR_temp = 0x0A1DC               // If X < 0, exponent -24100      fcmp.gt.s1 p12, p13 =  FR_X, f0     // Test X > 0      cmp.eq  p14, p15 =  r0, r0          // Set Safe to false};;{ .mmi(p12) mov GR_Mask = 0x15DC0               // If X > 0, exponent +24000(p13) mov GR_Mask = 0x0A240               // If X < 0, exponent -24000      nop.i 999};;{ .mmf      setf.exp FR_TMP2 = GR_Mask          // Form Y_hi = TMP2(p13) setf.exp FR_Y_lo = GR_temp          // If X < 0, Y_lo = 2^-24100(p12) mov FR_Y_lo = f1                    // IF X > 0, Y_lo = 1.0};;{ .mfi      nop.m 999      fmpy.s1 FR_TMP1 = FR_TMP2, FR_Sgn   // TMP1 = Y_hi * Sgn      nop.i 999};;{ .mfb      nop.m 999      fmpy.s1 FR_TMP3 = FR_Y_lo,FR_TMP1   // TMP3 = Y_lo * (Y_hi * Sgn)      br.cond.sptk POWL_64_SHARED};;POWL_Y_ALMOST_1:// Here if delta = |y-1| < 2^-50////  x**(1 + delta) = x * e (ln(x)*delta) = x ( 1 + ln(x) * delta)//// Computation will be safe for 2^-16381 <= x < 2^16383{ .mfi       mov GR_exp_ynear1_oflow = 0xffff + 16383       fma.s1 FR_TMP1 = FR_Input_X,FR_Delta,f0       and GR_exp_x = GR_exp_mask, GR_signexp_x};;{ .mfi       cmp.lt  p15, p14 =  GR_exp_x, GR_exp_ynear1_oflow       fma.s1 FR_TMP2 = FR_logx_hi,f1,FR_X_lo       mov GR_exp_ynear1_uflow = 0xffff - 16381};;{ .mfb(p15)  cmp.ge  p15, p14 =  GR_exp_x, GR_exp_ynear1_uflow       fma.s1 FR_TMP3 = FR_Input_X,f1,f0       br.cond.sptk POWL_64_SHARED};;POWL_64_SQUARE:////      Here if x not zero and y=2.////      Setup for multipath code//{ .mfi      mov GR_exp_square_oflow = 0xffff + 8192   // Exponent where x*x overflows      fmerge.se FR_TMP1 = FR_Input_X, FR_Input_X      and GR_exp_x = GR_exp_mask, GR_signexp_x  // Get exponent of x};;{ .mfi      cmp.lt  p15, p14 =  GR_exp_x, GR_exp_square_oflow // Decide safe/unsafe      fmerge.se FR_TMP2 = FR_Input_X, FR_Input_X      mov GR_exp_square_uflow = 0xffff - 8191   // Exponent where x*x underflows};;{ .mfi(p15) cmp.ge  p15, p14 =  GR_exp_x, GR_exp_square_uflow // Decide safe/unsafe      fma.s1 FR_TMP3 = f0,f0,f0      nop.i 999};;////      This is the shared path that will set overflow and underflow.//POWL_64_SHARED:////      Return if no danger of over or underflow.//{ .mfb      nop.m 999      fma.s0 FR_Result = FR_TMP1, FR_TMP2, FR_TMP3(p15) br.ret.sptk  b0      // Main path return if certain no over/underflow};;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -