⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 s_atanl.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 4 页
字号:
{ .mfi      nop.m 999      fmpy.s1 P_lo = M, P_lo      add table_ptr2 = 32, table_ptr1};;{ .mfi      nop.m 999      fma.s1 A_temp = Q, f1, f0            // Set A_temp if POLY path      nop.i 999}{ .mfi      nop.m 999      fma.s1 E = E, E_hold, E              // E = E + E*E_hold (1) if POLY path      nop.i 999};;////     Is Q < 2**(-3)?//     swap = xor(swap,sign_X)//{ .mfi      nop.m 999      fcmp.lt.s1 p9, p0 = Q, TWO_TO_NEG3    // Test Q < 2^-3      xor swap = sign_X, swap};;//     P_hi = s_Y * P_hi{ .mmf      getf.exp exponent_Q =  Q              // Get signexp of Q      cmp.eq.unc p7, p6 = 0x00000, swap      fmpy.s1 P_hi = s_Y, P_hi};;////     if (PR_1) sigma = -1.0//     if (PR_2) sigma =  1.0//{ .mfi      getf.sig significand_Q = Q            // Get significand of Q(p6)  fsub.s1 sigma = f0, f1      nop.i 999}{ .mfb(p9)  add table_ptr1 = 128, table_base      // Point to P8 if POLY path(p7)  fadd.s1 sigma = f0, f1(p9)  br.cond.spnt ATANL_POLY               // Branch to POLY if 0 < Q < 2^-3};;//// *************************************************// ******************** STEP3 **********************// *************************************************////     lookup = b_1 b_2 b_3 B_4//{ .mmi      nop.m 999      nop.m 999      andcm k = 0x0003, exponent_Q  // k=0,1,2,3 for exp_Q=0,-1,-2,-3};;////  Generate sign_exp_Q b_1 b_2 b_3 b_4 1 0 0 0 ... 0  in single precision //  representation.  Note sign of Q is always 0.//{ .mfi      cmp.eq p8, p9 = 0x0000, k             // Test k=0      nop.f 999      extr.u lookup = significand_Q, 59, 4  // Extract b_1 b_2 b_3 b_4 for index}{ .mfi      sub sp_exp_Q = 0x7f, k                // Form single prec biased exp of Q      nop.f 999      sub k = k, r0, 1                      // Decrement k};;//     Form pointer to B index table{ .mfi      ldfe Q_4 = [table_ptr1], -16          // Load Q_4      nop.f 999(p9)  shl k = k, 8                          // k = 0, 256, or 512}{ .mfi(p9)  shladd table_ptr2 = lookup, 4, table_ptr2      nop.f 999      shladd sp_exp_4sig_Q = sp_exp_Q, 4, lookup // Shift and add in 4 high bits};;{ .mmi(p8)  add table_ptr2 = -16, table_ptr2      // Pointer if original k was 0(p9)  add table_ptr2 = k, table_ptr2        // Pointer if k was 1, 2, 3      dep special = sp_exp_4sig_Q, special, 19, 13 // Form z_hi as single prec};;//     z_hi = s exp 1.b_1 b_2 b_3 b_4 1 0 0 0 ... 0{ .mmi      ldfd Tbl_hi = [table_ptr2], 8         // Load Tbl_hi from index table;;      setf.s z_hi = special                 // Form z_hi      nop.i 999}{ .mmi      ldfs Tbl_lo = [table_ptr2], 8         // Load Tbl_lo from index table;;      ldfe Q_3 = [table_ptr1], -16          // Load Q_3      nop.i 999};;{ .mmi      ldfe Q_2 = [table_ptr1], -16          // Load Q_2      nop.m 999      nop.i 999};;{ .mmf      ldfe Q_1 = [table_ptr1], -16          // Load Q_1      nop.m 999      nop.f 999};;{ .mfi      nop.m 999      fma.s1 U_prime_hi = V, z_hi, U        // U_prime_hi = U + V * z_hi      nop.i 999}{ .mfi      nop.m 999      fnma.s1 V_prime = U, z_hi, V          // V_prime =  V - U * z_hi      nop.i 999};;{ .mfi      nop.m 999      mov A_hi = Tbl_hi                     // Start with A_hi = Tbl_hi      nop.i 999};;{ .mfi      nop.m 999      fsub.s1 U_hold = U, U_prime_hi        // U_hold = U - U_prime_hi      nop.i 999};;{ .mfi      nop.m 999      frcpa.s1 C_hi, p0 = f1, U_prime_hi    // C_hi = frcpa(1,U_prime_hi)      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 A_hi = s_Y, A_hi              // A_hi = s_Y * A_hi      nop.i 999};;{ .mfi      nop.m 999      fma.s1 U_prime_lo = z_hi, V, U_hold   // U_prime_lo =  U_hold + V * z_hi      nop.i 999};;//     C_hi_hold = 1 - C_hi * U_prime_hi (1){ .mfi      nop.m 999      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1       nop.i 999};;{ .mfi      nop.m 999      fma.s1 Res_hi = sigma, A_hi, P_hi   // Res_hi = P_hi + sigma * A_hi      nop.i 999};;{ .mfi      nop.m 999      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (1)      nop.i 999};;//     C_hi_hold = 1 - C_hi * U_prime_hi (2){ .mfi      nop.m 999      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1      nop.i 999};;{ .mfi      nop.m 999      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (2)      nop.i 999};;//     C_hi_hold = 1 - C_hi * U_prime_hi (3){ .mfi      nop.m 999      fnma.s1 C_hi_hold = C_hi, U_prime_hi, f1       nop.i 999};;{ .mfi      nop.m 999      fma.s1 C_hi = C_hi_hold, C_hi, C_hi // C_hi = C_hi + C_hi * C_hi_hold (3)      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 w_hi = V_prime, C_hi           // w_hi = V_prime * C_hi      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 wsq = w_hi, w_hi               // wsq = w_hi * w_hi      nop.i 999}{ .mfi      nop.m 999      fnma.s1 w_lo = w_hi, U_prime_hi, V_prime // w_lo = V_prime-w_hi*U_prime_hi      nop.i 999};;{ .mfi      nop.m 999      fma.s1 poly =  wsq, Q_4, Q_3           // poly = Q_3 + wsq * Q_4      nop.i 999}{ .mfi      nop.m 999      fnma.s1 w_lo = w_hi, U_prime_lo, w_lo  // w_lo = w_lo - w_hi * U_prime_lo      nop.i 999};;{ .mfi      nop.m 999      fma.s1 poly = wsq, poly, Q_2           // poly = Q_2 + wsq * poly      nop.i 999}{ .mfi      nop.m 999      fmpy.s1 w_lo = C_hi, w_lo              // w_lo =  = w_lo * C_hi      nop.i 999};;{ .mfi      nop.m 999      fma.s1 poly = wsq, poly, Q_1           // poly = Q_1 + wsq * poly      nop.i 999}{ .mfi      nop.m 999      fadd.s1 A_lo = Tbl_lo, w_lo            // A_lo = Tbl_lo + w_lo      nop.i 999};;{ .mfi      nop.m 999      fmpy.s0 Q_1 =  Q_1, Q_1                // Dummy operation to raise inexact      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 poly = wsq, poly               // poly = wsq * poly      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 poly = w_hi, poly              // poly = w_hi * poly      nop.i 999};;{ .mfi      nop.m 999      fadd.s1 A_lo = A_lo, poly              // A_lo = A_lo + poly      nop.i 999};;{ .mfi      nop.m 999      fadd.s1 A_lo = A_lo, w_hi              // A_lo = A_lo + w_hi      nop.i 999};;{ .mfi      nop.m 999      fma.s1 Res_lo = sigma, A_lo, P_lo      // Res_lo = P_lo + sigma * A_lo      nop.i 999};;////     Result  =  Res_hi + Res_lo * s_Y  (User Supplied Rounding Mode)//{ .mfb      nop.m 999      fma.s0 Result = Res_lo, s_Y, Res_hi      br.ret.sptk   b0                        // Exit table path 2^-3 <= V/U < 1};;ATANL_POLY: // Here if 0 < V/U < 2^-3//// ***********************************************// ******************** STEP4 ********************// ***********************************************////     Following://     Iterate 3 times E = E + E*(1.0 - E*U)//     Also load P_8, P_7, P_6, P_5, P_4//{ .mfi      ldfe P_8 = [table_ptr1], -16            // Load P_8      fnma.s1 z_lo = A_temp, U, V             // z_lo = V - A_temp * U      nop.i 999}{ .mfi      nop.m 999      fnma.s1 E_hold = E, U, f1               // E_hold = 1.0 - E*U (2)      nop.i 999};;{ .mmi      ldfe P_7 = [table_ptr1], -16            // Load P_7;;      ldfe P_6 = [table_ptr1], -16            // Load P_6      nop.i 999};;{ .mfi      ldfe P_5 = [table_ptr1], -16            // Load P_5      fma.s1 E = E, E_hold, E                 // E = E + E_hold*E (2)      nop.i 999};;{ .mmi      ldfe P_4 = [table_ptr1], -16            // Load P_4;;      ldfe P_3 = [table_ptr1], -16            // Load P_3      nop.i 999};;{ .mfi      ldfe P_2 = [table_ptr1], -16            // Load P_2      fnma.s1 E_hold = E, U, f1               // E_hold = 1.0 - E*U (3)      nop.i 999}{ .mlx      nop.m 999      movl         int_temp = 0x24005         // Signexp for small neg number};;{ .mmf      ldfe P_1 = [table_ptr1], -16            // Load P_1      setf.exp     tmp_small = int_temp       // Form small neg number      fma.s1 E = E, E_hold, E                 // E = E + E_hold*E (3)};;////// At this point E approximates 1/U to roughly working precision// Z = V*E approximates V/U//{ .mfi      nop.m 999      fmpy.s1 Z = V, E                         // Z = V * E      nop.i 999}{ .mfi      nop.m 999      fmpy.s1 z_lo = z_lo, E                   // z_lo = z_lo * E      nop.i 999};;////     Now what we want to do is//     poly1 = P_4 + zsq*(P_5 + zsq*(P_6 + zsq*(P_7 + zsq*P_8)))//     poly2 = zsq*(P_1 + zsq*(P_2 + zsq*P_3))//////     Fixup added to force inexact later -//     A_hi = A_temp + z_lo//     z_lo = (A_temp - A_hi) + z_lo//{ .mfi      nop.m 999      fmpy.s1 zsq = Z, Z                        // zsq = Z * Z      nop.i 999}{ .mfi      nop.m 999      fadd.s1 A_hi = A_temp, z_lo               // A_hi = A_temp + z_lo      nop.i 999};;{ .mfi      nop.m 999      fma.s1 poly1 = zsq, P_8, P_7              // poly1 = P_7 + zsq * P_8      nop.i 999}{ .mfi      nop.m 999      fma.s1 poly2 = zsq, P_3, P_2              // poly2 = P_2 + zsq * P_3      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 z4 = zsq, zsq                     // z4 = zsq * zsq      nop.i 999}{ .mfi      nop.m 999      fsub.s1 A_temp = A_temp, A_hi             // A_temp = A_temp - A_hi      nop.i 999};;{ .mfi      nop.m 999      fmerge.s     tmp = A_hi, A_hi             // Copy tmp = A_hi      nop.i 999};;{ .mfi      nop.m 999      fma.s1 poly1 = zsq, poly1, P_6            // poly1 = P_6 + zsq * poly1      nop.i 999}{ .mfi      nop.m 999      fma.s1 poly2 = zsq, poly2, P_1            // poly2 = P_2 + zsq * poly2      nop.i 999};;{ .mfi      nop.m 999      fmpy.s1 z8 = z4, z4                       // z8 = z4 * z4      nop.i 999}{ .mfi      nop.m 999      fadd.s1 z_lo = A_temp, z_lo               // z_lo = (A_temp - A_hi) + z_lo      nop.i 999};;{ .mfi      nop.m 999      fma.s1 poly1 = zsq, poly1, P_5            // poly1 = P_5 + zsq * poly1

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -