📄 s_tanl.s
字号:
fabs U_hiabs = U_hi // |U_hi| for |s| < 2^-14 nop.i 999}{ .mfi nop.m 999 fsub.s1 c = s_val, r // c = s_val - r for |s| >= 2^-14 nop.i 999};;// For Case 3, |s| >= 2^-14, determine if |r| < 1/4//// Case 4: C_hi = s_val + A//{ .mfi nop.m 999(p11) fadd.s1 C_hi = s_val, A // C_hi = s_val + A for |s| < 2^-14 nop.i 999}{ .mfi nop.m 999(p10) fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2 nop.i 999};;{ .mfi getf.sig sig_r = r // Get signif of r if |s| >= 2^-33 fand B = B_mask1, r nop.i 999};;// Case 4: t = U_lo + V_lo{ .mfi getf.exp exp_r = r // Extract signexp of r if |s| >= 2^-33(p11) fadd.s1 t = U_lo, V_lo // t = U_lo + V_lo for |s| < 2^-14 nop.i 999}{ .mfi nop.m 999(p14) fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2 nop.i 999};;// Case 3: c = (s - r) + w (c complete){ .mfi nop.m 999(p10) fadd.s1 c = c, w // c = c + w for |s| >= 2^-14 nop.i 999}{ .mbb nop.m 999(p14) br.cond.spnt TANL_SMALL_R // Branch if 2^24 <= |x| < 2^63 and |r|< 1/4(p15) br.cond.sptk TANL_NORMAL_R_A // Branch if 2^24 <= |x| < 2^63 and |r|>=1/4};;// Here if 2^24 <= |x| < 2^63 and |s| < 2^-14 >>>>>>> Case 4.//// Case 4: Set P_12 if U_hiabs >= V_hiabs// Case 4: w = w + N_0 * d_2// Note: the (-) is now incorporated in w .{ .mfi add table_ptr1 = 160, table_base // Point to tanl_table_p1 fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs nop.i 999}{ .mfi nop.m 999 fms.s1 w2 = N_0, d_2, w2 nop.i 999};;// Case 4: C_lo = s_val - C_hi{ .mfi ldfe P1_1 = [table_ptr1], 16 // Load P1_1 fsub.s1 C_lo = s_val, C_hi nop.i 999};;//// Case 4: a = U_hi - A// a = V_hi - A (do an add to account for missing (-) on V_hi//{ .mfi ldfe P1_2 = [table_ptr1], 128 // Load P1_2(p12) fsub.s1 a = U_hi, A nop.i 999}{ .mfi nop.m 999(p13) fadd.s1 a = V_hi, A nop.i 999};;// Case 4: t = U_lo + V_lo + w{ .mfi ldfe Q1_1 = [table_ptr1], 16 // Load Q1_1 fadd.s1 t = t, w2 nop.i 999};;// Case 4: a = (U_hi - A) + V_hi// a = (V_hi - A) + U_hi// In each case account for negative missing form V_hi .//{ .mfi ldfe Q1_2 = [table_ptr1], 16 // Load Q1_2(p12) fsub.s1 a = a, V_hi nop.i 999}{ .mfi nop.m 999(p13) fsub.s1 a = U_hi, a nop.i 999};;//// Case 4: C_lo = (s_val - C_hi) + A//{ .mfi nop.m 999 fadd.s1 C_lo = C_lo, A nop.i 999 ;;}//// Case 4: t = t + a//{ .mfi nop.m 999 fadd.s1 t = t, a nop.i 999};;// Case 4: C_lo = C_lo + t// Case 4: r = C_hi + C_lo{ .mfi nop.m 999 fadd.s1 C_lo = C_lo, t nop.i 999};;{ .mfi nop.m 999 fadd.s1 r = C_hi, C_lo nop.i 999};;//// Case 4: c = C_hi - r//{ .mfi nop.m 999 fsub.s1 c = C_hi, r nop.i 999}{ .mfi nop.m 999 fmpy.s1 rsq = r, r add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)};;// Case 4: c = c + C_lo finished.//// Is i_1 = lsb of N_fix_gr even or odd?// if i_1 == 0, set PR_11, else set PR_12.//{ .mfi nop.m 999 fadd.s1 c = c , C_lo tbit.z p11, p12 = N_fix_gr, 0};;// r and c have been computed.{ .mfi nop.m 999(p12) frcpa.s1 S_hi, p0 = f1, r nop.i 999}{ .mfi nop.m 999//// N odd: Change sign of S_hi//(p11) fma.s1 Poly = rsq, P1_2, P1_1 nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 P = rsq, Q1_2, Q1_1 nop.i 999}{ .mfi nop.m 999//// N odd: Result = S_hi + S_lo (User supplied rounding mode for C1)// fmpy.s0 fp_tmp = fp_tmp, fp_tmp // Dummy mult to set inexact nop.i 999 ;;}{ .mfi nop.m 999//// N even: rsq = r * r// N odd: S_hi = frcpa(r)//(p12) fmerge.ns S_hi = S_hi, S_hi nop.i 999}{ .mfi nop.m 999//// N even: rsq = rsq * P1_2 + P1_1// N odd: poly1 = 1.0 + S_hi * r 16 bits partial account for necessary//(p11) fmpy.s1 Poly = rsq, Poly nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, r,f1(p11) tbit.z.unc p14, p15 = cot_flag, 0 // p14=1 for tanl; p15=1 for cotl}{ .mfi nop.m 999//// N even: Poly = Poly * rsq// N odd: S_hi = S_hi + S_hi*poly1 16 bits account for necessary//(p11) fma.s1 Poly = r, Poly, c nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999}{ .mfi nop.m 999//// N odd: S_hi = S_hi * poly1 + S_hi 32 bits//(p14) fadd.s0 Result = r, Poly // for tanl nop.i 999 ;;}.pred.rel "mutex",p15,p12{ .mfi nop.m 999(p15) fms.s0 Result = r, mOne, Poly // for cotl nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}{ .mfi nop.m 999//// N even: Poly = Poly * r + c// N odd: poly1 = 1.0 + S_hi * r 32 bits partial//(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}{ .mfi nop.m 999//// N even: Result = Poly + r (Rounding mode S0)// N odd: poly1 = S_hi * r + 1.0 64 bits partial//(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * poly + S_hi 64 bits//(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * r + 1.0//(p12) fma.s1 poly1 = S_hi, c, poly1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * c + poly1//(p12) fmpy.s1 S_lo = S_hi, poly1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: S_lo = S_hi * poly1//(p12) fma.s1 S_lo = P, r, S_lo(p12) tbit.z.unc p14, p15 = cot_flag, 0 ;; // p14=1 for tanl; p15=1 for cotl}{ .mfi nop.m 999(p14) fadd.s0 Result = S_hi, S_lo // for tanl nop.i 999}{ .mfb nop.m 999//// N odd: S_lo = S_lo + r * P//(p15) fms.s0 Result = S_hi, mOne, S_lo // for cotl br.ret.sptk b0 ;; // Exit for 2^24 <= |x| < 2^63 and |s| < 2^-14}TANL_SMALL_R:// Here if |r| < 1/4// r and c have been computed.// *****************************************************************// *****************************************************************// *****************************************************************// N odd: S_hi = frcpa(r)// Get [i_1] - lsb of N_fix_gr. Set p11 if N even, p12 if N odd.// N even: rsq = r * r{ .mfi add table_ptr1 = 160, table_base // Point to tanl_table_p1 frcpa.s1 S_hi, p0 = f1, r // S_hi for N odd add N_fix_gr = N_fix_gr, cot_flag // N = N + 1 (for cotl)}{ .mfi add table_ptr2 = 400, table_base // Point to Q1_7 fmpy.s1 rsq = r, r nop.i 999};;{ .mmi ldfe P1_1 = [table_ptr1], 16;; ldfe P1_2 = [table_ptr1], 16 tbit.z p11, p12 = N_fix_gr, 0};;{ .mfi ldfe P1_3 = [table_ptr1], 96 nop.f 999 nop.i 999};;{ .mfi(p11) ldfe P1_9 = [table_ptr1], -16(p12) fmerge.ns S_hi = S_hi, S_hi nop.i 999}{ .mfi nop.m 999(p11) fmpy.s1 r_to_the_8 = rsq, rsq nop.i 999};;//// N even: Poly2 = P1_7 + Poly2 * rsq// N odd: poly2 = Q1_5 + poly2 * rsq//{ .mfi(p11) ldfe P1_8 = [table_ptr1], -16(p11) fadd.s1 CORR = rsq, f1 nop.i 999};;//// N even: Poly1 = P1_2 + P1_3 * rsq// N odd: poly1 = 1.0 + S_hi * r// 16 bits partial account for necessary (-1)//{ .mmi(p11) ldfe P1_7 = [table_ptr1], -16;;(p11) ldfe P1_6 = [table_ptr1], -16 nop.i 999};;//// N even: Poly1 = P1_1 + Poly1 * rsq// N odd: S_hi = S_hi + S_hi * poly1) 16 bits account for necessary////// N even: Poly2 = P1_5 + Poly2 * rsq// N odd: poly2 = Q1_3 + poly2 * rsq//{ .mfi(p11) ldfe P1_5 = [table_ptr1], -16(p11) fmpy.s1 r_to_the_8 = r_to_the_8, r_to_the_8 nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999};;//// N even: Poly1 = Poly1 * rsq// N odd: poly1 = 1.0 + S_hi * r 32 bits partial////// N even: CORR = CORR * c// N odd: S_hi = S_hi * poly1 + S_hi 32 bits////// N even: Poly2 = P1_6 + Poly2 * rsq// N odd: poly2 = Q1_4 + poly2 * rsq//{ .mmf(p11) ldfe P1_4 = [table_ptr1], -16 nop.m 999(p11) fmpy.s1 CORR = CORR, c};;{ .mfi nop.m 999(p11) fma.s1 Poly1 = P1_3, rsq, P1_2 nop.i 999 ;;}{ .mfi(p12) ldfe Q1_7 = [table_ptr2], -16(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999 ;;}{ .mfi(p12) ldfe Q1_6 = [table_ptr2], -16(p11) fma.s1 Poly2 = P1_9, rsq, P1_8 nop.i 999 ;;}{ .mmi(p12) ldfe Q1_5 = [table_ptr2], -16 ;;(p12) ldfe Q1_4 = [table_ptr2], -16 nop.i 999 ;;}{ .mfi(p12) ldfe Q1_3 = [table_ptr2], -16//// N even: Poly2 = P1_8 + P1_9 * rsq// N odd: poly2 = Q1_6 + Q1_7 * rsq//(p11) fma.s1 Poly1 = Poly1, rsq, P1_1 nop.i 999 ;;}{ .mfi(p12) ldfe Q1_2 = [table_ptr2], -16(p12) fma.s1 poly1 = S_hi, r, f1
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -