📄 libm_tan.s
字号:
;;
//
//     ARGUMENT REDUCTION CODE - CASE 3 and 4
//
//     This section performs a two-stage reduction of Arg:
//       1. N_0 (already computed as Arg * Inv_P_0 before this point) is
//          converted to an integer and removed from Arg, giving ArgPrime.
//       2. N = integer(ArgPrime * two_by_PI) is removed using P_1/P_2,
//          giving s_val plus the correction term w.
//     p10/p11 then select Case 3 (s_val outside the +/-2**(-14) window) or
//     Case 4 (s_val inside it).  Case 3 finishes r and c and branches to
//     TAN_SMALL_R or TAN_NORMAL_R; Case 4 rebuilds r and c from hi/lo
//     pieces and falls through into the polynomial / reciprocal code.
//
//     NOTE(review): the block comments inside the bundles are kept from
//     the original source.  Several of them describe the previous or a
//     later instruction rather than the one directly beneath them; the
//     trailing comment on each instruction states what that instruction
//     itself computes.
//     NOTE(review): a few instructions are issued twice with identical
//     operands (U_hi/V_hi products, r = s_val + w, w = N * P_3).  They are
//     preserved exactly as found; confirm against the algorithm write-up
//     before removing any of them.
//
//
//     Adjust table_ptr1 to beginning of table.
//     N_0 = Arg * Inv_P_0
//
{ .mmi
(p0)   ld8 table_ptr1 = [table_ptr1]
       nop.m 999
       nop.i 999
}
;;

{ .mmi
(p0)   add table_ptr1 = 8, table_ptr1 ;;
//
//     Point to 2**(-14)
//
(p0)   ldfs TWO_TO_NEG14 = [table_ptr1], 4
       nop.i 999 ;;
}

//
//     Load 2**(-14).
//
{ .mmi
(p0)   ldfs NEGTWO_TO_NEG14 = [table_ptr1], 180 ;;
//
//     N_0_fix = integer part of N_0 .
//     Adjust table_ptr1 to beginning of table.
//
(p0)   ldfs TWO_TO_NEG2 = [table_ptr1], 4
       nop.i 999 ;;
}

//
//     Make N_0 the integer part.
//
{ .mfi
(p0)   ldfs NEGTWO_TO_NEG2 = [table_ptr1]
//
//     Load -2**(-14).
//
(p0)   fcvt.fx.s1 N_0_fix = N_0               // N_0_fix = N_0 converted to integer
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fcvt.xf N_0 = N_0_fix                  // N_0 = N_0_fix converted back to floating point
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p0)   fnma.s1 ArgPrime = N_0, P_0, Arg       // ArgPrime = -(N_0 * P_0) + Arg
       nop.i 999
}

{ .mfi
       nop.m 999
(p0)   fmpy.s1 w = N_0, d_1                   // w = N_0 * d_1
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     ArgPrime = -N_0 * P_0 + Arg
//     w = N_0 * d_1
//
(p0)   fmpy.s1 N = ArgPrime, two_by_PI        // N = ArgPrime * two_by_PI
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     N = ArgPrime * 2/pi
//
(p0)   fcvt.fx.s1 N_fix = N                   // N_fix = N converted to integer
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     N_fix is the integer part.
//
(p0)   fcvt.xf N = N_fix                      // N = N_fix converted back to floating point
       nop.i 999 ;;
}

{ .mfi
(p0)   getf.sig N_fix_gr = N_fix              // copy the integer into a general register
       nop.f 999
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     N is the integer part of the reduced-reduced argument.
//     Put the integer in a GP register.
//
(p0)   fnma.s1 s_val = N, P_1, ArgPrime       // s_val = -(N * P_1) + ArgPrime
       nop.i 999
}

{ .mfi
       nop.m 999
(p0)   fnma.s1 w = N, P_2, w                  // w = -(N * P_2) + w
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     s_val = -N*P_1 + ArgPrime
//     w = -N*P_2 + w
//
(p0)   fcmp.lt.unc.s1 p11, p10 = s_val, TWO_TO_NEG14      // p11 = (s_val < TWO_TO_NEG14), p10 = !p11
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p11)  fcmp.gt.s1 p11, p10 = s_val, NEGTWO_TO_NEG14       // refine: p11 only if also s_val > NEGTWO_TO_NEG14
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 3: r = s_val + w (Z complete)
//     Case 4: U_hi = N_0 * d_1
//
(p10)  fmpy.s1 V_hi = N, P_2                  // V_hi = N * P_2 (written under p10)
       nop.i 999
}

{ .mfi
       nop.m 999
(p11)  fmpy.s1 U_hi = N_0, d_1                // U_hi = N_0 * d_1
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 3: r = s_val + w (Z complete)
//     Case 4: U_hi = N_0 * d_1
//
(p11)  fmpy.s1 V_hi = N, P_2                  // V_hi = N * P_2 (written under p11)
       nop.i 999
}

{ .mfi
       nop.m 999
(p11)  fmpy.s1 U_hi = N_0, d_1                // U_hi = N_0 * d_1 (same operands as above)
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Decide between case 3 and 4:
//     Case 3:  s <= -2**(-14) or s >= 2**(-14)
//     Case 4: -2**(-14) < s < 2**(-14)
//
(p10)  fadd.s1 r = s_val, w                   // Case 3: r = s_val + w
       nop.i 999
}

{ .mfi
       nop.m 999
(p11)  fmpy.s1 w = N, P_3                     // Case 4: w = N * P_3
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 4: We need abs of both U_hi and V_hi - don't
//     worry about switched sign of V_hi .
//
(p11)  fsub.s1 A = U_hi, V_hi                 // Case 4: A = U_hi - V_hi
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     Case 4: A = U_hi + V_hi
//     Note: Worry about switched sign of V_hi, so subtract instead of add.
//
(p11)  fnma.s1 V_lo = N, P_2, V_hi            // Case 4: V_lo = -(N * P_2) + V_hi
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p11)  fms.s1 U_lo = N_0, d_1, U_hi           // Case 4: U_lo = N_0 * d_1 - U_hi
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p11)  fabs V_hiabs = V_hi                    // Case 4: V_hiabs = |V_hi|
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     Case 4: V_hi = N * P_2
//             w = N * P_3
//     Note the product does not include the (-) as in the writeup
//     so (-) missing for V_hi and w .
//
(p10)  fadd.s1 r = s_val, w                   // Case 3: r = s_val + w (same operands as above)
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 3: c = s_val - r
//     Case 4: U_lo = N_0 * d_1 - U_hi
//
(p11)  fabs U_hiabs = U_hi                    // Case 4: U_hiabs = |U_hi|
       nop.i 999
}

{ .mfi
       nop.m 999
(p11)  fmpy.s1 w = N, P_3                     // Case 4: w = N * P_3 (same operands as above)
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 4: Set P_12 if U_hiabs >= V_hiabs
//
(p11)  fadd.s1 C_hi = s_val, A                // Case 4: C_hi = s_val + A
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 4: C_hi = s_val + A
//
(p11)  fadd.s1 t = U_lo, V_lo                 // Case 4: t = U_lo + V_lo
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 3: Is |r| < 2**(-2), if so set PR_7
//     else set PR_8.
//     Case 3: If PR_7 is set, prepare to branch to Small_R.
//     Case 3: If PR_8 is set, prepare to branch to Normal_R.
//
(p10)  fsub.s1 c = s_val, r                   // Case 3: c = s_val - r
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 3: c = (s - r) + w (c complete)
//
(p11)  fcmp.ge.unc.s1 p12, p13 = U_hiabs, V_hiabs         // Case 4: p12 = (U_hiabs >= V_hiabs), p13 = !p12
       nop.i 999
}

{ .mfi
       nop.m 999
(p11)  fms.s1 w = N_0, d_2, w                 // Case 4: w = N_0 * d_2 - w
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 4: V_hi = N * P_2
//             w = N * P_3
//     Note the product does not include the (-) as in the writeup
//     so (-) missing for V_hi and w .
//
(p10)  fcmp.lt.unc.s1 p14, p15 = r, TWO_TO_NEG2           // Case 3: p14 = (r < TWO_TO_NEG2), p15 = !p14
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p14)  fcmp.gt.s1 p14, p15 = r, NEGTWO_TO_NEG2            // refine: p14 only if also r > NEGTWO_TO_NEG2
       nop.i 999 ;;
}

{ .mfb
       nop.m 999
//
//     Case 4: V_lo = -N * P_2 - V_hi (U_hi is in place of V_hi in writeup)
//     Note: the (-) is still missing for V_hi .
//     Case 4: w = w + N_0 * d_2
//     Note: the (-) is now incorporated in w .
//
(p10)  fadd.s1 c = c, w                       // Case 3: c = c + w
//
//     Case 4: t = U_lo + V_lo
//     Note: remember V_lo should be (-), subtract instead of add. NO
//
(p14)  br.cond.spnt TAN_SMALL_R ;;            // Case 3, r inside the +/-TWO_TO_NEG2 window
}

{ .mib
       nop.m 999
       nop.i 999
(p15)  br.cond.spnt TAN_NORMAL_R ;;           // Case 3, r outside that window
}

{ .mfi
       nop.m 999
//
//     Case 3: Vector off when |r| < 2**(-2).  Recall that PR_3 will be true.
//     The remaining stuff is for Case 4.
//
(p12)  fsub.s1 a = U_hi, A                    // a = U_hi - A
(p11)  extr.u i_1 = N_fix_gr, 0, 1 ;;         // i_1 = bit 0 of N_fix_gr
}

{ .mfi
       nop.m 999
//
//     Case 4: C_lo = s_val - C_hi
//
(p11)  fadd.s1 t = t, w                       // t = t + w
       nop.i 999
}

{ .mfi
       nop.m 999
(p13)  fadd.s1 a = V_hi, A                    // a = V_hi + A
       nop.i 999 ;;
}

//
//     Case 4: a = U_hi - A
//             a = V_hi - A (do an add to account for missing (-) on V_hi
//
{ .mfi
(p11)  addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp
(p11)  fsub.s1 C_lo = s_val, C_hi             // C_lo = s_val - C_hi
       nop.i 999
}
;;

{ .mmi
(p11)  ld8 table_ptr1 = [table_ptr1]
       nop.m 999
       nop.i 999
}
;;

//
//     Case 4: a = (U_hi - A) + V_hi
//             a = (V_hi - A) + U_hi
//     In each case account for negative missing from V_hi .
//
//
//     Case 4: C_lo = (s_val - C_hi) + A
//
{ .mmi
(p11)  add table_ptr1 = 224, table_ptr1 ;;
(p11)  ldfe P1_1 = [table_ptr1], 16
       nop.i 999 ;;
}

{ .mfi
(p11)  ldfe P1_2 = [table_ptr1], 128
//
//     Case 4: w = U_lo + V_lo + w
//
(p12)  fsub.s1 a = a, V_hi                    // a = a - V_hi
       nop.i 999 ;;
}

//
//     Case 4: r = C_hi + C_lo
//
{ .mfi
(p11)  ldfe Q1_1 = [table_ptr1], 16
(p11)  fadd.s1 C_lo = C_lo, A                 // C_lo = C_lo + A
       nop.i 999 ;;
}

//
//     Case 4: c = C_hi - r
//     Get [i_1] - lsb of N_fix_gr.
//
{ .mfi
(p11)  ldfe Q1_2 = [table_ptr1], 16
       nop.f 999
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p13)  fsub.s1 a = U_hi, a                    // a = U_hi - a
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p11)  fadd.s1 t = t, a                       // t = t + a
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 4: t = t + a
//
(p11)  fadd.s1 C_lo = C_lo, t                 // C_lo = C_lo + t
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     Case 4: C_lo = C_lo + t
//
(p11)  fadd.s1 r = C_hi, C_lo                 // r = C_hi + C_lo
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p11)  fsub.s1 c = C_hi, r                    // c = C_hi - r
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     Case 4: c = c + C_lo  finished.
//     Is i_1 even or odd?
//     if i_1 == 0, set PR_4, else set PR_5.
//
//     r and c have been computed.
//     We know whether this is the sine or cosine routine.
//     Make sure ftz mode is set - should be automatic when using wre
//
(p0)   fmpy.s1 rsq = r, r                     // rsq = r * r
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p11)  fadd.s1 c = c, C_lo                    // c = c + C_lo
(p11)  cmp.eq.unc p11, p12 = 0x0000, i_1 ;;   // p11 = (i_1 == 0), p12 = (i_1 != 0)
}

{ .mfi
       nop.m 999
(p12)  frcpa.s1 S_hi, p0 = f1, r              // N odd: S_hi = reciprocal approximation of r
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     N odd: Change sign of S_hi
//
(p11)  fma.s1 Result = rsq, P1_2, P1_1        // N even: Result = rsq * P1_2 + P1_1
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p12)  fma.s1 P = rsq, Q1_2, Q1_1             // N odd: P = rsq * Q1_2 + Q1_1
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     N odd:  Result = S_hi + S_lo (User supplied rounding mode for C1)
//
(p0)   fmpy.s0 fp_tmp = fp_tmp, fp_tmp        // Dummy mult to set inexact
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
//
//     N even: rsq = r * r
//     N odd:  S_hi = frcpa(r)
//
(p12)  fmerge.ns S_hi = S_hi, S_hi            // N odd: S_hi = -S_hi
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     N even: rsq = rsq * P1_2 + P1_1
//     N odd:  poly1 = 1.0 + S_hi * r    16 bits partial account for necessary
//
(p11)  fmpy.s1 Result = rsq, Result           // N even: Result = rsq * Result
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p12)  fma.s1 poly1 = S_hi, r, f1             // N odd: poly1 = S_hi * r + 1.0
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     N even: Result = Result * rsq
//     N odd:  S_hi = S_hi + S_hi*poly1  16 bits account for necessary
//
(p11)  fma.s1 Result = r, Result, c           // N even: Result = r * Result + c
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p12)  fma.s1 S_hi = S_hi, poly1, S_hi        // N odd: S_hi = S_hi * poly1 + S_hi
       nop.i 999
}

{ .mfi
       nop.m 999
//
//     N odd:  S_hi  = S_hi * poly1 + S_hi   32 bits
//
(p11)  fadd.s0 Result = r, Result             // N even: Result = r + Result (status field s0)
       nop.i 999 ;;
}

{ .mfi
       nop.m 999
(p12)  fma.s1 poly1 = S_hi, r, f1             // N odd: poly1 = S_hi * r + 1.0
       nop.i 999 ;;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -