📄 libm_tan.s
字号:
// NOTE(review): this listing looks like IA-64 (Itanium) assembly captured with its line breaks collapsed; each "{ ... }" group is one instruction bundle, and each "//" presumably started its own comment line in the original file - confirm against the upstream source before assembling.
// Visible here, in order: (1) the tail of an earlier code path whose label is outside this view, ending in "br.ret.sptk b0"; (2) TAN_SMALL_R, which extracts bit 0 of N_fix_gr into i_1, sets p11 when i_1 == 0 and p12 otherwise, loads the P1_* coefficients (p11 path) and Q1_* coefficients (p12 path) from offsets of TAN_BASE_CONSTANTS, writes Result with a final .s0 add and returns through b0; (3) the beginning of TAN_NORMAL_R, which is cut off at the end of this view.
// NOTE(review): several of the original "N even / N odd" comments sit next to instructions they do not describe (for example "poly1 = S_hi * c + poly1" directly precedes "fmpy.s1 S_lo = S_hi, poly1"); read the instructions, not the adjacent comment.
{ .mfi nop.m 999//// N even: Result = Result * r + c// N odd: poly1 = 1.0 + S_hi * r 32 bits partial//(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}{ .mfi nop.m 999//// N even: Result1 = Result + r (Rounding mode S0)// N odd: poly1 = S_hi * r + 1.0 64 bits partial//(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * poly + S_hi 64 bits//(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * r + 1.0//(p12) fma.s1 poly1 = S_hi, c, poly1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * c + poly1//(p12) fmpy.s1 S_lo = S_hi, poly1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: S_lo = S_hi * poly1//(p12) fma.s1 S_lo = P, r, S_lo nop.i 999 ;;}{ .mfb nop.m 999//// N odd: S_lo = S_lo + r * P//(p12) fadd.s0 Result = S_hi, S_lo(p0) br.ret.sptk b0 ;;}TAN_SMALL_R: { .mii nop.m 999(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;(p0) cmp.eq.unc p11, p12 = 0x0000, i_1}{ .mfi nop.m 999(p0) fmpy.s1 rsq = r, r nop.i 999 ;;}{ .mfi nop.m 999(p12) frcpa.s1 S_hi, p0 = f1, r nop.i 999}{ .mfi(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp nop.f 999 nop.i 999};;{ .mmi(p0) ld8 table_ptr1 = [table_ptr1] nop.m 999 nop.i 999};;// *****************************************************************// *****************************************************************// *****************************************************************{ .mmi(p0) add table_ptr1 = 224, table_ptr1 ;;(p0) ldfe P1_1 = [table_ptr1], 16 nop.i 999 ;;}// r and c have been computed.// We know whether this is the sine or cosine routine.// Make sure ftz mode is set - should be automatic when using wre// |r| < 2**(-2){ .mfi(p0) ldfe P1_2 = [table_ptr1], 16(p11) fmpy.s1 r_to_the_8 = rsq, rsq nop.i 999 ;;}//// Set table_ptr1 to beginning of constant table.// Get [i_1] - lsb of N_fix_gr.//{ .mfi(p0) ldfe P1_3 = [table_ptr1], 96//// N even: rsq = r * r// N odd: S_hi = frcpa(r)//(p12) fmerge.ns S_hi = 
S_hi, S_hi nop.i 999 ;;}//// Is i_1 even or odd?// if i_1 == 0, set PR_11.// if i_1 != 0, set PR_12.//{ .mfi(p11) ldfe P1_9 = [table_ptr1], -16//// N even: Poly2 = P1_7 + Poly2 * rsq// N odd: poly2 = Q1_5 + poly2 * rsq//(p11) fadd.s1 CORR = rsq, f1 nop.i 999 ;;}{ .mmi(p11) ldfe P1_8 = [table_ptr1], -16 ;;//// N even: Poly1 = P1_2 + P1_3 * rsq// N odd: poly1 = 1.0 + S_hi * r // 16 bits partial account for necessary (-1)//(p11) ldfe P1_7 = [table_ptr1], -16 nop.i 999 ;;}//// N even: Poly1 = P1_1 + Poly1 * rsq// N odd: S_hi = S_hi + S_hi * poly1) 16 bits account for necessary//{ .mfi(p11) ldfe P1_6 = [table_ptr1], -16//// N even: Poly2 = P1_5 + Poly2 * rsq// N odd: poly2 = Q1_3 + poly2 * rsq//(p11) fmpy.s1 r_to_the_8 = r_to_the_8, r_to_the_8 nop.i 999 ;;}//// N even: Poly1 = Poly1 * rsq// N odd: poly1 = 1.0 + S_hi * r 32 bits partial//{ .mfi(p11) ldfe P1_5 = [table_ptr1], -16(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}//// N even: CORR = CORR * c// N odd: S_hi = S_hi * poly1 + S_hi 32 bits////// N even: Poly2 = P1_6 + Poly2 * rsq// N odd: poly2 = Q1_4 + poly2 * rsq//{ .mmf(p0) addl table_ptr2 = @ltoff(TAN_BASE_CONSTANTS), gp(p11) ldfe P1_4 = [table_ptr1], -16(p11) fmpy.s1 CORR = CORR, c};;{ .mmi(p0) ld8 table_ptr2 = [table_ptr2] nop.m 999 nop.i 999};;{ .mii(p0) add table_ptr2 = 464, table_ptr2 nop.i 999 ;; nop.i 999}{ .mfi nop.m 999(p11) fma.s1 Poly1 = P1_3, rsq, P1_2 nop.i 999 ;;}{ .mfi(p0) ldfe Q1_7 = [table_ptr2], -16(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999 ;;}{ .mfi(p0) ldfe Q1_6 = [table_ptr2], -16(p11) fma.s1 Poly2 = P1_9, rsq, P1_8 nop.i 999 ;;}{ .mmi(p0) ldfe Q1_5 = [table_ptr2], -16 ;;(p12) ldfe Q1_4 = [table_ptr2], -16 nop.i 999 ;;}{ .mfi(p12) ldfe Q1_3 = [table_ptr2], -16//// N even: Poly2 = P1_8 + P1_9 * rsq// N odd: poly2 = Q1_6 + Q1_7 * rsq//(p11) fma.s1 Poly1 = Poly1, rsq, P1_1 nop.i 999 ;;}{ .mfi(p12) ldfe Q1_2 = [table_ptr2], -16(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999 ;;}{ .mfi(p12) ldfe Q1_1 = [table_ptr2], -16(p11) fma.s1 Poly2 = Poly2, 
rsq, P1_7 nop.i 999 ;;}{ .mfi nop.m 999//// N even: CORR = rsq + 1// N even: r_to_the_8 = rsq * rsq//(p11) fmpy.s1 Poly1 = Poly1, rsq nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly2 = Q1_7, rsq, Q1_6 nop.i 999 ;;}{ .mfi nop.m 999(p11) fma.s1 Poly2 = Poly2, rsq, P1_6 nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly2 = poly2, rsq, Q1_5 nop.i 999 ;;}{ .mfi nop.m 999(p11) fma.s1 Poly2= Poly2, rsq, P1_5 nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 S_hi = S_hi, poly1, S_hi nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly2 = poly2, rsq, Q1_4 nop.i 999 ;;}{ .mfi nop.m 999//// N even: r_to_the_8 = r_to_the_8 * r_to_the_8// N odd: poly1 = S_hi * r + 1.0 64 bits partial//(p11) fma.s1 Poly2 = Poly2, rsq, P1_4 nop.i 999 ;;}{ .mfi nop.m 999//// N even: Result = CORR + Poly * r// N odd: P = Q1_1 + poly2 * rsq//(p12) fma.s1 poly1 = S_hi, r, f1 nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly2 = poly2, rsq, Q1_3 nop.i 999 ;;}{ .mfi nop.m 999//// N even: Poly2 = P1_4 + Poly2 * rsq// N odd: poly2 = Q1_2 + poly2 * rsq//(p11) fma.s1 Poly = Poly2, r_to_the_8, Poly1 nop.i 999 ;;}{ .mfi nop.m 999(p12) fma.s1 poly1 = S_hi, c, poly1 nop.i 999}{ .mfi nop.m 999(p12) fma.s1 poly2 = poly2, rsq, Q1_2 nop.i 999 ;;}{ .mfi nop.m 999//// N even: Poly = Poly1 + Poly2 * r_to_the_8// N odd: S_hi = S_hi * poly1 + S_hi 64 bits//(p11) fma.s1 Result = Poly, r, CORR nop.i 999 ;;}{ .mfi nop.m 999//// N even: Result = r + Result (User supplied rounding mode)// N odd: poly1 = S_hi * c + poly1//(p12) fmpy.s1 S_lo = S_hi, poly1 nop.i 999}{ .mfi nop.m 999(p12) fma.s1 P = poly2, rsq, Q1_1 nop.i 999 ;;}{ .mfi nop.m 999//// N odd: poly1 = S_hi * r + 1.0//(p11) fadd.s0 Result = Result, r nop.i 999 ;;}{ .mfi nop.m 999//// N odd: S_lo = S_hi * poly1//(p12) fma.s1 S_lo = Q1_1, c, S_lo nop.i 999}{ .mfi nop.m 999//// N odd: Result = Result + S_hi (user supplied rounding mode)//(p0) fmpy.s0 fp_tmp = fp_tmp, fp_tmp // 
Dummy mult to set inexact nop.i 999 ;;}{ .mfi nop.m 999//// N odd: S_lo = Q1_1 * c + S_lo//(p12) fma.s1 Result = P, r, S_lo nop.i 999 ;;}{ .mfb nop.m 999//// N odd: Result = S_lo + r * P//(p12) fadd.s0 Result = Result, S_hi(p0) br.ret.sptk b0 ;;}TAN_NORMAL_R: { .mfi(p0) getf.sig sig_r = r// *******************************************************************// *******************************************************************// *******************************************************************//// r and c have been computed.// Make sure ftz mode is set - should be automatic when using wre////// Get [i_1] - lsb of N_fix_gr alone.//(p0) fmerge.s Pos_r = f1, r(p0) extr.u i_1 = N_fix_gr, 0, 1 ;;}{ .mfi nop.m 999(p0) fmerge.s sgn_r = r, f1(p0) cmp.eq.unc p11, p12 = 0x0000, i_1 ;;}{ .mfi nop.m 999 nop.f 999(p0) extr.u lookup = sig_r, 58, 5}{ .mlx nop.m 999(p0) movl Create_B = 0x8200000000000000 ;;}{ .mfi(p0) addl table_ptr1 = @ltoff(TAN_BASE_CONSTANTS), gp nop.f 999(p0) dep Create_B = lookup, Create_B, 58, 5};;//// Get [i_1] - lsb of N_fix_gr alone.// Pos_r = abs (r)//{ .mmi ld8 table_ptr1 = [table_ptr1] nop.m 999 nop.i 999};;{ .mmi nop.m 999(p0) setf.sig B = Create_B//// Set table_ptr1 and table_ptr2 to base address of// constant table.//(p0) add table_ptr1 = 480, table_ptr1 ;;}{ .mmb nop.m 999//// Is i_1 or i_0 == 0 ?// Create the constant 1 00000 1000000000000000000000...//(p0) ldfe P2_1 = [table_ptr1], 16 nop.b 999}{ .mmi nop.m 999 ;;(p0) getf.exp exp_r = Pos_r nop.i 999}//// Get r's exponent// Get r's significand//{ .mmi(p0) ldfe P2_2 = [table_ptr1], 16 ;;//// Get the 5 bits of r for the lookup. 1.xxxxx ....// from sig_r.// Grab lsb of exp of B//(p0) ldfe P2_3 = [table_ptr1], 16 nop.i 999 ;;}{ .mii nop.m 999(p0) andcm table_offset = 0x0001, exp_r ;;(p0) shl table_offset = table_offset, 9 ;;}{ .mii nop.m 999//// Deposit 0 00000 1000000000000000000000... on// 1 xxxxx yyyyyyyyyyyyyyyyyyyyyy...,// getting rid of the ys.// Is B = 2** -2 or B=
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -