📄 s_asinhl.s
字号:
// NOTE: this listing resumes in the middle of an instruction bundle that was
// opened before this point; the first three lines below close that bundle.
      nop.i 0
}
;;

//
// Tail of the regular logl evaluation path.
// Combines the three table look-ups (G, H, h), forms the reduced argument r,
// evaluates the polynomial in r and assembles the result.
// p12 and p11 are declared mutually exclusive; the p11 instructions use the
// subtracting form (fms) and flip the sign of Y_hi ("sign for neg").
//
{ .mfi
      nop.m 0
      fadd.s1 FR_h = FR_h, FR_h2                 // h = h_1 + h_2
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_S_lo = FR_XLog_Lo, FR_2_to_minus_N, f0 // S_lo = XLog_Lo * 2^-N
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fmpy.s1 FR_G = FR_G, FR_G3                 // G = (G_1 * G_2) * G_3
      nop.i 0
}
{ .mfi
      nop.m 0
      fadd.s1 FR_H = FR_H, FR_H3                 // H = (H_1 + H_2) + H_3
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fadd.s1 FR_h = FR_h, FR_h3                 // h = (h_1 + h_2) + h_3
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fms.s1 FR_r = FR_G, FR_S_hi, f1            // r = G * S_hi - 1
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N*log2_hi + H
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N*log2_lo + h
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_r = FR_G, FR_S_lo, FR_r          // r = G*S_lo + (G*S_hi - 1)
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3     // poly_lo = r * Q4 + Q3
      nop.i 0
}
{ .mfi
      nop.m 0
      fmpy.s1 FR_rsq = FR_r, FR_r                // rsq = r * r
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo*r + Q2
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_rcub = FR_rsq, FR_r, f0          // rcub = r^3
      nop.i 0
}
;;

.pred.rel "mutex",p12,p11
{ .mfi
      nop.m 0
(p12) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r    // poly_hi = Q1*rsq + r
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fms.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r    // poly_hi = Q1*rsq - r
      nop.i 0
}
;;

.pred.rel "mutex",p12,p11
{ .mfi
      nop.m 0
(p12) fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 + h
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fms.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 - h
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo   // Y_lo = poly_hi + poly_lo
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fma.s0 FR_Y_hi = FR_Y_hi, FR_Neg_One, f0   // FR_Y_hi sign for neg
      nop.i 0
}
;;

{ .mfb
      nop.m 0
      fadd.s0 FR_Res = FR_Y_lo,FR_Y_hi           // Result = Y_lo + Y_hi
      br.ret.sptk b0                             // Common exit for 2^-7 < x < inf
}
;;


// * SPECIAL VERSION OF LOGL FOR HUGE ARGUMENTS *
//
// Same table-driven scheme as the regular path, but the argument is taken
// from FR_XLog_Hi only: no S_lo term is folded into r here.
// Reads : FR_XLog_Hi, GR_ad_z_1 (base address of the constant tables), p11/p12
// Writes: FR_Res, then returns through b0.
// Under p11 the coefficients Q1..Q4 are negated before the polynomial is
// evaluated, and Y_hi is negated at the end.
huges_logl:
{ .mfi
      getf.sig GR_signif = FR_XLog_Hi            // Get significand of x+1
      fmerge.ns FR_Neg_One = f1, f1              // Form -1.0
      mov GR_exp_2tom7 = 0x0fff8                 // Exponent of 2^-7
}
;;

{ .mfi
      add GR_ad_tbl_1 = 0x040, GR_ad_z_1         // Point to Constants_G_H_h1
      nop.f 0
      add GR_ad_q = -0x60, GR_ad_z_1             // Point to Constants_P
}
{ .mfi
      add GR_ad_z_2 = 0x140, GR_ad_z_1           // Point to Constants_Z_2
      nop.f 0
      add GR_ad_tbl_2 = 0x180, GR_ad_z_1         // Point to Constants_G_H_h2
}
;;

{ .mfi
      nop.m 0
      nop.f 0
      extr.u GR_Index1 = GR_signif, 59, 4        // Get high 4 bits of signif
}
{ .mfi
      add GR_ad_tbl_3 = 0x280, GR_ad_z_1         // Point to Constants_G_H_h3
      nop.f 0
      nop.i 0
}
;;

{ .mfi
      shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
      nop.f 0
      extr.u GR_X_0 = GR_signif, 49, 15          // Get high 15 bits of signif.
}
;;

{ .mfi
      ld4 GR_Z_1 = [GR_ad_z_1]                   // Load Z_1
      nop.f 0
      mov GR_exp_mask = 0x1FFFF                  // Create exponent mask
}
{ .mfi
      shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1 // Point to G_1
      nop.f 0
      mov GR_Bias = 0x0FFFF                      // Create exponent bias
}
;;

{ .mfi
      ldfps FR_G, FR_H = [GR_ad_tbl_1],8         // Load G_1, H_1
      fmerge.se FR_S_hi = f1,FR_XLog_Hi          // Form |x+1|
      nop.i 0
}
;;

{ .mmi
      getf.exp GR_N = FR_XLog_Hi                 // Get N = exponent of x+1
      ldfd FR_h = [GR_ad_tbl_1]                  // Load h_1
      nop.i 0
}
;;

{ .mfi
      ldfe FR_log2_hi = [GR_ad_q],16             // Load log2_hi
      nop.f 0
      pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15       // Get bits 30-15 of X_0 * Z_1
}
;;

// WE CANNOT USE GR_X_1 IN NEXT 3 CYCLES BECAUSE OF POSSIBLE 10 CLOCKS STALL!
// "DEAD" ZONE!
{ .mmi
      ldfe FR_log2_lo = [GR_ad_q],16             // Load log2_lo
      sub GR_N = GR_N, GR_Bias                   // Remove the exponent bias from N
      mov GR_exp_2tom80 = 0x0ffaf                // Exponent of 2^-80
}
;;

{ .mfi
      ldfe FR_Q4 = [GR_ad_q],16                  // Load Q4
      nop.f 0
      sub GR_minus_N = GR_Bias, GR_N             // Form exponent of 2^(-N)
}
;;

{ .mmf
      ldfe FR_Q3 = [GR_ad_q],16                  // Load Q3
      setf.sig FR_float_N = GR_N                 // Put integer N into significand
      nop.f 0
}
;;

{ .mmi
      nop.m 0
      ldfe FR_Q2 = [GR_ad_q],16                  // Load Q2
      extr.u GR_Index2 = GR_X_1, 6, 4            // Extract bits 6-9 of X_1
}
;;

{ .mmi
      ldfe FR_Q1 = [GR_ad_q]                     // Load Q1
      shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
      nop.i 0
}
;;

{ .mmi
      ld4 GR_Z_2 = [GR_ad_z_2]                   // Load Z_2
      shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2
      nop.i 0
}
;;

{ .mmi
      ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8       // Load G_2, H_2
      nop.m 0
      nop.i 0
}
;;

{ .mfi
      ldfd FR_h2 = [GR_ad_tbl_2]                 // Load h_2
      nop.f 0
      nop.i 0
}
{ .mfi
      setf.exp FR_2_to_minus_N = GR_minus_N      // Form 2^(-N)
      nop.f 0
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      nop.f 0
      pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15       // Get bits 30-15 of X_1 * Z_2
}
;;

// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES BECAUSE OF POSSIBLE 10 CLOCKS STALL!
// "DEAD" ZONE!
// JUST HAVE TO INSERT 3 NOP CYCLES (nothing to do here)
{ .mfi
      nop.m 0
      nop.f 0
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      nop.f 0
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      nop.f 0
      nop.i 0
}
;;

{ .mfi
      nop.m 0
(p11) fma.s1 FR_Q4 = FR_Q4, FR_Neg_One, f0       // Negate Q4
      extr.u GR_Index3 = GR_X_2, 1, 5            // Extract bits 1-5 of X_2
}
;;

{ .mfi
      shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3
      fcvt.xf FR_float_N = FR_float_N            // Convert integer N to floating point
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fma.s1 FR_Q3 = FR_Q3, FR_Neg_One, f0       // Negate Q3
      nop.i 0
}
;;

{ .mfi
      ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8       // Load G_3, H_3
(p11) fma.s1 FR_Q2 = FR_Q2, FR_Neg_One, f0       // Negate Q2
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fma.s1 FR_Q1 = FR_Q1, FR_Neg_One, f0       // Negate Q1
      nop.i 0
}
;;

{ .mfi
      ldfd FR_h3 = [GR_ad_tbl_3]                 // Load h_3
      fmpy.s1 FR_G = FR_G, FR_G2                 // G = G_1 * G_2
      nop.i 0
}
{ .mfi
      nop.m 0
      fadd.s1 FR_H = FR_H, FR_H2                 // H = H_1 + H_2
      nop.i 0
}
;;

{ .mmf
      nop.m 0
      nop.m 0
      fadd.s1 FR_h = FR_h, FR_h2                 // h = h_1 + h_2
}
;;

{ .mfi
      nop.m 0
      fmpy.s1 FR_G = FR_G, FR_G3                 // G = (G_1 * G_2) * G_3
      nop.i 0
}
{ .mfi
      nop.m 0
      fadd.s1 FR_H = FR_H, FR_H3                 // H = (H_1 + H_2) + H_3
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fadd.s1 FR_h = FR_h, FR_h3                 // h = (h_1 + h_2) + h_3
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fms.s1 FR_r = FR_G, FR_S_hi, f1            // r = G * S_hi - 1
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N*log2_hi + H
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N*log2_lo + h
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3     // poly_lo = r * Q4 + Q3
      nop.i 0
}
{ .mfi
      nop.m 0
      fmpy.s1 FR_rsq = FR_r, FR_r                // rsq = r * r
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo*r + Q2
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_rcub = FR_rsq, FR_r, f0          // rcub = r^3
      nop.i 0
}
;;

.pred.rel "mutex",p12,p11
{ .mfi
      nop.m 0
(p12) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r    // poly_hi = Q1*rsq + r
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fms.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r    // poly_hi = Q1*rsq - r (Q1 negated above)
      nop.i 0
}
;;

.pred.rel "mutex",p12,p11
{ .mfi
      nop.m 0
(p12) fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 + h
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fms.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 - h
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo   // Y_lo = poly_hi + poly_lo
      nop.i 0
}
{ .mfi
      nop.m 0
(p11) fma.s0 FR_Y_hi = FR_Y_hi, FR_Neg_One, f0   // FR_Y_hi sign for neg
      nop.i 0
}
;;

{ .mfb
      nop.m 0
      fadd.s0 FR_Res = FR_Y_lo,FR_Y_hi           // Result = Y_lo + Y_hi
      br.ret.sptk b0                             // Common exit for 2^-7 < x < inf
}
;;


// NEAR ZERO POLYNOMIAL INTERVAL
//
// Evaluates Res = P3 * X3 + Arg with
//   P3 = (C9*X2 + C7) * X2^2 + (C5*X2 + C3)
// FR_X2, FR_X3, FR_Arg and the C coefficients are set up before this point
// (presumably X2 = x^2, X3 = x^3, Arg = x -- confirm against the entry code).
near_0:
{ .mfi
      nop.m 0
      fma.s1 FR_X4 = FR_X2, FR_X2, f0            // x^4 = x^2 * x^2
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_P9 = FR_C9,FR_X2,FR_C7           // p9 = C9*x^2 + C7
      nop.i 0
}
{ .mfi
      nop.m 0
      fma.s1 FR_P5 = FR_C5,FR_X2,FR_C3           // p5 = C5*x^2 + C3
      nop.i 0
}
;;

{ .mfi
      nop.m 0
      fma.s1 FR_P3 = FR_P9,FR_X4,FR_P5           // p3 = p9*x^4 + p5
      nop.i 0
}
;;

{ .mfb
      nop.m 0
      fma.s0 FR_Res = FR_P3,FR_X3,FR_Arg         // res = p3*X3 + Arg
      br.ret.sptk b0                             // Near 0 path return
}
;;

GLOBAL_LIBM_END(asinhl)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -