📄 libm_sincosf.s
字号:
// NOTE(review): the data8 lines down to LOCAL_OBJECT_END are the tail of the
// double_sin_cos_beta_k4 table; the start of the table lies before this
// excerpt.  Each entry is a pair of doubles: sin(m Pi/16), then cos(m Pi/16).
//
data8 0xBFED906BCF328D46 // sin ( 26 Pi / 16 )
data8 0x3FD87DE2A6AEA963 // cos ( 26 Pi / 16 )
//
data8 0xBFEA9B66290EA1A3 // sin ( 27 Pi / 16 )
data8 0x3FE1C73B39AE68C8 // cos ( 27 Pi / 16 )
//
data8 0xBFE6A09E667F3BCD // sin ( 28 Pi / 16 )
data8 0x3FE6A09E667F3BCD // cos ( 28 Pi / 16 )
//
data8 0xBFE1C73B39AE68C8 // sin ( 29 Pi / 16 )
data8 0x3FEA9B66290EA1A3 // cos ( 29 Pi / 16 )
//
data8 0xBFD87DE2A6AEA963 // sin ( 30 Pi / 16 )
data8 0x3FED906BCF328D46 // cos ( 30 Pi / 16 )
//
data8 0xBFC8F8B83C69A60B // sin ( 31 Pi / 16 )
data8 0x3FEF6297CFF75CB0 // cos ( 31 Pi / 16 )
//
data8 0x0000000000000000 // sin ( 32 Pi / 16 )
data8 0x3FF0000000000000 // cos ( 32 Pi / 16 )
LOCAL_OBJECT_END(double_sin_cos_beta_k4)

.section .text

//----------------------------------------------------------------------------
// Single-precision simultaneous sine and cosine.
//
// Two entry points share the body at _CISF_COMMON and differ only in the
// predicate they set:
//   sincosf         sets p13 (p14 clear): both results are stored with stfs
//                   through cisf_pResSin / cisf_pResCos, then br.ret.
//   __libm_sincosf  sets p14 (p13 clear): leaves through _CISF_RETURN, which
//                   additionally copies the results to r8 / r9.
//
// Paths inside the common body:
//   _CISF_SPECIAL_ARGS  x is 0, Inf or NaN (fclass mask 0xe7)
//   _CISF_LARGE_ARGS    biased exponent of x >= 0x10017, i.e. |x| >= 2^24;
//                       delegates to __libm_sincos_large
//   main path           N = round(x * 16/pi), r = x - N*pi/16, followed by a
//                       short polynomial in r combined with table values
//                       selected by N mod 32.
//
// NOTE(review): every cisf_* / GR_SAVE_* name is a register alias defined
// outside this excerpt; the actual register assignment cannot be seen here.
//----------------------------------------------------------------------------
GLOBAL_IEEE754_ENTRY(sincosf)
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
{ .mlx
      alloc         GR_SAVE_PFS = ar.pfs, 0, 21, 0, 0
      movl          cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // 16/pi signd
}
// cisf_GR_rshf_2to61 = 1.1000 2^(63+63-2)
{ .mlx
      addl          cisf_AD_1 = @ltoff(double_cisf_pi), gp
      movl          cisf_GR_rshf_2to61 = 0x47b8000000000000 // 1.1 2^(63+63-2)
};;

{ .mfi
      ld8           cisf_AD_1 = [cisf_AD_1]
      fnorm.s1      cisf_NORM_f8 = cisf_Arg
      cmp.eq        p13, p14 = r0, r0 // p13 set for sincos (r0 == r0 is always true, so p14 is cleared)
}
// cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
      mov           cisf_GR_exp_2tom61 = 0xffff-61
      nop.i         0
      br.cond.sptk  _CISF_COMMON
};;
GLOBAL_IEEE754_END(sincosf)

// Second entry point.  The setup below is the same as in sincosf except that
// p14 (not p13) is set and control falls through into _CISF_COMMON instead of
// branching to it.
GLOBAL_LIBM_ENTRY(__libm_sincosf)
{ .mlx
// cisf_GR_sig_inv_pi_by_16 = significand of 16/pi
      alloc         GR_SAVE_PFS = ar.pfs,0,21,0,0
      movl          cisf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A
}
// cisf_GR_rshf_2to61 = 1.1000 2^(63+63-2)
{ .mlx
      addl          cisf_AD_1 = @ltoff(double_cisf_pi), gp
      movl          cisf_GR_rshf_2to61 = 0x47b8000000000000
};;

// p14 set for the __libm_sincosf entry
// (the original comment read "__libm_sincos and cis")
{ .mfi
      ld8           cisf_AD_1 = [cisf_AD_1]
      fnorm.s1      cisf_NORM_f8 = cisf_Arg
      cmp.eq        p14, p13 = r0, r0
}
// cisf_GR_exp_2tom61 = exponent of scaling factor 2^-61
{ .mib
      mov           cisf_GR_exp_2tom61 = 0xffff-61
      nop.i         0
      nop.b         0
};;

_CISF_COMMON:
//  Form two constants we need
//  16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
//  1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
//  NOTE(review): the original comment also said "fcmp used to set denormal,
//  and invalid on snans", but no fcmp appears in this bundle; the fclass.m
//  below only classifies the argument.
{ .mfi
      setf.sig      cisf_SIG_INV_PI_BY_16_2TO61 = cisf_GR_sig_inv_pi_by_16
      fclass.m      p6,p0 = cisf_Arg, 0xe7 // p6 if x = 0, inf, nan
      addl          cisf_gr_tmp = -1, r0
}
// cisf_GR_rshf = 1.1000 2^63 for right shift
{ .mlx
      setf.d        cisf_RSHF_2TO61 = cisf_GR_rshf_2to61
      movl          cisf_GR_rshf = 0x43e8000000000000
};;

//  Form another constant
//  2^-61 for scaling Nfloat
//  0x10017 is register_bias + 24.
//  So if |f8| >= 2^24, go to large args routine
{ .mmi
      getf.exp      cisf_r_signexp = cisf_Arg
      setf.exp      cisf_2TOM61 = cisf_GR_exp_2tom61
      mov           cisf_exp_limit = 0x10017
};;

// Load the two pieces of pi/16
// Form another constant
//  1.1000...000 * 2^63, the right shift constant
// Leave for the special-case path when the fclass.m above set p6.
{ .mmb
      ldfe          cisf_Pi_by_16_hi = [cisf_AD_1],16
      setf.d        cisf_RSHF = cisf_GR_rshf
(p6)  br.cond.spnt  _CISF_SPECIAL_ARGS
};;

{ .mmi
      ldfe          cisf_Pi_by_16_lo = [cisf_AD_1],16
      setf.sig      cisf_tmp = cisf_gr_tmp // constant for inexact set
      nop.i         0
};;

// Start loading P, Q coefficients
// dep.z keeps the low 17 bits of the getf.exp result (the exponent field),
// dropping the sign bit above it.
{ .mmi
      ldfpd         cisf_P2,cisf_Q2 = [cisf_AD_1],16
      nop.m         0
      dep.z         cisf_r_exp = cisf_r_signexp, 0, 17
};;

// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x10017
{ .mmb
      ldfpd         cisf_P1,cisf_Q1 = [cisf_AD_1], 16
      cmp.ge        p10, p0 = cisf_r_exp, cisf_exp_limit
(p10) br.cond.spnt  _CISF_LARGE_ARGS // go to |x| >= 2^24 path
};;

// cisf_W = x * cisf_Inv_Pi_by_16
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
// rightmost bits of significand
{ .mfi
      nop.m         0
      fma.s1        cisf_W_2TO61_RSH = cisf_NORM_f8,cisf_SIG_INV_PI_BY_16_2TO61,cisf_RSHF_2TO61
      nop.i         0
};;

// cisf_NFLOAT = Round_Int_Nearest(cisf_W)
// (undo the 2^61 scaling and subtract the right-shift constant again)
{ .mfi
      nop.m         0
      fms.s1        cisf_NFLOAT = cisf_W_2TO61_RSH,cisf_2TOM61,cisf_RSHF
      nop.i         0
};;

// N = integer part of W, read from the significand of the shifted value
{ .mfi
      getf.sig      cisf_GR_n = cisf_W_2TO61_RSH
      nop.f         0
      nop.i         0
};;

// n_cos = N + 8.  Eight steps of pi/16 are pi/2, so the cosine is obtained
// with the same reconstruction as the sine, using a table index shifted by 8.
// (The original comment read "Add 2^(k-1) (which is in cisf_r_sincos) to N";
// the code uses the immediate 0x8.)
// cisf_r = -cisf_Nfloat * cisf_Pi_by_16_hi + x
// cisf_r = cisf_r -cisf_Nfloat * cisf_Pi_by_16_lo   (done further below)
{ .mfi
      add           cisf_GR_n_cos = 0x8, cisf_GR_n
      fnma.s1       cisf_r = cisf_NFLOAT, cisf_Pi_by_16_hi, cisf_NORM_f8
      nop.i         0
};;

// Get M (least k+1 bits of N): the mask 0x1f keeps 5 bits, i.e. N mod 32
{ .mmi
      and           cisf_GR_m_sin = 0x1f,cisf_GR_n
      and           cisf_GR_m_cos = 0x1f,cisf_GR_n_cos
      nop.i         0
};;

// Table address = cisf_AD_1 + 16*M (shladd by 4); each entry is two doubles.
// NOTE(review): this presumes cisf_AD_1, after the four post-incremented
// loads above, points at entry 0 of the sin/cos table - confirm against the
// data layout, which starts outside this excerpt.
{ .mmi
      shladd        cisf_AD_2_cos = cisf_GR_m_cos,4, cisf_AD_1
      shladd        cisf_AD_2_sin = cisf_GR_m_sin,4, cisf_AD_1
      nop.i         0
};;

// Load (Sm, Cm) for both results.
// p10 is reused here: set when x is denormal, so underflow can be raised below.
{ .mmf
      ldfpd         cisf_Sm_sin, cisf_Cm_sin = [cisf_AD_2_sin]
      ldfpd         cisf_Sm_cos, cisf_Cm_cos = [cisf_AD_2_cos]
      fclass.m.unc  p10,p0 = cisf_Arg,0x0b
};;

{ .mfi
      nop.m         0
      fma.s1        cisf_rsq = cisf_r, cisf_r, f0 // get r^2
      nop.i         0
}
{ .mfi
      nop.m         0
      fmpy.s0       cisf_tmp = cisf_tmp,cisf_tmp // inexact flag
      nop.i         0
};;

// Second reduction step: r_exact = r - Nfloat * Pi_by_16_lo
{ .mmf
      nop.m         0
      nop.m         0
      fnma.s1       cisf_r_exact = cisf_NFLOAT, cisf_Pi_by_16_lo, cisf_r
};;

// P = P1 + r^2*P2 and Q = Q1 + r^2*Q2
{ .mfi
      nop.m         0
      fma.s1        cisf_P = cisf_rsq, cisf_P2, cisf_P1
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        cisf_Q = cisf_rsq, cisf_Q2, cisf_Q1
      nop.i         0
};;

{ .mfi
      nop.m         0
      fmpy.s1       cisf_rcub = cisf_r_exact, cisf_rsq // get r^3
      nop.i         0
};;

{ .mfi
      nop.m         0
      fmpy.s1       cisf_srsq_sin = cisf_Sm_sin,cisf_rsq
      nop.i         0
}
{ .mfi
      nop.m         0
      fmpy.s1       cisf_srsq_cos = cisf_Sm_cos,cisf_rsq
      nop.i         0
};;

// P = r_exact + r^3 * P
{ .mfi
      nop.m         0
      fma.s1        cisf_P = cisf_rcub,cisf_P,cisf_r_exact
      nop.i         0
};;

// Q_sin = Sm_sin + Sm_sin*r^2*Q ;  Q_cos = Sm_cos + Sm_cos*r^2*Q
{ .mfi
      nop.m         0
      fma.s1        cisf_Q_sin = cisf_srsq_sin,cisf_Q, cisf_Sm_sin
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        cisf_Q_cos = cisf_srsq_cos,cisf_Q, cisf_Sm_cos
      nop.i         0
};;

// If den. arg, force underflow to be set
{ .mfi
      nop.m         0
(p10) fmpy.s.s0     cisf_tmp = cisf_Arg,cisf_Arg
      nop.i         0
};;

// Final sin = Cm_sin * P + Q_sin, rounded to single
{ .mfi
      nop.m         0
      fma.s.s0      cisf_Sin_res = cisf_Cm_sin, cisf_P, cisf_Q_sin
      nop.i         0
}
// Final cos = Cm_cos * P + Q_cos, rounded to single
{ .mfb
      nop.m         0
      fma.s.s0      cisf_Cos_res = cisf_Cm_cos, cisf_P, cisf_Q_cos
(p14) br.cond.sptk  _CISF_RETURN // common exit for the p14 (__libm_sincosf) main path
};;

{ .mmb
      stfs          [cisf_pResSin] = cisf_Sin_res
      stfs          [cisf_pResCos] = cisf_Cos_res
      br.ret.sptk   b0 // common exit for sincos main path
};;

_CISF_SPECIAL_ARGS:
// sinf(+/-0) = +/-0
// sinf(Inf)  = NaN
// sinf(NaN)  = NaN
{ .mfi
      nop.m         999
      fma.s.s0      cisf_Sin_res = cisf_Arg, f0, f0 // sinf(+/-0,NaN,Inf)
      nop.i         999
};;

// cosf(+/-0) = 1.0
// cosf(Inf)  = NaN
// cosf(NaN)  = NaN
{ .mfb
      nop.m         999
      fma.s.s0      cisf_Cos_res = cisf_Arg, f0, f1 // cosf(+/-0,NaN,Inf)
(p14) br.cond.sptk  _CISF_RETURN // special exit for the p14 (__libm_sincosf) path
};;

{ .mmb
      stfs          [cisf_pResSin] = cisf_Sin_res
      stfs          [cisf_pResCos] = cisf_Cos_res
      br.ret.sptk   b0 // special exit for sincos main path
};;

 // Exit used when p14 is set (entry through __libm_sincosf).
 // NOTE! r8 and r9 used only because of compiler issue
 // connected with float point complex function arguments pass
 // After fix of this issue this operations can be deleted
_CISF_RETURN:
{ .mmb
      getf.s        r8 = cisf_Cos_res
      getf.s        r9 = cisf_Sin_res
      br.ret.sptk   b0 // exit for the p14 path
};;
GLOBAL_LIBM_END(__libm_sincosf)

//// |x| >= 2^24 path ///////
// Reached from _CISF_COMMON when the biased exponent of x is >= 0x10017.
// Saves ar.pfs, gp, b0 and the predicates, calls __libm_sincos_large,
// restores them, rounds both results to single precision and then leaves
// through the same p14 / store-and-return split as the main path.

.proc _CISF_LARGE_ARGS
_CISF_LARGE_ARGS:
.prologue
{ .mfi
      nop.m         0
      nop.f         0
.save ar.pfs, GR_SAVE_PFS
      mov           GR_SAVE_PFS = ar.pfs
};;

{ .mfi
      mov           GR_SAVE_GP = gp
      nop.f         0
.save b0, GR_SAVE_B0
      mov           GR_SAVE_B0 = b0
};;

.body
// Call of huge arguments sincos
{ .mib
      nop.m         0
      mov           GR_SAVE_PR = pr
      br.call.sptk  b0 = __libm_sincos_large
};;

// Mask 0x1fffe selects predicate bits 1..16, so p13/p14 are restored here
// before they are tested below.
{ .mfi
      mov           gp = GR_SAVE_GP
      nop.f         0
      mov           pr = GR_SAVE_PR, 0x1fffe
};;

{ .mfi
      nop.m         0
      nop.f         0
      mov           b0 = GR_SAVE_B0
};;

// Round the results to single precision (x * 1 + 0 with the .s completer).
{ .mfi
      nop.m         0
      fma.s.s0      cisf_Cos_res = cisf_Cos_res, f1, f0
      mov           ar.pfs = GR_SAVE_PFS
}
// exit for |x| >= 2^24 path when p14 is set (__libm_sincosf)
{ .mfb
      nop.m         0
      fma.s.s0      cisf_Sin_res = cisf_Sin_res, f1, f0
(p14) br.cond.sptk  _CISF_RETURN
};;

{ .mmb
      stfs          [cisf_pResSin] = cisf_Sin_res
      stfs          [cisf_pResCos] = cisf_Cos_res
      br.ret.sptk   b0 // exit for sincos |x| >= 2^24 path
};;
.endp _CISF_LARGE_ARGS

.type __libm_sincos_large#,@function
.global __libm_sincos_large#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -