⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 s_cosf.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 2 页
字号:
    data8 0xBFED906BCF328D46 // sin ( 26 Pi / 16 )
    data8 0x3FD87DE2A6AEA963 // cos ( 26 Pi / 16 )
//
    data8 0xBFEA9B66290EA1A3 // sin ( 27 Pi / 16 )
    data8 0x3FE1C73B39AE68C8 // cos ( 27 Pi / 16 )
//
    data8 0xBFE6A09E667F3BCD // sin ( 28 Pi / 16 )
    data8 0x3FE6A09E667F3BCD // cos ( 28 Pi / 16 )
//
    data8 0xBFE1C73B39AE68C8 // sin ( 29 Pi / 16 )
    data8 0x3FEA9B66290EA1A3 // cos ( 29 Pi / 16 )
//
    data8 0xBFD87DE2A6AEA963 // sin ( 30 Pi / 16 )
    data8 0x3FED906BCF328D46 // cos ( 30 Pi / 16 )
//
    data8 0xBFC8F8B83C69A60B // sin ( 31 Pi / 16 )
    data8 0x3FEF6297CFF75CB0 // cos ( 31 Pi / 16 )
//
    data8 0x0000000000000000 // sin ( 32 Pi / 16 )
    data8 0x3FF0000000000000 // cos ( 32 Pi / 16 )
LOCAL_OBJECT_END(double_sin_cos_beta_k4)

.section .text

////////////////////////////////////////////////////////
// There are two entry points: sin and cos
// If from sin, p8 is true
// If from cos, p9 is true

// sinf entry point.
// In:  f8 = argument x.  Out: f8 = result (written by the shared code below).
// Allocates a register frame (1 input, 13 locals), starts loading the
// constants the common path needs, sets p8 / clears p9, puts 0 into
// sincosf_r_sincos and branches to _SINCOSF_COMMON.
GLOBAL_IEEE754_ENTRY(sinf)
{ .mlx
      alloc         r32                 = ar.pfs,1,13,0,0
      movl  sincosf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
}
{ .mlx
      addl         sincosf_AD_1         = @ltoff(double_sincosf_pi), gp
      movl  sincosf_GR_rshf_2to61       = 0x47b8000000000000 // 1.1 2^(63+63-2)
};;
{ .mfi
      ld8           sincosf_AD_1        = [sincosf_AD_1] // address of double_sincosf_pi
      fnorm.s1      sincosf_NORM_f8     = f8     // Normalize argument
      cmp.eq        p8,p9               = r0, r0 // set p8 (clear p9) for sin
}
{ .mib
      mov           sincosf_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
      mov           sincosf_r_sincos      = 0x0       // 0 for sin
      br.cond.sptk  _SINCOSF_COMMON                 // go to common part
};;
GLOBAL_IEEE754_END(sinf)

// cosf entry point.
// In:  f8 = argument x.  Out: f8 = result.
// Same setup as sinf, except p9 is set (p8 cleared) and sincosf_r_sincos is 8.
// There is no branch at the end: execution falls through into _SINCOSF_COMMON.
GLOBAL_IEEE754_ENTRY(cosf)
{ .mlx
      alloc         r32                 = ar.pfs,1,13,0,0
      movl  sincosf_GR_sig_inv_pi_by_16 = 0xA2F9836E4E44152A // significand of 16/pi
}
{ .mlx
      addl          sincosf_AD_1        = @ltoff(double_sincosf_pi), gp
      movl  sincosf_GR_rshf_2to61       = 0x47b8000000000000 // 1.1 2^(63+63-2)
};;
{ .mfi
      ld8           sincosf_AD_1        = [sincosf_AD_1] // address of double_sincosf_pi
      fnorm.s1      sincosf_NORM_f8     = f8        // Normalize argument
      cmp.eq        p9,p8               = r0, r0    // set p9 (clear p8) for cos
}
{ .mib
      mov           sincosf_GR_exp_2tom61 = 0xffff-61 // exponent of scale 2^-61
      mov           sincosf_r_sincos      = 0x8       // 8 for cos
      nop.b         999
};;

////////////////////////////////////////////////////////
// All entry points end up here.
// If from sin, sincosf_r_sincos is 0 and p8 is true
// If from cos, sincosf_r_sincos is 8 = 2^(k-1) and p9 is true
// We add sincosf_r_sincos to N

///////////// Common sin and cos part //////////////////
_SINCOSF_COMMON:

//  Form two constants we need
//  16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
//  1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
//  fcmp used to set denormal, and invalid on snans
//  NOTE(review): no fcmp appears in the visible code; the bundle below uses
//  fclass.m to detect x = 0/Inf/NaN (p6) -- confirm whether the line above
//  is stale.
{ .mfi
      setf.sig      sincosf_SIG_INV_PI_BY_16_2TO61 = sincosf_GR_sig_inv_pi_by_16
      fclass.m      p6,p0                          = f8, 0xe7 // if x=0,inf,nan
      mov           sincosf_exp_limit              = 0x10017
}
{ .mlx
      setf.d        sincosf_RSHF_2TO61  = sincosf_GR_rshf_2to61
      movl          sincosf_GR_rshf     = 0x43e8000000000000 // 1.1000 2^63
};;                                                          // Right shift

//  Form another constant
//  2^-61 for scaling Nfloat
//  0x10017 is register_bias + 24.
//  So if f8 >= 2^24, go to large argument routines
{ .mmi
      getf.exp      sincosf_r_signexp   = f8 // sign+exponent field of x
      setf.exp      sincosf_2TOM61      = sincosf_GR_exp_2tom61
      addl          gr_tmp              = -1,r0 // all-ones value used to build the "inexact" constant
};;

// Load the two pieces of pi/16
// Form another constant
//  1.1000...000 * 2^63, the right shift constant
// x = 0/Inf/NaN (p6) leaves the main path here.
{ .mmb
      ldfe          sincosf_Pi_by_16_1  = [sincosf_AD_1],16
      setf.d        sincosf_RSHF        = sincosf_GR_rshf
(p6)  br.cond.spnt  _SINCOSF_SPECIAL_ARGS
};;

// Load the second piece of pi/16 and move the all-ones pattern into the
// significand of fp_tmp (multiplied by itself later to raise inexact)
{ .mmi
      ldfe          sincosf_Pi_by_16_2  = [sincosf_AD_1],16
      setf.sig      fp_tmp              = gr_tmp // constant for inexact set
      nop.i         999
};;

// Polynomial coefficients (Q2, Q1, P2, P1) loading
{ .mmi
      ldfpd         sincosf_P2,sincosf_Q2 = [sincosf_AD_1],16
      nop.m         999
      nop.i         999
};;

// Select exponent (17 lsb)
{ .mmi
      ldfpd         sincosf_P1,sincosf_Q1 = [sincosf_AD_1],16
      nop.m         999
      dep.z         sincosf_r_exp         = sincosf_r_signexp, 0, 17
};;

// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x10017 (2^24)
{ .mfb
      cmp.ge        p10,p0              = sincosf_r_exp,sincosf_exp_limit
      nop.f         999
(p10) br.cond.spnt  _SINCOSF_LARGE_ARGS // Go to "large args" routine
};;

// sincosf_W          = x * sincosf_Inv_Pi_by_16
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
//   rightmost bits of significand
{ .mfi
      nop.m         999
      fma.s1 sincosf_W_2TO61_RSH = sincosf_NORM_f8, sincosf_SIG_INV_PI_BY_16_2TO61, sincosf_RSHF_2TO61
      nop.i         999
};;

// sincosf_NFLOAT = Round_Int_Nearest(sincosf_W)
// This is done by scaling back by 2^-61 and subtracting the shift constant
{ .mfi
      nop.m         999
      fms.s1 sincosf_NFLOAT = sincosf_W_2TO61_RSH,sincosf_2TOM61,sincosf_RSHF
      nop.i         999
};;

// get N = (int)sincosf_int_Nfloat
// (read straight from the significand of the shifted product)
{ .mfi
      getf.sig      sincosf_GR_n        = sincosf_W_2TO61_RSH // integer N value
      nop.f         999
      nop.i         999
};;

// Add 2^(k-1) (which is in sincosf_r_sincos=8) to N
// sincosf_r          = -sincosf_Nfloat * sincosf_Pi_by_16_1 + x
{ .mfi
      add           sincosf_GR_n        = sincosf_GR_n, sincosf_r_sincos
      fnma.s1 sincosf_r = sincosf_NFLOAT, sincosf_Pi_by_16_1, sincosf_NORM_f8
      nop.i         999
};;

// Get M (least k+1 bits of N)
{ .mmi
      and           sincosf_GR_m        = 0x1f,sincosf_GR_n // Put mask 0x1F  -
      nop.m         999                                     // - select k+1 bits
      nop.i         999
};;

// Add 16*M to address of sin_cos_beta table
// NOTE(review): the index is read from sincosf_GR_32m although the mask above
// wrote sincosf_GR_m -- presumably both names alias the same register in the
// register-alias section outside this view; confirm.
// NOTE(review): assumes sincosf_AD_1, after the four post-incremented loads
// above, points at the first sin/cos table entry -- confirm against the data
// layout.
{ .mfi
      shladd        sincosf_AD_2        = sincosf_GR_32m, 4, sincosf_AD_1
(p8)  fclass.m.unc  p10,p0              = f8,0x0b  // If sin denormal input -
      nop.i         999                            // - p10 is set (used below)
};;

// Load Sin and Cos table value using obtained index m  (sincosf_AD_2)
{ .mfi
      ldfd          sincosf_Sm          = [sincosf_AD_2],8 // Sin value S[m]
(p9)  fclass.m.unc  p11,p0              = f8,0x0b  // If cos denormal input -
      nop.i         999                            // - set denormal
};;

// sincosf_r          = sincosf_r -sincosf_Nfloat * sincosf_Pi_by_16_2
{ .mfi
      ldfd          sincosf_Cm          = [sincosf_AD_2] // Cos table value C[m]
      fnma.s1  sincosf_r_exact = sincosf_NFLOAT, sincosf_Pi_by_16_2, sincosf_r
      nop.i         999
}
// get rsq = r*r
{ .mfi
      nop.m         999
      fma.s1        sincosf_rsq         = sincosf_r, sincosf_r,  f0 // r^2 = r*r
      nop.i         999
};;

{ .mfi
      nop.m         999
      fmpy.s0       fp_tmp              = fp_tmp, fp_tmp // forces inexact flag
      nop.i         999
};;

// Polynomials calculation
// Q = Q2*r^2 + Q1
// P = P2*r^2 + P1
{ .mfi
      nop.m         999
      fma.s1        sincosf_Q           = sincosf_rsq, sincosf_Q2, sincosf_Q1
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        sincosf_P           = sincosf_rsq, sincosf_P2, sincosf_P1
      nop.i         999
};;

// get rcube and S[m]*r^2
{ .mfi
      nop.m         999
      fmpy.s1       sincosf_srsq        = sincosf_Sm,sincosf_rsq // r^2*S[m]
      nop.i         999
}
{ .mfi
      nop.m         999
      fmpy.s1       sincosf_rcub        = sincosf_r_exact, sincosf_rsq
      nop.i         999
};;

// Get final P and Q
// Q = Q*S[m]*r^2 + S[m]
// P = P*r^3 + r
{ .mfi
      nop.m         999
      fma.s1        sincosf_Q           = sincosf_srsq,sincosf_Q, sincosf_Sm
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        sincosf_P           = sincosf_rcub,sincosf_P,sincosf_r_exact
      nop.i         999
};;

// If sinf(denormal) - force underflow to be set
.pred.rel "mutex",p10,p11
{ .mfi
      nop.m         999
(p10) fmpy.s.s0     fp_tmp              = f8,f8 // forces underflow flag
      nop.i         999                         // for denormal sine args
}
// If cosf(denormal) - force denormal to be set
{ .mfi
      nop.m         999
(p11) fma.s.s0     fp_tmp              = f8, f1, f8 // forces denormal flag
      nop.i         999                              // for denormal cosine args
};;

// Final calculation
// result = C[m]*P + Q
{ .mfb
      nop.m         999
      fma.s.s0      f8                  = sincosf_Cm, sincosf_P, sincosf_Q
      br.ret.sptk   b0 // Exit for common path
};;

////////// x = 0/Inf/NaN path //////////////////
// Reached from the common path when p6 is set.  Exactly one of p8 (sin) and
// p9 (cos) is true, so only one of the two fma instructions writes f8.
_SINCOSF_SPECIAL_ARGS:
.pred.rel "mutex",p8,p9
// sinf(+/-0) = +/-0
// sinf(Inf)  = NaN
// sinf(NaN)  = NaN
{ .mfi
      nop.m         999
(p8)  fma.s.s0      f8                  = f8, f0, f0 // sinf(+/-0,NaN,Inf)
      nop.i         999
}
// cosf(+/-0) = 1.0
// cosf(Inf)  = NaN
// cosf(NaN)  = NaN
{ .mfb
      nop.m         999
(p9)  fma.s.s0      f8                  = f8, f0, f1 // cosf(+/-0,NaN,Inf)
      br.ret.sptk   b0 // Exit for x = 0/Inf/NaN path
};;
GLOBAL_IEEE754_END(cosf)

//////////// x >= 2^24 - large arguments routine call ////////////
// Reached from the common path when p10 is set.  Saves ar.pfs, gp and b0,
// calls __libm_sin_large (p8) or __libm_cos_large (p9), rounds the value
// returned in f8 to single precision, raises inexact, restores the saved
// state and returns to the original caller.
LOCAL_LIBM_ENTRY(__libm_callout_sincosf)
_SINCOSF_LARGE_ARGS:
.prologue
{ .mfi
      mov           sincosf_GR_all_ones = -1 // all bits set
      nop.f         999
.save ar.pfs,GR_SAVE_PFS
      mov           GR_SAVE_PFS         = ar.pfs
};;

{ .mfi
      mov           GR_SAVE_GP          = gp
      nop.f         999
.save b0, GR_SAVE_B0
      mov           GR_SAVE_B0          = b0
}
.body
{ .mbb
      setf.sig      sincosf_save_tmp    = sincosf_GR_all_ones  // inexact set
      nop.b         999
(p8)  br.call.sptk.many b0              = __libm_sin_large# // sinf(large_X)
};;
// p9 is re-derived from sincosf_r_sincos here, after the possible sin call
// has returned, rather than relying on the value set at entry.
{ .mbb
      cmp.ne        p9,p0               = sincosf_r_sincos, r0 // set p9 if cos
      nop.b         999
(p9)  br.call.sptk.many b0              = __libm_cos_large# // cosf(large_X)
};;
{ .mfi
      mov           gp                  = GR_SAVE_GP
      fma.s.s0      f8                  = f8, f1, f0 // Round result to single
      mov           b0                  = GR_SAVE_B0
}
{ .mfi // force inexact set
      nop.m         999
      fmpy.s0       sincosf_save_tmp    = sincosf_save_tmp, sincosf_save_tmp
      nop.i         999
};;
{ .mib
      nop.m         999
      mov           ar.pfs              = GR_SAVE_PFS
      br.ret.sptk   b0 // Exit for large arguments routine call
};;
LOCAL_LIBM_END(__libm_callout_sincosf)

// External large-argument helpers called above
.type    __libm_sin_large#, @function
.global  __libm_sin_large#
.type    __libm_cos_large#, @function
.global  __libm_cos_large#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -