⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 s_cos.s

📁 glibc 2.9,最新版的C语言库函数
💻 S
📖 第 1 页 / 共 2 页
字号:
data8 0x8000000000000000 , 0x00003fff // cos(32 pi/16) C0
LOCAL_OBJECT_END(double_sin_cos_beta_k4)

.section .text

////////////////////////////////////////////////////////
// There are two entry points: sin and cos
//
// If from sin, p8 is true
// If from cos, p9 is true
//
// Both entry points take the argument in f8 and return the result in f8.
// Each entry loads the same constants, sets the p8/p9 selector pair and
// sincos_r_sincos (0 for sin, 8 for cos), then reaches _SINCOS_COMMON:
// sin branches there explicitly, cos falls through.

GLOBAL_IEEE754_ENTRY(sin)

{ .mlx
      getf.exp      sincos_r_signexp    = f8
      movl sincos_GR_sig_inv_pi_by_16   = 0xA2F9836E4E44152A // significand of 16/pi
}
{ .mlx
      addl          sincos_AD_1         = @ltoff(double_sincos_pi), gp
      movl sincos_GR_rshf_2to61         = 0x47b8000000000000 // 1.1 2^(63+63-2)
}
;;

// NOTE(review): sin normalizes with status field s0 here, while the cos entry
// below uses s1 -- confirm that this asymmetry is intentional.
{ .mfi
      ld8           sincos_AD_1         = [sincos_AD_1]
      fnorm.s0      sincos_NORM_f8      = f8  // Normalize argument
      cmp.eq        p8,p9               = r0, r0 // set p8 (clear p9) for sin
}
{ .mib
      mov           sincos_GR_exp_2tom61  = 0xffff-61 // exponent of scale 2^-61
      mov           sincos_r_sincos       = 0x0 // sincos_r_sincos = 0 for sin
      br.cond.sptk  _SINCOS_COMMON  // go to common part
}
;;

GLOBAL_IEEE754_END(sin)

GLOBAL_IEEE754_ENTRY(cos)

{ .mlx
      getf.exp      sincos_r_signexp    = f8
      movl sincos_GR_sig_inv_pi_by_16   = 0xA2F9836E4E44152A // significand of 16/pi
}
{ .mlx
      addl          sincos_AD_1         = @ltoff(double_sincos_pi), gp
      movl sincos_GR_rshf_2to61         = 0x47b8000000000000 // 1.1 2^(63+63-2)
}
;;

{ .mfi
      ld8           sincos_AD_1         = [sincos_AD_1]
      fnorm.s1      sincos_NORM_f8      = f8 // Normalize argument
      cmp.eq        p9,p8               = r0, r0 // set p9 (clear p8) for cos
}
{ .mib
      mov           sincos_GR_exp_2tom61  = 0xffff-61 // exp of scale 2^-61
      mov           sincos_r_sincos       = 0x8 // sincos_r_sincos = 8 for cos
      nop.b         999
}
;;

////////////////////////////////////////////////////////
// All entry points end up here.
// If from sin, sincos_r_sincos is 0 and p8 is true
// If from cos, sincos_r_sincos is 8 = 2^(k-1) and p9 is true
// We add sincos_r_sincos to N

///////////// Common sin and cos part //////////////////
_SINCOS_COMMON:

// Form two constants we need
//  16/pi * 2^-2 * 2^63, scaled by 2^61 since we just loaded the significand
//  1.1000...000 * 2^(63+63-2) to right shift int(W) into the low significand
{ .mfi
      setf.sig      sincos_SIG_INV_PI_BY_16_2TO61 = sincos_GR_sig_inv_pi_by_16
      fclass.m      p6,p0                         = f8, 0xe7 // if x = 0,inf,nan
      mov           sincos_exp_limit              = 0x1001a
}
{ .mlx
      setf.d        sincos_RSHF_2TO61   = sincos_GR_rshf_2to61
      movl          sincos_GR_rshf      = 0x43e8000000000000 // 1.1 2^63
}                                                            // Right shift
;;

// Form another constant
//  2^-61 for scaling Nfloat
// 0x1001a is register_bias + 27.
// So if f8 >= 2^27, go to large argument routines
{ .mfi
      alloc         r32                 = ar.pfs, 1, 4, 0, 0
      fclass.m      p11,p0              = f8, 0x0b // Test for x=unorm
      mov           sincos_GR_all_ones  = -1 // For "inexact" constant create
}
{ .mib
      setf.exp      sincos_2TOM61       = sincos_GR_exp_2tom61
      nop.i         999
(p6)  br.cond.spnt  _SINCOS_SPECIAL_ARGS
}
;;

// Load the two pieces of pi/16
// Form another constant
//  1.1000...000 * 2^63, the right shift constant
{ .mmb
      ldfe          sincos_Pi_by_16_1   = [sincos_AD_1],16
      setf.d        sincos_RSHF         = sincos_GR_rshf
(p11) br.cond.spnt  _SINCOS_UNORM       // Branch if x=unorm
}
;;

_SINCOS_COMMON2:
// Return here if x=unorm
// Create constant used to set inexact
{ .mmi
      ldfe          sincos_Pi_by_16_2   = [sincos_AD_1],16
      setf.sig      fp_tmp              = sincos_GR_all_ones
      nop.i         999
}
;;

// Select exponent (17 lsb)
{ .mfi
      ldfe          sincos_Pi_by_16_3   = [sincos_AD_1],16
      nop.f         999
      dep.z         sincos_r_exp        = sincos_r_signexp, 0, 17
}
;;

// Polynomial coefficients (Q4, P4, Q3, P3, Q2, Q1, P2, P1) loading
// p10 is true if we must call routines to handle larger arguments
// p10 is true if f8 exp is >= 0x1001a (2^27)
{ .mmb
      ldfpd         sincos_P4,sincos_Q4 = [sincos_AD_1],16
      cmp.ge        p10,p0              = sincos_r_exp,sincos_exp_limit
(p10) br.cond.spnt  _SINCOS_LARGE_ARGS // Go to "large args" routine
}
;;

// sincos_W          = x * sincos_Inv_Pi_by_16
// Multiply x by scaled 16/pi and add large const to shift integer part of W to
//   rightmost bits of significand
{ .mfi
      ldfpd         sincos_P3,sincos_Q3 = [sincos_AD_1],16
      fma.s1 sincos_W_2TO61_RSH = sincos_NORM_f8,sincos_SIG_INV_PI_BY_16_2TO61,sincos_RSHF_2TO61
      nop.i         999
}
;;

// get N = (int)sincos_int_Nfloat
// sincos_NFLOAT = Round_Int_Nearest(sincos_W)
// This is done by scaling back by 2^-61 and subtracting the shift constant
{ .mmf
      getf.sig      sincos_GR_n         = sincos_W_2TO61_RSH
      ldfpd         sincos_P2,sincos_Q2 = [sincos_AD_1],16
      fms.s1 sincos_NFLOAT = sincos_W_2TO61_RSH,sincos_2TOM61,sincos_RSHF
}
;;

// sincos_r          = -sincos_Nfloat * sincos_Pi_by_16_1 + x
{ .mfi
      ldfpd         sincos_P1,sincos_Q1 = [sincos_AD_1],16
      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_1, sincos_NORM_f8
      nop.i         999
}
;;

// Add 2^(k-1) (which is in sincos_r_sincos) to N
// (the stop inside this bundle separates the add from the dependent and)
{ .mmi
      add           sincos_GR_n         = sincos_GR_n, sincos_r_sincos
;;
// Get M (least k+1 bits of N)
      and           sincos_GR_m         = 0x1f,sincos_GR_n
      nop.i         999
}
;;

// sincos_r          = sincos_r -sincos_Nfloat * sincos_Pi_by_16_2
{ .mfi
      nop.m         999
      fnma.s1 sincos_r = sincos_NFLOAT, sincos_Pi_by_16_2,  sincos_r
      shl           sincos_GR_32m       = sincos_GR_m,5
}
;;

// Add 32*M to address of sin_cos_beta table
// For sin denorm. - set uflow
{ .mfi
      add           sincos_AD_2         = sincos_GR_32m, sincos_AD_1
(p8)  fclass.m.unc  p10,p0              = f8,0x0b
      nop.i         999
}
;;

// Load Sin and Cos table value using obtained index m  (sincos_AD_2)
{ .mfi
      ldfe          sincos_Sm           = [sincos_AD_2],16
      nop.f         999
      nop.i         999
}
;;

// get rsq = r*r
{ .mfi
      ldfe          sincos_Cm           = [sincos_AD_2]
      fma.s1        sincos_rsq          = sincos_r, sincos_r,   f0 // r^2 = r*r
      nop.i         999
}
{ .mfi
      nop.m         999
      fmpy.s0       fp_tmp              = fp_tmp,fp_tmp // forces inexact flag
      nop.i         999
}
;;

// sincos_r_exact = sincos_r -sincos_Nfloat * sincos_Pi_by_16_3
{ .mfi
      nop.m         999
      fnma.s1 sincos_r_exact = sincos_NFLOAT, sincos_Pi_by_16_3, sincos_r
      nop.i         999
}
;;

// Polynomials calculation
// P_temp1 = P4*r^2 + P3
// Q_temp1 = Q4*r^2 + Q3
{ .mfi
      nop.m         999
      fma.s1        sincos_P_temp1      = sincos_rsq, sincos_P4, sincos_P3
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        sincos_Q_temp1      = sincos_rsq, sincos_Q4, sincos_Q3
      nop.i         999
}
;;

// get rcube = r^3 and S[m]*r^2
{ .mfi
      nop.m         999
      fmpy.s1       sincos_srsq         = sincos_Sm,sincos_rsq
      nop.i         999
}
{ .mfi
      nop.m         999
      fmpy.s1       sincos_rcub         = sincos_r_exact, sincos_rsq
      nop.i         999
}
;;

// Polynomials calculation
// Q_temp2 = Q_temp1*r^2 + Q2
// P_temp2 = P_temp1*r^2 + P2
{ .mfi
      nop.m         999
      fma.s1        sincos_Q_temp2      = sincos_rsq, sincos_Q_temp1, sincos_Q2
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        sincos_P_temp2      = sincos_rsq, sincos_P_temp1, sincos_P2
      nop.i         999
}
;;

// Polynomials calculation
// Q = Q_temp2*r^2 + Q1
// P = P_temp2*r^2 + P1
{ .mfi
      nop.m         999
      fma.s1        sincos_Q            = sincos_rsq, sincos_Q_temp2, sincos_Q1
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        sincos_P            = sincos_rsq, sincos_P_temp2, sincos_P1
      nop.i         999
}
;;

// Get final P and Q
// Q = Q*S[m]*r^2 + S[m]
// P = P*r^3 + r
{ .mfi
      nop.m         999
      fma.s1        sincos_Q            = sincos_srsq,sincos_Q, sincos_Sm
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        sincos_P            = sincos_rcub,sincos_P, sincos_r_exact
      nop.i         999
}
;;

// If sin(denormal), force underflow to be set
{ .mfi
      nop.m         999
(p10) fmpy.d.s0     fp_tmp              = sincos_NORM_f8,sincos_NORM_f8
      nop.i         999
}
;;

// Final calculation
// result = C[m]*P + Q
{ .mfb
      nop.m         999
      fma.d.s0      f8                  = sincos_Cm, sincos_P, sincos_Q
      br.ret.sptk   b0  // Exit for common path
}
;;

////////// x = 0/Inf/NaN path //////////////////
_SINCOS_SPECIAL_ARGS:
.pred.rel "mutex",p8,p9
// sin(+/-0) = +/-0
// sin(Inf)  = NaN
// sin(NaN)  = NaN
{ .mfi
      nop.m         999
(p8)  fma.d.s0      f8                  = f8, f0, f0 // sin(+/-0,NaN,Inf)
      nop.i         999
}
// cos(+/-0) = 1.0
// cos(Inf)  = NaN
// cos(NaN)  = NaN
{ .mfb
      nop.m         999
(p9)  fma.d.s0      f8                  = f8, f0, f1 // cos(+/-0,NaN,Inf)
      br.ret.sptk   b0 // Exit for x = 0/Inf/NaN path
}
;;

_SINCOS_UNORM:
// Here if x=unorm
{ .mfb
      getf.exp      sincos_r_signexp    = sincos_NORM_f8 // Get signexp of x
      fcmp.eq.s0    p11,p0              = f8, f0  // Dummy op to set denorm flag
      br.cond.sptk  _SINCOS_COMMON2     // Return to main path
}
;;

GLOBAL_IEEE754_END(cos)

//////////// x >= 2^27 - large arguments routine call ////////////
// Reached from the common path when the exponent test sets p10.
// Saves sincos_r_sincos, ar.pfs, gp and b0, calls __libm_sin_large (p8) or
// __libm_cos_large (p9), rounds the result in f8 to double, forces the
// inexact flag, restores gp, b0 and ar.pfs, and returns.
LOCAL_LIBM_ENTRY(__libm_callout_sincos)
_SINCOS_LARGE_ARGS:
.prologue
{ .mfi
      mov           GR_SAVE_r_sincos    = sincos_r_sincos // Save sin or cos
      nop.f         999
.save ar.pfs,GR_SAVE_PFS
      mov           GR_SAVE_PFS         = ar.pfs
}
;;

{ .mfi
      mov           GR_SAVE_GP          = gp
      nop.f         999
.save b0, GR_SAVE_B0
      mov           GR_SAVE_B0          = b0
}

.body
{ .mbb
      setf.sig      sincos_save_tmp     = sincos_GR_all_ones // inexact set
      nop.b         999
(p8)  br.call.sptk.many b0              = __libm_sin_large# // sin(large_X)
}
;;

// p9 is recomputed from the saved selector before the cos call is considered
{ .mbb
      cmp.ne        p9,p0               = GR_SAVE_r_sincos, r0 // set p9 if cos
      nop.b         999
(p9)  br.call.sptk.many b0              = __libm_cos_large# // cos(large_X)
}
;;

{ .mfi
      mov           gp                  = GR_SAVE_GP
      fma.d.s0      f8                  = f8, f1, f0 // Round result to double
      mov           b0                  = GR_SAVE_B0
}
// Force inexact set
{ .mfi
      nop.m         999
      fmpy.s0       sincos_save_tmp     = sincos_save_tmp, sincos_save_tmp
      nop.i         999
}
;;

{ .mib
      nop.m         999
      mov           ar.pfs              = GR_SAVE_PFS
      br.ret.sptk   b0 // Exit for large arguments routine call
}
;;

LOCAL_LIBM_END(__libm_callout_sincos)

.type    __libm_sin_large#,@function
.global  __libm_sin_large#
.type    __libm_cos_large#,@function
.global  __libm_cos_large#

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -