// File: libm_lgammaf.s
// (web code-viewer residue, not part of the source: "Font size:")
}
// NOTE(review): the closing brace above and the bundle below are the tail of
// a routine whose beginning lies outside this excerpt; they are reproduced
// unchanged (only re-lined).
{ .mfb
      nop.m         0
(p7)  fma.s.s0      f8 = FR_A1,FR_x,FR_A0
      br.ret.sptk   b0
};;

//---------------------------------------------------------------------
// lgammaf_negrecursion
//
// Branch for calculating ln(GAMMA(x)) for -9 < x < 1.
//
// Visible behaviour of this branch:
//   * branches to lgammaf_singularity when FR_NormX == FR_Ntrunc;
//   * stores GR_SignOfGamma through r33, as 4 bytes or as 8 bytes
//     depending on whether r34 == 8;
//   * returns the single-precision result in f8 via br.ret b0, either
//     from the early "close to a negative root" exit or from the final
//     polynomial-minus-logarithm bundles.
//
// Reads tables at fixed offsets from GR_ad_Data (0xB30, 0xCE0, 0xD00,
// 0xDE0, 0xE00, 0xE10) and an 8-byte-stride table indexed by GR_Ind4T.
//
// NOTE(review): FR_NormX, FR_Ntrunc, FR_int_Ntrunc, FR_Xp1..FR_Xp5,
// FR_Req, FR_P1..FR_P3, FR_Ln2, FR_05, FR_2, FR_3, FR_LocalMin and
// GR_ad_Data are consumed here but set up outside this excerpt; the
// initial value of GR_SignOfGamma (used when p15 is clear) is presumably
// set by the caller path as well -- confirm against the entry code.
//
// Instruction order, bundle templates and stop bits are exactly those of
// the original; only the physical line structure was restored.
//---------------------------------------------------------------------
.align 32
lgammaf_negrecursion:
{ .mfi
      getf.sig      GR_N = FR_int_Ntrunc
      fms.s1        FR_1pXf = FR_Xp2,f1,FR_Ntrunc // 1 + (x+1) - [x]
      mov           GR_Neg2 = 2
}
{ .mfi
      add           GR_ad_Co = 0xCE0,GR_ad_Data
      fms.s1        FR_Xf = FR_Xp1,f1,FR_Ntrunc // (x+1) - [x]
      mov           GR_Neg4 = 4
};;
{ .mfi
      add           GR_ad_Ce = 0xD00,GR_ad_Data
      fma.s1        FR_Xp6 = FR_2,FR_2,FR_Xp2 // (x+6)
      add           GR_ad_C0 = 0xB30,GR_ad_Data
}
{ .mfi
      sub           GR_Neg2 = r0,GR_Neg2 // GR_Neg2 = -2
      fma.s1        FR_Xp7 = FR_2,FR_3,FR_Xp1 // (x+7)
      sub           GR_Neg4 = r0,GR_Neg4 // GR_Neg4 = -4
};;
{ .mfi
      cmp.ne        p8,p0 = r0,GR_N // p8: GR_N != 0
      fcmp.eq.s1    p13,p0 = FR_NormX,FR_Ntrunc // p13: FR_NormX == FR_Ntrunc
      and           GR_IntNum = 0xF,GR_N // low 4 bits of GR_N
}
{ .mfi
      cmp.lt        p6,p0 = GR_N,GR_Neg2 // p6: GR_N < -2
      fma.s1        FR_Xp8 = FR_2,FR_3,FR_Xp2 // (x+8)
      cmp.lt        p7,p0 = GR_N,GR_Neg4 // p7: GR_N < -4
};;
{ .mfi
      getf.d        GR_Arg = FR_NormX
(p6)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp3,f0
      // p14: bit 0 of GR_IntNum is 0, p15: bit 0 is 1 (both cleared if !p8)
(p8)  tbit.z.unc    p14,p15 = GR_IntNum,0
}
{ .mfi
      sub           GR_RootInd = 0xE,GR_IntNum
(p7)  fma.s1        FR_Xp4 = FR_Xp4,FR_Xp5,f0
      add           GR_ad_Root = 0xDE0,GR_ad_Data
};;
{ .mfi
      shladd        GR_ad_Root = GR_RootInd,3,GR_ad_Root
      fms.s1        FR_x = FR_Xp1,f1,FR_Ntrunc // (x+1) - [x]
      nop.i         0
}
{ .mfb
      nop.m         0
      nop.f         0
      // FR_NormX equals its truncated value: handled by the singularity path
(p13) br.cond.spnt  lgammaf_singularity
};;
.pred.rel "mutex",p14,p15
{ .mfi
      cmp.gt        p6,p0 = 0xA,GR_IntNum // p6: GR_IntNum < 0xA
(p14) fma.s1        FR_Req = FR_Req,FR_Xf,f0
      cmp.gt        p7,p0 = 0xD,GR_IntNum // p7: GR_IntNum < 0xD
}
{ .mfi
(p15) mov           GR_SignOfGamma = 1 // set sign of gamma(x) to 1
(p15) fnma.s1       FR_Req = FR_Req,FR_Xf,f0
      // p13 is reused from here on: p13 = (GR_RootInd < 2, unsigned)
      cmp.leu       p0,p13 = 2,GR_RootInd
};;
{ .mfi
      nop.m         0
(p6)  fma.s1        FR_Xp6 = FR_Xp6,FR_Xp7,f0
(p13) add           GR_ad_RootCo = 0xE00,GR_ad_Data
};;
{ .mfi
      nop.m         0
      fcmp.eq.s1    p12,p11 = FR_1pXf,FR_2 // p12: FR_1pXf == 2, p11 otherwise
      nop.i         0
};;
{ .mfi
      getf.sig      GR_Sig = FR_1pXf
      fcmp.le.s1    p9,p0 = FR_05,FR_Xf // p9: FR_05 <= FR_Xf
      nop.i         0
}
{ .mfi
(p13) shladd        GR_RootInd = GR_RootInd,4,r0
(p7)  fma.s1        FR_Xp2 = FR_Xp2,FR_Xp4,f0
(p8)  cmp.gt.unc    p10,p0 = 0x9,GR_IntNum
};;
.pred.rel "mutex",p11,p12
{ .mfi
      nop.m         0
(p10) fma.s1        FR_Req = FR_Req,FR_Xp8,f0
      // GR_Ind = 2-bit field at bit 61 of the significand of FR_1pXf ...
(p11) extr.u        GR_Ind = GR_Sig,61,2
}
{ .mfi
(p13) add           GR_RootInd = GR_RootInd,GR_RootInd
      nop.f         0
      // ... or 3 when FR_1pXf is exactly 2
(p12) mov           GR_Ind = 3
};;
{ .mfi
      shladd        GR_IndX2 = GR_Ind,1,r0
      nop.f         0
      cmp.gt        p14,p0 = 2,GR_Ind // p14: GR_Ind < 2
}
{ .mfi
      shladd        GR_IndX8 = GR_Ind,3,r0
      nop.f         0
      cmp.eq        p6,p0 = 1,GR_Ind // p6: GR_Ind == 1
};;
.pred.rel "mutex",p6,p9
{ .mfi
      shladd        GR_ad_Co = GR_IndX8,3,GR_ad_Co
(p6)  fms.s1        FR_x = FR_Xf,f1,FR_LocalMin
      cmp.gt        p10,p0 = 0xB,GR_IntNum // p10: GR_IntNum < 0xB
}
{ .mfi
      shladd        GR_ad_Ce = GR_IndX8,3,GR_ad_Ce
(p9)  fma.s1        FR_x = f0,f0,FR_1pXf
      shladd        GR_ad_C0 = GR_IndX2,4,GR_ad_C0
};;
{ .mfi
      // load coefficients of polynomial approximation
      // of ln(GAMMA(x)), 1 <= x < 2
      ldfpd         FR_A8,FR_A7 = [GR_ad_Co],16
(p10) fma.s1        FR_Xp2 = FR_Xp2,FR_Xp6,f0
      add           GR_ad_C0 = 8,GR_ad_C0
}
{ .mfi
      ldfpd         FR_A6,FR_A5 = [GR_ad_Ce],16
      nop.f         0
(p14) add           GR_ad_Root = 0x10,GR_ad_Root
};;
{ .mfi
      ldfpd         FR_A4,FR_A3 = [GR_ad_Co],16
      nop.f         0
      add           GR_ad_RootCe = 0xE10,GR_ad_Data
}
{ .mfi
      ldfpd         FR_A2,FR_A1 = [GR_ad_Ce],16
      nop.f         0
(p14) add           GR_RootInd = 0x40,GR_RootInd
};;
{ .mmi
      ldfd          FR_A0 = [GR_ad_C0]
(p13) add           GR_ad_RootCo = GR_ad_RootCo,GR_RootInd
(p13) add           GR_ad_RootCe = GR_ad_RootCe,GR_RootInd
};;
{ .mmi
      // the same 8 bytes are loaded both as an integer and as a double
(p13) ld8           GR_Root = [GR_ad_Root]
(p13) ldfd          FR_Root = [GR_ad_Root]
      mov           GR_ExpBias = 0xffff
};;
{ .mfi
      nop.m         0
      fma.s1        FR_x2 = FR_x,FR_x,f0
      nop.i         0
}
{ .mlx
(p8)  cmp.gt.unc    p10,p0 = 0xF,GR_IntNum
      movl          GR_Dx = 0x000000014F8B588E
};;
{ .mfi
      // load coefficients of polynomial approximation
      // of ln(GAMMA(x)), x is close to one of negative roots
(p13) ldfpd         FR_R3,FR_R2 = [GR_ad_RootCo]
      // argument for logarithm
(p10) fma.s1        FR_Req = FR_Req,FR_Xp2,f0
      mov           GR_ExpMask = 0x1ffff
}
{ .mfi
(p13) ldfpd         FR_R1,FR_R0 = [GR_ad_RootCe]
      nop.f         0
      // set p9 if signgam is 32-bit int
      // set p8 if signgam is 64-bit int
      cmp.eq        p8,p9 = 8,r34
};;
.pred.rel "mutex",p9,p8
{ .mfi
(p9)  st4           [r33] = GR_SignOfGamma // as 32-bit int
      fma.s1        FR_A7 = FR_A8,FR_x,FR_A7
      // integer distance between the bit patterns of x and the root
(p13) sub           GR_Root = GR_Arg,GR_Root
}
{ .mfi
(p8)  st8           [r33] = GR_SignOfGamma // as 64-bit int
      fma.s1        FR_A5 = FR_A6,FR_x,FR_A5
      nop.i         0
};;
{ .mfi
      nop.m         0
      fms.s1        FR_w = FR_Req,f1,f1 // w = Req - 1
(p13) add           GR_Root = GR_Root,GR_Dx
}
{ .mfi
      nop.m         0
      nop.f         0
(p13) add           GR_2xDx = GR_Dx,GR_Dx
};;
{ .mfi
      nop.m         0
      fma.s1        FR_A3 = FR_A4,FR_x,FR_A3
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_A1 = FR_A2,FR_x,FR_A1
      // p10: (distance + Dx) <= 2*Dx unsigned, i.e. within Dx of the root
(p13) cmp.leu.unc   p10,p0 = GR_Root,GR_2xDx
};;
{ .mfi
      nop.m         0
      frcpa.s1      FR_InvX,p0 = f1,FR_Req
      nop.i         0
}
{ .mfi
      nop.m         0
(p10) fms.s1        FR_rx = FR_NormX,f1,FR_Root // rx = x - root
      nop.i         0
};;
{ .mfi
      getf.exp      GR_SignExp = FR_Req
      fma.s1        FR_x4 = FR_x2,FR_x2,f0
      nop.i         0
};;
{ .mfi
      getf.sig      GR_Sig = FR_Req
      fma.s1        FR_A5 = FR_A7,FR_x2,FR_A5
      nop.i         0
};;
{ .mfi
      sub           GR_PureExp = GR_SignExp,GR_ExpBias
      fma.s1        FR_w2 = FR_w,FR_w,f0
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_Q32 = FR_P3,FR_w,FR_P2
      nop.i         0
};;
{ .mfi
      setf.sig      FR_int_Ln = GR_PureExp
      fma.s1        FR_A1 = FR_A3,FR_x2,FR_A1
      extr.u        GR_Ind4T = GR_Sig,55,8 // 8-bit table index from significand
}
{ .mfi
      nop.m         0
      fma.s1        FR_Q10 = FR_P1,FR_w,f1
      nop.i         0
};;
{ .mfi
      shladd        GR_ad_T = GR_Ind4T,3,GR_ad_Data
      fms.s1        FR_r = FR_InvX,FR_Req,f1
      nop.i         0
}
{ .mfi
      nop.m         0
(p10) fms.s1        FR_rx2 = FR_rx,FR_rx,f0
      nop.i         0
};;
{ .mfi
      ldfd          FR_T = [GR_ad_T]
(p10) fma.s1        FR_R2 = FR_R3,FR_rx,FR_R2
      nop.i         0
}
{ .mfi
      nop.m         0
(p10) fma.s1        FR_R0 = FR_R1,FR_rx,FR_R0
      nop.i         0
};;
{ .mfi
      getf.exp      GR_Exp = FR_w
      fma.s1        FR_A1 = FR_A5,FR_x4,FR_A1
      mov           GR_ExpMask = 0x1ffff
}
{ .mfi
      nop.m         0
      fma.s1        FR_Q32 = FR_Q32, FR_w2,FR_Q10
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_r2 = FR_r,FR_r,f0
      mov           GR_fff7 = 0xFFF7
}
{ .mfi
      nop.m         0
      fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_P10 = FR_P1,FR_r,f1
      and           GR_Exp = GR_ExpMask,GR_Exp // drop the sign bit of w's exponent
}
{ .mfb
      nop.m         0
(p10) fma.s.s0      f8 = FR_R2,FR_rx2,FR_R0
(p10) br.ret.spnt   b0 // exit for arguments close to negative roots
};;
{ .mfi
      nop.m         0
      fcvt.xf       FR_N = FR_int_Ln
      nop.i         0
}
{ .mfi
      // p14: exponent of w >= 0xFFF7, p15: w is smaller than that
      cmp.ge        p14,p15 = GR_Exp,GR_fff7
      nop.f         0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_A0 = FR_A1,FR_x,FR_A0
      nop.i         0
}
{ .mfi
      nop.m         0
      // small-w case: logarithm term taken directly from the polynomial in w
(p15) fma.s1        FR_Ln = FR_Q32,FR_w,f0
      nop.i         0
};;
{ .mfi
      nop.m         0
(p14) fma.s1        FR_P32 = FR_P32,FR_r2,FR_P10
      cmp.eq        p6,p7 = 0,GR_Ind // p6: GR_Ind == 0, p7 otherwise
};;
{ .mfi
      nop.m         0
(p14) fma.s1        FR_T = FR_N,FR_Ln2,FR_T
      nop.i         0
};;
{ .mfi
      nop.m         0
(p14) fma.s1        FR_Ln = FR_P32,FR_r,FR_T
      nop.i         0
};;
.pred.rel "mutex",p6,p7
{ .mfi
      nop.m         0
(p6)  fms.s.s0      f8 = FR_A0,FR_x,FR_Ln // result = A0*x - Ln
      nop.i         0
}
{ .mfb
      nop.m         0
(p7)  fms.s.s0      f8 = FR_A0,f1,FR_Ln // result = A0 - Ln
      br.ret.sptk   b0
};;

// branch for calculating of ln(GAMMA(x)) for x < -2^13
//---------------------------------------------------------------------
// NOTE(review): this routine continues past the end of the visible
// excerpt; the part below is reproduced unchanged (only re-lined) and
// must not be taken as the complete routine.
.align 32
lgammaf_negstirling:
{ .mfi
      shladd        GR_ad_T = GR_Ind4T,3,GR_ad_Data
      fms.s1        FR_Xf = FR_NormX,f1,FR_N // xf = x - [x]
      mov           GR_SingBound = 0x10016
}
{ .mfi
      add           GR_ad_Co = 0xCA0,GR_ad_Data
      fma.s1        FR_P32 = FR_P3,FR_r,FR_P2
      nop.i         0
};;
{ .mfi
      ldfd          FR_T = [GR_ad_T]
      fcvt.xf       FR_int_Ln = FR_int_Ln
      cmp.le        p6,p0 = GR_SingBound,GR_Exp
}
{ .mfb
      add           GR_ad_Ce = 0x20,GR_ad_Co
      fma.s1        FR_r2 = FR_r,FR_r,f0
(p6)  br.cond.spnt  lgammaf_singularity
};;
{ .mfi
      // load coefficients of polynomial approximation
      // of ln(sin(Pi*xf)/(Pi*xf)), |xf| <= 0.5
      ldfpd         FR_S16,FR_S14 = [GR_ad_Co],16
      fma.s1        FR_P10 = FR_P1,FR_r,f1
      nop.i         0
}
{ .mfi
      ldfpd         FR_S12,FR_S10 = [GR_ad_Ce],16
      fms.s1        FR_xm05 = FR_NormX,f1,FR_05
      nop.i         0
};;
{ .mmi
      ldfpd         FR_S8,FR_S6 = [GR_ad_Co],16
      ldfpd         FR_S4,FR_S2 = [GR_ad_Ce],16
      nop.i         0
};;
{ .mfi
      getf.sig      GR_N = FR_int_Ntrunc // signgam calculation
      fma.s1        FR_Xf2 = FR_Xf,FR_Xf,f0
      nop.i         0
};;
{ .mfi
      nop.m         0
      frcpa.s1      FR_InvXf,p0 = f1,FR_Xf
      nop.i         0
};;
{ .mfi
      getf.d        GR_Arg = FR_Xf
      fcmp.eq.s1    p6,p0 = FR_NormX,FR_N
      mov           GR_ExpBias = 0x3FF
};;
{ .mfi
      nop.m         0
      fma.s1        FR_T = FR_int_Ln,FR_Ln2,FR_T
      extr.u        GR_Exp = GR_Arg,52,11
}
{ .mfi
      nop.m         0
      fma.s1        FR_P32 = FR_P32,FR_r2,FR_P10
      nop.i         0
};;
{ .mfi
      sub           GR_PureExp = GR_Exp,GR_ExpBias
      fma.s1        FR_S14 = FR_S16,FR_Xf2,FR_S14
      extr.u        GR_Ind4T = GR_Arg,44,8
}
{ .mfb
      // NOTE(review): the immediate written here is 1 while the original
      // comment says -1; the rest of the routine is not visible, so the
      // comment is left as found -- confirm against the full source.
      mov           GR_SignOfGamma = 1 // set signgam to -1
      fma.s1        FR_S10 = FR_S12,FR_Xf2,FR_S10
(p6)  br.cond.spnt  lgammaf_singularity
};;
{ .mfi
      setf.sig      FR_int_Ln = GR_PureExp
      fms.s1        FR_rf = FR_InvXf,FR_Xf,f1
      // set p14 if GR_N is even
      tbit.z        p14,p0 = GR_N,0
}
// NOTE(review): the listing is truncated after the bundle opener below.
{ .mfi
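// ---------------------------------------------------------------------
// Web code-viewer residue (not part of the assembly source):
// Keyboard shortcuts
//   Copy code             Ctrl + C
//   Search code           Ctrl + F
//   Full-screen mode      F11
//   Toggle theme          Ctrl + Shift + D
//   Show shortcuts        ?
//   Increase font size    Ctrl + =
//   Decrease font size    Ctrl + -
// ---------------------------------------------------------------------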