📄 w_tgammaf.s
字号:
// NOTE: the first three lines below are the tail of a bundle whose opening
// brace and leading slot(s) precede this point.
(p10) fma.s1        FR_InvNormX2 = FR_InvNormX2,FR_InvAn,f0
      nop.i         0
};;
{ .mfi
      nop.m         0
      frcpa.s1      FR_Rcp0,p0 = f1,FR_GAMMA // initial approximation of 1/GAMMA
      nop.i         0
};;
{ .mfi
      nop.m         0
      fms.s1        FR_Multplr = FR_NormX,f1,f1 // x - 1
      nop.i         0
};;
{ .mfi
      nop.m         0
      // NR-iteration
      fnma.s1       FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1
      nop.i         0
};;
.pred.rel "mutex",p8,p9
{ .mfi
      nop.m         0
      // 1/x or 1/(An*x)
(p8)  fma.s1        FR_Multplr = FR_InvNormX2,FR_InvNormX1,FR_InvNormX2
      nop.i         0
}
{ .mfi
      nop.m         0
(p9)  fma.s1        FR_Multplr = f1,f1,f0 // multiplier = 1
      nop.i         0
};;
{ .mfi
      nop.m         0
      // NR-iteration
      fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
      nop.i         0
};;
{ .mfi
      nop.m         0
      // NR-iteration
      fnma.s1       FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1
      nop.i         0
}
{ .mfi
      nop.m         0
      // NR-iteration
      fma.s1        FR_Rcp1 = FR_Rcp1,FR_Multplr,f0
      nop.i         0
};;
{ .mfb
      nop.m         0
      // final single-precision result: Rcp1 + Rcp1*Rcp2
      fma.s.s0      f8 = FR_Rcp1,FR_Rcp2,FR_Rcp1
      br.ret.sptk   b0
};;

// here if 0 < x < 1
//--------------------------------------------------------------------
.align 32
tgammaf_from_0_to_1:
{ .mfi
      cmp.lt        p7,p0 = GR_Arg,GR_ExpOf05
      // NR-iteration
      fnma.s1       FR_Rcp1 = FR_Rcp0,FR_NormX,f1
      cmp.eq        p8,p0 = GR_Arg,GR_ExpOf05
}
{ .mfi
      cmp.gt        p9,p0 = GR_Arg,GR_ExpOf05
      fma.s1        FR_r = f0,f0,FR_NormX // reduced arg for (0;1)
      mov           GR_ExpOf025 = 0x7FA
};;
{ .mfi
      getf.s        GR_ArgNz = f8
      fma.d.s0      FR_X = f0,f0,f8 // set deno flag
      shl           GR_OvfNzBound = GR_OvfNzBound,20
}
{ .mfi
(p8)  mov           GR_Tbl12Offs = 0x80 // 0.5 <= x < 0.75
      nop.f         0
(p7)  cmp.ge.unc    p6,p0 = GR_Arg,GR_ExpOf025
};;
// NOTE(review): when GR_Arg < GR_ExpOf025 none of p6/p8/p9 is set, so
// GR_Tbl12Offs keeps whatever value it had on entry - presumably preset
// before this point; confirm against the code above this section.
.pred.rel "mutex",p6,p9
{ .mfi
(p9)  mov           GR_Tbl12Offs = 0xC0 // 0.75 <= x < 1
      nop.f         0
(p6)  mov           GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5
}
{ .mfi
      add           GR_ad_Ce = 0x2C0,GR_ad_Data
      nop.f         0
      add           GR_ad_Co = 0x2A0,GR_ad_Data
};;
{ .mfi
      add           GR_ad_Co = GR_ad_Co,GR_Tbl12Offs
      nop.f         0
      cmp.lt        p12,p0 = GR_ArgNz,GR_OvfNzBound
}
{ .mib
      add           GR_ad_Ce = GR_ad_Ce,GR_Tbl12Offs
      cmp.eq        p7,p0 = GR_ArgNz,GR_OvfNzBound
      // jump if argument is 0x00200000
(p7)  br.cond.spnt  tgammaf_overflow_near0_bound
};;
{ .mmb
      ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
      ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
      // jump if argument is close to 0 positive
(p12) br.cond.spnt  tgammaf_overflow
};;
{ .mfi
      ldfpd         FR_A3,FR_A2 = [GR_ad_Co],16
      // NR-iteration
      fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
      nop.i         0
}
{ .mfb
      ldfpd         FR_A1,FR_A0 = [GR_ad_Ce],16
      nop.f         0
      br.cond.sptk  tgamma_from_0_to_2
};;

// here if 1 < x < 2
//--------------------------------------------------------------------
.align 32
tgammaf_from_1_to_2:
{ .mfi
      add           GR_ad_Co = 0x2A0,GR_ad_Data
      fms.s1        FR_r = f0,f0,FR_1mX // r = -FR_1mX
      shr           GR_TblOffs = GR_Arg,47
}
{ .mfi
      add           GR_ad_Ce = 0x2C0,GR_ad_Data
      nop.f         0
      mov           GR_TblOffsMask = 0x18
};;
{ .mfi
      nop.m         0
      nop.f         0
      and           GR_TblOffs = GR_TblOffs,GR_TblOffsMask
};;
{ .mfi
      // table address = base + GR_TblOffs*8
      shladd        GR_ad_Co = GR_TblOffs,3,GR_ad_Co
      nop.f         0
      nop.i         0
}
{ .mfi
      shladd        GR_ad_Ce = GR_TblOffs,3,GR_ad_Ce
      nop.f         0
      // p6 selects the FR_LocalMin adjustment of r applied below
      cmp.eq        p6,p7 = 8,GR_TblOffs
};;
{ .mmi
      ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
      ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
      nop.i         0
};;
{ .mmi
      ldfpd         FR_A3,FR_A2 = [GR_ad_Co],16
      ldfpd         FR_A1,FR_A0 = [GR_ad_Ce],16
      nop.i         0
};;

// Common polynomial evaluation for the two paths above.
// Computes ((A7*r+A6)*r2 + (A5*r+A4))*r4 + ((A3*r+A2)*r2 + (A1*r+A0)).
// Under p11 that value is the result; under p10 it is further multiplied
// by the refined reciprocal FR_Rcp2 of FR_NormX.
.align 32
tgamma_from_0_to_2:
{ .mfi
      nop.m         0
(p6)  fms.s1        FR_r = FR_r,f1,FR_LocalMin // r = r - LocalMin
      nop.i         0
};;
{ .mfi
      nop.m         0
      // NR-iteration
(p10) fnma.s1       FR_Rcp2 = FR_Rcp1,FR_NormX,f1
      nop.i         0
};;
{ .mfi
      nop.m         0
      fms.s1        FR_r2 = FR_r,FR_r,f0 // r^2
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_A7 = FR_A7,FR_r,FR_A6
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_A5 = FR_A5,FR_r,FR_A4
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_A3 = FR_A3,FR_r,FR_A2
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_A1 = FR_A1,FR_r,FR_A0
      nop.i         0
};;
{ .mfi
      nop.m         0
      // NR-iteration
(p10) fma.s1        FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_A7 = FR_A7,FR_r2,FR_A5
      nop.i         0
}
{ .mfi
      nop.m         0
      fma.s1        FR_r4 = FR_r2,FR_r2,f0 // r^4
      nop.i         0
};;
{ .mfi
      nop.m         0
      fma.s1        FR_A3 = FR_A3,FR_r2,FR_A1
      nop.i         0
};;
{ .mfi
      nop.m         0
(p10) fma.s1        FR_GAMMA = FR_A7,FR_r4,FR_A3
      nop.i         0
}
{ .mfi
      nop.m         0
(p11) fma.s.s0      f8 = FR_A7,FR_r4,FR_A3
      nop.i         0
};;
{ .mfb
      nop.m         0
(p10) fma.s.s0      f8 = FR_GAMMA,FR_Rcp2,f0
      br.ret.sptk   b0
};;

// overflow
//--------------------------------------------------------------------
.align 32
tgammaf_overflow_near0_bound:
.pred.rel "mutex",p14,p15
{ .mfi
      mov           GR_fpsr = ar.fpsr
      nop.f         0
(p15) mov           r8 = 0x7f8
}
{ .mfi
      nop.m         0
      nop.f         0
(p14) mov           r8 = 0xff8
};;
{ .mfi
      nop.m         0
      nop.f         0
      shl           r8 = r8,20
};;
{ .mfi
      sub           r8 = r8,r0,1 // r8 = r8 - 1: 0x7f7fffff or 0xff7fffff
      nop.f         0
      extr.u        GR_fpsr = GR_fpsr,10,2 // rounding mode
};;
.pred.rel "mutex",p14,p15
{ .mfi
      // set p8 to 0 in case of overflow and to 1 otherwise
      // for negative arg:
      //    no overflow if rounding mode either Z or +Inf, i.e.
      //    GR_fpsr > 1
(p14) cmp.lt        p8,p0 = 1,GR_fpsr
      nop.f         0
      // for positive arg:
      //    no overflow if rounding mode either Z or -Inf, i.e.
      //    (GR_fpsr & 1) != 0
      //    (tbit.z writes "bit is set" to its second target, here p8)
(p15) tbit.z        p0,p8 = GR_fpsr,0
};;
{ .mib
(p8)  setf.s        f8 = r8 // set result to 0x7f7fffff without
                            // OVERFLOW flag raising
      nop.i         0
(p8)  br.ret.sptk   b0
};;
// otherwise fall through to the overflow handler below

.align 32
tgammaf_overflow:
{ .mfi
      nop.m         0
      nop.f         0
      mov           r8 = 0x1FFFE
};;
{ .mfi
      setf.exp      f9 = r8
      fmerge.s      FR_X = f8,f8 // keep the original argument in FR_X
      nop.i         0
};;
.pred.rel "mutex",p14,p15
{ .mfi
      nop.m         0
(p14) fnma.s.s0     f8 = f9,f9,f0 // set I,O and -INF result
      mov           GR_TAG = 261 // overflow
}
{ .mfb
      nop.m         0
(p15) fma.s.s0      f8 = f9,f9,f0 // set I,O and +INF result
      br.cond.sptk  tgammaf_libm_err
};;

// x is negative integer or +/-0
//--------------------------------------------------------------------
.align 32
tgammaf_singularity:
{ .mfi
      nop.m         0
      fmerge.s      FR_X = f8,f8 // keep the original argument in FR_X
      mov           GR_TAG = 262 // negative
}
{ .mfb
      nop.m         0
      frcpa.s0      f8,p0 = f0,f0 // result of 0/0 in status field 0
      br.cond.sptk  tgammaf_libm_err
};;

// x is negative noninteger with big absolute value
//--------------------------------------------------------------------
.align 32
tgammaf_underflow:
{ .mfi
      mov           r8 = 0x00001
      nop.f         0
      // p6: bit 0 of GR_Sig is clear, p7: bit 0 of GR_Sig is set
      tbit.z        p6,p7 = GR_Sig,0
};;
{ .mfi
      setf.exp      f9 = r8
      nop.f         0
      nop.i         0
};;
.pred.rel "mutex",p6,p7
{ .mfi
      nop.m         0
(p6)  fms.s.s0      f8 = f9,f9,f9 // f9*f9 - f9
      nop.i         0
}
{ .mfb
      nop.m         0
(p7)  fma.s.s0      f8 = f9,f9,f9 // f9*f9 + f9
      br.ret.sptk   b0
};;

// x for natval, nan, +/-inf or +/-0
//--------------------------------------------------------------------
.align 32
tgammaf_spec_args:
{ .mfi
      nop.m         0
      fclass.m      p6,p0 = f8,0x1E1 // Test x for natval, nan, +inf
      nop.i         0
};;
{ .mfi
      nop.m         0
      fclass.m      p7,p8 = f8,0x7 // +/-0
      nop.i         0
};;
{ .mfi
      nop.m         0
      fmerge.s      FR_X = f8,f8 // keep the original argument in FR_X
      nop.i         0
}
{ .mfb
      nop.m         0
(p6)  fma.s.s0      f8 = f8,f1,f8 // result = x + x, then return
(p6)  br.ret.spnt   b0
};;
.pred.rel "mutex",p7,p8
{ .mfi
(p7)  mov           GR_TAG = 262 // negative
(p7)  frcpa.s0      f8,p0 = f1,f8 // +/-0: result of 1/x in status field 0
      nop.i         0
}
{ .mib
      nop.m         0
      nop.i         0
      // not +/-0: handled by the singularity path
(p8)  br.cond.spnt  tgammaf_singularity
};;

// Common entry to the error-reporting stub; falls through into
// __libm_error_region with the tag in GR_Parameter_TAG.
.align 32
tgammaf_libm_err:
{ .mfi
      alloc         r32 = ar.pfs,1,4,4,0
      nop.f         0
      mov           GR_Parameter_TAG = GR_TAG
};;

GLOBAL_LIBM_END(tgammaf)

// Error-reporting stub.
// Builds a 64-byte stack frame holding FR_X at sp+16, FR_Y at sp+32 and
// FR_RESULT at sp+48, passes their addresses in GR_Parameter_X,
// GR_Parameter_Y and GR_Parameter_RESULT to __libm_error_support, then
// reloads f8 from sp+48 and returns to the original caller.
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
      add           GR_Parameter_Y=-32,sp // Parameter 2 value
      nop.f         0
.save ar.pfs,GR_SAVE_PFS
      mov           GR_SAVE_PFS=ar.pfs // Save ar.pfs
}
{ .mfi
.fframe 64
      add           sp=-64,sp // Create new stack
      nop.f         0
      mov           GR_SAVE_GP=gp // Save gp
};;
{ .mmi
      stfs          [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
      add           GR_Parameter_X = 16,sp // Parameter 1 address
.save b0, GR_SAVE_B0
      mov           GR_SAVE_B0=b0 // Save b0
};;
.body
{ .mib
      stfs          [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
      add           GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
      nop.b         0
}
{ .mib
      stfs          [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
      add           GR_Parameter_Y = -16,GR_Parameter_Y
      br.call.sptk  b0=__libm_error_support# // Call error handling function
};;
{ .mmi
      nop.m         0
      nop.m         0
      add           GR_Parameter_RESULT = 48,sp
};;
{ .mmi
      ldfs          f8 = [GR_Parameter_RESULT] // Get return result off stack
.restore sp
      add           sp = 64,sp // Restore stack pointer
      mov           b0 = GR_SAVE_B0 // Restore return address
};;
{ .mib
      mov           gp = GR_SAVE_GP // Restore gp
      mov           ar.pfs = GR_SAVE_PFS // Restore ar.pfs
      br.ret.sptk   b0 // Return
};;

LOCAL_LIBM_END(__libm_error_region)
.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -