📄 s_erfcf.s
字号:
}{ .mfi setf.d EXP_RSHF = exp_GR_rshf // Form right shift 1.100 * 2^63(p7) fms.s1 FR_AbsArg = f1, f0, f8 // |x| if x < 0 mov exp_TB1_size = 0x100 };;// Form pointer GR_P_POINT_3 to the beginning of erfc_p_table { .mfi setf.d FR_05 = GR_05 nop.f 0 sub GR_ShftPi = GR_ShftPi,GR_ShftPi_bias}{ .mfb add GR_P_POINT_3 = GR_P_POINT_3, EXP_AD_TB1 nop.f 0(p9) br.cond.spnt SPECIAL // For x = 0,+inf,-inf,nan,unnorm};;{ .mfi add GR_P_POINT_1 = GR_P_POINT_3, GR_ShftPi nop.f 0 add GR_P_POINT_2 = GR_P_POINT_3, GR_ShftPi}{ .mfi ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16 fma.s1 FR_NormX = f8,f1,f0 add GR_P_POINT_3 = GR_P_POINT_3, GR_ShftPi};;// Load coefficients for polynomial P15(x){ .mfi ldfpd FR_A15, FR_A14 = [GR_P_POINT_1], 16 nop.f 0 add GR_P_POINT_3 = 0x30, GR_P_POINT_3}{ .mfi ldfe exp_ln2_by_128_lo = [EXP_AD_TB1], 16 nop.f 0 add GR_P_POINT_2 = 0x20, GR_P_POINT_2 };;// Now EXP_AD_TB1 points to the beginning of table 1{ .mlx ldfpd FR_A13, FR_A12 = [GR_P_POINT_1] movl GR_1_by_6 = 0x3FC5555555555555}{ .mfi add GR_P_POINT_4 = 0x30, GR_P_POINT_2 nop.f 0 nop.i 0};;{ .mfi ldfpd FR_A11, FR_A10 = [GR_P_POINT_2] fma.s1 FR_2 = f1, f1, f1 mov exp_TB2_size = 0x80}{ .mfi ldfpd FR_A9, FR_A8 = [GR_P_POINT_3],16 nop.f 0 add GR_P_POINT_1 = 0x60 ,GR_P_POINT_1};;// W = X * Inv_log2_by_128// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.{ .mfi ldfpd FR_A7, FR_A6 = [GR_P_POINT_3] fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8,EXP_INV_LN2_2TO63,EXP_RSHF_2TO56 add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1 }{ .mfi ldfpd FR_A5, FR_A4 = [GR_P_POINT_4], 16 nop.f 0 nop.i 0};;{ .mfi ldfpd FR_A3, FR_A2 = [GR_P_POINT_4] fmerge.s FR_X = f8,f8 nop.i 0}{ .mfi ldfpd FR_A1, FR_A0 = [GR_P_POINT_1] nop.f 0 nop.i 0};;//p14: x < - NEG_ARG_ASYMP = -4.4 -> erfcf(x) ~=~ 2.0{ .mfi setf.d FR_1_by_6 = GR_1_by_6(p7) fcmp.gt.unc.s1 p14,p0 = FR_AbsArg, FR_NEG_ARG_ASYMP //p7: x < 0 nop.i 0};;//p15: x > POS_ARG_ASYMP = 10.06 -> erfcf(x) ~=~ 0.0{ .mfi 
nop.m 0(p6) fcmp.gt.unc.s1 p15,p0 = FR_AbsArg, FR_POS_ARG_ASYMP //p6: x > 0 nop.i 0} ;;{ .mfi nop.m 0 fcmp.le.s1 p8,p0 = FR_NormX, FR_UnfBound // p8: x <= UnfBound nop.i 0}{ .mfb nop.m 0(p14) fnma.s.s0 FR_RESULT = FR_EpsNorm, FR_EpsNorm, FR_2//y = 2 if x <-4.4(p14) br.ret.spnt b0};;// Nfloat = round_int(W) // The signficand of EXP_W_2TO56_RSH contains the rounded integer part of W,// as a twos complement number in the lower bits (that is, it may be negative).// That twos complement number (called N) is put into exp_GR_N.// Since EXP_W_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56// before the shift constant 1.10000 * 2^63 is subtracted to yield EXP_Nfloat.// Thus, EXP_Nfloat contains the floating point version of N{ .mfi nop.m 0 fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF nop.i 0} { .mfb(p15) mov GR_Parameter_TAG = 209(p15) fma.s.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 //Result.for x>10.06(p15) br.cond.spnt __libm_error_region} ;;// Now we can calculate polynomial P15(x){ .mfi nop.m 0 fma.s1 FR_P15_1_1 = FR_AbsArg, FR_AbsArg, f0 // x ^2 nop.i 0}{ .mfi nop.m 0 fma.s1 FR_P15_0_1 = FR_A15, FR_AbsArg, FR_A14 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_P15_1_2 = FR_A13, FR_AbsArg, FR_A12 nop.i 0 };;{ .mfi getf.sig exp_GR_N = EXP_W_2TO56_RSH fma.s1 FR_P15_2_1 = FR_A9, FR_AbsArg, FR_A8 nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_P15_2_2 = FR_A11, FR_AbsArg, FR_A10 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_P15_3_1 = FR_A5, FR_AbsArg, FR_A4 nop.i 0 }{ .mfi nop.m 0 fma.s1 FR_P15_3_2 = FR_A7, FR_AbsArg, FR_A6 nop.i 0};;// exp_GR_index_1 has index_1// exp_GR_index_2_16 has index_2 * 16// exp_GR_biased_M has M// exp_GR_index_1_16 has index_1 * 16// r2 has true M{ .mfi and exp_GR_index_1 = 0x0f, exp_GR_N fma.s1 FR_P15_4_1 = FR_A1, FR_AbsArg, FR_A0 shr r2 = exp_GR_N, 0x7 }{ .mfi and exp_GR_index_2_16 = 0x70, exp_GR_N fma.s1 FR_P15_4_2 = FR_A3, FR_AbsArg, FR_A2 nop.i 0};;// EXP_AD_T1 has address of T1 // EXP_AD_T2 has address if T2 { .mfi add EXP_AD_T2 = EXP_AD_TB2, 
exp_GR_index_2_16 nop.f 0 shladd EXP_AD_T1 = exp_GR_index_1, 4, EXP_AD_TB1}{ .mfi addl exp_GR_biased_M = 0xffff, r2 fnma.s1 exp_r = EXP_Nfloat, exp_ln2_by_128_hi, EXP_NORM_f8 nop.i 0};;// Create Scale = 2^M// r = x - Nfloat * ln2_by_128_hi { .mfi setf.exp EXP_2M = exp_GR_biased_M fma.s1 FR_P15_7_1 = FR_P15_0_1, FR_P15_1_1, FR_P15_1_2 nop.i 0}{ .mfi ldfe exp_T2 = [EXP_AD_T2] nop.f 0 nop.i 0};;// Load T1 and T2{ .mfi ldfe exp_T1 = [EXP_AD_T1] fma.s1 FR_P15_7_2 = FR_P15_1_1, FR_P15_1_1, f0 // x^4 nop.i 0}{ .mfi nop.m 0 fma.s1 FR_P15_8_1 = FR_P15_1_1, FR_P15_2_2, FR_P15_2_1 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_P15_9_1 = FR_P15_1_1, FR_P15_4_2, FR_P15_4_1 nop.i 0}{ .mfi nop.m 0 fma.s1 FR_P15_9_2 = FR_P15_1_1, FR_P15_3_2, FR_P15_3_1 nop.i 0};;{ .mfi nop.m 0 fma.s1 exp_P = FR_1_by_6, exp_r, FR_05 nop.i 0}{ .mfi nop.m 0 fma.s1 exp_rsq = exp_r, exp_r, f0 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_P15_13_1 = FR_P15_7_2, FR_P15_7_1, FR_P15_8_1 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_P15_14_1 = FR_P15_7_2, FR_P15_9_2, FR_P15_9_1 nop.i 0}{ .mfi nop.m 0 fma.s1 FR_P15_14_2 = FR_P15_7_2, FR_P15_7_2, f0 // x^8 nop.i 0};;{ .mfi nop.m 0 fma.s1 exp_P = exp_P, exp_rsq, exp_r nop.i 0}{ .mfi nop.m 0 fma.s1 exp_S1 = EXP_2M, exp_T2, f0 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_Pol = FR_P15_14_2, FR_P15_13_1, FR_P15_14_1 // P15(x) nop.i 0};;{ .mfi nop.m 0 fma.s1 exp_S = exp_S1, exp_T1, f0 nop.i 0};;{ .mfi nop.m 0 fma.s1 FR_Exp = exp_S, exp_P, exp_S // exp(-x^2) nop.i 0 };; { .mfi nop.m 0 fma.s.s0 FR_Tmpf = f8, f1, f0 // Flag d nop.i 0 };;//p6: result for 0 < x < = POS_ARG_ASYMP //p7: result for - NEG_ARG_ASYMP <= x < 0//p8: exit for - NEG_ARG_ASYMP <= x <= UnfBound, x!=0.pred.rel "mutex",p6,p7{ .mfi nop.m 0(p6) fma.s.s0 f8 = FR_Exp, FR_Pol, f0 nop.i 0 }{ .mfb mov GR_Parameter_TAG = 209(p7) fnma.s.s0 f8 = FR_Exp, FR_Pol, FR_2(p8) br.ret.sptk b0 };;//p10: branch for UnfBound < x < = POS_ARG_ASYMP{ .mfb nop.m 0 nop.f 0(p10) br.cond.spnt __libm_error_region };;//Only via (p9) br.cond.spnt SPECIAL for x = 
// 0,+inf,-inf,nan,unnorm  (end of the comment begun on the previous line:
// SPECIAL is entered only via "(p9) br.cond.spnt SPECIAL")
//
// SPECIAL: classify the argument in f8 with fclass and return a fixed
// expression for each class. Each predicate is set by an fclass.m.unc and
// consumed by a predicated fma + br.ret in a later instruction group.
SPECIAL:

{ .mfi
      nop.m 0
      fclass.m.unc p10,p0 = f8,0x07            // p10: x = 0
      nop.i 0
};;

{ .mfi
      nop.m 0
      fclass.m.unc p11,p0 = f8,0x21            // p11: x = +inf
      nop.i 0
};;

{ .mfi
      nop.m 0
      fclass.m.unc p12,p0 = f8,0x22            // p12: x = -inf
      nop.i 0
}
{ .mfb
      nop.m 0
(p10) fma.s.s0 f8 = f1, f1, f0                 // f8 = f1*f1 + f0
(p10) br.ret.sptk b0                           // Quick exit for x = 0
};;

{ .mfi
      nop.m 0
      fclass.m.unc p13,p0 = f8,0xc3            // p13: x = nan
      nop.i 0
}
{ .mfb
      nop.m 0
(p11) fma.s.s0 f8 = f0, f1, f0                 // f8 = f0*f1 + f0
(p11) br.ret.spnt b0                           // Quick exit for x = +inf
};;

{ .mfi
      nop.m 0
      fclass.m.unc p14,p0 = f8,0x0b            // p14: x = unnormalized
      nop.i 0
}
{ .mfb
      nop.m 0
(p12) fma.s.s0 f8 = f1, f1, f1                 // f8 = f1*f1 + f1
(p12) br.ret.spnt b0                           // Quick exit for x = -inf
};;

{ .mfb
      nop.m 0
(p13) fma.s.s0 f8 = f8, f1, f0                 // f8 = f8*f1 + f0 (argument passed through)
(p13) br.ret.sptk b0                           // Quick exit for x = nan
};;

{ .mfb
      nop.m 0
(p14) fnma.s.s0 f8 = f8, f1, f1                // f8 = -(f8*f1) + f1
(p14) br.ret.sptk b0                           // Quick exit for x = unnormalized
};;

GLOBAL_LIBM_END(erfcf)

//----------------------------------------------------------------------
// __libm_error_region
//
// Local out-of-line path that hands the computed result to
// __libm_error_support and returns whatever that routine leaves in the
// result slot.
//
// Reached via
//   (p10) br.cond.spnt __libm_error_region   for UnfBound < x <= POS_ARG_ASYMP
//   (p15) br.cond.spnt __libm_error_region   for x > POS_ARG_ASYMP
//
// In:   FR_X, FR_Y, FR_RESULT   values stored to the stack frame below
//       GR_Parameter_TAG        set to 209 by the code preceding both branches
//       b0, gp, ar.pfs          saved in GR_SAVE_B0 / GR_SAVE_GP / GR_SAVE_PFS
// Out:  f8 = single-precision value reloaded from the result slot
//
// Frame (64 bytes, offsets relative to sp after "add sp=-64,sp"):
//       sp+16  FR_X        (GR_Parameter_X)
//       sp+32  FR_Y        (GR_Parameter_Y at the time of the call)
//       sp+48  FR_RESULT   (GR_Parameter_RESULT)
// NOTE(review): the register aliases and the contract of
// __libm_error_support are defined outside this section -- confirm that
// the slot layout above matches what the callee expects.
//----------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue

{ .mfi
      add   GR_Parameter_Y=-32,sp              // Parameter 2 value
      nop.f 0
.save   ar.pfs,GR_SAVE_PFS
      mov   GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
      add   sp=-64,sp                          // Create new stack
      nop.f 0
      mov   GR_SAVE_GP=gp                      // Save gp
};;

{ .mmi
      stfs  [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack; pointer += 16
      add   GR_Parameter_X = 16,sp             // Parameter 1 address
.save   b0, GR_SAVE_B0
      mov   GR_SAVE_B0=b0                      // Save b0
};;

.body
{ .mib
      stfs  [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
      add   GR_Parameter_RESULT = 0,GR_Parameter_Y   // Parameter 3 address
      nop.b 0
}
{ .mib
      stfs  [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
      add   GR_Parameter_Y = -16,GR_Parameter_Y      // Point back at the Parameter 2 slot
      br.call.sptk b0=__libm_error_support#    // Call error handling function
};;

{ .mmi
      nop.m 0
      nop.m 0
      add   GR_Parameter_RESULT = 48,sp        // Recompute result-slot address after the call
};;

{ .mmi
      ldfs  f8 = [GR_Parameter_RESULT]         // Get return result off stack
.restore sp
      add   sp = 64,sp                         // Restore stack pointer
      mov   b0 = GR_SAVE_B0                    // Restore return address
};;

{ .mib
      mov   gp = GR_SAVE_GP                    // Restore gp
      mov   ar.pfs = GR_SAVE_PFS               // Restore ar.pfs
      br.ret.sptk b0                           // Return
};;

LOCAL_LIBM_END(__libm_error_region)

.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -