📄 s_erfc.s
字号:
data8 0x963A6DD66951B72E, 0x00003FF3 //A2 = +2.86537625289770759336e-04Ldata8 0xBE93F9E80DF4AE0A, 0x0000BFEF //A3 = -2.27186718010906557773e-05Ldata8 0xF10589FC10D908E0, 0x00003FEB //A4 = +1.79575113004740124999e-06Ldata8 0x97F1A2435C7877EF, 0x0000BFE8 //A5 = -1.41508767557208714648e-07Ldata8 0xBEFF2FB5F00E9327, 0x00003FE4 //A6 = +1.11174782364058338591e-08Ldata8 0xEF5E09DC714DF198, 0x0000BFE0 //A7 = -8.70813302639377671664e-10Ldata8 0x958A6EB9408970A4, 0x00003FDD //A8 = +6.80032608255179732632e-11Ldata8 0xBA31F40954675710, 0x0000BFD9 //A9 = -5.29198388081297293593e-12Ldata8 0xE63B9CEEDC4CF0E6, 0x00003FD5 //A10 = +4.08975721481205179918e-13Ldata8 0x8AF8F1E3FED32CEC, 0x0000BFD2 //A11 = -3.08580807479307213059e-14Ldata8 0x9A88033A08842BEA, 0x00003FCE //A12 = +2.14455258045503137285e-15Ldata8 0x88BCF775B7B3A939, 0x0000BFCA //A13 = -1.18601440246395438386e-16Ldata8 0x88687B63A5B7135E, 0x00003FC5 //A14 = +3.69734984736162880476e-18L// Pol16 data8 0x99B8A501204BF3E7, 0x00003FFA //A0 = +3.75296063885057657456e-02Ldata8 0xA33FA20D2867C79C, 0x0000BFF6 //A1 = -2.49097544033960143953e-03Ldata8 0xACFD14CA6AA55829, 0x00003FF2 //A2 = +1.64974783411741182991e-04Ldata8 0xB6E9B4ED9B378B09, 0x0000BFEE //A3 = -1.09024594422859744844e-05Ldata8 0xC0FD95D38ADCF301, 0x00003FEA //A4 = +7.18945888498730738040e-07Ldata8 0xCB302F7AAFFFA074, 0x0000BFE6 //A5 = -4.73084450875945514829e-08Ldata8 0xD578674188198402, 0x00003FE2 //A6 = +3.10640208133938026422e-09Ldata8 0xDFCC6ED4219E7FC4, 0x0000BFDE //A7 = -2.03543610142159316364e-10Ldata8 0xEA1F448AA373E4A9, 0x00003FDA //A8 = +1.33083028465054001215e-11Ldata8 0xF44780B8EACD37B5, 0x0000BFD6 //A9 = -8.67854438613319891312e-13Ldata8 0xFD55794492F53AEE, 0x00003FD2 //A10 = +5.62514216652784597182e-14Ldata8 0x805C040421E7A098, 0x0000BFCF //A11 = -3.56269003968981157635e-15Ldata8 0xEFCCD20DE93A138E, 0x00003FCA //A12 = +2.07993414310230172191e-16Ldata8 0xB259764466732080, 0x0000BFC6 //A13 = -9.66834364652262630640e-18Ldata8 0x9597C1DB6AF830E4, 0x00003FC1 //A14 = +2.53420063550355940811e-19L// Pol17 data8 0xFFFCBD66BAA4368C, 0x00003FF9 //A0 = +3.12484454387527380657e-02Ldata8 0xE28174723762D197, 0x0000BFF5 //A1 = -1.72810121976742793952e-03Ldata8 0xC81D832836019EC4, 0x00003FF1 //A2 = +9.54224026432644399736e-05Ldata8 0xB0885530C7D7AB5B, 0x0000BFED //A3 = -5.26107996417947739207e-06Ldata8 0x9B7EA64F62F6FD06, 0x00003FE9 //A4 = +2.89631495607631932854e-07Ldata8 0x88C24ACAA9042166, 0x0000BFE5 //A5 = -1.59208376111789845204e-08Ldata8 0xF033E5CD9B7F2822, 0x00003FE0 //A6 = +8.73852423930118273815e-10Ldata8 0xD2A1B161FB4DFBFE, 0x0000BFDC //A7 = -4.78920839886600387264e-11Ldata8 0xB86B27FCBB5A1E9D, 0x00003FD8 //A8 = +2.62074563162805723295e-12Ldata8 0xA124E1303F08E508, 0x0000BFD4 //A9 = -1.43124677534734729453e-13Ldata8 0x8C0B270950D7C697, 0x00003FD0 //A10 = +7.77397948226387851915e-15Ldata8 0xEE034E350C65D2D9, 0x0000BFCB //A11 = -4.12886586201102092942e-16Ldata8 0xBA94473E52495304, 0x00003FC7 //A12 = +2.02289587087169937807e-17Ldata8 0xE913D34CBB853CEE, 0x0000BFC2 //A13 = -7.89697093687557412061e-19Ldata8 0xA44576A85E8CAB59, 0x00003FBD //A14 = +1.73929048516879172258e-20L// Pol18 data8 0xD579A3FE4622DED2, 0x00003FF9 //A0 = +2.60589793198885278242e-02Ldata8 0x9D97EB84E7CD89C8, 0x0000BFF5 //A1 = -1.20234251012583627659e-03Ldata8 0xE86EFDC2CCA5C47B, 0x00003FF0 //A2 = +5.54164790116744315389e-05Ldata8 0xAB39FA5621E39B15, 0x0000BFEC //A3 = -2.55147332073979814633e-06Ldata8 0xFC0244F58F8D8097, 0x00003FE7 //A4 = +1.17350772365097747003e-07Ldata8 0xB941D44B71B14FE2, 0x0000BFE3 //A5 = -5.39169255673480031672e-09Ldata8 0x880B4A40B6F2C901, 0x00003FDF //A6 = +2.47462779512141204748e-10Ldata8 0xC7998AE5652CDCFC, 0x0000BFDA //A7 = -1.13459336509953900777e-11Ldata8 0x92438AA45915CD95, 0x00003FD6 //A8 = +5.19633524685027215673e-13Ldata8 0xD6067243AD3AEAE6, 0x0000BFD1 //A9 = -2.37615683835509918256e-14Ldata8 0x9BD0722A07669E4D, 0x00003FCD //A10 = +1.08117849400479298186e-15Ldata8 0xDDF6F1B79F50E3C4, 0x0000BFC8 //A11 = -4.81309059042573202592e-17Ldata8 0x91F283C0351A9ACA, 0x00003FC4 //A12 = +1.97795505638619048412e-18Ldata8 0x990BC4FAFA9C7542, 0x0000BFBF //A13 = -6.48174913943425248713e-20Ldata8 0xB536865B89676892, 0x00003FB9 //A14 = +1.19916696090758913485e-21L// Pol19 data8 0xB241CEB1B7C953F1, 0x00003FF9 //A0 = +2.17598950382519671244e-02Ldata8 0xDBD6FBA9B11B85E1, 0x0000BFF4 //A1 = -8.38622198373701898430e-04Ldata8 0x877605B1AD082441, 0x00003FF0 //A2 = +3.22964249573360786077e-05Ldata8 0xA6D04DC067A5D310, 0x0000BFEB //A3 = -1.24285881515578912302e-06Ldata8 0xCD458A72BC161315, 0x00003FE6 //A4 = +4.77935289502172654216e-08Ldata8 0xFC6902CFB5DE90A2, 0x0000BFE1 //A5 = -1.83652591038905929358e-09Ldata8 0x9B12B0707DFE615C, 0x00003FDD //A6 = +7.05190381049444126079e-11Ldata8 0xBE67972F2C8EE5AE, 0x0000BFD8 //A7 = -2.70581282732878853626e-12Ldata8 0xE99D8CAF9A3FFE02, 0x00003FD3 //A8 = +1.03746090805854376435e-13Ldata8 0x8F35F5BBEF9E4299, 0x0000BFCF //A9 = -3.97489765699919189983e-15Ldata8 0xAF6E62C3C91B7178, 0x00003FCA //A10 = +1.52162305785839987182e-16Ldata8 0xD6636229C1646963, 0x0000BFC5 //A11 = -5.81100425482928485309e-18Ldata8 0x810331BF289E068F, 0x00003FC1 //A12 = +2.18555638648715837944e-19Ldata8 0x8E3D07CA59546B83, 0x0000BFBC //A13 = -7.53003820427900359431e-21Ldata8 0xD5970B291ED73560, 0x00003FB6 //A14 = +1.76677518655145552907e-22LLOCAL_OBJECT_END(erfc_p_table).section .textGLOBAL_LIBM_ENTRY(erfc){ .mfi alloc r32 = ar.pfs, 0, 33, 4, 0 fma.s1 FR_Tmp = f1, f1, f8 // |x|+1, if x >= 0 nop.i 0}{ .mfi addl EXP_AD_TB1 = @ltoff(exp_table_1), gp fms.s1 FR_Tmp1 = f1, f1, f8 // |x|+1, if x < 0 mov exp_GR_rshf_2to56 = 0x4768 // begin 1.1 2^(63+56)};;{ .mfi ld8 EXP_AD_TB1 = [EXP_AD_TB1] fcmp.ge.s1 p6,p7 = f8, f0 // p6: x >= 0 ,p7: x<0 mov exp_GR_rshf_2to56 = 0x4768 // begin 1.1 2^(63+56)}{ .mlx mov exp_TB1_size = 0x100 movl exp_GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc //signif. of 1/ln2};;{ .mfi nop.m 0 fclass.m p8,p0 = f8,0x07 // p8: x = 0 shl exp_GR_rshf_2to56 = exp_GR_rshf_2to56, 48 //end 1.1 2^(63+56)}{ .mfi mov exp_GR_exp_2tom56 = 0xffff-56 fnma.s1 EXP_NORM_f8 = f8, f8, f0 // high bits for -x^2 nop.i 0 };;.pred.rel "mutex",p6,p7{ .mfi setf.sig EXP_INV_LN2_2TO63 = exp_GR_sig_inv_ln2 // form 1/ln2 * 2^63(p6) fma.s1 FR_AbsArg = f1, f0, f8 // |x|, if x >= 0 mov GR_POS_ARG_ASYMP = 0x403C }{ .mfi mov GR_NEG_ARG_ASYMP = 0x4018(p7) fms.s1 FR_AbsArg = f1, f0, f8 // |x|, if x < 0 mov exp_GR_rshf = 0x43e8 // begin 1.1 2^63 for right shift};;{ .mfi setf.exp EXP_2TOM56 = exp_GR_exp_2tom56 // 2^-56 for scaling Nfloat fclass.m p10,p0 = f8, 0x21 // p10: x = +inf mov exp_GR_17ones = 0x1FFFF }{ .mlx setf.d EXP_RSHF_2TO56 = exp_GR_rshf_2to56 // const 1.10*2^(63+56) movl GR_ERFC_XB_TB = 0x1A0 };;.pred.rel "mutex",p6,p7{ .mfi ldfd FR_UnfBound = [EXP_AD_TB1], 16(p6) fma.s1 FR_Tmp = FR_Tmp, FR_Tmp, f0 // (|x|+1)^2,x >=0 shl exp_GR_rshf = exp_GR_rshf, 48 //end 1.1 2^63 for right shift}{ .mfi nop.m 0 (p7) fma.s1 FR_Tmp = FR_Tmp1, FR_Tmp1, f0 // (|x|+1)^2, x<0 mov GR_0x1 = 0x1 };;{ .mfi mov GR_BIAS = 0x0FFFF fclass.m p9,p0 = f8, 0x22 // p9: x = -inf shl GR_EpsNorm = GR_0x1,53}{ .mfb mov exp_TB2_size = 0x80(p8) fma.d.s0 f8 = f1, f1, f0 //p8: y = 1.0, x = 0(p8) br.ret.spnt b0 //p8: quick exit for x = 0};;{ .mfi nop.m 0 fclass.m p11,p0 = f8, 0xc3 // p11: x = nan nop.i 0 }{ .mfi setf.d EXP_RSHF = exp_GR_rshf //Form right shift const 1.100 * 2^63 fma.s1 FR_NormX = f8,f1,f0 nop.i 0};;{ .mfi setf.d FR_EpsNorm = GR_EpsNorm nop.f 0 (p6) shl GR_ARG_ASYMP = GR_POS_ARG_ASYMP, 48//p6:ARG_ASYMP= 28.0,x>=0}{ .mfi nop.m 0 fma.s1 FR_2 = f1, f1, f1 nop.i 0};;{ .mfi ldfe exp_ln2_by_128_hi = [EXP_AD_TB1],16 fma.s1 f8_sq_lo = f8, f8, EXP_NORM_f8 // low bits for -x^2(p7) shl GR_ARG_ASYMP = GR_NEG_ARG_ASYMP, 48//p6:ARG_ASYMP= 6.0,x < 0};;{ .mfi sub GR_mBIAS = r0, GR_BIAS fma.s1 FR_Tmp = FR_Tmp, FR_Tmp, f0 // (|x|+1)^4 nop.i 0}{ .mfi ldfe exp_ln2_by_128_lo = [EXP_AD_TB1], 16 nop.f 0 nop.i 0 };;{ .mfi getf.d GR_AbsArg = FR_AbsArg nop.f 0 add GR_ERFC_XB_TB = GR_ERFC_XB_TB, EXP_AD_TB1//pointer to XB_TBL}{ .mfb shladd GR_ShftPi_bias = GR_BIAS, 4, GR_mBIAS // BIAS * 2^4 - BIAS(p9) fma.d.s0 f8 = f1, f1, f1 // p9: y = 2 for x = -inf(p9) br.ret.spnt b0 // p9: quick exit for x = -inf};;{ .mfi add GR_ERFC_P_TB = 0x140, GR_ERFC_XB_TB // pointer to P_TBL fma.s1 EXP_W_2TO56_RSH = EXP_NORM_f8,EXP_INV_LN2_2TO63,EXP_RSHF_2TO56 shladd GR_ShftPi_bias = GR_ShftPi_bias, 4, r0 // BIAS * 240}{ .mfb nop.m 0(p10) fma.d.s0 f8 = f0, f1, f0 // p10: y = 0 for x = +inf(p10) br.ret.spnt b0 // p10: quick exit for x = +inf};;.pred.rel "mutex",p6,p7{ .mfi(p6) cmp.gt.unc p15,p0 = GR_AbsArg,GR_ARG_ASYMP //p15: x > 28.0,p6: x >= 0 nop.f 0 (p7) cmp.gt.unc p14,p0 = GR_AbsArg, GR_ARG_ASYMP //p14: x < - 6.0,p7: x < 0}{ .mfb add EXP_AD_TB2 = exp_TB1_size, EXP_AD_TB1(p11) fma.d.s0 f8 = f8, f1, f0 //p11: y = x for x = nan(p11) br.ret.spnt b0 //p11: quick exit for x = nan};; { .mfi add EXP_AD_P = exp_TB2_size, EXP_AD_TB2 fms.s1 f8_sq_lo = f1, f1, f8_sq_lo // 1 - low bits for -x^2 nop.i 0};;{ .mfi ldfpd exp_P4, exp_P3 = [EXP_AD_P], 16 fmerge.s FR_X = f8,f8 shladd GR_ShftXBi_bias = GR_mBIAS, 4, r0} { .mfb nop.m 0(p14) fnma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,FR_2 //p14:y ~=~ 2,x< -6.0(p14) br.ret.spnt b0 //p14: quick exit for x < -6.0};;//p15: y ~=~ 0.0(result with underflow error), x > ARG_ASYMP = 28, { .mfi ldfpd exp_P2, exp_P1 = [EXP_AD_P] fma.d.s0 FR_Tmpf = f1, f1, FR_EpsNorm // flag i nop.i 0}{ .mfb(p15) mov GR_Parameter_TAG = 208(p15) fma.d.s0 FR_RESULT = FR_EpsNorm,FR_EpsNorm,f0 (p15) br.cond.spnt __libm_error_region};;//p8: x < 27.0, result without ungerflow error{ .mfi getf.exp GR_IndxPlusBias = FR_Tmp // exp + bias for (|x|+1)^4 fcmp.lt.s1 p8,p0 = FR_NormX,FR_UnfBound nop.i 0}{ .mfi nop.m 0 fms.s1 EXP_Nfloat = EXP_W_2TO56_RSH, EXP_2TOM56, EXP_RSHF nop.i 0};;{ .mmi shladd GR_ShftXBi = GR_IndxPlusBias, 4, GR_ShftXBi_bias shladd GR_ShftPi = GR_IndxPlusBias, 4, GR_ShftPi_bias shl GR_ShftPi_8 = GR_IndxPlusBias, 8 };;{ .mmi getf.sig exp_GR_N = EXP_W_2TO56_RSH add GR_ERFC_XB_TB = GR_ERFC_XB_TB, GR_ShftXBi// pointer to XB[i] sub GR_ShftPi = GR_ShftPi_8, GR_ShftPi // (256-16)*i};;{ .mmi ldfe FR_Xb = [GR_ERFC_XB_TB] add GR_ShftA12 = 0xC0, GR_ShftPi // pointer shift for A12 add GR_ShftA13 = 0xD0, GR_ShftPi // pointer shift for A13 };;{ .mfi add GR_P_A13 = GR_ERFC_P_TB, GR_ShftA13 // pointer to A13 nop.f 0 and exp_GR_index_1 = 0x0f, exp_GR_N }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -