📄 e_log2.s
字号:
// Final entries of T_table (closed by LOCAL_OBJECT_END below).  Each entry
// occupies 16 bytes: log2 indexes the table with "shladd ...,4" and reads an
// entry with ldfe.
data8 0x8f9b91da5736d415, 0x0000bffb
data8 0x8a1b06b09b7fd1d1, 0x0000bffb
data8 0x8497daca0a2e077a, 0x0000bffb
data8 0xfe241745a453f10c, 0x0000bffa
data8 0xf3132d6708d723c5, 0x0000bffa
data8 0xe7fcf2e21a0e7d77, 0x0000bffa
data8 0xd75198b04afb8da9, 0x0000bffa
data8 0xcc2dfe1a4a8ca305, 0x0000bffa
data8 0xc10500d63aa65882, 0x0000bffa
data8 0xb5d69bac77ec398a, 0x0000bffa
data8 0xaaa2c95dc66abcde, 0x0000bffa
data8 0x9f6984a342d13101, 0x0000bffa
data8 0x942ac82e5387ac51, 0x0000bffa
data8 0x88e68ea899a0976c, 0x0000bffa
data8 0xefebc4409ccf872e, 0x0000bff9
data8 0xd947b0c6642ef69e, 0x0000bff9
data8 0xc2987d51e043d407, 0x0000bff9
data8 0xabde1eeee6bfd257, 0x0000bff9
data8 0x95188a9917cf2e01, 0x0000bff9
data8 0xfc8f6a777c1b7f1e, 0x0000bff8
data8 0xced727635c59725c, 0x0000bff8
data8 0xa108358a4c904615, 0x0000bff8
data8 0xe644fcbeb3ac9c90, 0x0000bff7
data8 0x8a4bd667bf08e7de, 0x0000bff7
data8 0x0000000000000000 // T[255] Low
data8 0x0000000000000000 // T[255] High
LOCAL_OBJECT_END(T_table)


.section .text

//----------------------------------------------------------------------
// log2
//
// In:   f8 = x
// Out:  f8 = result (the final operation on every computing path is an
//       fma.d, i.e. the result is rounded to double precision)
//
// Outline, as implemented below:
//   y = frcpa(x), r = 1 - x*y
//   T = T_table[index], index = the 8 significand bits that follow the
//       leading 1
//   l = unbiased exponent of x (one more when the significand is >= 1.5)
//   result = T + l + (C_1*r) * P(r), P being a polynomial in r built from
//            the constant 0.5 and C_3..C_7
//   When the first 9 bits after the leading 1 are all zero, or the first
//   8 bits are all ones, r is recomputed directly from the significand and
//   T is forced to 0 / read from the table accordingly.
//
// Special inputs:
//   x == 1               -> +0
//   significand(x) == 1  -> l (the exponent) is returned as is
//   +Infinity            -> returned unchanged
//   +/-0                 -> frcpa(1/-0), then __libm_error_region, tag 170
//   x < 0 (incl. -Inf)   -> frcpa(0/0),  then __libm_error_region, tag 171
//   NaN                  -> x*1+0
//
// Coefficient block addressed through poly_coeffs (offsets follow from the
// post-increments below): +0 C_4,C_5  +16 C_6,C_7  +32 C_1  +48 C_3
// +64 T_table.
// NOTE(review): poly_coeffs, GR_Parameter_TAG and the libm entry/exit
// macros are defined outside the visible part of the file.
//----------------------------------------------------------------------
GLOBAL_LIBM_ENTRY(log2)

{ .mfi
        alloc r32=ar.pfs,1,4,4,0
        // y=frcpa(x)
        frcpa.s1 f6,p0=f1,f8
        // will form significand of 1.5 (to test whether the index is 128 or above)
        mov r24=0xc
}
{.mfi
        nop.m 0
        // normalize x
        fma.s1 f7=f8,f1,f0
        // r2 = pointer to C_1...C_6 followed by T_table
        addl r2 = @ltoff(poly_coeffs), gp;;
}
{.mfi
        // get significand
        getf.sig r25=f8
        // f8 denormal ?
        fclass.m p8,p10=f8,0x9
        // r24 = 0xc << 60: significand of 1.5 (to test whether the index is 128 or above)
        shl r24=r24,60
}
{.mfi
        mov r26=0x804
        nop.f 0
        // r23=bias-1
        mov r23=0xfffe;;
}
{.mmf
        getf.exp r29=f8
        // load start address for C_1...C_6 followed by T_table
        ld8 r2=[r2]
        // will continue only for positive normal/denormal numbers
        fclass.nm.unc p12,p7 = f8, 0x19 ;;
}
.pred.rel "mutex",p8,p10
{.mfi
        // denormal input, repeat get significand (after normalization)
        (p8) getf.sig r25=f7
        // x=1 ?
        fcmp.eq.s0 p6,p0=f8,f1
        // get T_index (normal input: top 9 significand bits)
        (p10) shr.u r28=r25,63-8
}
{.mfi
        // f32=0.5
        setf.exp f32=r23
        nop.f 0
        // r27=bias
        mov r27=0xffff;;
}
{.mmi
        // denormal input, repeat get exponent (after normalization)
        (p8) getf.exp r29=f7
        // r23 = 0xff: mask for the 8-bit table index
        mov r23=0xff
        // r26=0x80400...0 (threshold for using polynomial approximation)
        shl r26=r26,64-12;;
}
{.mfb
        // r3 = address of C_3
        add r3=48,r2
        // r=1-x*y
        fms.s1 f6=f6,f8,f1
        // zero, negative, infinite or NaN input: handle separately
        (p12) br.cond.spnt SPECIAL_LOG2
}
{.mfi
        // load C_4, C_5
        ldfpd f10,f11=[r2],16
        nop.f 0
        // significand >= 1.5 ?
        cmp.geu p12,p0=r25,r24;;
}
{.mmi
        // load C_6, C_7
        ldfpd f12,f13=[r2],16
        // r27=bias-1 (if index >=128, will add exponent+1)
        (p12) mov r27=0xfffe
        // denormal input: T_index from the normalized significand
        (p8) shr.u r28=r25,63-8;;
}
{.mfi
        // load C_1
        ldfe f14=[r2],32
        // f7 = significand of x with the sign and exponent of 1.0
        fmerge.se f7=f1,f7
        // if first 9 bits after leading 1 are all zero, then p8=1
        cmp.ltu p8,p12=r25,r26
}
{.mfi
        // load C_3
        ldfe f15=[r3]
        nop.f 0
        // get T_index (drop the leading 1)
        and r28=r28,r23;;
}
{.mfi
        // r29=exponent-bias
        sub r29=r29,r27
        // x=1, return 0
        (p6) fma.d.s0 f8=f0,f0,f0
        // get T address (16 bytes per entry)
        shladd r2=r28,4,r2
}
{.mfb
        // first 8 bits after leading 1 are all ones ?
        cmp.eq p10,p0=r23,r28
        // if first 9 bits after leading 1 are 0 (p8), use polynomial approx. only
        (p8) fms.s1 f6=f7,f1,f1
        // x=1, return
        (p6) br.ret.spnt b0;;
}
{.mfi
        // r26=1
        mov r26=1
        // if first 8 bits after leading 1 are all ones, use polynomial approx. only
        (p10) fms.s1 f6=f7,f32,f1
        nop.i 0;;
}
.pred.rel "mutex",p8,p12
{.mmf
        // load T (unless first 9 bits after leading 1 are 0)
        (p12) ldfe f33=[r2]
        // f8=expon - bias
        setf.sig f8=r29
        // set T=0 (if first 9 bits after leading 1 are 0)
        (p8) fma.s1 f33=f0,f0,f0;;
}
{.mfi
        nop.m 0
        // P12=1-0.5*r
        fnma.s1 f32=f32,f6,f1
        // r26=2^{63}
        shl r26=r26,63
}
{.mfi
        nop.m 0
        // f7 = r^2
        fma.s1 f7=f6,f6,f0
        nop.i 0;;
}
{.mfi
        // significand(x)=1 ?  (p6 = 1 when it is NOT exactly 1)
        cmp.eq p0,p6=r26,r25
        // P67=C_6+C_7*r
        fma.s1 f13=f13,f6,f12
        nop.i 0
}
{.mfi
        nop.m 0
        // P45=C_4+C_5*r
        fma.s1 f10=f11,f6,f10
        nop.i 0;;
}
{.mfi
        nop.m 0
        // C_1*r
        (p6) fma.s1 f14=f14,f6,f0
        nop.i 0;;
}
{.mfi
        nop.m 0
        // normalize additive term (l=exponent of x)
        fcvt.xf f8=f8
        nop.i 0
}
{.mfi
        nop.m 0
        // P13=1-0.5*r+C_3*r^2
        (p6) fma.s1 f15=f15,f7,f32
        nop.i 0;;
}
{.mfi
        nop.m 0
        // P47=P45+r^2*P67
        (p6) fma.s1 f13=f13,f7,f10
        // if significand(x)=1, return exponent (l)
        nop.i 0
}
{.mfi
        nop.m 0
        // f7 = r^3
        (p6) fma.s1 f7=f7,f6,f0
        nop.i 0;;
}
{.mfi
        nop.m 0
        // add T+l
        (p6) fma.s1 f8=f8,f1,f33
        nop.i 0
}
{.mfi
        nop.m 0
        // P17=P13+r^3*P47
        (p6) fma.s1 f13=f13,f7,f15
        nop.i 0;;
}
{.mfb
        nop.m 0
        // result=T+l+(C_1*r)*P17
        (p6) fma.d.s0 f8=f13,f14,f8
        // return
        br.ret.sptk b0;;
}

// Inputs rejected by the fclass.nm test above: zeros, negative values,
// infinities and NaNs.
SPECIAL_LOG2:
{.mfi
        nop.m 0
        // x=+Infinity ?
        fclass.m p7,p0=f8,0x21
        nop.i 0;;
}
{.mfi
        nop.m 0
        // x=+/-Zero ?
        fclass.m p8,p0=f8,0x7
        nop.i 0;;
}
{.mfi
        nop.m 0
        // x=-Infinity, -normal, -denormal ?
        fclass.m p6,p0=f8,0x3a
        nop.i 0;;
}
{.mfb
        nop.m 0
        // log2(+Infinity)=+Infinity
        nop.f 0
        (p7) br.ret.spnt b0;;
}
{.mfi
        (p8) mov GR_Parameter_TAG = 170
        // log2(+/-0)=-infinity, raises Divide by Zero
        // set f8=-0
        (p8) fmerge.ns f8=f0,f8
        nop.i 0;;
}
{.mfb
        nop.m 0
        // f8 = frcpa(1/-0)
        (p8) frcpa.s0 f8,p0=f1,f8
        (p8) br.cond.sptk __libm_error_region;;
}
{.mfb
        (p6) mov GR_Parameter_TAG = 171
        // x<0: return NaN, raise Invalid
        (p6) frcpa.s0 f8,p0=f0,f0
        (p6) br.cond.sptk __libm_error_region;;
}
{.mfb
        nop.m 0
        // Remaining cases: NaNs
        fma.d.s0 f8=f8,f1,f0
        br.ret.sptk b0;;
}

GLOBAL_LIBM_END(log2)


//----------------------------------------------------------------------
// __libm_error_region
//
// Reached from log2 with GR_Parameter_TAG already set.  Opens a 64-byte
// stack frame, stores FR_X at sp+16, FR_Y at sp+32 and FR_RESULT at sp+48,
// and calls __libm_error_support with GR_Parameter_X / GR_Parameter_Y /
// GR_Parameter_RESULT holding those three addresses.  On return the double
// at sp+48 is loaded into f8, the frame is released, and b0, gp and ar.pfs
// are restored before returning to log2's caller.
// NOTE(review): the GR_Parameter_*, GR_SAVE_* and FR_* names are register
// aliases defined outside the visible part of the file.
//----------------------------------------------------------------------
LOCAL_LIBM_ENTRY(__libm_error_region)
.prologue
{ .mfi
        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
}
{ .mfi
.fframe 64
        add sp=-64,sp                           // Create new stack
        nop.f 0
        mov GR_SAVE_GP=gp                       // Save gp
};;
{ .mmi
        stfd [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
        add GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0, GR_SAVE_B0
        mov GR_SAVE_B0=b0                       // Save b0
};;
.body
{ .mib
        stfd [GR_Parameter_X] = FR_X            // STORE Parameter 1 on stack
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
        nop.b 0
}
{ .mib
        stfd [GR_Parameter_Y] = FR_RESULT       // STORE Parameter 3 on stack
        add   GR_Parameter_Y = -16,GR_Parameter_Y
        br.call.sptk b0=__libm_error_support#   // Call error handling function
};;
{ .mmi
        nop.m 0
        nop.m 0
        add   GR_Parameter_RESULT = 48,sp
};;
{ .mmi
        ldfd  f8 = [GR_Parameter_RESULT]        // Get return result off stack
.restore sp
        add   sp = 64,sp                        // Restore stack pointer
        mov   b0 = GR_SAVE_B0                   // Restore return address
};;
{ .mib
        mov   gp = GR_SAVE_GP                   // Restore gp
        mov   ar.pfs = GR_SAVE_PFS              // Restore ar.pfs
        br.ret.sptk     b0                      // Return
};;

LOCAL_LIBM_END(__libm_error_region)
.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -