📄 s_cbrtl.s
字号:
data4 0x1d927875, 0x9f074faa, 0x1e9dc2c3, 0x1f13c0d2 data4 0x1e3c9685, 0x9e6b6f75, 0x9db9cb31, 0x1ea5f3aa data4 0x9d992c61, 0x1f1015e4, 0x1f194f70, 0x9e19d2b3 data4 0x9d89116c, 0x1f23cd35, 0x1e33d3a2, 0x1ee331b8 data4 0x1d5ba7ec, 0x9f273788, 0x9e6907f4, 0x9ed5f912 data4 0x9edd458d, 0x1e2ca7b2, 0x1ef81fe4, 0x1dc7ade6 data4 0x1e876e51, 0x9f04ec89, 0x1f1da63a, 0x1ec02bd0 data4 0x9e71326f, 0x1e7847b4, 0x1f0de618, 0x9e036cb6 data4 0x1eec61e2, 0x1ef1758b, 0x9ee880a3, 0x1ed269d7 data4 0x1e27edd3, 0x9e8a81a1, 0x1eacb84d, 0x9e1aad37 data4 0x1f1aa8f7, 0x1e9bbd90, 0x1ea1b61f, 0x9ed41c2f data4 0x1dbb5dd6, 0x1f0ec733, 0x9df06b1b, 0x1e06fef1 data4 0x9edede3a, 0x1edeb5e2, 0x1f0e63ee, 0x9db316bb data4 0x9efc1ad3, 0x1f01fbb5, 0x9cc0d078, 0x1ea28b36 data4 0x9e9dd205, 0x9e791534, 0x1da1c8d5, 0x9e8195cc data4 0x1f0681a4, 0x1eeaf1e2, 0x9ef83b37, 0x9f22a92b data4 0x1eabc4ce, 0x1f10eefb, 0x1e06d9aa, 0x1e7cacd5 data4 0x1f1ea087, 0x1eb21983, 0x9f100c78, 0x1e840abe data4 0x9efab66c, 0x1f183fa8, 0x9e84ee68, 0x9eea083d data4 0x9ee23a74, 0x1f1351d7, 0x9ec5d42a, 0x9f071f57 data4 0x9ef578d9, 0x9f1aa7e7, 0x1eb02044, 0x1f151a2e data4 0x9c0dc8b2, 0x9ef4087a, 0x1ec12b93, 0x1c1a946b data4 0x1e89946f, 0x9dafe8c3, 0x1d295288, 0x9e8497ab data4 0x1ec000c6, 0x1e102f29, 0x1e542256, 0x1e67d44d data4 0x1ef688d8, 0x1f0e0f29, 0x1e67861f, 0x1e869748 data4 0x1ee6aa6e, 0x9e4d228b, 0x9e50be5b, 0x1e9fe225 data4 0x9ea34102, 0x9e628a3b, 0x9ed9fd83, 0x1ecd7109 data4 0x1f1864ff, 0x1ea19b76, 0x1db0d1c9, 0x9dff519b data4 0x1e8fea71, 0x9ee82e9a, 0x9f08919b, 0x9ef5c8ae data4 0x9ee446a4, 0x1ea59444, 0x1eb74230, 0x1ea13fbf data4 0x9ea6a3ea, 0x1e5f2797, 0x9e0adb07, 0x9d3adadd data4 0x1ebf2ee2, 0x1da19bfa, 0x1e8dea6d, 0x1ec4fea9 data4 0x1e669f22, 0x1dc5f919, 0x9ed25caa, 0x1ee475b1 data4 0x1ed0603e, 0x9eacb35c, 0x1dc00b27, 0x1e2f9991 data4 0x1e7b0406, 0x1eaa3387, 0x9d865bde, 0x1eb78a48 data4 0x1c40ae2e, 0x1ee9838b, 0x9f0f0d7f, 0x1e3e5d26 data4 0x1e99e7a6, 0x9e681ccf, 0x9e93ed65, 0x9eeb6a66 data4 0x1e29e9af, 0x9e96f923, 
0x9e74f11d, 0x9f1474da data4 0x1eec2ea7, 0x1ebf7aa3, 0x9c25dcca, 0x9f0553c2 data4 0x9e599efd, 0x1d2ab490, 0x1e95d7cd, 0x9ee4b20e data4 0x9d988ce5, 0x9ef9787e, 0x9dbbba5b, 0x9f12c304 data4 0x1e3b9d70, 0x1e7bcae8, 0x9d98bb6e, 0x9e8e6b01 data4 0x9f07d03b, 0x9d67c822, 0x9f0ef69e, 0x1c7c0fe3 data4 0x9e9bfbb9, 0x9e83b84b, 0x1efbf15e, 0x9ecfa6a6 data4 0x9c91158e, 0x9ecf6770, 0x1ee1e3a8, 0x9dc95ec0 data4 0x1ef603f7, 0x1d5e52ba, 0x1c477d1b, 0x9e955cd8 data4 0x1ed665b0, 0x9e8376c4, 0x9c0ee88e, 0x1e8c989e data4 0x1ea2df29, 0x9d961e5c, 0x1e101813, 0x1e7fffff data4 0x9e5abff4, 0x1dbddd71, 0x1eb69100, 0x1e71f114 data4 0x1e9ca798, 0x1ef62c8d, 0x9db4e55a, 0x1dbe69ce data4 0x9ef1c01f, 0x1f044a2a, 0x9eb9e0d7, 0x9ee59745 data4 0x9e874803, 0x1ea0b418, 0x9e13572a, 0x1ddbb3a2 data4 0x9ec0e391, 0x1e89fba1, 0x1ee8b261, 0x9e5d25f0 data4 0x9ef222cb, 0x9ef135ec, 0x1ea04b9a, 0x9f04291f data4 0x9e969254, 0x9ee32f08, 0x9ed909d3, 0x9e362640 data4 0x9ec20735, 0x1e50131b, 0x9ed4e049, 0x1ee8e817 data4 0x1e1e09c0, 0x9ea643c5, 0x9e5a1ab6, 0x9e389059 data4 0x1e560947, 0x1d02b877, 0x1e4475ab, 0x9ea9aaf6 data4 0x1e95bc5e, 0x1eaf6afd, 0x1d43067d, 0x9d043821 data4 0x9e97baa9, 0x1de5c4f9, 0x9e9a0069, 0x9e1b9944 data4 0x1eb13686, 0x9eb907eb, 0x1e059589, 0x1cbd0f93 data4 0x9eb7e6ae, 0x1e9fa175, 0x1ee5bdf4, 0x1e8052f7 data4 0x9c80d1e3, 0x1bfbe28e, 0x9e672b3b, 0x9ecacf19 data4 0x9e3c04be, 0x1dfe8c5c, 0x1e1ba9cb, 0x1eb40b1e data4 0x1ec7e7f6, 0x9d0d45b3, 0x1ef0113b, 0x9a155fa3 data4 0x1e28ec3b, 0x1e7ca8df, 0x9d2f91b4, 0x1eccd9ed data4 0x9ed943bc, 0x9ccaab19, 0x9e8a5c58, 0x1ec3bca8 data4 0x1ed78dc7, 0x9ed391a8, 0x9e938f6e, 0x9ec4a030 data4 0x9e80346e, 0x1e7a4686, 0x9e284315, 0x9e39584c data4 0x1ebdc9b4, 0x9e9cfce5, 0x9ef55c65, 0x1e2941e7 data4 0x9efbe59f, 0x1d87c41b, 0x1e40befc, 0x1e3d05b5 data4 0x1de9ea67, 0x1ec9a21c, 0x1decb69a, 0x1df6e75a data4 0x9e8030ab, 0x9db20540, 0x9ef1e977, 0x1e3cdc43 data4 0x1e0492b0, 0x9e91d872, 0x1e775346, 0x9e939978 data4 0x1eb2714e, 0x1e49a203, 0x9e10195a, 0x1ef1ffc3 data4 
0x9ea8b709, 0x9e832e27, 0x1ed5ac3b, 0x1edb20a6
// NOTE(review): the row above supplies the operands for the `data4`
// directive that ends the preceding physical line of this file.
data4 0x1e4dbd4e, 0x1efbb932, 0x1d8170ec, 0x1e6c4849
data4 0x1f008e17, 0x1e8000c4, 0x1d855ecf, 0x9e37cb85
data4 0x1ecffdf5, 0x1eba6519, 0x9edbe600, 0x1ea3e5e7
data4 0x1ed4fb39, 0x1f00be77, 0x1e6f4484, 0x9e9e7107
data4 0x9e30b29d, 0x9ee6e174, 0x1e3a2656, 0x9dd72f3f
data4 0x9ee12138, 0x1ed16fed, 0x9ece8a02, 0x9ca5b249
data4 0x9eafd508, 0x9ef0e9fc, 0x1d1307ac, 0x1eecee20
data4 0x1cf60c6f, 0x9d556216, 0x9eaed175, 0x9ec919f4
data4 0x1ec2c988, 0x1cd82772, 0x9dc99456, 0x1eab0467
data4 0x1e89b36f, 0x1c757944, 0x1eef9abd, 0x9e98664d
LOCAL_OBJECT_END(D_table)


.section .text

//-----------------------------------------------------------------------
// cbrtl
//
// In:   f8 = argument a
// Out:  f8 = result, returned through b0
//
// Outline, as implemented by the bundles below:
//   * fclass filters the argument; inputs that are not in class 0x1b,
//     or whose significand is zero, return f8*1+0 early (p12).
//   * y = frcpa(a), r = 1 - a*y (on the normalized argument).
//   * floor(exponent/3) is obtained without a divide as
//     (exponent * 0x55556) >> 20; the remainder exponent mod 3 selects
//     one of the 256-entry groups of T_table (8-byte entries) and
//     D_table (4-byte entries); the top 8 significand bits after the
//     leading 1 select the entry inside the group.
//   * P1 = C1 + C2*r, P2 = C3 + C4*r, P3 = C5 + C6*r,
//     P4 = D - r*P1, P5 = P2 + r^2*P3, P = P4 - r^3*P5.
//   * result = T' + T'*P, with T' = T * sign * 2^floor(exponent/3).
//
// All work after the early exit is predicated on p6 (set by frcpa.s1).
// Register names (GR_*, FR_*), poly_coeffs and the GLOBAL_LIBM_* macros
// are defined outside this block.
//-----------------------------------------------------------------------
GLOBAL_LIBM_ENTRY(cbrtl)

{ .mfi
      getf.sig GR_ARGSIG = f8
      // will continue on main path only for normal/denormal numbers
      // all other values will be filtered out and will exit early
      fclass.nm.unc p12, p7 = f8, 0x1b
      // GR_ADDR = pointer to C_1...C_6 followed by T_table
      addl GR_ADDR = @ltoff(poly_coeffs), gp
}
{ .mfi
      // GR_BIAS23 = 2/3*bias - 63 = 0xaaaa - 0x3f = 0xaa6b
      mov GR_BIAS23 = 0xaa6b
      // normalize a
      fma.s1 FR_XNORM = f8, f1, f0
      // GR_D_ADDR = pointer to D table
      addl GR_D_ADDR = @ltoff(D_table), gp
};;

{ .mmf
      // load start address for C_1...C_6 followed by T_table
      ld8 GR_C_START = [ GR_ADDR ]
      // load start address of D table
      ld8 GR_D_START = [ GR_D_ADDR ]
      // y = frcpa(a)
      frcpa.s1 FR_RCP, p6 = f1, f8
};;

{ .mmi
      // get normalized significand
      getf.sig GR_NORMSIG = FR_XNORM
      // get exponent (with sign bit)
      getf.exp GR_NORMEXPSGN = FR_XNORM
      // a zero significand also takes the early exit
(p7)  cmp.eq p12, p0 = GR_ARGSIG, r0
};;

{ .mii
      // load C_1
      ldfe FR_C1 = [ GR_C_START ], 16
      // mask selecting the sign bit of the getf.exp result
      mov GR_SGNMASK = 0x20000
      nop.i 0
};;

{ .mfb
      // load C_2
      ldfe FR_C2 = [ GR_C_START ], 16
(p12) fma.s0 f8 = f8, f1, f0
      // NaN/Infinities exit early
(p12) br.ret.spnt b0
};;

{ .mfi
      // load C_3, C_4
      ldfpd FR_C3, FR_C4 = [ GR_C_START ], 16
      // y = frcpa(a), set flags and result when argument is 0
      // only used when p6=0
      frcpa.s0 f8, p0 = f1, f8
      nop.i 0
};;

{ .mii
      // get GR_SIGN = sign
      and GR_SIGN = GR_NORMEXPSGN, GR_SGNMASK
      // eliminate leading 1 from GR_NORMSIG = 2nd table index
      shl GR_INDEX2 = GR_NORMSIG, 1
      // eliminate sign from exponent
      andcm GR_NORMEXP = GR_NORMEXPSGN, GR_SGNMASK
};;

{ .mfi
      // load C_5, C_6
(p6)  ldfpd FR_C5, FR_C6 = [ GR_C_START ], 16
      // r = 1-a*y
(p6)  fnma.s1 FR_R = FR_RCP, FR_XNORM, f1
      // Start computation of floor(exponent/3) by
      // computing (2^20+2)/3*exponent = exponent*0x55556
      // step 1: GR_EXP5 = 5*exponent
      // (2^{16}-1)/3 = 0x5555:
      // will form 0x5555*exponent by using shladd's
      shladd GR_EXP5 = GR_NORMEXP, 2, GR_NORMEXP
};;

{ .mib
      // Next several integer steps compute floor(exponent/3)
      // GR_TMP1 = (5*expon)*16
      shladd GR_TMP1 = GR_EXP5, 4, r0
      // GR_EXP3 = 3*exponent
      shladd GR_EXP3 = GR_NORMEXP, 1, GR_NORMEXP
      nop.b 0
};;

{ .mmi
      // GR_EXP6 = 6*exponent
      shladd GR_EXP6 = GR_EXP3, 1, r0
      // GR_EXP17 = 17*(5*expon) = 85*expon = 0x55*expon
      add GR_EXP17 = GR_EXP5, GR_TMP1
      // GR_IX2 = 2nd table index (8 bits)
      shr.u GR_IX2 = GR_INDEX2, 56
};;

{ .mmi
      // adjust T_table pointer by 2nd index (8-byte entries)
      shladd GR_T_INDEX = GR_IX2, 3, GR_C_START
      // adjust D_table pointer by 2nd index (4-byte entries)
      shladd GR_D_INDEX = GR_IX2, 2, GR_D_START
      // GR_TMP2 = (85*expon)*2^8 = 0x5500*expon
      shl GR_TMP2 = GR_EXP17, 8
};;

{ .mmi
      // GR_TMP3 = expon*(2^16-1)/3 = 0x5555*expon
      add GR_TMP3 = GR_EXP17, GR_TMP2;;
      // GR_TMP4 = expon*(2^20+2)/3 = expon*0x55556
      shladd GR_TMP4 = GR_TMP3, 4, GR_EXP6
      nop.i 0
};;

{ .mii
      nop.m 0
      // GR_EXP_RES = floor(expon/3)
      shr.u GR_EXP_RES = GR_TMP4, 20
      nop.i 0
};;

{ .mmi
      nop.m 0
      // r16 = 3*floor(expon/3)
      shladd r16 = GR_EXP_RES, 1, GR_EXP_RES
      // bias exponent
      add GR_EXPBIAS = GR_BIAS23, GR_EXP_RES
};;

{ .mmi
      // get remainder of exponent/3
      sub GR_EXP_MOD_3 = GR_NORMEXP, r16;;
      // add sign to exponent
      or GR_EXPSIGNRES = GR_EXPBIAS, GR_SIGN
      // remainder <<= 8 (256 table entries per remainder value)
      shl GR_REMTMP = GR_EXP_MOD_3, 8
};;

{ .mfi
      // adjust D_table pointer by 1st index
      shladd GR_IX_D = GR_REMTMP, 2, GR_D_INDEX
      // P_1 = C_1+C_2*r
(p6)  fma.s1 FR_P1 = FR_C2, FR_R, FR_C1
      // adjust T_table pointer by 1st index
      shladd GR_IX_T = GR_REMTMP, 3, GR_T_INDEX
}
{ .mfi
      // FR_SGNEXP = sign*2^{exponent/3}
(p6)  setf.exp FR_SGNEXP = GR_EXPSIGNRES
      // r^2 = r*r
(p6)  fma.s1 FR_R2 = FR_R, FR_R, f0
      nop.i 0
};;

{ .mfi
      // load D
(p6)  ldfs FR_D = [ GR_IX_D ]
      // P_2 = C_3+C_4*r
(p6)  fma.s1 FR_P2 = FR_C4, FR_R, FR_C3
      nop.i 0
}
{ .mfi
      // load T
(p6)  ldf8 FR_T = [ GR_IX_T ]
      // P_3 = C_5+C_6*r
(p6)  fma.s1 FR_P3 = FR_C6, FR_R, FR_C5
      nop.i 0
};;

{ .mfi
      nop.m 0
      // P_4 = D-r*P_1
(p6)  fnma.s1 FR_P4 = FR_R, FR_P1, FR_D
      nop.i 0
}
{ .mfi
      nop.m 0
      // r^3 = r*r^2
(p6)  fma.s1 FR_R3 = FR_R, FR_R2, f0
      nop.i 0
};;

{ .mfi
      nop.m 0
      // P_5 = P_2+r2*P_3
(p6)  fma.s1 FR_P5 = FR_R2, FR_P3, FR_P2
      nop.i 0
};;

{ .mfi
      nop.m 0
      // T = T*(sign*2^{exponent/3})
(p6)  fma.s1 FR_TF = FR_T, FR_SGNEXP, f0
      nop.i 0
}
{ .mfi
      nop.m 0
      // P = P_4-r3*P_5
(p6)  fnma.s1 FR_P = FR_R3, FR_P5, FR_P4
      nop.i 0
};;

{ .mfb
      nop.m 0
      // result = T+T*p
(p6)  fma.s0 f8 = FR_TF, FR_P, FR_TF
      br.ret.sptk b0
};;

GLOBAL_LIBM_END(cbrtl)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -