📄 e_powf.s
字号:
// NOTE(review): this excerpt starts inside an instruction bundle whose opening
// "{ .xxx" line and earlier slots are not visible here.  Only the final slot
// and the closing brace of that bundle are reproduced below, unchanged.
        nop.i 999
}
{ .mfi
        nop.m 999
        fma.s1          POW_f12 = POW_f1, POW_f2, f0          // f12 = f1 * f2
        nop.i 999
}
;;

// When p9 is set: p10 = NOT (6 <= pow_GR_true_exp_Y_Gpr).
// p10 later gates the direct "compute and return" bundle.
{ .mfi
        nop.f 999
(p9)    cmp.le.unc      p0, p10 = 6, pow_GR_true_exp_Y_Gpr
}
;;

{ .mfb
        nop.m 999
        fma.s1          POW_e123 = POW_e12, f1, POW_e3        // e123 = e12 + e3
(p8)    br.cond.spnt    L(POW_OVER_UNDER_X_NOT_INF)
}
;;

{ .mmf
        fma.s1          POW_q = POW_Z3sq, POW_q, POW_Z3       // q = Z3sq * q + Z3
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_ssq = POW_s, POW_s, f0            // ssq = s^2
        nop.i 999
}
{ .mfi
        nop.m 999
        fma.s1          POW_v4 = POW_s, POW_Q3, POW_Q2        // v4 = s * Q3 + Q2
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_v2 = POW_s, POW_Q1, POW_Q0_half   // v2 = s * Q1 + Q0_half
        nop.i 999
}
{ .mfi
        nop.m 999
        fma.s1          POW_1ps = f1, f1, POW_s               // 1ps = 1 + s
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_f3 = POW_e123, f1, f1             // f3 = e123 + 1
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_T1T2 = POW_T1, POW_T2, f0         // T1T2 = T1 * T2
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_v3 = POW_ssq, POW_Q4, POW_v4      // v3 = ssq * Q4 + v4
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_v21ps = POW_ssq, POW_v2, POW_1ps  // v21ps = ssq * v2 + (1 + s)
        nop.i 999
}
{ .mfi
        nop.m 999
        fma.s1          POW_s4 = POW_ssq, POW_ssq, f0         // s4 = ssq^2
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_f123 = POW_f12, POW_f3, f0        // f123 = f12 * f3
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_A = POW_2M, POW_T1T2, f0          // A = 2M * T1T2
        nop.i 999
}
;;

{ .mfi
        nop.m 999
(p12)   fmerge.s        POW_f123 = f8, POW_f123               // if x neg, y odd int: take the sign from f8
        nop.i 999
}
{ .mfi
        nop.m 999
//      fma.s1          POW_es = POW_ssq, POW_v3, POW_v2      // (disabled in the original source)
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_es = POW_s4, POW_v3, POW_v21ps    // es = s4 * v3 + v21ps
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_A = POW_A, POW_f123, f0           // A = A * f123
        nop.i 999
}
{ .mfi
        nop.m 999
//      fma.s1          POW_es = POW_es, POW_ssq, POW_1ps     // (disabled in the original source)
        nop.i 999
}
;;

{ .mfi
        nop.m 999
        fma.s1          POW_A = POW_A, POW_es, f0             // A = A * es
        nop.i 999
}
;;

// When p10 is set, form the single-precision result A * q + A in f8 and return.
{ .mfb
        nop.m 999
(p10)   fma.s           f8 = POW_A, POW_q, POW_A
(p10)   br.ret.sptk     b0
}
;;

// POSSIBLE_OVER_UNDER
// p6 = TRUE ==> Y negative
{ .mfi
        nop.m 999
        fmerge.s        POW_abs_A = f0, POW_A                 // abs_A = A with the sign of f0 (+0.0), i.e. |A|
        cmp.eq.unc      p0, p6 = pow_GR_sign_Y, r0            // p6 = (pow_GR_sign_Y != 0)
}
;;

{ .mib
        nop.m 999
        nop.i 999
(p6)    br.cond.spnt    L(POW_POSSIBLE_UNDER)
}
;;

// POSSIBLE_OVER
// We got an answer.
// overflow is a possibility, not a certainty

// We define an overflow when the answer, computed with
//    WRE set
//    user-defined rounding mode
// has magnitude >= 2^128 (register exponent 0x1007f), which is the value
// the code below compares against.

// double
// Largest double is 7FE (biased double)
//                   7FE - 3FF + FFFF = 103FE
// Create + largest_double_plus_ulp
// Create - largest_double_plus_ulp
// Calculate answer with WRE set.

// single
// Largest single is FE (biased single)
//                   FE - 7F + FFFF = 1007E
// Create + largest_single_plus_ulp
// Create - largest_single_plus_ulp
// Calculate answer with WRE set.

// Cases when answer is ldn+1 are as follows:
//  ldn                   ldn+1
// --+----------|----------+------------
//              |
//    +inf          +inf      -inf
//                  RN         RN
//                             RZ

// Put in s2 (td set, wre set)
{ .mfi
        mov             pow_GR_gt_ln = 0x1007f                // one above the largest single exponent (0x1007e)
        fsetc.s2        0x7F, 0x42
        nop.i 999
}
;;

// POW_gt_pln is built from exponent 0x1007f; POW_wre_urm_f8 is |A| * q + |A|
// rounded to single under status field s2 (WRE, user rounding mode).
{ .mfi
        setf.exp        POW_gt_pln = pow_GR_gt_ln
        fma.s.s2        POW_wre_urm_f8 = POW_abs_A, POW_q, POW_abs_A
        nop.i 999 ;;
}

// Return s2 to default
{ .mfi
        nop.m 999
        fsetc.s2        0x7F, 0x40
        nop.i 999
}
;;

// p7 = TRUE ==> yes, we have an overflow
{ .mfi
        nop.m 999
        fcmp.ge.unc.s1  p7, p0 = POW_wre_urm_f8, POW_gt_pln
        nop.i 999
}
;;

// The signed result A * q + A is always written to f8.  On overflow (p7) the
// error tag is set to 30 and control goes to the shared error region;
// otherwise the next bundle returns to the caller.
{ .mfb
(p7)    mov             pow_GR_tag = 30
        fma.s           f8 = POW_A, POW_q, POW_A
(p7)    br.cond.spnt    __libm_error_region
}
{ .mfb
        nop.m 999
        nop.f 999
(p0)    br.ret.sptk     b0
}
;;

L(POW_POSSIBLE_UNDER):
// We got an answer. input was < -2^9 but > -2^10 (double)
// We got an answer. input was < -2^6 but > -2^7  (float)
// underflow is a possibility, not a certainty

// We define an underflow when the answer with
//    ftz set
// is zero (tiny numbers become zero)
// Notice (from below) that if we have an unlimited exponent range,
// then there is an extra machine number E between the largest denormal and
// the smallest normal.
// So if with unbounded exponent we round to E or below, then we are
// tiny and underflow has occurred.
// But notice that you can be in a situation where we are tiny, namely
// rounded to E, but when the exponent is bounded we round to smallest
// normal.
// So the answer can be the smallest normal with underflow.
//                           E
// -----+--------------------+--------------------+-----
//      |                    |                    |
//   1.1...10 2^-3fff    1.1...11 2^-3fff    1.0...00 2^-3ffe
//   0.1...11 2^-3ffe   (biased, 1)
//    largest dn                               smallest normal

// Put in s2 (td set, ftz set)
{ .mfi
        nop.m 999
        fsetc.s2        0x7F, 0x41
        nop.i 999
}
;;

// Trial result A * q + A rounded to single under status field s2 (FTZ on).
{ .mfi
        nop.m 999
        fma.s.s2        POW_ftz_urm_f8 = POW_A, POW_q, POW_A
        nop.i 999
}
;;

// Return s2 to default
{ .mfi
        nop.m 999
        fsetc.s2        0x7F, 0x40
        nop.i 999
}
;;

// p7 = TRUE ==> yes, we have an underflow
{ .mfi
        nop.m 999
        fcmp.eq.unc.s1  p7, p0 = POW_ftz_urm_f8, f0
        nop.i 999
}
;;

// The result A * q + A is always written to f8.  On underflow (p7) the error
// tag is set to 31 and control goes to the shared error region; otherwise the
// next bundle returns to the caller.
{ .mfb
(p7)    mov             pow_GR_tag = 31
        fma.s           f8 = POW_A, POW_q, POW_A
(p7)    br.cond.spnt    __libm_error_region
}
;;

{ .mfb
        nop.m 999
        nop.f 999
        br.ret.sptk     b0
}
;;

L(POW_X_DENORM):
// Here if x unorm. Use POW_NORM_X for the getf instructions, and then go
// back to the normal path.
{ .mfi
        getf.exp        pow_GR_signexp_X = POW_NORM_X         // sign + exponent field of normalized x
        nop.f 999
        nop.i 999
}
;;

{ .mfi
        getf.sig        pow_GR_sig_X = POW_NORM_X             // significand of normalized x
        nop.f 999
        nop.i 999
}
;;

// Mask with pow_GR_17ones (presumably a 17-bit mask that drops the sign bit;
// its definition is outside this excerpt).
{ .mfi
        and             pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
        nop.f 999
}
;;

// Subtract pow_GR_16ones (presumably the exponent bias; defined outside this
// excerpt), shift the top significand bit out, and rejoin the common path.
{ .mib
        sub             pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
        shl             pow_GR_offset = pow_GR_sig_X, 1
        br.cond.sptk    L(POW_COMMON)
}
;;

L(POW_X_0_Y_0):
// When X is +-0 and Y is +-0, IEEE returns 1.0
// We call error support with this value
// NOTE(review): the excerpt is cut off in the middle of the branch operand
// below; the truncated text is reproduced exactly and the bundle is left open.
{ .mfb
        mov             pow_GR_tag = 32
        fma.s           f8 = f1, f1, f0                       // f8 = 1.0
        br.cond.sptk __
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -