📄 e_powf.s
字号:
// NOTE(review): this span is a fragment of a larger routine.  It begins inside
// an instruction bundle whose opening brace and template directive precede
// this span, and it ends on the POW_X_0_Y_0 label whose body follows it.
// The text below restores one instruction per line: a `//` comment runs to the
// end of the physical line, so no instruction may follow a comment on the same
// line.  Instruction order, predicates, bundle templates and stops (`;;`) are
// unchanged.
(p12) mov pow_GR_xneg_yodd = 1
      fnma.s1 POW_f12 = POW_Nfloat, POW_log2_by_128_lo, f1
      nop.i 999
}
{ .mfb
      nop.m 999
      fnma.s1 POW_s = POW_Nfloat, POW_log2_by_128_hi, POW_Z
(p7)  br.ret.spnt b0                   // Early exit if y=1.0, result is x
}
;;

// Split N: index1 = N & 0x0f, index2 = N & 0x70, M = N >> 7
{ .mmi
      and pow_GR_index1 = 0x0f, pow_GR_int_N
      and pow_GR_index2 = 0x70, pow_GR_int_N
      shr pow_int_GR_M = pow_GR_int_N, 7    // M = N/128
}
;;

// Table addresses: index1 is scaled by 16 (shladd ..., 4); index2 is added
// as-is to the second table base.  M is biased by pow_GR_16ones.
{ .mfi
      shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
      fma.s1 POW_q = POW_Z3, POW_Q1, POW_Q0_half
      add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M
}
{ .mfi
      add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
      fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
      nop.i 999
}
;;

{ .mmi
      ldfe POW_T1 = [pow_AD_T1]
      ldfe POW_T2 = [pow_AD_T2]
      nop.i 999
}
;;

// f123 = f12*(e3+1) = f12*e3+f12
{ .mfi
      setf.exp POW_2M = pow_int_GR_M
      fma.s1 POW_f123 = POW_e3,POW_f12,POW_f12
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 POW_ssq = POW_s, POW_s, f0
      nop.i 999
}
;;

{ .mfi
      nop.m 999
      fma.s1 POW_v2 = POW_s, POW_Q2, POW_Q1
      and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;

// p12/p13 select the sign of T1*T2 below (p12 when pow_GR_xneg_yodd != 0)
{ .mfi
      cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
      fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
      sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
}
;;

// p8 TRUE ==> |Y(G + r)| >= 7

// single
//     -2^7   -2^6             2^6   2^7
// -----+-----+----+ ... +-----+-----+-----
//  p8  |             p9             |  p8
//      |     |       p10      |     |

// Form signexp of constants to indicate overflow
{ .mfi
      mov pow_GR_big_pos = 0x1007f
      nop.f 999
      cmp.le p8,p9 = 7, pow_GR_true_exp_Y_Gpr
}
{ .mfi
      mov pow_GR_big_neg = 0x3007f
      nop.f 999
      andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
}
;;

// Form big positive and negative constants to test for possible overflow
// Scale both terms of the polynomial by POW_f123
{ .mfi
      setf.exp POW_big_pos = pow_GR_big_pos
      fma.s1 POW_ssq = POW_ssq, POW_f123, f0
(p9)  cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr
}
{ .mfb
      setf.exp POW_big_neg = pow_GR_big_neg
      fma.s1 POW_1ps = POW_s, POW_f123, POW_f123
(p8)  br.cond.spnt POW_OVER_UNDER_X_NOT_INF
}
;;

// T1T2 = -(T1*T2) under p12, +(T1*T2) under p13
{ .mfi
      nop.m 999
(p12) fnma.s1 POW_T1T2 = POW_T1, POW_T2, f0
      nop.i 999
}
{ .mfi
      nop.m 999
(p13) fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
      nop.i 999
}
;;

{ .mfi
      nop.m 999
      fma.s1 POW_v210 = POW_s, POW_v2, POW_Q0_half
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M
      nop.i 999
}
;;

{ .mfi
      nop.m 999
      fma.s1 POW_es = POW_ssq, POW_v210, POW_1ps
      nop.i 999
}
{ .mfi
      nop.m 999
      fma.s1 POW_A = POW_T1T2, POW_2Mqp1, f0
      nop.i 999
}
;;

// Dummy op to set inexact
{ .mfi
      nop.m 999
      fma.s0 POW_tmp = POW_2M, POW_q, POW_2M
      nop.i 999
}
;;

// Final result f8 = A * es, rounded to single in status field s0
{ .mfb
      nop.m 999
      fma.s.s0 f8 = POW_A, POW_es, f0
(p10) br.ret.sptk b0                   // Exit main branch if no over/underflow
}
;;

// POSSIBLE_OVER_UNDER
// p6 = TRUE ==> Y_Gpr negative
// Result is already computed.  We just need to know if over/underflow occurred.

{ .mfb
      cmp.eq p0,p6 = pow_GR_sign_Y_Gpr, r0
      nop.f 999
(p6)  br.cond.spnt POW_POSSIBLE_UNDER
}
;;

// POSSIBLE_OVER
// We got an answer.
// overflow is a possibility, not a certainty

// We define an overflow when the answer with
//    WRE set
//    user-defined rounding mode

// double
// Largest double is 7FE (biased double)
//                   7FE - 3FF + FFFF = 103FE
// Create + largest_double_plus_ulp
// Create - largest_double_plus_ulp
// Calculate answer with WRE set.

// single
// Largest single is FE (biased single)
//                   FE - 7F + FFFF = 1007E
// Create + largest_single_plus_ulp
// Create - largest_single_plus_ulp
// Calculate answer with WRE set.

// Cases when answer is ldn+1  are as follows:
//  ldn                   ldn+1
// --+----------|----------+------------
//              |
//    +inf          +inf      -inf
//                  RN         RN
//                             RZ

// Put in s2 (td set, wre set)
{ .mfi
      nop.m 999
      fsetc.s2 0x7F,0x42
      nop.i 999
}
;;

// Recompute the result in s2 for the overflow test
{ .mfi
      nop.m 999
      fma.s.s2 POW_wre_urm_f8 = POW_A, POW_es, f0
      nop.i 999
}
;;

// Return s2 to default
{ .mfi
      nop.m 999
      fsetc.s2 0x7F,0x40
      nop.i 999
}
;;

// p7 = TRUE ==> yes, we have an overflow
{ .mfi
      nop.m 999
      fcmp.ge.s1 p7, p8 = POW_wre_urm_f8, POW_big_pos
      nop.i 999
}
;;

// Only if not >= big_pos (p8): also check against big_neg
{ .mfi
      nop.m 999
(p8)  fcmp.le.s1 p7, p0 = POW_wre_urm_f8, POW_big_neg
      nop.i 999
}
;;

{ .mbb
(p7)  mov pow_GR_tag = 30
(p7)  br.cond.spnt __libm_error_region // Branch if overflow
      br.ret.sptk b0                   // Exit if did not overflow
}
;;


POW_POSSIBLE_UNDER:
// We got an answer. input was < -2^9 but > -2^10 (double)
// We got an answer. input was < -2^6 but > -2^7 (float)
// underflow is a possibility, not a certainty

// We define an underflow when the answer with
//    ftz set
// is zero (tiny numbers become zero)

// Notice (from below) that if we have an unlimited exponent range,
// then there is an extra machine number E between the largest denormal and
// the smallest normal.

// So if with unbounded exponent we round to E or below, then we are
// tiny and underflow has occurred.

// But notice that you can be in a situation where we are tiny, namely
// rounded to E, but when the exponent is bounded we round to smallest
// normal. So the answer can be the smallest normal with underflow.

//                           E
//      -----+--------------------+--------------------+-----
//           |                    |                    |
//   1.1...10 2^-3fff    1.1...11 2^-3fff    1.0...00 2^-3ffe
//   0.1...11 2^-3ffe                                   (biased, 1)
//    largest dn                               smallest normal

// Form small constant (2^-170) to correct underflow result near region of
// smallest denormal in round-nearest.

// Put in s2 (td set, ftz set)
.pred.rel "mutex",p12,p13
{ .mfi
      mov pow_GR_Fpsr = ar40           // Read the fpsr--need to check rc.s0
      fsetc.s2 0x7F,0x41
      mov pow_GR_rcs0_mask = 0x0c00    // Set mask for rc.s0
}
{ .mfi
(p12) mov pow_GR_tmp = 0x2ffff - 170
      nop.f 999
(p13) mov pow_GR_tmp = 0x0ffff - 170
}
;;

{ .mfi
      setf.exp POW_eps = pow_GR_tmp    // Form 2^-170
      fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_es, f0
      nop.i 999
}
;;

// Return s2 to default
{ .mfi
      nop.m 999
      fsetc.s2 0x7F,0x40
      nop.i 999
}
;;

// p7 = TRUE ==> yes, we have an underflow
{ .mfi
      nop.m 999
      fcmp.eq.s1 p7, p0 = POW_ftz_urm_f8, f0
      nop.i 999
}
;;

// The stop inside this bundle is intentional: the cmp consumes the result
// of the preceding and.
{ .mmi
(p7)  and pow_GR_rcs0 = pow_GR_rcs0_mask, pow_GR_Fpsr // Isolate rc.s0
;;
(p7)  cmp.eq.unc p6,p0 = pow_GR_rcs0, r0 // Test for round to nearest
      nop.i 999
}
;;

// Tweak result slightly if underflow to get correct rounding near smallest
// denormal if round-nearest
{ .mfi
      nop.m 999
(p6)  fms.s.s0 f8 = POW_A, POW_es, POW_eps
      nop.i 999
}
{ .mbb
(p7)  mov pow_GR_tag = 31
(p7)  br.cond.spnt __libm_error_region // Branch if underflow
      br.ret.sptk b0                   // Exit if did not underflow
}
;;


POW_X_DENORM:
// Here if x unorm. Use the NORM_X for getf instructions, and then back
// to normal path
{ .mfi
      getf.exp pow_GR_signexp_X = POW_NORM_X
      nop.f 999
      nop.i 999
}
;;

{ .mib
      getf.sig pow_GR_sig_X = POW_NORM_X
      nop.i 999
      br.cond.sptk POW_COMMON
}
;;


POW_X_0:
// Here if x=0 and y not nan
//
// We have the following cases:
//   p6  x=0  and  y>0 and is an integer (may be even or odd)
//   p7  x=0  and  y>0 and is NOT an integer, return +0
//   p8  x=0  and  y>0 and so big as to always be an even integer, return +0
//   p9  x=0  and  y>0 and may not be integer
//   p10 x=0  and  y>0 and is an odd  integer, return x
//   p11 x=0  and  y>0 and is an even integer, return +0
//   p12 used in dummy fcmp to set denormal flag if y=unorm
//   p13 x=0  and  y>0
//   p14 x=0  and  y=0, branch to code for calling error handling
//   p15 x=0  and  y<0, branch to code for calling error handling
//
{ .mfi
      getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
      fcmp.lt.s1 p15,p13 = f9, f0           // Test for y<0
      and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
}
{ .mfb
      cmp.ne p14,p0 = pow_GR_y_zero,r0      // Test for y=0
      fcvt.xf POW_float_int_Y = POW_int_Y
(p14) br.cond.spnt POW_X_0_Y_0              // Branch if x=0 and y=0
}
;;

// If x=0 and y>0, test y and flag denormal
{ .mfb
(p13) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
(p13) fcmp.eq.s0 p12,p0 = f9,f0    // If x=0, y>0 dummy op to flag denormal
(p15) br.cond.spnt POW_X_0_Y_NEG   // Branch if x=0 and y<0
}
;;

// Here if x=0 and y>0
{ .mfi
      nop.m 999
(p9)  fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y // Test y=int
      nop.i 999
}
{ .mfi
      nop.m 999
(p8)  fma.s.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
      nop.i 999
}
;;

{ .mfi
      nop.m 999
(p7)  fma.s.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y>0 and not integer
(p6)  tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
}
;;

// Note if x=0, y>0 and odd integer, just return x
{ .mfb
      nop.m 999
(p11) fma.s.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y even integer
      br.ret.sptk b0         // Exit if x=0 and y>0
}
;;

POW_X_0_Y_0:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -