📄 e_powl.s
字号:
// NOTE(review): this fragment starts in the middle of a bundle; the opening
// "{ .<template>" of the first bundle lies before the visible text and is
// deliberately not reconstructed here.
      ldfs          FR_H_2 = [GR_Index2], 8            // Load H_2
(p6)  tbit.nz.unc   p9, p0 = GR_fraction_y, 63         // Test x<0 and y odd integer
      add           GR_Table_Ptr = 0xbcc, GR_table_base // Constants_log_80_h3_G_H, G_3
}
;;

//
// For x < 0 and y an odd integer (p9), set sign = -1.
//
{ .mfi
      getf.exp      GR_M = FR_W                        // Get signexp of W
      nop.f         999
      pmpyshr2.u    GR_X_2 = GR_X_1, GR_Z_2, 15        // X_2 = X_1 * Z_2 (bits 15-30)
}
{ .mfi
      ldfe          FR_h_2 = [GR_Index2]               // Load h_2
(p9)  fnma.s1       FR_Sgn = f1, f1, f0                // Sgn = -(1*1) + 0 = -1: result negative
      sub           GR_N = GR_N, GR_exp_bias           // Get true exponent of x = N
}
;;

{ .mfi
      add           GR_Table_Ptr1 = 0xdc0, GR_table_base // Ptr to H_3
      fcmp.eq.s0    p11, p0 = FR_Input_Y, FR_Half      // Test y=0.5, also set denorm
(p6)  shl           GR_fraction_y = GR_fraction_y, 1   // Shift left 1 to get fraction
}
;;

{ .mmb
      setf.sig      FR_float_N = GR_N                  // Move integer N to an FR significand
(p6)  cmp.ne.unc    p8, p0 = GR_fraction_y, r0         // Test x<0 and y not integer
(p8)  br.cond.spnt  POWL_64_XNEG                       // Branch if x<0 and y not int
}
;;

//
// Raise a possible denormal-operand exception for both X and Y.
// Set up the P-table pointers in case |x| is near 1.
// Branch to the embedded sqrt(x) code if y = 0.5.
//
{ .mfi
      add           GR_P_ptr1 = 0x6b0, GR_table_base   // Constants_log_80_P, P8, NEAR path
      fcmp.eq.s0    p12, p0 = FR_Input_X, FR_Input_Y   // Dummy to set denormal
      add           GR_P_ptr2 = 0x700, GR_table_base   // Constants_log_80_P, P4, NEAR path
}
{ .mfb
      cmp.eq        p15, p14 = r0, r0                  // Assume result safe (no over/under)
      fsub.s1       FR_Delta = FR_Input_Y, f1          // Delta = y - 1.0
(p11) br.cond.spnt  POWL_64_SQRT                       // Branch if y=0.5
}
;;

//
// Computes ln( x ) to extra precision
// Input  FR 1: FR_X
// Output FR 2: FR_Y_hi
// Output FR 3: FR_Y_lo
// Output PR 1: PR_Safe
//
{ .mfi
      and           GR_M = GR_exp_mask, GR_M           // Mask to get exponent of W
      nop.f         999
      extr.u        GR_Index3 = GR_X_2, 1, 5           // Get index3
}
;;

{ .mmi
      shladd        GR_Table_Ptr1 = GR_Index3, 2, GR_Table_Ptr1 // Ptr to H_3
      shladd        GR_Index3 = GR_Index3, 4, GR_Table_Ptr      // Ptr to G_3
      sub           GR_M = GR_M, GR_exp_bias           // Get true exponent of W
}
;;

{ .mib
      ldfs          FR_G_3 = [GR_Index3], -12          // Load G_3
      cmp.gt        p7, p14 = -8, GR_M                 // Test if |x-1| < 2^-8
(p7)  br.cond.spnt  LOGL80_NEAR                        // Branch if |x-1| < 2^-8
}
;;

// ---------------------------------------------------------------------
// Main log path: here if |x-1| >= 2^-8
// ---------------------------------------------------------------------
{ .mmf
      ldfs          FR_H_3 = [GR_Table_Ptr1]           // Load H_3
      nop.m         999
      nop.f         999
}
;;

{ .mfi
      ldfe          FR_h_3 = [GR_Index3]               // Load h_3
      fmerge.se     FR_S = f1, FR_Z                    // S = merge of 1.0 and signif(Z)
      nop.i         999
}
{ .mfi
      add           GR_Table_Ptr = 0x740, GR_table_base // Constants_log_80_Q
      fmpy.s1       FR_G = FR_G_1, FR_G_2              // G = G_1 * G_2
      nop.i         999
}
;;

//
// Start walking the Q table (post-increment 16 per entry):
// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
//
{ .mfi
      ldfe          FR_log2_hi = [GR_Table_Ptr], 16    // Load log2_hi
      fadd.s1       FR_H = FR_H_1, FR_H_2              // H = H_1 + H_2
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_log2_lo = [GR_Table_Ptr], 16    // Load log2_lo
      fadd.s1       FR_h = FR_h_1, FR_h_2              // h = h_1 + h_2
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_Q_6 = [GR_Table_Ptr], 16        // Load Q_6
      fcvt.xf       FR_float_N = FR_float_N            // Convert integer N to floating point
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_Q_5 = [GR_Table_Ptr], 16        // Load Q_5
      nop.f         999
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_Q_4 = [GR_Table_Ptr], 16        // Load Q_4
      fmpy.s1       FR_G = FR_G, FR_G_3                // G = G_1 * G_2 * G_3
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_Q_3 = [GR_Table_Ptr], 16        // Load Q_3
      fadd.s1       FR_H = FR_H, FR_H_3                // H = H_1 + H_2 + H_3
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_Q_2 = [GR_Table_Ptr], 16        // Load Q_2
      fadd.s1       FR_h = FR_h, FR_h_3                // h = h_1 + h_2 + h_3
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_Q_1 = [GR_Table_Ptr], 16        // Load Q_1
      fmpy.s1       FR_GS_hi = FR_G, FR_S              // GS_hi = G*S
      nop.i         999
}
{ .mfi
      nop.m         999
      fms.s1        FR_r = FR_G, FR_S, f1              // r = G*S - 1
      nop.i         999
}
;;

{ .mfi
      getf.exp      GR_Delta_Exp = FR_Delta            // Get signexp of y-1 for exp calc
      fma.s1        FR_poly_lo = FR_r, FR_Q_6, FR_Q_5  // poly_lo = Q_5 + r * Q_6
      nop.i         999
}
{ .mfi
      nop.m         999
      fsub.s1       FR_r_cor = FR_GS_hi, f1            // r_cor = GS_hi - 1
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fms.s1        FR_GS_lo = FR_G, FR_S, FR_GS_hi    // GS_lo = G*S - GS_hi
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fmpy.s1       FR_rsq = FR_r, FR_r                // rsq = r * r
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        FR_G = FR_float_N, FR_log2_hi, FR_H // G = float_N*log2_hi + H
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_Y_lo = FR_float_N, FR_log2_lo, FR_h // Y_lo = float_N*log2_lo + h
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4 // poly_lo = Q_4 + r * poly_lo
      nop.i         999
}
{ .mfi
      nop.m         999
      fsub.s1       FR_r_cor = FR_r_cor, FR_r          // r_cor = r_cor - r
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_r, FR_Q_2, FR_Q_1     // poly_hi = r * Q_2 + Q_1
      nop.i         999
}
{ .mfi
      nop.m         999
      fadd.s1       FR_Y_hi = FR_G, FR_r               // Y_hi = G + r
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3 // poly_lo = Q_3 + r * poly_lo
      nop.i         999
}
{ .mfi
      nop.m         999
      fadd.s1       FR_r_cor = FR_r_cor, FR_GS_lo      // r_cor = r_cor + GS_lo
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fsub.s1       FR_Y_lo_2 = FR_G, FR_Y_hi          // Y_lo_2 = G - Y_hi
      nop.i         999
}
;;

{ .mfi
      add           GR_Table_Ptr = 0x0, GR_table_base  // Constants_exp_64_Arg
      fadd.s1       FR_r_cor = FR_r_cor, FR_Y_lo       // r_cor = r_cor + Y_lo
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_rsq, FR_poly_lo, FR_poly // poly = poly_hi + rsq * poly_lo
      nop.i         999
}
;;

//
// L_hi and L_lo are loaded here, well before they are needed;
// they are consumed on the LOGL_RETURN path.
//
{ .mfi
      ldfe          FR_L_hi = [GR_Table_Ptr], 16       // Load L_hi
      fadd.s1       FR_Y_lo = FR_Y_lo_2, FR_r          // Y_lo = Y_lo_2 + r
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_rsq, FR_poly, FR_r_cor // poly = rsq * poly + r_cor
      nop.i         999
}
;;

{ .mfb
      ldfe          FR_L_lo = [GR_Table_Ptr], 16       // Load L_lo
      fadd.s1       FR_Y_lo = FR_Y_lo, FR_poly         // Y_lo = Y_lo + poly
      br.cond.sptk  LOGL_RETURN                        // Branch to common code
}
;;

// ---------------------------------------------------------------------
// Near-1 log path: here if |x-1| < 2^-8
//
// Evaluates, in terms of W:
//   poly_lo = ((P_8*W + P_7)*W + P_6)*W + P_5
//   poly    = ((P_4*W + P_3)*W + P_2)*W + P_1 + poly_lo*W^4
//   Y_hi    = W - W*(W/2)
//   Y_lo    = (W - Y_hi) - W*(W/2) + poly*W^3
// ---------------------------------------------------------------------
LOGL80_NEAR:
{ .mmf
      ldfe          FR_P_8 = [GR_P_ptr1], 16           // Load P_8
      ldfe          FR_P_4 = [GR_P_ptr2], 16           // Load P_4
      fmpy.s1       FR_Wsq = FR_W, FR_W                // Wsq = W * W
}
;;

{ .mmi
      ldfe          FR_P_7 = [GR_P_ptr1], 16           // Load P_7
      ldfe          FR_P_3 = [GR_P_ptr2], 16           // Load P_3
      nop.i         999
}
;;

{ .mmi
      ldfe          FR_P_6 = [GR_P_ptr1], 16           // Load P_6
      ldfe          FR_P_2 = [GR_P_ptr2], 16           // Load P_2
      nop.i         999
}
;;

{ .mmi
      ldfe          FR_P_5 = [GR_P_ptr1], 16           // Load P_5
      ldfe          FR_P_1 = [GR_P_ptr2], 16           // Load P_1
      nop.i         999
}
;;

{ .mfi
      getf.exp      GR_Delta_Exp = FR_Delta            // Get signexp of y-1 for exp calc
      fmpy.s1       FR_W4 = FR_Wsq, FR_Wsq             // W4 = Wsq * Wsq
      nop.i         999
}
{ .mfi
      add           GR_Table_Ptr = 0x0, GR_table_base  // Constants_exp_64_Arg
      fmpy.s1       FR_W3 = FR_Wsq, FR_W               // W3 = Wsq * W
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fmpy.s1       FR_half_W = FR_Half, FR_W          // half_W = Half * W
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_L_hi = [GR_Table_Ptr], 16       // Load L_hi
      fma.s1        FR_poly_lo = FR_W, FR_P_8, FR_P_7  // poly_lo = W*P_8 + P_7
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_W, FR_P_4, FR_P_3     // poly = W*P_4 + P_3
      nop.i         999
}
;;

{ .mfi
      ldfe          FR_L_lo = [GR_Table_Ptr], 16       // Load L_lo
      fnma.s1       FR_Y_hi = FR_W, FR_half_W, FR_W    // Y_hi = W - W*half_W
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_poly_lo = FR_W, FR_poly_lo, FR_P_6 // poly_lo = W*poly_lo + P_6
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_W, FR_poly, FR_P_2    // poly = W*poly + P_2
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fsub.s1       FR_Y_lo = FR_W, FR_Y_hi            // Y_lo = W - Y_hi
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_poly_lo = FR_W, FR_poly_lo, FR_P_5 // poly_lo = W*poly_lo + P_5
      nop.i         999
}
{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_W, FR_poly, FR_P_1    // poly = W*poly + P_1
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fnma.s1       FR_Y_lo = FR_W, FR_half_W, FR_Y_lo // Y_lo = Y_lo - W*half_W
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_poly = FR_poly_lo, FR_W4, FR_poly // poly = poly_lo*W4 + poly
      nop.i         999
}
;;

{ .mfi
      nop.m         999
      fma.s1        FR_Y_lo = FR_poly, FR_W3, FR_Y_lo  // Y_lo = poly*W3 + Y_lo
      nop.i         999
}
;;

// ---------------------------------------------------------------------
// Common code for completion of both logx paths
//
// L_hi, L_lo already loaded.
//
// kernel_log_80 computed ln(X)
// and returns logX_hi and logX_lo as results.
// PR_pow_Safe set as well.
//
// Compute Y * (logX_hi + logX_lo)
// P_hi -> X
// P_lo -> X_cor
// (Manipulate names so that inputs are in
// the place kernel_exp expects them)
//
// This function computes exp( x + x_cor)
// Input  FR 1: FR_X
// Input  FR 2: FR_X_cor
// Output FR 3: FR_Y_hi
// Output FR 4: FR_Y_lo
// Output FR 5: FR_Scale
// Output PR 1: PR_Safe
//
// P15 is True
// ---------------------------------------------------------------------
LOGL_RETURN:
// Load constants used in computing N using right-shift technique
{ .mlx
      mov           GR_exp_2tom51 = 0xffff-51
      movl          GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
}
{ .mlx
      add           GR_Special_Exp = -50, GR_exp_bias  // Special_Exp = exp_bias - 50
      movl          GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
}
;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -