📄 e_powl.s
字号:
// load G_2 // X_2 = X_1 * Z_2 // Add offset to Table 2 ptr.// float_N = significand of N//{ .mmi(p0) ldfs FR_H_2 = [GR_Index2],8 ;; //// load H_2 // G = G * G_2//(p0) ldfe FR_h_2 = [GR_Index2],0 (p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; }{ .mmi nop.m 999 nop.m 999 nop.i 999;;}{ .mmi nop.m 999 nop.m 999 nop.i 999;;}{ .mmi nop.m 999 nop.m 999 nop.i 999;;}{ .mii nop.m 999 nop.i 999 ;;(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;; }{ .mfi(p0) shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 nop.f 999//// h = h_1 + h_2 // Adjust Index3 //(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr ;; }{ .mmb nop.m 999(p0) ldfe FR_h_3 = [GR_Index3],12 nop.b 999 ;;}{ .mmf(p0) ldfs FR_H_3 = [GR_Table_Ptr1],0 //// float_N = Make N a fp number// Load h_3// Get pointer to Q table. //(p0) ldfs FR_G_3 = [GR_Index3],0 (p0) fmpy.s1 FR_G = FR_G_1, FR_G_2 };;{ .mmi nop.m 999(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_Q#), gp nop.i 999};;{ .mmi ld8 GR_Table_Ptr = [GR_Table_Ptr] nop.m 999 nop.i 999};;{ .mfi(p0) ldfe FR_log2_hi = [GR_Table_Ptr],16(p0) fadd.s1 FR_H = FR_H_1, FR_H_2 nop.i 999 ;;}{ .mmf nop.m 999//// G = G_1 * G_2 * G_3 //(p0) ldfe FR_log2_lo = [GR_Table_Ptr],16 //// load h_2 // H = H_1 + H_2 // Get Index3//(p0) fadd.s1 FR_h = FR_h_1, FR_h_2 ;; }//// Load log2_lo part// r = G*S -1//{ .mfi(p0) ldfe FR_Q_6 = [GR_Table_Ptr],16 //// Load H_3//(p0) fcvt.xf FR_float_N = FR_float_N nop.i 999 ;;}//// Load Q_6//{ .mmi(p0) ldfe FR_Q_5 = [GR_Table_Ptr],16 ;; (p0) ldfe FR_Q_4 = [GR_Table_Ptr],16 nop.i 999 ;;}{ .mmi(p0) ldfe FR_Q_3 = [GR_Table_Ptr],16 ;; (p0) ldfe FR_Q_2 = [GR_Table_Ptr],16 nop.i 999 ;;}{ .mmf nop.m 999//// poly_lo = Q_5 + r * Q_6// Load Q_2// rsq = r * r //(p0) ldfe FR_Q_1 = [GR_Table_Ptr],16 //// h = h_1 + h_2 + h_3 // H = H_1 + H_2 + H_3 // Load G_3.// Begin Loading Q's - load log2_hi part//(p0) fmpy.s1 FR_G = FR_G, FR_G_3 }{ .mfi nop.m 999(p0) fadd.s1 FR_H = FR_H, FR_H_3 nop.i 999 };;//// Y_lo = poly + Y_lo //{ .mmi nop.m 999(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp nop.i 999};;{ .mmi ld8 GR_Table_Ptr = [GR_Table_Ptr] nop.m 999 nop.i 999};;{ .mfi nop.m 999(p0) fadd.s1 FR_h = FR_h, FR_h_3 nop.i 999 ;;}{ .mfi nop.m 999//// Load Q_5//(p0) fmpy.s1 FR_GS_hi = FR_G, FR_S nop.i 999}{ .mfi nop.m 999(p0) fms.s1 FR_r = FR_G, FR_S, f1 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5 nop.i 999}{ .mfi nop.m 999//// GS_hi = G*S// Load Q_4//(p0) fsub.s1 FR_r_cor = FR_GS_hi, f1 nop.i 999 ;;}{ .mfi nop.m 999(p0) fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi nop.i 999}{ .mfi nop.m 999(p0) fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1 nop.i 999 ;;}{ .mfi nop.m 999//// Load Q_3// r_cor = GS_hi -1// GS_lo = G*S - GS_hi//(p0) fmpy.s1 FR_rsq = FR_r, FR_r nop.i 999}{ .mfi nop.m 999(p0) fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H nop.i 999 ;;}{ .mfi nop.m 999//// poly = poly_hi + rsq * poly_lo // Tbl = float_N*log2_hi + H//(p0) fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h nop.i 999 ;;}{ .mfi nop.m 999//// r_cor = r_cor - r// poly_hi = r * Q_2 + Q_1//(p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4 nop.i 999}{ .mfi nop.m 999//// Load Q_1//(p0) fsub.s1 FR_r_cor = FR_r_cor, FR_r nop.i 999 ;;}{ .mfi nop.m 999// // Y_lo = float_N*log2_lo + h// (p0) fadd.s1 FR_Y_hi = FR_G, FR_r nop.i 999 ;;}{ .mfi nop.m 999//// poly_lo = Q_4 + r * poly_lo;;// r_cor = r_cor + GS_lo;;//(p0) fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3 nop.i 999}{ .mfi nop.m 999(p0) fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo nop.i 999 ;;}{ .mfi nop.m 999(p0) fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo nop.i 999}{ .mfi nop.m 999//// poly_lo = Q_3 + r * poly_lo;;//(p0) fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly nop.i 999 ;;}{ .mfi nop.m 999(p0) fsub.s1 FR_Y_lo = FR_G, FR_Y_hi nop.i 999}{ .mmi(p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;; (p0) ldfe FR_L_hi = [GR_Table_Ptr],16 nop.i 999 ;;}{ .mfi(p0) ldfe FR_L_lo = [GR_Table_Ptr],16 nop.f 999 nop.i 999 ;;}{ .mfi nop.m 999//// Y_hi = Tbl + r // r_cor = r_cor + Y_lo //(p0) fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor nop.i 999 ;;}{ .mfi nop.m 999// Y_lo = Tbl - Y_hi // poly = rsq * poly + r_cor//(p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_r nop.i 999 ;;}{ .mfb nop.m 999//// Y_lo = Y_lo + r //(p0) fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly //// Load L_Inv// Load L_hi// Load L_lo// all long before they are needed.// They are used in LOGL_RETURN PATH//br.cond.sptk L(LOGL_RETURN) ;; }L(LOGL80_NEAR): //// Branch LOGL80_NEAR//{ .mmi nop.m 999(p0) addl GR_Table_Ptr = @ltoff(Constants_log_80_P#), gp nop.i 999};;{ .mmi ld8 GR_Table_Ptr = [GR_Table_Ptr] nop.m 999 nop.i 999};;{ .mfi nop.m 999(p0) fmpy.s1 FR_Wsq = FR_W, FR_W (p0) add GR_Table_Ptr1 = 0x50,GR_Table_Ptr }//// Adjust ptr to 1/2 // Adjust Ptr1 to P_4//{ .mmi(p0) ldfe FR_Half = [GR_Table_Ptr],16 ;; (p0) ldfe FR_P_4 = [GR_Table_Ptr1],16 nop.i 999}//// Load 1/2 //{ .mmi(p0) ldfe FR_P_8 = [GR_Table_Ptr],16 ;; (p0) ldfe FR_P_3 = [GR_Table_Ptr1],16 nop.i 999}{ .mmi(p0) ldfe FR_P_7 = [GR_Table_Ptr],16 ;; (p0) ldfe FR_P_2 = [GR_Table_Ptr1],16 nop.i 999}//// Load P_7// half_W = .5 * W// Load P_3//{ .mmi(p0) ldfe FR_P_6 = [GR_Table_Ptr],16 ;; (p0) ldfe FR_P_1 = [GR_Table_Ptr1],16 nop.i 999 ;;}//// Load P_6// Wsq = w * w// poly = w*P_4 + P_3 // Load P_2//{ .mfi(p0) ldfe FR_P_5 = [GR_Table_Ptr],16 //// Load P_5// poly_lo = w * P_8 + P_7 // Y_hi = w - (1/2)w*w// Load P_1//(p0) fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq nop.i 999}{ .mfi nop.m 999(p0) fmpy.s1 FR_W3 = FR_Wsq, FR_W nop.i 999 };;//// Y_lo = W3 * poly + Y_lo//{ .mmi nop.m 999(p0) addl GR_Table_Ptr = @ltoff(Constants_exp_64_Arg#), gp nop.i 999};;{ .mmi ld8 GR_Table_Ptr = [GR_Table_Ptr] nop.m 999 nop.i 999};;{ .mmi(p0) ldfe FR_L_Inv = [GR_Table_Ptr],16 ;; (p0) ldfe FR_L_hi = [GR_Table_Ptr],16 nop.i 999 ;;}{ .mfi(p0) ldfe FR_L_lo = [GR_Table_Ptr],16 //// Load P_8// Load P_4//(p0) fmpy.s1 FR_half_W = FR_Half, FR_W nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7 nop.i 999}{ .mfi nop.m 999(p0) fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3 nop.i 999 ;;}{ .mfi nop.m 999(p0) fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W nop.i 999 ;;}{ .mfi nop.m 999//// W4 = Wsq * Wsq// poly = w *poly + P_2//(p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6 nop.i 999}{ .mfi nop.m 999(p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_2 nop.i 999 ;;}{ .mfi nop.m 999(p0) fsub.s1 FR_Y_lo = FR_W, FR_Y_hi nop.i 999 ;;}{ .mfi nop.m 999//// poly = w * poly + P_1// w3 = wsq * w//(p0) fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5 nop.i 999}{ .mfi nop.m 999//// poly_lo = w * poly_lo + P_6// Y_lo = W - Y_hi//(p0) fma.s1 FR_poly = FR_W, FR_poly, FR_P_1 nop.i 999 ;;}{ .mfi nop.m 999(p0) fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo nop.i 999 ;;}{ .mfi nop.m 999//// poly_lo = w * poly_lo + // Y_lo = Y_lo - w * (1/2)w//(p0) fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly nop.i 999 ;;}{ .mfi nop.m 999//// Y_lo = (W-Y_hi) - w * (1/2)w// poly = W4* poly_lo + poly //(p0) fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo nop.i 999 ;;}L(LOGL_RETURN): { .mfi(p0) add GR_Expo_Range = 0x2,r0 //// Load L_Inv// Load L_hi// Load L_lo// all long before they are needed.////// kernel_log_80 computed ln(X)// and return logX_hi and logX_lo as results.// PR_pow_Safe set as well. //(p0) fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo //// Compute Y * (logX_hi + logX_lo) // P_hi -> X // P_lo -> X_cor // (Manipulate names so that inputs are in// the place kernel_exp expects them)// Set GR_Flag to 2 // Set GR_Expo_Range to Double// // This function computes exp( x + x_cor) // Input FR 1: FR_X // Input FR 2: FR_X_cor // Input GR 1: GR_Flag // Input GR 2: GR_Expo_Range // Output FR 3: FR_Y_hi // Output FR 4: FR_Y_lo // Output FR 5: FR_Scale // Output PR 1: PR_Safe // (p0) cmp.eq.unc p15, p0 = r0, r0 };;{ .mmi(p0) addl GR_W1_ptr = @ltoff(Constants_exp_64_W1#), gp(p0) addl GR_W2_ptr = @ltoff(Constants_exp_64_W2#), gp(p0) add GR_Flag = 0x2,r0 };;{ .mmi ld8 GR_W1_ptr = [GR_W1_ptr] ld8 GR_W2_ptr = [GR_W2_ptr](p0) cmp.ne.unc p7, p0 = 0x1, GR_Flag };;{ .mlx nop.m 999(p0) movl GR_Mask = 0x1FFFF ;; }{ .mlx nop.m 999(p0) movl GR_BIAS = 0x0FFFF ;; }{ .mfi nop.m 999//// X_lo = Y * logX_lo//(p0) fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo nop.i 999 ;;}{ .mfi nop.m 999//// Set Safe=True // Flag is always 2 for this routine//(p0) fmpy.s1 FR_float_N = FR_X, FR_L_Inv nop.i 999}{ .mfi nop.m 999//// X_hi = Y * logX_hi + X_lo// Set GR_Flag = 2 for exp(x + xcor)//(p0) fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi nop.i 999 ;;}{ .mmi nop.m 999 ;;(p0) getf.exp GR_Expo_X = FR_X
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -