📄 s_log1pf.s
字号:
FR_AA = f47 FR_log2_hi = f47 // Shared with AA FR_BB = f48FR_log2_lo = f48 // Shared with BB FR_S_lo = f49 FR_two_negN = f50 FR_float_N = f51 FR_Q4 = f52 FR_dummy = f52 // Shared with Q4FR_P4 = f52 // Shared with Q4FR_Threshold = f52// Shared with Q4FR_Q3 = f53 FR_P3 = f53 // Shared with Q3FR_Tiny = f53 // Shared with Q3FR_Q2 = f54 FR_P2 = f54 // Shared with Q2FR_1LN10_hi = f54 // Shared with Q2FR_Q1 = f55 FR_P1 = f55 // Shared with Q1 FR_1LN10_lo = f55 // Shared with Q1 FR_P5 = f98 FR_SCALE = f98 FR_Output_X_tmp = f99 GR_Expo_Range = r32GR_Table_Base = r34GR_Table_Base1 = r35GR_Table_ptr = r36 GR_Index2 = r37 GR_signif = r38 GR_X_0 = r39 GR_X_1 = r40 GR_X_2 = r41 GR_Z_1 = r42 GR_Z_2 = r43 GR_N = r44 GR_Bias = r45 GR_M = r46 GR_ScaleN = r47 GR_Index3 = r48 GR_Perturb = r49 GR_Table_Scale = r50 GR_SAVE_PFS = r51GR_SAVE_B0 = r52GR_SAVE_GP = r53GR_Parameter_X = r54GR_Parameter_Y = r55GR_Parameter_RESULT = r56GR_Parameter_TAG = r57 .section .text.proc log1pf#.global log1pf#.align 64 log1pf:#ifdef _LIBC.global __log1pf__log1pf:#endif{ .mfialloc r32 = ar.pfs,0,22,4,0(p0) fsub.s1 FR_Neg_One = f0,f1 (p0) cmp.eq.unc p7, p0 = r0, r0 }{ .mfi(p0) cmp.ne.unc p14, p0 = r0, r0 (p0) fnorm.s1 FR_X_Prime = FR_Input_X (p0) cmp.eq.unc p15, p0 = r0, r0 ;; }{ .mfi nop.m 999(p0) fclass.m.unc p6, p0 = FR_Input_X, 0x1E3 nop.i 999};;{ .mfi nop.m 999(p0) fclass.nm.unc p10, p0 = FR_Input_X, 0x1FF nop.i 999};;{ .mfi nop.m 999(p0) fcmp.eq.unc.s1 p9, p0 = FR_Input_X, f0 nop.i 999}{ .mfi nop.m 999(p0) fadd FR_Em1 = f0,f0 nop.i 999 ;;}{ .mfi nop.m 999(p0) fadd FR_E = f0,f1 nop.i 999 ;;}{ .mfi nop.m 999(p0) fcmp.eq.unc.s1 p8, p0 = FR_Input_X, FR_Neg_One nop.i 999}{ .mfi nop.m 999(p0) fcmp.lt.unc.s1 p13, p0 = FR_Input_X, FR_Neg_One nop.i 999}L(LOG_BEGIN): { .mfi nop.m 999(p0) fadd.s1 FR_Z = FR_X_Prime, FR_E nop.i 999}{ .mlx nop.m 999(p0) movl GR_Table_Scale = 0x0000000000000018 ;; }{ .mmi nop.m 999// // Create E = 1 and Em1 = 0 // Check for X == 0, meaning log(1+0)// Check for X < -1, meaning log(negative)// Check for X == -1, meaning log(0)// Normalize x // Identify NatVals, NaNs, Infs. // Identify EM unsupporteds. // Identify Negative values - us S1 so as// not to raise denormal operand exception // Set p15 to true for log1pf// Set p14 to false for log1pf// Set p7 true for log and log1pf// (p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h1#),gp nop.i 999}{ .mfi nop.m 999(p0) fmax.s1 FR_AA = FR_X_Prime, FR_E nop.i 999 ;;}{ .mfi ld8 GR_Table_Base = [GR_Table_Base](p0) fmin.s1 FR_BB = FR_X_Prime, FR_E nop.i 999}{ .mfb nop.m 999(p0) fadd.s1 FR_W = FR_X_Prime, FR_Em1 // // Begin load of constants base// FR_Z = Z = |x| + E // FR_W = W = |x| + Em1// AA = fmax(|x|,E)// BB = fmin(|x|,E)//(p6) br.cond.spnt L(LOG_64_special) ;; }{ .mib nop.m 999 nop.i 999(p10) br.cond.spnt L(LOG_64_unsupported) ;; }{ .mib nop.m 999 nop.i 999(p13) br.cond.spnt L(LOG_64_negative) ;; }{ .mib(p0) getf.sig GR_signif = FR_Z nop.i 999(p9) br.cond.spnt L(LOG_64_one) ;; }{ .mib nop.m 999 nop.i 999(p8) br.cond.spnt L(LOG_64_zero) ;; }{ .mfi(p0) getf.exp GR_N = FR_Z // // Raise possible denormal operand exception // Create Bias// // This function computes ln( x + e ) // Input FR 1: FR_X = FR_Input_X // Input FR 2: FR_E = FR_E// Input FR 3: FR_Em1 = FR_Em1 // Input GR 1: GR_Expo_Range = GR_Expo_Range = 1// Output FR 4: FR_Y_hi // Output FR 5: FR_Y_lo // Output FR 6: FR_Scale // Output PR 7: PR_Safe //(p0) fsub.s1 FR_S_lo = FR_AA, FR_Z //// signif = getf.sig(Z)// abs_W = fabs(w)//(p0) extr.u GR_Table_ptr = GR_signif, 59, 4 ;; }{ .mfi nop.m 999(p0) fmerge.se FR_S_hi = f1,FR_Z (p0) extr.u GR_X_0 = GR_signif, 49, 15 }{ .mmi nop.m 999(p0) addl GR_Table_Base1 = @ltoff(Constants_Z_G_H_h2#),gp nop.i 999};;{ .mlx ld8 GR_Table_Base1 = [GR_Table_Base1](p0) movl GR_Bias = 0x000000000000FFFF ;; }{ .mfi nop.m 999(p0) fabs FR_abs_W = FR_W (p0) pmpyshr2.u GR_Table_ptr = GR_Table_ptr,GR_Table_Scale,0 }{ .mfi nop.m 999// // Branch out for special input values // (p0) fcmp.lt.unc.s0 p8, p0 = FR_Input_X, f0 nop.i 999 ;;}{ .mfi nop.m 999//// X_0 = extr.u(signif,49,15)// Index1 = extr.u(signif,59,4)//(p0) fadd.s1 FR_S_lo = FR_S_lo, FR_BB nop.i 999 ;;}{ .mii nop.m 999 nop.i 999 ;;//// Offset_to_Z1 = 24 * Index1// For performance, don't use result// for 3 or 4 cycles.//(p0) add GR_Table_ptr = GR_Table_ptr, GR_Table_Base ;; }//// Add Base to Offset for Z1// Create Bias{ .mmi(p0) ld4 GR_Z_1 = [GR_Table_ptr],4 ;; (p0) ldfs FR_G = [GR_Table_ptr],4 nop.i 999 ;;}{ .mmi(p0) ldfs FR_H = [GR_Table_ptr],8 ;; (p0) ldfd FR_h = [GR_Table_ptr],0 (p0) pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 }//// Load Z_1 // Get Base of Table2 //{ .mfi(p0) getf.exp GR_M = FR_abs_W nop.f 999 nop.i 999 ;;}{ .mii nop.m 999 nop.i 999 ;;//// M = getf.exp(abs_W)// S_lo = AA - Z// X_1 = pmpyshr2(X_0,Z_1,15)//(p0) sub GR_M = GR_M, GR_Bias ;; }// // M = M - Bias// Load G1// N = getf.exp(Z)//{ .mii(p0) cmp.gt.unc p11, p0 = -80, GR_M (p0) cmp.gt.unc p12, p0 = -7, GR_M ;; (p0) extr.u GR_Index2 = GR_X_1, 6, 4 ;; }{ .mib nop.m 999//// if -80 > M, set p11// Index2 = extr.u(X_1,6,4)// if -7 > M, set p12// Load H1//(p0) pmpyshr2.u GR_Index2 = GR_Index2,GR_Table_Scale,0 (p11) br.cond.spnt L(log1pf_small) ;; }{ .mib nop.m 999 nop.i 999(p12) br.cond.spnt L(log1pf_near) ;; }{ .mii(p0) sub GR_N = GR_N, GR_Bias //// poly_lo = r * poly_lo //(p0) add GR_Perturb = 0x1, r0 ;; (p0) sub GR_ScaleN = GR_Bias, GR_N }{ .mii(p0) setf.sig FR_float_N = GR_N nop.i 999 ;;//// Prepare Index2 - pmpyshr2.u(X_1,Z_2,15)// Load h1// S_lo = S_lo + BB // Branch for -80 > M// (p0) add GR_Index2 = GR_Index2, GR_Table_Base1}{ .mmi(p0) setf.exp FR_two_negN = GR_ScaleN nop.m 999(p0) addl GR_Table_Base = @ltoff(Constants_Z_G_H_h3#),gp };;//// Index2 points to Z2// Branch for -7 > M//{ .mmb(p0) ld4 GR_Z_2 = [GR_Index2],4 ld8 GR_Table_Base = [GR_Table_Base] nop.b 999 ;;}(p0) nop.i 999//// Load Z_2// N = N - Bias// Tablebase points to Table3//{ .mmi(p0) ldfs FR_G_tmp = [GR_Index2],4 ;; //// Load G_2// pmpyshr2 X_2= (X_1,Z_2,15)// float_N = setf.sig(N)// ScaleN = Bias - N//(p0) ldfs FR_H_tmp = [GR_Index2],8 nop.i 999 ;;}//// Load H_2// two_negN = setf.exp(scaleN)// G = G_1 * G_2//{ .mfi(p0) ldfd FR_h_tmp = [GR_Index2],0 nop.f 999(p0) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; }{ .mii nop.m 999(p0) extr.u GR_Index3 = GR_X_2, 1, 5 ;; //// Load h_2// H = H_1 + H_2 // h = h_1 + h_2 // Index3 = extr.u(X_2,1,5)//(p0) shladd GR_Index3 = GR_Index3,4,GR_Table_Base }{ .mmi nop.m 999 nop.m 999//// float_N = fcvt.xf(float_N)// load G3//(p0) addl GR_Table_Base = @ltoff(Constants_Q#),gp ;; }{ .mfild8 GR_Table_Base = [GR_Table_Base]nop.f 999nop.i 999} ;;{ .mfi(p0) ldfe FR_log2_hi = [GR_Table_Base],16 (p0) fmpy.s1 FR_S_lo = FR_S_lo, FR_two_negN nop.i 999 ;;}{ .mmf nop.m 999//// G = G3 * G// Load h3// Load log2_hi// H = H + H3//(p0) ldfe FR_log2_lo = [GR_Table_Base],16 (p0) fmpy.s1 FR_G = FR_G, FR_G_tmp ;; }{ .mmf(p0) ldfs FR_G_tmp = [GR_Index3],4 //// h = h + h3// r = G * S_hi + 1 // Load log2_lo//(p0) ldfe FR_Q4 = [GR_Table_Base],16 (p0) fadd.s1 FR_h = FR_h, FR_h_tmp ;; }{ .mfi(p0) ldfe FR_Q3 = [GR_Table_Base],16 (p0) fadd.s1 FR_H = FR_H, FR_H_tmp nop.i 999 ;;}{ .mmf(p0) ldfs FR_H_tmp = [GR_Index3],4 (p0) ldfe FR_Q2 = [GR_Table_Base],16 //// Comput Index for Table3// S_lo = S_lo * two_negN//(p0) fcvt.xf FR_float_N = FR_float_N ;; }//// If S_lo == 0, set p8 false// Load H3// Load ptr to table of polynomial coeff.//{ .mmf(p0) ldfd FR_h_tmp = [GR_Index3],0 (p0) ldfe FR_Q1 = [GR_Table_Base],0 (p0) fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;; }{ .mfi nop.m 999(p0) fmpy.s1 FR_G = FR_G, FR_G_tmp nop.i 999 ;;}{ .mfi nop.m 999(p0) fadd.s1 FR_H = FR_H, FR_H_tmp nop.i 999 ;;}{ .mfi nop.m 999(p0) fms.s1 FR_r = FR_G, FR_S_hi, f1 nop.i 999}{ .mfi nop.m 999(p0) fadd.s1 FR_h = FR_h, FR_h_tmp nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H nop.i 999 ;;}{ .mfi nop.m 999//// Load Q4 // Load Q3
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -