📄 s_log1pl.s
字号:
//
// Tail of log1pl (IA-64), followed by the helper procedures
// __libm_error_region and LOGL_main.
//
// The visible listing resumes inside an instruction bundle that was opened
// earlier; the two instructions below fill its remaining slots.
//
// Notation used in the comments:
//   fma  d = a, b, c   ->  d = a * b + c
//   fms  d = a, b, c   ->  d = a * b - c
//   ldfX f = [r], n    ->  load, then r += n
// NOTE(review): p14 and p15 are set before this chunk.  From the tag values
// and the 1/ln(10) scaling in LOGL_main they presumably select log10l (p14)
// and log1pl (p15) - confirm against the entry code.
//
(p0)    ldfe    FR_Q4 = [GR_Table_Base],16          // Q4; step to next 16-byte entry
(p0)    fadd.s1 FR_h = FR_h, FR_h_tmp ;;            // h += h_tmp (h_tmp loaded earlier)
}
{ .mfi
(p0)    ldfe    FR_Q3 = [GR_Table_Base],16          // Q3
(p0)    fadd.s1 FR_H = FR_H, FR_H_tmp               // H += H_tmp (H_tmp loaded earlier)
        nop.i 999 ;;
}
{ .mmf
(p0)    ldfs    FR_H_tmp = [GR_Index3],4            // H_tmp = single at [Index3] (original notes: H3)
(p0)    ldfe    FR_Q2 = [GR_Table_Base],16          // Q2
(p0)    fcvt.xf FR_float_N = FR_float_N ;;          // N: integer -> floating point
}
{ .mmf
(p0)    ldfd    FR_h_tmp = [GR_Index3],0            // h_tmp = double at [Index3]
(p0)    ldfe    FR_Q1 = [GR_Table_Base],0           // Q1 (pointer not advanced)
(p0)    fcmp.eq.unc.s1 p0, p8 = FR_S_lo, f0 ;;      // p8 = (S_lo != 0)
}
{ .mfi
        nop.m 999
(p0)    fmpy.s1 FR_G = FR_G, FR_G_tmp               // G *= G_tmp (G_tmp loaded earlier)
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fadd.s1 FR_H = FR_H, FR_H_tmp               // H += H_tmp (value just loaded)
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fms.s1  FR_r = FR_G, FR_S_hi, f1            // r = G * S_hi - 1
        nop.i 999
}
{ .mfi
        nop.m 999
(p0)    fadd.s1 FR_h = FR_h, FR_h_tmp               // h += h_tmp (value just loaded)
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_Y_hi = FR_float_N, FR_log2_hi, FR_H  // Y_hi = N * log2_hi + H
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p8)    fma.s1  FR_r = FR_G, FR_S_lo, FR_r          // if (S_lo != 0) r = S_lo * G + r
        nop.i 999
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_h = FR_float_N, FR_log2_lo, FR_h // h = N * log2_lo + h
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_poly_lo = FR_r, FR_Q4, FR_Q3     // poly_lo = r * Q4 + Q3
        nop.i 999
}
{ .mfi
(p0)    setf.sig FR_dummy = GR_Perturb              // dummy.significand = GR_Perturb
                                                    // (original note: "Create a 0x00000....01")
(p0)    fmpy.s1 FR_rsq = FR_r, FR_r                 // rsq = r * r
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_poly_lo = FR_poly_lo, FR_r, FR_Q2    // poly_lo = poly_lo * r + Q2
        nop.i 999
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_poly_hi = FR_Q1, FR_rsq, FR_r    // poly_hi = Q1 * rsq + r
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r       // poly_lo = poly_lo * r
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_poly_lo = FR_poly_lo, FR_rsq, FR_h   // poly_lo = poly_lo * rsq + h
        nop.i 999 ;;
}
{ .mfb
        nop.m 999
(p0)    fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo    // Y_lo = poly_hi + poly_lo
//
//     The two instructions below are disabled in the original source.
//
//     Create the FR for a binary "or"
// (p0)  for FR_dummy = FR_Y_lo,FR_dummy ;;
//
//     Turn the lsb of Y_lo ON
// (p0)  fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;
//
//     Merge the new lsb into Y_lo, for alone doesn't
//
(p0)    br.cond.sptk LOGL_main ;;                   // finish in LOGL_main
}

// /*******************************************************/
// /*********** Branch log1pl_near ************************/
// /*******************************************************/
//
//     Evaluates a polynomial in W with coefficients P1..P8 from
//     Constants_P, leaving Y_hi = W and the remaining terms in Y_lo.
//
L(log1pl_near):
{ .mmi
        nop.m 999
        nop.m 999
(p0)    addl    GR_Table_Base = @ltoff(Constants_P#),gp ;;  // linkage-table slot of Constants_P
}
{ .mmi
        nop.m 999
        ld8     GR_Table_Base = [GR_Table_Base]     // base address of the coefficient table
        nop.i 999
};;
//
//     Two pointers walk the table: GR_Table_Base reads P8..P5 and
//     GR_Table_ptr (base + 0x40, i.e. four 16-byte entries on) reads P4..P1.
//
{ .mmb
(p0)    add     GR_Table_ptr = 0x40,GR_Table_Base
(p0)    ldfe    FR_P8 = [GR_Table_Base],16          // P8
        nop.b 999 ;;
}
{ .mmb
(p0)    ldfe    FR_P4 = [GR_Table_ptr],16           // P4
(p0)    ldfe    FR_P7 = [GR_Table_Base],16          // P7
        nop.b 999 ;;
}
{ .mmf
(p0)    ldfe    FR_P3 = [GR_Table_ptr],16           // P3
(p0)    ldfe    FR_P6 = [GR_Table_Base],16          // P6
(p0)    fmpy.s1 FR_wsq = FR_W, FR_W ;;              // wsq = w * w
}
{ .mfi
(p0)    ldfe    FR_P2 = [GR_Table_ptr],16           // P2
        nop.f 999
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_Y_hi = FR_W, FR_P4, FR_P3        // Y_hi = w * P4 + P3
        nop.i 999
}
{ .mfi
(p0)    ldfe    FR_P5 = [GR_Table_Base],16          // P5
(p0)    fma.s1  FR_Y_lo = FR_W, FR_P8, FR_P7        // Y_lo = w * P8 + P7
        nop.i 999 ;;
}
{ .mfi
(p0)    ldfe    FR_P1 = [GR_Table_ptr],16           // P1
(p0)    fmpy.s1 FR_w4 = FR_wsq, FR_wsq              // w4 = wsq * wsq
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_Y_hi = FR_W, FR_Y_hi, FR_P2      // Y_hi = w * Y_hi + P2
        nop.i 999
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_Y_lo = FR_W, FR_Y_lo, FR_P6      // Y_lo = w * Y_lo + P6
(p0)    add     GR_Perturb = 0x1, r0 ;;             // perturbation bit = 1
}
{ .mfi
        nop.m 999
(p0)    fmpy.s1 FR_w6 = FR_w4, FR_wsq               // w6 = w4 * wsq
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_Y_hi = FR_W, FR_Y_hi, FR_P1      // Y_hi = w * Y_hi + P1
        nop.i 999
}
{ .mfi
(p0)    setf.sig FR_Q4 = GR_Perturb                 // Q4.significand = GR_Perturb
(p0)    fma.s1  FR_Y_lo = FR_W, FR_Y_lo, FR_P5      // Y_lo = w * Y_lo + P5
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_dummy = FR_wsq,FR_Y_hi, f0       // dummy = wsq * Y_hi (high-order part)
        nop.i 999
}
{ .mfi
        nop.m 999
(p0)    fma.s1  FR_Y_hi = FR_W,f1,f0                // Y_hi = w
        nop.i 999
};;
{ .mfb
        nop.m 999
(p0)    fma.s1  FR_Y_lo = FR_w6, FR_Y_lo,FR_dummy   // Y_lo = w6 * Y_lo + dummy
(p0)    br.cond.sptk LOGL_main ;;                   // finish in LOGL_main
}

// /*******************************************************/
// /*********** Branch log1pl_small ***********************/
// /*******************************************************/
//
//     Returns f8 = Y_lo * SCALE + Y_hi, choosing Y_lo and SCALE by comparing
//     |W| with a threshold read from Constants_Threshold.
//     NOTE(review): FR_Y_hi and FR_abs_W are not written in the visible part
//     of this branch; they are presumably set before the branch is taken.
//
L(log1pl_small):
{ .mmi
        nop.m 999
(p0)    addl    GR_Table_Base = @ltoff(Constants_Threshold#),gp
}
{ .mfi
        nop.m 999
(p0)    mov     FR_Em1 = FR_W                       // Em1 = W
(p0)    cmp.eq.unc p7, p0 = r0, r0 ;;               // p7 = 1 (original note: "Set Safe to true")
}
{ .mlx
        ld8     GR_Table_Base = [GR_Table_Base]     // address of Constants_Threshold
(p0)    movl    GR_Expo_Range = 0x0000000000000004 ;;
}
//
//     Original note on Expo_Range:
//       0 for single, 2 for double, 4 for double-extended
//
{ .mmi
(p0)    shladd  GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;;    // base += Expo_Range * 16
(p0)    ldfe    FR_Threshold = [GR_Table_Base],16   // threshold for this range
        nop.i 999
}
{ .mlx
        nop.m 999
(p0)    movl    GR_Bias = 0x000000000000FF9B ;;     // exponent field 0xFFFF - 100
}
{ .mfi
(p0)    ldfe    FR_Tiny = [GR_Table_Base],0         // entry following the threshold
        nop.f 999
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p0)    fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold    // p13 = |W| > Threshold, p12 = otherwise
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p13)   fnmpy.s1 FR_Y_lo = FR_W, FR_W               // above threshold: Y_lo = -(W * W)
        nop.i 999
}
{ .mfi
        nop.m 999
(p13)   fadd    FR_SCALE = f0, f1                   // above threshold: SCALE = 1.0
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p12)   fsub.s1 FR_Y_lo = f0, FR_Tiny               // otherwise: Y_lo = -Tiny
(p12)   cmp.ne.unc p7, p0 = r0, r0                  // otherwise: p7 = 0 (original note: "SAFE = FALSE")
}
{ .mfi
(p12)   setf.exp FR_SCALE = GR_Bias                 // otherwise: SCALE = 2^-100 (per original note)
        nop.f 999
        nop.i 999 ;;
}
{ .mfb
        nop.m 999
(p0)    fma.s0  f8 = FR_Y_lo,FR_SCALE,FR_Y_hi       // result = Y_lo * SCALE + Y_hi
(p0)    br.ret.sptk b0 ;;
}

//
//     Original note: logl(1), log10l(1) or log1pl(0) - return 0.
//
L(LOGL_64_one):
{ .mfb
        nop.m 999
(p0)    fmpy.s0 f8 = FR_Input_X, f0                 // result = x * 0
(p0)    br.ret.sptk b0 ;;
}

//
//     Original note: raise divide by zero for +/-0 input; return -Inf or
//     the value supplied by the error handler.
//     Error tag: 0 by default, 6 under p14, 138 under p15.
//
L(LOGL_64_zero):
{ .mfi
(p0)    mov     GR_Parameter_TAG = 0
(p0)    fsub.s0 FR_Output_X_tmp = f0, f1            // tmp = -1
        nop.i 999 ;;
}
{ .mii
(p14)   mov     GR_Parameter_TAG = 6
        nop.i 999 ;;
(p15)   mov     GR_Parameter_TAG = 138 ;;
}
{ .mfb
        nop.m 999
(p0)    frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0  // frcpa with numerator -1, denominator f0
(p0)    br.cond.sptk __libm_error_region ;;
}
//
//     Original note: "Report that logl(0) computed".
//     This bundle follows an unconditional (p0) branch, so fall-through
//     never reaches it.  The original opened "{ .mfb" a second time inside
//     this bundle; the redundant opener has been removed so the bundle has
//     the same shape as its counterpart in LOGL_64_special.
//
{ .mfb
        nop.m 999
(p0)    mov     FR_Input_X = FR_Output_X_tmp
(p0)    br.ret.sptk b0 ;;
}

//
//     Original notes: fclass checks for NaTVal, QNaN, SNaN, +Inf.
//       SNaN / QNaN : raise invalid and return QNaN
//       +Inf        : return +Inf
//       -Inf        : raise invalid and return QNaN (via the error handler)
//
L(LOGL_64_special):
{ .mfi
        nop.m 999
(p0)    fclass.m.unc p7, p0 = FR_Input_X, 0x1E1     // p7 = x is in class mask 0x1E1
        nop.i 999 ;;
}
{ .mfb
        nop.m 999
(p7)    fmpy.s0 f8 = FR_Input_X, f1                 // result = x * 1
(p7)    br.ret.sptk b0 ;;
}
//
//     Remaining case.  Error tag: 1 by default, 7 under p14, 139 under p15.
//
{ .mii
(p0)    mov     GR_Parameter_TAG = 1
        nop.i 999 ;;
(p14)   mov     GR_Parameter_TAG = 7 ;;
}
{ .mfi
(p15)   mov     GR_Parameter_TAG = 139
        nop.f 999
        nop.i 999 ;;
}
{ .mfb
        nop.m 999
(p0)    fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0    // tmp = x * 0
(p0)    br.cond.sptk __libm_error_region ;;
}
//
//     Original notes: "Report that logl(-Inf) / log10l(-Inf) / log1p(-Inf)
//     computed".  Follows an unconditional (p0) branch, so fall-through
//     never reaches it.
//
{ .mfb
        nop.m 0
(p0)    mov     FR_Input_X = FR_Output_X_tmp
(p0)    br.ret.sptk b0 ;;
}

//
//     Original note: return generated NaN or other value.
//
L(LOGL_64_unsupported):
{ .mfb
        nop.m 999
(p0)    fmpy.s0 f8 = FR_Input_X, f0                 // result = x * 0
(p0)    br.ret.sptk b0 ;;
}

//
//     Original note: x < 0 - raise invalid and produce QNaN indefinite.
//     Error tag: 1 by default, 7 under p14, 139 under p15.
//     There is no branch after the last bundle: execution continues past
//     ".endp log1pl" into __libm_error_region, which must stay directly
//     below.
//
L(LOGL_64_negative):
{ .mfi
        nop.m 999
(p0)    frcpa.s0 FR_Output_X_tmp, p8 = f0, f0       // frcpa with numerator f0, denominator f0
(p0)    mov     GR_Parameter_TAG = 1 ;;
}
{ .mii
(p14)   mov     GR_Parameter_TAG = 7
        nop.i 999 ;;
(p15)   mov     GR_Parameter_TAG = 139
}
.endp log1pl
ASM_SIZE_DIRECTIVE(log1pl)

//
// __libm_error_region
//
//     Builds a 64-byte stack frame, stores FR_X, FR_Y and FR_RESULT in it,
//     calls __libm_error_support, reloads the result slot into f8 and
//     returns to the caller of the routine that branched (or fell) here.
//
//     Frame layout relative to the new sp:
//       sp + 16 : FR_X       (GR_Parameter_X)
//       sp + 32 : FR_Y       (GR_Parameter_Y)
//       sp + 48 : FR_RESULT  (GR_Parameter_RESULT), reloaded into f8
//     NOTE(review): GR_Parameter_TAG is set by the callers above and is
//     presumably consumed by __libm_error_support - confirm its calling
//     convention.
//
.proc __libm_error_region
__libm_error_region:
.prologue
{ .mfi
        add     GR_Parameter_Y=-32,sp               // old sp - 32 (= new sp + 32)
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov     GR_SAVE_PFS=ar.pfs                  // Save ar.pfs
}
{ .mfi
.fframe 64
        add     sp=-64,sp                           // Create new stack frame
        nop.f 0
        mov     GR_SAVE_GP=gp                       // Save gp
};;
{ .mmi
        stfe    [GR_Parameter_Y] = FR_Y,16          // Store Parameter 2; pointer -> sp + 48
        add     GR_Parameter_X = 16,sp              // Parameter 1 address
.save   b0, GR_SAVE_B0
        mov     GR_SAVE_B0=b0                       // Save b0
};;
.body
{ .mib
        stfe    [GR_Parameter_X] = FR_X             // Store Parameter 1 on stack
        add     GR_Parameter_RESULT = 0,GR_Parameter_Y      // Parameter 3 address (sp + 48)
        nop.b 0
}
{ .mib
        stfe    [GR_Parameter_Y] = FR_RESULT        // Store Parameter 3 on stack
        add     GR_Parameter_Y = -16,GR_Parameter_Y // back to Parameter 2 address (sp + 32)
        br.call.sptk b0=__libm_error_support#       // Call error handling function
};;
{ .mmi
        nop.m 0
        nop.m 0
        add     GR_Parameter_RESULT = 48,sp         // recompute result address after the call
};;
{ .mmi
        ldfe    f8 = [GR_Parameter_RESULT]          // Get return result off stack
.restore sp
        add     sp = 64,sp                          // Restore stack pointer
        mov     b0 = GR_SAVE_B0                     // Restore return address
};;
{ .mib
        mov     gp = GR_SAVE_GP                     // Restore gp
        mov     ar.pfs = GR_SAVE_PFS                // Restore ar.pfs
        br.ret.sptk b0                              // Return
};;
.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)

//
// LOGL_main
//
//     Common exit for the branches above that end with
//     "br.cond.sptk LOGL_main"; they arrive with the result split into
//     FR_Y_hi and FR_Y_lo.  (Original note: kernel_log_64 computes ln(X + E).)
//
//       p7  : FR_Input_X = Y_lo + Y_hi
//       p14 : FR_Input_X = Y_hi * C_hi + (Y_hi * C_lo + Y_lo * C_hi),
//             where C_hi/C_lo are the two entries of Constants_1_by_LN10
//     NOTE(review): FR_Input_X presumably aliases the return register f8 -
//     confirm against the register definitions at the top of the file.
//
.proc LOGL_main
LOGL_main:
{ .mfi
        nop.m 999
(p7)    fadd.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
        nop.i 0
}
{ .mmi
        nop.m 999
        nop.m 999
(p14)   addl    GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
}
{ .mmi
        nop.m 999
(p14)   ld8     GR_Table_Base = [GR_Table_Base]     // address of Constants_1_by_LN10
        nop.i 999
};;
{ .mmi
(p14)   ldfe    FR_1LN10_hi = [GR_Table_Base],16 ;; // first entry
(p14)   ldfe    FR_1LN10_lo = [GR_Table_Base]       // second entry
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p14)   fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi       // tmp = Y_lo * C_hi
        nop.i 999 ;;
}
{ .mfi
        nop.m 999
(p14)   fma.s1  FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp   // tmp += Y_hi * C_lo
        nop.i 999 ;;
}
{ .mfb
        nop.m 999
(p14)   fma.s0  FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp       // result = Y_hi * C_hi + tmp
(p0)    br.ret.sptk b0 ;;
}
.endp LOGL_main
ASM_SIZE_DIRECTIVE(LOGL_main)

.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -