📄 s_log1pf.s
字号:
// Load Q2 // Load Q1 //(p8) fma.s1 FR_r = FR_G, FR_S_lo, FR_r nop.i 999}{ .mfi nop.m 999//// poly_lo = r * Q4 + Q3// rsq = r* r//(p0) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h nop.i 999 ;;}{ .mfi nop.m 999//// If (S_lo!=0) r = s_lo * G + r//(p0) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 nop.i 999}//// Create a 0x00000....01// poly_lo = poly_lo * rsq + h//{ .mfi(p0) setf.sig FR_dummy = GR_Perturb (p0) fmpy.s1 FR_rsq = FR_r, FR_r nop.i 999 ;;}{ .mfi nop.m 999//// h = N * log2_lo + h // Y_hi = n * log2_hi + H //(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 nop.i 999}{ .mfi nop.m 999(p0) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r nop.i 999 ;;}{ .mfi nop.m 999//// poly_lo = r * poly_o + Q2 // poly_hi = Q1 * rsq + r //(p0) fmpy.s1 FR_poly_lo = FR_poly_lo, FR_r nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_poly_lo = FR_poly_lo, FR_rsq, FR_h nop.i 999 ;;}{ .mfb nop.m 999(p0) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo //// Create the FR for a binary "or"// Y_lo = poly_hi + poly_lo//// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;//// Turn the lsb of Y_lo ON//// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;//// Merge the new lsb into Y_lo, for alone doesn't//(p0) br.cond.sptk L(LOG_main) ;; }L(log1pf_near): { .mmi nop.m 999 nop.m 999// /*******************************************************/// /*********** Branch log1pf_near ************************/// /*******************************************************/(p0) addl GR_Table_Base = @ltoff(Constants_P#),gp ;; }//// Load base address of poly. 
coeff.//{.mmi nop.m 999 ld8 GR_Table_Base = [GR_Table_Base] nop.i 999};;{ .mmb(p0) add GR_Table_ptr = 0x40,GR_Table_Base //// Address tables with separate pointers //(p0) ldfe FR_P8 = [GR_Table_Base],16 nop.b 999 ;;}{ .mmb(p0) ldfe FR_P4 = [GR_Table_ptr],16 //// Load P4// Load P8//(p0) ldfe FR_P7 = [GR_Table_Base],16 nop.b 999 ;;}{ .mmf(p0) ldfe FR_P3 = [GR_Table_ptr],16 //// Load P3// Load P7//(p0) ldfe FR_P6 = [GR_Table_Base],16 (p0) fmpy.s1 FR_wsq = FR_W, FR_W ;; }{ .mfi(p0) ldfe FR_P2 = [GR_Table_ptr],16 nop.f 999 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_Y_hi = FR_W, FR_P4, FR_P3 nop.i 999}//// Load P2// Load P6// Wsq = w * w// Y_hi = p4 * w + p3//{ .mfi(p0) ldfe FR_P5 = [GR_Table_Base],16 (p0) fma.s1 FR_Y_lo = FR_W, FR_P8, FR_P7 nop.i 999 ;;}{ .mfi(p0) ldfe FR_P1 = [GR_Table_ptr],16 //// Load P1// Load P5// Y_lo = p8 * w + P7//(p0) fmpy.s1 FR_w4 = FR_wsq, FR_wsq nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P2 nop.i 999}{ .mfi nop.m 999(p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P6 (p0) add GR_Perturb = 0x1, r0 ;; }{ .mfi nop.m 999//// w4 = w2 * w2 // Y_hi = y_hi * w + p2 // Y_lo = y_lo * w + p6 // Create perturbation bit//(p0) fmpy.s1 FR_w6 = FR_w4, FR_wsq nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_Y_hi = FR_W, FR_Y_hi, FR_P1 nop.i 999}//// Y_hi = y_hi * w + p1 // w6 = w4 * w2 //{ .mfi(p0) setf.sig FR_Q4 = GR_Perturb (p0) fma.s1 FR_Y_lo = FR_W, FR_Y_lo, FR_P5 nop.i 999 ;;}{ .mfi nop.m 999(p0) fma.s1 FR_Y_hi = FR_wsq,FR_Y_hi, FR_W nop.i 999}{ .mfb nop.m 999//// Y_hi = y_hi * wsq + w // Y_lo = y_lo * w + p5 //(p0) fmpy.s1 FR_Y_lo = FR_w6, FR_Y_lo //// Y_lo = y_lo * w6 //// (p0) for FR_dummy = FR_Y_lo,FR_dummy ;;//// Set lsb on: Taken out to improve performance //// (p0) fmerge.se FR_Y_lo = FR_Y_lo,FR_dummy ;;//// Make sure it's on in Y_lo also. 
Taken out to improve// performance//(p0) br.cond.sptk L(LOG_main) ;; }L(log1pf_small): { .mmi nop.m 999 nop.m 999// /*******************************************************/// /*********** Branch log1pf_small ***********************/// /*******************************************************/(p0) addl GR_Table_Base = @ltoff(Constants_Threshold#),gp }{ .mfi nop.m 999(p0) mov FR_Em1 = FR_W (p0) cmp.eq.unc p7, p0 = r0, r0 ;; }{ .mlx ld8 GR_Table_Base = [GR_Table_Base](p0) movl GR_Expo_Range = 0x0000000000000002 ;; }//// Set Safe to true// Set Expo_Range = 0 for single// Set Expo_Range = 2 for double // Set Expo_Range = 4 for double-extended //{ .mmi(p0) shladd GR_Table_Base = GR_Expo_Range,4,GR_Table_Base ;; (p0) ldfe FR_Threshold = [GR_Table_Base],16 nop.i 999}{ .mlx nop.m 999(p0) movl GR_Bias = 0x000000000000FF9B ;; }{ .mfi(p0) ldfe FR_Tiny = [GR_Table_Base],0 nop.f 999 nop.i 999 ;;}{ .mfi nop.m 999(p0) fcmp.gt.unc.s1 p13, p12 = FR_abs_W, FR_Threshold nop.i 999 ;;}{ .mfi nop.m 999(p13) fnmpy.s1 FR_Y_lo = FR_W, FR_W nop.i 999}{ .mfi nop.m 999(p13) fadd FR_SCALE = f0, f1 nop.i 999 ;;}{ .mfi nop.m 999(p12) fsub.s1 FR_Y_lo = f0, FR_Tiny (p12) cmp.ne.unc p7, p0 = r0, r0 }{ .mfi(p12) setf.exp FR_SCALE = GR_Bias nop.f 999 nop.i 999 ;;}//// Set p7 to SAFE = FALSE// Set Scale = 2^-100 //{ .mfb nop.m 999(p0) fma.s.s0 FR_Input_X = FR_Y_lo,FR_SCALE,FR_Y_hi(p0) br.ret.sptk b0};;L(LOG_64_one): { .mfb nop.m 999(p0) fmpy.s.s0 FR_Input_X = FR_Input_X, f0 (p0) br.ret.sptk b0};;// // Raise divide by zero for +/-0 input.// L(LOG_64_zero): { .mfi(p0) mov GR_Parameter_TAG = 142 //// If we have log1pf(0), return -Inf.// (p0) fsub.s0 FR_Output_X_tmp = f0, f1 nop.i 999 ;;}{ .mfb nop.m 999(p0) frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 (p0) br.cond.sptk L(LOG_ERROR_Support) ;; }L(LOG_64_special): { .mfi nop.m 999// // Return -Inf or value from handler.// (p0) fclass.m.unc p7, p0 = FR_Input_X, 0x1E1 nop.i 999 ;;}{ .mfb nop.m 999// // Check for Natval, QNan, SNaN, +Inf // (p7) 
fmpy.s.s0 f8 = FR_Input_X, f1   // NOTE(review): the "{ .mfb" opener, the "nop.m 999" slot and the (p7) predicate of this instruction sit at the end of the preceding (collapsed) source line
//
//    For SNaN raise invalid and return QNaN.
//    For QNaN raise invalid and return QNaN.
//    For +Inf return +Inf.
//
(p7)  br.ret.sptk b0
};;

//
//    For -Inf raise invalid and return QNaN.
//
{ .mfb
(p0)  mov GR_Parameter_TAG = 143
(p0)  fmpy.s.s0 FR_Output_X_tmp = FR_Input_X, f0
(p0)  br.cond.sptk L(LOG_ERROR_Support) ;;
}

//
//    Report that log1pf(-Inf) computed
//
L(LOG_64_unsupported):
//
//    Return generated NaN or other value .
//
{ .mfb
      nop.m 999
(p0)  fmpy.s.s0 FR_Input_X = FR_Input_X, f0
(p0)  br.ret.sptk b0 ;;
}

L(LOG_64_negative):
{ .mfi
      nop.m 999
//
//    Deal with x < 0 in a special way
//
(p0)  frcpa.s0 FR_Output_X_tmp, p8 = f0, f0
//
//    Deal with x < 0 in a special way - raise
//    invalid and produce QNaN indefinite.
//
(p0)  mov GR_Parameter_TAG = 143;;
}
//    No branch closes the bundle above: control continues into the code
//    placed next, which is L(LOG_ERROR_Support) below.
.endp log1pf#
ASM_SIZE_DIRECTIVE(log1pf)


//
//    __libm_error_region / L(LOG_ERROR_Support)
//
//    Common error tail.  On entry the code above has set:
//      GR_Parameter_TAG  - error tag (142 or 143 in the visible callers)
//      FR_Input_X        - the original argument
//      FR_Output_X_tmp   - the default result
//    It builds a 64-byte frame, stores three single-precision slots in it,
//    calls __libm_error_support#, reloads FR_Input_X from the result slot
//    and returns to the original caller.
//
//    Frame layout relative to the new sp, as established by the adds below:
//      sp+16 : parameter 1 (FR_Input_X)
//      sp+32 : parameter 2 (f0; this routine has a single argument)
//      sp+48 : parameter 3 / result (FR_Output_X_tmp, read back after the call)
//
//    NOTE(review): how GR_Parameter_X/Y/RESULT/TAG map onto the outgoing
//    argument registers is defined outside this view - confirm there.
//
.proc __libm_error_region
__libm_error_region:
L(LOG_ERROR_Support):
.prologue

// (1)
{ .mfi
        add   GR_Parameter_Y=-32,sp             // Parameter 2 value (old sp - 32 == new sp + 32)
        nop.f 0
.save   ar.pfs,GR_SAVE_PFS
        mov   GR_SAVE_PFS=ar.pfs                // Save ar.pfs
}
{ .mfi
.fframe 64
        add   sp=-64,sp                         // Create new stack
        nop.f 0
        mov   GR_SAVE_GP=gp                     // Save gp
};;

// (2)
{ .mmi
        stfs  [GR_Parameter_Y] = f0,16          // STORE Parameter 2 on stack; post-increment leaves the pointer at sp+48
        add   GR_Parameter_X = 16,sp            // Parameter 1 address
.save   b0, GR_SAVE_B0
        mov   GR_SAVE_B0=b0                     // Save b0
};;

.body
// (3)
{ .mib
        stfs  [GR_Parameter_X] =FR_Input_X              // STORE Parameter 1 on stack
        add   GR_Parameter_RESULT = 0,GR_Parameter_Y    // Parameter 3 address
        nop.b 0
}
{ .mib
        stfs  [GR_Parameter_Y] = FR_Output_X_tmp        // STORE Parameter 3 on stack
        add   GR_Parameter_Y = -16,GR_Parameter_Y       // Point back at the parameter 2 slot
        br.call.sptk b0=__libm_error_support#           // Call error handling function
};;
{ .mmi
        nop.m 0
        nop.m 0
        add   GR_Parameter_RESULT = 48,sp               // Recompute the result address after the call
};;

// (4)
{ .mmi
        ldfs  FR_Input_X = [GR_Parameter_RESULT]        // Get return result off stack
.restore sp
        add   sp = 64,sp                                // Restore stack pointer
        mov   b0 = GR_SAVE_B0                           // Restore return address
};;
{ .mib
        mov   gp = GR_SAVE_GP                           // Restore gp
        mov   ar.pfs = GR_SAVE_PFS                      // Restore ar.pfs
        br.ret.sptk b0
};;

.endp __libm_error_region
ASM_SIZE_DIRECTIVE(__libm_error_region)


//
//    __libm_LOG_main / L(LOG_main)
//
//    Shared tail reached from the polynomial branches with the result held
//    as the pair FR_Y_hi + FR_Y_lo.
//      p7  : FR_Input_X = Y_lo + Y_hi, rounded to single (status field s0).
//      p14 : the pair is multiplied by the two-part constant loaded from
//            Constants_1_by_LN10# (hi, lo):
//              tmp        = Y_lo * hi
//              tmp        = Y_hi * lo + tmp
//              FR_Input_X = Y_hi * hi + tmp   (single, s0)
//    The final br.ret is unconditional.
//
//    NOTE(review): where p7 and p14 are set for the callers of LOG_main is
//    outside this view; the table name suggests 1/ln(10) for a log10
//    variant - confirm against the table definition.
//
.proc __libm_LOG_main
__libm_LOG_main:
L(LOG_main):

//
//    kernel_log_64 computes ln(X + E)
//

{ .mfi
      nop.m 999
(p7)  fadd.s.s0 FR_Input_X = FR_Y_lo,FR_Y_hi
      nop.i 999
}

{ .mmi
      nop.m 999
      nop.m 999
(p14) addl GR_Table_Base = @ltoff(Constants_1_by_LN10#),gp ;;
}

{ .mmi
      nop.m 999
(p14) ld8 GR_Table_Base = [GR_Table_Base]
      nop.i 999
};;

{ .mmi
(p14) ldfe FR_1LN10_hi = [GR_Table_Base],16 ;;
(p14) ldfe FR_1LN10_lo = [GR_Table_Base]
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi
      nop.i 999 ;;
}

{ .mfi
      nop.m 999
(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp
      nop.i 999 ;;
}

{ .mfb
      nop.m 999
(p14) fma.s.s0 FR_Input_X = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp
(p0)  br.ret.sptk b0 ;;
}
.endp __libm_LOG_main
ASM_SIZE_DIRECTIVE(__libm_LOG_main)

.type   __libm_error_support#,@function
.global __libm_error_support#
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -