📄 libm_lgammal.s
字号:
// This is accomplised by integer multiplication.// It is proved that X_1 indeed always begin// with 1.0000 in fixed point.////// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1// truncated to lsb = 2^(-8). Similar to A_1,// A_2 is not needed in actual implementation. It// helps explain how some of the values are defined.//// Define index_2 := [ d_5 d_6 d_7 d_8 ].//// Fetch Z_2 := (1/A_2) rounded UP in fixed point with// fixed point lsb = 2^(-15). Fetch done using index_2.// Z_2 looks like z_0.z_1 z_2 ... z_15//// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.// floating pt.//// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14// This is accomplised by integer multiplication.// It is proved that X_2 indeed always begin// with 1.00000000 in fixed point.////// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).//// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].//// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.// floating pt. Fetch is done using index_3.//// Compute G := G_1 * G_2 * G_3.//// This is done exactly since each of G_j only has 21 sig. bits.//// Compute//// r := (G*S_hi - 1)////// Step 2. Approximation// ---------------------//// This step computes an approximation to logl( 1 + r ) where r is the// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);// thus logl(1+r) can be approximated by a short polynomial://// logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5////// Step 3. Reconstruction// ----------------------//// This step computes the desired result of logl(X)://// logl(X) = logl( 2^N * S_hi )// = N*logl(2) + logl( S_hi )// = N*logl(2) + logl(1/G) +// logl(1 + G*S_hi - 1 )//// logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are// single-precision numbers and the low parts are double precision// numbers. These have the property that//// N*log2_hi + SUM ( log1byGj_hi )//// is computable exactly in double-extended precision (64 sig. bits).// Finally//// lnHi(X) := N*log2_hi + SUM ( log1byGj_hi )// lnLo(X) := poly_hi + [ poly_lo +// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]//////*********************************************************************// General Purpose Registers// scratch registersrPolDataPtr = r2rLnSinDataPtr = r3rExpX = r8rSignifX = r9rDelta = r10rSignExpX = r11GR_ad_z_1 = r14r17Ones = r15GR_Index1 = r16rSignif1andQ = r17GR_X_0 = r18GR_X_1 = r19GR_X_2 = r20GR_Z_1 = r21GR_Z_2 = r22GR_N = r23rExpHalf = r24rExp8 = r25rX0Dx = r25GR_ad_tbl_1 = r26GR_ad_tbl_2 = r27GR_ad_tbl_3 = r28GR_ad_q = r29GR_ad_z_1 = r30GR_ad_z_2 = r31// stacked registersrPFS_SAVED = r32GR_ad_z_3 = r33rSgnGamAddr = r34rSgnGamSize = r35rLogDataPtr = r36rZ1offsett = r37rTmpPtr = r38rTmpPtr2 = r39rTmpPtr3 = r40rExp2 = r41rExp2tom7 = r42rZ625 = r42rExpOne = r43rNegSingularity = r44rXint = r45rTbl1Addr = r46rTbl2Addr = r47rTbl3Addr = r48rZ2Addr = r49rRootsAddr = r50rRootsBndAddr = r51rRoot = r52rRightBound = r53rLeftBound = r54rSignifDx = r55rBernulliPtr = r56rLnSinTmpPtr = r56rIndex1Dx = r57rIndexPol = r58GR_Index3 = r59GR_Index2 = r60rSgnGam = r61rXRnd = r62GR_SAVE_B0 = r63GR_SAVE_GP = r64GR_SAVE_PFS = r65// output parameters when calling error handling routineGR_Parameter_X = r66GR_Parameter_Y = r67GR_Parameter_RESULT = r68GR_Parameter_TAG = r69//********************************************************************// Floating Point Registers// CAUTION: due to the lack of registers there exist (below in the code)// sometimes "unconventional" use of declared registers//fAbsX = f6fDelX4 = f6fSignifX = f7// macros for error handling routineFR_X = f10 // first argumentFR_Y = f1 // second argument (lgammal has just one)FR_RESULT = f8 // result// First 7 Bernulli numbersfB2 = f9fLnDeltaL = f9fXSqr = f9fB4 = f10fX4 = f10fB6 = f11fX6 = f11fB8 = f12fXSqrL = f12fB10 = f13fRes7H = f13fB12 = f14fRes7L = f14fB14 = f15// stack registers// Polynomial coefficients: A0, ..., A25fA0 = f32fA0L = f33fInvXL = f33fA1 = f34fA1L = f35fA2 = f36fA2L = f37fA3 = f38fA3L = f39fA4 = f40fA4L = f41fRes6H = f41fA5 = f42fB2L = f42fA5L = f43fMinNegStir = f43fRes6L = f43fA6 = f44fMaxNegStir = f44fA7 = f45fLnDeltaH = f45fA8 = f46fBrnL = f46fA9 = f47fBrnH = f47fA10 = f48fRes5L = f48fA11 = f49fRes5H = f49fA12 = f50fDx6 = f50fA13 = f51fDx8 = f51fA14 = f52fDx4 = f52fA15 = f53fYL = f53fh3Dx = f53fA16 = f54fYH = f54fH3Dx = f54fA17 = f55fResLnDxL = f55fG3Dx = f55fA18 = f56fResLnDxH = f56fh2Dx = f56fA19 = f57fFloatNDx = f57fA20 = f58fPolyHiDx = f58fhDx = f58fA21 = f59fRDxCub = f59fHDx = f59fA22 = f60fRDxSq = f60fGDx = f60fA23 = f61fPolyLoDx = f61fInvX3 = f61fA24 = f62fRDx = f62fInvX8 = f62fA25 = f63fInvX4 = f63fPol = f64fPolL = f65// Coefficients of ln(sin(Pi*x)/Pi*x)fLnSin2 = f66fLnSin2L = f67fLnSin4 = f68fLnSin6 = f69fLnSin8 = f70fLnSin10 = f71fLnSin12 = f72fLnSin14 = f73fLnSin16 = f74fLnSin18 = f75fDelX8 = f75fLnSin20 = f76fLnSin22 = f77fDelX6 = f77fLnSin24 = f78fLnSin26 = f79fLnSin28 = f80fLnSin30 = f81fhDelX = f81fLnSin32 = f82fLnSin34 = f83fLnSin36 = f84fXint = f85fDxSqr = f85fRes3L = f86fRes3H = f87fRes4H = f88fRes4L = f89fResH = f90fResL = f91fDx = f92FR_MHalf = f93fRes1H = f94fRes1L = f95fRes2H = f96fRes2L = f97FR_FracX = f98fRcpX = f99fLnSinH = f99fTwo = f100fMOne = f100FR_G = f101FR_H = f102FR_h = f103FR_G2 = f104FR_H2 = f105FR_poly_lo = f106FR_poly_hi = f107FR_h2 = f108FR_rsq = f109FR_r = f110FR_log2_hi = f111FR_log2_lo = f112fFloatN = f113FR_Q4 = f114FR_G3 = f115FR_H3 = f116FR_h3 = f117FR_Q3 = f118FR_Q2 = f119FR_Q1 = f120fThirteen = f121fSix = f121FR_rcub = f121// Last three Bernulli numbersfB16 = f122fB18 = f123fB20 = f124fInvX = f125
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -