📄 libm_lgammaf.s
字号:
.file "libm_lgammaf.s"// Copyright (c) 2002 - 2005, Intel Corporation// All rights reserved.//// Contributed 2002 by the Intel Numerics Group, Intel Corporation//// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.//// * Redistributions in binary form must reproduce the above copyright// notice, this list of conditions and the following disclaimer in the// documentation and/or other materials provided with the distribution.//// * The name of Intel Corporation may not be used to endorse or promote// products derived from this software without specific prior written// permission.// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL INTEL OR ITS// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.//// Intel Corporation is the author of this code,and requests that all// problem reports or change requests be submitted to it directly at// http://www.intel.com/software/products/opensource/libraries/num.htm.////*********************************************************************//// History:// 01/10/02 Initial version// 01/25/02 Corrected parameter store, load, and tag for __libm_error_support// 02/01/02 Added support of SIGN(GAMMA(x)) calculation// 05/20/02 Cleaned up namespace and sf0 syntax// 09/16/02 Improved accuracy on intervals reduced to [1;1.25]// 10/21/02 Now it returns SIGN(GAMMA(x))=-1 for negative zero// 02/10/03 Reordered header: .section, .global, .proc, .align// 07/22/03 Reformatted some data tables// 03/31/05 Reformatted delimiters between data tables////*********************************************************************////*********************************************************************//// Function: __libm_lgammaf(float x, int* signgam, int szsigngam)// computes the principal value of the logarithm of the GAMMA function// of x. 
Signum of GAMMA(x) is stored to memory starting at the address// specified by the signgam.////*********************************************************************//// Resources Used://// Floating-Point Registers: f6-f15// f32-f97//// General Purpose Registers:// r8-r11// r14-r30// r32-r36// r37-r40 (Used to pass arguments to error handling routine)//// Predicate Registers: p6-p15////*********************************************************************//// IEEE Special Conditions://// lgamma(+inf) = +inf// lgamma(-inf) = +inf// lgamma(+/-0) = +inf// lgamma(x<0, x - integer) = +inf// lgamma(SNaN) = QNaN// lgamma(QNaN) = QNaN////*********************************************************************//// Overview//// The method consists of six cases.//// If 2^13 <= x < OVERFLOW_BOUNDARY use case lgammaf_pstirling;// else if 1 < x < 2^13 use case lgammaf_regular;// else if -9 < x < 1 use case lgammaf_negrecursion;// else if -2^13 < x < -9 use case lgammaf_negpoly;// else if x < -2^13 use case lgammaf_negstirling;// else if x is close to negative// roots of ln(GAMMA(x)) use case lgammaf_negroots;////// Case 2^13 <= x < OVERFLOW_BOUNDARY// ----------------------------------// Here we use algorithm based on the Stirling formula:// ln(GAMMA(x)) = ln(sqrt(2*Pi)) + (x-0.5)*ln(x) - x//// Case 1 < x < 2^13// -----------------// To calculate ln(GAMMA(x)) for such arguments we use polynomial// approximation on following intervals: [1.0; 1.25), [1.25; 1.5),// [1.5; 1.75), [1.75; 2), [2; 4), [2^i; 2^(i+1)), i=1..8//// Following variants of approximation and argument reduction are used:// 1. [1.0; 1.25)// ln(GAMMA(x)) ~ (x-1.0)*P7(x)//// 2. [1.25; 1.5)// ln(GAMMA(x)) ~ ln(GAMMA(x0))+(x-x0)*P8(x-x0),// where x0 - point of local minimum on [1;2] rounded to nearest double// precision number.//// 3. [1.5; 1.75)// ln(GAMMA(x)) ~ P8(x)//// 4. [1.75; 2.0)// ln(GAMMA(x)) ~ (x-2)*P7(x)//// 5. [2; 4)// ln(GAMMA(x)) ~ (x-2)*P10(x)//// 6. 
// [2^i; 2^(i+1)), i=2..8
//    ln(GAMMA(x)) ~ P10((x-2^i)/2^i)
//
// Case -9 < x < 1
// ---------------
// Here we use the recursive formula:
//    ln(GAMMA(x)) = ln(GAMMA(x+1)) - ln(x)
//
// Using this formula we reduce argument to base interval [1.0; 2.0]
//
// Case -2^13 < x < -9
// --------------------
// Here we use the formula:
//    ln(GAMMA(x)) = ln(Pi/(|x|*GAMMA(|x|)*sin(Pi*|x|))) =
//                 = -ln(|x|) - ln(GAMMA(|x|)) - ln(sin(Pi*r)/(Pi*r)) - ln(|r|)
//    where r = x - rounded_to_nearest(x), i.e. |r| <= 0.5 and
//    ln(sin(Pi*r)/(Pi*r)) is approximated by 8-degree polynomial of r^2
//
// Case x < -2^13
// --------------
// Here we use algorithm based on the Stirling formula:
//    ln(GAMMA(x)) = -ln(sqrt(2*Pi)) + (|x|-0.5)ln(x) - |x| -
//                   - ln(sin(Pi*r)/(Pi*r)) - ln(|r|)
//    where r = x - rounded_to_nearest(x).
//
// Neighbourhoods of negative roots
// --------------------------------
// Here we use polynomial approximation
//    ln(GAMMA(x-x0)) = ln(GAMMA(x0)) + (x-x0)*P14(x-x0),
//    where x0 is a root of ln(GAMMA(x)) rounded to nearest double
//    precision number.
//
//
// Calculation of logarithm
// ------------------------
// Consider x = 2^N * xf so
//    ln(x) = ln(frcpa(x)*x/frcpa(x))
//          = ln(1/frcpa(x)) + ln(frcpa(x)*x)
//
//    frcpa(x) = 2^(-N) * frcpa(xf)
//
//    ln(1/frcpa(x)) = -ln(2^(-N)) - ln(frcpa(xf))
//                   = N*ln(2) - ln(frcpa(xf))
//                   = N*ln(2) + ln(1/frcpa(xf))
//
//    ln(x) = ln(1/frcpa(x)) + ln(frcpa(x)*x) =
//          = N*ln(2) + ln(1/frcpa(xf)) + ln(frcpa(x)*x)
//          = N*ln(2) + T + ln(frcpa(x)*x)
//
// Let r = 1 - frcpa(x)*x, note that r is quite small by
// absolute value so
//
//    ln(x) = N*ln(2) + T + ln(1+r) ~ N*ln(2) + T + Series(r),
//    where T - is precomputed tabular value,
//    Series(r) = (P3*r + P2)*r^2 + (P1*r + 1)
//
//*********************************************************************
// General-purpose register aliases.
//
// Each line binds a symbolic name to a physical register. Several names
// deliberately map to the same register (for example GR_TAG and GR_ad_Data
// are both r8, and r25 carries five different names), so a write through
// one name overwrites the value seen through the others.
//
// NOTE(review): the "Resources Used" header comment lists r14-r30, but
// GR_SignOfGamma below is bound to r31 -- confirm which one is intended.
GR_TAG = r8
GR_ad_Data = r8
GR_ad_Co = r9
GR_ad_SignGam = r10
GR_ad_Ce = r10
GR_SignExp = r11
GR_ad_C650 = r14
GR_ad_RootCo = r14
GR_ad_C0 = r15
GR_Dx = r15
GR_Ind = r16
GR_Offs = r17
GR_IntNum = r17
GR_ExpBias = r18
GR_ExpMask = r19
GR_Ind4T = r20
GR_RootInd = r20
GR_Sig = r21
GR_Exp = r22
GR_PureExp = r23
GR_ad_C43 = r24
GR_StirlBound = r25
GR_ad_T = r25
GR_IndX8 = r25
GR_Neg2 = r25
GR_2xDx = r25
GR_SingBound = r26
GR_IndX2 = r26
GR_Neg4 = r26
GR_ad_RootCe = r26
GR_Arg = r27
GR_ExpOf2 = r28
GR_fff7 = r28
GR_Root = r28
GR_ReqBound = r28
GR_N = r29
GR_ad_Root = r30
GR_ad_OvfBound = r30
GR_SignOfGamma = r31

// NOTE(review): the GR_SAVE_* names suggest save slots for b0, ar.pfs, gp
// and sp; their actual use is not visible in this part of the file.
GR_SAVE_B0 = r33
GR_SAVE_PFS = r34
GR_SAVE_GP = r35
GR_SAVE_SP = r36

// r37-r40: per the "Resources Used" header comment, these registers are
// used to pass arguments to the error handling routine.
GR_Parameter_X = r37
GR_Parameter_Y = r38
GR_Parameter_RESULT = r39
GR_Parameter_TAG = r40
//*********************************************************************
// Floating-point register aliases.
//
// As with the general-purpose aliases, some names share a register
// (FR_X and FR_x4 are both f10; FR_xm2 and FR_w are both f11; the
// FR_S16..FR_S8 names overlap FR_R3..FR_R0 and FR_rx on f78-f82; FR_rx2
// and FR_S4 are both f84; FR_Xp8 and FR_OverflowBound are both f93).
FR_X = f10
FR_Y = f1 // lgammaf is single argument function
FR_RESULT = f8
FR_x = f6
FR_x2 = f7
FR_x3 = f9
FR_x4 = f10
FR_xm2 = f11
FR_w = f11
FR_w2 = f12
FR_Q32 = f13
FR_Q10 = f14
FR_InvX = f15
FR_NormX = f32
FR_A0 = f33
FR_A1 = f34
FR_A2 = f35
FR_A3 = f36
FR_A4 = f37
FR_A5 = f38
FR_A6 = f39
FR_A7 = f40
FR_A8 = f41
FR_A9 = f42
FR_A10 = f43
FR_int_N = f44
FR_P3 = f45
FR_P2 = f46
FR_P1 = f47
FR_LocalMin = f48
FR_Ln2 = f49
FR_05 = f50
FR_LnSqrt2Pi = f51
FR_3 = f52
FR_r = f53
FR_r2 = f54
FR_T = f55
FR_N = f56
FR_xm05 = f57
FR_int_Ln = f58
FR_P32 = f59
FR_P10 = f60
FR_Xf = f61
FR_InvXf = f62
FR_rf = f63
FR_rf2 = f64
FR_Tf = f65
FR_Nf = f66
FR_xm05f = f67
FR_P32f = f68
FR_P10f = f69
FR_Lnf = f70
FR_Xf2 = f71
FR_Xf4 = f72
FR_Xf8 = f73
FR_Ln = f74
FR_xx = f75
FR_Root = f75
FR_Req = f76
FR_1pXf = f77
FR_S16 = f78
FR_R3 = f78
FR_S14 = f79
FR_R2 = f79
FR_S12 = f80
FR_R1 = f80
FR_S10 = f81
FR_R0 = f81
FR_S8 = f82
FR_rx = f82
FR_S6 = f83
FR_rx2 = f84
FR_S4 = f84
FR_S2 = f85
FR_Xp1 = f86
FR_Xp2 = f87
FR_Xp3 = f88
FR_Xp4 = f89
FR_Xp5 = f90
FR_Xp6 = f91
FR_Xp7 = f92
FR_Xp8 = f93
FR_OverflowBound = f93
FR_2 = f94
FR_tmp = f95
FR_int_Ntrunc = f96
FR_Ntrunc = f97
//*********************************************************************
RODATA
.align 32
LOCAL_OBJECT_START(lgammaf_data)
log_table_1:
data8 0xbfd0001008f39d59 // P3
data8 0x3fd5556073e0c45a // P2
data8 0x3fe62e42fefa39ef // ln(2)
data8 0x3fe0000000000000 // 0.5
//
data8 0x3F60040155D5889E //ln(1/frcpa(1+ 0/256)
data8 0x3F78121214586B54 //ln(1/frcpa(1+ 1/256)
// NOTE(review): the operand of the data8 below is on the next source line,
// which still holds the remainder of this table in collapsed form.
data8 
0x3F841929F96832F0 //ln(1/frcpa(1+ 2/256)data8 0x3F8C317384C75F06 //ln(1/frcpa(1+ 3/256)data8 0x3F91A6B91AC73386 //ln(1/frcpa(1+ 4/256)data8 0x3F95BA9A5D9AC039 //ln(1/frcpa(1+ 5/256)data8 0x3F99D2A8074325F4 //ln(1/frcpa(1+ 6/256)data8 0x3F9D6B2725979802 //ln(1/frcpa(1+ 7/256)data8 0x3FA0C58FA19DFAAA //ln(1/frcpa(1+ 8/256)data8 0x3FA2954C78CBCE1B //ln(1/frcpa(1+ 9/256)data8 0x3FA4A94D2DA96C56 //ln(1/frcpa(1+ 10/256)data8 0x3FA67C94F2D4BB58 //ln(1/frcpa(1+ 11/256)data8 0x3FA85188B630F068 //ln(1/frcpa(1+ 12/256)data8 0x3FAA6B8ABE73AF4C //ln(1/frcpa(1+ 13/256)data8 0x3FAC441E06F72A9E //ln(1/frcpa(1+ 14/256)data8 0x3FAE1E6713606D07 //ln(1/frcpa(1+ 15/256)data8 0x3FAFFA6911AB9301 //ln(1/frcpa(1+ 16/256)data8 0x3FB0EC139C5DA601 //ln(1/frcpa(1+ 17/256)data8 0x3FB1DBD2643D190B //ln(1/frcpa(1+ 18/256)data8 0x3FB2CC7284FE5F1C //ln(1/frcpa(1+ 19/256)data8 0x3FB3BDF5A7D1EE64 //ln(1/frcpa(1+ 20/256)data8 0x3FB4B05D7AA012E0 //ln(1/frcpa(1+ 21/256)data8 0x3FB580DB7CEB5702 //ln(1/frcpa(1+ 22/256)data8 0x3FB674F089365A7A //ln(1/frcpa(1+ 23/256)data8 0x3FB769EF2C6B568D //ln(1/frcpa(1+ 24/256)data8 0x3FB85FD927506A48 //ln(1/frcpa(1+ 25/256)data8 0x3FB9335E5D594989 //ln(1/frcpa(1+ 26/256)data8 0x3FBA2B0220C8E5F5 //ln(1/frcpa(1+ 27/256)data8 0x3FBB0004AC1A86AC //ln(1/frcpa(1+ 28/256)data8 0x3FBBF968769FCA11 //ln(1/frcpa(1+ 29/256)data8 0x3FBCCFEDBFEE13A8 //ln(1/frcpa(1+ 30/256)data8 0x3FBDA727638446A2 //ln(1/frcpa(1+ 31/256)data8 0x3FBEA3257FE10F7A //ln(1/frcpa(1+ 32/256)data8 0x3FBF7BE9FEDBFDE6 //ln(1/frcpa(1+ 33/256)data8 0x3FC02AB352FF25F4 //ln(1/frcpa(1+ 34/256)data8 0x3FC097CE579D204D //ln(1/frcpa(1+ 35/256)data8 0x3FC1178E8227E47C //ln(1/frcpa(1+ 36/256)data8 0x3FC185747DBECF34 //ln(1/frcpa(1+ 37/256)data8 0x3FC1F3B925F25D41 //ln(1/frcpa(1+ 38/256)data8 0x3FC2625D1E6DDF57 //ln(1/frcpa(1+ 39/256)data8 0x3FC2D1610C86813A //ln(1/frcpa(1+ 40/256)data8 0x3FC340C59741142E //ln(1/frcpa(1+ 41/256)data8 0x3FC3B08B6757F2A9 //ln(1/frcpa(1+ 42/256)data8 0x3FC40DFB08378003 //ln(1/frcpa(1+ 
43/256)data8 0x3FC47E74E8CA5F7C //ln(1/frcpa(1+ 44/256)data8 0x3FC4EF51F6466DE4 //ln(1/frcpa(1+ 45/256)data8 0x3FC56092E02BA516 //ln(1/frcpa(1+ 46/256)data8 0x3FC5D23857CD74D5 //ln(1/frcpa(1+ 47/256)data8 0x3FC6313A37335D76 //ln(1/frcpa(1+ 48/256)data8 0x3FC6A399DABBD383 //ln(1/frcpa(1+ 49/256)data8 0x3FC70337DD3CE41B //ln(1/frcpa(1+ 50/256)data8 0x3FC77654128F6127 //ln(1/frcpa(1+ 51/256)data8 0x3FC7E9D82A0B022D //ln(1/frcpa(1+ 52/256)data8 0x3FC84A6B759F512F //ln(1/frcpa(1+ 53/256)data8 0x3FC8AB47D5F5A310 //ln(1/frcpa(1+ 54/256)data8 0x3FC91FE49096581B //ln(1/frcpa(1+ 55/256)data8 0x3FC981634011AA75 //ln(1/frcpa(1+ 56/256)data8 0x3FC9F6C407089664 //ln(1/frcpa(1+ 57/256)data8 0x3FCA58E729348F43 //ln(1/frcpa(1+ 58/256)data8 0x3FCABB55C31693AD //ln(1/frcpa(1+ 59/256)data8 0x3FCB1E104919EFD0 //ln(1/frcpa(1+ 60/256)data8 0x3FCB94EE93E367CB //ln(1/frcpa(1+ 61/256)data8 0x3FCBF851C067555F //ln(1/frcpa(1+ 62/256)data8 0x3FCC5C0254BF23A6 //ln(1/frcpa(1+ 63/256)data8 0x3FCCC000C9DB3C52 //ln(1/frcpa(1+ 64/256)data8 0x3FCD244D99C85674 //ln(1/frcpa(1+ 65/256)data8 0x3FCD88E93FB2F450 //ln(1/frcpa(1+ 66/256)data8 0x3FCDEDD437EAEF01 //ln(1/frcpa(1+ 67/256)data8 0x3FCE530EFFE71012 //ln(1/frcpa(1+ 68/256)data8 0x3FCEB89A1648B971 //ln(1/frcpa(1+ 69/256)data8 0x3FCF1E75FADF9BDE //ln(1/frcpa(1+ 70/256)data8 0x3FCF84A32EAD7C35 //ln(1/frcpa(1+ 71/256)data8 0x3FCFEB2233EA07CD //ln(1/frcpa(1+ 72/256)data8 0x3FD028F9C7035C1C //ln(1/frcpa(1+ 73/256)data8 0x3FD05C8BE0D9635A //ln(1/frcpa(1+ 74/256)data8 0x3FD085EB8F8AE797 //ln(1/frcpa(1+ 75/256)data8 0x3FD0B9C8E32D1911 //ln(1/frcpa(1+ 76/256)data8 0x3FD0EDD060B78081 //ln(1/frcpa(1+ 77/256)data8 0x3FD122024CF0063F //ln(1/frcpa(1+ 78/256)data8 0x3FD14BE2927AECD4 //ln(1/frcpa(1+ 79/256)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -