📄 s_erfc.s
字号:
.file "erfc.s"// Copyright (c) 2001 - 2005, Intel Corporation// All rights reserved.//// Contributed 2001 by the Intel Numerics Group, Intel Corporation//// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are// met://// * Redistributions of source code must retain the above copyright// notice, this list of conditions and the following disclaimer.//// * Redistributions in binary form must reproduce the above copyright// notice, this list of conditions and the following disclaimer in the// documentation and/or other materials provided with the distribution.//// * The name of Intel Corporation may not be used to endorse or promote// products derived from this software without specific prior written// permission.// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Intel Corporation is the author of this code, and requests that all// problem reports or change requests be submitted to it directly at // http://www.intel.com/software/products/opensource/libraries/num.htm.//// History//==============================================================// 11/12/01 Initial version// 05/20/02 Cleaned up namespace and sf0 syntax// 02/06/03 Reordered header: .section, .global, .proc, .align// 03/31/05 Reformatted delimiters between data tables//// API//==============================================================// double erfc(double)//// Overview of operation//==============================================================// 1. 0 <= x <= 28.0// // erfc(x) = P14(z) * exp( -x^2 ), z = x - x(i).//// Comment://// Let x(i) = -1.0 + 2^(i/4),i=0,...19. So we have 20 unequal// argument intervals [x(i),x(i+1)] with length ratio q = 2^(1/4).// Values x(i) we have in the table erfc_xb_table.// // Let x(i)<= x < x(i+1).// We can find i as exponent of number (x + 1)^4.// // Let P14(z) - polynomial approximation of degree 14 for function// erfc(z+x(i)) * exp( (z+x(i))^2) and 0 <= z <= x(i+1)-x(i).// Polynomial coeffitients we have in the table erfc_p_table.//// So we can find result for erfc(x) as above.// Algorithm description for exp function see below. // // 2. -6 <= x < 0//// erfc(x) = 2.0 - erfc(-x)//// 3. x > 28.0// erfc(x) ~=~ 0.0 //// 4. x < -6.0 // erfc(x) ~=~ 2.0 // Special values //==============================================================// erfc(+0) = 1.0// erfc(-0) = 1.0// erfc(+qnan) = +qnan // erfc(-qnan) = -qnan // erfc(+snan) = +qnan // erfc(-snan) = -qnan // erfc(-inf) = 2.0 // erfc(+inf) = +0//==============================================================// Take double exp(double) from libm_64.//// Overview of operation//==============================================================// Take the input x. w is "how many log2/128 in x?"// w = x * 128/log2// n = int(w)// x = n log2/128 + r + delta// n = 128M + index_1 + 2^4 index_2// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)// Construct 2^M// Get 2^(index_1/128) from table_1;// Get 2^(index_2/8) from table_2;// Calculate exp(r) by series// r = x - n (log2/128)_high// delta = - n (log2/128)_low// Calculate exp(delta) as 1 + delta//==============================================================// Comment for exp for erfc://// We use quad precision for calculate input argument -x^2 and add// result low bits to value delta in exp. // Registers used//==============================================================// Floating Point registers used: // f8, input// f9 -> f15, f32 -> f93// General registers used: // r32 -> r68 // Predicate registers used:// p6 -> p15// Assembly macros//==============================================================exp_GR_rshf = r33EXP_AD_TB1 = r34EXP_AD_TB2 = r35EXP_AD_P = r36exp_GR_N = r37exp_GR_index_1 = r38exp_GR_index_2_16 = r39exp_GR_biased_M = r40EXP_AD_T1 = r41EXP_AD_T2 = r42exp_GR_sig_inv_ln2 = r43exp_GR_17ones = r44exp_TB1_size = r45exp_TB2_size = r46exp_GR_rshf_2to56 = r47exp_GR_exp_2tom56 = r48// GR for erfc(x)//==============================================================GR_POS_ARG_ASYMP = r49GR_NEG_ARG_ASYMP = r50GR_ARG_ASYMP = r51GR_ERFC_XB_TB = r52GR_ERFC_P_TB = r53GR_IndxPlusBias = r54GR_BIAS = r55GR_P_A12 = r56GR_P_A13 = r57GR_AbsArg = r58GR_ShftXBi = r59GR_ShftPi = r60GR_mBIAS = r61GR_ShftPi_bias = r62GR_ShftXBi_bias = r63GR_ShftA12 = r64GR_ShftA13 = r65GR_EpsNorm = r66GR_0x1 = r67GR_ShftPi_8 = r68// GR for __libm_support call//==============================================================GR_SAVE_B0 = r61GR_SAVE_PFS = r62GR_SAVE_GP = r63GR_SAVE_SP = r64GR_Parameter_X = r65GR_Parameter_Y = r66GR_Parameter_RESULT = r67GR_Parameter_TAG = r68// FR for exp(-x^2)//==============================================================FR_X = f10FR_Y = f1FR_RESULT = f8EXP_2TOM56 = f6EXP_INV_LN2_2TO63 = f7EXP_W_2TO56_RSH = f9EXP_RSHF_2TO56 = f10exp_P4 = f11 exp_P3 = f12 exp_P2 = f13 exp_P1 = f14 exp_ln2_by_128_hi = f15 exp_ln2_by_128_lo = f32 EXP_RSHF = f33EXP_Nfloat = f34 exp_r = f35exp_f = f36exp_rsq = f37exp_rcube = f38EXP_2M = f39exp_S1 = f40exp_T1 = f41exp_rP4pP3 = f42exp_P_lo = f43exp_P_hi = f44exp_P = f45exp_S = f46EXP_NORM_f8 = f47 exp_S2 = f48exp_T2 = f49// FR for erfc(x)//==============================================================FR_AbsArg = f50FR_Tmp = f51FR_Xb = f52FR_A0 = f53FR_A1 = f54FR_A2 = f55FR_A3 = f56FR_A4 = f57FR_A5 = f58FR_A6 = f59FR_A7 = f60FR_A8 = f61FR_A9 = f62FR_A10 = f63FR_A11 = f64FR_A12 = f65FR_A13 = f66FR_A14 = f67FR_P14_0_1 = f68FR_P14_0_2 = f69FR_P14_1_1 = f70FR_P14_1_2 = f71FR_P14_2_1 = f72FR_P14_2_2 = f73FR_P14_3_1 = f74FR_P14_3_2 = f75FR_P14_6_1 = f76FR_P14_7_1 = f77FR_P14_7_2 = f78FR_P14_8_1 = f79FR_P14_8_2 = f80FR_P14_12_1 = f81FR_P14_13_1 = f82FR_P14_13_2 = f83FR_Pol = f84FR_Exp = f85FR_2 = f86f8_sq_lo = f87FR_LocArg = f88FR_Tmpf = f89FR_Tmp1 = f90FR_EpsNorm = f91FR_UnfBound = f92FR_NormX = f93// Data tables//==============================================================RODATA.align 16// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************LOCAL_OBJECT_START(exp_table_1)data8 0x403a8b12fc6e4892 , 0 // underflow boundarydata8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hidata8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo//// Table 1 is 2^(index_1/128) where// index_1 goes from 0 to 15//data8 0x8000000000000000 , 0x00003FFFdata8 0x80B1ED4FD999AB6C , 0x00003FFFdata8 0x8164D1F3BC030773 , 0x00003FFFdata8 0x8218AF4373FC25EC , 0x00003FFFdata8 0x82CD8698AC2BA1D7 , 0x00003FFFdata8 0x8383594EEFB6EE37 , 0x00003FFF
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -