📄 mathhardalib.s
字号:
/* mathHardALib - C callable math routines for the MIPS R3010 *//* Copyright 1984-2001 Wind River Systems, Inc. */ .data .globl copyright_wind_river/* * This file has been developed or significantly modified by the * MIPS Center of Excellence Dedicated Engineering Staff. * This notice is as per the MIPS Center of Excellence Master Partner * Agreement, do not remove this notice without checking first with * WR/Platforms MIPS Center of Excellence engineering management. *//*modification history --------------------02a,16jan02,agf add warning about incorrect ceil & floor logic01z,02aug01,mem Diab integration01y,16jul01,ros add CofE comment01x,25apr01,mem Disable optimized FP routines.01w,19apr01,roz remove math functions now in c lib01v,06feb01,roz removed pow and other incorrect functions for mips01u,28dec00,agf Architecture update for MIPS32/6401t,14jul00,dra don't use pow for vr540001s,10sep99,myz added CW4000_16 support.01r,19jan99,dra added CW4000, CW4011, VR4100, VR5000 and VR5400 support.01q,20jan97,tam fixed more functions in order to handle non finite arguments (Nan, +/-Inf) and arguments out of the function domain correctly (spr #7784).01p,30dec96,tam fixed log10, logf and log10f so that it returns Nan or -Inf instead of generating an exception (spr #6511). fixed isNaNSingle().01o,05dec96,tam reworked single precision support: -made all single precision routines use only single precision floating point instructions. -re-wrote expf routine. -added powf, fabsf and fmodf routines.01n,16nov95,mem converted code to work with gas.01m,12nov94,dvs fixed clearcase conversion search/replace errors.01l,19oct93,cd added R4000 support.01k,03aug93,yao document cleanup. fixed pow () to return correct value when dblX is <= 0 (SPR#2447).01j,20jul93,yao SPR#2047 - fixed pow () function. added nop instruction for the instruction pipeline constraints. updated copyright notice.01i,19sep92,kdl changed name to mathHardALib; changed mathHardInit() to mathHardALibInit().01i,30jul92,kdl changed to ANSI single precision names (e.g. fsin -> sinf); changed mathInit() to mathHardInit().01h,06jun92,ajm 5.0.5 merge, note mod history changes, rid single quotes for make problems, also fixed if/else comments01g,26may92,rrr the tree shuffle01f,06jan92,ajm fixed sqrt bug with MIPS_ARCH and denorm_sqrt01e,31oct91,ajm added pow function01d,04oct91,rrr passed through the ansification filter -fixed #else and #endif -changed VOID to void -changed ASMLANGUAGE to _ASMLANGUAGE -changed copyright notice01c,29aug91,wmd moved to r3k/math directory.01b,16jul91,ajm added fabs routine01a,30apr91,ajm merged from mips source*//*DESCRIPTIONThis library provides a C interface to the high level math functionson the MIPS R3010 floating point coprocessor. Each routine has the followingformat: . calculate fp function using double parameter . transfer result to parameter storageWARNINGThis library only works if there is a MIPS R3010 coprocessor in the system!Attempts to use these routines with no coprocessor present will resultin illegal instruction traps.SEE ALSO: fppLib (1)*/#define _ASMLANGUAGE#include "vxWorks.h"#include "asm.h"#ifndef PORTABLE_MATHLIB/* *------------------------------------------------------------- *| RESTRICTED RIGHTS LEGEND | *| Use, duplication, or disclosure by the Government is | *| subject to restrictions as set forth in subparagraph | *| (c)(1)(ii) of the Rights in Technical Data and Computer | *| Software Clause at DFARS 252.227-7013. | *| MIPS Computer Systems, Inc. | *| 928 Arques Avenue | *| Sunnyvale, CA 94086 | *------------------------------------------------------------- *//* --------------------------------------------------------- *//* | Copyright (c) 1986, 1989 MIPS Computer Systems, Inc. | *//* | All Rights Reserved. | *//* --------------------------------------------------------- *//* Algorithm from Cody and Waite. *//* asincos.s */#define ACS_eps 3.72529029846191406250e-9#define ACS_p5 -0.69674573447350646411e+0#define ACS_p4 +0.10152522233806463645e+2#define ACS_p3 -0.39688862997504877339e+2#define ACS_p2 +0.57208227877891731407e+2#define ACS_p1 -0.27368494524164255994e+2#define ACS_q4 -0.23823859153670238830e+2#define ACS_q3 +0.15095270841030604719e+3#define ACS_q2 -0.38186303361750149284e+3#define ACS_q1 +0.41714430248260412556e+3#define ACS_q0 -0.16421096714498560795e+3#define ACS_pio2 1.57079632679489661923#define ACS_pi 3.14159265358979323846#define one 1.0#define two 2.0#define half 0.5/* atan.s */#define ATAN_r7_16 0.4375 #define ATAN_r11_16 0.6875 #define ATAN_r19_16 1.1875#define ATAN_r39_16 2.4375 #define ATAN_athfhi 4.6364760900080609352E-1#define ATAN_athflo 4.6249969567426939759E-18#define ATAN_PIo4 7.8539816339744827900E-1#define ATAN_at1fhi 9.8279372324732905408E-1#define ATAN_at1flo -2.4407677060164810007E-17#define ATAN_p11 1.6438029044759730479E-2#define ATAN_p10 -3.6700606902093604877E-2#define ATAN_p9 4.9850617156082015213E-2#define ATAN_p8 -5.8358371008508623523E-2#define ATAN_p7 6.6614695906082474486E-2#define ATAN_p6 -7.6919217767468239799E-2#define ATAN_p5 9.0908906105474668324E-2#define ATAN_p4 -1.1111110579344973814E-1#define ATAN_p3 1.4285714278004377209E-1#define ATAN_p2 -1.9999999999979536924E-1#define ATAN_p1 3.3333333333333942106E-1#define ATAN_PI 3.1415926535897931160E0#define ATAN_PIo2 1.5707963267948965580E0/* asincosf.s */#define FACS_eps 3.72529029846191406250e-9#define FACS_p2 -0.504400557e+0#define FACS_p1 +0.933935835e+0#define FACS_q1 -0.554846723e+1#define FACS_q0 +0.560363004e+1#define FACS_pio2 1.57079632679489661923#define FACS_pi 3.14159265358979323846/* atanf.s */#define FAT_mpio2 -1.57079632679489661923#define FAT_pio6 0.52359877559829887308#define FAT_p1 -0.5090958253e-1#define FAT_p0 -0.4708325141e+0#define FAT_q0 0.1412500740e+1#define FAT_twomr3 0.26794919243112270647#define FAT_sqrt3 1.73205080756887729353#define FAT_sqrt3m1 0.73205080756887729353#define FAT_pi 3.14159265358979323846/* logf.s */#define FLOG_p0 -0.5527074855e+0#define FLOG_q0 -0.6632718214e+1#define FLOG_ln2 0.69314718055994530941#define FLOG_loge 0.43429448190325182765/* sincosf.s */#define FSNCS_pio2 1.57079632679489661923#define FSNCS_pi 3.14159265358979323846#define FSNCS_ymax 32000.0#define FSNCS_oopi 0.31830988618379067154#define FSNCS_p4 0.2601903036e-5#define FSNCS_p3 -0.1980741872e-3#define FSNCS_p2 0.8333025139e-2#define FSNCS_p1 -0.1666665668e+0/* sinhf.s */#define SNF_eps 3.72529029846191406250e-9#define SNF_p1 -0.190333399e+0#define SNF_p0 -0.713793159e+1#define SNF_q0 -0.428277109e+2/* tanf.s */#define TANF_pio4 0.78539816339744830961#define TANF_pio2 1.57079632679489661923132#define TANF_ymax 6433.0#define TANF_twoopi 0.63661977236758134308#define TANF_p0 -0.958017723e-1#define TANF_q1 0.971685835e-2#define TANF_q0 -0.429135777e+0/* tanhf.s */#define TNHF_ln3o2 0.54930614433405484570#define TNHF_eps 3.72529029846191406250e-9#define TNHF_p1 -0.3831010665e-2#define TNHF_p0 -0.8237728127e+0#define TNHF_q0 +0.2471319654e+1#define TNHF_xbig 20.101268236238413961/* hypot.s */#define HYPT_sqrt2 1.4142135623730951455E+00 /* 2^ 0 * 1.6A09E667F3BCD */#define HYPT_r2p1hi 2.4142135623730949234E+00 /* 2^ 1 * 1.3504F333F9DE6 */#define HYPT_r2p1lo 1.2537167179050217666E-16 /* 2^-53 * 1.21165F626CDD5 *//* log.s */#define LG_loge 0.43429448190325182765/* sincos.s */#define SNCS_pio2 1.57079632679489661923#define SNCS_ymax 6746518852.0#define SNCS_oopi 0.31830988618379067154#if FALSE#define SNCS_pihi 3.1416015625#define SNCS_pilo -8.908910206761537356617e-6#else /* FALSE */#define SNCS_pihi 3.1415920257568359375#define SNCS_pilo 6.2783295730096264338e-7#endif /* FALSE */#define SNCS_p7 0.27204790957888846175e-14#define SNCS_p6 -0.76429178068910467734e-12#define SNCS_p5 0.16058936490371589114e-9#define SNCS_p4 -0.25052106798274584544e-7#define SNCS_p3 0.27557319210152756119e-5#define SNCS_p2 -0.19841269841201840457e-3#define SNCS_p1 0.83333333333331650314e-2#define SNCS_p0 -0.16666666666666665052e+0/* sinh.s */#define SINH_eps 3.72529029846191406250e-9#define SINH_p3 -0.78966127417357099479e+0#define SINH_p2 -0.16375798202630751372e+3#define SINH_p1 -0.11563521196851768270e+5#define SINH_p0 -0.35181283430177117881e+6#define SINH_q2 -0.27773523119650701667e+3#define SINH_q1 +0.36162723109421836460e+5#define SINH_q0 -0.21108770058106271242e+7#define SINH_ybar SINH_expmax#define SINH_expmax 709.78271289338397/* tan.s */#define TN_pio4 0.78539816339744830961#define TN_pio2 1.57079632679489661923#define TN_ymax 3373259426.0#define TN_twoopi 0.63661977236758134308#define TN_pio2hi 1.57080078125#define TN_pio2lo -4.454455103380768678308e-6#define TN_p3 -0.17861707342254426711e-4#define TN_p2 0.34248878235890589960e-2#define TN_p1 -0.13338350006421960681e+0#define TN_q4 0.49819433993786512270e-6#define TN_q3 -0.31181531907010027307e-3#define TN_q2 0.25663832289440112864e-1#define TN_q1 -0.46671683339755294240e+0#define TN_q0 1.0/* tanh.s */#define TNH_ln3o2 0.54930614433405484570#define TNH_eps 3.72529029846191406250e-9#define TNH_p2 -0.96437492777225469787e+0#define TNH_p1 -0.99225929672236083313e+2#define TNH_p0 -0.16134119023996228053e+4#define TNH_q2 +0.11274474380534949335e+3#define TNH_q1 +0.22337720718962312926e+4#define TNH_q0 +0.48402357071988688686e+4#define TNH_xbig 20.101268236238413961/* miscallenous */#ifdef MIPSEL#define D(h,l) l,h#endif /* MIPSEL */#ifdef MIPSEB#define D(h,l) h,l#endif /* MIPSEB */#define RM_MASK 3#define INF_HI_SHORT 0x7ff0#define INF_HI_SHORT_SINGLE 0x7f80#undef INCLUDE_DOUBLE_PRECISION#undef INCLUDE_SINGLE_PRECISION .text .set reorder/********************************************************************************* mathHardALibInit - initialize hardware floating point math package** This null routine is provided so the linker will pull in the math library.* It is called from mathHardInit() in mathHardLib.** WARNING* This library only works if there is a MIPS R3010 coprocessor in the system!* void mathHardALibInit ()* NOMANUAL*/ .globl mathHardALibInit .ent mathHardALibInitmathHardALibInit: j ra .end mathHardALibInit#ifdef INCLUDE_DOUBLE_PRECISION /********************************************************************************* cabs - complex absolute value** This function returns the absolute value of the double precesion complex * number* double cabs(z)* struct { double r, i;} z;* {* double hypot();* return hypot(z.r,z.i);* }*/ .globl cabs .ent cabscabs: .frame sp, 0, ra dmtc1 $4, $f12 dmtc1 $6, $f14 j hypot .end cabs /********************************************************************************* expm1 - floating point inverse natural logarithm (e ** (x))** INTERNAL* Algorithm from* "Table-driven Implementation of the Exponential Function for* IEEE Floating Point", Peter Tang, Argonne National Laboratory,* December 3, 1987* as implemented in C by M. Mueller, April 20 1988, Evans & Sutherland.* Coded in MIPS assembler by Earl Killian.* double expm1 (dblParam)* double dblParam;*/ .globl expm1 .ent expm1expm1: .frame sp, 0, ra /* argument in f12 */ .set noreorder li.d $f10, 709.78271289338397 /* expmax */ cfc1 t4, $31 /* read fp control/status */ c.ole.d $f12, $f10 /* check if special */ li.d $f14, -37.429947750237041 /* -1 threshold */ bc1f 90f /* - if NaN, +Infinity, */ /* or greater than expmax */ c.lt.d $f12, $f14 /* check for expm1(x) = -1 */ li t1, -4 bc1t 80f /* -if less than -1 threshold */ and t1, t4 /* rounding mode = nearest */ li.d $f10, -2.8768207245178096e-01 /* T1 */ ctc1 t1, $31 /* write fp control/status */ nop c.le.d $f12, $f10 li.d $f14, 2.2314355131420976e-01 /* T2 */ bc1t 20f c.lt.d $f12, $f14 li.d $f10, 5.5511151231257827e-17 bc1f 20f abs.d $f0, $f12 c.lt.d $f0, $f10 li.d $f10, 2.4360682937111612e-08 li.d $f14, 2.7582184028154369e-07 bc1t 81f nop .set reorder mul.d $f0, $f12, $f10 li.d $f10, 2.7558212415361945e-06 add.d $f0, $f14 mul.d $f0, $f12 li.d $f14, 2.4801576918453421e-05 add.d $f0, $f10 mul.d $f0, $f12 li.d $f10, 1.9841269447671544e-04 add.d $f0, $f14 mul.d $f0, $f12 li.d $f14, 1.3888888890687830e-03 add.d $f0, $f10 mul.d $f0, $f12 li.d $f10, 8.3333333334012268e-03 add.d $f0, $f14 mul.d $f0, $f12 li.d $f14, 4.1666666666665561e-02 add.d $f0, $f10 mul.d $f0, $f12 li.d $f10, 1.6666666666666632e-01 add.d $f0, $f14 mul.d $f0, $f12 add.d $f0, $f10 mul.d $f0, $f12 mul.d $f0, $f12 mul.d $f0, $f12 cvt.s.d $f2, $f12 cvt.d.s $f2, $f2 sub.d $f4, $f12, $f2 li.d $f14, 0.5 mul.d $f6, $f2, $f2 mul.d $f6, $f14 add.d $f8, $f12, $f2 mul.d $f8, $f4 mul.d $f8, $f14 li.d $f10, 0.0078125 c.lt.d $f6, $f10 bc1t 10f add.d $f10, $f2, $f6 add.d $f8, $f4 add.d $f0, $f8 add.d $f0, $f10 ctc1 t4, $31 /* restore fp control/status */ j ra10: add.d $f0, $f8 add.d $f0, $f6 add.d $f0, $f12 ctc1 t4, $31 /* restore fp control/status */ j ra20: li.d $f10, 4.6166241308446828e+01 mul.d $f0, $f12, $f10 cvt.w.d $f0, $f0 mfc1 t0, $f0 cvt.d.w $f2, $f0 li.d $f4, -2.1660849390173098e-02 mul.d $f4, $f2 add.d $f4, $f12 li.d $f6, -2.3251928468788740e-12 mul.d $f6, $f2 add.d $f2, $f4, $f6 li.d $f10, 1.3888949086377719e-03 li.d $f14, 8.3333679843421958e-03 mul.d $f0, $f2, $f10 li.d $f10, 4.1666666666226079e-02 add.d $f0, $f14 mul.d $f0, $f2 li.d $f14, 1.6666666666526087e-01 add.d $f0, $f10 mul.d $f0, $f2 li.d $f10, 5.0000000000000000e-01 add.d $f0, $f14 mul.d $f0, $f2 add.d $f0, $f10 mul.d $f0, $f2 mul.d $f0, $f2 add.d $f0, $f6 add.d $f0, $f4 and t1, t0, 31 sra t0, 5 sll t1, 4 l.d $f14, EXP_DAT+0(t1) l.d $f16, EXP_DAT+8(t1) add.d $f8, $f14, $f16 mul.d $f0, $f8 addu t2, t0, 1023 sll t2, 20 mtc1 t2, $f3 mtc1 $0, $f2 bge t0, 53, 30f ble t0, -7, 40f /* -6 <= M <= 52 */ add.d $f0, $f16 li t1, 1023 subu t1, t0 sll t1, 20 mtc1 t1, $f5 mtc1 $0, $f4 sub.d $f6, $f14, $f4 add.d $f0, $f6 mul.d $f0, $f2 ctc1 t4, $31 /* restore fp control/status */ j ra30: /* M >= 53 */ li t1, 1023 subu t1, t0 sll t1, 20 mtc1 t1, $f5 mtc1 $0, $f4 sub.d $f4, $f16, $f4 add.d $f0, $f4 add.d $f0, $f14 mul.d $f0, $f2 ctc1 t4, $31 /* restore fp control/status */ j ra40: /* M <= -7 */ add.d $f0, $f16 add.d $f0, $f14 mul.d $f0, $f2 li.d $f10, 1.0 sub.d $f0, $f10 ctc1 t4, $31 /* restore fp control/status */ j ra80: /* X < -1 threshold */ li.d $f0, -1.0 /* should raise inexact */ j ra81: /* |X| < identity threshold */ ctc1 t4, $31 /* restore fp control/status */ mov.d $f0, $f12 /* should raise inexact */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -