📄 mathhardalib.s
字号:
/*******************************************************************************
*
* logf - single precision floating point natural logarithm
*
* float logf (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
* Uses: t0, t1, t2, $f0, $f2, $f4, $f6, $f8; $f12 is overwritten
*
* Special cases, as coded below:
*   x = +0 or -0        returns -1.0 / 0.0
*   sign bit of x set   returns the bit pattern 0x7fff0000
*   exponent field 255  returns x unchanged
*
* Otherwise x is split as f * 2**N by rewriting the exponent field, f is
* doubled (and N decremented) unless 0.70710678... < f, and the result is
*   z + z * (w * FLOG_p0 / (w + FLOG_q0)) + N * FLOG_ln2
* with z = (f - one) * two / (f + one) and w = z * z.
*
* NOTE(review): an input whose exponent field is 0 is not normalised before
* the exponent field is rewritten -- confirm denormals never reach this code.
*/
        .globl  logf
        .ent    logf
logf:
        .frame  sp, 0, ra
        mfc1    t0, $f12                /* t0 = raw bits of x */
        srl     t1, t0, 23              /* t1 = sign and exponent field */
        ble     t0, 0, logferr          /* sign bit set, or +0 */
        beq     t1, 255, logfnan        /* exponent field all ones */
        subu    t1, 126                 /* t1 = N */
        sll     t2, t1, 23
        subu    t0, t2                  /* exponent field of t0 becomes 126 */
        mtc1    t0, $f12                /* $f12 = f */
        li.s    $f6, 0.70710678118654752440
        li.s    $f8, one
        c.lt.s  $f6, $f12
        li.s    $f6, two
        bc1t    logf1
        /* f is not above 0.7071...: use 2*f and N-1 instead */
        addu    t0, (1<<23)
        mtc1    t0, $f12
        subu    t1, 1
logf1:
        sub.s   $f4, $f12, $f8          /* f - one */
        mul.s   $f4, $f6                /* (f - one) * two */
        add.s   $f0, $f12, $f8          /* f + one */
        div.s   $f4, $f0                /* $f4 = z */
        mul.s   $f0, $f4, $f4           /* $f0 = w = z * z */
        li.s    $f6, FLOG_p0
        li.s    $f8, FLOG_q0
        mul.s   $f2, $f0, $f6           /* w * p0 */
        add.s   $f0, $f0, $f8           /* w + q0 */
        mtc1    t1, $f8                 /* integer N, converted below */
        div.s   $f2, $f0
        mul.s   $f2, $f4
        add.s   $f2, $f4                /* $f2 = z + z * r(w) */
        beq     t1, 0, log2f            /* N = 0: no exponent term to add */
        li.s    $f6, FLOG_ln2
        cvt.s.w $f8, $f8                /* (float) N */
        mul.s   $f8, $f6
        add.s   $f2, $f8
log2f:
        mov.s   $f0, $f2
        j       ra
logferr:
        li.s    $f2, 0.0
        sll     t1, t0, 1               /* drop the sign bit */
        beq     t1, 0, logf0            /* x is +0 or -0 */
        /* return signaling Nan */
        li      t1, 0x7fff
        sll     t1, t1, 16
        mtc1    t1, $f0
        j       ra
logf0:
        li.s    $f0, -1.0
        div.s   $f0, $f2                /* -1.0 / 0.0 */
        j       ra
logfnan:
        mov.s   $f0, $f12
        j       ra
        .end    logf

#if FALSE
        .globl  r_lg10
        .ent    r_lg10
r_lg10:
        l.s     $f12, (a0)
        .end    r_lg10
#endif

/*******************************************************************************
*
* log10f - single precision floating point logarithm base 10
*
* float log10f (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
* Uses: t3 (holds the return address across the call to logf), v0, $f6, $f10,
*       plus everything logf uses
*
* Returns logf (x) * FLOG_loge.  An argument that compares "unordered or less
* than" 0.0 returns the bit pattern 0x7fc00000; an argument equal to 0.0
* returns 0xff800000.
*/
        .globl  log10f
        .ent    log10f
log10f:
        .frame  sp, 0, t3
        move    t3, ra                  /* logf above does not touch t3 */
        /* is x < 0 ? */
        li.s    $f10, 0.0
        c.ult.s $f12, $f10
        nop
        bc1t    log10fNeg
        nop
        /* is x = 0 ? */
        c.ueq.s $f12, $f10
        nop
        bc1t    log10fZero
        /* return log(x)/log(10) */
        jal     logf
        li.s    $f6, FLOG_loge
        mul.s   $f0, $f6
        j       t3
log10fNeg:
        /* return signaling Nan */
        li      v0, 0x7fc0              /* set Nan return value */
        sll     v0, v0, 16
        mtc1    v0, $f0
        j       t3
log10fZero:
        /* returns -Inf */
        li      v0, 0xff80
        sll     v0, v0, 16              /* v0 = 0xff800000 */
        mtc1    v0, $f0
        j       t3
        .end    log10f

/*******************************************************************************
*
* cosf - single precision floating point cosine
*
* float cosf (fltParam)
*     float fltParam;
*
* In:   $f12 = x (replaced by its absolute value)
* Out:  $f0  = result
* Uses: t0, t1, $f2, $f4, $f6, $f8, $f10, plus what the shared tail uses
*
* Only the argument reduction lives here; the routine then branches to the
* labels sincos / sincos2 inside sinf.  t1 keeps the caller's FP control
* word ($31) so the shared tail can restore it.
*
* Reduction: N = (int) (|x| * FSNCS_oopi + half), $f2 = (float) N - half,
* $f10 = |x|.  If |x| < FSNCS_ymax does not hold, control goes to sincos2.
*/
        .globl  cosf
        .ent    cosf
cosf:
        .frame  sp, 0, ra
        li.s    $f6, FSNCS_ymax
        abs.s   $f12                    /* cos(-x) = cos(x) */
        cfc1    t1, $31                 /* save FP control word */
        c.olt.s $f12, $f6
        and     t0, t1, ~RM_MASK        /* control word with RM_MASK bits cleared */
        bc1f    sincos2                 /* argument out of range */
        ctc1    t0, $31
        /* Reduce argument */
        li.s    $f6, FSNCS_oopi
        li.s    $f8, half
        mul.s   $f2, $f12, $f6
        add.s   $f2, $f8
        cvt.w.s $f4, $f2                /* N as integer */
        cvt.s.w $f2, $f4                /* N as float */
        mfc1    t0, $f4                 /* t0 = N; sincos tests its low bit */
        sub.s   $f2, $f8                /* N - half */
        mov.s   $f10, $f12
        b       sincos
        .end    cosf

/*******************************************************************************
*
* sinf - single precision floating point sine
*
* float sinf (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
* Uses: t0, t1, $f0, $f2, $f4, $f6, $f8, $f10
*
* |x| < FSNCS_pio2          polynomial applied to x directly (sincos1)
* |x| < FSNCS_ymax          N = (int) (x * FSNCS_oopi), f = x - N * FSNCS_pi,
*                           f negated when N is odd, then the polynomial
* otherwise                 returns 0.0 / 0.0 (sincos2)
*
* Polynomial: f + f * g * R(g), g = f * f,
*   R(g) = ((FSNCS_p4 * g + FSNCS_p3) * g + FSNCS_p2) * g + FSNCS_p1
*
* The labels sincos, sincos1 and sincos2 are also entered from cosf.
*/
        .globl  sinf
        .ent    sinf
sinf:
        .frame  sp, 0, ra
        li.s    $f8, FSNCS_pio2
        abs.s   $f0, $f12
        c.olt.s $f0, $f8
        cfc1    t1, $31                 /* save FP control word */
        mov.s   $f10, $f12
        bc1t    sincos1                 /* small argument: no reduction */
        and     t0, t1, ~RM_MASK        /* control word with RM_MASK bits cleared */
        li.s    $f6, FSNCS_ymax
        c.olt.s $f0, $f6
        li.s    $f6, FSNCS_oopi
        bc1f    sincos2                 /* argument out of range */
        ctc1    t0, $31
        /* Reduce argument */
        mul.s   $f2, $f12, $f6
        cvt.w.s $f2, $f2
        mfc1    t0, $f2                 /* t0 = N */
        cvt.s.w $f2, $f2                /* $f2 = (float) N */
sincos:
        /* use extended precision arithmetic to subtract N*PI */
        /* NOTE(review): only the single constant FSNCS_pi is used here */
        li.s    $f6, FSNCS_pi
        and     t0, 1                   /* parity of N */
        mul.s   $f2, $f6
        sub.s   $f10, $f2
        beq     t0, 0, sincos1
        neg.s   $f10                    /* odd N: flip the sign */
sincos1:
        mul.s   $f2, $f10, $f10         /* g = f**2 */
        /* evaluate R(g) */
        li.s    $f6, FSNCS_p4
        li.s    $f8, FSNCS_p3
        mul.s   $f4, $f2, $f6
        add.s   $f4, $f8
        li.s    $f8, FSNCS_p2
        mul.s   $f4, $f2
        add.s   $f4, $f8
        li.s    $f8, FSNCS_p1
        mul.s   $f4, $f2
        add.s   $f4, $f8
        /* result is f+f*g*R(g) */
        mul.s   $f4, $f2
        mul.s   $f4, $f10
        add.s   $f0, $f10, $f4
        ctc1    t1, $31                 /* restore rounding mode */
        j       ra
sincos2:
        li.s    $f0, 0.0
        div.s   $f0, $f0                /* 0.0 / 0.0 */
        j       ra
        .end    sinf

/*******************************************************************************
*
* sinhf - single precision floating point hyperbolic sine
*
* float sinhf (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
* Uses: t0, t1, $f0, $f2, $f4, $f6, $f8, $f10, $f12; calls expf
*
* |x| <= one, |x| < SNF_eps   returns x
* |x| <= one                  x + x * g * (SNF_p1 * g + SNF_p0) / (g + SNF_q0),
*                             g = x * x
* |x| <= 88.7228317           e = expf (|x|); half * e - half / e, negated
*                             when the sign bit of x is set
* otherwise                   sinhf_error
*
* NOTE(review): x and ra are stored at 8(sp) / 0(sp) before sp is lowered,
* i.e. above this routine's own stack pointer -- presumably the caller's
* argument save area; confirm against the ABI in use.
*/
/*
lnv:    .float 0.69316101074218750000e+0
vo2m1:  .float 0.13830277879601902638e-4
*/
        .globl  sinhf
        .ent    sinhf
sinhf:
        .frame  sp, 0, ra
        li.s    $f8, one
        abs.s   $f0, $f12
        c.ole.s $f0, $f8
        li.s    $f8, SNF_eps
        bc1f    sinhf2                  /* |x| <= one does not hold */
        c.lt.s  $f0, $f8
        bc1t    sinhf1                  /* |x| < SNF_eps */
sinhf0:
        mul.s   $f2, $f0, $f0           /* g = x * x */
        li.s    $f10, SNF_p1
        li.s    $f8, SNF_q0
        mul.s   $f4, $f2, $f10
        li.s    $f10, SNF_p0
        add.s   $f6, $f2, $f8           /* g + q0 */
        add.s   $f4, $f10               /* p1 * g + p0 */
        mul.s   $f4, $f2
        div.s   $f4, $f6
        mul.s   $f4, $f12
        add.s   $f0, $f4, $f12
        j       ra
sinhf1:
        mov.s   $f0, $f12
        j       ra
sinhf2:
        li.s    $f8, 88.7228317
        s.s     $f12, 8(sp)             /* keep x: its sign is needed later */
        c.ole.s $f0, $f8
        SW      ra, 0(sp)
        mov.s   $f12, $f0               /* argument for expf = |x| */
        bc1f    sinhf3
        subu    sp, 16
        jal     expf
        addu    sp, 16
        li.s    $f8, half
        div.s   $f2, $f8, $f0           /* half / e */
        mul.s   $f0, $f8                /* half * e */
        lw      t0, 8(sp)               /* raw bits of the original x */
        LW      ra, 0(sp)
        bltz    t0, 1f
        sub.s   $f0, $f0, $f2
        j       ra
1:
        sub.s   $f0, $f2, $f0
        j       ra
sinhf3:
/*      li.s    $f8, lnv
        li.s    $f6, expfmax
        sub.s   $f12, $f8
        c.lt.s  $f6, $f12
        SW      ra, 0(sp)
        bc1t    sinhf_error
        subu    sp, 16
        jal     expf
        addu    sp, 16
        li.s    $f8, one
        li.s    $f6, vo2m1
        sub.s   $f2, $f0, $f8
        LW      ra, 0(sp)
        mul.s   $f2, $f6
        add.s   $f0, $f2
        j       ra
*/
sinhf_error:
        /* raise Overflow and return +-Infinity */
        lw      t0, 8(sp)               /* raw bits of the original x */
        sll     t1, t0, 1
        srl     t1, 23+1                /* t1 = exponent field */
        beq     t1, 255, 1f             /* exponent all ones: keep $f0 = |x| */
        li.s    $f0, 2e38
        add.s   $f0, $f0
1:
        bltz    t0, 2f
        j       ra
2:
        neg.s   $f0
        j       ra
        .end    sinhf

/*******************************************************************************
*
* sqrtf - single precision floating point square root
*
* 0.5ulp accurate algorithm using double precision for final iteration
*
* float sqrtf (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
*
* With the "#if 1" below, the routine is a single sqrt.s instruction; the
* #else branch (table lookup plus iteration, and the helper denorm_sqrtf)
* is not assembled.
*/
        .globl  sqrtf
        .ent    sqrtf
sqrtf:
        .frame  sp, 0, ra
#if 1
/* all MIPS processors that we support the FPU, would have the sqrt instruction */
/* #if ((CPU == R4650) || (CPU == R4000) || (CPU==VR5000) || (CPU==VR5400)) */
        sqrt.s  $f0,$f12
        j       ra
        .end    sqrtf
#else /* CPU == R3000 || CPU == CW4000 || CPU == CW4011 */
        mfc1    t0, $f12
        li      t2, -(127<<22)+(127<<23)
        sra     t1, t0, 23
        li      t3, 255
        blez    t1, 8f
        srl     t0, 1
        beq     t1, t3, 9f
        srl     t1, t0, 18-2
        and     t1, 31<<2
        lw      t1, local_sqrttable(t1)
        addu    t0, t2
        subu    t0, t1
        mtc1    t0, $f0
        /* 8 -> 18 bits */
        cfc1    t4, $31
        div.s   $f2, $f12, $f0
        cvt.d.s $f12, $f12
        li      t2, (1<<23)
        /* 9 cycle interlock */
        add.s   $f0, $f2
        /* 1 cycle interlock (2 cycle stall) */
        mfc1    t0, $f0
        add     t1, t2, 6<<3
        /* 17 -> 18 bits (instead of nop) */
        subu    t0, t1
        mtc1    t0, $f0
        /* 18 -> 37 */
        cvt.d.s $f0, $f0
        div.d   $f2, $f12, $f0
        /* 18 cycle interlock */
        add.d   $f0, $f2
        /* 1 cycle interlock (2 cycle stall) */
        /* 37 -> 75 (53) */
        div.d   $f2, $f12, $f0
        mfc1    t0, $f1
        li      t1, (2<<20)
        subu    t0, t1
        mtc1    t0, $f1
        /* nop */
        /* 13 cycle interlock */
        add.d   $f0, $f2
        ctc1    t4, $31
        cvt.s.d $f0, $f0
        j       ra
8:
        /* sign = 1 or biased exponent = 0 */
        sll     t2, t0, 1
        bne     t2, 0, 1f
9:
        /* x = 0.0, -0.0, +Infinity, or NaN */
        mov.s   $f0, $f12
        j       ra
1:
        /* x < 0 or x = denorm */
        move    t8, ra
        bgez    t0, denorm_sqrtf
        li.s    $f0, 0.0
        div.s   $f0, $f0
        j       ra
        .end    sqrtf

        .ent    denorm_sqrtf
denorm_sqrtf:
        .frame  sp, 0, t8
        cvt.d.s $f12, $f12
        jal     sqrt
        cvt.s.d $f0, $f0
        j       t8
        /* nop */
        .end    denorm_sqrtf
#endif /* (CPU == R4650) || (CPU == R4000) || (CPU==VR5000) || (CPU==VR5400) */

/*******************************************************************************
*
* tanf - single precision floating point tangent
*
* float tanf (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
* Uses: t0, t1, t2, $f0, $f2, $f4, $f6, $f8, $f10, $f14
*
* |x| < TANF_pio4     no reduction, N treated as 0
* |x| < TANF_ymax     N = (int) (x * TANF_twoopi), f = x - N * TANF_pio2
* otherwise           returns 0.0 / 0.0 (tanf2)
*
* With g = f * f, num = f + f * g * TANF_p0 and
* den = (TANF_q1 * g + TANF_q0) * g + one, the result is num / den when N is
* even and -(den / num) when N is odd.  t1 keeps the caller's FP control
* word ($31), which is written back before the final divide.
*/
        .globl  tanf
        .ent    tanf
tanf:
        .frame  sp, 0, ra
        li.s    $f8, TANF_pio4
        abs.s   $f0, $f12
        c.olt.s $f0, $f8
        cfc1    t1, $31                 /* save FP control word */
        mov.s   $f14, $f12
        li      t0, 0                   /* N parity = even by default */
        bc1t    tanf0                   /* small argument: no reduction */
        and     t2, t1, ~RM_MASK        /* control word with RM_MASK bits cleared */
        li.s    $f8, TANF_ymax
        c.olt.s $f0, $f8
        li.s    $f8, TANF_twoopi
        bc1f    tanf2                   /* argument out of range */
        mul.s   $f2, $f12, $f8
        /* convert to integer using round-to-nearest */
        ctc1    t2, $31
        cvt.w.s $f2, $f2
        mfc1    t0, $f2
        and     t0, 1                   /* parity of N */
        /* argument reduction */
        cvt.s.w $f2, $f2
        li.s    $f6, TANF_pio2
        mul.s   $f2, $f6
        sub.s   $f14, $f2               /* $f14 = f */
tanf0:
        /* rational approximation */
        mul.s   $f2, $f14, $f14         /* g = f * f */
        li.s    $f8, TANF_q1
        li.s    $f6, TANF_p0
        mul.s   $f10, $f2, $f8
        li.s    $f8, TANF_q0
        mul.s   $f4, $f2, $f6
        add.s   $f10, $f8
        mul.s   $f10, $f2
        li.s    $f8, one
        mul.s   $f4, $f14
        add.s   $f10, $f8               /* $f10 = den */
        add.s   $f14, $f4               /* $f14 = num */
        ctc1    t1, $31                 /* restore saved control word */
        bne     t0, 0, tanf1
        div.s   $f0, $f14, $f10
        j       ra
tanf1:
        div.s   $f0, $f10, $f14
        neg.s   $f0
        j       ra
tanf2:
        li.s    $f0, 0.0
        div.s   $f0, $f0                /* 0.0 / 0.0 */
        j       ra
        .end    tanf

/*******************************************************************************
*
* tanhf - single precision floating point hyperbolic tangent
*
* float tanhf (fltParam)
*     float fltParam;
*
* In:   $f12 = x
* Out:  $f0  = result
*
* Visible paths:
*   (bits of x with sign cleared) > 0x7f800000   returns x (NaN)
*   |x| <= TNHF_ln3o2 and |x| < TNHF_eps         returns x
*   |x| <= TNHF_ln3o2                            x + x * g * (TNHF_p1 * g +
*                                                TNHF_p0) / (g + TNHF_q0),
*                                                g = x * x
*   TNHF_ln3o2 < |x|, not above TNHF_xbig        e = expf (2 * |x|);
*                                                one - two / (e + one) for
*                                                non-negative x
*
* NOTE(review): as in sinhf, x and ra are stored at 8(sp) / 0(sp) before sp
* is lowered -- confirm against the ABI in use.
*/
        .globl  tanhf
        .ent    tanhf
tanhf:
        .frame  sp, 0, ra
        /* is $f12 = Nan ? */
        mfc1    t0, $f12
        sll     t2, t0, 1
        srl     t2, t2, 1               /* t2 = bits of x without the sign */
        li      t4, 255
        sll     t4, 23                  /* t4 = 0x7f800000 */
        ble     t2, t4, 1f
        /* tanhf (Nan) = Nan */
        mov.s   $f0, $f12
        j       ra
1:
        li.s    $f8, TNHF_ln3o2
        abs.s   $f0, $f12
        c.lt.s  $f8, $f0
        li.s    $f8, TNHF_eps
        bc1t    tanhf2                  /* TNHF_ln3o2 < |x| */
        c.lt.s  $f0, $f8
        bc1t    tanhf1                  /* |x| < TNHF_eps */
        mul.s   $f2, $f0, $f0           /* g = x * x */
        li.s    $f10, TNHF_p1
        li.s    $f8, TNHF_q0
        mul.s   $f4, $f2, $f10
        li.s    $f10, TNHF_p0
        add.s   $f6, $f2, $f8           /* g + q0 */
        add.s   $f4, $f10               /* p1 * g + p0 */
        mul.s   $f4, $f2
        div.s   $f4, $f6
        mul.s   $f4, $f12
        add.s   $f0, $f4, $f12
        j       ra
tanhf1:
        mov.s   $f0, $f12
        j       ra
tanhf2:
        li.s    $f10, TNHF_xbig
        s.s     $f12, 8(sp)             /* keep x: its sign is needed later */
        c.lt.s  $f10, $f0
        SW      ra, 0(sp)
        add.s   $f12, $f0, $f0          /* argument for expf = 2 * |x| */
        bc1t    tanhf4
        subu    sp, 16
        jal     expf
        addu    sp, 16
        li.s    $f10, one
        li.s    $f8, two
        add.s   $f0, $f10               /* e + one */
        div.s   $f0, $f8, $f0           /* two / (e + one) */
        lw      t0, 8(sp)               /* raw bits of the original x */
        LW      ra, 0(sp)
        bltz    t0, 1f
        sub.s   $f0, $f10, $f0
        j       ra
        /*
         * NOTE(review): the listing ends here.  The "1:" target of the bltz
         * above, the label tanhf4 and the closing ".end tanhf" are not part
         * of the visible source and must be restored from the original file.
         */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -