📄 softfloat.c
字号:
aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) ); return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); normalizeFloat32Subnormal( aSig, &aExp, &aSig ); } aSig |= 0x00800000; return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );}#endif#ifdef FLOAT128/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the double-precision floating-point format. The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic.*----------------------------------------------------------------------------*/float128 float32_to_float128( float32 a STATUS_PARAM ){ flag aSign; int16 aExp; bits32 aSig; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) ); return packFloat128( aSign, 0x7FFF, 0, 0 ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); normalizeFloat32Subnormal( aSig, &aExp, &aSig ); --aExp; } return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );}#endif/*----------------------------------------------------------------------------| Rounds the single-precision floating-point value `a' to an integer, and| returns the result as a single-precision floating-point value. 
The
| operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| float32_round_to_int: rounds `a' to an integral value using the rounding
| mode read from STATUS(float_rounding_mode).  float_flag_inexact is OR-ed
| into STATUS(float_exception_flags) whenever the returned value differs from
| `a'.  An input with exponent field 0xFF and a nonzero fraction is passed to
| propagateFloat32NaN.
*----------------------------------------------------------------------------*/
float32 float32_round_to_int( float32 a STATUS_PARAM)
{
    flag negative;
    int16 expField;
    bits32 unitBit, fracMask;
    int8 mode;
    float32 result;

    expField = extractFloat32Exp( a );

    /* Exponent field of 0x96 (0x7F + 23) or more: returned unchanged unless
       the input is a NaN. */
    if ( 0x96 <= expField ) {
        if ( ( expField == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a STATUS_VAR );
        }
        return a;
    }

    /* Exponent field of 0x7E or less: the result is one of the constants
       +/-0 or +/-1, chosen by sign and rounding mode. */
    if ( expField <= 0x7E ) {
        /* Every bit other than the sign bit is clear: return as is. */
        if ( (bits32) ( a<<1 ) == 0 ) {
            return a;
        }
        STATUS(float_exception_flags) |= float_flag_inexact;
        negative = extractFloat32Sign( a );

        if ( STATUS(float_rounding_mode) == float_round_nearest_even ) {
            /* Exponent field 0x7E with a nonzero fraction rounds away from
               zero to exponent field 0x7F; anything else falls through to
               the signed zero below. */
            if ( ( expField == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( negative, 0x7F, 0 );
            }
        }
        else if ( STATUS(float_rounding_mode) == float_round_down ) {
            if ( negative ) {
                return 0xBF800000;
            }
            return 0;
        }
        else if ( STATUS(float_rounding_mode) == float_round_up ) {
            if ( negative ) {
                return 0x80000000;
            }
            return 0x3F800000;
        }
        return packFloat32( negative, 0, 0 );
    }

    /* General case: `unitBit' is the single bit 1 << (0x96 - exponent) and
       `fracMask' covers every bit below it.  The rounding is done directly
       on the encoded value. */
    unitBit = 1;
    unitBit <<= 0x96 - expField;
    fracMask = unitBit - 1;
    result = a;
    mode = STATUS(float_rounding_mode);

    if ( mode == float_round_nearest_even ) {
        result += unitBit>>1;
        /* Exact tie: clear the unit bit so the result is even. */
        if ( ( result & fracMask ) == 0 ) {
            result &= ~ unitBit;
        }
    }
    else if ( ( mode != float_round_to_zero )
              && ( extractFloat32Sign( result ) ^ ( mode == float_round_up ) ) ) {
        /* Directed rounding away from zero: bump past the discarded bits. */
        result += fracMask;
    }

    result &= ~ fracMask;
    if ( result != a ) {
        STATUS(float_exception_flags) |= float_flag_inexact;
    }
    return result;
}

/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the single-precision
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
| before being returned.
`zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| addFloat32Sigs: magnitude addition helper; float32_add and float32_sub call
| it with the sign of `a' as `zSign'.  The fractions are worked on shifted
| left by 6 bits, so the constants 0x20000000 and 0x40000000 below are
| expressed in that shifted position.  Except for the early returns, the
| result is produced by roundAndPackFloat32.
*----------------------------------------------------------------------------*/
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int16 aExp, bExp, zExp;
    bits32 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    expDiff = aExp - bExp;
    aSig <<= 6;
    bSig <<= 6;
    if ( 0 < expDiff ) {
        /* `a' has the larger exponent field. */
        if ( aExp == 0xFF ) {
            /* Nonzero fraction: NaN handling; otherwise `a' is returned
               unchanged. */
            if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
            return a;
        }
        if ( bExp == 0 ) {
            /* Zero exponent field in `b': no leading bit is OR-ed in and
               the shift distance is reduced by one. */
            --expDiff;
        }
        else {
            bSig |= 0x20000000;
        }
        /* Align `b' to the exponent of `a' (helper defined elsewhere). */
        shift32RightJamming( bSig, expDiff, &bSig );
        zExp = aExp;
    }
    else if ( expDiff < 0 ) {
        /* `b' has the larger exponent field: mirror image of the case
           above, except that the zero-fraction 0xFF case is packed
           explicitly with sign `zSign'. */
        if ( bExp == 0xFF ) {
            if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
            return packFloat32( zSign, 0xFF, 0 );
        }
        if ( aExp == 0 ) {
            ++expDiff;
        }
        else {
            aSig |= 0x20000000;
        }
        shift32RightJamming( aSig, - expDiff, &aSig );
        zExp = bExp;
    }
    else {
        /* Equal exponent fields. */
        if ( aExp == 0xFF ) {
            if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
            return a;
        }
        /* Both exponent fields zero: the fractions are summed, shifted back
           down by 6, and packed directly with exponent field 0, bypassing
           roundAndPackFloat32. */
        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
        /* 0x40000000 is twice 0x20000000, i.e. the leading bit of both
           operands added at once. */
        zSig = 0x40000000 + aSig + bSig;
        zExp = aExp;
        goto roundAndPack;
    }
    /* Reached from both unequal-exponent branches: bit 29 is set in aSig
       (whichever operand had the larger exponent), then the sum is formed
       shifted left by one with the exponent lowered to match. */
    aSig |= 0x20000000;
    zSig = ( aSig + bSig )<<1;
    --zExp;
    if ( (sbits32) zSig < 0 ) {
        /* The shifted sum reached bit 31: use the unshifted sum and restore
           the exponent instead. */
        zSig = aSig + bSig;
        ++zExp;
    }
 roundAndPack:
    return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
}

/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the single-
| precision floating-point values `a' and `b'. If `zSign' is 1, the
| difference is negated before being returned. `zSign' is ignored if the
| result is a NaN.
The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| subFloat32Sigs: magnitude subtraction helper; float32_add and float32_sub
| call it with the sign of `a' as `zSign'.  The fractions are worked on
| shifted left by 7 bits, so 0x40000000 below is the leading-bit constant in
| that shifted position.  Control flow uses four labels: bExpBigger falls
| through into bBigger, and aExpBigger falls through into aBigger; both
| paths end at normalizeRoundAndPack.
*----------------------------------------------------------------------------*/
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
{
    int16 aExp, bExp, zExp;
    bits32 aSig, bSig, zSig;
    int16 expDiff;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    bSig = extractFloat32Frac( b );
    bExp = extractFloat32Exp( b );
    expDiff = aExp - bExp;
    aSig <<= 7;
    bSig <<= 7;
    if ( 0 < expDiff ) goto aExpBigger;
    if ( expDiff < 0 ) goto bExpBigger;
    /* Equal exponent fields from here on. */
    if ( aExp == 0xFF ) {
        if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        /* Both fractions zero with exponent field 0xFF: the invalid flag is
           raised and the default NaN returned. */
        float_raise( float_flag_invalid STATUS_VAR);
        return float32_default_nan;
    }
    if ( aExp == 0 ) {
        /* Zero exponent fields are treated as exponent 1; no leading bit is
           OR-ed in on this path. */
        aExp = 1;
        bExp = 1;
    }
    if ( bSig < aSig ) goto aBigger;
    if ( aSig < bSig ) goto bBigger;
    /* Equal magnitudes: the result is a zero whose sign bit is set only
       when the rounding mode is float_round_down. */
    return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
 bExpBigger:
    if ( bExp == 0xFF ) {
        if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        /* Zero fraction: packed with exponent field 0xFF and the sign
           opposite to `zSign'. */
        return packFloat32( zSign ^ 1, 0xFF, 0 );
    }
    if ( aExp == 0 ) {
        /* Zero exponent field in `a': no leading bit, shift distance
           reduced by one (expDiff is negative here). */
        ++expDiff;
    }
    else {
        aSig |= 0x40000000;
    }
    /* Align `a' to the exponent of `b' (helper defined elsewhere). */
    shift32RightJamming( aSig, - expDiff, &aSig );
    bSig |= 0x40000000;
 bBigger:
    /* |b| > |a|: subtract the other way round and flip the result sign. */
    zSig = bSig - aSig;
    zExp = bExp;
    zSign ^= 1;
    goto normalizeRoundAndPack;
 aExpBigger:
    if ( aExp == 0xFF ) {
        if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
        return a;
    }
    if ( bExp == 0 ) {
        --expDiff;
    }
    else {
        bSig |= 0x40000000;
    }
    shift32RightJamming( bSig, expDiff, &bSig );
    aSig |= 0x40000000;
 aBigger:
    zSig = aSig - bSig;
    zExp = aExp;
 normalizeRoundAndPack:
    /* The exponent is lowered by one before the value is handed to
       normalizeRoundAndPackFloat32 (defined elsewhere). */
    --zExp;
    return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
}

/*----------------------------------------------------------------------------
| Returns the result of adding the single-precision floating-point values `a'
| and `b'.
The operation is performed according to the IEC/IEEE Standard for| Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float32 float32_add( float32 a, float32 b STATUS_PARAM ){ flag aSign, bSign; aSign = extractFloat32Sign( a ); bSign = extractFloat32Sign( b ); if ( aSign == bSign ) { return addFloat32Sigs( a, b, aSign STATUS_VAR); } else { return subFloat32Sigs( a, b, aSign STATUS_VAR ); }}/*----------------------------------------------------------------------------| Returns the result of subtracting the single-precision floating-point values| `a' and `b'. The operation is performed according to the IEC/IEEE Standard| for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float32 float32_sub( float32 a, float32 b STATUS_PARAM ){ flag aSign, bSign; aSign = extractFloat32Sign( a ); bSign = extractFloat32Sign( b ); if ( aSign == bSign ) { return subFloat32Sigs( a, b, aSign STATUS_VAR ); } else { return addFloat32Sigs( a, b, aSign STATUS_VAR ); }}/*----------------------------------------------------------------------------| Returns the result of multiplying the single-precision floating-point values| `a' and `b'. 
The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| float32_mul: the result sign is the XOR of the operand signs.  NaN operands
| go through propagateFloat32NaN; an operand with exponent field 0xFF and a
| zero fraction multiplied by a zero raises float_flag_invalid and returns
| float32_default_nan.  Otherwise the product is rounded and packed by
| roundAndPackFloat32.
*----------------------------------------------------------------------------*/
float32 float32_mul( float32 a, float32 b STATUS_PARAM )
{
    bits32 fracA = extractFloat32Frac( a );
    int16 expA = extractFloat32Exp( a );
    flag signA = extractFloat32Sign( a );
    bits32 fracB = extractFloat32Frac( b );
    int16 expB = extractFloat32Exp( b );
    flag signB = extractFloat32Sign( b );
    flag signZ = signA ^ signB;
    int16 expZ;
    bits64 product;
    bits32 sigZ;

    /* `a' has an all-ones exponent field. */
    if ( expA == 0xFF ) {
        if ( fracA || ( ( expB == 0xFF ) && fracB ) ) {
            return propagateFloat32NaN( a, b STATUS_VAR );
        }
        if ( ( expB == 0 ) && ( fracB == 0 ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32( signZ, 0xFF, 0 );
    }

    /* `b' has an all-ones exponent field (and `a' does not). */
    if ( expB == 0xFF ) {
        if ( fracB ) {
            return propagateFloat32NaN( a, b STATUS_VAR );
        }
        if ( ( expA == 0 ) && ( fracA == 0 ) ) {
            float_raise( float_flag_invalid STATUS_VAR);
            return float32_default_nan;
        }
        return packFloat32( signZ, 0xFF, 0 );
    }

    /* Zero exponent fields: a zero operand gives a signed zero result, any
       other value is normalized before the multiplication. */
    if ( expA == 0 ) {
        if ( fracA == 0 ) {
            return packFloat32( signZ, 0, 0 );
        }
        normalizeFloat32Subnormal( fracA, &expA, &fracA );
    }
    if ( expB == 0 ) {
        if ( fracB == 0 ) {
            return packFloat32( signZ, 0, 0 );
        }
        normalizeFloat32Subnormal( fracB, &expB, &fracB );
    }

    expZ = expA + expB - 0x7F;

    /* Set the leading bit (0x00800000) in each significand and move them up
       by 7 and 8 bits respectively, then keep the upper 32 bits of the
       64-bit product. */
    fracA = ( fracA | 0x00800000 )<<7;
    fracB = ( fracB | 0x00800000 )<<8;
    shift64RightJamming( ( (bits64) fracA ) * fracB, 32, &product );
    sigZ = (bits32) product;

    /* Bit 30 clear: shift the significand up by one and lower the exponent
       to compensate. */
    if ( (sbits32) ( sigZ<<1 ) >= 0 ) {
        sigZ <<= 1;
        expZ -= 1;
    }
    return roundAndPackFloat32( signZ, expZ, sigZ STATUS_VAR );
}

/*-----------
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -