📄 softfloat.c
字号:
/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe single-precision floating-point format. The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float32 int32_to_float32( int32 a ){ flag zSign; if ( a == 0 ) return 0; if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); zSign = ( a < 0 ); return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );}/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe double-precision floating-point format. The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float64 int32_to_float64( int32 a ){ flag aSign; uint32 absA; int8 shiftCount; bits64 zSig; if ( a == 0 ) return 0; aSign = ( a < 0 ); absA = aSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 21; zSig = absA; return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );}#ifdef FLOATX80/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a'to the extended double-precision floating-point format. The conversionis performed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/floatx80 int32_to_floatx80( int32 a ){ flag zSign; uint32 absA; int8 shiftCount; bits64 zSig; if ( a == 0 ) return packFloatx80( 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 32; zSig = absA; return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );}#endif/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the 32-bit two's complement integer format. The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic---which means in particular that the conversion is roundedaccording to the current rounding mode. If `a' is a NaN, the largestpositive integer is returned. Otherwise, if the conversion overflows, thelargest integer with the same sign as `a' is returned.-------------------------------------------------------------------------------*/int32 float32_to_int32( float32 a ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; bits64 zSig; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; if ( aExp ) aSig |= 0x00800000; shiftCount = 0xAF - aExp; zSig = aSig; zSig <<= 32; if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); return roundAndPackInt32( aSign, zSig );}/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the 32-bit two's complement integer format. The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic, except that the conversion is always rounded toward zero. If`a' is a NaN, the largest positive integer is returned. Otherwise, if theconversion overflows, the largest integer with the same sign as `a' isreturned.-------------------------------------------------------------------------------*/int32 float32_to_int32_round_to_zero( float32 a ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; int32 z; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); shiftCount = aExp - 0x9E; if ( 0 <= shiftCount ) { if ( a == 0xCF000000 ) return 0x80000000; float_raise( float_flag_invalid ); if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; return 0x80000000; } else if ( aExp <= 0x7E ) { if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; return 0; } aSig = ( aSig | 0x00800000 )<<8; z = aSig>>( - shiftCount ); if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { float_exception_flags |= float_flag_inexact; } return aSign ? - z : z;}/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the double-precision floating-point format. The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/float64 float32_to_float64( float32 a ){ flag aSign; int16 aExp; bits32 aSig; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); return packFloat64( aSign, 0x7FF, 0 ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); normalizeFloat32Subnormal( aSig, &aExp, &aSig ); --aExp; } return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );}#ifdef FLOATX80/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the extended double-precision floating-point format. The conversionis performed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/floatx80 float32_to_floatx80( float32 a ){ flag aSign; int16 aExp; bits32 aSig; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); normalizeFloat32Subnormal( aSig, &aExp, &aSig ); } aSig |= 0x00800000; return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );}#endif/*-------------------------------------------------------------------------------Rounds the single-precision floating-point value `a' to an integer, andreturns the result as a single-precision floating-point value. Theoperation is performed according to the IEC/IEEE Standard for BinaryFloating-point Arithmetic.-------------------------------------------------------------------------------*/float32 float32_round_to_int( float32 a ){ flag aSign; int16 aExp; bits32 lastBitMask, roundBitsMask; int8 roundingMode; float32 z; aExp = extractFloat32Exp( a ); if ( 0x96 <= aExp ) { if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { return propagateFloat32NaN( a, a ); } return a; } if ( aExp <= 0x7E ) { if ( (bits32) ( a<<1 ) == 0 ) return a; float_exception_flags |= float_flag_inexact; aSign = extractFloat32Sign( a ); switch ( float_rounding_mode ) { case float_round_nearest_even: if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { return packFloat32( aSign, 0x7F, 0 ); } break; case float_round_down: return aSign ? 0xBF800000 : 0; case float_round_up: return aSign ? 0x80000000 : 0x3F800000; } return packFloat32( aSign, 0, 0 ); } lastBitMask = 1; lastBitMask <<= 0x96 - aExp; roundBitsMask = lastBitMask - 1; z = a; roundingMode = float_rounding_mode; if ( roundingMode == float_round_nearest_even ) { z += lastBitMask>>1; if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; } else if ( roundingMode != float_round_to_zero ) { if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { z += roundBitsMask; } } z &= ~ roundBitsMask; if ( z != a ) float_exception_flags |= float_flag_inexact; return z;}/*-------------------------------------------------------------------------------Returns the result of adding the absolute values of the single-precisionfloating-point values `a' and `b'. If `zSign' is true, the sum is negatedbefore being returned. `zSign' is ignored if the result is a NaN. Theaddition is performed according to the IEC/IEEE Standard for BinaryFloating-point Arithmetic.-------------------------------------------------------------------------------*/static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ){ int16 aExp, bExp, zExp; bits32 aSig, bSig, zSig; int16 expDiff; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); bSig = extractFloat32Frac( b ); bExp = extractFloat32Exp( b ); expDiff = aExp - bExp; aSig <<= 6; bSig <<= 6; if ( 0 < expDiff ) { if ( aExp == 0xFF ) { if ( aSig ) return propagateFloat32NaN( a, b ); return a; } if ( bExp == 0 ) { --expDiff; } else { bSig |= 0x20000000; } shift32RightJamming( bSig, expDiff, &bSig ); zExp = aExp; } else if ( expDiff < 0 ) { if ( bExp == 0xFF ) { if ( bSig ) return propagateFloat32NaN( a, b ); return packFloat32( zSign, 0xFF, 0 ); } if ( aExp == 0 ) { ++expDiff; } else { aSig |= 0x20000000; } shift32RightJamming( aSig, - expDiff, &aSig ); zExp = bExp; } else { if ( aExp == 0xFF ) { if ( aSig | bSig ) return propagateFloat32NaN( a, b ); return a; } if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); zSig = 0x40000000 + aSig + bSig; zExp = aExp; goto roundAndPack; } aSig |= 0x20000000; zSig = ( aSig + bSig )<<1; --zExp; if ( (sbits32) zSig < 0 ) { zSig = aSig + bSig; ++zExp; } roundAndPack: return roundAndPackFloat32( zSign, zExp, zSig );}/*-------------------------------------------------------------------------------Returns the result of subtracting the absolute values of the single-precision floating-point values `a' and `b'. If `zSign' is true, thedifference is negated before being returned. `zSign' is ignored if theresult is a NaN. The subtraction is performed according to the IEC/IEEEStandard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ){ int16 aExp, bExp, zExp; bits32 aSig, bSig, zSig; int16 expDiff; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); bSig = extractFloat32Frac( b ); bExp = extractFloat32Exp( b ); expDiff = aExp - bExp; aSig <<= 7; bSig <<= 7; if ( 0 < expDiff ) goto aExpBigger; if ( expDiff < 0 ) goto bExpBigger; if ( aExp == 0xFF ) { if ( aSig | bSig ) return propagateFloat32NaN( a, b ); float_raise( float_flag_invalid ); return float32_default_nan; } if ( aExp == 0 ) { aExp = 1; bExp = 1; } if ( bSig < aSig ) goto aBigger; if ( aSig < bSig ) goto bBigger; return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); bExpBigger: if ( bExp == 0xFF ) { if ( bSig ) return propagateFloat32NaN( a, b ); return packFloat32( zSign ^ 1, 0xFF, 0 ); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -