📄 softfloat.c
字号:
/*----------------------------------------------------------------------------| Returns the result of converting the 32-bit two's complement integer `a'| to the extended double-precision floating-point format. The conversion| is performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic.*----------------------------------------------------------------------------*/floatx80 int32_to_floatx80( int32 a STATUS_PARAM ){ flag zSign; uint32 absA; int8 shiftCount; bits64 zSig; if ( a == 0 ) return packFloatx80( 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 32; zSig = absA; return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );}#endif#ifdef FLOAT128/*----------------------------------------------------------------------------| Returns the result of converting the 32-bit two's complement integer `a' to| the quadruple-precision floating-point format. The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float128 int32_to_float128( int32 a STATUS_PARAM ){ flag zSign; uint32 absA; int8 shiftCount; bits64 zSig0; if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 17; zSig0 = absA; return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );}#endif/*----------------------------------------------------------------------------| Returns the result of converting the 64-bit two's complement integer `a'| to the single-precision floating-point format. The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float32 int64_to_float32( int64 a STATUS_PARAM ){ flag zSign; uint64 absA; int8 shiftCount; if ( a == 0 ) return 0; zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros64( absA ) - 40; if ( 0 <= shiftCount ) { return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); } else { shiftCount += 7; if ( shiftCount < 0 ) { shift64RightJamming( absA, - shiftCount, &absA ); } else { absA <<= shiftCount; } return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR ); }}/*----------------------------------------------------------------------------| Returns the result of converting the 64-bit two's complement integer `a'| to the double-precision floating-point format. The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 int64_to_float64( int64 a STATUS_PARAM ){ flag zSign; if ( a == 0 ) return 0; if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { return packFloat64( 1, 0x43E, 0 ); } zSign = ( a < 0 ); return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );}#ifdef FLOATX80/*----------------------------------------------------------------------------| Returns the result of converting the 64-bit two's complement integer `a'| to the extended double-precision floating-point format. The conversion| is performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic.*----------------------------------------------------------------------------*/floatx80 int64_to_floatx80( int64 a STATUS_PARAM ){ flag zSign; uint64 absA; int8 shiftCount; if ( a == 0 ) return packFloatx80( 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros64( absA ); return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );}#endif#ifdef FLOAT128/*----------------------------------------------------------------------------| Returns the result of converting the 64-bit two's complement integer `a' to| the quadruple-precision floating-point format. The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float128 int64_to_float128( int64 a STATUS_PARAM ){ flag zSign; uint64 absA; int8 shiftCount; int32 zExp; bits64 zSig0, zSig1; if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros64( absA ) + 49; zExp = 0x406E - shiftCount; if ( 64 <= shiftCount ) { zSig1 = 0; zSig0 = absA; shiftCount -= 64; } else { zSig1 = absA; zSig0 = 0; } shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); return packFloat128( zSign, zExp, zSig0, zSig1 );}#endif/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the 32-bit two's complement integer format. The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic---which means in particular that the conversion is rounded| according to the current rounding mode. If `a' is a NaN, the largest| positive integer is returned. Otherwise, if the conversion overflows, the| largest integer with the same sign as `a' is returned.*----------------------------------------------------------------------------*/int32 float32_to_int32( float32 a STATUS_PARAM ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; bits64 aSig64; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( ( aExp == 0xFF ) && aSig ) aSign = 0; if ( aExp ) aSig |= 0x00800000; shiftCount = 0xAF - aExp; aSig64 = aSig; aSig64 <<= 32; if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); return roundAndPackInt32( aSign, aSig64 STATUS_VAR );}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the 32-bit two's complement integer format. The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic, except that the conversion is always rounded toward zero.| If `a' is a NaN, the largest positive integer is returned. Otherwise, if| the conversion overflows, the largest integer with the same sign as `a' is| returned.*----------------------------------------------------------------------------*/int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; int32 z; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); shiftCount = aExp - 0x9E; if ( 0 <= shiftCount ) { if ( a != 0xCF000000 ) { float_raise( float_flag_invalid STATUS_VAR); if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; } return (sbits32) 0x80000000; } else if ( aExp <= 0x7E ) { if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; return 0; } aSig = ( aSig | 0x00800000 )<<8; z = aSig>>( - shiftCount ); if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } if ( aSign ) z = - z; return z;}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the 64-bit two's complement integer format. The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic---which means in particular that the conversion is rounded| according to the current rounding mode. If `a' is a NaN, the largest| positive integer is returned. Otherwise, if the conversion overflows, the| largest integer with the same sign as `a' is returned.*----------------------------------------------------------------------------*/int64 float32_to_int64( float32 a STATUS_PARAM ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; bits64 aSig64, aSigExtra; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); shiftCount = 0xBE - aExp; if ( shiftCount < 0 ) { float_raise( float_flag_invalid STATUS_VAR); if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { return LIT64( 0x7FFFFFFFFFFFFFFF ); } return (sbits64) LIT64( 0x8000000000000000 ); } if ( aExp ) aSig |= 0x00800000; aSig64 = aSig; aSig64 <<= 40; shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the 64-bit two's complement integer format. The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic, except that the conversion is always rounded toward zero. If| `a' is a NaN, the largest positive integer is returned. Otherwise, if the| conversion overflows, the largest integer with the same sign as `a' is| returned.*----------------------------------------------------------------------------*/int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; bits64 aSig64; int64 z; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); shiftCount = aExp - 0xBE; if ( 0 <= shiftCount ) { if ( a != 0xDF000000 ) { float_raise( float_flag_invalid STATUS_VAR); if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { return LIT64( 0x7FFFFFFFFFFFFFFF ); } } return (sbits64) LIT64( 0x8000000000000000 ); } else if ( aExp <= 0x7E ) { if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact; return 0; } aSig64 = aSig | 0x00800000; aSig64 <<= 40; z = aSig64>>( - shiftCount ); if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { STATUS(float_exception_flags) |= float_flag_inexact; } if ( aSign ) z = - z; return z;}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the double-precision floating-point format. The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic.*----------------------------------------------------------------------------*/float64 float32_to_float64( float32 a STATUS_PARAM ){ flag aSign; int16 aExp; bits32 aSig; aSig = extractFloat32Frac( a ); aExp = extractFloat32Exp( a ); aSign = extractFloat32Sign( a ); if ( aExp == 0xFF ) { if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR )); return packFloat64( aSign, 0x7FF, 0 ); } if ( aExp == 0 ) { if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); normalizeFloat32Subnormal( aSig, &aExp, &aSig ); --aExp; } return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );}#ifdef FLOATX80/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the extended double-precision floating-point format. The conversion| is performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic.*----------------------------------------------------------------------------*/floatx80 float32_to_floatx80( float32 a STATUS_PARAM ){ flag aSign; int16 aExp; bits32 aSig;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -