📄 softfloat.c
字号:
if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
zSign = ( a < 0 );
absA = zSign ? - a : a;
shiftCount = countLeadingZeros32( absA ) + 17;
zSig0 = absA;
return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
}
#endif
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit two's complement integer format. The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic---which means in particular that the conversion is rounded
according to the current rounding mode. If `a' is a NaN, the largest
positive integer is returned. Otherwise, if the conversion overflows, the
largest integer with the same sign as `a' is returned.
-------------------------------------------------------------------------------
*/
int32 float32_to_int32( float32 a )
{
flag aSign;
int16 aExp, shiftCount;
bits32 aSig;
bits64 zSig;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );
if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
if ( aExp ) aSig |= 0x00800000;
shiftCount = 0xAF - aExp;
zSig = aSig;
zSig <<= 32;
if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
return roundAndPackInt32( aSign, zSig );
}
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the 32-bit two's complement integer format. The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic, except that the conversion is always rounded toward zero. If
`a' is a NaN, the largest positive integer is returned. Otherwise, if the
conversion overflows, the largest integer with the same sign as `a' is
returned.
-------------------------------------------------------------------------------
*/
int32 float32_to_int32_round_to_zero( float32 a )
{
flag aSign;
int16 aExp, shiftCount;
bits32 aSig;
int32 z;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );
shiftCount = aExp - 0x9E;
if ( 0 <= shiftCount ) {
if ( a == 0xCF000000 ) return 0x80000000;
float_raise( float_flag_invalid );
if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
return 0x80000000;
}
else if ( aExp <= 0x7E ) {
if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
return 0;
}
aSig = ( aSig | 0x00800000 )<<8;
z = aSig>>( - shiftCount );
if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
float_exception_flags |= float_flag_inexact;
}
return aSign ? - z : z;
}
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the double-precision floating-point format. The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic.
-------------------------------------------------------------------------------
*/
float64 float32_to_float64( float32 a )
{
flag aSign;
int16 aExp;
bits32 aSig;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );
if ( aExp == 0xFF ) {
if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
return packFloat64( aSign, 0x7FF, 0 );
}
if ( aExp == 0 ) {
if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
normalizeFloat32Subnormal( aSig, &aExp, &aSig );
--aExp;
}
return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
}
#ifdef FLOATX80
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the extended double-precision floating-point format. The conversion
is performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic.
-------------------------------------------------------------------------------
*/
floatx80 float32_to_floatx80( float32 a )
{
flag aSign;
int16 aExp;
bits32 aSig;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );
if ( aExp == 0xFF ) {
if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
}
if ( aExp == 0 ) {
if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
normalizeFloat32Subnormal( aSig, &aExp, &aSig );
}
aSig |= 0x00800000;
return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
}
#endif
#ifdef FLOAT128
/*
-------------------------------------------------------------------------------
Returns the result of converting the single-precision floating-point value
`a' to the double-precision floating-point format. The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic.
-------------------------------------------------------------------------------
*/
float128 float32_to_float128( float32 a )
{
flag aSign;
int16 aExp;
bits32 aSig;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );
if ( aExp == 0xFF ) {
if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
return packFloat128( aSign, 0x7FFF, 0, 0 );
}
if ( aExp == 0 ) {
if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
normalizeFloat32Subnormal( aSig, &aExp, &aSig );
--aExp;
}
return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
}
#endif
/*
-------------------------------------------------------------------------------
Rounds the single-precision floating-point value `a' to an integer, and
returns the result as a single-precision floating-point value. The
operation is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
float32 float32_round_to_int( float32 a )
{
flag aSign;
int16 aExp;
bits32 lastBitMask, roundBitsMask;
int8 roundingMode;
float32 z;
aExp = extractFloat32Exp( a );
if ( 0x96 <= aExp ) {
if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
return propagateFloat32NaN( a, a );
}
return a;
}
if ( aExp <= 0x7E ) {
if ( (bits32) ( a<<1 ) == 0 ) return a;
float_exception_flags |= float_flag_inexact;
aSign = extractFloat32Sign( a );
switch ( float_rounding_mode ) {
case float_round_nearest_even:
if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
return packFloat32( aSign, 0x7F, 0 );
}
break;
case float_round_down:
return aSign ? 0xBF800000 : 0;
case float_round_up:
return aSign ? 0x80000000 : 0x3F800000;
}
return packFloat32( aSign, 0, 0 );
}
lastBitMask = 1;
lastBitMask <<= 0x96 - aExp;
roundBitsMask = lastBitMask - 1;
z = a;
roundingMode = float_rounding_mode;
if ( roundingMode == float_round_nearest_even ) {
z += lastBitMask>>1;
if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
}
else if ( roundingMode != float_round_to_zero ) {
if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
z += roundBitsMask;
}
}
z &= ~ roundBitsMask;
if ( z != a ) float_exception_flags |= float_flag_inexact;
return z;
}
/*
-------------------------------------------------------------------------------
Returns the result of adding the absolute values of the single-precision
floating-point values `a' and `b'. If `zSign' is true, the sum is negated
before being returned. `zSign' is ignored if the result is a NaN. The
addition is performed according to the IEC/IEEE Standard for Binary
Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig, bSig, zSig;
int16 expDiff;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
bSig = extractFloat32Frac( b );
bExp = extractFloat32Exp( b );
expDiff = aExp - bExp;
aSig <<= 6;
bSig <<= 6;
if ( 0 < expDiff ) {
if ( aExp == 0xFF ) {
if ( aSig ) return propagateFloat32NaN( a, b );
return a;
}
if ( bExp == 0 ) {
--expDiff;
}
else {
bSig |= 0x20000000;
}
shift32RightJamming( bSig, expDiff, &bSig );
zExp = aExp;
}
else if ( expDiff < 0 ) {
if ( bExp == 0xFF ) {
if ( bSig ) return propagateFloat32NaN( a, b );
return packFloat32( zSign, 0xFF, 0 );
}
if ( aExp == 0 ) {
++expDiff;
}
else {
aSig |= 0x20000000;
}
shift32RightJamming( aSig, - expDiff, &aSig );
zExp = bExp;
}
else {
if ( aExp == 0xFF ) {
if ( aSig | bSig ) return propagateFloat32NaN( a, b );
return a;
}
if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
zSig = 0x40000000 + aSig + bSig;
zExp = aExp;
goto roundAndPack;
}
aSig |= 0x20000000;
zSig = ( aSig + bSig )<<1;
--zExp;
if ( (sbits32) zSig < 0 ) {
zSig = aSig + bSig;
++zExp;
}
roundAndPack:
return roundAndPackFloat32( zSign, zExp, zSig );
}
/*
-------------------------------------------------------------------------------
Returns the result of subtracting the absolute values of the single-
precision floating-point values `a' and `b'. If `zSign' is true, the
difference is negated before being returned. `zSign' is ignored if the
result is a NaN. The subtraction is performed according to the IEC/IEEE
Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig, bSig, zSig;
int16 expDiff;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
bSig = extractFloat32Frac( b );
bExp = extractFloat32Exp( b );
expDiff = aExp - bExp;
aSig <<= 7;
bSig <<= 7;
if ( 0 < expDiff ) goto aExpBigger;
if ( expDiff < 0 ) goto bExpBigger;
if ( aExp == 0xFF ) {
if ( aSig | bSig ) return propagateFloat32NaN( a, b );
float_raise( float_flag_invalid );
return float32_default_nan;
}
if ( aExp == 0 ) {
aExp = 1;
bExp = 1;
}
if ( bSig < aSig ) goto aBigger;
if ( aSig < bSig ) goto bBigger;
return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
bExpBigger:
if ( bExp == 0xFF ) {
if ( bSig ) return propagateFloat32NaN( a, b );
return packFloat32( zSign ^ 1, 0xFF, 0 );
}
if ( aExp == 0 ) {
++expDiff;
}
else {
aSig |= 0x40000000;
}
shift32RightJamming( aSig, - expDiff, &aSig );
bSig |= 0x40000000;
bBigger:
zSig = bSig - aSig;
zExp = bExp;
zSign ^= 1;
goto normalizeRoundAndPack;
aExpBigger:
if ( aExp == 0xFF ) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -