📄 softfloat.c
字号:
}
aSign = extractFloat32Sign( a );
bSign = extractFloat32Sign( b );
if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
return ( a != b ) && ( aSign ^ ( a < b ) );
}
/*----------------------------------------------------------------------------
| Converts the double-precision floating-point value `a' to a 32-bit two's
| complement integer.  Rounding follows the mode currently stored in
| `float_rounding_mode', in the manner of the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.  If any nonzero bits are discarded by the
| rounding, the inexact flag is set.  If the result does not fit, the invalid
| exception is raised and the largest integer with the same sign as `a' is
| returned; a NaN is treated as positive and therefore yields the largest
| positive integer.
*----------------------------------------------------------------------------*/
int32 float64_to_int32( float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig0, aSig1, absZ, aSigExtra;
    int32 z;
    int8 roundingMode;

    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    shiftCount = aExp - 0x413;
    if ( shiftCount < 0 ) {
        /* The integer part lies wholly inside the high fraction word.  The
         * low word can only matter as a sticky bit, so collapse it to 0/1. */
        aSig1 = ( aSig1 != 0 );
        if ( 0x3FE <= aExp ) {
            aSig0 |= 0x00100000;
            /* `shiftCount & 31' equals 32 + shiftCount here, so the left
             * shift keeps exactly the bits the right shift below drops. */
            aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
            absZ = aSig0>>( - shiftCount );
        }
        else {
            /* Magnitude below one half: the integer part is zero, and
             * aSigExtra is nonzero exactly when `a' itself is nonzero. */
            aSigExtra = aExp | aSig0 | aSig1;
            absZ = 0;
        }
    }
    else {
        if ( 0x41E < aExp ) {
            /* Too large for 32 bits, or Inf/NaN.  A NaN is forced positive
             * so that the largest positive integer is returned. */
            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
            goto invalid;
        }
        shortShift64Left(
            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
        /* Reject now: the increment below could otherwise wrap absZ. */
        if ( 0x80000000 < absZ ) goto invalid;
    }
    /* Round the magnitude first; the sign is applied once afterwards. */
    roundingMode = float_rounding_mode;
    if ( roundingMode != float_round_nearest_even ) {
        /* Directed modes only care whether anything was discarded. */
        aSigExtra = ( aSigExtra != 0 );
        if ( aSign ) {
            absZ += ( roundingMode == float_round_down ) & aSigExtra;
        }
        else {
            absZ += ( roundingMode == float_round_up ) & aSigExtra;
        }
    }
    else if ( (sbits32) aSigExtra < 0 ) {
        /* Discarded part is at least one half: round up, then clear the
         * low bit again when it was an exact tie (ties go to even). */
        ++absZ;
        if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
    }
    z = aSign ? - absZ : absZ;
    /* A nonzero result whose sign disagrees with `a' means overflow. */
    if ( z && ( aSign ^ ( z < 0 ) ) ) goto invalid;
    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
    return z;

 invalid:
    float_raise( float_flag_invalid );
    return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
}
/*----------------------------------------------------------------------------
| Converts the double-precision floating-point value `a' to a 32-bit two's
| complement integer, always truncating toward zero regardless of the current
| rounding mode.  If any nonzero fraction bits are discarded, the inexact
| flag is set.  If the result does not fit, the invalid exception is raised
| and the largest integer with the same sign as `a' is returned; a NaN is
| treated as positive and therefore yields the largest positive integer.
*----------------------------------------------------------------------------*/
int32 float64_to_int32_round_to_zero( float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig0, aSig1, absZ, aSigExtra;
    int32 z;

    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp < 0x3FF ) {
        /* Magnitude below one truncates to zero; inexact unless a == 0. */
        if ( aExp | aSig0 | aSig1 ) {
            float_exception_flags |= float_flag_inexact;
        }
        return 0;
    }
    if ( 0x41E < aExp ) {
        /* Too large for 32 bits, or Inf/NaN.  A NaN is forced positive so
         * that the largest positive integer is returned. */
        if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
        goto invalid;
    }
    shiftCount = aExp - 0x413;
    aSig0 |= 0x00100000;
    if ( shiftCount < 0 ) {
        /* Integer part sits inside the high word.  `shiftCount & 31' equals
         * 32 + shiftCount, so aSigExtra collects every dropped bit. */
        aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
        absZ = aSig0>>( - shiftCount );
    }
    else {
        shortShift64Left( aSig0, aSig1, shiftCount, &absZ, &aSigExtra );
    }
    z = aSign ? - absZ : absZ;
    /* A nonzero result whose sign disagrees with `a' means overflow. */
    if ( z && ( aSign ^ ( z < 0 ) ) ) goto invalid;
    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
    return z;

 invalid:
    float_raise( float_flag_invalid );
    return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
}
/*----------------------------------------------------------------------------
| Converts the double-precision floating-point value `a' to single precision
| in the manner of the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.  Infinities keep their sign, NaNs are carried across through
| the common NaN representation, and every other value is handed to
| `roundAndPackFloat32' for rounding and packing.
*----------------------------------------------------------------------------*/
float32 float64_to_float32( float64 a )
{
    flag aSign;
    int16 aExp;
    bits32 aSig0, aSig1, zSig;
    bits32 discardedHigh;

    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp != 0x7FF ) {
        /* Finite value: narrow the fraction by 22 bits.  Only the low word
         * of the shifted pair is used; the high word is discarded.
         * NOTE(review): going by its name, the helper presumably folds
         * shifted-out bits into the lsb as a sticky bit -- confirm. */
        shift64RightJamming( aSig0, aSig1, 22, &discardedHigh, &zSig );
        /* A nonzero exponent field gets the explicit leading bit. */
        if ( aExp ) zSig |= 0x40000000;
        return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
    }
    if ( ( aSig0 | aSig1 ) == 0 ) {
        /* Infinity maps to the infinity of the same sign. */
        return packFloat32( aSign, 0xFF, 0 );
    }
    return commonNaNToFloat32( float64ToCommonNaN( a ) );
}
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integral value
| and returns it in double-precision format.  Rounding follows the mode in
| `float_rounding_mode', in the manner of the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.  The inexact flag is set whenever the result
| differs from `a'.  NaNs go through `propagateFloat64NaN'; infinities and
| values that are already integral are returned unchanged.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int( float64 a )
{
    flag aSign;
    int16 aExp;
    bits32 lastBitMask, roundBitsMask;
    int8 roundingMode;
    float64 z;

    aExp = extractFloat64Exp( a );
    if ( 0x433 <= aExp ) {
        /* No fraction bits remain at this magnitude; only NaNs need work. */
        if (    ( aExp == 0x7FF )
             && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
            return propagateFloat64NaN( a, a );
        }
        return a;
    }
    if ( aExp <= 0x3FE ) {
        /* Magnitude below one: the result is a signed zero or a signed one. */
        if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
        float_exception_flags |= float_flag_inexact;
        aSign = extractFloat64Sign( a );
        roundingMode = float_rounding_mode;
        if ( roundingMode == float_round_nearest_even ) {
            /* Strictly above one half rounds to one; an exact half ties to
             * the even neighbour, zero, via the common return below. */
            if (    ( aExp == 0x3FE )
                 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
                return packFloat64( aSign, 0x3FF, 0, 0 );
            }
        }
        else if ( roundingMode == float_round_down ) {
            return
                  aSign ? packFloat64( 1, 0x3FF, 0, 0 )
                : packFloat64( 0, 0, 0, 0 );
        }
        else if ( roundingMode == float_round_up ) {
            return
                  aSign ? packFloat64( 1, 0, 0, 0 )
                : packFloat64( 0, 0x3FF, 0, 0 );
        }
        return packFloat64( aSign, 0, 0, 0 );
    }
    roundingMode = float_rounding_mode;
    if ( 0x413 <= aExp ) {
        /* The units bit is in the low word, or is bit 0 of the high word.
         * The shift is done in two steps so that the latter case produces a
         * mask of 0 rather than a shift by the full word width. */
        lastBitMask = (bits32) 1<<( 0x432 - aExp );
        lastBitMask <<= 1;
        roundBitsMask = lastBitMask - 1;
        z = a;
        if ( roundingMode == float_round_nearest_even ) {
            if ( lastBitMask == 0 ) {
                /* Whole low word is fraction; its top bit is the half. */
                if ( (sbits32) z.low < 0 ) {
                    ++z.high;
                    if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
                }
            }
            else {
                /* Add one half; an exact tie then clears the units bit. */
                add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
            }
        }
        else if (    ( roundingMode != float_round_to_zero )
                  && (   extractFloat64Sign( z )
                       ^ ( roundingMode == float_round_up ) ) ) {
            /* Directed rounding away from zero: bump past any fraction. */
            add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
        }
        z.low &= ~ roundBitsMask;
    }
    else {
        /* The units bit is in the high word; the whole low word is fraction
         * and always ends up cleared. */
        lastBitMask = (bits32) 1<<( 0x413 - aExp );
        roundBitsMask = lastBitMask - 1;
        z.low = 0;
        z.high = a.high;
        if ( roundingMode == float_round_nearest_even ) {
            z.high += lastBitMask>>1;
            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
                z.high &= ~ lastBitMask;
            }
        }
        else if (    ( roundingMode != float_round_to_zero )
                  && (   extractFloat64Sign( z )
                       ^ ( roundingMode == float_round_up ) ) ) {
            /* Fold the low word into bit 0 as a sticky bit before bumping. */
            z.high |= ( a.low != 0 );
            z.high += roundBitsMask;
        }
        z.high &= ~ roundBitsMask;
    }
    if ( ( z.high != a.high ) || ( z.low != a.low ) ) {
        float_exception_flags |= float_flag_inexact;
    }
    return z;
}
/*----------------------------------------------------------------------------
| Adds the absolute values of the double-precision floating-point values `a'
| and `b' and returns the sum, giving it the sign `zSign' (1 for negative).
| `zSign' has no effect when the result is a NaN.  The addition is performed
| in the manner of the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic; rounding and packing are delegated to `roundAndPackFloat64'.
*----------------------------------------------------------------------------*/
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
{
    int16 aExp, bExp, zExp;
    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
    int16 expDiff;

    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    bSig1 = extractFloat64Frac1( b );
    bSig0 = extractFloat64Frac0( b );
    bExp = extractFloat64Exp( b );
    expDiff = aExp - bExp;
    if ( expDiff == 0 ) {
        if ( aExp == 0x7FF ) {
            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
                return propagateFloat64NaN( a, b );
            }
            return a;
        }
        add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        /* Both exponent fields zero: the raw sum is packed directly with a
         * zero exponent field. */
        if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
        /* Two leading bits add up to bit 21; the sum always needs the
         * one-place right shift performed at the bottom. */
        zSig0 |= 0x00200000;
        zSig2 = 0;
        zExp = aExp;
    }
    else {
        if ( 0 < expDiff ) {
            /* `a' has the larger exponent: align `b' to it. */
            if ( aExp == 0x7FF ) {
                if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
                return a;
            }
            if ( bExp ) {
                bSig0 |= 0x00100000;
            }
            else {
                /* Zero exponent field: no leading bit, one less shift. */
                --expDiff;
            }
            shift64ExtraRightJamming(
                bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
            zExp = aExp;
        }
        else {
            /* `b' has the larger exponent: align `a' to it. */
            if ( bExp == 0x7FF ) {
                if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
                return packFloat64( zSign, 0x7FF, 0, 0 );
            }
            if ( aExp ) {
                aSig0 |= 0x00100000;
            }
            else {
                /* Zero exponent field: no leading bit, one less shift. */
                ++expDiff;
            }
            shift64ExtraRightJamming(
                aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
            zExp = bExp;
        }
        /* The larger operand's leading bit has not been set yet.  Because
         * the operands are only summed, it does not matter that it is ORed
         * into aSig0 even when `b' is the larger one. */
        aSig0 |= 0x00100000;
        add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
        if ( zSig0 < 0x00200000 ) {
            /* No carry out of the leading bit: round and pack as is, with
             * the exponent argument one below zExp. */
            return roundAndPackFloat64( zSign, zExp - 1, zSig0, zSig1, zSig2 );
        }
    }
    /* Carry into bit 21: drop one place, keeping the lost bit in zSig2. */
    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
}
/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the double-
| precision floating-point values `a' and `b'. If `zSign' is 1, the
| difference is negated before being returned. `zSign' is ignored if the
| result is a NaN. The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
int16 expDiff;
float64 z;
aSig1 = extractFloat
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -