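/* File: softfloat.c -- excerpt captured from a web code viewer (the viewer's
   "font size" control label that followed the file name was page chrome, not
   source text). */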
roundingMode = float_rounding_mode;
roundNearestEven = ( roundingMode == float_round_nearest_even );
increment = ( (sbits32) zSig2 < 0 );
if ( ! roundNearestEven ) {
if ( roundingMode == float_round_to_zero ) {
increment = 0;
}
else {
if ( zSign ) {
increment = ( roundingMode == float_round_down ) && zSig2;
}
else {
increment = ( roundingMode == float_round_up ) && zSig2;
}
}
}
if ( 0x7FD <= (bits16) zExp ) {
if ( ( 0x7FD < zExp )
|| ( ( zExp == 0x7FD )
&& eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
&& increment
)
) {
float_raise(float_flag_inexact );
float_raise( float_flag_overflow);
if ( ( roundingMode == float_round_to_zero )
|| ( zSign && ( roundingMode == float_round_up ) )
|| ( ! zSign && ( roundingMode == float_round_down ) )
) {
return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
}
return packFloat64( zSign, 0x7FF, 0, 0 );
}
if ( zExp < 0 ) {
isTiny =
( float_detect_tininess == float_tininess_before_rounding )
|| ( zExp < -1 )
|| ! increment
|| lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
shift64ExtraRightJamming(
zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
zExp = 0;
if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
if ( roundNearestEven ) {
increment = ( (sbits32) zSig2 < 0 );
}
else {
if ( zSign ) {
increment = ( roundingMode == float_round_down ) && zSig2;
}
else {
increment = ( roundingMode == float_round_up ) && zSig2;
}
}
}
}
if ( zSig2 ) float_exception_flags |= float_flag_inexact;
if ( increment ) {
add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
}
else {
if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
}
return packFloat64( zSign, zExp, zSig0, zSig1 );
}
/*----------------------------------------------------------------------------
| Normalizes, rounds, and packs an abstract double-precision value.  The
| value is described by its sign `zSign', its exponent `zExp', and a
| significand formed by the concatenation of `zSig0' (upper word) and `zSig1'
| (lower word).  This routine is just like `roundAndPackFloat64' except that
| the input significand has fewer bits and does not have to be normalized:
| the significand is shifted into position here, the exponent is corrected by
| the same amount, and the final rounding and packing are delegated to
| `roundAndPackFloat64'.  In all cases, `zExp' must be 1 less than the
| ``true'' floating-point exponent.
*----------------------------------------------------------------------------*/
static float64
 normalizeRoundAndPackFloat64(
     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
{
    bits32 zSig2;
    int8 shiftCount;

    if ( ! zSig0 ) {
        /* Upper word is empty: move the lower word up by a full 32 bits and
           compensate in the exponent. */
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 32;
    }
    /* Signed distance between the current leading bit and the position
       expected by `roundAndPackFloat64' (11 leading zeros in `zSig0'). */
    shiftCount = countLeadingZeros32( zSig0 ) - 11;
    if ( shiftCount < 0 ) {
        /* Too few leading zeros: shift right; the bits shifted out are
           collected in `zSig2' for the rounding step. */
        shift64ExtraRightJamming(
            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
    }
    else {
        /* Shift left; no extra bits are produced in this direction. */
        zSig2 = 0;
        shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    }
    return
        roundAndPackFloat64( zSign, zExp - shiftCount, zSig0, zSig1, zSig2 );
}
/*----------------------------------------------------------------------------
| Converts the 32-bit two's complement integer `a' to the single-precision
| floating-point format and returns the result.  The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| Zero and the most negative integer are handled as special cases; every
| other input is passed by magnitude to `normalizeRoundAndPackFloat32'.
*----------------------------------------------------------------------------*/
float32 int32_to_float32( int32 a )
{
    flag zSign;

    if ( ! a ) {
        return 0;
    }
    if ( a == (sbits32) 0x80000000 ) {
        /* Packed directly, before any negation is attempted on this value. */
        return packFloat32( 1, 0x9E, 0 );
    }
    zSign = ( a < 0 );
    if ( zSign ) {
        a = - a;
    }
    return normalizeRoundAndPackFloat32( zSign, 0x9C, a );
}
/*----------------------------------------------------------------------------
| Returns the result of converting the 32-bit two's complement integer `a' to
| the double-precision floating-point format.  The conversion is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| No rounding step is involved: the magnitude of `a' is shifted into the
| significand words and packed directly.
*----------------------------------------------------------------------------*/
float64 int32_to_float64( int32 a )
{
    flag zSign;
    bits32 absA;
    int8 shiftCount;
    bits32 zSig0, zSig1;

    if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  Negating `a' as a signed
       value overflows (undefined behaviour) when `a' is the most negative
       integer; converting to `bits32' first and subtracting from zero is
       well defined and yields 0x80000000 for that input. */
    absA = a;
    if ( zSign ) absA = 0 - absA;
    /* Signed shift that places the leading 1 of `absA' where `packFloat64'
       expects it in the upper significand word (11 leading zeros). */
    shiftCount = countLeadingZeros32( absA ) - 11;
    if ( 0 <= shiftCount ) {
        zSig0 = absA<<shiftCount;
        zSig1 = 0;
    }
    else {
        /* More than 21 significant bits: the excess spills into `zSig1'. */
        shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
    }
    return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
}
/*----------------------------------------------------------------------------
| Converts the single-precision floating-point value `a' to the 32-bit two's
| complement integer format and returns the result.  The conversion follows
| the IEC/IEEE Standard for Binary Floating-Point Arithmetic, so the result is
| rounded according to the current rounding mode (`float_rounding_mode').
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
| the conversion overflows, the largest integer with the same sign as `a' is
| returned.  The invalid exception is raised in both of those cases, and the
| inexact flag is set whenever nonzero bits are discarded.
*----------------------------------------------------------------------------*/
int32 float32_to_int32( float32 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig, aSigExtra;
    int32 z;
    int8 roundingMode;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( 0x9E <= aExp ) {
        /* Exponent too large for int32, or infinity/NaN.  The one encoding
           that still converts exactly, 0xCF000000, returns the most negative
           integer without raising anything. */
        if ( a == 0xCF000000 ) {
            return (sbits32) 0x80000000;
        }
        float_raise( float_flag_invalid );
        if ( aSign && ! ( ( aExp == 0xFF ) && aSig ) ) {
            return (sbits32) 0x80000000;
        }
        return 0x7FFFFFFF;
    }

    shiftCount = aExp - 0x96;
    if ( 0 <= shiftCount ) {
        /* No fraction bits are lost: a plain left shift is exact. */
        z = ( aSig | 0x00800000 )<<shiftCount;
        return aSign ? - z : z;
    }

    /* From here on bits are shifted out; they are kept in `aSigExtra', most
       significant discarded bit at the top. */
    if ( aExp < 0x7E ) {
        /* Integer part is 0; only whether anything is nonzero matters. */
        aSigExtra = aExp | aSig;
        z = 0;
    }
    else {
        aSig |= 0x00800000;
        aSigExtra = aSig<<( shiftCount & 31 );
        z = aSig>>( - shiftCount );
    }
    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;

    roundingMode = float_rounding_mode;
    if ( roundingMode == float_round_nearest_even ) {
        if ( (sbits32) aSigExtra < 0 ) {
            /* Discarded part is at least one half: round up, then clear the
               low bit if it was exactly one half (tie goes to even). */
            ++z;
            if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
        }
    }
    else if ( aSigExtra ) {
        /* Directed rounding: the magnitude grows only when rounding away
           from zero for this sign; round-to-zero never increments. */
        if ( aSign ) {
            if ( roundingMode == float_round_down ) ++z;
        }
        else {
            if ( roundingMode == float_round_up ) ++z;
        }
    }
    if ( aSign ) z = - z;
    return z;
}
/*----------------------------------------------------------------------------
| Converts the single-precision floating-point value `a' to the 32-bit two's
| complement integer format, always rounding toward zero regardless of the
| current rounding mode.  Apart from the rounding direction the conversion
| follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
| the conversion overflows, the largest integer with the same sign as `a' is
| returned.  The invalid exception is raised in both of those cases, and the
| inexact flag is set whenever nonzero bits are discarded.
*----------------------------------------------------------------------------*/
int32 float32_to_int32_round_to_zero( float32 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig;
    int32 z;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    if ( 0x9E <= aExp ) {
        /* Out of range, infinity or NaN.  Only the encoding 0xCF000000
           converts exactly and therefore skips the invalid exception. */
        if ( a != 0xCF000000 ) {
            float_raise( float_flag_invalid );
            if ( ! aSign ) return 0x7FFFFFFF;
            if ( ( aExp == 0xFF ) && aSig ) return 0x7FFFFFFF;
        }
        return (sbits32) 0x80000000;
    }
    if ( aExp <= 0x7E ) {
        /* Result truncates to 0; inexact unless `a' is a zero. */
        if ( aExp | aSig ) {
            float_exception_flags |= float_flag_inexact;
        }
        return 0;
    }

    shiftCount = aExp - 0x9E;   /* negative here: -31 .. -1 */
    /* Significand with its implicit leading 1, left-aligned in 32 bits. */
    aSig = ( aSig | 0x00800000 )<<8;
    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
        /* Some of the bits about to be shifted out are nonzero. */
        float_exception_flags |= float_flag_inexact;
    }
    z = aSig>>( - shiftCount );
    return aSign ? - z : z;
}
/*----------------------------------------------------------------------------
| Converts the single-precision floating-point value `a' to the
| double-precision floating-point format and returns the result.  The
| conversion is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.  NaNs are routed through the common-NaN helpers;
| infinities and zeros keep their sign; subnormal inputs are normalized
| before being re-encoded.
*----------------------------------------------------------------------------*/
float64 float32_to_float64( float32 a )
{
    flag aSign;
    int16 aExp;
    bits32 aSig, zSig0, zSig1;

    aSig = extractFloat32Frac( a );
    aExp = extractFloat32Exp( a );
    aSign = extractFloat32Sign( a );

    switch ( aExp ) {
     case 0xFF:
        /* NaN when the fraction is nonzero, otherwise infinity. */
        if ( aSig ) {
            return commonNaNToFloat64( float32ToCommonNaN( a ) );
        }
        return packFloat64( aSign, 0x7FF, 0, 0 );
     case 0:
        /* Zero, or a subnormal that must be normalized first. */
        if ( ! aSig ) {
            return packFloat64( aSign, 0, 0, 0 );
        }
        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
        --aExp;
        break;
     default:
        break;
    }
    /* Spread the single-precision fraction across the two significand words
       (right shift by 3), then re-bias the exponent by 0x380. */
    shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
    return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
}
/*----------------------------------------------------------------------------
| Rounds the single-precision floating-point value `a' to an integer and
| returns that integer as a single-precision floating-point value.  The
| operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic, using the current rounding mode.  The inexact
| flag is set whenever the returned value differs from `a'.
*----------------------------------------------------------------------------*/
float32 float32_round_to_int( float32 a )
{
    flag aSign;
    int16 aExp;
    bits32 lastBitMask, roundBitsMask;
    int8 roundingMode;
    float32 z;

    aExp = extractFloat32Exp( a );

    if ( 0x96 <= aExp ) {
        /* No fraction bits remain at this exponent, or `a' is infinity/NaN. */
        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
            return propagateFloat32NaN( a, a );
        }
        return a;
    }

    if ( aExp <= 0x7E ) {
        /* Magnitude below 1: the result is a signed zero or a signed one. */
        if ( (bits32) ( a<<1 ) == 0 ) return a;
        float_exception_flags |= float_flag_inexact;
        aSign = extractFloat32Sign( a );
        switch ( float_rounding_mode ) {
         case float_round_down:
            if ( aSign ) return 0xBF800000;
            return 0;
         case float_round_up:
            if ( aSign ) return 0x80000000;
            return 0x3F800000;
         case float_round_nearest_even:
            /* Strictly more than one half rounds to one; exactly one half
               (zero fraction) falls through to the signed zero below. */
            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
                return packFloat32( aSign, 0x7F, 0 );
            }
            break;
         default:
            break;
        }
        return packFloat32( aSign, 0, 0 );
    }

    /* `lastBitMask' selects the lowest bit kept in the result (the units
       bit); `roundBitsMask' covers every fraction bit below it. */
    lastBitMask = (bits32) 1<<( 0x96 - aExp );
    roundBitsMask = lastBitMask - 1;
    z = a;
    roundingMode = float_rounding_mode;
    if ( roundingMode == float_round_nearest_even ) {
        /* Add one half, then on an exact tie clear the units bit so the
           result is even. */
        z += lastBitMask>>1;
        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
    }
    else if ( roundingMode == float_round_to_zero ) {
        /* Nothing to add: the masking below truncates. */
    }
    else if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
        /* Directed rounding away from zero for this sign: bump the magnitude
           past the next integer unless the fraction bits are all zero. */
        z += roundBitsMask;
    }
    z &= ~ roundBitsMask;
    if ( z != a ) float_exception_flags |= float_flag_inexact;
    return z;
}
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the single-precision
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
| before being returned. `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
{
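/*
 * [Web code-viewer residue -- not part of softfloat.c]
 * Keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */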