📄 softfloat.c
字号:
aSig0 = extractFloat64Frac0( a );
aExp = extractFloat64Exp( a );
aSign = extractFloat64Sign( a );
if ( aExp == 0x7FF ) {
if ( aSig0 | aSig1 ) {
return commonNaNToFloat32( float64ToCommonNaN( a ) );
}
return packFloat32( aSign, 0xFF, 0 );
}
shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
if ( aExp ) zSig |= 0x40000000;
return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
}
/*----------------------------------------------------------------------------
| Rounds the double-precision floating-point value `a' to an integer,
| and returns the result as a double-precision floating-point value. The
| operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_round_to_int( float64 a )
{
flag aSign;
int16 aExp;
bits32 lastBitMask, roundBitsMask;
int8 roundingMode;
float64 z;
aExp = extractFloat64Exp( a );
if ( 0x413 <= aExp ) {
if ( 0x433 <= aExp ) {
if ( ( aExp == 0x7FF )
&& ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
return propagateFloat64NaN( a, a );
}
return a;
}
lastBitMask = 1;
lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
roundBitsMask = lastBitMask - 1;
z = a;
roundingMode = float_rounding_mode;
if ( roundingMode == float_round_nearest_even ) {
if ( lastBitMask ) {
add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
}
else {
if ( (sbits32) z.low < 0 ) {
++z.high;
if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
}
}
}
else if ( roundingMode != float_round_to_zero ) {
if ( extractFloat64Sign( z )
^ ( roundingMode == float_round_up ) ) {
add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
}
}
z.low &= ~ roundBitsMask;
}
else {
if ( aExp <= 0x3FE ) {
if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
float_exception_flags |= float_flag_inexact;
aSign = extractFloat64Sign( a );
switch ( float_rounding_mode ) {
case float_round_nearest_even:
if ( ( aExp == 0x3FE )
&& ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
) {
return packFloat64( aSign, 0x3FF, 0, 0 );
}
break;
case float_round_down:
return
aSign ? packFloat64( 1, 0x3FF, 0, 0 )
: packFloat64( 0, 0, 0, 0 );
case float_round_up:
return
aSign ? packFloat64( 1, 0, 0, 0 )
: packFloat64( 0, 0x3FF, 0, 0 );
}
return packFloat64( aSign, 0, 0, 0 );
}
lastBitMask = 1;
lastBitMask <<= 0x413 - aExp;
roundBitsMask = lastBitMask - 1;
z.low = 0;
z.high = a.high;
roundingMode = float_rounding_mode;
if ( roundingMode == float_round_nearest_even ) {
z.high += lastBitMask>>1;
if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
z.high &= ~ lastBitMask;
}
}
else if ( roundingMode != float_round_to_zero ) {
if ( extractFloat64Sign( z )
^ ( roundingMode == float_round_up ) ) {
z.high |= ( a.low != 0 );
z.high += roundBitsMask;
}
}
z.high &= ~ roundBitsMask;
}
if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
float_exception_flags |= float_flag_inexact;
}
return z;
}
/*----------------------------------------------------------------------------
| Returns the result of adding the absolute values of the double-precision
| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
| before being returned. `zSign' is ignored if the result is a NaN.
| The addition is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
int16 expDiff;
aSig1 = extractFloat64Frac1( a );
aSig0 = extractFloat64Frac0( a );
aExp = extractFloat64Exp( a );
bSig1 = extractFloat64Frac1( b );
bSig0 = extractFloat64Frac0( b );
bExp = extractFloat64Exp( b );
expDiff = aExp - bExp;
if ( 0 < expDiff ) {
if ( aExp == 0x7FF ) {
if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
return a;
}
if ( bExp == 0 ) {
--expDiff;
}
else {
bSig0 |= 0x00100000;
}
shift64ExtraRightJamming(
bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
zExp = aExp;
}
else if ( expDiff < 0 ) {
if ( bExp == 0x7FF ) {
if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
return packFloat64( zSign, 0x7FF, 0, 0 );
}
if ( aExp == 0 ) {
++expDiff;
}
else {
aSig0 |= 0x00100000;
}
shift64ExtraRightJamming(
aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
zExp = bExp;
}
else {
if ( aExp == 0x7FF ) {
if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
return propagateFloat64NaN( a, b );
}
return a;
}
add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
zSig2 = 0;
zSig0 |= 0x00200000;
zExp = aExp;
goto shiftRight1;
}
aSig0 |= 0x00100000;
add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
--zExp;
if ( zSig0 < 0x00200000 ) goto roundAndPack;
++zExp;
shiftRight1:
shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
roundAndPack:
return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
}
/*----------------------------------------------------------------------------
| Returns the result of subtracting the absolute values of the double-
| precision floating-point values `a' and `b'. If `zSign' is 1, the
| difference is negated before being returned. `zSign' is ignored if the
| result is a NaN. The subtraction is performed according to the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
{
int16 aExp, bExp, zExp;
bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
int16 expDiff;
float64 z;
aSig1 = extractFloat64Frac1( a );
aSig0 = extractFloat64Frac0( a );
aExp = extractFloat64Exp( a );
bSig1 = extractFloat64Frac1( b );
bSig0 = extractFloat64Frac0( b );
bExp = extractFloat64Exp( b );
expDiff = aExp - bExp;
shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
if ( 0 < expDiff ) goto aExpBigger;
if ( expDiff < 0 ) goto bExpBigger;
if ( aExp == 0x7FF ) {
if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
return propagateFloat64NaN( a, b );
}
float_raise( float_flag_invalid );
z.low = float64_default_nan_low;
z.high = float64_default_nan_high;
return z;
}
if ( aExp == 0 ) {
aExp = 1;
bExp = 1;
}
if ( bSig0 < aSig0 ) goto aBigger;
if ( aSig0 < bSig0 ) goto bBigger;
if ( bSig1 < aSig1 ) goto aBigger;
if ( aSig1 < bSig1 ) goto bBigger;
return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
bExpBigger:
if ( bExp == 0x7FF ) {
if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
}
if ( aExp == 0 ) {
++expDiff;
}
else {
aSig0 |= 0x40000000;
}
shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
bSig0 |= 0x40000000;
bBigger:
sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
zExp = bExp;
zSign ^= 1;
goto normalizeRoundAndPack;
aExpBigger:
if ( aExp == 0x7FF ) {
if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
return a;
}
if ( bExp == 0 ) {
--expDiff;
}
else {
bSig0 |= 0x40000000;
}
shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
aSig0 |= 0x40000000;
aBigger:
sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
zExp = aExp;
normalizeRoundAndPack:
--zExp;
return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
}
/*----------------------------------------------------------------------------
| Returns the result of adding the double-precision floating-point values `a'
| and `b'. The operation is performed according to the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_add( float64 a, float64 b )
{
flag aSign, bSign;
aSign = extractFloat64Sign( a );
bSign = extractFloat64Sign( b );
if ( aSign == bSign ) {
return addFloat64Sigs( a, b, aSign );
}
else {
return subFloat64Sigs( a, b, aSign );
}
}
/*----------------------------------------------------------------------------
| Returns the result of subtracting the double-precision floating-point values
| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_sub( float64 a, float64 b )
{
flag aSign, bSign;
aSign = extractFloat64Sign( a );
bSign = extractFloat64Sign( b );
if ( aSign == bSign ) {
return subFloat64Sigs( a, b, aSign );
}
else {
return addFloat64Sigs( a, b, aSign );
}
}
/*----------------------------------------------------------------------------
| Returns the result of multiplying the double-precision floating-point values
| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
| for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
float64 float64_mul( float64 a, float64 b )
{
flag aSign, bSign, zSign;
int16 aExp, bExp, zExp;
bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
float64 z;
aSig1 = extractFloat64Frac1( a );
aSig0 = extractFloat64Frac0( a );
aExp = extractFloat64Exp( a );
aSign = extractFloat64Sign( a );
bSig1 = extractFloat64Frac1( b );
bSig0 = extractFloat64Frac0( b );
bExp = extractFloat64Exp( b );
bSign = extractFloat64Sign( b );
zSign = aSign ^ bSign;
if ( aExp == 0x7FF ) {
if ( ( aSig0 | aSig1 )
|| ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
return propagateFloat64NaN( a, b );
}
if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
return packFloat64( zSign, 0x7FF, 0, 0 );
}
if ( bExp == 0x7FF ) {
if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
invalid:
float_raise( float_flag_invalid );
z.low = float64_default_nan_low;
z.high = float64_default_nan_high;
return z;
}
return packFloa
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -