📄 softfloat.c
字号:
q = estimateDiv64To32( aSig, 0, bSig );
q = ( 2 < q ) ? q - 2 : 0;
q >>= 32 - expDiff;
bSig >>= 2;
aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
}
else {
aSig >>= 2;
bSig >>= 2;
}
do {
alternateASig = aSig;
++q;
aSig -= bSig;
} while ( 0 <= (sbits32) aSig );
sigMean = aSig + alternateASig;
if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
aSig = alternateASig;
}
zSign = ( (sbits32) aSig < 0 );
if ( zSign ) aSig = - aSig;
return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
}
/*----------------------------------------------------------------------------
| Returns the square root of the single-precision floating-point value `a'.
| The operation is performed according to the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
|
| Special cases, tested in this order before the general computation:
|   - exponent field 0xFF with a nonzero fraction: the result is whatever
|     `propagateFloat32NaN( a, 0 )' returns;
|   - exponent field 0xFF, zero fraction, sign clear: `a' is returned as is;
|   - exponent field 0xFF, zero fraction, sign set: the invalid exception is
|     raised and `float32_default_nan' is returned;
|   - sign set, exponent and fraction both zero: `a' is returned as is;
|   - sign set otherwise: the invalid exception is raised and
|     `float32_default_nan' is returned;
|   - sign clear, exponent and fraction both zero: 0 is returned.
| A zero exponent with a nonzero fraction is first passed through
| `normalizeFloat32Subnormal'.
*----------------------------------------------------------------------------*/
float32 float32_sqrt( float32 a )
{
flag aSign;
int16 aExp, zExp;
bits32 aSig, zSig, rem0, rem1, term0, term1;
aSig = extractFloat32Frac( a );
aExp = extractFloat32Exp( a );
aSign = extractFloat32Sign( a );
/* Exponent field all ones: NaN or infinity. */
if ( aExp == 0xFF ) {
if ( aSig ) return propagateFloat32NaN( a, 0 );
if ( ! aSign ) return a;
float_raise( float_flag_invalid );
return float32_default_nan;
}
/* Sign set: only a zero (exponent and fraction both 0) is accepted. */
if ( aSign ) {
if ( ( aExp | aSig ) == 0 ) return a;
float_raise( float_flag_invalid );
return float32_default_nan;
}
if ( aExp == 0 ) {
if ( aSig == 0 ) return 0;
/* Nonzero fraction with zero exponent: let the helper rewrite aExp/aSig. */
normalizeFloat32Subnormal( aSig, &aExp, &aSig );
}
/* Halve the exponent after removing 0x7F, then add 0x7E back.
   NOTE(review): 0x7E rather than 0x7F presumably matches the significand
   alignment roundAndPackFloat32 expects -- confirm against that helper. */
zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
/* Set bit 23 (the bit just above the fraction field) and move it up to
   bit 31. */
aSig = ( aSig | 0x00800000 )<<8;
/* Initial root estimate from the helper, biased upward by 2.
   NOTE(review): the accuracy of estimateSqrt32 is not visible here. */
zSig = estimateSqrt32( aExp, aSig ) + 2;
/* The exact remainder is only computed when the low seven bits of the
   estimate are <= 5.
   NOTE(review): presumably these are the only cases in which the estimate's
   error could change the rounded result -- confirm against the error bound
   of estimateSqrt32. */
if ( ( zSig & 0x7F ) <= 5 ) {
/* zSig < 2 can only happen if the `+ 2' above wrapped around (assuming
   bits32 is an unsigned 32-bit type).  In that case zSig is forced to
   0x7FFFFFFF and the final right shift is skipped. */
if ( zSig < 2 ) {
zSig = 0x7FFFFFFF;
goto roundAndPack;
}
else {
/* Shift the significand right by one when aExp is odd. */
aSig >>= aExp & 1;
/* rem0:rem1 = aSig:0 - zSig*zSig, taking term0:term1 and rem0:rem1 as
   high:low word pairs (assumed from the helper names -- TODO confirm). */
mul32To64( zSig, zSig, &term0, &term1 );
sub64( aSig, 0, term0, term1, &rem0, &rem1 );
/* While the remainder is negative the estimate is too large: step zSig
   down by one and add back 2*zSig + 1, which is the difference between
   the squares of the old and new zSig. */
while ( (sbits32) rem0 < 0 ) {
--zSig;
shortShift64Left( 0, zSig, 1, &term0, &term1 );
term1 |= 1;
add64( rem0, rem1, term0, term1, &rem0, &rem1 );
}
/* Record a nonzero remainder in the lowest bit of zSig. */
zSig |= ( ( rem0 | rem1 ) != 0 );
}
}
/* Shift right by one through the jamming helper.
   NOTE(review): presumably the bit shifted out is ORed into the new lowest
   bit -- confirm against shift32RightJamming. */
shift32RightJamming( zSig, 1, &zSig );
roundAndPack:
/* The sign argument is always 0 on this path. */
return roundAndPackFloat32( 0, zExp, zSig );
}
/*----------------------------------------------------------------------------
| Quiet equality test for single-precision values: yields 1 when `a' equals
| `b' and 0 otherwise, following the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.  If either operand is a NaN the result is 0, and
| the invalid exception is raised only when one of them is a signaling NaN.
*----------------------------------------------------------------------------*/
flag float32_eq( float32 a, float32 b )
{
    flag eitherIsNaN;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    eitherIsNaN =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( ! eitherIsNaN ) {
        if ( a == b ) return 1;
        /* Different encodings still compare equal when every bit below the
           top bit is clear in both operands (zeros of either sign). */
        return ( (bits32) ( ( a | b )<<1 ) == 0 );
    }
    if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
        float_raise( float_flag_invalid );
    }
    return 0;
}
/*----------------------------------------------------------------------------
| Signaling less-than-or-equal test for single-precision values: yields 1
| when `a' is less than or equal to `b' and 0 otherwise, following the
| IEC/IEEE Standard for Binary Floating-Point Arithmetic.  If either operand
| is a NaN, the invalid exception is raised and the result is 0.
*----------------------------------------------------------------------------*/
flag float32_le( float32 a, float32 b )
{
    flag eitherIsNaN, aSign, bSign;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    eitherIsNaN =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( eitherIsNaN ) {
        float_raise( float_flag_invalid );
        return 0;
    }
    aSign = extractFloat32Sign( a );
    bSign = extractFloat32Sign( b );
    if ( aSign == bSign ) {
        if ( a == b ) return 1;
        /* Same sign: the raw encodings order like the values when the sign
           is clear, and in reverse when it is set. */
        return ( aSign ^ ( a < b ) ) != 0;
    }
    /* Opposite signs: true when `a' carries the sign, or when both operands
       are zeros (every bit below the top bit clear). */
    if ( aSign ) return 1;
    return ( (bits32) ( ( a | b )<<1 ) == 0 );
}
/*----------------------------------------------------------------------------
| Signaling less-than test for single-precision values: yields 1 when `a' is
| strictly less than `b' and 0 otherwise, following the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.  If either operand is a NaN, the invalid
| exception is raised and the result is 0.
*----------------------------------------------------------------------------*/
flag float32_lt( float32 a, float32 b )
{
    flag eitherIsNaN, aSign, bSign;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    eitherIsNaN =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( eitherIsNaN ) {
        float_raise( float_flag_invalid );
        return 0;
    }
    aSign = extractFloat32Sign( a );
    bSign = extractFloat32Sign( b );
    if ( aSign == bSign ) {
        if ( a == b ) return 0;
        /* Same sign: the raw encodings order like the values when the sign
           is clear, and in reverse when it is set. */
        return ( aSign ^ ( a < b ) ) != 0;
    }
    /* Opposite signs: true only when `a' carries the sign and the operands
       are not both zeros. */
    if ( ! aSign ) return 0;
    return ( (bits32) ( ( a | b )<<1 ) != 0 );
}
/*----------------------------------------------------------------------------
| Signaling equality test for single-precision values: yields 1 when `a'
| equals `b' and 0 otherwise.  If either operand is a NaN (quiet or
| signaling), the invalid exception is raised and the result is 0.
| Otherwise the comparison follows the IEC/IEEE Standard for Binary
| Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
flag float32_eq_signaling( float32 a, float32 b )
{
    flag eitherIsNaN;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    eitherIsNaN =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( eitherIsNaN ) {
        float_raise( float_flag_invalid );
        return 0;
    }
    if ( a == b ) return 1;
    /* Different encodings still compare equal when every bit below the top
       bit is clear in both operands (zeros of either sign). */
    return ( (bits32) ( ( a | b )<<1 ) == 0 );
}
/*----------------------------------------------------------------------------
| Quiet less-than-or-equal test for single-precision values: yields 1 when
| `a' is less than or equal to `b' and 0 otherwise.  If either operand is a
| NaN the result is 0, and the invalid exception is raised only when one of
| them is a signaling NaN.  Otherwise the comparison follows the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
flag float32_le_quiet( float32 a, float32 b )
{
    flag eitherIsNaN, aSign, bSign;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    eitherIsNaN =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( eitherIsNaN ) {
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid );
        }
        return 0;
    }
    aSign = extractFloat32Sign( a );
    bSign = extractFloat32Sign( b );
    if ( aSign == bSign ) {
        if ( a == b ) return 1;
        /* Same sign: the raw encodings order like the values when the sign
           is clear, and in reverse when it is set. */
        return ( aSign ^ ( a < b ) ) != 0;
    }
    /* Opposite signs: true when `a' carries the sign, or when both operands
       are zeros (every bit below the top bit clear). */
    if ( aSign ) return 1;
    return ( (bits32) ( ( a | b )<<1 ) == 0 );
}
/*----------------------------------------------------------------------------
| Quiet less-than test for single-precision values: yields 1 when `a' is
| strictly less than `b' and 0 otherwise.  If either operand is a NaN the
| result is 0, and the invalid exception is raised only when one of them is a
| signaling NaN.  Otherwise the comparison follows the IEC/IEEE Standard for
| Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
flag float32_lt_quiet( float32 a, float32 b )
{
    flag eitherIsNaN, aSign, bSign;

    /* A NaN has an all-ones exponent field and a nonzero fraction. */
    eitherIsNaN =
           ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
        || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) );
    if ( eitherIsNaN ) {
        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
            float_raise( float_flag_invalid );
        }
        return 0;
    }
    aSign = extractFloat32Sign( a );
    bSign = extractFloat32Sign( b );
    if ( aSign == bSign ) {
        if ( a == b ) return 0;
        /* Same sign: the raw encodings order like the values when the sign
           is clear, and in reverse when it is set. */
        return ( aSign ^ ( a < b ) ) != 0;
    }
    /* Opposite signs: true only when `a' carries the sign and the operands
       are not both zeros. */
    if ( ! aSign ) return 0;
    return ( (bits32) ( ( a | b )<<1 ) != 0 );
}
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the 32-bit two's complement integer format.  The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic---which means in particular that the conversion is rounded
| according to the current rounding mode.  If `a' is a NaN, the largest
| positive integer is returned.  Otherwise, if the conversion overflows, the
| largest integer with the same sign as `a' is returned.
|
| Every NaN or overflow case raises the invalid exception through
| `float_raise'.  When the conversion succeeds but discards nonzero fraction
| bits, `float_flag_inexact' is ORed into `float_exception_flags'.
*----------------------------------------------------------------------------*/
int32 float64_to_int32( float64 a )
{
flag aSign;
int16 aExp, shiftCount;
bits32 aSig0, aSig1, absZ, aSigExtra;
int32 z;
int8 roundingMode;
/* The fraction arrives as two 32-bit words.  aSig0 is treated below as the
   upper word (bit 20 is set on it as the leading bit) -- TODO confirm against
   extractFloat64Frac0/extractFloat64Frac1. */
aSig1 = extractFloat64Frac1( a );
aSig0 = extractFloat64Frac0( a );
aExp = extractFloat64Exp( a );
aSign = extractFloat64Sign( a );
/* shiftCount >= 0: aSig0 (with bit 20 set) must be shifted left to form the
   integer part; shiftCount < 0: it must be shifted right. */
shiftCount = aExp - 0x413;
if ( 0 <= shiftCount ) {
if ( 0x41E < aExp ) {
/* Exponent too large for any 32-bit result.  A NaN (exponent 0x7FF with a
   nonzero fraction) has its sign cleared so that the invalid path returns
   the largest positive integer. */
if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
goto invalid;
}
/* NOTE(review): presumably absZ receives the upper word (integer part) and
   aSigExtra the lower word (remaining fraction bits) of the 64-bit shift --
   confirm against shortShift64Left. */
shortShift64Left(
aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
/* A magnitude above 0x80000000 cannot be represented with either sign. */
if ( 0x80000000 < absZ ) goto invalid;
}
else {
/* Collapse the low fraction word to a single 0/1 "nonzero" bit. */
aSig1 = ( aSig1 != 0 );
if ( aExp < 0x3FE ) {
/* Integer part is 0; aSigExtra is nonzero exactly when any of the exponent
   or fraction bits is nonzero. */
aSigExtra = aExp | aSig0 | aSig1;
absZ = 0;
}
else {
aSig0 |= 0x00100000;
/* The bits that the right shift below discards are moved to the top of
   aSigExtra; the collapsed low word is ORed into its lowest bit. */
aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
absZ = aSig0>>( - shiftCount );
}
}
roundingMode = float_rounding_mode;
if ( roundingMode == float_round_nearest_even ) {
/* Top bit of aSigExtra set: the discarded part is at least one half, so
   round the magnitude up. */
if ( (sbits32) aSigExtra < 0 ) {
++absZ;
/* No bits set below the top bit: exact tie, so clear the lowest bit of
   the result to make it even. */
if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
}
z = aSign ? - absZ : absZ;
}
else {
/* Directed modes only need to know whether anything was discarded. */
aSigExtra = ( aSigExtra != 0 );
if ( aSign ) {
/* Sign set: the magnitude grows by one only for float_round_down with a
   nonzero discarded part. */
z = - ( absZ
+ ( ( roundingMode == float_round_down ) & aSigExtra ) );
}
else {
/* Sign clear: the magnitude grows by one only for float_round_up with a
   nonzero discarded part. */
z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
}
}
/* A nonzero result whose sign disagrees with aSign means the magnitude did
   not fit in int32. */
if ( ( aSign ^ ( z < 0 ) ) && z ) {
/* Also reached by the gotos above. */
invalid:
float_raise( float_flag_invalid );
return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
}
if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
return z;
}
/*----------------------------------------------------------------------------
| Converts the double-precision floating-point value `a' to the 32-bit two's
| complement integer format, always rounding toward zero regardless of the
| current rounding mode.  Apart from that, the conversion follows the IEC/IEEE
| Standard for Binary Floating-Point Arithmetic.  A NaN yields the largest
| positive integer; any other value that overflows yields the largest integer
| with the same sign as `a'.  Both of those cases raise the invalid exception.
| The inexact flag is set when nonzero fraction bits are discarded.
*----------------------------------------------------------------------------*/
int32 float64_to_int32_round_to_zero( float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig0, aSig1, absZ, aSigExtra;
    int32 z;

    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );

    /* Exponent below 0x3FF: the truncated result is always 0, and it is
       inexact unless every exponent and fraction bit is zero. */
    if ( aExp < 0x3FF ) {
        if ( aExp | aSig0 | aSig1 ) {
            float_exception_flags |= float_flag_inexact;
        }
        return 0;
    }

    /* Exponent above 0x41E: too large for any 32-bit result.  A NaN has its
       sign cleared so that the largest positive integer is returned. */
    if ( 0x41E < aExp ) {
        if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
        goto invalid;
    }

    /* Split the significand into an integer part (absZ) and the discarded
       bits (aSigExtra). */
    shiftCount = aExp - 0x413;
    if ( shiftCount < 0 ) {
        aSig0 |= 0x00100000;
        aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
        absZ = aSig0>>( - shiftCount );
    }
    else {
        shortShift64Left(
            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
    }

    if ( aSign ) {
        z = - absZ;
    }
    else {
        z = absZ;
    }

    /* The result is valid when it is zero or its sign agrees with aSign. */
    if ( ( z == 0 ) || ! ( aSign ^ ( z < 0 ) ) ) {
        if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
        return z;
    }

 invalid:
    float_raise( float_flag_invalid );
    return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
}
/*----------------------------------------------------------------------------
| Returns the result of converting the double-precision floating-point value
| `a' to the single-precision floating-point format. The conversion is
| performed according to the IEC/IEEE Standard for Binary Floating-Point
| Arithmetic.
*----------------------------------------------------------------------------*/
float32 float64_to_float32( float64 a )
{
flag aSign;
int16 aExp;
bits32 aSig0, aSig1, zSig;
bits32 allZero;
aSig1 = extractFloat64Frac1( a );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -