⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 softfloat.c

📁 RISC processor ARM-7 emulator
💻 C
📖 第 1 页 / 共 5 页
字号:
    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }
    shiftCount = countLeadingZeros64( zSig0 );
    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
    zExp -= shiftCount;
    return
        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );

}

#endif

#ifdef FLOAT128

/*
-------------------------------------------------------------------------------
Returns the low word of the quadruple-precision floating-point value `a',
which holds the least-significant 64 bits of its fraction.
-------------------------------------------------------------------------------
*/
INLINE bits64 extractFloat128Frac1( float128 a )
{
    bits64 lowFraction;

    lowFraction = a.low;
    return lowFraction;

}

/*
-------------------------------------------------------------------------------
Returns the most-significant 48 fraction bits of the quadruple-precision
floating-point value `a', i.e. the high word with its upper 16 bits (sign and
exponent fields) masked off.
-------------------------------------------------------------------------------
*/
INLINE bits64 extractFloat128Frac0( float128 a )
{
    bits64 fractionMask;

    /* Keep only the low 48 bits of the high word. */
    fractionMask = LIT64( 0x0000FFFFFFFFFFFF );
    return a.high & fractionMask;

}

/*
-------------------------------------------------------------------------------
Returns the 15-bit exponent field of the quadruple-precision floating-point
value `a', taken from bits 48..62 of the high word.
-------------------------------------------------------------------------------
*/
INLINE int32 extractFloat128Exp( float128 a )
{
    bits64 signAndExp;

    /* Drop the 48 fraction bits, then mask away the sign bit above. */
    signAndExp = a.high>>48;
    return signAndExp & 0x7FFF;

}

/*
-------------------------------------------------------------------------------
Returns the sign bit of the quadruple-precision floating-point value `a',
which is the top bit (bit 63) of the high word.
-------------------------------------------------------------------------------
*/
INLINE flag extractFloat128Sign( float128 a )
{
    bits64 topBit;

    topBit = a.high>>63;
    return topBit;

}

/*
-------------------------------------------------------------------------------
Normalizes a subnormal quadruple-precision significand given as the
concatenation of `aSig0' (high) and `aSig1' (low).  The significand is shifted
so that its leading 1 lands in bit 48 of the high word (15 leading zeros).
Outputs:
    `*zSig0Ptr' - most significant 49 bits of the normalized significand;
    `*zSig1Ptr' - least significant 64 bits of the normalized significand;
    `*zExpPtr'  - the exponent matching the normalized significand.
-------------------------------------------------------------------------------
*/
static void
 normalizeFloat128Subnormal(
     bits64 aSig0,
     bits64 aSig1,
     int32 *zExpPtr,
     bits64 *zSig0Ptr,
     bits64 *zSig1Ptr
 )
{
    int8 shift;

    /* Leading 1 is somewhere in the high word: one short left shift does it. */
    if ( aSig0 != 0 ) {
        shift = countLeadingZeros64( aSig0 ) - 15;
        shortShift128Left( aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr );
        *zExpPtr = 1 - shift;
        return;
    }

    /* High word is empty: the low word supplies the whole significand. */
    shift = countLeadingZeros64( aSig1 ) - 15;
    if ( 0 <= shift ) {
        /* Everything fits in the high word after moving up. */
        *zSig0Ptr = aSig1<<shift;
        *zSig1Ptr = 0;
    }
    else {
        /* Fewer than 15 leading zeros: the value straddles both words. */
        *zSig0Ptr = aSig1>>( - shift );
        *zSig1Ptr = aSig1<<( shift & 63 );
    }
    /* The extra 64 accounts for the word already skipped. */
    *zExpPtr = - shift - 63;

}

/*
-------------------------------------------------------------------------------
Builds a quadruple-precision floating-point value from the sign `zSign', the
exponent `zExp', and the significand formed by concatenating `zSig0' (high)
and `zSig1' (low).  `zSig1' becomes the low 64 bits of the result unchanged.
The high 64 bits are the arithmetic SUM of `zSign' shifted to bit 63, `zExp'
shifted to bit 48, and `zSig0'.  Because the fields are added rather than
OR-ed, any integer portion of `zSig0' (bit 48 and above) carries into the
exponent field.  A properly normalized significand has an integer portion
equal to 1, so `zExp' should be 1 less than the desired result exponent
whenever `zSig0' and `zSig1' together form a complete, normalized
significand.
-------------------------------------------------------------------------------
*/
INLINE float128
 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
{
    float128 z;
    bits64 signField, expField;

    signField = ( (bits64) zSign )<<63;
    expField = ( (bits64) zExp )<<48;
    /* Addition (not OR) lets the integer bit of zSig0 bump the exponent. */
    z.high = signField + expField + zSig0;
    z.low = zSig1;
    return z;

}

/*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and extended significand formed by the concatenation of `zSig0', `zSig1',
and `zSig2', and returns the proper quadruple-precision floating-point value
corresponding to the abstract input.  Ordinarily, the abstract value is
simply rounded and packed into the quadruple-precision format, with the
inexact exception raised if the abstract input cannot be represented
exactly.  If the abstract value is too large, however, the overflow and
inexact exceptions are raised and an infinity or maximal finite value is
returned.  If the abstract value is too small, the input value is rounded to
a subnormal number, and the underflow and inexact exceptions are raised if
the abstract input cannot be represented exactly as a subnormal quadruple-
precision floating-point number.
    The input significand must be normalized or smaller.  If the input
significand is not normalized, `zExp' must be 0; in that case, the result
returned is a subnormal number, and it must not require rounding.  In the
usual case that the input significand is normalized, `zExp' must be 1 less
than the ``true'' floating-point exponent.  The handling of underflow and
overflow follows the IEC/IEEE Standard for Binary Floating-point Arithmetic.
    `zSig2' holds the bits below the representable significand: its top bit
is the half-unit (round) bit and any nonzero value marks the result inexact.
The rounding mode is read from the global `float_rounding_mode', and the
tininess policy from `float_detect_tininess'.
-------------------------------------------------------------------------------
*/
static float128
 roundAndPackFloat128(
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
{
    int8 roundingMode;
    flag roundNearestEven, increment, isTiny;

    roundingMode = float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Nearest-even default: round up when the top bit of zSig2 is set. */
    increment = ( (sbits64) zSig2 < 0 );
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            /* Truncation never increments the magnitude. */
            increment = 0;
        }
        else {
            /* Directed modes: bump the magnitude only when there are */
            /* discarded bits and the direction matches the sign.     */
            if ( zSign ) {
                increment = ( roundingMode == float_round_down ) && zSig2;
            }
            else {
                increment = ( roundingMode == float_round_up ) && zSig2;
            }
        }
    }
    /* The unsigned cast makes a negative zExp compare as a huge value, so  */
    /* this one test selects both the near-overflow and the negative cases. */
    if ( 0x7FFD <= (bits32) zExp ) {
        /* Overflow: exponent already too large, or at the limit with an */
        /* all-ones significand that the increment would carry out of.   */
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128(
                         LIT64( 0x0001FFFFFFFFFFFF ),
                         LIT64( 0xFFFFFFFFFFFFFFFF ),
                         zSig0,
                         zSig1
                     )
                  && increment
                )
           ) {
            float_raise( float_flag_overflow | float_flag_inexact );
            /* Modes that round toward zero for this sign give the value   */
            /* with exponent 0x7FFE and an all-ones fraction (the maximal  */
            /* finite value); every other mode falls through to infinity.  */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) )
               ) {
                return
                    packFloat128(
                        zSign,
                        0x7FFE,
                        LIT64( 0x0000FFFFFFFFFFFF ),
                        LIT64( 0xFFFFFFFFFFFFFFFF )
                    );
            }
            /* Exponent 0x7FFF with a zero fraction: infinity. */
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Subnormal result.  It is NOT tiny only when tininess is    */
            /* detected after rounding, zExp is exactly -1, an increment  */
            /* is pending, and the significand is at least the all-ones   */
            /* pattern (so the pending increment carries out of it).      */
            isTiny =
                   ( float_detect_tininess == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ! increment
                || lt128(
                       zSig0,
                       zSig1,
                       LIT64( 0x0001FFFFFFFFFFFF ),
                       LIT64( 0xFFFFFFFFFFFFFFFF )
                   );
            /* Denormalize by -zExp bit positions.  NOTE(review): helper */
            /* is defined elsewhere; presumably shifted-out bits are     */
            /* folded ("jammed") into zSig2 - confirm.                   */
            shift128ExtraRightJamming(
                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
            /* zSig2 changed, so the rounding decision is recomputed.  In */
            /* round-to-zero mode both comparisons below are false, which */
            /* leaves increment at 0.                                     */
            if ( roundNearestEven ) {
                increment = ( (sbits64) zSig2 < 0 );
            }
            else {
                if ( zSign ) {
                    increment = ( roundingMode == float_round_down ) && zSig2;
                }
                else {
                    increment = ( roundingMode == float_round_up ) && zSig2;
                }
            }
        }
    }
    /* Any discarded bits make the result inexact. */
    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Tie in nearest-even mode: with an increment pending, zSig2 +   */
        /* zSig2 == 0 means only the top bit of zSig2 is set (exactly     */
        /* half), so the lowest result bit is cleared to land on even.    */
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
    }
    else {
        /* A zero significand is packed with a zero exponent field. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );

}

/*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and a 128-bit significand formed by the concatenation of `zSig0' and `zSig1',
and returns the corresponding quadruple-precision floating-point value.  It
behaves like `roundAndPackFloat128', except that the significand carries no
third extension word and need not be normalized in any way: this routine
first shifts it so that the leading 1 sits in bit 48 of the high word,
adjusts the exponent to match, and then hands off to `roundAndPackFloat128'.
In all cases, `zExp' must be 1 less than the ``true'' floating-point exponent.
-------------------------------------------------------------------------------
*/
static float128
 normalizeRoundAndPackFloat128(
     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
{
    int8 shift;
    bits64 zSig2 = 0;

    /* An empty high word means a whole-word (64-bit) left shift up front. */
    if ( zSig0 == 0 ) {
        zSig0 = zSig1;
        zSig1 = 0;
        zExp -= 64;
    }

    /* Positive: shift left by that much.  Negative: shift right instead. */
    shift = countLeadingZeros64( zSig0 ) - 15;
    if ( shift < 0 ) {
        /* Too many significant bits up top; the bits shifted out of the */
        /* low word are delivered in zSig2 for the rounding step.        */
        shift128ExtraRightJamming(
            zSig0, zSig1, 0, - shift, &zSig0, &zSig1, &zSig2 );
    }
    else {
        /* Left shift loses nothing, so the extension word stays zero. */
        shortShift128Left( zSig0, zSig1, shift, &zSig0, &zSig1 );
    }
    zExp -= shift;
    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );

}

#endif

/*
-------------------------------------------------------------------------------
Converts the 32-bit two's complement integer `a' to the single-precision
floating-point format and returns the result.  The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
Zero and the most negative integer are returned directly; every other input
is reduced to sign and magnitude and passed to the normalize/round/pack
routine.
-------------------------------------------------------------------------------
*/
float32 int32_to_float32( int32 a )
{
    flag negative;

    if ( a == 0 ) return 0;
    /* The most negative value has no positive counterpart to negate, so  */
    /* it is packed directly: sign 1, exponent field 0x9E, zero fraction. */
    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
    negative = ( a < 0 );
    if ( negative ) {
        a = - a;
    }
    return normalizeRoundAndPackFloat32( negative, 0x9C, a );

}

/*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a' to
the double-precision floating-point format.  The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
No rounding step is performed: the magnitude is shifted into position and
packed directly.
-------------------------------------------------------------------------------
*/
float64 int32_to_float64( int32 a )
{
    flag aSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return 0;
    aSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  A signed `- a' overflows */
    /* (undefined behaviour) for the most negative integer, whereas the     */
    /* unsigned subtraction wraps to the correct magnitude.                 */
    absA = aSign ? 0 - (uint32) a : (uint32) a;
    /* Shift count that brings the leading 1 of absA up to bit 52. */
    shiftCount = countLeadingZeros32( absA ) + 21;
    zSig = absA;
    /* packFloat64 receives the shifted significand with its leading 1  */
    /* still present; 0x432 - shiftCount is the exponent paired with it. */
    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );

}

#ifdef FLOATX80

/*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a'
to the extended double-precision floating-point format.  The conversion
is performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic.  No rounding step is performed: the magnitude is shifted into
position and packed directly.
-------------------------------------------------------------------------------
*/
floatx80 int32_to_floatx80( int32 a )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig;

    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
    zSign = ( a < 0 );
    /* Take the magnitude in unsigned arithmetic.  A signed `- a' overflows */
    /* (undefined behaviour) for the most negative integer, whereas the     */
    /* unsigned subtraction wraps to the correct magnitude.                 */
    absA = zSign ? 0 - (uint32) a : (uint32) a;
    /* Shift count that brings the leading 1 of absA up to bit 63. */
    shiftCount = countLeadingZeros32( absA ) + 32;
    zSig = absA;
    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );

}

#endif

#ifdef FLOAT128

/*
-------------------------------------------------------------------------------
Returns the result of converting the 32-bit two's complement integer `a' to
the quadruple-precision floating-point format.  The conversion is performed
according to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
float128 int32_to_float128( int32 a )
{
    flag zSign;
    uint32 absA;
    int8 shiftCount;
    bits64 zSig0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -