softfloat.c

来自「基于4个mips核的noc设计」· C语言 代码 · 共 1,846 行 · 第 1/5 页

C
1,846
字号
                increment = ( roundingMode == float_round_up ) && zSig2;            }        }    }    if ( 0x7FD <= (zExp & 0xffff)) {        if (    ( 0x7FD < zExp )             || (    ( zExp == 0x7FD )                  && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )                  && increment                )           ) {            float_raise( float_flag_overflow | float_flag_inexact );            if (    ( roundingMode == float_round_to_zero )                 || ( zSign && ( roundingMode == float_round_up ) )                 || ( ! zSign && ( roundingMode == float_round_down ) )               ) {                return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );            }            return packFloat64( zSign, 0x7FF, 0, 0 );        }        if ( zExp < 0 ) {            isTiny =                   ( float_detect_tininess == float_tininess_before_rounding )                || ( zExp < -1 )                || ! increment                || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );            shift64ExtraRightJamming(                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );            zExp = 0;            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );            if ( roundNearestEven ) {                increment = ( (sbits32) zSig2 < 0 );            }            else {                if ( zSign ) {                    increment = ( roundingMode == float_round_down ) && zSig2;                }                else {                    increment = ( roundingMode == float_round_up ) && zSig2;                }            }        }    }    if ( zSig2 ) float_exception_flags |= float_flag_inexact;    if ( increment ) {        add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );    }    else {        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;    }    return packFloat64( zSign, zExp, zSig0, zSig1 );}/*----------------------------------------------------------------------------| Takes an abstract floating-point value having sign `zSign', exponent `zExp',| and significand formed by the concatenation of `zSig0' and `zSig1', and| returns the proper double-precision floating-point value corresponding| to the abstract input.  This routine is just like `roundAndPackFloat64'| except that the input significand has fewer bits and does not have to be| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-| point exponent.*----------------------------------------------------------------------------*/static float64 normalizeRoundAndPackFloat64(     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 ){    int8 shiftCount;    bits32 zSig2;    if ( zSig0 == 0 ) {        zSig0 = zSig1;        zSig1 = 0;        zExp -= 32;    }    shiftCount = countLeadingZeros32( zSig0 ) - 11;    if ( 0 <= shiftCount ) {        zSig2 = 0;        shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );    }    else {        shift64ExtraRightJamming(            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );    }    zExp -= shiftCount;    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );}/*----------------------------------------------------------------------------| Returns the result of converting the 32-bit two's complement integer `a' to| the single-precision floating-point format.  The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float32 int32_to_float32( int32 a ){    flag zSign;    if ( a == 0 ) return 0;    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );    zSign = ( a < 0 );    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );}/*----------------------------------------------------------------------------| Returns the result of converting the 32-bit two's complement integer `a' to| the double-precision floating-point format.  The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 int32_to_float64( int32 a ){    flag zSign;    bits32 absA;    int8 shiftCount;    bits32 zSig0, zSig1;    if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );    zSign = ( a < 0 );    absA = zSign ? - a : a;    shiftCount = countLeadingZeros32( absA ) - 11;    if ( 0 <= shiftCount ) {        zSig0 = absA<<shiftCount;        zSig1 = 0;    }    else {        shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );    }    return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the 32-bit two's complement integer format.  The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic---which means in particular that the conversion is rounded| according to the current rounding mode.  If `a' is a NaN, the largest| positive integer is returned.  Otherwise, if the conversion overflows, the| largest integer with the same sign as `a' is returned.*----------------------------------------------------------------------------*/int32 float32_to_int32( float32 a ){    flag aSign;    int16 aExp, shiftCount;    bits32 aSig, aSigExtra;    int32 z;    int8 roundingMode;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    shiftCount = aExp - 0x96;    if ( 0 <= shiftCount ) {        if ( 0x9E <= aExp ) {            if ( a != 0xCF000000 ) {                float_raise( float_flag_invalid );                if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {                    return 0x7FFFFFFF;                }            }            return (sbits32) 0x80000000;        }        z = ( aSig | 0x00800000 )<<shiftCount;        if ( aSign ) z = - z;    }    else {        if ( aExp < 0x7E ) {            aSigExtra = aExp | aSig;            z = 0;        }        else {            aSig |= 0x00800000;            aSigExtra = aSig<<( shiftCount & 31 );            z = aSig>>( - shiftCount );        }        if ( aSigExtra ) float_exception_flags |= float_flag_inexact;        roundingMode = float_rounding_mode;        if ( roundingMode == float_round_nearest_even ) {            if ( (sbits32) aSigExtra < 0 ) {                ++z;                if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;            }            if ( aSign ) z = - z;        }        else {            aSigExtra = ( aSigExtra != 0 );            if ( aSign ) {                z += ( roundingMode == float_round_down ) & aSigExtra;                z = - z;            }            else {                z += ( roundingMode == float_round_up ) & aSigExtra;            }        }    }    return z;}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the 32-bit two's complement integer format.  The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic, except that the conversion is always rounded toward zero.| If `a' is a NaN, the largest positive integer is returned.  Otherwise, if| the conversion overflows, the largest integer with the same sign as `a' is| returned.*----------------------------------------------------------------------------*/int32 float32_to_int32_round_to_zero( float32 a ){    flag aSign;    int16 aExp, shiftCount;    bits32 aSig;    int32 z;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    shiftCount = aExp - 0x9E;    if ( 0 <= shiftCount ) {        if ( a != 0xCF000000 ) {            float_raise( float_flag_invalid );            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;        }        return (sbits32) 0x80000000;    }    else if ( aExp <= 0x7E ) {        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;        return 0;    }    aSig = ( aSig | 0x00800000 )<<8;    z = aSig>>( - shiftCount );    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {        float_exception_flags |= float_flag_inexact;    }    if ( aSign ) z = - z;    return z;}/*----------------------------------------------------------------------------| Returns the result of converting the single-precision floating-point value| `a' to the double-precision floating-point format.  The conversion is| performed according to the IEC/IEEE Standard for Binary Floating-Point| Arithmetic.*----------------------------------------------------------------------------*/float64 float32_to_float64( float32 a ){    flag aSign;    int16 aExp;    bits32 aSig, zSig0, zSig1;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    if ( aExp == 0xFF ) {        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );        return packFloat64( aSign, 0x7FF, 0, 0 );    }    if ( aExp == 0 ) {        if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );        normalizeFloat32Subnormal( aSig, &aExp, &aSig );        --aExp;    }    shift64Right( aSig, 0, 3, &zSig0, &zSig1 );    return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );}/*----------------------------------------------------------------------------| Rounds the single-precision floating-point value `a' to an integer,| and returns the result as a single-precision floating-point value.  The| operation is performed according to the IEC/IEEE Standard for Binary| Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float32 float32_round_to_int( float32 a ){    flag aSign;    int16 aExp;    bits32 lastBitMask, roundBitsMask;    int8 roundingMode;    float32 z;    aExp = extractFloat32Exp( a );    if ( 0x96 <= aExp ) {        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {            return propagateFloat32NaN( a, a );        }        return a;    }    if ( aExp <= 0x7E ) {        if ( (bits32) ( a<<1 ) == 0 ) return a;        float_exception_flags |= float_flag_inexact;        aSign = extractFloat32Sign( a );        switch ( float_rounding_mode ) {         case float_round_nearest_even:            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {                return packFloat32( aSign, 0x7F, 0 );            }            break;         case float_round_down:            return aSign ? 0xBF800000 : 0;         case float_round_up:            return aSign ? 0x80000000 : 0x3F800000;        }        return packFloat32( aSign, 0, 0 );    }    lastBitMask = 1;    lastBitMask <<= 0x96 - aExp;    roundBitsMask = lastBitMask - 1;    z = a;    roundingMode = float_rounding_mode;    if ( roundingMode == float_round_nearest_even ) {        z += lastBitMask>>1;        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;    }    else if ( roundingMode != float_round_to_zero ) {        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {            z += roundBitsMask;        }    }    z &= ~ roundBitsMask;    if ( z != a ) float_exception_flags |= float_flag_inexact;    return z;}/*----------------------------------------------------------------------------| Returns the result of adding the absolute values of the single-precision| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated| before being returned.  `zSign' is ignored if the result is a NaN.| The addition is performed according to the IEC/IEEE Standard for Binary| Floating-Point Arithmetic.*----------------------------------------------------------------------------*/static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig, bSig, zSig;    int16 expDiff;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    bSig = extractFloat32Frac( b );    bExp = extractFloat32Exp( b );    expDiff = aExp - bExp;    aSig <<= 6;    bSig <<= 6;    if ( 0 < expDiff ) {        if ( aExp == 0xFF ) {            if ( aSig ) return propagateFloat32NaN( a, b );            return a;        }        if ( bExp == 0 ) {            --expDiff;        }        else {            bSig |= 0x20000000;        }        shift32RightJamming( bSig, expDiff, &bSig );        zExp = aExp;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?