softfloat.c

来自「基于组件方式开发操作系统的OSKIT源代码」· C语言 代码 · 共 1,910 行 · 第 1/5 页

C
1,910
字号
        /* NOTE(review): everything down to the first closing brace in
           column 0 is the tail of a function whose beginning lies before
           this chunk.  The code is reproduced unchanged; only comments
           were added. */
        zExtra = zExtra != 0;    /* collapse the lost bits to a 0/1 flag */
        if ( aSign ) {
            /* Negative input: the magnitude grows by one when rounding
               down and bits were lost. */
            z = - ( absZ + ( ( roundingMode == float_round_down ) & zExtra ) );
        }
        else {
            /* Positive input: the magnitude grows by one when rounding up
               and bits were lost. */
            z = absZ + ( ( roundingMode == float_round_up ) & zExtra );
        }
    }
    /* A nonzero result whose sign disagrees with `aSign' is reported as
       invalid and replaced by the extreme integer of the input's sign. */
    if ( ( aSign ^ ( z < 0 ) ) && z ) {
        float_raise( float_flag_invalid );
        return aSign ? -0x80000000 : 0x7FFFFFFF;
    }
    if ( zExtra ) float_exception_flags |= float_flag_inexact;
    return z;
}

/*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the 32-bit two's complement integer format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic, except that the conversion is always rounded toward zero.  If
`a' is a NaN, the largest positive integer is returned.  If the conversion
overflows, the largest integer with the same sign as `a' is returned.

On overflow or NaN, float_raise is called with float_flag_invalid.  When
discarded bits are nonzero, float_flag_inexact is OR-ed into
float_exception_flags.
-------------------------------------------------------------------------------
*/
int32 float64_to_int32_round_to_zero( float64 a )
{
    flag aSign;
    int16 aExp, shiftCount;
    bits32 aSig0, aSig1;
    bits32 absZ, zExtra;
    int32 z;
/*    uint8 roundingMode;*/
    /* (The rounding mode is never consulted in this function.) */

    /* The fraction is handled as two 32-bit words: aSig0 is the upper
       part, aSig1 the lower part. */
    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    /* shiftCount >= 0: the integer part is obtained by shifting the
       significand left; shiftCount < 0: by shifting aSig0 right. */
    shiftCount = aExp - 0x413;
    if ( 0 <= shiftCount ) {
        if ( 11 < shiftCount ) {
            /* Too large for the shift below (this also covers exponent
               0x7FF).  A NaN is given a positive sign so that the overflow
               return yields 0x7FFFFFFF. */
            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
            /* Assuming 32-bit bits32/int32: 0xC0000000 is negative when
               taken as is and positive when negated, so the sign check
               below fires for either value of aSign.  zExtra is left
               unassigned on this path; the function returns from the
               invalid branch before zExtra is read. */
            absZ = 0xC0000000;
        }
        else {
            /* 0x100000 is OR-ed into the upper word as the leading
               significand bit.  shortShiftUp64 is defined outside this
               chunk; presumably it shifts the 64-bit pair left by
               shiftCount, giving the integer part in absZ and the
               remaining fraction bits in zExtra -- TODO confirm. */
            shortShiftUp64(
                aSig0 | 0x100000, aSig1, shiftCount, &absZ, &zExtra );
        }
    }
    else {
        if ( aExp < 0x3FE ) {
            /* Result is zero; zExtra is nonzero whenever any exponent or
               fraction bit of the input is set. */
            zExtra = aExp | aSig0 | aSig1;
            absZ = 0;
        }
        else {
            /* Here shiftCount lies in -21..-1.  On a two's complement
               machine `shiftCount & 31' equals 32 + shiftCount, so the left
               shift keeps exactly the bits of aSig0 that the right shift
               below discards. */
            aSig0 |= 0x100000;
            zExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
            absZ = aSig0>>( - shiftCount );
        }
    }
    z = aSign ? - absZ : absZ;
    /* A nonzero result whose sign disagrees with aSign means the
       magnitude did not fit. */
    if ( ( aSign ^ ( z < 0 ) ) && z ) {
        float_raise( float_flag_invalid );
        return aSign ? -0x80000000 : 0x7FFFFFFF;
    }
    if ( zExtra ) float_exception_flags |= float_flag_inexact;
    return z;
}

/*
-------------------------------------------------------------------------------
Returns the result of converting the double-precision floating-point value
`a' to the single-precision floating-point format.  The conversion is
performed according to the IEC/IEEE Standard for Binary Floating-point
Arithmetic.  The underflow exception is raised only if the result is a
subnormal.
-------------------------------------------------------------------------------
*/
float32 float64_to_float32( float64 a )
{
    flag aSign;
    int16 aExp;
    bits32 aSig0, aSig1, zSig;
    bits32 allZero;    /* receives the upper output word; never read */

    aSig1 = extractFloat64Frac1( a );
    aSig0 = extractFloat64Frac0( a );
    aExp = extractFloat64Exp( a );
    aSign = extractFloat64Sign( a );
    if ( aExp == 0x7FF ) {
        /* Nonzero fraction: NaN, converted by float64ToFloat32NaN.
           Zero fraction: packed with exponent 0xFF and zero fraction. */
        if ( aSig0 | aSig1 ) return float64ToFloat32NaN( a );
        return packFloat32( aSign, 0xFF, 0 );
    }
    /* shiftDown64Jamming is defined outside this chunk; presumably a right
       shift by 22 that folds the discarded bits into the lowest result
       bit -- TODO confirm. */
    shiftDown64Jamming( aSig0, aSig1, 22, &allZero, &zSig );
    /* A nonzero exponent field gets the leading significand bit set. */
    if ( aExp ) zSig |= 0x40000000;
    /* Exponent is re-based: 0x3FF is subtracted and 0x7E added. */
    return roundAndPackFloat32( aSign, aExp - 0x3FF + 0x7E, zSig );
}

/*
-------------------------------------------------------------------------------
Rounds the single-precision floating-point value `a' to an integer, and
returns the result as a single-precision floating-point value.
Theoperation is performed according to the IEC/IEEE Standard for BinaryFloating-point Arithmetic.-------------------------------------------------------------------------------*/float32 float32_round_to_int( float32 a ){    flag aSign;    int16 aExp;    uint32 lastBitMask, roundBitsMask;    uint8 roundingMode;    float32 z;    aExp = extractFloat32Exp( a );    if ( 0x96 <= aExp ) {        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {            return propagateFloat32NaN( a, a );        }        return a;    }    if ( aExp <= 0x7E ) {        if ( a<<1 == 0 ) return a;        float_exception_flags |= float_flag_inexact;        aSign = extractFloat32Sign( a );        switch ( float_rounding_mode ) {            case float_round_nearest_even:                if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {                    return packFloat32( aSign, 0x7F, 0 );                }                break;            case float_round_down:                return                    aSign ? packFloat32( 1, 0x7F, 0 ) : packFloat32( 0, 0, 0 );            case float_round_up:                return                    aSign ? packFloat32( 1, 0, 0 ) : packFloat32( 0, 0x7F, 0 );        }        return packFloat32( aSign, 0, 0 );    }    lastBitMask = 1<<( 0x96 - aExp );    roundBitsMask = lastBitMask - 1;    z = a;    roundingMode = float_rounding_mode;    if ( roundingMode == float_round_nearest_even ) {        z += lastBitMask>>1;        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;    }    else if ( roundingMode != float_round_to_zero ) {        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {            z += roundBitsMask;        }    }    z &= ~ roundBitsMask;    if ( z != a ) float_exception_flags |= float_flag_inexact;    return z;}/*-------------------------------------------------------------------------------Returns the result of adding the absolute values of the single-precisionfloating-point values `a' and `b'.  
If `zSign' is true, the sum is negatedbefore being returned.  `zSign' is ignored if the result is a NaN.  Theaddition is performed according to the IEC/IEEE Standard for BinaryFloating-point Arithmetic.-------------------------------------------------------------------------------*/static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig, bSig, zSig;    int16 expDiff;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    bSig = extractFloat32Frac( b );    bExp = extractFloat32Exp( b );    expDiff = aExp - bExp;    aSig <<= 6;    bSig <<= 6;    if ( 0 < expDiff ) {        if ( aExp == 0xFF ) {            if ( aSig ) return propagateFloat32NaN( a, b );            return a;        }        if ( bExp == 0 ) {            --expDiff;        }        else {            bSig |= 0x20000000;        }        shiftDown32Jamming( bSig, expDiff, &bSig );        zExp = aExp;    }    else if ( expDiff < 0 ) {        if ( bExp == 0xFF ) {            if ( bSig ) return propagateFloat32NaN( a, b );            return packFloat32( zSign, 0xFF, 0 );        }        if ( aExp == 0 ) {            ++expDiff;        }        else {            aSig |= 0x20000000;        }        shiftDown32Jamming( aSig, - expDiff, &aSig );        zExp = bExp;    }    else {        if ( aExp == 0xFF ) {            if ( aSig | bSig ) return propagateFloat32NaN( a, b );            return a;        }        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );        zSig = 0x40000000 + aSig + bSig;        zExp = aExp;        goto roundAndPack;    }    aSig |= 0x20000000;    zSig = ( aSig + bSig )<<1;    --zExp;    if ( ( (sbits32) zSig ) < 0 ) {        zSig = aSig + bSig;        ++zExp;    }  roundAndPack:    return roundAndPackFloat32( zSign, zExp, zSig );}/*-------------------------------------------------------------------------------Returns the result of subtracting the absolute values of the single-precision floating-point 
values `a' and `b'.  If `zSign' is true, thedifference is negated before being returned.  `zSign' is ignored if theresult is a NaN.  The subtraction is performed according to the IEC/IEEEStandard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig, bSig, zSig;    int16 expDiff;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    bSig = extractFloat32Frac( b );    bExp = extractFloat32Exp( b );    expDiff = aExp - bExp;    aSig <<= 7;    bSig <<= 7;    if ( 0 < expDiff ) goto aExpBigger;    if ( expDiff < 0 ) goto bExpBigger;    if ( aExp == 0xFF ) {        if ( aSig | bSig ) return propagateFloat32NaN( a, b );        float_raise( float_flag_invalid );        return float32_default_nan;    }    if ( aExp == 0 ) {        aExp = 1;        bExp = 1;    }    if ( bSig < aSig ) goto aBigger;    if ( aSig < bSig ) goto bBigger;    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );  bExpBigger:    if ( bExp == 0xFF ) {        if ( bSig ) return propagateFloat32NaN( a, b );        return packFloat32( zSign ^ 1, 0xFF, 0 );    }    if ( aExp == 0 ) {        ++expDiff;    }    else {        aSig |= 0x40000000;    }    shiftDown32Jamming( aSig, - expDiff, &aSig );    bSig |= 0x40000000;  bBigger:    zSig = bSig - aSig;    zExp = bExp;    zSign ^= 1;    goto normalizeRoundAndPack;  aExpBigger:    if ( aExp == 0xFF ) {        if ( aSig ) return propagateFloat32NaN( a, b );        return a;    }    if ( bExp == 0 ) {        --expDiff;    }    else {        bSig |= 0x40000000;    }    shiftDown32Jamming( bSig, expDiff, &bSig );    aSig |= 0x40000000;  aBigger:    zSig = aSig - bSig;    zExp = aExp;  normalizeRoundAndPack:    --zExp;    return normalizeRoundAndPackFloat32( zSign, zExp, zSig 
);}/*-------------------------------------------------------------------------------Returns the result of adding the single-precision floating-point values `a'and `b'.  The operation is performed according to the IEC/IEEE Standard forBinary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float32 float32_add( float32 a, float32 b ){    flag aSign, bSign;    aSign = extractFloat32Sign( a );    bSign = extractFloat32Sign( b );    if ( aSign == bSign ) {        return addFloat32Sigs( a, b, aSign );    }    else {        return subFloat32Sigs( a, b, aSign );    }}/*-------------------------------------------------------------------------------Returns the result of subtracting the single-precision floating-point values`a' and `b'.  The operation is performed according to the IEC/IEEE Standardfor Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float32 float32_sub( float32 a, float32 b ){    flag aSign, bSign;    aSign = extractFloat32Sign( a );    bSign = extractFloat32Sign( b );    if ( aSign == bSign ) {        return subFloat32Sigs( a, b, aSign );    }    else {        return addFloat32Sigs( a, b, aSign );    }}/*-------------------------------------------------------------------------------Returns the result of multiplying the single-precision floating-point values`a' and `b'.  The operation is performed according to the IEC/IEEE Standardfor Binary Floating-point Arithmetic.  
The underflow exception is raisedonly if the result is a subnormal.-------------------------------------------------------------------------------*/float32 float32_mul( float32 a, float32 b ){    flag aSign, bSign, zSign;    int16 aExp, bExp, zExp;    bits32 aSig, bSig, zSig0, zSig1;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    bSig = extractFloat32Frac( b );    bExp = extractFloat32Exp( b );    bSign = extractFloat32Sign( b );    zSign = aSign ^ bSign;    if ( aExp == 0xFF ) {        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {            return propagateFloat32NaN( a, b );        }        if ( ( bExp | bSig ) == 0 ) {            float_raise( float_flag_invalid );

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?