📄 softfloat.c

📁 上传linux-jx2410的源代码
💻 C
📖 第 1 页 / 共 5 页
字号:
/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe single-precision floating-point format.  The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float32 int32_to_float32( int32 a ){    flag zSign;    if ( a == 0 ) return 0;    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );    zSign = ( a < 0 );    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );}/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe double-precision floating-point format.  The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float64 int32_to_float64( int32 a ){    flag aSign;    uint32 absA;    int8 shiftCount;    bits64 zSig;    if ( a == 0 ) return 0;    aSign = ( a < 0 );    absA = aSign ? - a : a;    shiftCount = countLeadingZeros32( absA ) + 21;    zSig = absA;    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );}#ifdef FLOATX80/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a'to the extended double-precision floating-point format.  The conversionis performed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/floatx80 int32_to_floatx80( int32 a ){    flag zSign;    uint32 absA;    int8 shiftCount;    bits64 zSig;    if ( a == 0 ) return packFloatx80( 0, 0, 0 );    zSign = ( a < 0 );    absA = zSign ? - a : a;    shiftCount = countLeadingZeros32( absA ) + 32;    zSig = absA;    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );}#endif/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the 32-bit two's complement integer format.  The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic---which means in particular that the conversion is roundedaccording to the current rounding mode.  If `a' is a NaN, the largestpositive integer is returned.  Otherwise, if the conversion overflows, thelargest integer with the same sign as `a' is returned.-------------------------------------------------------------------------------*/int32 float32_to_int32( float32 a ){    flag aSign;    int16 aExp, shiftCount;    bits32 aSig;    bits64 zSig;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;    if ( aExp ) aSig |= 0x00800000;    shiftCount = 0xAF - aExp;    zSig = aSig;    zSig <<= 32;    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );    return roundAndPackInt32( aSign, zSig );}/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the 32-bit two's complement integer format.  The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic, except that the conversion is always rounded toward zero.  If`a' is a NaN, the largest positive integer is returned.  Otherwise, if theconversion overflows, the largest integer with the same sign as `a' isreturned.-------------------------------------------------------------------------------*/int32 float32_to_int32_round_to_zero( float32 a ){    flag aSign;    int16 aExp, shiftCount;    bits32 aSig;    int32 z;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    shiftCount = aExp - 0x9E;    if ( 0 <= shiftCount ) {        if ( a == 0xCF000000 ) return 0x80000000;        float_raise( float_flag_invalid );        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;        return 0x80000000;    }    else if ( aExp <= 0x7E ) {        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;        return 0;    }    aSig = ( aSig | 0x00800000 )<<8;    z = aSig>>( - shiftCount );    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {        float_exception_flags |= float_flag_inexact;    }    return aSign ? - z : z;}/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the double-precision floating-point format.  The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/float64 float32_to_float64( float32 a ){    flag aSign;    int16 aExp;    bits32 aSig;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    if ( aExp == 0xFF ) {        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );        return packFloat64( aSign, 0x7FF, 0 );    }    if ( aExp == 0 ) {        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );        normalizeFloat32Subnormal( aSig, &aExp, &aSig );        --aExp;    }    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );}#ifdef FLOATX80/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the extended double-precision floating-point format.  The conversionis performed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/floatx80 float32_to_floatx80( float32 a ){    flag aSign;    int16 aExp;    bits32 aSig;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    aSign = extractFloat32Sign( a );    if ( aExp == 0xFF ) {        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );    }    if ( aExp == 0 ) {        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );        normalizeFloat32Subnormal( aSig, &aExp, &aSig );    }    aSig |= 0x00800000;    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );}#endif/*-------------------------------------------------------------------------------Rounds the single-precision floating-point value `a' to an integer, andreturns the result as a single-precision floating-point value.  Theoperation is performed according to the IEC/IEEE Standard for BinaryFloating-point Arithmetic.-------------------------------------------------------------------------------*/float32 float32_round_to_int( float32 a ){    flag aSign;    int16 aExp;    bits32 lastBitMask, roundBitsMask;    int8 roundingMode;    float32 z;    aExp = extractFloat32Exp( a );    if ( 0x96 <= aExp ) {        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {            return propagateFloat32NaN( a, a );        }        return a;    }    if ( aExp <= 0x7E ) {        if ( (bits32) ( a<<1 ) == 0 ) return a;        float_exception_flags |= float_flag_inexact;        aSign = extractFloat32Sign( a );        switch ( float_rounding_mode ) {         case float_round_nearest_even:            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {                return packFloat32( aSign, 0x7F, 0 );            }            break;         case float_round_down:            return aSign ? 0xBF800000 : 0;         case float_round_up:            return aSign ? 0x80000000 : 0x3F800000;        }        return packFloat32( aSign, 0, 0 );    }    lastBitMask = 1;    lastBitMask <<= 0x96 - aExp;    roundBitsMask = lastBitMask - 1;    z = a;    roundingMode = float_rounding_mode;    if ( roundingMode == float_round_nearest_even ) {        z += lastBitMask>>1;        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;    }    else if ( roundingMode != float_round_to_zero ) {        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {            z += roundBitsMask;        }    }    z &= ~ roundBitsMask;    if ( z != a ) float_exception_flags |= float_flag_inexact;    return z;}/*-------------------------------------------------------------------------------Returns the result of adding the absolute values of the single-precisionfloating-point values `a' and `b'.  If `zSign' is true, the sum is negatedbefore being returned.  `zSign' is ignored if the result is a NaN.  Theaddition is performed according to the IEC/IEEE Standard for BinaryFloating-point Arithmetic.-------------------------------------------------------------------------------*/static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig, bSig, zSig;    int16 expDiff;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    bSig = extractFloat32Frac( b );    bExp = extractFloat32Exp( b );    expDiff = aExp - bExp;    aSig <<= 6;    bSig <<= 6;    if ( 0 < expDiff ) {        if ( aExp == 0xFF ) {            if ( aSig ) return propagateFloat32NaN( a, b );            return a;        }        if ( bExp == 0 ) {            --expDiff;        }        else {            bSig |= 0x20000000;        }        shift32RightJamming( bSig, expDiff, &bSig );        zExp = aExp;    }    else if ( expDiff < 0 ) {        if ( bExp == 0xFF ) {            if ( bSig ) return propagateFloat32NaN( a, b );            return packFloat32( zSign, 0xFF, 0 );        }        if ( aExp == 0 ) {            ++expDiff;        }        else {            aSig |= 0x20000000;        }        shift32RightJamming( aSig, - expDiff, &aSig );        zExp = bExp;    }    else {        if ( aExp == 0xFF ) {            if ( aSig | bSig ) return propagateFloat32NaN( a, b );            return a;        }        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );        zSig = 0x40000000 + aSig + bSig;        zExp = aExp;        goto roundAndPack;    }    aSig |= 0x20000000;    zSig = ( aSig + bSig )<<1;    --zExp;    if ( (sbits32) zSig < 0 ) {        zSig = aSig + bSig;        ++zExp;    } roundAndPack:    return roundAndPackFloat32( zSign, zExp, zSig );}/*-------------------------------------------------------------------------------Returns the result of subtracting the absolute values of the single-precision floating-point values `a' and `b'.  If `zSign' is true, thedifference is negated before being returned.  `zSign' is ignored if theresult is a NaN.  The subtraction is performed according to the IEC/IEEEStandard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig, bSig, zSig;    int16 expDiff;    aSig = extractFloat32Frac( a );    aExp = extractFloat32Exp( a );    bSig = extractFloat32Frac( b );    bExp = extractFloat32Exp( b );    expDiff = aExp - bExp;    aSig <<= 7;    bSig <<= 7;    if ( 0 < expDiff ) goto aExpBigger;    if ( expDiff < 0 ) goto bExpBigger;    if ( aExp == 0xFF ) {        if ( aSig | bSig ) return propagateFloat32NaN( a, b );        float_raise( float_flag_invalid );        return float32_default_nan;    }    if ( aExp == 0 ) {        aExp = 1;        bExp = 1;    }    if ( bSig < aSig ) goto aBigger;    if ( aSig < bSig ) goto bBigger;    return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); bExpBigger:    if ( bExp == 0xFF ) {        if ( bSig ) return propagateFloat32NaN( a, b );        return packFloat32( zSign ^ 1, 0xFF, 0 );    }
💿 文件大小 30188 K
👤 上传用户 zhang8818200
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux-jx #2410 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -