softfloat.c

来自「基于4个mips核的noc设计」· C语言 代码 · 共 1,846 行 · 第 1/5 页

C
1,846
字号
        lastBitMask = 1;        lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;        roundBitsMask = lastBitMask - 1;        z = a;        roundingMode = float_rounding_mode;        if ( roundingMode == float_round_nearest_even ) {            if ( lastBitMask ) {                add64( z.high, z.low, 0, lastBitMask>>1, (unsigned int *) &z.high, (unsigned int *) &z.low );                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;            }            else {                if ( (sbits32) z.low < 0 ) {                    ++z.high;                    if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;                }            }        }        else if ( roundingMode != float_round_to_zero ) {            if (   extractFloat64Sign( z )                 ^ ( roundingMode == float_round_up ) ) {                add64( z.high, z.low, 0, roundBitsMask, (unsigned int *) &z.high, (unsigned int *) &z.low );            }        }        z.low &= ~ roundBitsMask;    }    else {        if ( aExp <= 0x3FE ) {            if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;            float_exception_flags |= float_flag_inexact;            aSign = extractFloat64Sign( a );            switch ( float_rounding_mode ) {             case float_round_nearest_even:                if (    ( aExp == 0x3FE )                     && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )                   ) {                    return packFloat64( aSign, 0x3FF, 0, 0 );                }                break;             case float_round_down:                return                      aSign ? packFloat64( 1, 0x3FF, 0, 0 )                    : packFloat64( 0, 0, 0, 0 );             case float_round_up:                return                      aSign ? packFloat64( 1, 0, 0, 0 )                    : packFloat64( 0, 0x3FF, 0, 0 );            }            return packFloat64( aSign, 0, 0, 0 );        }        lastBitMask = 1;        lastBitMask <<= 0x413 - aExp;        roundBitsMask = lastBitMask - 1;        z.low = 0;        z.high = a.high;        roundingMode = float_rounding_mode;        if ( roundingMode == float_round_nearest_even ) {            z.high += lastBitMask>>1;            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {                z.high &= ~ lastBitMask;            }        }        else if ( roundingMode != float_round_to_zero ) {            if (   extractFloat64Sign( z )                 ^ ( roundingMode == float_round_up ) ) {                z.high |= ( a.low != 0 );                z.high += roundBitsMask;            }        }        z.high &= ~ roundBitsMask;    }    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {        float_exception_flags |= float_flag_inexact;    }    return z;}/*----------------------------------------------------------------------------| Returns the result of adding the absolute values of the double-precision| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated| before being returned.  `zSign' is ignored if the result is a NaN.| The addition is performed according to the IEC/IEEE Standard for Binary| Floating-Point Arithmetic.*----------------------------------------------------------------------------*/static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;    int16 expDiff;    aSig1 = extractFloat64Frac1( a );    aSig0 = extractFloat64Frac0( a );    aExp = extractFloat64Exp( a );    bSig1 = extractFloat64Frac1( b );    bSig0 = extractFloat64Frac0( b );    bExp = extractFloat64Exp( b );    expDiff = aExp - bExp;    if ( 0 < expDiff ) {        if ( aExp == 0x7FF ) {            if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );            return a;        }        if ( bExp == 0 ) {            --expDiff;        }        else {            bSig0 |= 0x00100000;        }        shift64ExtraRightJamming(            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );        zExp = aExp;    }    else if ( expDiff < 0 ) {        if ( bExp == 0x7FF ) {            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );            return packFloat64( zSign, 0x7FF, 0, 0 );        }        if ( aExp == 0 ) {            ++expDiff;        }        else {            aSig0 |= 0x00100000;        }        shift64ExtraRightJamming(            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );        zExp = bExp;    }    else {        if ( aExp == 0x7FF ) {            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {                return propagateFloat64NaN( a, b );            }            return a;        }        add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );        if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );        zSig2 = 0;        zSig0 |= 0x00200000;        zExp = aExp;        goto shiftRight1;    }    aSig0 |= 0x00100000;    add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );    --zExp;    if ( zSig0 < 0x00200000 ) goto roundAndPack;    ++zExp; shiftRight1:    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); roundAndPack:    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );}/*----------------------------------------------------------------------------| Returns the result of subtracting the absolute values of the double-| precision floating-point values `a' and `b'.  If `zSign' is 1, the| difference is negated before being returned.  `zSign' is ignored if the| result is a NaN.  The subtraction is performed according to the IEC/IEEE| Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ){    int16 aExp, bExp, zExp;    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;    int16 expDiff;    float64 z;    aSig1 = extractFloat64Frac1( a );    aSig0 = extractFloat64Frac0( a );    aExp = extractFloat64Exp( a );    bSig1 = extractFloat64Frac1( b );    bSig0 = extractFloat64Frac0( b );    bExp = extractFloat64Exp( b );    expDiff = aExp - bExp;    shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );    shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );    if ( 0 < expDiff ) goto aExpBigger;    if ( expDiff < 0 ) goto bExpBigger;    if ( aExp == 0x7FF ) {        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {            return propagateFloat64NaN( a, b );        }        float_raise( float_flag_invalid );        z.low = float64_default_nan_low;        z.high = float64_default_nan_high;        return z;    }    if ( aExp == 0 ) {        aExp = 1;        bExp = 1;    }    if ( bSig0 < aSig0 ) goto aBigger;    if ( aSig0 < bSig0 ) goto bBigger;    if ( bSig1 < aSig1 ) goto aBigger;    if ( aSig1 < bSig1 ) goto bBigger;    return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); bExpBigger:    if ( bExp == 0x7FF ) {        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );        return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );    }    if ( aExp == 0 ) {        ++expDiff;    }    else {        aSig0 |= 0x40000000;    }    shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );    bSig0 |= 0x40000000; bBigger:    sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );    zExp = bExp;    zSign ^= 1;    goto normalizeRoundAndPack; aExpBigger:    if ( aExp == 0x7FF ) {        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );        return a;    }    if ( bExp == 0 ) {        --expDiff;    }    else {        bSig0 |= 0x40000000;    }    shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );    aSig0 |= 0x40000000; aBigger:    sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );    zExp = aExp; normalizeRoundAndPack:    --zExp;    return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );}/*----------------------------------------------------------------------------| Returns the result of adding the double-precision floating-point values `a'| and `b'.  The operation is performed according to the IEC/IEEE Standard for| Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_add( float64 a, float64 b ){    flag aSign, bSign;    aSign = extractFloat64Sign( a );    bSign = extractFloat64Sign( b );    if ( aSign == bSign ) {        return addFloat64Sigs( a, b, aSign );    }    else {        return subFloat64Sigs( a, b, aSign );    }}/*----------------------------------------------------------------------------| Returns the result of subtracting the double-precision floating-point values| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard| for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_sub( float64 a, float64 b ){    flag aSign, bSign;    aSign = extractFloat64Sign( a );    bSign = extractFloat64Sign( b );    if ( aSign == bSign ) {        return subFloat64Sigs( a, b, aSign );    }    else {        return addFloat64Sigs( a, b, aSign );    }}/*----------------------------------------------------------------------------| Returns the result of multiplying the double-precision floating-point values| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard| for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_mul( float64 a, float64 b ){    flag aSign, bSign, zSign;    int16 aExp, bExp, zExp;    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;    float64 z;    aSig1 = extractFloat64Frac1( a );    aSig0 = extractFloat64Frac0( a );    aExp = extractFloat64Exp( a );    aSign = extractFloat64Sign( a );    bSig1 = extractFloat64Frac1( b );    bSig0 = extractFloat64Frac0( b );    bExp = extractFloat64Exp( b );    bSign = extractFloat64Sign( b );    zSign = aSign ^ bSign;    if ( aExp == 0x7FF ) {        if (    ( aSig0 | aSig1 )             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {            return propagateFloat64NaN( a, b );        }        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;        return packFloat64( zSign, 0x7FF, 0, 0 );    }    if ( bExp == 0x7FF ) {        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );        if ( ( aExp | aSig0 | aSig1 ) == 0 ) { invalid:            float_raise( float_flag_invalid );            z.low = float64_default_nan_low;            z.high = float64_default_nan_high;            return z;        }        return packFloat64( zSign, 0x7FF, 0, 0 );    }    if ( aExp == 0 ) {        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );    }    if ( bExp == 0 ) {        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );    }    zExp = aExp + bExp - 0x400;    aSig0 |= 0x00100000;    shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );    mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );    add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );    zSig2 |= ( zSig3 != 0 );    if ( 0x00200000 <= zSig0 ) {        shift64ExtraRightJamming(            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );        ++zExp;    }    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );}/*----------------------------------------------------------------------------| Returns the result of dividing the double-precision floating-point value `a'| by the corresponding value `b'.  The operation is performed according to the| IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_div( float64 a, float64 b ){    flag aSign, bSign, zSign;    int16 aExp, bExp, zExp;    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;    float64 z;    aSig1 = extractFloat64Frac1( a );    aSig0 = extractFloat64Frac0( a );    aExp = extractFloat64Exp( a );    aSign = extractFloat64Sign( a );    bSig1 = extractFloat64Frac1( b );    bSig0 = extractFloat64Frac0( b );    bExp = extractFloat64Exp( b );    bSign

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?