softfloat.c
来自「基于4个mips核的noc设计」· C语言 代码 · 共 1,846 行 · 第 1/5 页
C
1,846 行
lastBitMask = 1; lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1; roundBitsMask = lastBitMask - 1; z = a; roundingMode = float_rounding_mode; if ( roundingMode == float_round_nearest_even ) { if ( lastBitMask ) { add64( z.high, z.low, 0, lastBitMask>>1, (unsigned int *) &z.high, (unsigned int *) &z.low ); if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; } else { if ( (sbits32) z.low < 0 ) { ++z.high; if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1; } } } else if ( roundingMode != float_round_to_zero ) { if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { add64( z.high, z.low, 0, roundBitsMask, (unsigned int *) &z.high, (unsigned int *) &z.low ); } } z.low &= ~ roundBitsMask; } else { if ( aExp <= 0x3FE ) { if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a; float_exception_flags |= float_flag_inexact; aSign = extractFloat64Sign( a ); switch ( float_rounding_mode ) { case float_round_nearest_even: if ( ( aExp == 0x3FE ) && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) { return packFloat64( aSign, 0x3FF, 0, 0 ); } break; case float_round_down: return aSign ? packFloat64( 1, 0x3FF, 0, 0 ) : packFloat64( 0, 0, 0, 0 ); case float_round_up: return aSign ? packFloat64( 1, 0, 0, 0 ) : packFloat64( 0, 0x3FF, 0, 0 ); } return packFloat64( aSign, 0, 0, 0 ); } lastBitMask = 1; lastBitMask <<= 0x413 - aExp; roundBitsMask = lastBitMask - 1; z.low = 0; z.high = a.high; roundingMode = float_rounding_mode; if ( roundingMode == float_round_nearest_even ) { z.high += lastBitMask>>1; if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { z.high &= ~ lastBitMask; } } else if ( roundingMode != float_round_to_zero ) { if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { z.high |= ( a.low != 0 ); z.high += roundBitsMask; } } z.high &= ~ roundBitsMask; } if ( ( z.low != a.low ) || ( z.high != a.high ) ) { float_exception_flags |= float_flag_inexact; } return z;}/*----------------------------------------------------------------------------| Returns the result of adding the absolute values of the double-precision| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated| before being returned. `zSign' is ignored if the result is a NaN.| The addition is performed according to the IEC/IEEE Standard for Binary| Floating-Point Arithmetic.*----------------------------------------------------------------------------*/static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ){ int16 aExp, bExp, zExp; bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; int16 expDiff; aSig1 = extractFloat64Frac1( a ); aSig0 = extractFloat64Frac0( a ); aExp = extractFloat64Exp( a ); bSig1 = extractFloat64Frac1( b ); bSig0 = extractFloat64Frac0( b ); bExp = extractFloat64Exp( b ); expDiff = aExp - bExp; if ( 0 < expDiff ) { if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); return a; } if ( bExp == 0 ) { --expDiff; } else { bSig0 |= 0x00100000; } shift64ExtraRightJamming( bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); zExp = aExp; } else if ( expDiff < 0 ) { if ( bExp == 0x7FF ) { if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); return packFloat64( zSign, 0x7FF, 0, 0 ); } if ( aExp == 0 ) { ++expDiff; } else { aSig0 |= 0x00100000; } shift64ExtraRightJamming( aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); zExp = bExp; } else { if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 | bSig0 | bSig1 ) { return propagateFloat64NaN( a, b ); } return a; } add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 ); zSig2 = 0; zSig0 |= 0x00200000; zExp = aExp; goto shiftRight1; } aSig0 |= 0x00100000; add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); --zExp; if ( zSig0 < 0x00200000 ) goto roundAndPack; ++zExp; shiftRight1: shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); roundAndPack: return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );}/*----------------------------------------------------------------------------| Returns the result of subtracting the absolute values of the double-| precision floating-point values `a' and `b'. If `zSign' is 1, the| difference is negated before being returned. `zSign' is ignored if the| result is a NaN. The subtraction is performed according to the IEC/IEEE| Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ){ int16 aExp, bExp, zExp; bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; int16 expDiff; float64 z; aSig1 = extractFloat64Frac1( a ); aSig0 = extractFloat64Frac0( a ); aExp = extractFloat64Exp( a ); bSig1 = extractFloat64Frac1( b ); bSig0 = extractFloat64Frac0( b ); bExp = extractFloat64Exp( b ); expDiff = aExp - bExp; shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 ); shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 ); if ( 0 < expDiff ) goto aExpBigger; if ( expDiff < 0 ) goto bExpBigger; if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 | bSig0 | bSig1 ) { return propagateFloat64NaN( a, b ); } float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; return z; } if ( aExp == 0 ) { aExp = 1; bExp = 1; } if ( bSig0 < aSig0 ) goto aBigger; if ( aSig0 < bSig0 ) goto bBigger; if ( bSig1 < aSig1 ) goto aBigger; if ( aSig1 < bSig1 ) goto bBigger; return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 ); bExpBigger: if ( bExp == 0x7FF ) { if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); return packFloat64( zSign ^ 1, 0x7FF, 0, 0 ); } if ( aExp == 0 ) { ++expDiff; } else { aSig0 |= 0x40000000; } shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); bSig0 |= 0x40000000; bBigger: sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); zExp = bExp; zSign ^= 1; goto normalizeRoundAndPack; aExpBigger: if ( aExp == 0x7FF ) { if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b ); return a; } if ( bExp == 0 ) { --expDiff; } else { bSig0 |= 0x40000000; } shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); aSig0 |= 0x40000000; aBigger: sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); zExp = aExp; normalizeRoundAndPack: --zExp; return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );}/*----------------------------------------------------------------------------| Returns the result of adding the double-precision floating-point values `a'| and `b'. The operation is performed according to the IEC/IEEE Standard for| Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_add( float64 a, float64 b ){ flag aSign, bSign; aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); if ( aSign == bSign ) { return addFloat64Sigs( a, b, aSign ); } else { return subFloat64Sigs( a, b, aSign ); }}/*----------------------------------------------------------------------------| Returns the result of subtracting the double-precision floating-point values| `a' and `b'. The operation is performed according to the IEC/IEEE Standard| for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_sub( float64 a, float64 b ){ flag aSign, bSign; aSign = extractFloat64Sign( a ); bSign = extractFloat64Sign( b ); if ( aSign == bSign ) { return subFloat64Sigs( a, b, aSign ); } else { return addFloat64Sigs( a, b, aSign ); }}/*----------------------------------------------------------------------------| Returns the result of multiplying the double-precision floating-point values| `a' and `b'. The operation is performed according to the IEC/IEEE Standard| for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_mul( float64 a, float64 b ){ flag aSign, bSign, zSign; int16 aExp, bExp, zExp; bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; float64 z; aSig1 = extractFloat64Frac1( a ); aSig0 = extractFloat64Frac0( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); bSig1 = extractFloat64Frac1( b ); bSig0 = extractFloat64Frac0( b ); bExp = extractFloat64Exp( b ); bSign = extractFloat64Sign( b ); zSign = aSign ^ bSign; if ( aExp == 0x7FF ) { if ( ( aSig0 | aSig1 ) || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) { return propagateFloat64NaN( a, b ); } if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; return packFloat64( zSign, 0x7FF, 0, 0 ); } if ( bExp == 0x7FF ) { if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b ); if ( ( aExp | aSig0 | aSig1 ) == 0 ) { invalid: float_raise( float_flag_invalid ); z.low = float64_default_nan_low; z.high = float64_default_nan_high; return z; } return packFloat64( zSign, 0x7FF, 0, 0 ); } if ( aExp == 0 ) { if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); } if ( bExp == 0 ) { if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 ); normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); } zExp = aExp + bExp - 0x400; aSig0 |= 0x00100000; shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 ); mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); zSig2 |= ( zSig3 != 0 ); if ( 0x00200000 <= zSig0 ) { shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); ++zExp; } return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );}/*----------------------------------------------------------------------------| Returns the result of dividing the double-precision floating-point value `a'| by the corresponding value `b'. The operation is performed according to the| IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 float64_div( float64 a, float64 b ){ flag aSign, bSign, zSign; int16 aExp, bExp, zExp; bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3; float64 z; aSig1 = extractFloat64Frac1( a ); aSig0 = extractFloat64Frac0( a ); aExp = extractFloat64Exp( a ); aSign = extractFloat64Sign( a ); bSig1 = extractFloat64Frac1( b ); bSig0 = extractFloat64Frac0( b ); bExp = extractFloat64Exp( b ); bSign
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?