📄 softfloat.c
字号:
++zSig0; zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); if ( (sbits64) zSig0 < 0 ) zExp = 1; } return packFloatx80( zSign, zExp, zSig0 ); } } if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; if ( increment ) { ++zSig0; if ( zSig0 == 0 ) { ++zExp; zSig0 = LIT64( 0x8000000000000000 ); } else { zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); } } else { if ( zSig0 == 0 ) zExp = 0; } return packFloatx80( zSign, zExp, zSig0 );}/*----------------------------------------------------------------------------| Takes an abstract floating-point value having sign `zSign', exponent| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',| and returns the proper extended double-precision floating-point value| corresponding to the abstract input. This routine is just like| `roundAndPackFloatx80' except that the input significand does not have to be| normalized.*----------------------------------------------------------------------------*/static floatx80 normalizeRoundAndPackFloatx80( int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM){ int8 shiftCount; if ( zSig0 == 0 ) { zSig0 = zSig1; zSig1 = 0; zExp -= 64; } shiftCount = countLeadingZeros64( zSig0 ); shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); zExp -= shiftCount; return roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);}#endif#ifdef FLOAT128/*----------------------------------------------------------------------------| Returns the least-significant 64 fraction bits of the quadruple-precision| floating-point value `a'.*----------------------------------------------------------------------------*/INLINE bits64 extractFloat128Frac1( float128 a ){ return a.low;}/*----------------------------------------------------------------------------| Returns the most-significant 48 fraction bits of the quadruple-precision| floating-point value `a'.*----------------------------------------------------------------------------*/INLINE bits64 extractFloat128Frac0( float128 a ){ return a.high & LIT64( 0x0000FFFFFFFFFFFF );}/*----------------------------------------------------------------------------| Returns the exponent bits of the quadruple-precision floating-point value| `a'.*----------------------------------------------------------------------------*/INLINE int32 extractFloat128Exp( float128 a ){ return ( a.high>>48 ) & 0x7FFF;}/*----------------------------------------------------------------------------| Returns the sign bit of the quadruple-precision floating-point value `a'.*----------------------------------------------------------------------------*/INLINE flag extractFloat128Sign( float128 a ){ return a.high>>63;}/*----------------------------------------------------------------------------| Normalizes the subnormal quadruple-precision floating-point value| represented by the denormalized significand formed by the concatenation of| `aSig0' and `aSig1'. The normalized exponent is stored at the location| pointed to by `zExpPtr'. The most significant 49 bits of the normalized| significand are stored at the location pointed to by `zSig0Ptr', and the| least significant 64 bits of the normalized significand are stored at the| location pointed to by `zSig1Ptr'.*----------------------------------------------------------------------------*/static void normalizeFloat128Subnormal( bits64 aSig0, bits64 aSig1, int32 *zExpPtr, bits64 *zSig0Ptr, bits64 *zSig1Ptr ){ int8 shiftCount; if ( aSig0 == 0 ) { shiftCount = countLeadingZeros64( aSig1 ) - 15; if ( shiftCount < 0 ) { *zSig0Ptr = aSig1>>( - shiftCount ); *zSig1Ptr = aSig1<<( shiftCount & 63 ); } else { *zSig0Ptr = aSig1<<shiftCount; *zSig1Ptr = 0; } *zExpPtr = - shiftCount - 63; } else { shiftCount = countLeadingZeros64( aSig0 ) - 15; shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); *zExpPtr = 1 - shiftCount; }}/*----------------------------------------------------------------------------| Packs the sign `zSign', the exponent `zExp', and the significand formed| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision| floating-point value, returning the result. After being shifted into the| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply| added together to form the most significant 32 bits of the result. This| means that any integer portion of `zSig0' will be added into the exponent.| Since a properly normalized significand will have an integer portion equal| to 1, the `zExp' input should be 1 less than the desired result exponent| whenever `zSig0' and `zSig1' concatenated form a complete, normalized| significand.*----------------------------------------------------------------------------*/INLINE float128 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ){ float128 z; z.low = zSig1; z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; return z;}/*----------------------------------------------------------------------------| Takes an abstract floating-point value having sign `zSign', exponent `zExp',| and extended significand formed by the concatenation of `zSig0', `zSig1',| and `zSig2', and returns the proper quadruple-precision floating-point value| corresponding to the abstract input. Ordinarily, the abstract value is| simply rounded and packed into the quadruple-precision format, with the| inexact exception raised if the abstract input cannot be represented| exactly. However, if the abstract value is too large, the overflow and| inexact exceptions are raised and an infinity or maximal finite value is| returned. If the abstract value is too small, the input value is rounded to| a subnormal number, and the underflow and inexact exceptions are raised if| the abstract input cannot be represented exactly as a subnormal quadruple-| precision floating-point number.| The input significand must be normalized or smaller. If the input| significand is not normalized, `zExp' must be 0; in that case, the result| returned is a subnormal number, and it must not require rounding. In the| usual case that the input significand is normalized, `zExp' must be 1 less| than the ``true'' floating-point exponent. The handling of underflow and| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/static float128 roundAndPackFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 STATUS_PARAM){ int8 roundingMode; flag roundNearestEven, increment, isTiny; roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); increment = ( (sbits64) zSig2 < 0 ); if ( ! roundNearestEven ) { if ( roundingMode == float_round_to_zero ) { increment = 0; } else { if ( zSign ) { increment = ( roundingMode == float_round_down ) && zSig2; } else { increment = ( roundingMode == float_round_up ) && zSig2; } } } if ( 0x7FFD <= (bits32) zExp ) { if ( ( 0x7FFD < zExp ) || ( ( zExp == 0x7FFD ) && eq128( LIT64( 0x0001FFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFFFF ), zSig0, zSig1 ) && increment ) ) { float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); if ( ( roundingMode == float_round_to_zero ) || ( zSign && ( roundingMode == float_round_up ) ) || ( ! zSign && ( roundingMode == float_round_down ) ) ) { return packFloat128( zSign, 0x7FFE, LIT64( 0x0000FFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFFFF ) ); } return packFloat128( zSign, 0x7FFF, 0, 0 ); } if ( zExp < 0 ) { isTiny = ( STATUS(float_detect_tininess) == float_tininess_before_rounding ) || ( zExp < -1 ) || ! increment || lt128( zSig0, zSig1, LIT64( 0x0001FFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFFFF ) ); shift128ExtraRightJamming( zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); if ( roundNearestEven ) { increment = ( (sbits64) zSig2 < 0 ); } else { if ( zSign ) { increment = ( roundingMode == float_round_down ) && zSig2; } else { increment = ( roundingMode == float_round_up ) && zSig2; } } } } if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact; if ( increment ) { add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); } else { if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; } return packFloat128( zSign, zExp, zSig0, zSig1 );}/*----------------------------------------------------------------------------| Takes an abstract floating-point value having sign `zSign', exponent `zExp',| and significand formed by the concatenation of `zSig0' and `zSig1', and| returns the proper quadruple-precision floating-point value corresponding| to the abstract input. This routine is just like `roundAndPackFloat128'| except that the input significand has fewer bits and does not have to be| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-| point exponent.*----------------------------------------------------------------------------*/static float128 normalizeRoundAndPackFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 STATUS_PARAM){ int8 shiftCount; bits64 zSig2; if ( zSig0 == 0 ) { zSig0 = zSig1; zSig1 = 0; zExp -= 64; } shiftCount = countLeadingZeros64( zSig0 ) - 15; if ( 0 <= shiftCount ) { zSig2 = 0; shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); } else { shift128ExtraRightJamming( zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); } zExp -= shiftCount; return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);}#endif/*----------------------------------------------------------------------------| Returns the result of converting the 32-bit two's complement integer `a'| to the single-precision floating-point format. The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float32 int32_to_float32( int32 a STATUS_PARAM ){ flag zSign; if ( a == 0 ) return 0; if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); zSign = ( a < 0 ); return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );}/*----------------------------------------------------------------------------| Returns the result of converting the 32-bit two's complement integer `a'| to the double-precision floating-point format. The conversion is performed| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.*----------------------------------------------------------------------------*/float64 int32_to_float64( int32 a STATUS_PARAM ){ flag zSign; uint32 absA; int8 shiftCount; bits64 zSig; if ( a == 0 ) return 0; zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 21; zSig = absA; return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );}#ifdef FLOATX80
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -