📄 softfloat.c
字号:
floating-point value `a'.-------------------------------------------------------------------------------*/INLINE bits64 extractFloat128Frac1( float128 a ){ return a.low;}/*-------------------------------------------------------------------------------Returns the most-significant 48 fraction bits of the quadruple-precisionfloating-point value `a'.-------------------------------------------------------------------------------*/INLINE bits64 extractFloat128Frac0( float128 a ){ return a.high & LIT64( 0x0000FFFFFFFFFFFF );}/*-------------------------------------------------------------------------------Returns the exponent bits of the quadruple-precision floating-point value`a'.-------------------------------------------------------------------------------*/INLINE int32 extractFloat128Exp( float128 a ){ return ( a.high>>48 ) & 0x7FFF;}/*-------------------------------------------------------------------------------Returns the sign bit of the quadruple-precision floating-point value `a'.-------------------------------------------------------------------------------*/INLINE flag extractFloat128Sign( float128 a ){ return a.high>>63;}/*-------------------------------------------------------------------------------Normalizes the subnormal quadruple-precision floating-point valuerepresented by the denormalized significand formed by the concatenation of`aSig0' and `aSig1'. The normalized exponent is stored at the locationpointed to by `zExpPtr'. 
The most significant 49 bits of the normalizedsignificand are stored at the location pointed to by `zSig0Ptr', and theleast significant 64 bits of the normalized significand are stored at thelocation pointed to by `zSig1Ptr'.-------------------------------------------------------------------------------*/static void normalizeFloat128Subnormal( bits64 aSig0, bits64 aSig1, int32 *zExpPtr, bits64 *zSig0Ptr, bits64 *zSig1Ptr ){ int8 shiftCount; if ( aSig0 == 0 ) { shiftCount = countLeadingZeros64( aSig1 ) - 15; if ( shiftCount < 0 ) { *zSig0Ptr = aSig1>>( - shiftCount ); *zSig1Ptr = aSig1<<( shiftCount & 63 ); } else { *zSig0Ptr = aSig1<<shiftCount; *zSig1Ptr = 0; } *zExpPtr = - shiftCount - 63; } else { shiftCount = countLeadingZeros64( aSig0 ) - 15; shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); *zExpPtr = 1 - shiftCount; }}/*-------------------------------------------------------------------------------Packs the sign `zSign', the exponent `zExp', and the significand formedby the concatenation of `zSig0' and `zSig1' into a quadruple-precisionfloating-point value, returning the result. After being shifted into theproper positions, the three fields `zSign', `zExp', and `zSig0' are simplyadded together to form the most significant 32 bits of the result. 
Thismeans that any integer portion of `zSig0' will be added into the exponent.Since a properly normalized significand will have an integer portion equalto 1, the `zExp' input should be 1 less than the desired result exponentwhenever `zSig0' and `zSig1' concatenated form a complete, normalizedsignificand.-------------------------------------------------------------------------------*/INLINE float128 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ){ float128 z; z.low = zSig1; z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; return z;}/*-------------------------------------------------------------------------------Takes an abstract floating-point value having sign `zSign', exponent `zExp',and extended significand formed by the concatenation of `zSig0', `zSig1',and `zSig2', and returns the proper quadruple-precision floating-point valuecorresponding to the abstract input. Ordinarily, the abstract value issimply rounded and packed into the quadruple-precision format, with theinexact exception raised if the abstract input cannot be representedexactly. If the abstract value is too large, however, the overflow andinexact exceptions are raised and an infinity or maximal finite value isreturned. If the abstract value is too small, the input value is rounded toa subnormal number, and the underflow and inexact exceptions are raised ifthe abstract input cannot be represented exactly as a subnormal quadruple-precision floating-point number. The input significand must be normalized or smaller. If the inputsignificand is not normalized, `zExp' must be 0; in that case, the resultreturned is a subnormal number, and it must not require rounding. In theusual case that the input significand is normalized, `zExp' must be 1 lessthan the ``true'' floating-point exponent. 
The handling of underflow andoverflow follows the IEC/IEEE Standard for
Binary Floating-point Arithmetic.
-------------------------------------------------------------------------------
*/
/*
 * Reviewer summary of roundAndPackFloat128:
 *   zSign        sign of the result.
 *   zExp         exponent, 1 less than the true biased exponent when the
 *                significand is normalized (see the description above).
 *   zSig0:zSig1  128-bit significand that is kept in the result.
 *   zSig2        extra bits below zSig1; they only drive rounding and the
 *                inexact/underflow flags.
 * Reads `float_rounding_mode' and `float_detect_tininess'; reports
 * exceptions through float_raise() and by OR-ing into
 * `float_exception_flags'.  Returns the value built by packFloat128().
 */
static float128 roundAndPackFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
{
    int8 roundingMode;
    flag roundNearestEven, increment, isTiny;

    roundingMode = float_rounding_mode;
    roundNearestEven = ( roundingMode == float_round_nearest_even );
    /* Default (nearest-even) decision: round up when the top bit of the   */
    /* extra word is set.  Assumes sbits64 is the signed 64-bit type, so   */
    /* the cast turns "top bit set" into "negative".                       */
    increment = ( (sbits64) zSig2 < 0 );
    if ( ! roundNearestEven ) {
        if ( roundingMode == float_round_to_zero ) {
            /* Truncation never rounds the magnitude up. */
            increment = 0;
        }
        else {
            /* Directed modes: bump the magnitude only when rounding away  */
            /* from zero for this sign and some extra bit is nonzero.      */
            if ( zSign ) {
                increment = ( roundingMode == float_round_down ) && zSig2;
            }
            else {
                increment = ( roundingMode == float_round_up ) && zSig2;
            }
        }
    }
    /* One comparison selects both special ranges: with the cast to        */
    /* bits32 (presumably unsigned) a negative zExp also compares large,   */
    /* and is then handled by the `zExp < 0' branch below.                 */
    if ( 0x7FFD <= (bits32) zExp ) {
        /* Overflow: exponent already too large, or exactly 0x7FFD with an */
        /* all-ones significand that the pending increment would carry out */
        /* of.                                                             */
        if (    ( 0x7FFD < zExp )
             || (    ( zExp == 0x7FFD )
                  && eq128( LIT64( 0x0001FFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFFFF ), zSig0, zSig1 )
                  && increment ) ) {
            float_raise( float_flag_overflow | float_flag_inexact );
            /* Modes that round toward zero for this sign return the       */
            /* largest finite magnitude (exponent 0x7FFE, all-ones         */
            /* fraction) instead of infinity.                              */
            if (    ( roundingMode == float_round_to_zero )
                 || ( zSign && ( roundingMode == float_round_up ) )
                 || ( ! zSign && ( roundingMode == float_round_down ) ) ) {
                return packFloat128( zSign, 0x7FFE, LIT64( 0x0000FFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFFFF ) );
            }
            /* Otherwise the result is infinity: exponent 0x7FFF, zero     */
            /* fraction.                                                   */
            return packFloat128( zSign, 0x7FFF, 0, 0 );
        }
        if ( zExp < 0 ) {
            /* Tininess must be decided before the significand is shifted. */
            /* When detection is not "before rounding", the value is still */
            /* tiny if zExp < -1, if no round-up is pending, or if the     */
            /* significand is below the all-ones pattern (presumably       */
            /* because the round-up then cannot carry into the next        */
            /* exponent).                                                  */
            isTiny = ( float_detect_tininess == float_tininess_before_rounding )
                || ( zExp < -1 )
                || ! increment
                || lt128( zSig0, zSig1, LIT64( 0x0001FFFFFFFFFFFF ), LIT64( 0xFFFFFFFFFFFFFFFF ) );
            /* Denormalize: shift right by -zExp; the "jamming" helper is  */
            /* expected to keep lost bits visible in zSig2 (helper not     */
            /* shown here).                                                */
            shift128ExtraRightJamming( zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
            zExp = 0;
            /* Underflow is raised only when the tiny result is inexact.   */
            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
            /* The extra bits changed, so the rounding decision is redone. */
            /* Round-to-zero falls into the else branch and gets 0, since  */
            /* neither directed-mode comparison can be true.               */
            if ( roundNearestEven ) {
                increment = ( (sbits64) zSig2 < 0 );
            }
            else {
                if ( zSign ) {
                    increment = ( roundingMode == float_round_down ) && zSig2;
                }
                else {
                    increment = ( roundingMode == float_round_up ) && zSig2;
                }
            }
        }
    }
    /* Any nonzero extra bit means the result is not exact. */
    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
    if ( increment ) {
        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
        /* Ties-to-even: `zSig2 + zSig2 == 0' holds when only the top bit  */
        /* of zSig2 is set (an exact halfway case); in nearest-even mode   */
        /* the low result bit is then cleared.  In other modes the mask    */
        /* is all ones and nothing changes.                                */
        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
    }
    else {
        /* A zero significand is packed with a zero exponent field. */
        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
    }
    return packFloat128( zSign, zExp, zSig0, zSig1 );
}
/*
-------------------------------------------------------------------------------
Takes an abstract floating-point value having sign `zSign', exponent `zExp',
and significand formed by the concatenation of `zSig0' and `zSig1', andreturns
the proper quadruple-precision floating-point value corresponding tothe
abstract input. This routine is just like `roundAndPackFloat128' exceptthat
the input significand has fewer bits and does not have to be normalizedin
any way.
In all cases, `zExp' must be 1 less than the ``true'' floating-point exponent.-------------------------------------------------------------------------------*/static float128 normalizeRoundAndPackFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ){ int8 shiftCount; bits64 zSig2; if ( zSig0 == 0 ) { zSig0 = zSig1; zSig1 = 0; zExp -= 64; } shiftCount = countLeadingZeros64( zSig0 ) - 15; if ( 0 <= shiftCount ) { zSig2 = 0; shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); } else { shift128ExtraRightJamming( zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); } zExp -= shiftCount; return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );}#endif/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe single-precision floating-point format. The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float32 int32_to_float32( int32 a ){ flag zSign; if ( a == 0 ) return 0; if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); zSign = ( a < 0 ); return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );}/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe double-precision floating-point format. The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float64 int32_to_float64( int32 a ){ flag aSign; uint32 absA; int8 shiftCount; bits64 zSig; if ( a == 0 ) return 0; aSign = ( a < 0 ); absA = aSign ? 
- a : a; shiftCount = countLeadingZeros32( absA ) + 21; zSig = absA; return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );}#ifdef FLOATX80/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a'to the extended double-precision floating-point format. The conversionis performed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic.-------------------------------------------------------------------------------*/floatx80 int32_to_floatx80( int32 a ){ flag zSign; uint32 absA; int8 shiftCount; bits64 zSig; if ( a == 0 ) return packFloatx80( 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 32; zSig = absA; return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );}#endif#ifdef FLOAT128/*-------------------------------------------------------------------------------Returns the result of converting the 32-bit two's complement integer `a' tothe quadruple-precision floating-point format. The conversion is performedaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.-------------------------------------------------------------------------------*/float128 int32_to_float128( int32 a ){ flag zSign; uint32 absA; int8 shiftCount; bits64 zSig0; if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); zSign = ( a < 0 ); absA = zSign ? - a : a; shiftCount = countLeadingZeros32( absA ) + 17; zSig0 = absA; return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );}#endif/*-------------------------------------------------------------------------------Returns the result of converting the single-precision floating-point value`a' to the 32-bit two's complement integer format. The conversion isperformed according to the IEC/IEEE Standard for Binary Floating-pointArithmetic---which means in particular that the conversion is roundedaccording to the current rounding mode. 
If `a' is a NaN, the largestpositive integer is returned. Otherwise, if the conversion overflows, thelargest integer with the same sign as `a' is returned.-------------------------------------------------------------------------------*/int32 float32_to_int32( float32 a ){ flag aSign; int16 aExp, shiftCount; bits32 aSig; bits64 zSig;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -