dsp.cc
来自「M5,一个功能强大的多处理器系统模拟器.很多针对处理器架构,性能的研究都使用它作」· CC 代码 · 共 1,233 行 · 第 1/3 页
CC
1,233 行
uint32_t ouflag = 0; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, fmt, SIGNED ); simdUnpack( b, b_values, fmt, SIGNED ); for( i=0; i<nvals; i++ ) { if( saturate ) a_values[i] = dspSaturate( a_values[i] * b_values[i], fmt, SIGNED, &ouflag ); else a_values[i] = checkOverflow( a_values[i] * b_values[i], fmt, SIGNED, &ouflag ); } simdPack( a_values, &result, fmt ); if( ouflag ) writeDSPControl( dspctl, (ouflag<<5)<<DSP_CTL_POS[DSP_OUFLAG], 1<<DSP_OUFLAG); return( result );}int32_tMipsISA::dspMuleu( int32_t a, int32_t b, int32_t mode, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[SIMD_FMT_PH]; int32_t result; uint32_t ouflag = 0; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, SIMD_FMT_QB, UNSIGNED ); simdUnpack( b, b_values, SIMD_FMT_PH, UNSIGNED ); switch( mode ) { case MODE_L: for( i=0; i<nvals; i++ ) b_values[i] = dspSaturate( a_values[i+2] * b_values[i], SIMD_FMT_PH, UNSIGNED, &ouflag ); break; case MODE_R: for( i=0; i<nvals; i++ ) b_values[i] = dspSaturate( a_values[i] * b_values[i], SIMD_FMT_PH, UNSIGNED, &ouflag ); break; } simdPack( b_values, &result, SIMD_FMT_PH ); if( ouflag ) writeDSPControl( dspctl, (ouflag<<5)<<DSP_CTL_POS[DSP_OUFLAG], 1<<DSP_OUFLAG); return( result );}int32_tMipsISA::dspMuleq( int32_t a, int32_t b, int32_t mode, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[SIMD_FMT_W]; int32_t result; uint32_t ouflag = 0; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; uint64_t c_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, SIMD_FMT_PH, SIGNED ); simdUnpack( b, b_values, SIMD_FMT_PH, SIGNED ); switch( mode ) { case MODE_L: for( i=0; i<nvals; i++ ) c_values[i] = dspSaturate( a_values[i+1] * b_values[i+1] << 1, SIMD_FMT_W, SIGNED, &ouflag ); break; case MODE_R: for( i=0; i<nvals; i++ ) c_values[i] = dspSaturate( a_values[i] * b_values[i] << 1, SIMD_FMT_W, SIGNED, &ouflag ); break; } simdPack( c_values, &result, SIMD_FMT_W ); if( ouflag ) writeDSPControl( dspctl, (ouflag<<5)<<DSP_CTL_POS[DSP_OUFLAG], 1<<DSP_OUFLAG); return( result );}int64_tMipsISA::dspDpaq( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t infmt, int32_t outfmt, int32_t postsat, int32_t mode, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[infmt]; int64_t result = 0; int64_t temp = 0; uint32_t ouflag = 0; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, infmt, SIGNED ); simdUnpack( b, b_values, infmt, SIGNED ); for( i=0; i<nvals; i++ ) { switch( mode ) { case MODE_X: if( a_values[nvals-1-i] == FIXED_SMIN[infmt] && b_values[i] == FIXED_SMIN[infmt] ) { result += FIXED_SMAX[outfmt]; ouflag = 1; } else result += a_values[nvals-1-i] * b_values[i] << 1; break; default: if( a_values[i] == FIXED_SMIN[infmt] && b_values[i] == FIXED_SMIN[infmt] ) { result += FIXED_SMAX[outfmt]; ouflag = 1; } else result += a_values[i] * b_values[i] << 1; break; } } if( postsat ) { if( outfmt == SIMD_FMT_L ) { int signa = bits( dspac, 63, 63 ); int signb = bits( result, 63, 63 ); temp = dspac + result; if( ( signa == signb ) && ( bits( temp, 63, 63 ) != signa ) ) { ouflag = 1; if( signa ) dspac = FIXED_SMIN[outfmt]; else dspac = FIXED_SMAX[outfmt]; } else dspac = temp; } else dspac = dspSaturate( dspac + result, outfmt, SIGNED, &ouflag ); } else dspac += result; if( ouflag ) *dspctl = insertBits( *dspctl, 16+ac, 16+ac, 1 ); return( dspac );}int64_tMipsISA::dspDpsq( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t infmt, int32_t outfmt, int32_t postsat, int32_t mode, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[infmt]; int64_t result = 0; int64_t temp = 0; uint32_t ouflag = 0; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, infmt, SIGNED ); simdUnpack( b, b_values, infmt, SIGNED ); for( i=0; i<nvals; i++ ) { switch( mode ) { case MODE_X: if( a_values[nvals-1-i] == FIXED_SMIN[infmt] && b_values[i] == FIXED_SMIN[infmt] ) { result += FIXED_SMAX[outfmt]; ouflag = 1; } else result += a_values[nvals-1-i] * b_values[i] << 1; break; default: if( a_values[i] == FIXED_SMIN[infmt] && b_values[i] == FIXED_SMIN[infmt] ) { result += FIXED_SMAX[outfmt]; ouflag = 1; } else result += a_values[i] * b_values[i] << 1; break; } } if( postsat ) { if( outfmt == SIMD_FMT_L ) { int signa = bits( dspac, 63, 63 ); int signb = bits( -result, 63, 63 ); temp = dspac - result; if( ( signa == signb ) && ( bits( temp, 63, 63 ) != signa ) ) { ouflag = 1; if( signa ) dspac = FIXED_SMIN[outfmt]; else dspac = FIXED_SMAX[outfmt]; } else dspac = temp; } else dspac = dspSaturate( dspac - result, outfmt, SIGNED, &ouflag ); } else dspac -= result; if( ouflag ) *dspctl = insertBits( *dspctl, 16+ac, 16+ac, 1 ); return( dspac );}int64_tMipsISA::dspDpa( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t fmt, int32_t sign, int32_t mode ){ int i = 0; int nvals = SIMD_NVALS[fmt]; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, fmt, sign ); simdUnpack( b, b_values, fmt, sign ); for( i=0; i<2; i++ ) { switch( mode ) { case MODE_L: dspac += a_values[nvals-1-i] * b_values[nvals-1-i]; break; case MODE_R: dspac += a_values[nvals-3-i] * b_values[nvals-3-i]; break; case MODE_X: dspac += a_values[nvals-1-i] * b_values[i]; break; } } return dspac;}int64_tMipsISA::dspDps( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t fmt, int32_t sign, int32_t mode ){ int i = 0; int nvals = SIMD_NVALS[fmt]; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, fmt, sign ); simdUnpack( b, b_values, fmt, sign ); for( i=0; i<2; i++ ) { switch( mode ) { case MODE_L: dspac -= a_values[nvals-1-i] * b_values[nvals-1-i]; break; case MODE_R: dspac -= a_values[nvals-3-i] * b_values[nvals-3-i]; break; case MODE_X: dspac -= a_values[nvals-1-i] * b_values[i]; break; } } return dspac;}int64_tMipsISA::dspMaq( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t fmt, int32_t mode, int32_t saturate, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[fmt-1]; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; int64_t temp = 0; uint32_t ouflag = 0; simdUnpack( a, a_values, fmt, SIGNED ); simdUnpack( b, b_values, fmt, SIGNED ); for( i=0; i<nvals; i++ ) { switch( mode ) { case MODE_L: temp = a_values[i+1] * b_values[i+1] << 1; if( a_values[i+1] == FIXED_SMIN[fmt] && b_values[i+1] == FIXED_SMIN[fmt] ) { temp = (int64_t)FIXED_SMAX[fmt-1]; ouflag = 1; } break; case MODE_R: temp = a_values[i] * b_values[i] << 1; if( a_values[i] == FIXED_SMIN[fmt] && b_values[i] == FIXED_SMIN[fmt] ) { temp = (int64_t)FIXED_SMAX[fmt-1]; ouflag = 1; } break; } temp += dspac; if( saturate ) temp = dspSaturate( temp, fmt-1, SIGNED, &ouflag ); if( ouflag ) *dspctl = insertBits( *dspctl, 16+ac, 16+ac, 1 ); } return temp;}int64_tMipsISA::dspMulsa( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t fmt ){ uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, fmt, SIGNED ); simdUnpack( b, b_values, fmt, SIGNED ); dspac += a_values[1] * b_values[1] - a_values[0] * b_values[0]; return dspac;}int64_tMipsISA::dspMulsaq( int64_t dspac, int32_t a, int32_t b, int32_t ac, int32_t fmt, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[fmt]; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; int64_t temp[2]; uint32_t ouflag = 0; simdUnpack( a, a_values, fmt, SIGNED ); simdUnpack( b, b_values, fmt, SIGNED ); for( i=nvals-1; i>-1; i-- ) { temp[i] = a_values[i] * b_values[i] << 1; if( a_values[i] == FIXED_SMIN[fmt] && b_values[i] == FIXED_SMIN[fmt] ) { temp[i] = FIXED_SMAX[fmt-1]; ouflag = 1; } } dspac += temp[1] - temp[0]; if( ouflag ) *dspctl = insertBits( *dspctl, 16+ac, 16+ac, 1 ); return dspac;}voidMipsISA::dspCmp( int32_t a, int32_t b, int32_t fmt, int32_t sign, int32_t op, uint32_t *dspctl ){ int i = 0; int nvals = SIMD_NVALS[fmt]; int ccond = 0; uint64_t a_values[SIMD_MAX_VALS]; uint64_t b_values[SIMD_MAX_VALS]; simdUnpack( a, a_values, fmt, sign ); simdUnpack( b, b_values, fmt, sign ); for( i=0; i<nvals; i++ ) {
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?