📄 h_generic_simd64.c
字号:
/* Lane-wise subtraction on eight 8-bit lanes.  Each result lane is
   toUChar(x_lane - y_lane); lanes are handed to mk8x8 from index 7
   down to index 0. */
ULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
      toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
      toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
      toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
      toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
      toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
      toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
      toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
      toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
   );
}

/* ------------ Saturating subtraction ------------ */

/* Applies qsub16S to each pair of corresponding 16-bit lanes of xx
   and yy and repacks the four results with mk16x4. */
ULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
      qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
      qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
      qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
      qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies qsub8S to each pair of corresponding 8-bit lanes of xx
   and yy and repacks the eight results with mk8x8. */
ULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
      qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
      qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
      qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
      qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
      qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
      qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
      qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
      qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/* Applies qsub16U to each pair of corresponding 16-bit lanes of xx
   and yy and repacks the four results with mk16x4. */
ULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
      qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
      qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
      qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
      qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies qsub8U to each pair of corresponding 8-bit lanes of xx
   and yy and repacks the eight results with mk8x8. */
ULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
      qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
      qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
      qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
      qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
      qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
      qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
      qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
      qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/* ------------ Multiplication ------------ */

/* Applies mul16 to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
      mul16( sel16x4_3(xx), sel16x4_3(yy) ),
      mul16( sel16x4_2(xx), sel16x4_2(yy) ),
      mul16( sel16x4_1(xx), sel16x4_1(yy) ),
      mul16( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies mulhi16S to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
      mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
      mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
      mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
      mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies mulhi16U to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
      mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
      mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
      mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
      mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* ------------ Comparison ------------ */

/* Applies cmpeq32 to each pair of corresponding 32-bit lanes. */
ULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
      cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
      cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
   );
}

/* Applies cmpeq16 to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
      cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
      cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
      cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
      cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies cmpeq8 to each pair of corresponding 8-bit lanes. */
ULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
      cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
      cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
      cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
      cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
      cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
      cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
      cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
      cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/* Applies cmpgt32S to each pair of corresponding 32-bit lanes
   (xx lane is the first operand, yy lane the second). */
ULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
{
   return mk32x2(
      cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
      cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
   );
}

/* Applies cmpgt16S to each pair of corresponding 16-bit lanes
   (xx lane is the first operand, yy lane the second). */
ULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
      cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
      cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
      cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
      cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies cmpgt8S to each pair of corresponding 8-bit lanes
   (xx lane is the first operand, yy lane the second). */
ULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
      cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
      cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
      cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
      cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
      cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
      cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
      cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
      cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/* Unary: applies cmpnez32 to each 32-bit lane of xx. */
ULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
{
   return mk32x2(
      cmpnez32( sel32x2_1(xx) ),
      cmpnez32( sel32x2_0(xx) )
   );
}

/* Unary: applies cmpnez16 to each 16-bit lane of xx. */
ULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
{
   return mk16x4(
      cmpnez16( sel16x4_3(xx) ),
      cmpnez16( sel16x4_2(xx) ),
      cmpnez16( sel16x4_1(xx) ),
      cmpnez16( sel16x4_0(xx) )
   );
}

/* Unary: applies cmpnez8 to each 8-bit lane of xx. */
ULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
{
   return mk8x8(
      cmpnez8( sel8x8_7(xx) ),
      cmpnez8( sel8x8_6(xx) ),
      cmpnez8( sel8x8_5(xx) ),
      cmpnez8( sel8x8_4(xx) ),
      cmpnez8( sel8x8_3(xx) ),
      cmpnez8( sel8x8_2(xx) ),
      cmpnez8( sel8x8_1(xx) ),
      cmpnez8( sel8x8_0(xx) )
   );
}

/* ------------ Saturating narrowing ------------ */

/* Narrows the two 32-bit lanes of aa and the two 32-bit lanes of bb
   through qnarrow32Sto16.  The results from aa fill the first two
   mk16x4 arguments (lanes 3 and 2), those from bb the last two
   (lanes 1 and 0). */
ULong h_generic_calc_QNarrow32Sx2 ( ULong aa, ULong bb )
{
   UInt aa_lane1 = sel32x2_1(aa);
   UInt aa_lane0 = sel32x2_0(aa);
   UInt bb_lane1 = sel32x2_1(bb);
   UInt bb_lane0 = sel32x2_0(bb);
   return mk16x4(
      qnarrow32Sto16(aa_lane1),
      qnarrow32Sto16(aa_lane0),
      qnarrow32Sto16(bb_lane1),
      qnarrow32Sto16(bb_lane0)
   );
}

/* Narrows the four 16-bit lanes of aa and the four 16-bit lanes of
   bb through qnarrow16Sto8.  The results from aa fill the first four
   mk8x8 arguments (lanes 7..4), those from bb the last four
   (lanes 3..0). */
ULong h_generic_calc_QNarrow16Sx4 ( ULong aa, ULong bb )
{
   UShort aa_lane3 = sel16x4_3(aa);
   UShort aa_lane2 = sel16x4_2(aa);
   UShort aa_lane1 = sel16x4_1(aa);
   UShort aa_lane0 = sel16x4_0(aa);
   UShort bb_lane3 = sel16x4_3(bb);
   UShort bb_lane2 = sel16x4_2(bb);
   UShort bb_lane1 = sel16x4_1(bb);
   UShort bb_lane0 = sel16x4_0(bb);
   return mk8x8(
      qnarrow16Sto8(aa_lane3),
      qnarrow16Sto8(aa_lane2),
      qnarrow16Sto8(aa_lane1),
      qnarrow16Sto8(aa_lane0),
      qnarrow16Sto8(bb_lane3),
      qnarrow16Sto8(bb_lane2),
      qnarrow16Sto8(bb_lane1),
      qnarrow16Sto8(bb_lane0)
   );
}

/* Same lane layout as h_generic_calc_QNarrow16Sx4, but each 16-bit
   lane goes through qnarrow16Uto8 instead. */
ULong h_generic_calc_QNarrow16Ux4 ( ULong aa, ULong bb )
{
   UShort aa_lane3 = sel16x4_3(aa);
   UShort aa_lane2 = sel16x4_2(aa);
   UShort aa_lane1 = sel16x4_1(aa);
   UShort aa_lane0 = sel16x4_0(aa);
   UShort bb_lane3 = sel16x4_3(bb);
   UShort bb_lane2 = sel16x4_2(bb);
   UShort bb_lane1 = sel16x4_1(bb);
   UShort bb_lane0 = sel16x4_0(bb);
   return mk8x8(
      qnarrow16Uto8(aa_lane3),
      qnarrow16Uto8(aa_lane2),
      qnarrow16Uto8(aa_lane1),
      qnarrow16Uto8(aa_lane0),
      qnarrow16Uto8(bb_lane3),
      qnarrow16Uto8(bb_lane2),
      qnarrow16Uto8(bb_lane1),
      qnarrow16Uto8(bb_lane0)
   );
}

/* ------------ Interleaving ------------ */

/* Interleaves 8-bit lanes 7..4 of aa and bb: the output sequence
   passed to mk8x8 is aa7, bb7, aa6, bb6, aa5, bb5, aa4, bb4. */
ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
      sel8x8_7(aa), sel8x8_7(bb),
      sel8x8_6(aa), sel8x8_6(bb),
      sel8x8_5(aa), sel8x8_5(bb),
      sel8x8_4(aa), sel8x8_4(bb)
   );
}

/* Interleaves 8-bit lanes 3..0 of aa and bb: the output sequence
   passed to mk8x8 is aa3, bb3, aa2, bb2, aa1, bb1, aa0, bb0. */
ULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
      sel8x8_3(aa), sel8x8_3(bb),
      sel8x8_2(aa), sel8x8_2(bb),
      sel8x8_1(aa), sel8x8_1(bb),
      sel8x8_0(aa), sel8x8_0(bb)
   );
}

/* Interleaves 16-bit lanes 3 and 2 of aa and bb: aa3, bb3, aa2, bb2. */
ULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
      sel16x4_3(aa), sel16x4_3(bb),
      sel16x4_2(aa), sel16x4_2(bb)
   );
}

/* Interleaves 16-bit lanes 1 and 0 of aa and bb: aa1, bb1, aa0, bb0. */
ULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
      sel16x4_1(aa), sel16x4_1(bb),
      sel16x4_0(aa), sel16x4_0(bb)
   );
}

/* Pairs 32-bit lane 1 of aa with 32-bit lane 1 of bb. */
ULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
{
   return mk32x2( sel32x2_1(aa), sel32x2_1(bb) );
}

/* Pairs 32-bit lane 0 of aa with 32-bit lane 0 of bb. */
ULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
{
   return mk32x2( sel32x2_0(aa), sel32x2_0(bb) );
}

/* ------------ Shifting ------------ */

/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  In fact, given the
   semantics of these primops (ShlN16x4, etc) it is an error if in
   fact we are ever given an out-of-range shift amount. */

/* Applies shl32 with the shift amount (nn & 31) to both 32-bit lanes. */
ULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   const UInt amt = nn & 31;
   return mk32x2(
      shl32( sel32x2_1(xx), amt ),
      shl32( sel32x2_0(xx), amt )
   );
}

/* Applies shl16 with the shift amount (nn & 15) to all four 16-bit
   lanes. */
ULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   const UInt amt = nn & 15;
   return mk16x4(
      shl16( sel16x4_3(xx), amt ),
      shl16( sel16x4_2(xx), amt ),
      shl16( sel16x4_1(xx), amt ),
      shl16( sel16x4_0(xx), amt )
   );
}

/* Applies shr32 with the shift amount (nn & 31) to both 32-bit lanes. */
ULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   const UInt amt = nn & 31;
   return mk32x2(
      shr32( sel32x2_1(xx), amt ),
      shr32( sel32x2_0(xx), amt )
   );
}

/* Applies shr16 with the shift amount (nn & 15) to all four 16-bit
   lanes. */
ULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   const UInt amt = nn & 15;
   return mk16x4(
      shr16( sel16x4_3(xx), amt ),
      shr16( sel16x4_2(xx), amt ),
      shr16( sel16x4_1(xx), amt ),
      shr16( sel16x4_0(xx), amt )
   );
}

/* Applies sar32 with the shift amount (nn & 31) to both 32-bit lanes. */
ULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   const UInt amt = nn & 31;
   return mk32x2(
      sar32( sel32x2_1(xx), amt ),
      sar32( sel32x2_0(xx), amt )
   );
}

/* Applies sar16 with the shift amount (nn & 15) to all four 16-bit
   lanes. */
ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   const UInt amt = nn & 15;
   return mk16x4(
      sar16( sel16x4_3(xx), amt ),
      sar16( sel16x4_2(xx), amt ),
      sar16( sel16x4_1(xx), amt ),
      sar16( sel16x4_0(xx), amt )
   );
}

/* ------------ Averaging ------------ */

/* Applies avg8U to each pair of corresponding 8-bit lanes. */
ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
      avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
      avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
      avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
      avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
      avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
      avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
      avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
      avg8U( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/* Applies avg16U to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
      avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
      avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
      avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
      avg16U( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* ------------ max/min ------------ */

/* Applies max16S to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
      max16S( sel16x4_3(xx), sel16x4_3(yy) ),
      max16S( sel16x4_2(xx), sel16x4_2(yy) ),
      max16S( sel16x4_1(xx), sel16x4_1(yy) ),
      max16S( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies max8U to each pair of corresponding 8-bit lanes. */
ULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
      max8U( sel8x8_7(xx), sel8x8_7(yy) ),
      max8U( sel8x8_6(xx), sel8x8_6(yy) ),
      max8U( sel8x8_5(xx), sel8x8_5(yy) ),
      max8U( sel8x8_4(xx), sel8x8_4(yy) ),
      max8U( sel8x8_3(xx), sel8x8_3(yy) ),
      max8U( sel8x8_2(xx), sel8x8_2(yy) ),
      max8U( sel8x8_1(xx), sel8x8_1(yy) ),
      max8U( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/* Applies min16S to each pair of corresponding 16-bit lanes. */
ULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
      min16S( sel16x4_3(xx), sel16x4_3(yy) ),
      min16S( sel16x4_2(xx), sel16x4_2(yy) ),
      min16S( sel16x4_1(xx), sel16x4_1(yy) ),
      min16S( sel16x4_0(xx), sel16x4_0(yy) )
   );
}

/* Applies min8U to each pair of corresponding 8-bit lanes. */
ULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
      min8U( sel8x8_7(xx), sel8x8_7(yy) ),
      min8U( sel8x8_6(xx), sel8x8_6(yy) ),
      min8U( sel8x8_5(xx), sel8x8_5(yy) ),
      min8U( sel8x8_4(xx), sel8x8_4(yy) ),
      min8U( sel8x8_3(xx), sel8x8_3(yy) ),
      min8U( sel8x8_2(xx), sel8x8_2(yy) ),
      min8U( sel8x8_1(xx), sel8x8_1(yy) ),
      min8U( sel8x8_0(xx), sel8x8_0(yy) )
   );
}

/*---------------------------------------------------------------*/
/*--- end                         host-generic/h_generic_simd64.c ---*/
/*---------------------------------------------------------------*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -