/* h_generic_simd64.c */
/*---------------------------------------------------------------*//*--- ---*//*--- This file (host-generic/h_generic_simd64.c) is ---*//*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*//*--- ---*//*---------------------------------------------------------------*//* This file is part of LibVEX, a library for dynamic binary instrumentation and translation. Copyright (C) 2004-2005 OpenWorks LLP. All rights reserved. This library is made available under a dual licensing scheme. If you link LibVEX against other code all of which is itself licensed under the GNU General Public License, version 2 dated June 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL is missing, you can obtain a copy of the GPL v2 from the Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. For any other uses of LibVEX, you must first obtain a commercial license from OpenWorks LLP. Please contact info@open-works.co.uk for information about commercial licensing. This software is provided by OpenWorks LLP "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall OpenWorks LLP be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. Neither the names of the U.S. 
Department of Energy nor the University of California nor the names of its contributors may be used to endorse or promote products derived from this software without prior written permission.*//* Generic helper functions for doing 64-bit SIMD arithmetic in cases where the instruction selectors cannot generate code in-line. These are purely back-end entities and cannot be seen/referenced from IR. */#include "libvex_basictypes.h"#include "host-generic/h_generic_simd64.h"/* Tuple/select functions for 32x2 vectors. */static inline ULong mk32x2 ( UInt w1, UInt w0 ) { return (((ULong)w1) << 32) | ((ULong)w0);}static inline UInt sel32x2_1 ( ULong w64 ) { return 0xFFFFFFFF & toUInt(w64 >> 32);}static inline UInt sel32x2_0 ( ULong w64 ) { return 0xFFFFFFFF & toUInt(w64);}/* Tuple/select functions for 16x4 vectors. gcc is pretty hopeless with 64-bit shifts so we give it a hand. */static inline ULong mk16x4 ( UShort w3, UShort w2, UShort w1, UShort w0 ) { UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2); UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0); return mk32x2(hi32, lo32);}static inline UShort sel16x4_3 ( ULong w64 ) { UInt hi32 = toUInt(w64 >> 32); return toUShort(0xFFFF & (hi32 >> 16));}static inline UShort sel16x4_2 ( ULong w64 ) { UInt hi32 = toUInt(w64 >> 32); return toUShort(0xFFFF & hi32);}static inline UShort sel16x4_1 ( ULong w64 ) { UInt lo32 = (UInt)w64; return toUShort(0xFFFF & (lo32 >> 16));}static inline UShort sel16x4_0 ( ULong w64 ) { UInt lo32 = (UInt)w64; return toUShort(0xFFFF & lo32);}/* Tuple/select functions for 8x8 vectors. 
*/static inline ULong mk8x8 ( UChar w7, UChar w6, UChar w5, UChar w4, UChar w3, UChar w2, UChar w1, UChar w0 ) { UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16) | (((UInt)w5) << 8) | (((UInt)w4) << 0); UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16) | (((UInt)w1) << 8) | (((UInt)w0) << 0); return mk32x2(hi32, lo32);}static inline UChar sel8x8_7 ( ULong w64 ) { UInt hi32 = toUInt(w64 >> 32); return toUChar(0xFF & (hi32 >> 24));}static inline UChar sel8x8_6 ( ULong w64 ) { UInt hi32 = toUInt(w64 >> 32); return toUChar(0xFF & (hi32 >> 16));}static inline UChar sel8x8_5 ( ULong w64 ) { UInt hi32 = toUInt(w64 >> 32); return toUChar(0xFF & (hi32 >> 8));}static inline UChar sel8x8_4 ( ULong w64 ) { UInt hi32 = toUInt(w64 >> 32); return toUChar(0xFF & (hi32 >> 0));}static inline UChar sel8x8_3 ( ULong w64 ) { UInt lo32 = (UInt)w64; return toUChar(0xFF & (lo32 >> 24));}static inline UChar sel8x8_2 ( ULong w64 ) { UInt lo32 = (UInt)w64; return toUChar(0xFF & (lo32 >> 16));}static inline UChar sel8x8_1 ( ULong w64 ) { UInt lo32 = (UInt)w64; return toUChar(0xFF & (lo32 >> 8));}static inline UChar sel8x8_0 ( ULong w64 ) { UInt lo32 = (UInt)w64; return toUChar(0xFF & (lo32 >> 0));}/* Scalar helpers. 
*/static inline Short qadd16S ( Short xx, Short yy ) { Int t = ((Int)xx) + ((Int)yy); if (t < -32768) t = -32768; if (t > 32767) t = 32767; return (Short)t;}static inline Char qadd8S ( Char xx, Char yy ){ Int t = ((Int)xx) + ((Int)yy); if (t < -128) t = -128; if (t > 127) t = 127; return (Char)t;}static inline UShort qadd16U ( UShort xx, UShort yy ){ UInt t = ((UInt)xx) + ((UInt)yy); if (t > 0xFFFF) t = 0xFFFF; return (UShort)t;}static inline UChar qadd8U ( UChar xx, UChar yy ){ UInt t = ((UInt)xx) + ((UInt)yy); if (t > 0xFF) t = 0xFF; return (UChar)t;}static inline Short qsub16S ( Short xx, Short yy ){ Int t = ((Int)xx) - ((Int)yy); if (t < -32768) t = -32768; if (t > 32767) t = 32767; return (Short)t;}static inline Char qsub8S ( Char xx, Char yy ){ Int t = ((Int)xx) - ((Int)yy); if (t < -128) t = -128; if (t > 127) t = 127; return (Char)t;}static inline UShort qsub16U ( UShort xx, UShort yy ){ Int t = ((Int)xx) - ((Int)yy); if (t < 0) t = 0; if (t > 0xFFFF) t = 0xFFFF; return (UShort)t;}static inline UChar qsub8U ( UChar xx, UChar yy ){ Int t = ((Int)xx) - ((Int)yy); if (t < 0) t = 0; if (t > 0xFF) t = 0xFF; return (UChar)t;}static inline Short mul16 ( Short xx, Short yy ){ Int t = ((Int)xx) * ((Int)yy); return (Short)t;}static inline Short mulhi16S ( Short xx, Short yy ){ Int t = ((Int)xx) * ((Int)yy); t >>=/*s*/ 16; return (Short)t;}static inline UShort mulhi16U ( UShort xx, UShort yy ){ UInt t = ((UInt)xx) * ((UInt)yy); t >>=/*u*/ 16; return (UShort)t;}static inline UInt cmpeq32 ( UInt xx, UInt yy ){ return xx==yy ? 0xFFFFFFFF : 0;}static inline UShort cmpeq16 ( UShort xx, UShort yy ){ return toUShort(xx==yy ? 0xFFFF : 0);}static inline UChar cmpeq8 ( UChar xx, UChar yy ){ return toUChar(xx==yy ? 0xFF : 0);}static inline UInt cmpgt32S ( Int xx, Int yy ){ return xx>yy ? 0xFFFFFFFF : 0;}static inline UShort cmpgt16S ( Short xx, Short yy ){ return toUShort(xx>yy ? 0xFFFF : 0);}static inline UChar cmpgt8S ( Char xx, Char yy ){ return toUChar(xx>yy ? 
0xFF : 0);}static inline UInt cmpnez32 ( UInt xx ){ return xx==0 ? 0 : 0xFFFFFFFF;}static inline UShort cmpnez16 ( UShort xx ){ return toUShort(xx==0 ? 0 : 0xFFFF);}static inline UChar cmpnez8 ( UChar xx ){ return toUChar(xx==0 ? 0 : 0xFF);}static inline Short qnarrow32Sto16 ( UInt xx0 ){ Int xx = (Int)xx0; if (xx < -32768) xx = -32768; if (xx > 32767) xx = 32767; return (Short)xx;}static inline Char qnarrow16Sto8 ( UShort xx0 ){ Short xx = (Short)xx0; if (xx < -128) xx = -128; if (xx > 127) xx = 127; return (Char)xx;}static inline UChar qnarrow16Uto8 ( UShort xx0 ){ Short xx = (Short)xx0; if (xx < 0) xx = 0; if (xx > 255) xx = 255; return (UChar)xx;}/* shifts: we don't care about out-of-range ones, since that is dealt with at a higher level. */static inline UShort shl16 ( UShort v, UInt n ){ return toUShort(v << n);}static inline UShort shr16 ( UShort v, UInt n ){ return toUShort((((UShort)v) >> n));}static inline UShort sar16 ( UShort v, UInt n ){ return toUShort(((Short)v) >> n);}static inline UInt shl32 ( UInt v, UInt n ){ return v << n;}static inline UInt shr32 ( UInt v, UInt n ){ return (((UInt)v) >> n);}static inline UInt sar32 ( UInt v, UInt n ){ return ((Int)v) >> n;}static inline UChar avg8U ( UChar xx, UChar yy ){ UInt xxi = (UInt)xx; UInt yyi = (UInt)yy; UInt r = (xxi + yyi + 1) >> 1; return (UChar)r;}static inline UShort avg16U ( UShort xx, UShort yy ){ UInt xxi = (UInt)xx; UInt yyi = (UInt)yy; UInt r = (xxi + yyi + 1) >> 1; return (UShort)r;}static inline Short max16S ( Short xx, Short yy ){ return toUShort((xx > yy) ? xx : yy);}static inline UChar max8U ( UChar xx, UChar yy ){ return toUChar((xx > yy) ? xx : yy);}static inline Short min16S ( Short xx, Short yy ){ return toUShort((xx < yy) ? xx : yy);}static inline UChar min8U ( UChar xx, UChar yy ){ return toUChar((xx < yy) ? xx : yy);}/* ----------------------------------------------------- *//* Start of the externally visible functions. These simply implement the corresponding IR primops. 
*//* ----------------------------------------------------- *//* ------------ Normal addition ------------ */ULong h_generic_calc_Add32x2 ( ULong xx, ULong yy ){ return mk32x2( sel32x2_1(xx) + sel32x2_1(yy), sel32x2_0(xx) + sel32x2_0(yy) );}ULong h_generic_calc_Add16x4 ( ULong xx, ULong yy ){ return mk16x4( toUShort( sel16x4_3(xx) + sel16x4_3(yy) ), toUShort( sel16x4_2(xx) + sel16x4_2(yy) ), toUShort( sel16x4_1(xx) + sel16x4_1(yy) ), toUShort( sel16x4_0(xx) + sel16x4_0(yy) ) );}ULong h_generic_calc_Add8x8 ( ULong xx, ULong yy ){ return mk8x8( toUChar( sel8x8_7(xx) + sel8x8_7(yy) ), toUChar( sel8x8_6(xx) + sel8x8_6(yy) ), toUChar( sel8x8_5(xx) + sel8x8_5(yy) ), toUChar( sel8x8_4(xx) + sel8x8_4(yy) ), toUChar( sel8x8_3(xx) + sel8x8_3(yy) ), toUChar( sel8x8_2(xx) + sel8x8_2(yy) ), toUChar( sel8x8_1(xx) + sel8x8_1(yy) ), toUChar( sel8x8_0(xx) + sel8x8_0(yy) ) );}/* ------------ Saturating addition ------------ */ULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy ){ return mk16x4( qadd16S( sel16x4_3(xx), sel16x4_3(yy) ), qadd16S( sel16x4_2(xx), sel16x4_2(yy) ), qadd16S( sel16x4_1(xx), sel16x4_1(yy) ), qadd16S( sel16x4_0(xx), sel16x4_0(yy) ) );}ULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy ){ return mk8x8( qadd8S( sel8x8_7(xx), sel8x8_7(yy) ), qadd8S( sel8x8_6(xx), sel8x8_6(yy) ), qadd8S( sel8x8_5(xx), sel8x8_5(yy) ), qadd8S( sel8x8_4(xx), sel8x8_4(yy) ), qadd8S( sel8x8_3(xx), sel8x8_3(yy) ), qadd8S( sel8x8_2(xx), sel8x8_2(yy) ), qadd8S( sel8x8_1(xx), sel8x8_1(yy) ), qadd8S( sel8x8_0(xx), sel8x8_0(yy) ) );}ULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy ){ return mk16x4( qadd16U( sel16x4_3(xx), sel16x4_3(yy) ), qadd16U( sel16x4_2(xx), sel16x4_2(yy) ), qadd16U( sel16x4_1(xx), sel16x4_1(yy) ), qadd16U( sel16x4_0(xx), sel16x4_0(yy) ) );}ULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy ){ return mk8x8( qadd8U( sel8x8_7(xx), sel8x8_7(yy) ), qadd8U( sel8x8_6(xx), sel8x8_6(yy) ), qadd8U( sel8x8_5(xx), sel8x8_5(yy) ), qadd8U( sel8x8_4(xx), sel8x8_4(yy) ), qadd8U( 
sel8x8_3(xx), sel8x8_3(yy) ), qadd8U( sel8x8_2(xx), sel8x8_2(yy) ), qadd8U( sel8x8_1(xx), sel8x8_1(yy) ), qadd8U( sel8x8_0(xx), sel8x8_0(yy) ) );}/* ------------ Normal subtraction ------------ */ULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy ){ return mk32x2( sel32x2_1(xx) - sel32x2_1(yy), sel32x2_0(xx) - sel32x2_0(yy) );}ULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy ){ return mk16x4( toUShort( sel16x4_3(xx) - sel16x4_3(yy) ), toUShort( sel16x4_2(xx) - sel16x4_2(yy) ), toUShort( sel16x4_1(xx) - sel16x4_1(yy) ), toUShort( sel16x4_0(xx) - sel16x4_0(yy) ) );}
/*---------------------------------------------------------------*/
/*--- end                  host-generic/h_generic_simd64.c    ---*/
/*---------------------------------------------------------------*/