📄 toir.c
字号:
/*--------------------------------------------------------------------*//*--- ---*//*--- This file (guest-x86/toIR.c) is ---*//*--- Copyright (C) OpenWorks LLP. All rights reserved. ---*//*--- ---*//*--------------------------------------------------------------------*//* This file is part of LibVEX, a library for dynamic binary instrumentation and translation. Copyright (C) 2004-2006 OpenWorks LLP. All rights reserved. This library is made available under a dual licensing scheme. If you link LibVEX against other code all of which is itself licensed under the GNU General Public License, version 2 dated June 1991 ("GPL v2"), then you may use LibVEX under the terms of the GPL v2, as appearing in the file LICENSE.GPL. If the file LICENSE.GPL is missing, you can obtain a copy of the GPL v2 from the Free Software Foundation Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. For any other uses of LibVEX, you must first obtain a commercial license from OpenWorks LLP. Please contact info@open-works.co.uk for information about commercial licensing. This software is provided by OpenWorks LLP "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall OpenWorks LLP be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. Neither the names of the U.S. 
Department of Energy nor the University of California nor the names of its contributors may be used to endorse or promote products derived from this software without prior written permission.*//* TODO: All Puts to CC_OP/CC_DEP1/CC_DEP2/CC_NDEP should really be checked to ensure a 32-bit value is being written. FUCOMI(P): what happens to A and S flags? Currently they are forced to zero. x87 FP Limitations: * all arithmetic done at 64 bits * no FP exceptions, except for handling stack over/underflow * FP rounding mode observed only for float->int conversions and int->float conversions which could lose accuracy, and for float-to-float rounding. For all other operations, round-to-nearest is used, regardless. * FP sin/cos/tan/sincos: C2 flag is always cleared. IOW the simulation claims the argument is in-range (-2^63 <= arg <= 2^63) even when it isn't. * some of the FCOM cases could do with testing -- not convinced that the args are the right way round. * FSAVE does not re-initialise the FPU; it should do * FINIT not only initialises the FPU environment, it also zeroes all the FP registers. It should leave the registers unchanged. RDTSC returns one, always. SAHF should cause eflags[1] == 1, and in fact it produces 0. As per Intel docs this bit has no meaning anyway. Since PUSHF is the only way to observe eflags[1], a proper fix would be to make that bit be set by PUSHF. The state of %eflags.AC (alignment check, bit 18) is recorded by the simulation (viz, if you set it with popf then a pushf produces the value you set it to), but it is otherwise ignored. In particular, setting it to 1 does NOT cause alignment checking to happen. Programs that set it to 1 and then rely on the resulting SIGBUSs to inform them of misaligned accesses will not work. Implementation of sysenter is necessarily partial. sysenter is a kind of system call entry. When doing a sysenter, the return address is not known -- that is something that is beyond Vex's knowledge. 
So the generated IR forces a return to the scheduler, which can do what it likes to simulate the sysenter, but it MUST set this thread's guest_EIP field with the continuation address before resuming execution. If that doesn't happen, the thread will jump to address zero, which is probably fatal. This module uses global variables and so is not MT-safe (if that should ever become relevant). The delta values are 32-bit ints, not 64-bit ints. That means this module may not work right if run on a 64-bit host. That should be fixed properly, really -- if anyone ever wants to use Vex to translate x86 code for execution on a 64-bit host. *//* Performance holes: - fcom ; fstsw %ax ; sahf sahf does not update the O flag (sigh) and so O needs to be computed. This is done expensively; it would be better to have a calculate_eflags_o helper. - emwarns; some FP codes can generate huge numbers of these if the fpucw is changed in an inner loop. It would be better for the guest state to have an emwarn-enable reg which can be set zero or nonzero. If it is zero, emwarns are not flagged, and instead control just flows all the way through bbs as usual.*//* "Special" instructions. This instruction decoder can decode three special instructions which mean nothing natively (are no-ops as far as regs/mem are concerned) but have meaning for supporting Valgrind. A special instruction is flagged by the 12-byte preamble C1C703 C1C70D C1C71D C1C713 (in the standard interpretation, that means: roll $3, %edi; roll $13, %edi; roll $29, %edi; roll $19, %edi). Following that, one of the following 3 are allowed (standard interpretation in parentheses): 87DB (xchgl %ebx,%ebx) %EDX = client_request ( %EAX ) 87C9 (xchgl %ecx,%ecx) %EAX = guest_NRADDR 87D2 (xchgl %edx,%edx) call-noredir *%EAX Any other bytes following the 12-byte preamble are illegal and constitute a failure in instruction decoding. 
This all assumes that the preamble will never occur except in specific code fragments designed for Valgrind to catch. No prefixes may precede a "Special" instruction.*//* Translates x86 code to IR. */#include "libvex_basictypes.h"#include "libvex_ir.h"#include "libvex.h"#include "libvex_guest_x86.h"#include "main/vex_util.h"#include "main/vex_globals.h"#include "guest-generic/bb_to_IR.h"#include "guest-generic/g_generic_x87.h"#include "guest-x86/gdefs.h"/*------------------------------------------------------------*//*--- Globals ---*//*------------------------------------------------------------*//* These are set at the start of the translation of an insn, right down in disInstr_X86, so that we don't have to pass them around endlessly. They are all constant during the translation of any given insn. *//* We need to know this to do sub-register accesses correctly. */static Bool host_is_bigendian;/* Pointer to the guest code area (points to start of BB, not to the insn being processed). */static UChar* guest_code;/* The guest address corresponding to guest_code[0]. */static Addr32 guest_EIP_bbstart;/* The guest address for the instruction currently being translated. */static Addr32 guest_EIP_curr_instr;/* The IRBB* into which we're generating code. */static IRBB* irbb;/*------------------------------------------------------------*//*--- Debugging output ---*//*------------------------------------------------------------*/#define DIP(format, args...) \ if (vex_traceflags & VEX_TRACE_FE) \ vex_printf(format, ## args)#define DIS(buf, format, args...) \ if (vex_traceflags & VEX_TRACE_FE) \ vex_sprintf(buf, format, ## args)/*------------------------------------------------------------*//*--- Offsets of various parts of the x86 guest state. 
---*//*------------------------------------------------------------*/#define OFFB_EAX offsetof(VexGuestX86State,guest_EAX)#define OFFB_EBX offsetof(VexGuestX86State,guest_EBX)#define OFFB_ECX offsetof(VexGuestX86State,guest_ECX)#define OFFB_EDX offsetof(VexGuestX86State,guest_EDX)#define OFFB_ESP offsetof(VexGuestX86State,guest_ESP)#define OFFB_EBP offsetof(VexGuestX86State,guest_EBP)#define OFFB_ESI offsetof(VexGuestX86State,guest_ESI)#define OFFB_EDI offsetof(VexGuestX86State,guest_EDI)#define OFFB_EIP offsetof(VexGuestX86State,guest_EIP)#define OFFB_CC_OP offsetof(VexGuestX86State,guest_CC_OP)#define OFFB_CC_DEP1 offsetof(VexGuestX86State,guest_CC_DEP1)#define OFFB_CC_DEP2 offsetof(VexGuestX86State,guest_CC_DEP2)#define OFFB_CC_NDEP offsetof(VexGuestX86State,guest_CC_NDEP)#define OFFB_FPREGS offsetof(VexGuestX86State,guest_FPREG[0])#define OFFB_FPTAGS offsetof(VexGuestX86State,guest_FPTAG[0])#define OFFB_DFLAG offsetof(VexGuestX86State,guest_DFLAG)#define OFFB_IDFLAG offsetof(VexGuestX86State,guest_IDFLAG)#define OFFB_ACFLAG offsetof(VexGuestX86State,guest_ACFLAG)#define OFFB_FTOP offsetof(VexGuestX86State,guest_FTOP)#define OFFB_FC3210 offsetof(VexGuestX86State,guest_FC3210)#define OFFB_FPROUND offsetof(VexGuestX86State,guest_FPROUND)#define OFFB_CS offsetof(VexGuestX86State,guest_CS)#define OFFB_DS offsetof(VexGuestX86State,guest_DS)#define OFFB_ES offsetof(VexGuestX86State,guest_ES)#define OFFB_FS offsetof(VexGuestX86State,guest_FS)#define OFFB_GS offsetof(VexGuestX86State,guest_GS)#define OFFB_SS offsetof(VexGuestX86State,guest_SS)#define OFFB_LDT offsetof(VexGuestX86State,guest_LDT)#define OFFB_GDT offsetof(VexGuestX86State,guest_GDT)#define OFFB_SSEROUND offsetof(VexGuestX86State,guest_SSEROUND)#define OFFB_XMM0 offsetof(VexGuestX86State,guest_XMM0)#define OFFB_XMM1 offsetof(VexGuestX86State,guest_XMM1)#define OFFB_XMM2 offsetof(VexGuestX86State,guest_XMM2)#define OFFB_XMM3 offsetof(VexGuestX86State,guest_XMM3)#define OFFB_XMM4 
offsetof(VexGuestX86State,guest_XMM4)#define OFFB_XMM5 offsetof(VexGuestX86State,guest_XMM5)#define OFFB_XMM6 offsetof(VexGuestX86State,guest_XMM6)#define OFFB_XMM7 offsetof(VexGuestX86State,guest_XMM7)#define OFFB_EMWARN offsetof(VexGuestX86State,guest_EMWARN)#define OFFB_TISTART offsetof(VexGuestX86State,guest_TISTART)#define OFFB_TILEN offsetof(VexGuestX86State,guest_TILEN)#define OFFB_NRADDR offsetof(VexGuestX86State,guest_NRADDR)/*------------------------------------------------------------*//*--- Helper bits and pieces for deconstructing the ---*//*--- x86 insn stream. ---*//*------------------------------------------------------------*//* This is the Intel register encoding -- integer regs. */#define R_EAX 0#define R_ECX 1#define R_EDX 2#define R_EBX 3#define R_ESP 4#define R_EBP 5#define R_ESI 6#define R_EDI 7#define R_AL (0+R_EAX)#define R_AH (4+R_EAX)/* This is the Intel register encoding -- segment regs. */#define R_ES 0#define R_CS 1#define R_SS 2#define R_DS 3#define R_FS 4#define R_GS 5/* Add a statement to the list held by "irbb". */static void stmt ( IRStmt* st ){ addStmtToIRBB( irbb, st );}/* Generate a new temporary of the given type. */static IRTemp newTemp ( IRType ty ){ vassert(isPlausibleIRType(ty)); return newIRTemp( irbb->tyenv, ty );}/* Bomb out if we can't handle something. */__attribute__ ((noreturn))static void unimplemented ( HChar* str ){ vex_printf("x86toIR: unimplemented feature\n"); vpanic(str);}/* Various simple conversions */static UInt extend_s_8to32( UInt x ){ return (UInt)((((Int)x) << 24) >> 24);}static UInt extend_s_16to32 ( UInt x ){ return (UInt)((((Int)x) << 16) >> 16);}/* Fetch a byte from the guest insn stream. */static UChar getIByte ( Int delta ){ return guest_code[delta];}/* Extract the reg field from a modRM byte. */static Int gregOfRM ( UChar mod_reg_rm ){ return (Int)( (mod_reg_rm >> 3) & 7 );}/* Figure out whether the mod and rm parts of a modRM byte refer to a register or memory. 
If so, the byte will have the form 11XXXYYY, where YYY is the register number. */static Bool epartIsReg ( UChar mod_reg_rm ){ return toBool(0xC0 == (mod_reg_rm & 0xC0));}/* ... and extract the register number ... */static Int eregOfRM ( UChar mod_reg_rm ){ return (Int)(mod_reg_rm & 0x7);}/* Get a 8/16/32-bit unsigned value out of the insn stream. */static UChar getUChar ( Int delta ){ UChar v = guest_code[delta+0]; return toUChar(v);}static UInt getUDisp16 ( Int delta ){ UInt v = guest_code[delta+1]; v <<= 8; v |= guest_code[delta+0]; return v & 0xFFFF;}static UInt getUDisp32 ( Int delta ){ UInt v = guest_code[delta+3]; v <<= 8; v |= guest_code[delta+2]; v <<= 8; v |= guest_code[delta+1]; v <<= 8; v |= guest_code[delta+0]; return v;}static UInt getUDisp ( Int size, Int delta ){ switch (size) { case 4: return getUDisp32(delta); case 2: return getUDisp16(delta); case 1: return (UInt)getUChar(delta); default: vpanic("getUDisp(x86)"); } return 0; /*notreached*/}/* Get a byte value out of the insn stream and sign-extend to 32 bits. */static UInt getSDisp8 ( Int delta ){ return extend_s_8to32( (UInt) (guest_code[delta]) );}static UInt getSDisp16 ( Int delta0 ){ UChar* eip = (UChar*)(&guest_code[delta0]); UInt d = *eip++; d |= ((*eip++) << 8); return extend_s_16to32(d);}static UInt getSDisp ( Int size, Int delta ){ switch (size) { case 4: return getUDisp32(delta); case 2: return getSDisp16(delta); case 1: return getSDisp8(delta); default: vpanic("getSDisp(x86)"); } return 0; /*notreached*/}/*------------------------------------------------------------*//*--- Helpers for constructing IR. ---*//*------------------------------------------------------------*//* Create a 1/2/4 byte read of an x86 integer registers. For 16/8 bit
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -