📄 decode.cpp
字号:
// decode.cpp
// Copyright (C) 2008 Willow Schlanger
// Search for: fixme
// Unimplemented: decode_get_modrm().
#include "types.h"
#include "x86s_common.h"
#include "x86s_decode.h"
#include "x86s_decode.h"
namespace x86s
{
// Table of instruction encodings. The initializer list is pulled in verbatim
// from out_encodings.h.
// The table is terminated with an element whose insn number is insn__count.
encoding_t encodings[] =
{
#include "out_encodings.h"
};
// Integer table with insn__string_count entries; the values are pulled in
// from out_insn_search_table.h.
// NOTE(review): presumably an index used to look up instruction strings --
// confirm against the code that reads it.
int search_table[insn__string_count] =
{
#include "out_insn_search_table.h"
};
// One C string per instruction number (insn__count entries); the strings
// are pulled in from out_insn_strings.h.
const char *insn_strings[insn__count] =
{
#include "out_insn_strings.h"
};
// One insn_def_t per instruction number (insn__count entries); the
// initializers are pulled in from out_insns.h.
insn_def_t insns[insn__count] =
{
#include "out_insns.h"
};
// Table of U4 words pulled in from out_decoder_table.h; its size is taken
// from the initializer list.
// NOTE(review): the layout of these words is not visible in this file --
// see whatever produces out_decoder_table.h.
U4 decoder_table[] =
{
#include "out_decoder_table.h"
};
// --- begin decoder ---
// internal
// Prefix information collected by decode_prefix() ahead of the opcode bytes.
// decode_prefix() resets every field before it starts scanning.
struct decode_prefix_state
{
U1 lockrep : 2; // 0 = none, 1 = 0xf0 seen, 2 = 0xf2 seen, 3 = 0xf3 seen
U1 op66 : 1; // 1 if a 0x66 prefix byte was seen
U1 op67 : 1; // 1 if a 0x67 prefix byte was seen
U1 rex_w : 1; // bit 3 of an accepted 0x4x prefix byte
U1 rex_r : 1; // bit 2 of an accepted 0x4x prefix byte
U1 rex_x : 1; // bit 1 of an accepted 0x4x prefix byte
U1 rex_b : 1; // bit 0 of an accepted 0x4x prefix byte
U1 sreg : 3; // segment override: 0..5, or 7 if none was seen
};
// --- end decoder ---
static bool decode_get_byte(decode_state_t &s, U1 &c);
// returns 1 if limit exceeded.
static int decode_prefix(struct decode_state_t &s, struct decode_prefix_state *p)
{
U1 c;
p->lockrep = 0;
p->op66 = 0;
p->op67 = 0;
p->rex_w = 0;
p->rex_r = 0;
p->rex_x = 0;
p->rex_b = 0;
p->sreg = 7;
// p->size begins as 0 on input.
for(;;)
{
if(decode_get_byte(s, c))
return 1;
// 001ss110
if((c & 0xe7) == 0x26)
p->sreg = ((c >> 3) & 3);
else
// 011001xx
if((c & 0xfc) == 0x64)
{
if(c < 0x66)
p->sreg = (c & 7);
else
if(c == 0x66)
p->op66 = 1;
else
// 0x67
p->op67 = 1;
}
else
// 111100xx
if((c & 0xfc) == 0xf0)
{
if(c == 0xf0)
p->lockrep = 1;
else
if(c == 0xf1)
break;
else
{
// 0xf2 or 0xf3
// 0xf0 overrides 0xf2, 0xf3.
if(p->lockrep == 0)
p->lockrep = (c & 3);
}
}
else
break;
// byte was accepted.
++s.size;
}
// now if in 64-bit mode, get any REX prefix.
// note: 'c' is already the next (unaccepted) byte.
if(s.dsz == argsize_64)
{
if((c & 0xf0) == 0x40)
{
p->rex_w = (c >> 3) & 1;
p->rex_r = (c >> 2) & 1;
p->rex_x = (c >> 1) & 1;
p->rex_b = (c >> 0) & 1;
// byte was accepted.
++s.size;
}
}
return 0;
}
// --- begin modr/m decode code ---
// FIXME -- Check this.
// Decodes the memory form of a 16-bit ModR/M byte 'c' into s->icode->ea.
// 16-bit modr/m's are impossible in long mode (dsz == argsize_64).
//
// Only the ea.base / ea.index fields that the addressing form uses are
// written; the others are left as the caller set them.
// *disp_size receives argsize_8 or argsize_16 when a displacement follows,
// or -1 when there is none.
// Always returns 0.
static int decode_modrm_16(decode_state_t *s, U1 c, int *disp_size)
{
    const U1 mode = (c >> 6) & 3;
    const U1 form = c & 7;

    *disp_size = -1;

    switch(form)
    {
    case 0:     // [bx+si]
    case 1:     // [bx+di]
    case 2:     // [bp+si]
    case 3:     // [bp+di]
        s->icode->ea.index = 6 + (form & 1);    // si or di
        s->icode->ea.base = 3 + (form & 2);     // bx or bp
        break;

    case 4:     // [si]
    case 5:     // [di]
        s->icode->ea.index = 6 + (form & 1);
        break;

    case 6:     // [bp], or a bare 16-bit displacement when mod == 0
        if(mode == 0)
            *disp_size = argsize_16;
        else
            s->icode->ea.base = 5;              // bp
        break;

    default:    // 7: [bx]
        s->icode->ea.base = 3;                  // bx
        break;
    }

    if(mode > 0)
        *disp_size = (mode == 1) ? argsize_8 : argsize_16;

    return 0;
}
// FIXME -- Check this, especially 64-bit support: it is an open question
// whether REX handling needs to be added here for 32-bit modr/m's.
//
// Decodes the memory form of a 32-bit ModR/M byte 'c' into s->icode->ea.
// If the form uses a SIB byte, that byte is fetched here and s->size is
// advanced past it.
//
// *disp_size receives argsize_8 or argsize_32 when a displacement follows,
// or -1 when there is none.
// Returns 1 if the SIB byte could not be fetched, 0 otherwise.
static int decode_modrm_32(decode_state_t *s, U1 c, int *disp_size)
{
    const U1 mode = (c >> 6) & 3;
    const U1 form = c & 7;

    // Displacement size implied by the mod field alone.
    int mode_disp = -1;
    if(mode == 1)
        mode_disp = argsize_8;
    else if(mode == 2)
        mode_disp = argsize_32;

    *disp_size = -1;

    if(form != 4)
    {
        // No SIB byte.
        if(form == 5 && mode == 0)
        {
            // Bare 32-bit displacement, no base register.
            *disp_size = argsize_32;
            return 0;
        }
        s->icode->ea.base = form;
        *disp_size = mode_disp;
        return 0;
    }

    // SIB form: the mod-implied displacement is recorded before the fetch.
    *disp_size = mode_disp;

    U1 sib;
    if(decode_get_byte(*s, sib))
        return 1;
    ++s->size;

    const U1 scale = (sib >> 6) & 3;
    const U1 index_reg = (sib >> 3) & 7;
    const U1 base_reg = sib & 7;

    // An index field of 4 means "no index register".
    if(index_reg != 4)
    {
        s->icode->ea.index = index_reg;
        s->icode->ea.index_scale = scale;
    }

    // A base field of 5 with mod == 0 means "no base, 32-bit displacement".
    if(base_reg == 5 && mode == 0)
        *disp_size = argsize_32;
    else
        s->icode->ea.base = base_reg;

    return 0;
}
// 64-bit ModR/M decoding is not implemented yet.
// The arguments are ignored; the function always returns 3, which callers
// treat as "opcode invalid".
static int decode_modrm_64(decode_state_t *s, U1 c, int *disp_size)
{
    (void)s;
    (void)c;
    (void)disp_size;
    return 3;   // unimplemented - report opcode invalid
}
# include <stdio.h>
static int decode_fetch_many_bytes(struct decode_state_t &s, void *target, U1 size);
// Fetches the ModR/M byte at the current decode position, decodes its memory
// form (if any) into s.icode->ea and reads the displacement that follows.
//
// s.icode->asz must be valid when this is called. Per the original note, the
// caller is responsible for resetting ea.base, ea.index, ea.index_scale and
// ea.disp8 beforehand.
//
// *first_modrm_byte receives the raw ModR/M byte. s.size is advanced past
// every byte consumed (ModR/M, optional SIB, optional displacement).
//
// Returns 0 on success. Returns 1 if the code bytes run out or the helper
// reports an unknown displacement size, 4 if asz is unrecognised while
// sign-extending a disp8, and otherwise whatever non-zero value the
// decode_modrm_* helper returned (3 from the unimplemented 64-bit helper).
//
// FIXME -- THIS NEEDS A LOT OF WORK.
static int decode_get_modrm(decode_state_t &s, U1 *first_modrm_byte, struct decode_prefix_state *p)
{
    U1 c;
    int disp_size = -1;
    int status;

    if(decode_get_byte(s, c))
        return 1;
    *first_modrm_byte = c;
    ++s.size;

    // mod == 3 selects a register operand: there is no memory form to decode.
    if(c >= 0xc0)
        return 0;

    if(s.icode->asz == argsize_16)
        status = decode_modrm_16(&s, c, &disp_size);
    else if(s.icode->asz == argsize_32)
        status = decode_modrm_32(&s, c, &disp_size);
    else
        status = decode_modrm_64(&s, c, &disp_size);
    if(status != 0)
        return status;

    // Get displacement here.
    if(disp_size != -1)
    {
        s.icode->has_disp = 1;
        switch(disp_size)
        {
        case argsize_8:
        {
            s.icode->ea.disp8 = 1;
            U1 value;
            status = decode_fetch_many_bytes(s, &value, argsize_8);
            if(status != 0)
                return status;
            // Sign-extend the byte to the address size, then store it
            // zero-extended in the 32-bit disp field.
            switch(s.icode->asz)
            {
            case argsize_16:
                s.icode->disp = (U4)(U2)(S2)(S1)value;
                break;
            case argsize_32:
                s.icode->disp = (U4)(S4)(S1)value;
                break;
            case argsize_64:
                // note: if ea.disp8 == 1, you have to sign extend
                // the displacement to 64 bits if asz == size_64.
                s.icode->disp = (U4)(S4)(S1)value;
                break;
            default:
                return 4;
            }
            break;
        }
        case argsize_16: // not possible in long mode
        {
            // Fetch exactly the decoded displacement width into a local and
            // store it zero-extended, matching the disp8 path. Writing two
            // bytes straight into disp would leave its other half untouched.
            U2 value;
            status = decode_fetch_many_bytes(s, &value, argsize_16);
            if(status != 0)
                return status;
            s.icode->disp = (U4)value;
            break;
        }
        case argsize_32:
        {
            // Use the displacement width, not asz: with a 64-bit address
            // size a disp32 is still four bytes, and fetching eight would
            // overrun the disp field.
            U4 value;
            status = decode_fetch_many_bytes(s, &value, argsize_32);
            if(status != 0)
                return status;
            s.icode->disp = value;
            break;
        }
        case argsize_64:
        {
            // The upper half goes in imm, the lower half in disp.
            U8 value;
            status = decode_fetch_many_bytes(s, &value, argsize_64);
            if(status != 0)
                return status;
            s.icode->imm = (U8)value >> (U8)32;
            s.icode->disp = (U4)value;
            break;
        }
        default:
            return 1;
        }
    }

    // FIXME --- what about rbp/rsp ???
    //       --- what about rip-relative addressing???
    // With no segment override prefix (sreg == 7) and a base register
    // numbered 4 or 5 (bp, ebp, or esp), ea.sreg is set to 6.
    // NOTE(review): 31 looks like the "no base register" marker and 6 the
    // "stack segment by default" marker -- confirm against the consumers.
    if(p->sreg == 7 && s.icode->ea.base != 31 && (s.icode->ea.base & 6) == 4)
        s.icode->ea.sreg = 6;

    return 0;
}
// --- end modr/m decode code ---
// --- begin rewrite ---
// Peeks at the byte at offset s.size from s.insn and stores it in 'c'.
// Returns true (error) if that position is at or beyond s.end, in which case
// 'c' is left untouched; returns false on success.
// The position is not advanced: the caller must increment s.size itself.
static bool decode_get_byte(decode_state_t &s, U1 &c)
{
    const U1 *const cursor = s.insn + (UINT)s.size;
    const bool exhausted = (cursor >= s.end);

    if(!exhausted)
        c = *cursor;
    return exhausted;
}
static int decode_fetch_many_bytes(struct decode_state_t &s, void *target, U1 size)
{
int x;
U1 c;
const UINT bytesleft = s.end - (s.insn + (UINT)s.size);
switch(size)
{
case argsize_8:
x = decode_get_byte(s, c);
if(x)
return 1;
++s.size;
*(U1 *)(target) = c;
return 0;
case argsize_16:
if(bytesleft < 2)
return 1;
*(U2 *)(target) = GET_U2(s.insn + (UINT)s.size);
s.size += 2;
return 0;
case argsize_32:
if(bytesleft < 4)
return 1;
*(U4 *)(target) = GET_U4(s.insn + (UINT)s.size);
s.size += 4;
return 0;
case argsize_64:
if(bytesleft < 8)
return 1;
*(U8 *)(target) = GET_U8(s.insn + (UINT)s.size);
s.size += 8;
return 0;
default:
break;
}
return 1;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -