// decode.cpp
// (code-viewer residue: "Font size:" control label)
// This goes thru the arguments for the encoding selected by s.encoding.
// Returns 1 if cs limit exceeded (or internal error), 0 otherwise.
static int decode_get_args(struct decode_state_t &s, struct decode_prefix_state *p, int has_modrm)
{
int i;
encoding_t *e = encodings + s.encoding;
bool immyet = false;
for(i = 0; i < MAX_ARGS; ++i)
{
s.icode->argtype[i] = e->argtype[i];
s.icode->argsize[i] = e->argsize[i];
s.icode->argvalue[i] = e->argvalue[i];
}
for(i = 0; i < MAX_ARGS; ++i)
{
if(get_argtype_lo(s.icode->argtype[i]) == argtype_void)
break;
s.icode->argsize[i] = specialize_arg_size(s.icode->argsize[i], s.dsz, s.icode->osz, s.icode->asz);
switch(get_argtype_lo(s.icode->argtype[i]))
{
case argtype_reg:
switch(get_argtype_hi(s.icode->argtype[i]))
{
case argtypehi_reg_sr:
if(s.icode->argvalue[i] == 0xff)
s.icode->argvalue[i] = (s.modrm0 >> 3) & 7;
break;
case argtypehi_reg_basecode:
if(s.icode->argvalue[i] == 0xff)
s.icode->argvalue[i] = (s.basecode & 7) | ((U1)(p->rex_b) << 4);
s.icode->argtype[i] = ARGTYPE(argtype_reg, argtypehi_reg_r);
break;
case argtypehi_reg_r:
case argtypehi_reg_cr:
case argtypehi_reg_dr:
if(s.icode->argvalue[i] == 0xff)
s.icode->argvalue[i] = ((s.modrm0 >> 3) & 7) | ((U1)(p->rex_r) << 4);
break;
default:
break;
}
break;
case argtype_mem:
switch(get_argtype_hi(s.icode->argtype[i]))
{
case argtypehi_mem_rm:
if(s.modrm0 >= 0xc0)
{
s.icode->argtype[i] = ARGTYPE(argtype_reg, argtypehi_reg_r);
s.icode->argvalue[i] = (s.modrm0 & 7) | ((U1)(p->rex_b) << 4);
}
else
s.icode->argtype[i] = ARGTYPE(argtype_mem, argtypehi_mem_mem);
break;
case argtypehi_mem_disp:
s.icode->has_disp = 1;
switch(s.icode->asz)
{
case argsize_16: // not possible in long mode
case argsize_32:
{
int x = decode_fetch_many_bytes(s, &s.icode->disp, s.icode->asz);
if(x != 0)
return x;
}
break;
case argsize_64:
{
U8 value;
int x = decode_fetch_many_bytes(s, &value, argsize_64);
if(x != 0)
return x;
s.icode->imm = (U8)value >> (U8)32;
s.icode->disp = (U4)value;
}
break;
default:
return 1;
}
break;
case argtypehi_mem_mem: // fine as-is
default:
break;
}
break;
case argtype_imm:
switch(get_argtype_hi(s.icode->argtype[i]))
{
case argtypehi_imm_imm:
case argtypehi_imm_rel:
s.icode->has_imm = 1;
// imm's never have a hi size -- we use two imm's for e.g. far branch targets etc.
if(get_argsize_lo(s.icode->argsize[i]) != argsize_8 &&
e->suffix.sx == sx_yes
)
{
s.icode->sx = 1;
U1 value;
int x = decode_fetch_many_bytes(s, &value, argsize_8);
if(x != 0)
return x;
switch(s.icode->osz)
{
case argsize_16:
s.icode->imm = (U4)(U2)(S2)(S1)value;
break;
case argsize_32:
s.icode->imm = (U4)(S4)(S1)value;
break;
case argsize_64:
s.icode->imm = (U4)(S4)(S1)value;
// note: if s.icode->sx == 1 and an immediate operand size is 64 bits,
// then you have to sign extend the 32-bit 'imm' to 64 bits.
//s.icode->disp = (value < 0x80) ? (U4)(0) : (U4)(-1);
break;
default:
return 1;
}
}
else
if(get_argsize_lo(s.icode->argsize[i]) == argsize_64)
{
U8 value;
int x = decode_fetch_many_bytes(s, &value, argsize_64);
if(x != 0)
return x;
s.icode->imm = (U4)value;
s.icode->disp = (U8)value >> (U8)32;
}
else
{
int x = decode_fetch_many_bytes(s, immyet ? &s.icode->disp : &s.icode->imm, get_argsize_lo(s.icode->argsize[i]));
if(x != 0)
return x;
}
immyet = true;
break;
case argtypehi_imm_cc:
s.icode->argvalue[i] = s.basecode & 0xf;
break;
case argtypehi_imm_implict8: // fine as-is
default:
break;
}
break;
}
}
return 0; // unimplemented
}
// --- end rewrite ---
// Decodes one instruction using the decoder state 's' and fills in s.icode,
// s.encoding, s.basecode, s.size and s.modrm0.
//
// Steps: prefix bytes -> opcode byte(s) -> default operand/address sizes ->
// decoder_table lookup (optionally refined by a further opcode byte or by
// fields of the modr/m byte) -> argument decoding via decode_get_args.
//
// Return codes.
// 0 : success
// 1 : s.icode, s.basecode, s.size, s.modrm0 undefined.
//     exceeded limit while decoding.
// 2 : invalid opcode - opcode invalid when dsz is 64
// 3 : invalid opcode - general
// 4 : invalid opcode - or internal error!
// Any nonzero value returned by decode_get_args is also passed through.
int decode(decode_state_t &s)
{
    struct decode_prefix_state p;
    U4 basecode;
    int tableofs; // should this be SINT ???
    U4 a;              // low 24 bits of the current decoder_table entry
    U1 c;              // most recently fetched opcode byte, or the modr/m byte
    U1 b;              // high 8 bits of the current decoder_table entry (lookup kind)
    U1 has_modrm = 0;  // initialised so every path passes a defined value on

    s.encoding = 0xffffff; // normalize value
    s.size = 0;
    s.icode->disp = 0;
    s.icode->imm = 0;
    s.icode->has_disp = 0; // bugfix 12/23/2008
    // bugfix 01-03-2009: put these here.
    s.icode->ea.base = 31;
    s.icode->ea.index = 31;
    s.icode->ea.index_scale = 0;
    s.icode->ea.disp8 = 0;

    // get any prefix bytes.
    if(decode_prefix(s, &p))
        return 1;

    // get the main opcode byte.
    if(decode_get_byte(s, c))
        return 1;
    ++s.size;
    if(c == 0x0f)
    {
        // Two-byte opcode: 0x0f followed by a second byte.
        if(decode_get_byte(s, c))
            return 1;
        ++s.size;
        if(c == 0x0f)
            basecode = 0xfff; // 3D Now! instruction
        else
            basecode = 0x100 + c;
    }
    else
        basecode = c;

    // initialize some things.
    if(s.dsz == argsize_16)
    {
        s.icode->osz = (p.op66) ? argsize_32 : argsize_16;
        s.icode->asz = (p.op67) ? argsize_32 : argsize_16;
    }
    else if(s.dsz == argsize_32)
    {
        s.icode->osz = (p.op66) ? argsize_16 : argsize_32;
        s.icode->asz = (p.op67) ? argsize_16 : argsize_32;
    }
    else // 64-bit mode
    {
        // Note: this is fixed up later (below), after we have
        // decoded the instruction.
        s.icode->asz = (p.op67) ? argsize_32 : argsize_64;
        s.icode->osz = (p.op66) ? argsize_16 : argsize_32;
    }
    s.icode->lockrep = p.lockrep;
    s.icode->ea.sreg = p.sreg;

    if(basecode == 0xfff)
    {
        // NOTE: As no 3DNow! instructions exist yet, this code is not yet tested!
        // get modr/m.
        if(decode_get_modrm(s, &c, &p))
            return 1;
        s.modrm0 = c;
        // This path always reads a modr/m byte, so record that fact instead of
        // leaving has_modrm unset before it is handed to decode_get_args.
        has_modrm = 1;
        // The opcode suffix byte follows the modr/m bytes.
        if(decode_get_byte(s, b))
            return 1;
        // Count the suffix byte, as is done after every other decode_get_byte
        // call in this function.
        ++s.size;
        basecode = (U4)0x200 + (U4)b;
        tableofs = basecode;
        if(p.op66)
            tableofs += 0x300;
        // none, f0, f2, f3
        if(p.lockrep == 2)
            tableofs += 0x600;
        else if(p.lockrep == 3)
            tableofs += 0x600 * 2; /* 0xc00 */
        a = decoder_table[tableofs] & 0xffffff;
        b = (decoder_table[tableofs] >> 24) & 0xff;
    }
    else
    {
        // see if it has a modr/m or displacement.
        // A base table exists with 0x1200 (4608) entries, all invalid opcodes to begin with.
        // offset = basecode + 0x300 * op66 + 0x600 * repeat
        tableofs = basecode;
        if(p.op66)
            tableofs += 0x300;
        // none, f0, f2, f3
        if(p.lockrep == 2)
            tableofs += 0x600;
        else if(p.lockrep == 3)
            tableofs += 0x600 * 2; /* 0xc00 */
        a = decoder_table[tableofs] & 0xffffff;
        b = (decoder_table[tableofs] >> 24) & 0xff;
        if(b == 4)
        {
            // Entry kind 4: one more opcode byte indexes a sub-table at 'a'.
            if(decode_get_byte(s, c))
                return 1;
            ++s.size;
            tableofs = a + (U4)c;
            a = decoder_table[tableofs] & 0xffffff;
            b = (decoder_table[tableofs] >> 24) & 0xff;
        }
        if(b == 0)
        {
            // Entry kind 0: 'a' is an encoding index, or 0xffffff for invalid.
            if(a == 0xffffff)
            {
                s.size = 0;
                return 3;
            }
            has_modrm = encodings[a].suffix.ro != ro_def;
        }
        else
            has_modrm = 1;
        // now get modr/m if there is one.
        // we do not check for an argument implying rm because we can just
        // check for ro_def which is used if and only if there is no modr/m.
        if(has_modrm)
        {
            if(decode_get_modrm(s, &c, &p))
                return 1;
            s.modrm0 = c;
        }
    }

    // 'b' could be nonzero. If so, we have to find the correct encoding based
    // on parsed 'c' (first modr/m byte).
    if(b == 1)
    {
        // Sub-table indexed by modr/m bits 5:3.
        tableofs = a + ((c >> 3) & 7);
        a = decoder_table[tableofs] & 0xffffff;
        b = (decoder_table[tableofs] >> 24) & 0xff;
    }
    if(b == 2)
    {
        // Sub-table indexed by modr/m bits 7:6.
        tableofs = a + ((c >> 6) & 3);
        a = decoder_table[tableofs] & 0xffffff;
        b = (decoder_table[tableofs] >> 24) & 0xff;
    }
    if(b == 3)
    {
        // Sub-table indexed by modr/m bits 2:0.
        tableofs = a + (c & 7);
        a = decoder_table[tableofs] & 0xffffff;
        b = (decoder_table[tableofs] >> 24) & 0xff;
    }
    // demand 'b' is 0 now, after walking thru the table.
    if(b != 0 || a == 0xffffff)
    {
        s.size = 0;
        return 4;
    }
    s.encoding = a;

    // if no64 and in 64 bit mode, report failure here.
    if(s.dsz == argsize_64 && encodings[s.encoding].suffix.o == o_no64)
    {
        s.encoding = 0xffffff;
        s.size = 0;
        return 2;
    }
    s.icode->insn = encodings[s.encoding].insn;

    // Operand-size fixup: rex_w, or an o_is64 encoding in 64-bit mode without
    // the 0x66 prefix, selects a 64-bit operand size.
    if(p.rex_w || (s.dsz == argsize_64 && encodings[s.encoding].suffix.o == o_is64 && !p.op66))
        s.icode->osz = argsize_64;
    s.basecode = basecode;

    // Go thru all arguments, getting any immediates or the displacement operand.
    //s.icode->has_disp = 0; // bugfix 12/23/2008
    s.icode->has_imm = 0;
    s.icode->sx = 0;
    return decode_get_args(s, &p, has_modrm);
}
} // namespace x86s
#if 0
#include <iostream>
using namespace x86s;
int main()
{
U1 code[] = {0xb8, 0x00, 0x00};
icode_t icode;
decode_state_t decoder;
decoder.icode = &icode;
decoder.insn = code;
decoder.end = code + sizeof(code);
decoder.dsz = argsize_16;
int x = decode(decoder);
std::cout << "decode() returned " << x << std::endl;
if(x == 0)
{
std::cout << "size = " << (UINT)decoder.size << std::endl;
std::cout << "encoding = " << decoder.encoding << std::endl;
std::cout << "insn enum = " << encodings[decoder.encoding].insn << std::endl;
const char *s = insn_strings[encodings[decoder.encoding].insn];
std::cout << "insn name = " << ((s == 0) ? "(null)" : s) << std::endl;
}
return 0;
}
#endif
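// Keyboard shortcuts (code-viewer page residue, not part of decode.cpp):
//   Copy code:          Ctrl + C
//   Search code:        Ctrl + F
//   Full-screen mode:   F11
//   Toggle theme:       Ctrl + Shift + D
//   Show shortcuts:     ?
//   Increase font size: Ctrl + =
//   Decrease font size: Ctrl + -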