📄 crudcom2.cpp
字号:
// crudcom2.cpp
// Copyright (C) 2008 Willow Schlanger
// somehow discover 'long' types?
//
// - need to fix 'nop' -> xchg <reg>,<same reg> --> asgn(void).
// - how are we going to handle the stack (push / pop)'s?
// - tmp(X) -> tmpX for output.
//
// now if we have a procedure that
// does this: pop ecx / jmp ecx
// then the procedure's "decompile" flag is cleared to false.
// a helper script file may indicate which procedures to totally
// ignore. how to implement that? also need to implement indirect
// call/jump's, possibly with a helper script.
// on call, during data flow analysis check if the target is
// decompile == true--if not, make OUR decompile==false and
// just go on to the next procedure.
//
// Note--we must be prepared to encounter a JMP/Jcond to the
// current procedure.
//
// The live_output's for a procedure are interesting and important
// to compute the way we do, but we also need vote's for a procedure's
// outputs. Assume external inputs return void. Currently, push;s are
// assumed to really read something and pop's assumed to really write
// it. There is no stack simulation, etc. The idea is, many functions
// will have a lot of outputs--which is fine, but we don't want to print
// code for outputs that are never "voted" for e.g. used in an immediately
// following code block.
#define DEBUG_LINE_NUMBERS 0
#include "beta.h"
#include "parser.h"
#include "semantics.h"
#include "disasm.h"
// --- dataflow ---
#include <cstdio>
#include <set>
#include <cstddef>
#include <map>
#include <list>
#include <vector>
namespace x86s
{
// ht = hcode type
enum
{
ht_expr, // asgn(dest, src) or asgn(void)
// Note--for now, data flow analysis will assume
// *** What about SETcc,CMOVcc? How to handle cc argument?
// *** these use _x86_cc().
ht_jump, // target(s) specified by basic block
// if jmp indirect, there are >= 1 targets (assume it's part of same procedure if indirect jmp.)
// if it's called elsewhere AND we do an indirect jump to it, we have trouble.
// else there is exactly one target.
// if it's a different procedure, there are ARGUMENTS.
// *** add arguments. 1st arg is indirect access, or NULL if direct. 2nd arg is actual arguments.
ht_call, // target[0] is next insn, target[1] is call target. left empty for handled indirect calls.
// if you call the next insn, there's only one target.
// decompile to: call f() then invoke f().
// *** add arguments. 1st arg is indirect access, or NULL if direct. 2nd arg is actual arguments.
ht_jcond, // target[0] is next insn, target[1] is branch target
// only one target if jcond is to self.
// *** first argument is condition code value, 0..15.
// *** second argument NULL, or is argsize_16/argsize_32/argsize_64 for cx/ecx/rcx
// +0x100 if we branch if cx/ecx/rcx==0.
// +0x200 if we branch if cx/ecx/rcx!=0
// +0x300 if we decrement cx/ecx/rcx by 1, then branch if it's nonzero.
// *** 3nd argument is hll expression. (need to add this one).
// *** any of these 3 args may be NULL.
// don't support jcond's to other procedures. make reparse fail if that is done.
ht_ret, // the end. *** has an expression -- return code -- ?
//ht_push, // these are supposedly.. ) use asgn(stack(0), x)
//ht_pop, // ..going to be removed ) or asgn(x, stack(0))
ht__end
};
const char *ht_strings[] =
{
"expr",
"jump",
"call",
"jcond",
"ret"//,
//"push",
//"pop"
};
enum
{
et_literal32, // subtype is 0
et_nodetype, // subtype is nt_*, except nt_literal; u.data[] are arguments (NULL for none)
et_nodeoperand, // subtype is no_*; 'u' not used.
et_asgn, // assignment expression
et_reg, // subtype is 0 for general register, u.data[0] is argsize_*, u.data[1] is register index number
et_reg_reg, // subtype is 0. u.data[0] is argsize_*, u.data[1] is 1st reg index, u.data[2] is 2nd reg index.
et_ea16, // subtype is segreg + 8 * index_shift. u.data[0] is base + 32 * index (use 31 for none). data[1] is disp.
et_ea32, // subtype is segreg + 8 * index_shift. u.data[0] is base + 32 * index (use 31 for none). data[1] is disp.
et_ea64, // subtype is segreg + 8 * index_shift. u.data[0] is base + 32 * index (use 31 for none). data[1] is disp.
// add symbol table references (identifier) support here...
};
struct expr_t
{
U1 size; // ns_*, ns_void for et_literal32 and nt_void
U1 type; // et_*
U2 subtype; // depends on 'type'.
union
{
U4 data[3]; // data[0] is a 32-bit value for et_literal.
expr_t *subexpr[3];
} u;
void make_literal32(U4 value)
{
size = ns_void;
type = et_literal32;
subtype = 0;
u.data[0] = value;
u.data[1] = 0;
u.data[2] = 0;
}
void make_void()
{
size = ns_void;
type = et_nodetype;
subtype = nt_void;
u.data[0] = 0;
u.data[1] = 0;
u.data[2] = 0;
}
expr_t()
{
u.subexpr[0] = NULL;
u.subexpr[1] = NULL;
u.subexpr[2] = NULL;
}
};
void kill_expr(expr_t *x)
{
if(x == NULL)
return;
if(x->type == et_nodetype || x->type == et_asgn)
{
kill_expr(x->u.subexpr[0]);
kill_expr(x->u.subexpr[1]);
kill_expr(x->u.subexpr[2]);
}
delete x;
}
struct hcode_file_t
{
U8 flags; // bit =1 if use/def that flag
U8 reg[16]; // bits =1 for a byte if use/def that byte
void add_register(U4 argsize, U4 index)
{
if(index > 8)
throw std::runtime_error("unsupported register index");
if(argsize == argsize_8)
{
if(index < 4)
reg[index] |= 0x000000ff;
else
reg[index - 4] |= 0x0000ff00;
return;
}
if(argsize == argsize_16)
{
reg[index] |= 0x0000ffff;
return;
}
if(argsize == argsize_32)
{
reg[index] |= 0xffffffff;
return;
}
throw std::runtime_error("unsupported register size");
}
void clear()
{
flags = 0;
for(int i = 0; i < 16; ++i)
reg[i] = 0;
}
bool equals(hcode_file_t &src)
{
if(flags != src.flags)
return false;
for(int i = 0; i < 16; ++i)
if(reg[i] != src.reg[i])
return false;
return true;
}
bool is_zero()
{
if(flags != 0)
return false;
for(int i = 0; i < 16; ++i)
if(reg[i] != 0)
return false;
return true;
}
void set_bits(hcode_file_t &src)
{
flags |= src.flags;
for(int i = 0; i < 16; ++i)
reg[i] |= src.reg[i];
}
void set_bits_mask(hcode_file_t &a, hcode_file_t &b)
{
flags |= (a.flags & ~b.flags);
for(int i = 0; i < 16; ++i)
reg[i] |= (a.reg[i] & ~b.reg[i]);
}
void debug()
{
if((U4)flags != 0)
std::cout << " flags=" << std::hex << (U4)flags << std::dec << std::endl;
for(int i = 0; i < 8; ++i)
{
if((U4)reg[i] != 0)
std::cout << " reg" << i << "=" << std::hex << (U4)reg[i] << std::dec << std::endl;
}
}
const char *debug_get_reg(int reg, U8 mask)
{
const char *regs8[] = {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"};
const char *regs16[] = {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"};
const char *regs32[] = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"};
if(reg >= 8)
return "???";
if(mask == 0xff00)
return (reg < 4) ? regs8[reg + 4] : "???";
if(mask == 0xff)
return (reg < 4) ? regs8[reg] : "???";
if(mask == 0xffff)
return regs16[reg];
return regs32[reg];
}
};
struct hcode_t
{
int opcode; // ht_*
std::list<expr_t *> operands; // generally we use front(), and when there's a 2nd arg, back().
// also used for argument lists.
hcode_file_t use, def;
hcode_t()
{
use.clear();
def.clear();
}
~hcode_t()
{
for(std::list<expr_t *>::iterator i = operands.begin(); i != operands.end(); ++i)
kill_expr(*i);
}
};
struct procedure_t;
struct bb_t
{
procedure_t *proc;
U8 address;
std::vector<bb_t *> out_edges;
std::set<bb_t *> in_edges;
std::list<hcode_t> hcode;
hcode_file_t live_in;
hcode_file_t live_out;
hcode_file_t live_use;
hcode_file_t def;
bb_t()
{
live_in.clear();
live_out.clear();
live_use.clear();
def.clear();
}
};
typedef std::map<U8, bb_t> bb_map_t;
struct procedure_t
{
U8 address; // this way we can use procedure_t * ptr's and still get the address if we need it
bb_t *root; // NULL if not scanned yet
bb_map_t map; // basic block container
std::list<bb_t *> dfslast;
hcode_file_t live_in;
hcode_file_t live_out;
hcode_file_t vote; // anything used after a call to this procedure, in the subsequant basic block
// gets a bit set here. AND vote bits by live_out to learn the relevant outputs.
bool remove_cc; // set to true if NO basic blocks have any flags live_use'd.
procedure_t()
{
live_in.clear();
live_out.clear();
vote.clear();
remove_cc = true;
}
};
typedef std::map<U8, procedure_t> procedure_map_t;
#include "dataflow.h"
class reparser_t
{
memory_t &memory;
int dsz;
procedure_t &proc;
procedure_map_t &procs;
call_graph_t &xgraph;
public:
reparser_t(memory_t &mem, int dsz_t, procedure_t &proc_t, procedure_map_t &procs_t, call_graph_t &xgraph_t) :
memory(mem),
dsz(dsz_t),
proc(proc_t),
procs(procs_t),
xgraph(xgraph_t)
{
for(U8 i = 0; i < memory.image_size; ++i)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -