📄 dataflow.h
字号:
// *i is the current hcode_t we're processing.
if(i->opcode == ht_expr)
{
if(i->operands.size() != 1)
throw std::runtime_error("bad expression encountered (1)");
expr_t *asgn = i->operands.front();
if(asgn->type != et_asgn)
throw std::runtime_error("bad expression encountered (2)");
expr_t *dest = asgn->u.subexpr[0];
// note: we leave in asgn(void) -- could just as easilly erase it.
if(dest == NULL)
continue;
if(dest->type == et_nodeoperand && dest->subtype == no_void)
continue;
expr_t *src = asgn->u.subexpr[1];
if(src == NULL)
throw std::runtime_error("bad expression encountered (3)");
if(src->type == et_nodeoperand && src->subtype == no_void)
throw std::runtime_error("bad expression encountered (4)");
// ok, asgn(dest, src) encountered.
// dest may be a reg/flag, or deref(ea_<16/32/64>).
// src may be a complex expr--if you encounter any flags, regs, or
// ea's, it's all input.
process_1_asgn(dest, i->use, i->def);
process_1_asgn(src, i->use, i->use);
// see if dest, src are both the exact same register.
if(dest->type == et_reg && src->type == et_reg)
{
if(dest->subtype != 0 || src->subtype != 0)
throw std::runtime_error("bad expression encountered (5)");
if(dest->u.data[0] == src->u.data[0] && dest->u.data[1] == src->u.data[1])
{
// asgn(<register>, <exact same register>);
std::list<hcode_t>::iterator old = i;
++i;
#if ALLOW_ERASE
bb->hcode.erase(old);
#endif
continue;
}
}
++i;
}
else
{
// *** Handle loop/loopz/loopnz here.
// *** If it's a loop* instruction, insert asgn(rcx:asz, sub(rcx:asz, 1)) in the
// *** hcode. WHY DO THAT???
// When we get to post_process_1(), all hcode nodes will have a variety
// of inputs, but will 'define' only a register OR a flag AT MOST.
if(i->opcode == ht_jcond)
{
if(i->operands.size() < 2)
throw std::runtime_error("malformed jcond (1)");
// get cc.
int cc = -1;
if(i->operands.front() != NULL)
{
if(i->operands.front()->type != et_literal32)
throw std::runtime_error("malformed jcond (2)");
cc = i->operands.front()->u.data[0];
}
if(cc != -1)
use_flags(cc, i->use);
// handle cx/ecx/rcx here.
std::list<expr_t *>::iterator tmp = i->operands.begin();
++tmp;
if(*tmp != NULL)
{
if((*tmp)->type != et_literal32)
throw std::runtime_error("malformed jcond (3)");
U4 rcx = (*tmp)->u.data[0];
U1 rcx_size = (U1)rcx;
i->use.add_register(rcx_size, 1); // cx, ecx, or rcx
if(rcx >= 0x300)
{
i->def.add_register(rcx_size, 1); // cx, ecx, or rcx
expr_t *cx = new expr_t();
if(rcx_size == argsize_16)
cx->size = ns_word;
else
if(rcx_size == argsize_32)
cx->size = ns_dword;
else
if(rcx_size == argsize_64)
cx->size = ns_qword;
cx->type = et_reg;
cx->subtype = 0;
cx->u.data[0] = rcx_size;
cx->u.data[1] = 1; // cx, ecx, or rcx
cx->u.data[2] = 0; // not used
expr_t *cxsrc = new expr_t();
*cxsrc = *cx;
expr_t *one = new expr_t();
one->make_literal32(1);
expr_t *sub = new expr_t();
sub->size = cx->size;
sub->type = et_nodetype;
sub->subtype = nt_sub;
sub->u.subexpr[0] = cxsrc;
sub->u.subexpr[1] = one;
sub->u.subexpr[2] = NULL;
expr_t *asgn = new expr_t();
asgn->size = cx->size;
asgn->type = et_asgn;
asgn->subtype = 0;
asgn->u.subexpr[0] = cx;
asgn->u.subexpr[1] = sub;
asgn->u.subexpr[2] = NULL;
std::list<hcode_t>::iterator ii = bb->hcode.insert(i, hcode_t());
ii->opcode = ht_expr;
ii->operands.push_back(asgn);
ii->use.add_register(rcx_size, 1); // new hcode uses cx/ecx/rcx
ii->def.add_register(rcx_size, 1); // and defines it as well
// just inserted asgn(rcx, sub(rcx, 1)) and manually did use/def for it
// no longer need to implictly decrement cx/ecx/rcx - we did it explictly.
(*tmp)->u.data[0] -= 0x100;
}
}
}
else
if(i->opcode == ht_jump)
;
else
if(i->opcode == ht_call)
;
else
if(i->opcode == ht_ret)
;
else
throw std::runtime_error("Unknown hcode opcode encountered");
++i;
}
}
post_process_1(bb);
}
void do_process(std::list<bb_t *> &dfslast, bb_t *node)
{
if(process_visited.find(node->address) != process_visited.end())
return;
process_visited.insert(node->address);
// call all out edges
std::vector<bb_t *>::iterator i = node->out_edges.end();
for(;;)
{
if(i == node->out_edges.begin())
break;
--i;
if((*i)->proc == node->proc)
do_process(dfslast, *i);
}
dfslast.push_front(node);
}
void build_call_dfs(std::set<U8> &visited, std::list<U8> &dfs, U8 address, parser_call_t &calls)
{
if(visited.find(address) != visited.end())
return; // already been here.
visited.insert(address);
for(std::set<U8>::iterator i = calls.calls.begin(); i != calls.calls.end(); ++i)
build_call_dfs(visited, dfs, *i, xgraph[*i]);
dfs.push_back(address);
}
// --- begin dcc algorithm
// This section contains code with derrived from dcc, a GPL'd decompiler,
// with the following copyright notice:
// (C) Cristina Cifuentes
// This decompiler is also GPL'd code, so there is no problem with that.
public:
// This does the actual data flow analysis.
// It goes thru procedures via a depth-first search, so that we'll have
// information on any called procedures already.
void process_2()
{
std::list<U8> dfs;
std::set<U8> visited;
for(call_graph_t::iterator i = xgraph.begin(); i != xgraph.end(); ++i)
build_call_dfs(visited, dfs, i->first, i->second);
visited.clear();
hcode_file_t live_out;
for(std::list<U8>::iterator i = dfs.begin(); i != dfs.end(); ++i)
{
if(!xgraph[*i].decompile)
continue; // can't decompile this procedure
//std::cout << std::hex;
//std::cout << *i << ":" << std::endl;
procedure_t &proc = procs[*i];
// ***
// *** proc.live_out should be the UNION of everything output by a basic block
// (except when something is preserved/restored; two xchg's do nothing!)
// *** proc.live_in is a set. if a path exists that uses a register/flag
// before it was defined, then the register/flag needs to be in the set.
// *** if we encounter a procedure, we'll already know that procedure's live_in
// and live_out. Give feedback--if we USE something from a procedure, tell
// that procedure (how to do this? it's complicated! e.g. we could return
// a register from a procedure and the caller could use it.)
// *** so, we need two mechanisms of tracking--does something hold its input
// value? does something hold a value OUTPUT from a procedure--and if so,
// what procedure?
live_out.clear();
live_reg_analysis(proc, live_out);
}
}
private:
void live_reg_analysis(procedure_t &proc, hcode_file_t &live_out)
{
#if 1
// --begin testing code--
live_out.clear(); // already zero'd
for(std::list<bb_t *>::iterator j = proc.dfslast.begin(); j != proc.dfslast.end(); ++j)
{
bb_t *bb = *j;
live_out.set_bits(bb->def);
if(bb->live_use.flags != 0)
proc.remove_cc = false;
for(std::vector<bb_t *>::iterator k = bb->out_edges.begin(); k != bb->out_edges.end(); ++k)
{
if((*k)->proc != &proc)
{
// call/invokation to another procedure
live_out.set_bits((*k)->proc->live_out);
proc.remove_cc &= (*k)->proc->remove_cc;
}
}
}
// --end testing code--
bool done;
hcode_file_t prev_live_in, prev_live_out;
proc.live_out = live_out;
bb_t *bb = NULL;
do
{
done = true;
// the following line is correct - do not reverse it!
for(std::list<bb_t *>::iterator j = proc.dfslast.end(); ;)
{
if(j == proc.dfslast.begin())
break;
--j;
bb = *j;
//std::cout << " " << (U4)(bb->address) << std::endl;;
// process this bb.
prev_live_in = bb->live_in;
prev_live_out = bb->live_out;
if(bb->out_edges.empty())
{
bb->live_out = live_out;
// fixme--get return expr of function (dcc does that here).
if(!bb->hcode.empty())
{
if(bb->hcode.back().opcode == ht_ret)
{
bb->hcode.back().use = live_out;
}
}
}
else
{
// dcc does this -- "check successors".
for(std::vector<bb_t *>::iterator k = bb->out_edges.begin(); k != bb->out_edges.end(); ++k)
{
// *** we have out edges to different procedures, I don't think dcc does that.
// *** this is a fix for that.
// *** how are we handling jmp to a different procedure?
if(bb->proc == &proc)
{
bb->live_out.set_bits((*k)->live_in);
}
}
// dcc -- "propagate to invoked procedure".
if(!bb->hcode.empty())
{
if(bb->hcode.back().opcode == ht_call)
{
// fixme--add support for library routines and indirect calls, here.
if(bb->out_edges.size() == 1)
{
// decompile to: call f() then jmp - e.g. invoke - f().
throw std::runtime_error("call to the following instruction not yet implemented");
}
if(bb->out_edges.size() != 2)
throw std::runtime_error("internal error - call node needs two targets");
procedure_t &callee = *bb->out_edges[1]->proc;
// this algorithm works like dcc...
//if(&callee != &proc)
{
bb->live_out = callee.live_in;
// fixme--what about indirect calls? see above
if(!bb->live_out.is_zero())
{
bb->hcode.back().use = callee.live_in;
bb->hcode.back().def = callee.live_out;
}
}
bb_t *next = bb->out_edges[0];
callee.vote.set_bits(next->live_in);
}
// fixme--what about JUMPs to a procedure?
}
#if 1
else
if(bb->hcode.back().opcode == ht_jump)
{
if(bb->out_edges.empty())
throw std::runtime_error("jump node with no targets encountered");
// direct jumps have only one target.
// indirect jumps have >= 1 target and are assumed to be to the same procedure.
if(bb->out_edges.size() == 1 && bb->out_edges.front()->proc != &proc)
{
throw std::runtime_error("jump to a different procedure - not implemented yet");
}
}
#endif
}
bb->live_in = bb->live_use;
bb->live_in.set_bits_mask(bb->live_out, bb->def);
if(!prev_live_in.equals(bb->live_in) || !prev_live_out.equals(bb->live_out))
done = false;
}
} while(!done);
if(bb != NULL)
{
if(!bb->live_in.is_zero())
{
//std::cout << "case 1" << std::endl;
proc.live_in = bb->live_in;
}
else
{
//std::cout << "case 2" << std::endl;
}
// fixme -- remove references to register variables.
// dcc: if si/di is a register variable, remove it from "live in"
// for 'proc' and 'bb'.
}
//std::cout << std::dec << std::endl;
//proc.live_out = live_out; // was at beginning
// --- begin testing code ---
// ** Note, this code does NOT work.
if(proc.remove_cc)
{
for(std::list<bb_t *>::iterator j = proc.dfslast.begin(); j != proc.dfslast.end(); ++j)
{
bb_t *bb = *j;
for(std::list<hcode_t>::iterator k = bb->hcode.begin(); k != bb->hcode.end(); )
{
if(k->def.flags != 0)
{
// we have already made sure only one flag is defined at a time.
std::list<hcode_t>::iterator h = k;
for(++h; h != bb->hcode.end(); ++h)
{
if((h->use.flags & k->def.flags) != 0)
break;
}
if(h == bb->hcode.end())
{
h = k;
++k;
bb->hcode.erase(h);
}
else
++k;
}
else
++k;
}
}
}
// --- end testing code ---
#endif
}
// --- end dcc algorithm
};
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -