📄 dataflow.h
字号:
// dataflow.h
// Copyright (C) 2008 Willow Schlanger
#define ALLOW_ERASE 1
#define OPTIMIZE 1
#if DEBUG_LINE_NUMBERS
#undef ALLOW_ERASE
#define ALLOW_ERASE 0
#undef OPTIMIZE
#define OPTIMIZE 0
#endif
class dataflow_t
{
call_graph_t &xgraph;
memory_t &memory;
int dsz;
std::set<U8> process_visited;
public:
procedure_map_t procs;
dataflow_t(call_graph_t &xgraph_t, memory_t &mem_t, int dsz_t) :
xgraph(xgraph_t),
memory(mem_t),
dsz(dsz_t)
{
}
void dataflow(procedure_t &p, parser_call_t &j);
void go(U8 target)
{
std::map<U8, procedure_t>::iterator k = procs.find(target);
if(k != procs.end())
{
return; // already been here
}
procedure_t &p = procs[target]; // this must be here!!!
// initialize 'p' here.
p.address = target;
p.root = NULL;
parser_call_t &j = xgraph.find(target)->second;
// call all children.
for(std::set<U8>::iterator i = j.calls.begin(); i != j.calls.end(); ++i)
go(*i);
dataflow(p, j);
}
// This goes thru all basic blocks in all procedures still marked 'decompile'.
// For each basic block, it adds flag/register use/def information to each hcode node.
// When done with a basic block, it removes dead register/flag assignments.
// mov ax,5
// mov al,3 -- new definition must be LARGER to be considered dead.
// e.g. mov al,3
// mov ax,7
// here, mov al,3 is removed.
// we have a list of hcode pointers for things that are live on output for the basic
// block. [[do we need such a list ? if so, generate it at post_process_1().]]
//
// later, we will find the live outputs for the procedure.
// also, we will learn what is later input FROM all invokations of a procedure (except
// external pocedures, which have a known API).
// finally, we can determine what is REALLY output by a given basic block--not just
// live, but gets used in some path.
// if something is not REALLY output, and it is later used by the basic block exactly
// once, then absorb it into the expression by marking it 'unique'.
// ex.:
// asgn(x86_ax, w[bp+6]);
// asgn(w[bp+6], add(x86_ax, w[bp+4]));
// here, ax might well be live on output but since it's not REALLY output, and since
// it's later used exactly once, we combine the hcode's into
// asgn(w[bp+6], add(w[bp+6], w[bp+4]));
// register arguments are handled specially.
// anything that remains must be a register variable.
//
// later, we discover arguments--input registers or the stack--and makes a list of
// output registers/flags. also looks for ret <bytes>.
// also identify 'identifiers' when possible (see above).
void process()
{
for(procedure_map_t::iterator i = procs.begin(); i != procs.end(); ++i)
{
if(!xgraph[i->first].decompile)
continue; // can't decompile this procedure
process_visited.clear();
do_process(i->second.dfslast, i->second.root);
}
process_visited.clear();
}
void process_1()
{
for(procedure_map_t::iterator i = procs.begin(); i != procs.end(); ++i)
{
if(!xgraph[i->first].decompile)
continue; // can't decompile this procedure
//std::cout << std::hex;
//std::cout << i->first << ":" << std::endl;
for(std::list<bb_t *>::iterator j = i->second.dfslast.begin(); j != i->second.dfslast.end(); ++j)
{
//std::cout << " " << (U4)((*j)->address) << std::endl;;
process_1_bb(*j);
}
//std::cout << std::dec << std::endl;
}
}
private:
void use_flags(U4 cc, hcode_file_t &use)
{
switch(cc)
{
case 0:
case 1:
use.flags |= (1 << 11);
break;
case 2:
case 3:
use.flags |= (1 << 0);
break;
case 4:
case 5:
use.flags |= (1 << 6);
break;
case 6:
case 7:
use.flags |= (1 << 0) | (1 << 6);
break;
case 8:
case 9:
use.flags |= (1 << 7);
break;
case 10:
case 11:
use.flags |= (1 << 2);
break;
case 12:
case 13:
use.flags |= (1 << 11) | (1 << 7);
break;
case 14:
case 15:
use.flags |= (1 << 11) | (1 << 7) | (1 << 6);
break;
default:
throw std::runtime_error("unhandled condition code");
}
}
void process_1_asgn(expr_t *x, hcode_file_t &use, hcode_file_t &du)
{
if(x->type == et_literal32)
return;
if(x->type == et_nodetype)
{
if(x->subtype == nt__x86_cc)
{
use_flags(x->u.subexpr[0]->u.data[0], use);
}
bool same_reg = false;
expr_t *a = x->u.subexpr[0];
expr_t *b = x->u.subexpr[1];
if(a != NULL && b != NULL)
{
if(a->type == et_reg && a->subtype == 0 &&
b->type == et_reg && b->subtype == 0
)
{
if(a->u.data[0] == b->u.data[0] &&
a->u.data[1] == b->u.data[1]
)
same_reg = true;
}
}
// sub(<reg>,<same reg>) -> 0
// xor(<reg>,<same reg>) -> 0
// and(<reg>,<same reg>) -> <reg>
// or(<reg,<same reg>) -> <reg>
#if OPTIMIZE
if(same_reg)
{
if(x->subtype == nt_sub || x->subtype == nt_bitxor)
{
delete a;
delete b;
x->u.subexpr[0] = NULL;
x->u.subexpr[1] = NULL;
x->make_literal32(0);
return;
}
if(x->subtype == nt_bitand || x->subtype == nt_bitor)
{
du.add_register(a->u.data[0], a->u.data[1]);
*x = *a;
delete a;
delete b;
return;
}
}
#endif
for(int i = 0; i < 3; ++i)
{
if(x->u.subexpr[i] != NULL)
process_1_asgn(x->u.subexpr[i], use, du);
else
break;
}
return;
}
if(x->type == et_nodeoperand)
{
if(x->subtype == no_undefined)
;
else
if(x->subtype > no__begin_x86_flags && x->subtype < no__end_x86_flags)
{
switch(x->subtype)
{
case no_x86_df:
du.flags |= (1 << 10);
break;
case no_x86_af:
du.flags |= (1 << 4);
break;
case no_x86_of:
du.flags |= (1 << 11);
break;
case no_x86_pf:
du.flags |= (1 << 2);
break;
case no_x86_sf:
du.flags |= (1 << 7);
break;
case no_x86_zf:
du.flags |= (1 << 6);
break;
case no_x86_cf:
du.flags |= (1 << 0);
break;
defualt:
throw std::runtime_error("unsupported flag");
break;
}
}
else
throw std::runtime_error("unsupported node-operand type");
return;
}
if(x->type == et_reg)
{
if(x->subtype != 0)
throw std::runtime_error("unsupported register subtype (1)");
du.add_register(x->u.data[0], x->u.data[1]);
return;
}
if(x->type == et_reg_reg)
{
if(x->subtype != 0)
throw std::runtime_error("unsupported register subtype (2)");
du.add_register(x->u.data[0], x->u.data[1]);
du.add_register(x->u.data[0], x->u.data[2]);
return;
}
if(x->type == et_ea16 || x->type == et_ea32)
{
if((x->u.data[0] & 31) != 31)
use.add_register((x->type == et_ea16) ? argsize_16 : argsize_32, (x->u.data[0]) & 31);
if(((x->u.data[0] / 32) & 31) != 31)
use.add_register((x->type == et_ea16) ? argsize_16 : argsize_32, (x->u.data[0] / 32) & 31);
return;
}
throw std::runtime_error("unsupported expression type");
}
// This is called by process_1_bb() on a basic block when it's done with that block, for "post-process"
// processing. It removes hcode node's that define something that is later defined over, in same
// basic block.
// *** update bb->def and bb->live_use.
void post_process_1(bb_t *bb)
{
#if DEBUG_LINE_NUMBERS
int line = 0;
std::cout << std::hex << (U4)bb->address << std::dec << ":" << std::endl;
#endif
for(std::list<hcode_t>::iterator i = bb->hcode.begin(); i != bb->hcode.end();)
{
#if DEBUG_LINE_NUMBERS
++line;
std::cout << " " << line << std::endl;
#endif
for(int j = 0; j < 16; ++j)
{
// if a register bit is used BEFORE being defined, then set the corresponding
// bit in bb->live_use.reg[j].
bb->live_use.reg[j] |= (i->use.reg[j] & ~bb->def.reg[j]);
bb->def.reg[j] |= i->def.reg[j];
}
bb->live_use.flags |= (i->use.flags & ~bb->def.flags);
if(i->def.flags != 0)
{
int def_flag = -1;
int j;
for(j = 0; j < 64; ++j)
if((i->def.flags & (1 << j)) != 0)
break;
def_flag = j;
if(i->def.flags != (1 << def_flag))
throw std::runtime_error("hcode modifies more than one flag");
for(j = 0; j < 16; ++j)
if(i->def.reg[j] != 0)
throw std::runtime_error("hcode modifies one or more registers in addition to modifying a flag");
// def_flag is modified by this hcode!
bb->def.flags |= (1 << def_flag);
std::list<hcode_t>::iterator tmp = i;
++i;
for(std::list<hcode_t>::iterator k = i; k != bb->hcode.end(); ++k)
{
if((k->use.flags & (1 << def_flag)) != 0)
break; // someone inputs it
if((k->def.flags & (1 << def_flag)) != 0)
{
#if ALLOW_ERASE
bb->hcode.erase(tmp);
#endif
break;
}
}
continue;
}
// see if any registers are modified by this hcode.
int def_reg = -1;
int j;
for(j = 0; j < 16; ++j)
if(i->def.reg[j] != 0)
break;
if(j != 16)
{
def_reg = j;
int def_reg_2 = -1;
for(++j; j < 16; ++j)
if(i->def.reg[j] != 0)
break;
def_reg_2 = j;
if(j != 16)
{
for(++j; j < 16; ++j)
if(i->def.reg[j] != 0)
throw std::runtime_error("hcode modifies more than two registers");
}
// def_reg is modified by this hcode.
std::list<hcode_t>::iterator tmp = i;
++i;
int kill = 0;
for(std::list<hcode_t>::iterator k = i; k != bb->hcode.end(); ++k)
{
if((tmp->def.reg[def_reg] & k->use.reg[def_reg]) != 0)
break; // someone inputs 1st reg
if(def_reg_2 != -1)
if((tmp->def.reg[def_reg_2] & k->use.reg[def_reg_2]) != 0)
break; // someone inputs 2nd reg
if((tmp->def.reg[def_reg] & ~k->def.reg[def_reg]) == 0)
{
kill |= 1;
if(def_reg_2 == -1)
break;
}
if(def_reg_2 != -1)
{
if((tmp->def.reg[def_reg_2] & ~k->def.reg[def_reg_2]) == 0)
{
kill |= 2;
if(kill == 3)
break;
}
}
}
#if ALLOW_ERASE
if(kill == 1 && def_reg_2 == -1)
kill == 3;
if(kill == 3)
{
bb->hcode.erase(tmp);
}
#endif
continue;
}
++i;
}
}
void process_1_bb(bb_t *bb)
{
int line = 0;
for(std::list<hcode_t>::iterator i = bb->hcode.begin(); i != bb->hcode.end();)
{
// ++line;
// std::cout << " " << std::dec << line << std::hex << std::endl;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -