📄 crudcom2.cpp
字号:
{
#if 0
std::cout << std::hex;
for(std::set<bb_t *>::iterator k = (*j)->in_edges.begin(); k != (*j)->in_edges.end(); ++k)
std::cout << " " << (U4)(*k);
std::cout << " vs. " << (U4)(&bb) << std::endl;
std::cout << std::dec;
#endif
(*j)->in_edges.erase(&bb);
}
}
}
//p.map.clear();
//p.root = NULL;
#endif
//std::cout << "Failure at " << std::hex << (U4)p.address << std::dec << std::endl;
return;
}
//std::cout << "Success at " << std::hex << (U4)p.address << std::dec << std::endl;
for(U8 i = 0; i < memory.image_size; ++i)
{
if(memory.meta[i].local == 1)
memory.meta[i].decompiled = 1;
}
memory.meta[p.address].decompiled = 1;
#if 0
std::printf("%08x", (U4)((target + memory.image_base) & memory.rip_mask));
if(!j.decompile)
std::printf("*");
std::printf(":");
for(std::set<U8>::iterator i = j.calls.begin(); i != j.calls.end(); ++i)
std::printf(" %08x", (U4)((*i + memory.image_base) & memory.rip_mask));
std::printf("\n");
#endif
}
} // namespace x86s
// --- main program ---
#include <list>
using namespace x86s;
using namespace std;
// Reads the entire contents of `filename` into a freshly allocated buffer.
//
// filename: path of the file to load; it is opened in binary mode.
// size:     out-parameter; receives the number of bytes read. It is only
//           written when the function succeeds.
//
// Returns a new[]-allocated array holding the file's bytes (the caller
// releases it with delete []), or NULL if the file can't be opened, is empty,
// can't be measured, has a length that doesn't fit in a U4, or can't be read
// completely.
static U1 *read_file(const char *filename, U4 *size)
{
    FILE *f = fopen(filename, "rb");
    if(f == NULL)
        return NULL;

    // Measure the file by seeking to its end. feof() is useless for this
    // purpose: it only becomes true after a read has already hit end-of-file.
    long length = -1;
    if(fseek(f, 0, SEEK_END) == 0)
        length = ftell(f);

    // length <= 0 covers seek/tell failures as well as an empty file; the
    // round trip through U4 rejects lengths that would be truncated.
    if(length <= 0 || (long)(U4)length != length)
    {
        fclose(f);
        return NULL;
    }
    rewind(f);

    U1 *data = new U1 [(U4)length];
    // Read everything as one item so a short read counts as a failure.
    if(fread(data, (U4)length, 1, f) != 1)
    {
        delete [] data;
        fclose(f);
        return NULL;    // unable to read
    }
    fclose(f);
    *size = (U4)length;
    return data;
}
// This shows how to do a depth-first search on all decompilable-functions.
// NOTE: this sample is disabled -- everything between the #if 0 below and its
// matching #endif is removed by the preprocessor.
#if 0
// Targets that test_dfs() has already visited; used to avoid walking the same
// target twice.
std::set<U8> test_loc;
// Visits `target` and, recursively, every entry of parser.xgraph[target].calls.
// Once the callees have been visited it prints one line for `target`:
//   <address>[*]: <callee address> <callee address> ...
// where each address is (offset + memory.image_base) & memory.rip_mask,
// truncated to U4 and printed as 8 hex digits, and the '*' is printed when
// parser.xgraph[target].decompile is false.
void test_dfs(parser_t &parser, U8 target, memory_t &memory)
{
if(test_loc.find(target) != test_loc.end())
return; // been here already
test_loc.insert(target);
// call all children.
for(std::set<U8>::iterator i = parser.xgraph[target].calls.begin(); i != parser.xgraph[target].calls.end(); ++i)
test_dfs(parser, *i, memory);
// Children are done: now report this target itself (post-order output).
std::printf("%08x", (U4)((target + memory.image_base) & memory.rip_mask));
if(!parser.xgraph[target].decompile)
std::printf("*");
std::printf(":");
// List the address of every target this one calls.
for(std::set<U8>::iterator i = parser.xgraph[target].calls.begin(); i != parser.xgraph[target].calls.end(); ++i)
std::printf(" %08x", (U4)((*i + memory.image_base) & memory.rip_mask));
std::printf("\n");
}
#endif
// Prints the copyright banner and the command-line usage summary to
// standard output, one line at a time.
void help()
{
    // An empty entry produces the blank separator line.
    static const char *const usage_lines[] =
    {
        "Copyright (C) 2008 Willow Schlanger",
        "Decompiler for x86 and x86-64 binary image files",
        "",
        "usage: crudcom2 filename bits origin [entrypoint]",
        "bits must be one of: 16, 32, 64 (64 bit mode needs some work!)",
        "the entrypoint is an offset in the image, and defaults to 0."
    };
    const unsigned line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);

    for(unsigned n = 0; n < line_count; ++n)
        std::cout << usage_lines[n] << std::endl;
}
int main(int argc, char **argv)
{
std::string filename;
int dsz = argsize_16;
if(argc < 4)
{
help();
return 1;
}
filename = argv[1];
std::string bits = argv[2];
if(bits == "16")
dsz = argsize_16;
else
if(bits == "32")
dsz = argsize_32;
else
if(bits == "64")
dsz = argsize_64;
else
{
help();
std::cout << std::endl << "error: bad number of bits -- must be exactly one of these: 16, 32, 64" << std::endl;
return 1;
}
char *endp;
U4 origin = strtol(argv[3], &endp, 0);
if(*endp != '\0')
{
help();
std::cout << std::endl << "error: bad origin given - use 0x for hex numbers" << std::endl;
return 1;
}
U4 myentry = 0;
if(argc >= 5)
{
myentry = strtol(argv[4], &endp, 0);
if(*endp != '\0')
{
help();
std::cout << std::endl << "error: bad entrypoint given - use 0x for hex numbers" << std::endl;
return 1;
}
}
bool disassemble = true; // generate disassembly output?
bool skip_decompiled = false; // set to true to skip decompiled functions in "beta" output (do this when out.cpp is being produced)
U4 size_tag;
U1 *image = read_file(filename.c_str(), &size_tag);
if(image == NULL)
{
std::cout << filename << ": can\'t open and read, or file has a size of 0 bytes\n" << std::endl;
return 1;
}
U8 size = size_tag;
std::list<U8> entries;
entries.push_back(myentry);
//U8 entry = myentry;
memory_t memory;
if(dsz == argsize_16)
memory.rip_mask = 0xffff;
else
if(dsz == argsize_32)
memory.rip_mask = 0xffffffff;
else
memory.rip_mask = (U8)(-1ll);
memory.image = image;
memory.image_size = size;
memory.image_base = origin;
memory.cs_base = 0;
memory.meta = new metadata_t [size];
try
{
parser_t parser(memory, dsz);
std::cout << "Parsing..." << std::endl;
bool again;
do
{
parser.reset();
for(std::list<U8>::iterator i = entries.begin(); i != entries.end(); ++i)
parser.add_entrypoint(*i);
again = parser.parse();
} while(again);
// decompile here. clear 'decompile' flag for a function if we were unable
// to decompile it.
// crudcom2: just do data flow analysis on a depth-first-search basis.
// convert icode to tcode. make the semantics generator generate a maximum
// size of all insn tcode's, as a #DEFINE. use a static buffer with that size.
// copy tcode to this buffer. specialize all sizes--incl. argsize[0|1|2] sizes.
// now convert to hcode, with chaining and special handling for temporaries,
// arguments, and considering overlaps and memory accesses (so-called deref
// nodes).
// Store all the hcode's.
// Bail out if we ever encounter an instruction with no semantics associated
// with it (no operations are represented as asgn(void)), except for insn's
// we have special handling for.
// Where/when to do idiom analysis? Also remember to check the peep-er and to
// make switch's work. We don't want e.g. mov edi,edi to think edi is an input,
// etc.
// All we want is global data flow analysis, at this point--crudcom3.
// The idea is, generate hcode for the procedure then pass that to the universal
// decompiling machine--and have that machine perform dataflow analysis. Is this
// a good idea? I think it is--and implement a reaching algorithm...
// However for crudcom2, maybe all we need is hcode with use/def chains but no
// real expression conversion. I'm not sure what to do just yet...
// Remember, if you ever encounter an entrypoint (besides the current procedure's
// entrypoint) whose metadata has 'entry' set, just 'invoke' that function and
// continue processing locations for the current procedure.
//printf("Call graph:\n");
//test_loc.clear();
//test_dfs(parser, entry, memory); // call for each entrypoint
#if 1
dataflow_t dataflow(parser.xgraph, memory, dsz); // fixme--pass in parser.indirect too.
std::cout << "Reparsing..." << std::endl;
for(std::list<U8>::iterator i = entries.begin(); i != entries.end(); ++i)
dataflow.go(*i);
// This generates a dfs-last list of procedures, makes possible process_1() etc. calls.
std::cout << "Processing..." << std::endl;
dataflow.process();
// This goes thru all basic blocks, does a little optimization and sets use, def things and
// removes code that's dead for a given basic block.
std::cout << "Processing (1)..." << std::endl;
dataflow.process_1();
// Now do REAL dataflow analysis. Find out what registers are input by a procedure and what
// registers or flags are potentially output. Bail out if any condition codes are ever
// externally input. Note--for CALLs, this looks up the external inputs / outputs of the
// invoked procedure. For ex., if f1() calls f2() and f2() uses esi, it could be that
// esi was input into f2 and then fed into f2. f1 gets an esi register argument.
//
// Finally we need to deal with the stack. Don't assume PUSH and POP really modify/use
// their operand. Learn the arguments the procedure uses--via e.g. [ebp+4] existance,
// or use of e.g. ret 4. IS THIS A SEPARATE ISSUE? WHY DO WE WANT TO DO THIS HERE??
//
// Also after a call, notify the callee about what register/flag outputs actually got used.
// This makes it possible to do some "pruning" at the end.
std::cout << "Processing (2)..." << std::endl;
dataflow.process_2();
#if 1 // set to 1 for output
//for(std::list<U8>::iterator i = entries.begin(); i != entries.end(); ++i)
for(procedure_map_t::iterator i = dataflow.procs.begin(); i != dataflow.procs.end(); ++i)
{
if(!parser.xgraph[i->first].decompile)
std::cout << "// fn_" << std::hex << i->first << std::dec << " could not be decompiled." << std::endl;
else
{
std::cout << std::endl;
/*std::cout << "live_in:" << std::endl;
i->second.live_in.debug();
std::cout << "live_out:" << std::endl;
i->second.live_out.debug();*/
if(i->second.live_in.flags != 0)
std::cout << "// flags in: 0x" << std::hex << (U4)(i->second.live_in.flags) << std::dec << std::endl;
if(i->second.live_out.flags != 0)
std::cout << "// flags out: 0x" << std::hex << (U4)(i->second.live_out.flags) << std::dec << std::endl;
std::cout << "void fn_" << std::hex << (U4)(i->first + memory.image_base) << std::dec << "(" << std::endl;
std::cout << " [in]";
for(int ii = 0; ii < 8; ++ii)
{
if(i->second.live_in.reg[ii] != 0)
{
if(ii != 0)
std::cout << ",";
std::cout << " " << i->second.live_in.debug_get_reg(ii, i->second.live_in.reg[ii]);
}
}
std::cout << std::endl;
std::cout << " [out]";
for(int ii = 0; ii < 8; ++ii)
{
if(i->second.live_out.reg[ii] != 0)
{
if(ii != 0)
std::cout << ",";
std::cout << " " << i->second.live_out.debug_get_reg(ii, i->second.live_out.reg[ii]);
}
}
std::cout << std::endl;
std::cout << ")" << std::endl;
std::cout << "{" << std::endl;
test_list.clear();
test_dfs(i->second.root);
do_test(memory.image_base);
std::cout << "}" << std::endl;
}
}
std::cout << "Still here..." << std::endl;
#endif
// generate out.cpp here... and set skip_decompiled to true.
;;; // unimplemented
#if 0
// generate "out.beta" here.
// Note: the beta file is for all code that was "impure" in some way.
std::cout << "Writing out.beta..." << std::endl;
ofstream fbeta("out.beta");
beta_writer_t beta(parser, memory, fbeta);
beta.go(skip_decompiled, true /* hll style */);
#endif
#endif
skip_decompiled = true;
if(disassemble)
{
// To Be Done:
// Do a depth-first search on all decompilable functions and output them here in assembly,
// but with comments including data flow analysis results--what registers and flags are input
// and output by the function? Is there a ret N for N > 0, or do we access any fixed (non-variable)
// arguments on the stack in any paths? We want to learn the number of dsz-sized arguments and include
// it in the 'decompiled' assembly listing.
ofstream fasm("out.asm");
std::cout << "Writing out.asm..." << std::endl;
//disassembler_t disasm(parser, memory, fasm);
//disasm.go(false);
beta_writer_t disasm(parser, memory, fasm);
disasm.go(skip_decompiled, false /* hll style */);
}
std::cout << "Done" << std::endl;
}
catch(std::runtime_error err)
{
std::cout << "Error: " << err.what() << std::endl;
}
delete [] memory.meta;
delete [] image;
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -