⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crudcom2.cpp

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 CPP
📖 第 1 页 / 共 3 页
字号:
				{
#if 0
					std::cout << std::hex;
					for(std::set<bb_t *>::iterator k = (*j)->in_edges.begin(); k != (*j)->in_edges.end(); ++k)
						std::cout << " " << (U4)(*k);
					std::cout << " vs. " << (U4)(&bb) << std::endl;
					std::cout << std::dec;
#endif
					(*j)->in_edges.erase(&bb);
				}
			}
		}
		//p.map.clear();
		//p.root = NULL;
#endif
		//std::cout << "Failure at " << std::hex << (U4)p.address << std::dec << std::endl;
		return;
	}
	
	//std::cout << "Success at " << std::hex << (U4)p.address << std::dec << std::endl;

	for(U8 i = 0; i < memory.image_size; ++i)
	{
		if(memory.meta[i].local == 1)
			memory.meta[i].decompiled = 1;
	}
	memory.meta[p.address].decompiled = 1;

#if 0
	std::printf("%08x", (U4)((target + memory.image_base) & memory.rip_mask));
	if(!j.decompile)
		std::printf("*");
	std::printf(":");
	for(std::set<U8>::iterator i = j.calls.begin(); i != j.calls.end(); ++i)
		std::printf(" %08x", (U4)((*i + memory.image_base) & memory.rip_mask));
	std::printf("\n");
#endif

}

}	// namespace x86s

// --- main program ---

#include <list>

using namespace x86s;
using namespace std;

// Reads the whole of 'filename' (opened in binary mode) into a freshly
// allocated buffer.
//
// filename: path of the file to load.
// size:     receives the number of bytes read; written only on success.
//
// Returns a buffer allocated with new[] that the caller must release with
// delete[], or NULL if the file cannot be opened, its length cannot be
// determined, it is empty, it does not fit in a U4, or it cannot be read
// completely.
static U1 *read_file(const char *filename, U4 *size)
{
	FILE *f = fopen(filename, "rb");
	if(f == NULL)
		return NULL;

	// Find the length by seeking to the end. ftell() yields -1 on failure,
	// which the range test below rejects together with empty files.
	long length = -1;
	if(fseek(f, 0, SEEK_END) == 0)
		length = ftell(f);

	// The caller stores the length in a U4, so refuse anything larger rather
	// than silently loading a truncated image.
	if(length <= 0 || (unsigned long)length > 0xfffffffful || fseek(f, 0, SEEK_SET) != 0)
	{
		fclose(f);
		return NULL;	// no data, or length unknown / out of range
	}

	U1 *data = new U1 [(size_t)length];
	if(fread(data, 1, (size_t)length, f) != (size_t)length)
	{
		delete [] data;
		fclose(f);
		return NULL;	// unable to read
	}
	fclose(f);
	*size = (U4)length;
	return data;
}

// This shows how to do a depth-first search on all decompilable-functions.
#if 0
// Targets that test_dfs() has already visited.
std::set<U8> test_loc;

// Sample post-order walk over parser.xgraph starting at 'target': every
// callee is visited first, then one line is printed for 'target' holding its
// address, a '*' if its 'decompile' flag is clear, and the addresses it calls.
void test_dfs(parser_t &parser, U8 target, memory_t &memory)
{
	// insert() tells us through .second whether 'target' was newly added.
	if(!test_loc.insert(target).second)
		return;		// been here already

	typedef std::set<U8>::iterator call_iter_t;

	// call all children.
	call_iter_t child = parser.xgraph[target].calls.begin();
	while(child != parser.xgraph[target].calls.end())
	{
		test_dfs(parser, *child, memory);
		++child;
	}

	// Report this function: address, optional '*', then the callee list.
	std::printf("%08x", (U4)((target + memory.image_base) & memory.rip_mask));
	std::printf("%s:", parser.xgraph[target].decompile ? "" : "*");
	call_iter_t callee = parser.xgraph[target].calls.begin();
	while(callee != parser.xgraph[target].calls.end())
	{
		std::printf(" %08x", (U4)((*callee + memory.image_base) & memory.rip_mask));
		++callee;
	}
	std::printf("\n");
}
#endif

// Writes the copyright banner and the command-line usage summary to std::cout,
// one line at a time, flushing after each line.
void help()
{
	// An empty entry produces the blank separator line.
	static const char *const banner[] =
	{
		"Copyright (C) 2008 Willow Schlanger",
		"Decompiler for x86 and x86-64 binary image files",
		"",
		"usage: crudcom2 filename bits origin [entrypoint]",
		"bits must be one of: 16, 32, 64 (64 bit mode needs some work!)",
		"the entrypoint is an offset in the image, and defaults to 0."
	};
	const unsigned line_count = sizeof(banner) / sizeof(banner[0]);

	for(unsigned n = 0; n < line_count; ++n)
		std::cout << banner[n] << std::endl;
}

int main(int argc, char **argv)
{
	std::string filename;
	int dsz = argsize_16;
	
	if(argc < 4)
	{
		help();
		return 1;
	}
	filename = argv[1];
	std::string bits = argv[2];
	if(bits == "16")
		dsz = argsize_16;
	else
	if(bits == "32")
		dsz = argsize_32;
	else
	if(bits == "64")
		dsz = argsize_64;
	else
	{
		help();
		std::cout << std::endl << "error: bad number of bits -- must be exactly one of these: 16, 32, 64" << std::endl;
		return 1;
	}
	
	char *endp;
	U4 origin = strtol(argv[3], &endp, 0);
	if(*endp != '\0')
	{
		help();
		std::cout << std::endl << "error: bad origin given - use 0x for hex numbers" << std::endl;
		return 1;
	}
	
	U4 myentry = 0;
	
	if(argc >= 5)
	{
		myentry = strtol(argv[4], &endp, 0);
		if(*endp != '\0')
		{
			help();
			std::cout << std::endl << "error: bad entrypoint given - use 0x for hex numbers" << std::endl;
			return 1;
		}
	}
	
	bool disassemble = true;		// generate disassembly output?
	bool skip_decompiled = false;	// set to true to skip decompiled functions in "beta" output (do this when out.cpp is being produced)

	U4 size_tag;
	U1 *image = read_file(filename.c_str(), &size_tag);
	if(image == NULL)
	{
		std::cout << filename << ": can\'t open and read, or file has a size of 0 bytes\n" << std::endl;
		return 1;
	}
	U8 size = size_tag;
	
	std::list<U8> entries;
	entries.push_back(myentry);	
	//U8 entry = myentry;
	
	memory_t memory;
	if(dsz == argsize_16)
		memory.rip_mask = 0xffff;
	else
	if(dsz == argsize_32)
		memory.rip_mask = 0xffffffff;
	else
		memory.rip_mask = (U8)(-1ll);
	memory.image = image;
	memory.image_size = size;
	memory.image_base = origin;
	memory.cs_base = 0;
	memory.meta = new metadata_t [size];
	try
	{
		parser_t parser(memory, dsz);
		
		std::cout << "Parsing..." << std::endl;
		
		bool again;
		do
		{
			parser.reset();
			for(std::list<U8>::iterator i = entries.begin(); i != entries.end(); ++i)
				parser.add_entrypoint(*i);
			again = parser.parse();
		}	while(again);
		
		// decompile here. clear 'decompile' flag for a function if we were unable
		// to decompile it.
		// crudcom2: just do data flow analysis on a depth-first-search basis.
		// convert icode to tcode. make the semantics generator generate a maximum
		// size of all insn tcode's, as a #DEFINE. use a static buffer with that size.
		// copy tcode to this buffer. specialize all sizes--incl. argsize[0|1|2] sizes.
		// now convert to hcode, with chaining and special handling for temporaries,
		// arguments, and considering overlaps and memory accesses (so-called deref
		// nodes).
		// Store all the hcode's.
		// Bail out if we ever encounter an instruction with no semantics associated
		// with it (no operations are represented as asgn(void)), except for insn's
		// we have special handling for.
		// Where/when to do idiom analysis? Also remember to check the peep-er and to
		// make switch's work. We don't want e.g. mov edi,edi to think edi is an input,
		// etc.
		// All we want is global data flow analysis, at this point--crudcom3.
		// The idea is, generate hcode for the procedure then pass that to the universal
		// decompiling machine--and have that machine perform dataflow analysis. Is this
		// a good idea? I think it is--and implement a reaching algorithm...
		// However for crudcom2, maybe all we need is hcode with use/def chains but no
		// real expression conversion. I'm not sure what to do just yet...
		// Remember, if you ever encounter an entrypoint (besides the current procedure's
		// entrypoint) whose metadata has 'entry' set, just 'invoke' that function and
		// continue processing locations for the current procedure.
		
		//printf("Call graph:\n");
		//test_loc.clear();
		//test_dfs(parser, entry, memory);	// call for each entrypoint
#if 1
		dataflow_t dataflow(parser.xgraph, memory, dsz);		// fixme--pass in parser.indirect too.
		std::cout << "Reparsing..." << std::endl;
		for(std::list<U8>::iterator i = entries.begin(); i != entries.end(); ++i)
			dataflow.go(*i);

		// This generates a dfs-last list of procedures, makes possible process_1() etc. calls.
		std::cout << "Processing..." << std::endl;
		dataflow.process();

		// This goes thru all basic blocks, does a little optimization and sets use, def things and
		// removes code that's dead for a given basic block.
		std::cout << "Processing (1)..." << std::endl;
		dataflow.process_1();
		
		// Now do REAL dataflow analysis. Find out what registers are input by a procedure and what
		// registers or flags are potentially output. Bail out if any condition codes are ever
		// externally input. Note--for CALLs, this looks up the external inputs / outputs of the
		// invoked procedure. For ex., if f1() calls f2() and f2() uses esi, it could be that
		// esi was input into f2 and then fed into f2. f1 gets an esi register argument.
		//
		// Finally we need to deal with the stack. Don't assume PUSH and POP really modify/use
		// their operand. Learn the arguments the procedure uses--via e.g. [ebp+4] existance,
		// or use of e.g. ret 4. IS THIS A SEPARATE ISSUE? WHY DO WE WANT TO DO THIS HERE??
		//
		// Also after a call, notify the callee about what register/flag outputs actually got used.
		// This makes it possible to do some "pruning" at the end.
		std::cout << "Processing (2)..." << std::endl;
		dataflow.process_2();

#if 1	// set to 1 for output
		//for(std::list<U8>::iterator i = entries.begin(); i != entries.end(); ++i)
		for(procedure_map_t::iterator i = dataflow.procs.begin(); i != dataflow.procs.end(); ++i)
		{
			if(!parser.xgraph[i->first].decompile)
				std::cout << "// fn_" << std::hex << i->first << std::dec << " could not be decompiled." << std::endl;
			else
			{
				std::cout << std::endl;
				/*std::cout << "live_in:" << std::endl;
				i->second.live_in.debug();
				std::cout << "live_out:" << std::endl;
				i->second.live_out.debug();*/
				if(i->second.live_in.flags != 0)
					std::cout << "// flags in: 0x" << std::hex << (U4)(i->second.live_in.flags) << std::dec << std::endl;
				if(i->second.live_out.flags != 0)
					std::cout << "// flags out: 0x" << std::hex << (U4)(i->second.live_out.flags) << std::dec << std::endl;
				std::cout << "void fn_" << std::hex << (U4)(i->first + memory.image_base) << std::dec << "(" << std::endl;
				std::cout << "  [in]";
				for(int ii = 0; ii < 8; ++ii)
				{
					if(i->second.live_in.reg[ii] != 0)
					{
						if(ii != 0)
							std::cout << ",";
						std::cout << " " << i->second.live_in.debug_get_reg(ii, i->second.live_in.reg[ii]);
					}
				}
				std::cout << std::endl;
				std::cout << "  [out]";
				for(int ii = 0; ii < 8; ++ii)
				{
					if(i->second.live_out.reg[ii] != 0)
					{
						if(ii != 0)
							std::cout << ",";
						std::cout << " " << i->second.live_out.debug_get_reg(ii, i->second.live_out.reg[ii]);
					}
				}
				std::cout << std::endl;
				std::cout << ")" << std::endl;
				std::cout << "{" << std::endl;
				test_list.clear();
				test_dfs(i->second.root);
				do_test(memory.image_base);
				std::cout << "}" << std::endl;
			}
		}
		
		std::cout << "Still here..." << std::endl;
#endif

		// generate out.cpp here... and set skip_decompiled to true.
		;;;	// unimplemented
		
#if 0
		// generate "out.beta" here.
		// Note: the beta file is for all code that was "impure" in some way.
		std::cout << "Writing out.beta..." << std::endl;
		ofstream fbeta("out.beta");
		beta_writer_t beta(parser, memory, fbeta);
		beta.go(skip_decompiled, true /* hll style */);
#endif
#endif
skip_decompiled = true;		
		if(disassemble)
		{
			// To Be Done:
			// Do a depth-first search on all decompilable functions and output them here in assembly,
			// but with comments including data flow analysis results--what registers and flags are input
			// and output by the function? Is there a ret N for N > 0, or do we access any fixed (non-variable)
			// arguments on the stack in any paths? We want to learn the number of dsz-sized arguments and include
			// it in the 'decompiled' assembly listing.
			
			ofstream fasm("out.asm");
			std::cout << "Writing out.asm..." << std::endl;
			//disassembler_t disasm(parser, memory, fasm);
			//disasm.go(false);
			beta_writer_t disasm(parser, memory, fasm);
			disasm.go(skip_decompiled, false /* hll style */);
		}
		std::cout << "Done" << std::endl;
	}
	catch(std::runtime_error err)
	{
		std::cout << "Error: " << err.what() << std::endl;
	}
	delete [] memory.meta;
	delete [] image;
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -