⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crudcom2.cpp

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 CPP
📖 第 1 页 / 共 3 页
字号:
// crudcom2.cpp
// Copyright (C) 2008 Willow Schlanger

// somehow discover 'long' types?
//
// - need to fix 'nop' -> xchg <reg>,<same reg> --> asgn(void).
// - how are we going to handle the stack (push / pop)'s?
// - tmp(X) -> tmpX for output.
//
// now if we have a procedure that
// does this: pop ecx / jmp ecx
// then the procedure's "decompile" flag is cleared to false.
// a helper script file may indicate which procedures to totally
// ignore. how to implement that? also need to implement indirect
// call/jump's, possibly with a helper script.
// on call, during data flow analysis check if the target is
// decompile == true--if not, make OUR decompile==false and
// just go on to the next procedure.
//
// Note--we must be prepared to encounter a JMP/Jcond to the
// current procedure.
//
// The live_output's for a procedure are interesting and important
// to compute the way we do, but we also need vote's for a procedure's
// outputs. Assume external inputs return void. Currently, push;s are
// assumed to really read something and pop's assumed to really write
// it. There is no stack simulation, etc. The idea is, many functions
// will have a lot of outputs--which is fine, but we don't want to print
// code for outputs that are never "voted" for e.g. used in an immediately
// following code block.

#define DEBUG_LINE_NUMBERS 0

#include "beta.h"
#include "parser.h"
#include "semantics.h"
#include "disasm.h"

// --- dataflow ---

#include <cstdio>
#include <set>
#include <cstddef>
#include <map>
#include <list>
#include <vector>

namespace x86s
{

// ht = hcode type
enum
{
	ht_expr,	// asgn(dest, src) or asgn(void)
				// Note--for now, data flow analysis will assume
				// *** What about SETcc,CMOVcc? How to handle cc argument?
				// *** these use _x86_cc().
	ht_jump,	// target(s) specified by basic block
				// if jmp indirect, there are >= 1 targets (assume it's part of same procedure if indirect jmp.)
				// if it's called elsewhere AND we do an indirect jump to it, we have trouble.
				// else there is exactly one target.
				// if it's a different procedure, there are ARGUMENTS.
				// *** add arguments. 1st arg is indirect access, or NULL if direct. 2nd arg is actual arguments.
	ht_call,	// target[0] is next insn, target[1] is call target. left empty for handled indirect calls.
				// if you call the next insn, there's only one target.
				// decompile to: call f() then invoke f().
				// *** add arguments. 1st arg is indirect access, or NULL if direct. 2nd arg is actual arguments.
	ht_jcond,	// target[0] is next insn, target[1] is branch target
				// only one target if jcond is to self.
				// *** first argument is condition code value, 0..15.
				// *** second argument NULL, or is argsize_16/argsize_32/argsize_64 for cx/ecx/rcx
				//     +0x100 if we branch if cx/ecx/rcx==0.
				//     +0x200 if we branch if cx/ecx/rcx!=0
				//     +0x300 if we decrement cx/ecx/rcx by 1, then branch if it's nonzero.
				// *** 3nd argument is hll expression. (need to add this one).
				// *** any of these 3 args may be NULL.
				// don't support jcond's to other procedures. make reparse fail if that is done.
	ht_ret,		// the end. *** has an expression -- return code -- ?
	//ht_push,	// these are supposedly..     ) use asgn(stack(0), x)
	//ht_pop,		// ..going to be removed  ) or  asgn(x, stack(0))
	ht__end
};

const char *ht_strings[] =
{
	"expr",
	"jump",
	"call",
	"jcond",
	"ret"//,
	//"push",
	//"pop"
};

enum
{
	et_literal32,	// subtype is 0
	et_nodetype,	// subtype is nt_*, except nt_literal; u.data[] are arguments (NULL for none)
	et_nodeoperand,	// subtype is no_*; 'u' not used.
	et_asgn,		// assignment expression
	et_reg,			// subtype is 0 for general register, u.data[0] is argsize_*, u.data[1] is register index number
	et_reg_reg,		// subtype is 0. u.data[0] is argsize_*, u.data[1] is 1st reg index, u.data[2] is 2nd reg index.
	et_ea16,		// subtype is segreg + 8 * index_shift. u.data[0] is base + 32 * index (use 31 for none). data[1] is disp.
	et_ea32,		// subtype is segreg + 8 * index_shift. u.data[0] is base + 32 * index (use 31 for none). data[1] is disp.
	et_ea64,		// subtype is segreg + 8 * index_shift. u.data[0] is base + 32 * index (use 31 for none). data[1] is disp.
	// add symbol table references (identifier) support here...
};

struct expr_t
{
	U1 size;		// ns_*, ns_void for et_literal32 and nt_void
	U1 type;		// et_*
	U2 subtype;		// depends on 'type'.
	union
	{
		U4 data[3];	// data[0] is a 32-bit value for et_literal.
		expr_t *subexpr[3];
	}	u;
	
	void make_literal32(U4 value)
	{
		size = ns_void;
		type = et_literal32;
		subtype = 0;
		u.data[0] = value;
		u.data[1] = 0;
		u.data[2] = 0;
	}
	
	void make_void()
	{
		size = ns_void;
		type = et_nodetype;
		subtype = nt_void;
		u.data[0] = 0;
		u.data[1] = 0;
		u.data[2] = 0;
	}
	
	expr_t()
	{
		u.subexpr[0] = NULL;
		u.subexpr[1] = NULL;
		u.subexpr[2] = NULL;
	}
};

void kill_expr(expr_t *x)
{
	if(x == NULL)
		return;
	if(x->type == et_nodetype || x->type == et_asgn)
	{
		kill_expr(x->u.subexpr[0]);
		kill_expr(x->u.subexpr[1]);
		kill_expr(x->u.subexpr[2]);
	}
	delete x;
}

struct hcode_file_t
{
	U8 flags;						// bit =1 if use/def that flag
	U8 reg[16];						// bits =1 for a byte if use/def that byte
	
	void add_register(U4 argsize, U4 index)
	{
		if(index > 8)
			throw std::runtime_error("unsupported register index");
		if(argsize == argsize_8)
		{
			if(index < 4)
				reg[index] |= 0x000000ff;
			else
				reg[index - 4] |= 0x0000ff00;
			return;
		}
		if(argsize == argsize_16)
		{
			reg[index] |= 0x0000ffff;
			return;
		}
		if(argsize == argsize_32)
		{
			reg[index] |= 0xffffffff;
			return;
		}
		throw std::runtime_error("unsupported register size");
	}
	
	void clear()
	{
		flags = 0;
		for(int i = 0; i < 16; ++i)
			reg[i] = 0;
	}
	
	bool equals(hcode_file_t &src)
	{
		if(flags != src.flags)
			return false;
		for(int i = 0; i < 16; ++i)
			if(reg[i] != src.reg[i])
				return false;
		return true;
	}
	
	bool is_zero()
	{
		if(flags != 0)
			return false;
		for(int i = 0; i < 16; ++i)
			if(reg[i] != 0)
				return false;
		return true;
	}
	
	void set_bits(hcode_file_t &src)
	{
		flags |= src.flags;
		for(int i = 0; i < 16; ++i)
			reg[i] |= src.reg[i];
	}
	
	void set_bits_mask(hcode_file_t &a, hcode_file_t &b)
	{
		flags |= (a.flags & ~b.flags);
		for(int i = 0; i < 16; ++i)
			reg[i] |= (a.reg[i] & ~b.reg[i]);
	}
	
	void debug()
	{
		if((U4)flags != 0)
			std::cout << "  flags=" << std::hex << (U4)flags << std::dec << std::endl;
		for(int i = 0; i < 8; ++i)
		{
			if((U4)reg[i] != 0)
				std::cout << "  reg" << i << "=" << std::hex << (U4)reg[i] << std::dec << std::endl;
		}
	}
	
	const char *debug_get_reg(int reg, U8 mask)
	{
		const char *regs8[] = {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"};
		const char *regs16[] = {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"};
		const char *regs32[] = {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"};
		if(reg >= 8)
			return "???";
		if(mask == 0xff00)
			return (reg < 4) ? regs8[reg + 4] : "???";
		if(mask == 0xff)
			return (reg < 4) ? regs8[reg] : "???";
		if(mask == 0xffff)
			return regs16[reg];
		return regs32[reg];
	}
};

struct hcode_t
{
	int opcode;						// ht_*
	std::list<expr_t *> operands;	// generally we use front(), and when there's a 2nd arg, back().
									// also used for argument lists.
	hcode_file_t use, def;
	hcode_t()
	{
		use.clear();
		def.clear();
	}
	
	~hcode_t()
	{
		for(std::list<expr_t *>::iterator i = operands.begin(); i != operands.end(); ++i)
			kill_expr(*i);
	}
};

struct procedure_t;

struct bb_t
{
	procedure_t *proc;
	U8 address;
	std::vector<bb_t *> out_edges;
	std::set<bb_t *> in_edges;
	std::list<hcode_t> hcode;
	
	hcode_file_t live_in;
	hcode_file_t live_out;

	hcode_file_t live_use;
	hcode_file_t def;

	bb_t()
	{
		live_in.clear();
		live_out.clear();

		live_use.clear();
		def.clear();
	}
};

typedef std::map<U8, bb_t> bb_map_t;

struct procedure_t
{
	U8 address;		// this way we can use procedure_t * ptr's and still get the address if we need it
	bb_t *root;		// NULL if not scanned yet
	bb_map_t map;	// basic block container

	std::list<bb_t *> dfslast;
	hcode_file_t live_in;
	hcode_file_t live_out;
	hcode_file_t vote;	// anything used after a call to this procedure, in the subsequant basic block
						// gets a bit set here. AND vote bits by live_out to learn the relevant outputs.
	
	bool remove_cc;	// set to true if NO basic blocks have any flags live_use'd.
	
	procedure_t()
	{
		live_in.clear();
		live_out.clear();
		vote.clear();
		remove_cc = true;
	}
};

typedef std::map<U8, procedure_t> procedure_map_t;

#include "dataflow.h"

class reparser_t
{
	memory_t &memory;
	int dsz;
	procedure_t &proc;
	procedure_map_t &procs;
	call_graph_t &xgraph;
	public:
	reparser_t(memory_t &mem, int dsz_t, procedure_t &proc_t, procedure_map_t &procs_t, call_graph_t &xgraph_t) :
		memory(mem),
		dsz(dsz_t),
		proc(proc_t),
		procs(procs_t),
		xgraph(xgraph_t)
	{
		for(U8 i = 0; i < memory.image_size; ++i)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -