⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crudcom1.cpp

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// parser.cpp
// Copyright (C) 2008 Willow Schlanger

#include <list>
#include <map>
#include <stdexcept>
#include <cstddef>
#include <fstream>
#include <iostream>

#include "../x86s/types.h"
#include "../x86s/x86s_common.h"
#include "../x86s/x86s_decode.h"
#include "../crudasm/asmwriter.h"

#include <set>
#include <cstdio>

namespace x86s {

// Per-byte bookkeeping for the image being analyzed; all flags are packed
// into a single U1 worth of bit-fields (4 + 1 + 1 + 1 + 1 bits).
struct metadata_t
{
	// Nonzero once this offset has been visited: 1 for invalid opcodes,
	// bytes_left for instructions that overhang the image size.
	U1 size : 4;
	// Set while this offset belongs to the procedure currently being parsed.
	U1 local : 1;
	// Set when this offset is the start of a procedure.
	U1 entry : 1;
	U1 branch : 1;
	U1 available : 1;

	// Zero every flag. The 'entry' mark is dropped only when the caller
	// asks for it via clear_entry; otherwise it is left as it was.
	void clear(bool clear_entry)
	{
		if(clear_entry)
		{
			entry = 0;
		}
		size = 0;
		local = 0;
		branch = 0;
		available = 0;
	}
};

// Describes the binary image being analyzed together with its per-byte
// metadata. Plain aggregate: nothing here allocates or frees the buffers.
struct memory_t
{
	U1 *image;	// raw bytes of the image; instructions are decoded from image + offset
	metadata_t *meta;	// parallel array, indexed with the same offsets as image (0 .. image_size-1)
	U8 image_size;	// number of bytes in image (and entries in meta)
	U8 image_base;	// origin -- image[0] is really here
	U8 cs_base;	// offset in image that cs:0 corresponds to [?]
	U8 rip_mask;	// mask applied to computed branch destinations (destination &= rip_mask)
};

// Call-graph node for one procedure.
struct parser_call_t
{
	// Starts out true; the parser turns it off when it meets something in
	// the procedure that it cannot follow.
	bool decompile;
	// Image offsets of the procedures this one transfers control to.
	std::set<U8> calls;

	parser_call_t() :
		decompile(true),
		calls()
	{
	}
};

// Walks the code in a memory_t image, procedure by procedure, marking the
// per-byte metadata and recording which procedures call which.
//
// Usage, per the member comments below: reset(), then add_entrypoint() at
// least once, then parse(); repeat while parse() returns true.
class parser_t
{
	// Image and metadata being analyzed. Held by reference, so the memory_t
	// must outlive this parser.
	memory_t &memory;
	public:
	// Copied into decode_state_t::dsz before decoding.
	// NOTE(review): presumably the default operand/data size -- confirm
	// against the decoder.
	int dsz;
	
	// Maps a procedure's image offset to its call-graph node.
	typedef std::map<U8, parser_call_t> call_graph_t;
	call_graph_t xgraph;

	// Binds to 'mem' and clears every metadata entry, including the
	// 'entry' marks.
	parser_t(memory_t &mem, int dsz_t);
	
	// Call this between reparsing.
	// Clears pending work, the call graph and all metadata except the
	// 'entry' marks.
	void reset();
	
	// After calling reset, call this at least once.
	// 'target' is an offset into the image; it is marked as a procedure
	// start and queued for parsing.
	void add_entrypoint(U8 target);
	
	// Then call this to do the parsing.
	// Returns true if reparsing is necessary.
	bool parse();

	private:
	// Set by do_parse when a call lands on an offset that was already
	// decoded but was not known to be a procedure start.
	bool reparse;
	// Parses the single procedure starting at image offset 'target'.
	void do_parse(U8 target);
	// Image offsets of procedures still waiting to be parsed.
	std::set<U8> wanted_procs;
};

// Bind the parser to 'mem' and wipe all per-byte metadata, including any
// procedure-entry marks left over from earlier use.
parser_t::parser_t(memory_t &mem, int dsz_t)
	: memory(mem)
	, dsz(dsz_t)
{
	metadata_t *const stop = memory.meta + memory.image_size;
	for(metadata_t *cell = memory.meta; cell != stop; ++cell)
	{
		cell->clear(true);
	}
}

void parser_t::reset()
{
	wanted_procs.clear();
	for(U8 i = 0; i < memory.image_size; ++i)
		memory.meta[i].clear(false);
	xgraph.clear();
}
	
// Register 'target' (an offset into the image) as the start of a procedure
// and queue it for parsing.
//
// Throws std::runtime_error if 'target' does not lie inside the image;
// without this check the metadata write below would land outside the
// meta array.
void parser_t::add_entrypoint(U8 target)
{
	if(target >= memory.image_size)
		throw std::runtime_error("entry point beyond image");

	wanted_procs.insert(target);
	memory.meta[target].entry = 1;
}
	
bool parser_t::parse()
{
	U8 target;
	reparse = false;
	while(!wanted_procs.empty())
	{
		target = *wanted_procs.begin();
		wanted_procs.erase(wanted_procs.begin());
		for(U8 i = 0; i < memory.image_size; ++i)
			memory.meta[i].local = 0;
		do_parse(target);
	}
	return reparse;
}

// Parse the procedure at 'target' (an offset into memory.image).
//
// Starting at 'target', instructions are decoded one after another:
//   - jmp is followed directly;
//   - conditional branches and loops queue their destination in a local
//     work list and decoding continues with the next instruction;
//   - call records the destination in this procedure's call-graph node,
//     marks it as a procedure entry and, if it has not been decoded yet,
//     queues it in wanted_procs;
//   - returns end the current path.
// Running into an offset already marked as a procedure entry is recorded
// as a call to that procedure. graph.decompile is turned off when an
// instruction cannot be decoded, for far calls/jumps, for indirect jumps
// and when decoding runs off the end of the image.
//
// Does nothing if 'target' has already been decoded (meta size != 0).
//
// Throws std::runtime_error if 'target' is not marked as a procedure entry,
// if its call-graph node is already populated, or if a jmp/call/branch
// destination (or 'target' itself) lies outside the image.
#define ICODE_PEEP_SIZE 4	/* must be > 1, and must be a power of 2 */
void parser_t::do_parse(U8 target)
{
	// meta[] has image_size entries; never index it with anything larger.
	if(target >= memory.image_size)
		throw std::runtime_error("procedure beyond image");

	if(memory.meta[target].size != 0)
	{
		//std::printf("; Not re-parsing procedure at %08x\n", (U4)target);
		// already been here.
		return;
	}
	//std::printf("; Now parsing procedure at %08x\n", (U4)target);
	if(memory.meta[target].entry == 0)
		throw std::runtime_error("attempt to parse a non-procedure");
	
	// Offsets inside this procedure that still have to be decoded.
	std::set<U8> locals;
	locals.insert(target);

	parser_call_t &graph = xgraph[target];
	if(!graph.calls.empty())
	{
		throw std::runtime_error("internal error - reentered a procedure");
	}

	int x;
	decode_state_t s;
	s.dsz = dsz;

	U8 bytesleft, offset, tmp;
	
	// Small ring buffer holding the most recently decoded instructions.
	icode_t icode_table[ICODE_PEEP_SIZE];
	UINT icode_start, icode_size;
	
	// True only until the first instruction of the procedure is reached,
	// so that the procedure's own entry mark is not taken for a call.
	bool is_start = true;

	// now parse this procedure.
	while(!locals.empty())
	{
		target = *locals.begin();
		locals.erase(locals.begin());
		
		//printf("Now parsing locals starting at %08x\n", (U4)target);
		
		// Start each work-list item with an empty peep-hole buffer.
		icode_start = ICODE_PEEP_SIZE - 1;
		icode_size = 0;

again:

		//printf("%08x(%d)\n", (U4)target, memory.meta[target].entry);

		// see if we've been here locally.
		if(memory.meta[target].local == 1)
		{
			// already been here - part of current procedure.
			continue;
		}
		
		if(!is_start)
		{
			if(memory.meta[target].entry == 1)
			{
				// Ran into the start of another procedure: record it as
				// a call and make sure it gets parsed on its own.
				graph.calls.insert(target);
				wanted_procs.insert(target);
				continue;
			}
		}
		is_start = false;

		// make sure we have NOT been here. if we have, set reparse to true.
		//if(memory.meta[target].size != 0)
		//	throw std::runtime_error("internal error: unhandled reentrancy");
		
		// decode 'target'.
		s.insn = memory.image + target;
		icode_start = (icode_start + 1) & (ICODE_PEEP_SIZE - 1);
		s.icode = icode_table + icode_start;
		if(icode_size < ICODE_PEEP_SIZE)
			++icode_size;
		
		// Hand the decoder at most 15 bytes, and never more than remain in
		// the image; 15 is also the largest value meta size can hold.
		bytesleft = memory.image_size - target;
		if(bytesleft > 15)
			bytesleft = 15;
		s.end = s.insn + bytesleft;
		
		x = decode(s);
		
		memory.meta[target].local = 1;

		if(x == 1)
		{
			// Instruction overhangs the available bytes.
			memory.meta[target].size = bytesleft;
			graph.decompile = false;
			continue;
		}
		else
		if(x != 0)
		{
			// invalid opcode.
			memory.meta[target].size = 1;
			graph.decompile = false;
			continue;
		}

		memory.meta[target].size = s.size;
		
		// look at instruction here.
		switch(encodings[s.encoding].insn)
		{
			case insn__ret:
			case insn__retf:
			case insn__retfnum:
			case insn__retnum:
			case insn__iret:
				// End of this path; pick up the next work-list item.
				continue;
			case insn_jmp:
				icode_start = ICODE_PEEP_SIZE - 1;
				icode_size = 0;
				// fixme: add 64-bit support here. Also do something about memory.cs_base.
				offset = (U8)s.icode->imm + (U8)target + (U8)s.size + (U8)memory.image_base;
				offset &= memory.rip_mask;
				if(offset < (U8)memory.image_base)
					throw std::runtime_error("jmp above image base");
				target = offset - (U8)memory.image_base;
				// Same guard the conditional-branch case uses: the
				// destination must be a valid index into meta[].
				if(target >= memory.image_size)
					throw std::runtime_error("jmp beyond image");
				memory.meta[target].branch = 1;
				s.insn = memory.image + target;
				//printf("jmp to %08x\n", offset);
				goto again;
			case insn_call:
				icode_start = ICODE_PEEP_SIZE - 1;
				icode_size = 0;
				// fixme: add 64-bit support here. Also do something about memory.cs_base.
				offset = (U8)s.icode->imm + (U8)target + (U8)s.size + (U8)memory.image_base;
				offset &= memory.rip_mask;
				if(offset < (U8)memory.image_base)
					throw std::runtime_error("call above image base");
				// add to list of wanted procedures.
				tmp = offset - (U8)memory.image_base;
				// Same guard the conditional-branch case uses: the
				// destination must be a valid index into meta[].
				if(tmp >= memory.image_size)
					throw std::runtime_error("call beyond image");
				graph.calls.insert(tmp);
				if(memory.meta[tmp].size == 0)
				{
					wanted_procs.insert(tmp);
				}
				else
				{
					// The destination was already decoded as part of some
					// other procedure's body; ask the caller to reparse
					// now that it is known to be an entry point.
					if(memory.meta[tmp].entry == 0)
						reparse = true;
				}
				memory.meta[tmp].entry = 1;
				break;
			case insn__jcc:
			case insn__jrcxz:
			case insn__loopnz:
			case insn__loopz:
			case insn__loop:
				icode_start = ICODE_PEEP_SIZE - 1;
				icode_size = 0;
				// fixme: add 64-bit support here. Also do something about memory.cs_base.
				offset = (U8)s.icode->imm + (U8)target + (U8)s.size + (U8)memory.image_base;
				offset &= memory.rip_mask;
				if(offset < (U8)memory.image_base)
					throw std::runtime_error("branch above image base");
				tmp = offset - (U8)memory.image_base;
				if(tmp >= memory.image_size)
					throw std::runtime_error("branch beyond image");
				memory.meta[tmp].branch = 1;
				if(memory.meta[tmp].local == 0)
				{
					locals.insert(tmp);
				}
				break;
			case insn__calli:
				// high level languages can do indirect calls.
				break;
			case insn__callfd:
			case insn__callfi:
				graph.decompile = false;
				break;
			case insn__jmpfd:
			case insn__jmpfi:
				graph.decompile = false;
				continue;
			case insn__jmpi:
				// consult peep-hole buffer to see if we can handle it.
				// fixme--do this.
				// cmp [ebp-<#>],<#>
				// ja/jae target
				// mov reg,[ebp-<#>]
				// jmp [reg*osz+<#>]
				//reg/[ebp-<#>] : have osz size.
			
				graph.decompile = false;
				continue;
		}
		
		// next instruction.
		s.insn += s.size;
		target += s.size;

		// *** check for CS wraparound here !!! (important for 16-bit code...)

		// Fell off the end of the image without a terminating instruction.
		if(s.insn == (memory.image_size + memory.image))
		{
			graph.decompile = false;
			continue;
		}
		
		goto again;
	}
}

}	// namespace x86s

// --- test code ---

using namespace x86s;
using namespace std;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -