📄 disasm.h

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 H
字号:
// disasm.h
// Copyright (C) 2008 Willow Schlanger

#ifndef l_crudcom__disasm_h__included
#define l_crudcom__disasm_h__included

#include <list>
#include <map>
#include <stdexcept>
#include <cstddef>
#include <fstream>
#include <iostream>

#include "../x86s/types.h"
#include "../x86s/x86s_common.h"
#include "../x86s/x86s_decode.h"
#include "../crudasm/asmwriter.h"

#include <set>
#include <cstdio>

#include "parser.h"

namespace x86s {

// Once done parsing, this disassembles everything, from start to finish.
class disassembler_t
{
	private:
	std::ostream &of;
	parser_t &parser;
	memory_t &memory;
	//std::set<U8> targets;
	public:
	disassembler_t(parser_t &parser_tag, memory_t &memory_tag, std::ostream &of_t) :
		of(of_t),
		parser(parser_tag),
		memory(memory_tag)
	{
	}
	~disassembler_t()
	{
		of << std::endl;
	}
	void go(bool skip_decompiled)
	{
		icode_t icode;
		decode_state_t s;
		s.icode = &icode;
		s.dsz = parser.dsz;
		int x;
		char line[1024];
		NasmWriter nasmdis;
		U8 offset, target;
		
		of << std::hex << "org 0x" << memory.image_base << std::dec;
		
		for(U8 i = 0; i < memory.image_size; )
		{
again:
			if(memory.meta[i].entry == 1)
			{
				of << std::endl;
				of << "; Calls:";
				for(std::set<U8>::iterator j = parser.xgraph[i].calls.begin(); j != parser.xgraph[i].calls.end(); ++j)
					of << " " << std::hex << "fn_" << (U4)((*j + memory.image_base) & memory.rip_mask) << std::dec;
				of << std::endl;
				if(memory.meta[i].branch == 1)
					of << "; There are other branches here too." << std::endl;
				if(!parser.xgraph[i].decompile)
					of << "; This function can not be decompiled." << std::endl;
				of << "fn_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << std::dec << ":" << std::endl;
				if(parser.xgraph[i].decompile)
				{
					if(skip_decompiled)
					{
						of << "; This function was decompiled. Skipping to the next function entrypoint..." << std::endl;
						for(;;)
						{
							if(memory.meta[i].size == 0)
								++i;
							else
								i += memory.meta[i].size;
							if(i == memory.image_size)
								break;
							if(memory.meta[i].entry == 1)
								break;
						}
						continue;
					}
				}
			}
			else
			if(memory.meta[i].branch == 1)
			{
				of << "loc_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << std::dec << ":" << std::endl;
			}
			
			if(memory.meta[i].size != 0)
			{
				bool overlaps = false;
				for(U8 j = 1; j < memory.meta[i].size; ++j)
					if(memory.meta[i + j].entry || memory.meta[i + j].branch)
					{
						overlaps = true;
						break;
					}
				if(overlaps)
				{
					of << "loc_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db 0x" << (U2)memory.image[i] << std::dec;
					of << " ; overlapping instruction detected.";
					of << std::endl;
					++i;
					goto again;
				}
			
				s.insn = memory.image + i;
				s.end = s.insn + 15;
				x = decode(s);
				if(x != 0)	// this will happen for invalid opcodes...
				{
					of << std::endl;
					for(;;)
					{
						of << "loc_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db 0x" << (U2)memory.image[i] << std::dec << std::endl;
						++i;
						if(i == memory.image_size)
							return;
						if(memory.meta[i].size != 0)
							goto again;
					}
				}
				
				line[0] = '\0';
				const char *imm = NULL;
				const char *disp = NULL;
				char buf[1024];
				bool star = false;
				
				//---
				// look at instruciton here.
				target = i;
				switch(encodings[s.encoding].insn)
				{
					case insn__ret:
					case insn__retf:
					case insn__retfnum:
					case insn__retnum:
					case insn__iret:
						break;
					case insn_jmp:
					case insn__jcc:
					case insn__jrcxz:
					case insn__loopnz:
					case insn__loopz:
					case insn__loop:
					case insn_call:
						// fixme: add 64-bit support here. Also do something about memory.cs_base.
						offset = (U8)s.icode->imm + (U8)target + (U8)s.size + (U8)memory.image_base;
						offset &= memory.rip_mask;
						if(offset < (U8)memory.image_base)
							throw std::runtime_error("jmp above image base");
						target = offset - (U8)memory.image_base;
						if(memory.meta[target].entry)
						{
							std::sprintf(buf, "fn_%x", offset);
							imm = buf;
						}
						else
						if(memory.meta[target].branch)
						{
							std::sprintf(buf, "loc_%x", offset);
							imm = buf;
						}
						break;
					case insn__calli:
					case insn__callfd:
					case insn__callfi:
					case insn__jmpfd:
					case insn__jmpfi:
					case insn__jmpi:
						star = true;
						break;
				}
				//---
				
				nasmdis.disasm(line, &s, imm, disp, i + (U8)s.size + memory.image_base);
				
				of << "  " << line;
				if(star)
					of << "  ; (*) not followed";
				of << std::endl;

				i += memory.meta[i].size;
			}
			else
			{
				int linecount = -1;
				bool yet = false;
				do
				{
					if(linecount == -1 || linecount == 15)
					{
						linecount = 0;
						of << std::endl;
						of << "loc_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db";
						yet = false;
					}
					else
						++linecount;
					if(yet)
						of << ",";
					of << " 0x" << std::hex << (U2)memory.image[i] << std::dec;
					yet = true;
				
					++i;
				}	while(i != memory.image_size && memory.meta[i].size == 0);
				of << std::endl;
				if(i != memory.image_size)
				{
					if(memory.meta[i].entry == 0)
					{
						of << std::endl;
					}
				}
			}
		}
	}
};

}	// namespace x86s

#endif	// l_crudcom__disasm_h__included
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -