⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 beta.h

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 H
字号:
// beta.h
// Copyright (C) 2008 Willow Schlanger

#ifndef l_crudcom__beta_h__included
#define l_crudcom__beta_h__included

#include <list>
#include <map>
#include <stdexcept>
#include <cstddef>
#include <fstream>
#include <iostream>

#include "../x86s/types.h"
#include "../x86s/x86s_common.h"
#include "../x86s/x86s_decode.h"
#include "../crudasm/asmwriter.h"

#include <set>
#include <cstdio>

#include "parser.h"
#include "semantics.h"

namespace x86s {

// One output line collected for a basic block by beta_writer_t::go().
struct beta_line_t
{
	// Offset (index into the memory image) of the instruction this line belongs to.
	U8 address;

	// Label to print in front of the line:
	//   0   - no label
	//   'c' - a call targets this address
	//   'b' - a branch (but no call) targets this address
	int label;

	// True when 'line' holds assembly text rather than a generated high-level statement.
	bool is_asm;

	// Flags consumed by this line; values are in no__begin_x86_flags..no__end_x86_flags.
	std::set<int> flags_in;

	// Flags produced by this line; same value range as flags_in.
	// beta_writer_t::go() replaces the contents with the single value -1 to mark
	// a non-assembly line whose output is dead, so that it is not printed.
	std::set<int> flags_out;

	// Text of the line (assembly or high-level statement).
	std::string line;

	// Optional trailing comment; empty when there is none.
	std::string comment;
};

class beta_writer_t
{
	std::ostream &of;
	parser_t &parser;
	memory_t &memory;

	public:
	beta_writer_t(parser_t &parser_tag, memory_t &memory_tag, std::ostream &of_t) :
		of(of_t),
		parser(parser_tag),
		memory(memory_tag)
	{
	}

	~beta_writer_t()
	{
		of << std::endl;
	}

	void go(bool skip_decompiled, bool hll_style)
	{
		bool pure = true;	// set to false if an unhandled instruction is encountered in the basic block
		std::list<beta_line_t> basic_block;
		
		icode_t icode;
		decode_state_t s;
		s.icode = &icode;
		s.dsz = parser.dsz;
		int x;
		char line[1024];
		NasmWriter nasmdis;
		U8 offset, target;
		
		//tnode tscratch[X86S_SEMANTICS_MAX_SIZE];
		//U4 tscratch_size;
		specialized_tcode_t sp;
		
		of << std::hex << "org 0x" << memory.image_base << std::dec << std::endl;
		
		for(U8 i = 0; i < memory.image_size; )
		{
			if(memory.meta[i].size == 0)
			{
				//
				int linecount = -1;
				bool yet = false;
				do
				{
					if(linecount == -1 || linecount == 15)
					{
						linecount = 0;
						of << std::endl;
						of << "loc_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db";
						yet = false;
					}
					else
						++linecount;
					if(yet)
						of << ",";
					of << " 0x" << std::hex << (U2)memory.image[i] << std::dec;
					yet = true;
				
					++i;
				}	while(i != memory.image_size && memory.meta[i].size == 0);
				of << std::endl;
				if(i != memory.image_size)
				{
					if(memory.meta[i].entry == 0)
					{
						of << std::endl;
					}
				}
				continue;
			}
			
			// memory.meta[i].size != 0. check for overlapping instructions.
			// if not overlapping, see if it's been decompiled and skip_decompiled == true.
			// else, decode it and check for decoder error.
			
			//---
			bool overlaps = false;
			for(U8 j = 1; j < memory.meta[i].size; ++j)
			{
				if(memory.meta[i + j].entry || memory.meta[i + j].branch)
				{
					overlaps = true;
					break;
				}
			}
			bool dump = overlaps;
			if(!overlaps)
			{
				if(skip_decompiled && memory.meta[i].decompiled == 1)
				{
					if(memory.meta[i].entry == 1)
					{
						of << std::endl;
						of << (hll_style ? "//" : ";");
						of << " fn_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << std::dec << " - is decompiled!" << std::endl;
						if(memory.meta[i].branch == 1)
						{
							of << (hll_style ? "//" : ";");
							of << " There are non-call branches here too." << std::endl;
						}
						of << std::endl;
					}
					i += memory.meta[i].size;
					continue;
				}
				s.insn = memory.image + i;
				s.end = s.insn + 15;
				x = decode(s);
				if(x != 0)
					dump = true;
			}
			//---
			
			if(dump)
			{
				of << std::endl;
				for(;;)
				{
					of << memory.meta[i].entry ? "fn_" : "loc_";
					of << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db 0x" << (U2)memory.image[i] << std::dec << std::endl;
					++i;
					if(i == memory.image_size)
						return;
					if(memory.meta[i].size != 0)
						break;
				}
				continue;
			}
			
			// decode success.
			basic_block.clear();
			bool done = false;
			bool star = false;
			pure = true;
			sp.clear();
			do
			{
				// The instruction at 'i' has been decoded and is now in s.icode.
				// It is not an overlapping instruction.
				basic_block.push_back(beta_line_t());
				basic_block.back().address = i;
				basic_block.back().label = 0;
				if(memory.meta[i].entry == 1)
					basic_block.back().label = 'c';
				else
				if(memory.meta[i].branch == 1)
					basic_block.back().label = 'b';
				
				line[0] = '\0';
				const char *imm = NULL;
				const char *disp = NULL;
				char buf[1024];
				buf[0] = '\0';
				
				target = i;
				basic_block.back().is_asm = true;
				switch(encodings[s.encoding].insn)
				{
					case insn_int:
					case insn__int3:
						done = true;
						break;
					case insn__ret:
					case insn__retf:
					case insn__retfnum:
					case insn__retnum:
					case insn__iret:
						done = true;
						break;
					case insn_jmp:
					case insn__jrcxz:
					case insn__loopnz:
					case insn__loopz:
					case insn__loop:
					case insn_call:
						done = true;
					case insn__jcc:
						if(encodings[s.encoding].insn == insn__jcc)
						{
							U1 cc = s.icode->argvalue[1];
							if(cc == 0 || cc == 1)
								basic_block.back().flags_in.insert(no_x86_of);
							else
							if(cc == 2 || cc == 3)
								basic_block.back().flags_in.insert(no_x86_cf);
							else
							if(cc == 4 || cc == 5)
								basic_block.back().flags_in.insert(no_x86_zf);
							else
							if(cc == 6 || cc == 7)
							{
								basic_block.back().flags_in.insert(no_x86_cf);
								basic_block.back().flags_in.insert(no_x86_zf);
							}
							else
							if(cc == 8 || cc == 9)
								basic_block.back().flags_in.insert(no_x86_sf);
							else
							if(cc == 0xa || cc == 0xb)
								basic_block.back().flags_in.insert(no_x86_pf);
							else
							if(cc == 0xc || cc == 0xd)
							{
								basic_block.back().flags_in.insert(no_x86_sf);
								basic_block.back().flags_in.insert(no_x86_of);
							}
							else
							if(cc == 0xe || cc == 0xf)
							{
								basic_block.back().flags_in.insert(no_x86_zf);
								basic_block.back().flags_in.insert(no_x86_sf);
								basic_block.back().flags_in.insert(no_x86_of);
							}
							else
								done = true;
						}
						// fixme: add 64-bit support here. Also do something about memory.cs_base.
						offset = (U8)s.icode->imm + (U8)target + (U8)s.size + (U8)memory.image_base;
						offset &= memory.rip_mask;
						if(offset < (U8)memory.image_base)
							throw std::runtime_error("jmp above image base");
						target = offset - (U8)memory.image_base;
						if(memory.meta[target].entry)
						{
							std::sprintf(buf, "fn_%x", offset);
							imm = buf;
						}
						else
						if(memory.meta[target].branch)
						{
							std::sprintf(buf, "loc_%x", offset);
							imm = buf;
						}
						break;
					case insn__calli:
					case insn__callfd:
					case insn__callfi:
					case insn__jmpfd:
					case insn__jmpfi:
					case insn__jmpi:
						star = true;
						done = true;
						break;
					default:
//pure = false;
#if 1
						if(s.icode->lockrep == 1)
							pure = false;
						else
						if(hll_style)
						{
							tcode_element &te = x86_tcode_table[encodings[s.encoding].insn];
							if(te.index != 0)
							{
								//basic_block.back().is_asm = false if we recognize the instruction.
								// write to basic_block.back(). if need many lines, add them--all with is_asm==false.
								//basic_block.back().comment = "high level code";
								//tnode tscratch[X86S_SEMANTICS_MAX_SIZE];
								//U4 tscratch_size;
								//tscratch_size = 0;
								sp.accept(s.encoding, *s.icode, s.dsz);
								
								// Go thru all asgn sp.tnodes and push the resulting string onto basic_block,
								// additionally setting flags_in and flags_out.
								// std::set<int>.
								
								std::string output;
								//std::cout << "(" << std::hex << te.index[2] << std::dec << ")" << std::endl;
								for(U4 index = 0; te.index[2 + index] != 0xffffffff; ++index)
								{
									U4 *ptr = te.index + 2 + index;
									if(index != 0)
									{
										basic_block.push_back(beta_line_t());
										basic_block.back().is_asm = false;
										basic_block.back().address = i;
										basic_block.back().label = 0;
									}
									
									sp.get_text(te.index[2 + index] - te.index[1],
										output,
										&basic_block.back().flags_in,
										&basic_block.back().flags_out,
										*s.icode
									);
									
									basic_block.back().is_asm = false;
									basic_block.back().line = output;
								}
								
								/*basic_block.back().is_asm = false;
								basic_block.back().line = "// line 1";

								basic_block.push_back(beta_line_t());
								basic_block.back().is_asm = false;
								basic_block.back().address = i;
								basic_block.back().label = 0;

								basic_block.back().line = "// line 2";*/
							}
							else
							{
								pure = false;
							}
						}
#endif
				}
				
				if(basic_block.back().is_asm)
				{
					if(star)
						basic_block.back().comment = "(*) not followed";
					nasmdis.disasm(line, &s, imm, disp, i + (U8)s.size + memory.image_base);
					basic_block.back().line = line;
				}
				
				// Go to the next instruction.
				i += memory.meta[i].size;
				if(i == memory.image_size)
					break;	// was return -- bug fix
				if(memory.meta[i].size == 0)
					break;
				//if(was_branch)
				//	break;				
				//---
				overlaps = false;
				for(U8 j = 1; j < memory.meta[i].size; ++j)
				{
					if(memory.meta[i + j].entry || memory.meta[i + j].branch)
					{
						overlaps = true;
						break;
					}
				}
				dump = overlaps;
				if(!overlaps)
				{
					if(skip_decompiled && memory.meta[i].decompiled == 1)
					{
						//i += memory.meta[i].size;
						break;
					}
					s.insn = memory.image + i;
					s.end = s.insn + 15;
					x = decode(s);
					if(x != 0)
						dump = true;
				}
				//---
				if(memory.meta[i].entry == 1 || memory.meta[i].branch == 1)
					done = true;
			}	while(!done && !dump);
			
			if(pure)
			{
				// elliminate dead registers.
				// e.g. if you have
				// x86_of = _x86_sub_of(d[x86_bx + x86_si + 0x55aa], 0x1);
				// x86_af = _x86_sub_af(trunc<byte>(d[x86_bx + x86_si + 0x55aa]), 0x1);
				// x86_of = _x86_add_of(x86_bx, 0x1);
				// x86_af = _x86_add_af(trunc<byte>(x86_bx), 0x1);
				// then you can remove the first two of, af outputs because there are subsequent outputs without
				// subsequent inputs.
				for(std::list<beta_line_t>::iterator j = basic_block.begin(); j != basic_block.end();)
				{
					int outflag = -1;
					if(j->flags_out.size() == 1)
						outflag = *j->flags_out.begin();
					std::list<beta_line_t>::iterator tmp = j;
					++j;
					if(outflag != -1)
					{
						for(std::list<beta_line_t>::iterator k = j; k != basic_block.end(); ++k)
						{
							if(k->flags_in.find(outflag) != k->flags_in.end())
								break;	// someone inputs it.
							if(k->flags_out.find(outflag) != k->flags_out.end())
							{
								tmp->flags_out.clear();
								tmp->flags_out.insert(-1);
							}
						}
					}
				}
			}
			
			// Print output here.
			if(!basic_block.empty())
			{
				if(basic_block.begin()->label == 'c')
					of << std::endl;
			}
			if(hll_style)
			{
				of << "{" << std::endl;
			}
			for(std::list<beta_line_t>::iterator j = basic_block.begin(); j != basic_block.end(); ++j)
			{
				bool nl = false;
				if(j->label == 'c')
				{
					//of << std::endl;
					// look up j->address and print list of callees here.
					
					if(!parser.xgraph[j->address].calls.empty())
					{
						of << (hll_style ? "//" : ";");
						of << " Calls:";
						for(std::set<U8>::iterator jj = parser.xgraph[j->address].calls.begin(); jj != parser.xgraph[j->address].calls.end(); ++jj)
							of << " " << std::hex << "fn_" << (U4)((*jj + memory.image_base) & memory.rip_mask) << std::dec;
						of << std::endl;
					}
					
					if(memory.meta[j->address].branch == 1)
					{
						of << (hll_style ? "//" : ";");
						of << " There are non-call branches here too." << std::endl;
					}

					of << "fn_";
					nl = true;
				}
				else
				if(j->label != 0)
				{
					of << "loc_";
					nl = true;
				}
				if(j->label != 0)
				{
					of << std::hex << (U4)((j->address + memory.image_base) & memory.rip_mask) << std::dec;
					of << ":" << std::endl;
					nl = true;
				}
				if(j->is_asm)
				{
					if(hll_style)
						of << "  asm " << j->line << ";";
					else
						of << "  " << j->line;
					nl = true;
				}
				else
				{
					bool skip = false;
					if(j->flags_out.size() == 1)
						if(*j->flags_out.begin() == -1)
							skip = true;
					if(!skip)
					{
						of << "  " << j->line;
						nl = true;
					}
				}
				if(!j->comment.empty())
				{
					nl = true;
					if(hll_style)
						of << "  // " << j->comment;
					else
						of << "  ; " << j->comment;
				}
				if(nl)
					of << std::endl;
			}
			basic_block.clear();
			if(hll_style)
				of << "}" << std::endl;
		}
	}
};

}	// namespace x86s

#endif	// l_crudcom__beta_h__included

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -