⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dataflow.h

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 H
📖 第 1 页 / 共 2 页
字号:
// dataflow.h
// Copyright (C) 2008 Willow Schlanger

#define ALLOW_ERASE 1
#define OPTIMIZE 1

#if DEBUG_LINE_NUMBERS
#undef ALLOW_ERASE
#define ALLOW_ERASE 0
#undef OPTIMIZE
#define OPTIMIZE 0
#endif

class dataflow_t
{
	call_graph_t &xgraph;
	memory_t &memory;
	int dsz;
	std::set<U8> process_visited;

	public:
	procedure_map_t procs;
	dataflow_t(call_graph_t &xgraph_t, memory_t &mem_t, int dsz_t) :
		xgraph(xgraph_t),
		memory(mem_t),
		dsz(dsz_t)
	{
	}
	
	void dataflow(procedure_t &p, parser_call_t &j);
	
	void go(U8 target)
	{
		std::map<U8, procedure_t>::iterator k = procs.find(target);
		if(k != procs.end())
		{
			return;	// already been here
		}
		procedure_t &p = procs[target];	// this must be here!!!
		// initialize 'p' here.
		p.address = target;
		p.root = NULL;
		parser_call_t &j = xgraph.find(target)->second;

		// call all children.
		for(std::set<U8>::iterator i = j.calls.begin(); i != j.calls.end(); ++i)
			go(*i);

		dataflow(p, j);
	}
	
	// This goes thru all basic blocks in all procedures still marked 'decompile'.
	// For each basic block, it adds flag/register use/def information to each hcode node.
	// When done with a basic block, it removes dead register/flag assignments.
	//   mov ax,5
	//   mov al,3  -- new definition must be LARGER to be considered dead.
	//   e.g. mov al,3
	//        mov ax,7
	//   here, mov al,3 is removed.
	// we have a list of hcode pointers for things that are live on output for the basic
	// block. [[do we need such a list ? if so, generate it at post_process_1().]]
	//
	// later, we will find the live outputs for the procedure.
	// also, we will learn what is later input FROM all invokations of a procedure (except
	// external pocedures, which have a known API).
	// finally, we can determine what is REALLY output by a given basic block--not just
	// live, but gets used in some path.
	// if something is not REALLY output, and it is later used by the basic block exactly
	// once, then absorb it into the expression by marking it 'unique'.
	// ex.:
	//    asgn(x86_ax, w[bp+6]);
	//    asgn(w[bp+6], add(x86_ax, w[bp+4]));
	// here, ax might well be live on output but since it's not REALLY output, and since
	// it's later used exactly once, we combine the hcode's into
	//    asgn(w[bp+6], add(w[bp+6], w[bp+4]));
	// register arguments are handled specially.
	// anything that remains must be a register variable.
	//
	// later, we discover arguments--input registers or the stack--and makes a list of
	// output registers/flags. also looks for ret <bytes>.
	// also identify 'identifiers' when possible (see above).
	void process()
	{
		for(procedure_map_t::iterator i = procs.begin(); i != procs.end(); ++i)
		{
		
			if(!xgraph[i->first].decompile)
				continue;	// can't decompile this procedure
			process_visited.clear();
			do_process(i->second.dfslast, i->second.root);
		}
		process_visited.clear();
	}
	
	void process_1()
	{
		for(procedure_map_t::iterator i = procs.begin(); i != procs.end(); ++i)
		{
			if(!xgraph[i->first].decompile)
				continue;	// can't decompile this procedure
			//std::cout << std::hex;
			//std::cout << i->first << ":" << std::endl;
			for(std::list<bb_t *>::iterator j = i->second.dfslast.begin(); j != i->second.dfslast.end(); ++j)
			{
				//std::cout << "  " << (U4)((*j)->address) << std::endl;;
				
				process_1_bb(*j);
			}
			//std::cout << std::dec << std::endl;
		}
	}
	
	private:
	void use_flags(U4 cc, hcode_file_t &use)
	{
		switch(cc)
		{
			case 0:
			case 1:
				use.flags |= (1 << 11);
				break;
			case 2:
			case 3:
				use.flags |= (1 << 0);
				break;
			case 4:
			case 5:
				use.flags |= (1 << 6);
				break;
			case 6:
			case 7:
				use.flags |= (1 << 0) | (1 << 6);
				break;
			case 8:
			case 9:
				use.flags |= (1 << 7);
				break;
			case 10:
			case 11:
				use.flags |= (1 << 2);
				break;
			case 12:
			case 13:
				use.flags |= (1 << 11) | (1 << 7);
				break;
			case 14:
			case 15:
				use.flags |= (1 << 11) | (1 << 7) | (1 << 6);
				break;
			default:
				throw std::runtime_error("unhandled condition code");
		}
	}

	void process_1_asgn(expr_t *x, hcode_file_t &use, hcode_file_t &du)
	{
		if(x->type == et_literal32)
			return;
		if(x->type == et_nodetype)
		{
			if(x->subtype == nt__x86_cc)
			{
				use_flags(x->u.subexpr[0]->u.data[0], use);
			}

			bool same_reg = false;
			expr_t *a = x->u.subexpr[0];
			expr_t *b = x->u.subexpr[1];
		
			if(a != NULL && b != NULL)
			{
				if(a->type == et_reg && a->subtype == 0 &&
					b->type == et_reg && b->subtype == 0
				)
				{
					if(a->u.data[0] == b->u.data[0] &&
						a->u.data[1] == b->u.data[1]
					)
						same_reg = true;
				}
			}

			// sub(<reg>,<same reg>) -> 0
			// xor(<reg>,<same reg>) -> 0
			// and(<reg>,<same reg>) -> <reg>
			// or(<reg,<same reg>) -> <reg>
#if OPTIMIZE
			if(same_reg)
			{
				if(x->subtype == nt_sub || x->subtype == nt_bitxor)
				{
					delete a;
					delete b;
					x->u.subexpr[0] = NULL;
					x->u.subexpr[1] = NULL;
					x->make_literal32(0);
					return;
				}
				if(x->subtype == nt_bitand || x->subtype == nt_bitor)
				{
					du.add_register(a->u.data[0], a->u.data[1]);
					*x = *a;
					delete a;
					delete b;
					
					return;
				}
			}
#endif

			for(int i = 0; i < 3; ++i)
			{
				if(x->u.subexpr[i] != NULL)
					process_1_asgn(x->u.subexpr[i], use, du);
				else
					break;
			}
			return;
		}

		if(x->type == et_nodeoperand)
		{
			if(x->subtype == no_undefined)
				;
			else
			if(x->subtype > no__begin_x86_flags && x->subtype < no__end_x86_flags)
			{
				switch(x->subtype)
				{
					case no_x86_df:
						du.flags |= (1 << 10);
						break;
					case no_x86_af:
						du.flags |= (1 << 4);
						break;
					case no_x86_of:
						du.flags |= (1 << 11);
						break;
					case no_x86_pf:
						du.flags |= (1 << 2);
						break;
					case no_x86_sf:
						du.flags |= (1 << 7);
						break;
					case no_x86_zf:
						du.flags |= (1 << 6);
						break;
					case no_x86_cf:
						du.flags |= (1 << 0);
						break;
					defualt:
						throw std::runtime_error("unsupported flag");
						break;
				}
			}
			else
				throw std::runtime_error("unsupported node-operand type");
			return;
		}
		if(x->type == et_reg)
		{
			if(x->subtype != 0)
				throw std::runtime_error("unsupported register subtype (1)");
			du.add_register(x->u.data[0], x->u.data[1]);
			return;
		}
		if(x->type == et_reg_reg)
		{
			if(x->subtype != 0)
				throw std::runtime_error("unsupported register subtype (2)");
			du.add_register(x->u.data[0], x->u.data[1]);
			du.add_register(x->u.data[0], x->u.data[2]);
			return;
		}
		if(x->type == et_ea16 || x->type == et_ea32)
		{
			if((x->u.data[0] & 31) != 31)
				use.add_register((x->type == et_ea16) ? argsize_16 : argsize_32, (x->u.data[0]) & 31);
			if(((x->u.data[0] / 32) & 31) != 31)
				use.add_register((x->type == et_ea16) ? argsize_16 : argsize_32, (x->u.data[0] / 32) & 31);
			return;
		}
		throw std::runtime_error("unsupported expression type");
	}
	
	// This is called by process_1_bb() on a basic block when it's done with that block, for "post-process"
	// processing. It removes hcode node's that define something that is later defined over, in same
	// basic block.
	// *** update bb->def and bb->live_use.
	void post_process_1(bb_t *bb)
	{
#if DEBUG_LINE_NUMBERS
		int line = 0;
		std::cout << std::hex << (U4)bb->address << std::dec << ":" << std::endl;
#endif
		for(std::list<hcode_t>::iterator i = bb->hcode.begin(); i != bb->hcode.end();)
		{
#if DEBUG_LINE_NUMBERS
			++line;
			std::cout << "  " << line << std::endl;
#endif
			for(int j = 0; j < 16; ++j)
			{
				// if a register bit is used BEFORE being defined, then set the corresponding
				// bit in bb->live_use.reg[j].
				bb->live_use.reg[j] |= (i->use.reg[j] & ~bb->def.reg[j]);
				bb->def.reg[j] |= i->def.reg[j];
			}
			bb->live_use.flags |= (i->use.flags & ~bb->def.flags);
		
			if(i->def.flags != 0)
			{
				int def_flag = -1;
				int j;
				for(j = 0; j < 64; ++j)
					if((i->def.flags & (1 << j)) != 0)
						break;
				def_flag = j;
				if(i->def.flags != (1 << def_flag))
					throw std::runtime_error("hcode modifies more than one flag");
				for(j = 0; j < 16; ++j)
					if(i->def.reg[j] != 0)
						throw std::runtime_error("hcode modifies one or more registers in addition to modifying a flag");

				// def_flag is modified by this hcode!
				bb->def.flags |= (1 << def_flag);
				
				std::list<hcode_t>::iterator tmp = i;
				++i;
				for(std::list<hcode_t>::iterator k = i; k != bb->hcode.end(); ++k)
				{
					if((k->use.flags & (1 << def_flag)) != 0)
						break;	// someone inputs it
					if((k->def.flags & (1 << def_flag)) != 0)
					{
#if ALLOW_ERASE
						bb->hcode.erase(tmp);
#endif
						break;
					}
				}
				
				continue;
			}
			
			// see if any registers are modified by this hcode.
			int def_reg = -1;
			int j;
			for(j = 0; j < 16; ++j)
				if(i->def.reg[j] != 0)
					break;
			if(j != 16)
			{
				def_reg = j;
				int def_reg_2 = -1;
				for(++j; j < 16; ++j)
					if(i->def.reg[j] != 0)
						break;
				def_reg_2 = j;
				if(j != 16)
				{
					for(++j; j < 16; ++j)
						if(i->def.reg[j] != 0)
							throw std::runtime_error("hcode modifies more than two registers");
				}

				// def_reg is modified by this hcode.

				std::list<hcode_t>::iterator tmp = i;
				++i;
				int kill = 0;
				for(std::list<hcode_t>::iterator k = i; k != bb->hcode.end(); ++k)
				{
					if((tmp->def.reg[def_reg] & k->use.reg[def_reg]) != 0)
						break;	// someone inputs 1st reg
					if(def_reg_2 != -1)
						if((tmp->def.reg[def_reg_2] & k->use.reg[def_reg_2]) != 0)
							break;	// someone inputs 2nd reg
					if((tmp->def.reg[def_reg] & ~k->def.reg[def_reg]) == 0)
					{
						kill |= 1;
						if(def_reg_2 == -1)
							break;
					}
					if(def_reg_2 != -1)
					{
						if((tmp->def.reg[def_reg_2] & ~k->def.reg[def_reg_2]) == 0)
						{
							kill |= 2;
							if(kill == 3)
								break;
						}
					}
				}
#if ALLOW_ERASE
				if(kill == 1 && def_reg_2 == -1)
					kill == 3;
				if(kill == 3)
				{
					bb->hcode.erase(tmp);
				}
#endif
				
				continue;
			}
			
			++i;
		}
	}
	
	void process_1_bb(bb_t *bb)
	{
		int line = 0;

		for(std::list<hcode_t>::iterator i = bb->hcode.begin(); i != bb->hcode.end();)
		{
//			++line;
//			std::cout << "    " << std::dec << line << std::hex << std::endl;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -