📄 dataflow.h

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 H
📖 第 1 页 / 共 2 页
字号:
上一页 12
		
			// *i is the current hcode_t we're processing.
			if(i->opcode == ht_expr)
			{
				if(i->operands.size() != 1)
					throw std::runtime_error("bad expression encountered (1)");
				expr_t *asgn = i->operands.front();
				if(asgn->type != et_asgn)
					throw std::runtime_error("bad expression encountered (2)");
				expr_t *dest = asgn->u.subexpr[0];
				// note: we leave in asgn(void) -- could just as easilly erase it.
				if(dest == NULL)
					continue;
				if(dest->type == et_nodeoperand && dest->subtype == no_void)
					continue;
				expr_t *src = asgn->u.subexpr[1];
				if(src == NULL)
					throw std::runtime_error("bad expression encountered (3)");
				if(src->type == et_nodeoperand && src->subtype == no_void)
					throw std::runtime_error("bad expression encountered (4)");
				
				// ok, asgn(dest, src) encountered.
				// dest may be a reg/flag, or deref(ea_<16/32/64>).
				// src may be a complex expr--if you encounter any flags, regs, or
				// ea's, it's all input.
				process_1_asgn(dest, i->use, i->def);
				process_1_asgn(src, i->use, i->use);

				// see if dest, src are both the exact same register.
				if(dest->type == et_reg && src->type == et_reg)
				{
					if(dest->subtype != 0 || src->subtype != 0)
						throw std::runtime_error("bad expression encountered (5)");
					if(dest->u.data[0] == src->u.data[0] && dest->u.data[1] == src->u.data[1])
					{
						// asgn(<register>, <exact same register>);
						std::list<hcode_t>::iterator old = i;
						++i;
#if ALLOW_ERASE
						bb->hcode.erase(old);
#endif
						continue;
					}
				}
				++i;
			}
			else
			{
				// *** Handle loop/loopz/loopnz here.
				// *** If it's a loop* instruction, insert asgn(rcx:asz, sub(rcx:asz, 1)) in the
				// *** hcode. WHY DO THAT???
				// When we get to post_process_1(), all hcode nodes will have a variety
				// of inputs, but will 'define' only a register OR a flag AT MOST.
				if(i->opcode == ht_jcond)
				{
					if(i->operands.size() < 2)
						throw std::runtime_error("malformed jcond (1)");
					
					// get cc.
					int cc = -1;
					if(i->operands.front() != NULL)
					{
						if(i->operands.front()->type != et_literal32)
							throw std::runtime_error("malformed jcond (2)");
						cc = i->operands.front()->u.data[0];
					}
					
					if(cc != -1)
						use_flags(cc, i->use);

					// handle cx/ecx/rcx here.
					std::list<expr_t *>::iterator tmp = i->operands.begin();
					++tmp;
					if(*tmp != NULL)
					{
						if((*tmp)->type != et_literal32)
							throw std::runtime_error("malformed jcond (3)");
						U4 rcx = (*tmp)->u.data[0];
						U1 rcx_size = (U1)rcx;
						i->use.add_register(rcx_size, 1);	// cx, ecx, or rcx
						if(rcx >= 0x300)
						{
							i->def.add_register(rcx_size, 1);	// cx, ecx, or rcx
							
							expr_t *cx = new expr_t();
							if(rcx_size == argsize_16)
								cx->size = ns_word;
							else
							if(rcx_size == argsize_32)
								cx->size = ns_dword;
							else
							if(rcx_size == argsize_64)
								cx->size = ns_qword;
							cx->type = et_reg;
							cx->subtype = 0;
							cx->u.data[0] = rcx_size;
							cx->u.data[1] = 1;				// cx, ecx, or rcx
							cx->u.data[2] = 0;	// not used
							
							expr_t *cxsrc = new expr_t();
							*cxsrc = *cx;
							
							expr_t *one = new expr_t();
							one->make_literal32(1);
							
							expr_t *sub = new expr_t();
							sub->size = cx->size;
							sub->type = et_nodetype;
							sub->subtype = nt_sub;
							sub->u.subexpr[0] = cxsrc;
							sub->u.subexpr[1] = one;
							sub->u.subexpr[2] = NULL;
							
							expr_t *asgn = new expr_t();
							asgn->size = cx->size;
							asgn->type = et_asgn;
							asgn->subtype = 0;
							asgn->u.subexpr[0] = cx;
							asgn->u.subexpr[1] = sub;
							asgn->u.subexpr[2] = NULL;
							
							std::list<hcode_t>::iterator ii = bb->hcode.insert(i, hcode_t());
							ii->opcode = ht_expr;
							ii->operands.push_back(asgn);
							ii->use.add_register(rcx_size, 1);	// new hcode uses cx/ecx/rcx
							ii->def.add_register(rcx_size, 1);	// and defines it as well
							
							// just inserted asgn(rcx, sub(rcx, 1)) and manually did use/def for it
							// no longer need to implictly decrement cx/ecx/rcx - we did it explictly.
							(*tmp)->u.data[0] -= 0x100;
						}
					}
				}
				else
				if(i->opcode == ht_jump)
					;
				else
				if(i->opcode == ht_call)
					;
				else
				if(i->opcode == ht_ret)
					;
				else
					throw std::runtime_error("Unknown hcode opcode encountered");

				++i;
			}
		}
		
		post_process_1(bb);
	}
	
	void do_process(std::list<bb_t *> &dfslast, bb_t *node)
	{
		if(process_visited.find(node->address) != process_visited.end())
			return;
		process_visited.insert(node->address);
		// call all out edges
		std::vector<bb_t *>::iterator i = node->out_edges.end();
		for(;;)
		{
			if(i == node->out_edges.begin())
				break;
			--i;
			if((*i)->proc == node->proc)
				do_process(dfslast, *i);
		}
		dfslast.push_front(node);
	}
	
	void build_call_dfs(std::set<U8> &visited, std::list<U8> &dfs, U8 address, parser_call_t &calls)
	{
		if(visited.find(address) != visited.end())
			return;	// already been here.
		visited.insert(address);
		for(std::set<U8>::iterator i = calls.calls.begin(); i != calls.calls.end(); ++i)
			build_call_dfs(visited, dfs, *i, xgraph[*i]);
		dfs.push_back(address);
	}
	
	// --- begin dcc algorithm
	// This section contains code with derrived from dcc, a GPL'd decompiler,
	// with the following copyright notice:
	// (C) Cristina Cifuentes
	// This decompiler is also GPL'd code, so there is no problem with that.
	
	public:
	
	// This does the actual data flow analysis.
	// It goes thru procedures via a depth-first search, so that we'll have
	// information on any called procedures already.
	void process_2()
	{
		std::list<U8> dfs;
		std::set<U8> visited;
		
		for(call_graph_t::iterator i = xgraph.begin(); i != xgraph.end(); ++i)
			build_call_dfs(visited, dfs, i->first, i->second);
		visited.clear();
		
		hcode_file_t live_out;
		for(std::list<U8>::iterator i = dfs.begin(); i != dfs.end(); ++i)
		{
			if(!xgraph[*i].decompile)
				continue;	// can't decompile this procedure
			//std::cout << std::hex;
			//std::cout << *i << ":" << std::endl;

			procedure_t &proc = procs[*i];			
			
			// ***
			// *** proc.live_out should be the UNION of everything output by a basic block
			//     (except when something is preserved/restored; two xchg's do nothing!)
			// *** proc.live_in is a set. if a path exists that uses a register/flag
			//     before it was defined, then the register/flag needs to be in the set.
			// *** if we encounter a procedure, we'll already know that procedure's live_in
			//     and live_out. Give feedback--if we USE something from a procedure, tell
			//     that procedure (how to do this? it's complicated! e.g. we could return
			//     a register from a procedure and the caller could use it.)
			// *** so, we need two mechanisms of tracking--does something hold its input
			//     value? does something hold a value OUTPUT from a procedure--and if so,
			//     what procedure?
			
			live_out.clear();
			live_reg_analysis(proc, live_out);
		}
	}
	
	private:
	
	void live_reg_analysis(procedure_t &proc, hcode_file_t &live_out)
	{
#if 1
		// --begin testing code--
		live_out.clear();	// already zero'd
		for(std::list<bb_t *>::iterator j = proc.dfslast.begin(); j != proc.dfslast.end(); ++j)
		{
			bb_t *bb = *j;
			live_out.set_bits(bb->def);
			if(bb->live_use.flags != 0)
				proc.remove_cc = false;
			for(std::vector<bb_t *>::iterator k = bb->out_edges.begin(); k != bb->out_edges.end(); ++k)
			{
				if((*k)->proc != &proc)
				{
					// call/invokation to another procedure
					live_out.set_bits((*k)->proc->live_out);
					proc.remove_cc &= (*k)->proc->remove_cc;
				}
			}
		}
		// --end testing code--

		bool done;
		hcode_file_t prev_live_in, prev_live_out;
		proc.live_out = live_out;
		bb_t *bb = NULL;
		do
		{
			done = true;
			// the following line is correct - do not reverse it!
			for(std::list<bb_t *>::iterator j = proc.dfslast.end(); ;)
			{
				if(j == proc.dfslast.begin())
					break;
				--j;
			
				bb = *j;
				//std::cout << "  " << (U4)(bb->address) << std::endl;;
				
				// process this bb.
				prev_live_in = bb->live_in;
				prev_live_out = bb->live_out;
				
				if(bb->out_edges.empty())
				{
					bb->live_out = live_out;

					// fixme--get return expr of function (dcc does that here).

					if(!bb->hcode.empty())
					{
						if(bb->hcode.back().opcode == ht_ret)
						{
							bb->hcode.back().use = live_out;
						}
					}
				}
				else
				{
					// dcc does this -- "check successors".
					for(std::vector<bb_t *>::iterator k = bb->out_edges.begin(); k != bb->out_edges.end(); ++k)
					{
						// *** we have out edges to different procedures, I don't think dcc does that.
						// *** this is a fix for that.
						// *** how are we handling jmp to a different procedure?
						if(bb->proc == &proc)
						{
							bb->live_out.set_bits((*k)->live_in);
						}
					}
					
					// dcc -- "propagate to invoked procedure".
					if(!bb->hcode.empty())
					{
						if(bb->hcode.back().opcode == ht_call)
						{
							// fixme--add support for library routines and indirect calls, here.
							if(bb->out_edges.size() == 1)
							{
								// decompile to: call f() then jmp - e.g. invoke - f().
								throw std::runtime_error("call to the following instruction not yet implemented");
							}
							if(bb->out_edges.size() != 2)
								throw std::runtime_error("internal error - call node needs two targets");
							procedure_t &callee = *bb->out_edges[1]->proc;

							// this algorithm works like dcc...
							//if(&callee != &proc)
							{
								bb->live_out = callee.live_in;
								
								// fixme--what about indirect calls? see above
								if(!bb->live_out.is_zero())
								{
									bb->hcode.back().use = callee.live_in;
									bb->hcode.back().def = callee.live_out;
								}
							}
							
							bb_t *next = bb->out_edges[0];
							callee.vote.set_bits(next->live_in);
						}
						// fixme--what about JUMPs to a procedure?
					}
#if 1
					else
					if(bb->hcode.back().opcode == ht_jump)
					{
						if(bb->out_edges.empty())
							throw std::runtime_error("jump node with no targets encountered");
						// direct jumps have only one target.
						// indirect jumps have >= 1 target and are assumed to be to the same procedure.
						if(bb->out_edges.size() == 1 && bb->out_edges.front()->proc != &proc)
						{
							throw std::runtime_error("jump to a different procedure - not implemented yet");
						}
					}
#endif
				}

		        bb->live_in = bb->live_use;
		        bb->live_in.set_bits_mask(bb->live_out, bb->def);
		        
		        if(!prev_live_in.equals(bb->live_in) || !prev_live_out.equals(bb->live_out))
		        	done = false;
			}
		}	while(!done);
		
		if(bb != NULL)
		{
			if(!bb->live_in.is_zero())
			{
				//std::cout << "case 1" << std::endl;
				proc.live_in = bb->live_in;
			}
			else
			{
				//std::cout << "case 2" << std::endl;
			}

			// fixme -- remove references to register variables.
			// dcc: if si/di is a register variable, remove it from "live in"
			// for 'proc' and 'bb'.
		}

		//std::cout << std::dec << std::endl;
		//proc.live_out = live_out;		// was at beginning

		// --- begin testing code ---
		// ** Note, this code does NOT work.
		if(proc.remove_cc)
		{
			for(std::list<bb_t *>::iterator j = proc.dfslast.begin(); j != proc.dfslast.end(); ++j)
			{
				bb_t *bb = *j;
				for(std::list<hcode_t>::iterator k = bb->hcode.begin(); k != bb->hcode.end(); )
				{
					if(k->def.flags != 0)
					{
						// we have already made sure only one flag is defined at a time.
						std::list<hcode_t>::iterator h = k;
						for(++h; h != bb->hcode.end(); ++h)
						{
							if((h->use.flags & k->def.flags) != 0)
								break;
						}
						if(h == bb->hcode.end())
						{
							h = k;
							++k;
							bb->hcode.erase(h);
						}
						else
							++k;
					}
					else
						++k;
				}
			}
		}
		// --- end testing code ---
#endif
	}
	
	// --- end dcc algorithm
};
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -