⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scan.cpp

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 CPP
📖 第 1 页 / 共 3 页
字号:
// scan.cpp
// Copyright (C) 2008,2009 Willow Schlanger

#include <iostream>
#include <list>

/*
Note--the code that checks whether a procedure exits or not needs work--right now
we go by the (unqualified) function name, ignoring the module it's in. This is bad
since imports will always be qualified.

QuickCom EXE/DLL Decompiler  Copyright (C) 2008,2009 Willow Schlanger

msvcr80.dll:
loading... done
scanning... error!
781c28f0
[2] JMP to 5094426a is below image!

Error scanning msvcr80.dll!

---

Can't scan msvcrt.dll either! [try this again.]

---

  Need to recognize this:
  -----------------------
  cmp  dword [ebp+0xffffff30],byte +0x09
  ja  loc_411429
  mov  edx,[ebp+0xffffff30]
  jmp  dword near [edx*4+0x00411434]  ; (*)  
  
  It's unoptimized MSVC code that confuses the decompiler at present.

  This code needs to be recognized!!!
77bc7daf  cmp  eax,byte +0x0c
77bc7db2  push  esi
77bc7db3  ja  0x77bc7d62
77bc7db5  movzx  eax,byte [eax+0x77bc7d84]
77bc7dbc  jmp  dword near [eax*4+0x77bc7d6c]		-- indirect JUMP .

*/

#include "scan.h"

#include <cstring>
#include <cstdio>
#include <cassert>

// What if there is this code:
// call fn_x
// fn_x:
//   call A
//   ret
// This should decompile into:
// fn_x();
// return INVOKE(fn_x)();
// That is, the first CALL is normal, then there's an INVOCATION of the target.
// It should work - but is not yet tested.

// What about jmp short $+2 ?? That should terminate a basic block, because there's
// an in-label. What about Jecxz $+2 ???
// This should work but has not been tested either.

namespace ceres
{

// This is required! Add to this as needed.
// NOTE: This is an ugly hack and what is worse, it won't work if it's an IMPORT NAME...
// FIXME!
// --- if the target is a JMP to an IMPORT, see WHAT IMPORT IT IS (fixme, do this!)
// --- RtlExitUserProcess --- this is called. It does not return. However, we want
//     to find out what the return depth of the procedure is... it's -1 presently.
//     NEED TO SET A FLAG IF it IS a call that does not return!!!!
bool noreturn_call(module_t &module, U8 offset)
{
	if(module.is_special(offset))
	{
		std::string s = module.get_name(offset);
		// These are really just guesses.
		// Note that ANY export with this name will be assumed not to exit,
		// even if it's not from kernel32 or ntdll.
		if(s == "RpcRaiseException")
			return true;
		if(s == "RaiseException")
			return true;
		if(s == "ExitProcess")
			return true;
		if(s == "ExitThread")
			return true;
		if(s == "RtlExitUserProcess")
			return true;
		if(s == "RtlExitUserThread")
			return true;
		if(s == "FatalExit")
			return true;
		if(s == "FreeLibraryAndExitThread")
			return true;
	}
	return false;
}

bool scanner_t::handle_switch_zx(module_t &module, U8 offset, decode_state_t &s, scan_target_t &scanx, std::list<U8> &targets)
{
	targets.clear();

	if(icode_size >= 4 && get_argtype_lo(s.icode->argtype[0]) == argtype_mem)
	{
		icode_t &icode_0 = *s.icode;										// jmp dword [reg*4+disp2]
		icode_t &icode_1 = scanx.icode[icode_list[(icode_index - 1) & 15]];	// movzx reg,byte [reg+disp1]
		icode_t &icode_2 = scanx.icode[icode_list[(icode_index - 2) & 15]];	// ja <target>
		icode_t &icode_3 = scanx.icode[icode_list[(icode_index - 3) & 15]];	// cmp reg,<imm>
		
		U2 base = s.icode->ea.base;
		U2 index = s.icode->ea.index;
		U2 index_scale = s.icode->ea.index_scale;
		
		if(base == 31 && index != 31 && index_scale == 2 &&
			icode_3.insn == insn_cmp &&
			icode_2.insn == insn__jcc &&
			icode_1.insn == insn_movzx &&
			icode_2.argvalue[1] == 7 /* ja */
		)
		{
			U4 limit = icode_3.imm;
			U4 table = icode_1.disp - module.image_base();
			U4 xtargets = icode_0.disp - module.image_base();
			for(U4 x = 0; x <= limit; ++x)
			{
				if(table + x >= module.image_size())
				{
					//proc.impure = true;
					continue;
				}
				U1 c = module.get_byte(table + x);
				U4 d = (U4)(c) * 4 + xtargets;
				if(d + 4 - 1 >= module.image_size())
				{
					//proc.impure = true;
					continue;
				}
				U4 e = module.get_dword(d);
				targets.push_back(e);
			}
			return true;
		}
	}

	return false;
}

bool scanner_t::handle_switch(module_t &module, U8 offset, decode_state_t &s, scan_target_t &scanx, std::list<U8> &targets)
{
	// cmp reg,imm
	// ja OutLabel
	// [pop's... - but not to reg]
	// jmp dword near [reg*4+base]
	
	targets.clear();
	if(icode_size >= 3 && get_argtype_lo(s.icode->argtype[0]) == argtype_mem)
	{
		icode_t &icode_0 = *s.icode;										// jmp
		icode_t &icode_1 = scanx.icode[icode_list[(icode_index - 1) & 15]];	// jcc
		
		UINT num = 2;
		for(;;)
		{
			if(scanx.icode[icode_list[(icode_index - num) & 15]].insn == insn_cmp)
				break;
			if(scanx.icode[icode_list[(icode_index - num) & 15]].insn != insn_pop)
				return false;	// MSVC may interleave POP's before jcc
			// We have a pop - make sure it's not to the register in question!
			if(scanx.icode[icode_list[(icode_index - num) & 15]].argvalue[0] == s.icode->ea.index)
				return false;	// uh oh, can't be a switch
			++num;
			if(icode_size < num)
				return false;
		}
		
		icode_t &icode_2 = scanx.icode[icode_list[(icode_index - num) & 15]];	// cmp
		
		U2 base = s.icode->ea.base;
		U2 index = s.icode->ea.index;
		U2 index_scale = s.icode->ea.index_scale;
		
		if(base != 31 || index == 31 || index_scale != 2 || icode_2.insn != insn_cmp || icode_1.insn != insn__jcc)
			return false;
		if(get_argtype_lo(icode_2.argtype[0]) == argtype_reg &&
			get_argtype_lo(icode_2.argtype[1]) == argtype_imm &&
			index == icode_2.argvalue[0] && s.icode->asz == argsize_32 &&
			get_argsize_lo(icode_2.argsize[0]) == argsize_32
		)
		{
			if(icode_1.argvalue[1] == 7)	// jnbe e.g. ja
			{
				U4 limit = icode_2.imm;
				if(s.icode->disp < module.image_base())
					return false;
				U4 table_base = s.icode->disp - module.image_base();
				if(table_base + 4 * limit + 3 >= module.image_size())
					return false;
				U4 x;
				for(U4 i = 0; i <= limit; ++i)
				{
					x = module.get_dword(table_base + 4 * i);
					targets.push_back(x);
				}
				return true;
			}
		}
	}
		
	return false;
}

// pre: s.icode has decoded a calli or jmpi insn.
// returns true if target is an import, precisely; else returns false.
// note--while scanning, if we jmp [import] or call [import] directly, then we do not
// need to follow that particular target--as it's an import.
bool scanner_t::target_is_import(module_t &module, U8 offset, decode_state_t &s)
{
	if(s.icode->has_disp && !s.icode->ea.disp8)
	{
		U4 disp = s.icode->disp;
		if(s.icode->ea.base == 31 &&s.icode->ea.index == 31 && disp >= module.image_base())
		{
			disp -= module.image_base();
			if(disp + 4 - 1 < module.image_size())
			{
				// d['disp'] is within image range.
				if(module.is_special(disp))
				{
					name_t &nm = module.lt_int.target->names[disp];
					if(!nm.import_name.empty())
						return true;
				}
			}
		}
	}
	return false;
}

// This does a depth-first-search on all basic blocks in the procedure, writing the
// result to dfs_out (in reverse order). It then makes sure each BB has been visited,
// e.g. that there are no orphans.

void do_dfsbb(U8 node, std::map<U8, scanbb_t> &blocks, std::list<U8> &dfs_out, std::set<U8> &visited)
{
	if(visited.find(node) != visited.end())
		return;	// already been here
	visited.insert(node);

if(blocks.find(node) == blocks.end())
{
std::cerr << std::hex << node << std::dec << " [+]" << std::endl;
}

assert(blocks.find(node) != blocks.end());

	scanbb_t &bb = blocks[node];
	if(bb.out_edges.size() > 1)
	{
		// We ignore the first out edge, which is always SIMPLY the next insn in memory, for
		// accounting purposes.
		std::vector<U4>::iterator i = bb.out_edges.begin();
		++i;
		for(; i != bb.out_edges.end(); ++i)
		{
if(blocks.find(*i) == blocks.end())
{
std::cerr << std::hex << node << std::dec << " [xref]" << std::endl;
}

			do_dfsbb(*i, blocks, dfs_out, visited);
		}
	}
	
	dfs_out.push_front(node);
}

bool dfsbb(U8 root, std::map<U8, scanbb_t> &blocks, std::list<U8> &dfs_out, U8 module_size, U8 module_base)
{
	std::set<U8> visited;

	dfs_out.clear();

	do_dfsbb(root, blocks, dfs_out, visited);
	
	for(std::map<U8, scanbb_t>::iterator i = blocks.begin(); i != blocks.end(); ++i)
	{
/*if(i->second.stop >= module_size || i->second.offset >= module_size)
{
	std::cerr << std::hex << i->second.offset + module_base << std::dec << " <-" << std::endl;
}*/
assert(i->second.stop < module_size && i->second.offset < module_size);
		if(visited.find(i->first) == visited.end())
		{
std::cerr << "[orphan: " << std::hex << i->first << std::dec << "]" << std::endl;
			return false;
		}
	}
	
	return true;
}

//was: bool scanner_t::scan(ceres::target_t &target, std::string mod)
bool scanner_t::scan(scan_target_t &scanx, std::set<U8> &entrypoints, bool dots, scanprocs_t &procs)
{
	//mod = make_lowercase(mod);
	//module_t &module = target.data[mod];
	// search for:  target, module, obj[].
	
	//std::vector<scanobj_t> obj(module.image_size());
	module_t module(scanx);	// adaptor from old QuickCom code

	std::vector<U4> fcn(module.image_size());	// 0 means this byte not part of a procedure
	memset(&fcn[0], 0, module.image_size() * sizeof(U4));
	
	std::vector<U1> no_decompile(module.image_size());	// =1 if no decompile
	memset(&no_decompile[0], 0, module.image_size() * sizeof(U1));
	
	UINT sz = module.image_size();
	decode_state_t s;
	U8 bytesleft;
	int status;
	std::list<U8> calls;
	
	// *** Why do we want to re-decode it? We don't!!!
	// *** Take it out. Also use 'entrypoints' instead of making our own list.

	std::list<U8> targets;
	
	for(std::set<U8>::iterator i = entrypoints.begin(); i != entrypoints.end(); ++i)
	{
		scanx.meta[*i].target = 1;
		targets.push_front(*i);
		
		scanx.meta[*i].procedure = 1;
		calls.push_front(*i);
	}

	U8 offset, tmp;
	bool done;
	while(!targets.empty())
	{
		offset = targets.back();
		targets.pop_back();
		
		icode_size = 0;
		icode_index = 15;
		
		for(;;)
		{
			if(offset >= module.image_size())
			{
				std::cerr << " error!" << std::endl;
				std::cerr << "Tried to scan insn at " << std::hex << (offset + module.image_base()) << std::dec << " which is beyond image!" << std::endl;
				return false;
			}

			icode_index = (icode_index + 1) & 15;
			if(icode_size < 16)
				++icode_size;
			icode_list[icode_index] = offset;
			s.icode = &scanx.icode[offset];
			s.size = scanx.meta[offset].length;

			scanx.meta[offset].target = 1;
			
			done = false;	// if false, go to next insn in memory when done.
			switch(s.icode->insn)
			{
				// for any type of return, we're done with the current visit.
				case insn__ret:
				case insn__retf:
				case insn__retfnum:
				case insn__retnum:
				case insn__iret:
					done = true;
					break;
				// unhandled stuff.
				case insn__callfd:
				case insn__callfi:
					break;
				// unhandled stuff.
				case insn__jmpfd:
				case insn__jmpfi:
					done = true;
					break;
				case insn__jmpi:
				
					if(target_is_import(module, offset, s))
					{
						done = true;
						break;
					}
					
					if(get_argtype_lo(s.icode->argtype[0]) == argtype_mem)
					{
						// jmp [<target>]

						std::list<U8> swtargets;
						if(handle_switch(module, offset, s, scanx, swtargets) || handle_switch_zx(module, offset, s, scanx, swtargets))
						{
							U8 x;
							for(std::list<U8>::iterator i = swtargets.begin(); i != swtargets.end(); ++i)
							{
								x = *i;
								if(x < module.image_base())
								{
									std::cerr << " error!" << std::endl;
									std::cerr << "Insn at " << std::hex << (offset + module.image_base()) << std::dec << " is switch with target to " << std::hex << x << std::dec << " which is below image base!" << std::endl;
									return false;
								}
								x -= module.image_base();
								if(x >= module.image_size())
								{
									std::cerr << " error!" << std::endl;
									std::cerr << "Insn at " << std::hex << (offset + module.image_base()) << std::dec << " is switch with target to " << std::hex << (x + module.image_base()) << std::dec << " which is beyond image!" << std::endl;
									return false;
								}
								scanx.meta[x].branch = 1;
								if(!scanx.meta[x].target)
								{
									//std::cerr << std::hex << (offset + module.image_base()) << " -> " << (x + module.image_base()) << std::dec << std::endl;
									scanx.meta[x].target = 1;
									targets.push_front(x);
								}
							}
						
//std::cout << "[case 1]";
							done = true;
							break;
						}
						
						// check for known target. if found, we need to visit it... but whose procedure is it?
						if(s.icode->has_disp && !s.icode->ea.disp8)
						{
							U4 disp = s.icode->disp;
							if(s.icode->ea.base == 31 &&s.icode->ea.index == 31 && disp >= module.image_base())
							{
								disp -= module.image_base();
								if(disp + 4 - 1 < module.image_size())
								{
									// We already made sure the target isn't an import name.
									U4 value = module.get_dword(disp);
									if(value >= module.image_base())
									{
										value -= module.image_base();
										if(value < module.image_size())
										{
											// 'value' is a default jmp [<value>] target. we do not care whether there's
											// a relocation there or not. we will visit it too.
											//std::cerr << std::hex << value + module.image_base() << std::dec << std::endl;
											scanx.meta[value].branch = 1;
											if(!scanx.meta[value].target)
											{
												scanx.meta[value].target = 1;
												targets.push_front(value);
											}
											
//std::cout << "[case 2]";
											done = true;
											break;
										}
									}
								}
							}
						}
					}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -