📄 scan.cpp

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 CPP
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
												//std::cerr << std::hex << value + module.image_base() << std::dec << std::endl;
												/*if(!obj[value].meta.target)
												{
													obj[value].meta.target = 1;
													targets.push_front(value);
												}*/
												if(fcn[value] != procnum)
												{
													// Target is not a part of current procedure we've already visited.
													// Because this is a jmp [<target>], we will reenter it.
													fcn[value] = procnum;
													targets.push_front(value);
												}
												
												assert(bb->out_edges.empty());
												bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));
												bb->out_edges.push_back(value);
												done = true;
												break;
											}
										}
									}
								}
							}
						}
					
						// unhandled jmp indirect.
						// *** fixme--don't decompile this procedure.
//std::cerr << "---";
						proc.u.s.no_decompile = 1;
						done = true;
						break;
						//---end---
					case insn_jmp:
//done = true;
//break;
						// fixme--follow this code if possible.

						tmp = (U8)s.icode->imm + (U8)offset + (U8)s.size + (U8)module.image_base();
						tmp &= (U8)(U4)0xffffffff;
						if(tmp < module.image_base())
						{
							std::cerr << " error!" << std::endl;
//std::cerr << std::hex << offset + module.image_base() << std::dec << std::endl;
							std::cerr << "[4] JMP to " << std::hex << tmp << std::dec << " is below image!" << std::endl;
							return false;
						}
						tmp -= module.image_base();
						if(tmp >= module.image_size())
						{
							std::cerr << " error!" << std::endl;
							std::cerr << "JMP to " << std::hex << tmp + module.image_base() << std::dec << " is beyond image!" << std::endl;
							return false;
						}

//std::cout << "jmp to " << std::hex << tmp + module.image_base() << std::dec << std::endl;

						assert(bb->out_edges.empty());

						//begin changes
						// if we JMP to a call target, invoke it. We don't want an empty "invokation" block (synthetic jump),
						// because we only support basic blocks that have at least one instruction in it - the last instruction
						// determines the type of basic block.
						if(scanx.meta[tmp].procedure)
						{
							// In this case, we produce only ONE out edge, but have an invokation.
							bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));
							bb->invokation = tmp;
							proc.calls.insert(tmp);
							done = true;
							break;
						}
						//end changes

						bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));
						bb->out_edges.push_back(tmp);
						
						// bugfix--if we branch to following insn, don't do anything special.
						/*if(tmp == (U8)(offset) + (U8)(scanx.meta[offset].length))
						{
							// branch to following insn
							break;
						}
						else*/
						if(fcn[tmp] == procnum)
						{
							done = true;
							break;
						}
						// Always reenter from JMP--except for the above case that checks
						// to see for scanx.meta[tmp].procedure != 0.
						fcn[tmp] = procnum;
						
						offset = tmp;
						
						// begin fix [fixme--check this!]
						// Switch to new BB.
						if(proc.blocks.find(offset) != proc.blocks.end())
							bb = &proc.blocks[offset];
						else
						{
							// We're creating a new basic block.
							bb = &proc.blocks[offset];
							bb->offset = offset;
							bb->stop = offset;
assert(bb->stop < module.image_size());
							bb->out_edges.clear();
							bb->invokation = (U8)(-1ll);
						}
						// end fix
						
						continue;

					// *** add out edges.
					// *** you can either call yourself or a known procedure, or it can be an unhandled indirect call.
					case insn__calli:
					
						// note: calls do not end a basic block.
					
						// --- begin ---
						if(target_is_import(module, offset, s))
						{
							// add out edge to the imported procedure here.
							// *** what about JMP indirect to an import ???
							//     as it is, the next layer must recognize a
							//     basic block that simply jmp's to an import.
							proc.imports.insert(module.lt_int.target->names[s.icode->disp - module.image_base()].import_name);
							break;
						}
	
						if(get_argtype_lo(s.icode->argtype[0]) == argtype_mem)
						{
							// call [<target>]
	
							// check for known target.
							if(s.icode->has_disp && !s.icode->ea.disp8)
							{
								U4 disp = s.icode->disp;
								if(s.icode->ea.base == 31 &&s.icode->ea.index == 31 && disp >= module.image_base())
								{
									disp -= module.image_base();
									if(disp + 4 - 1 < module.image_size())
									{
										// We already made sure the target isn't an import name.
										U4 value = module.get_dword(disp);
										if(value >= module.image_base())
										{
											value -= module.image_base();
											if(value < module.image_size())
											{
												// 'value' is a default call [<value>] target. we do not care whether there's
												// a relocation there or not. we will visit it too.

												// *** add out edge to 'value' here.
												proc.fixed_indirects.insert(value);
												
												break;
											}
										}
									}
								}
							}
	
						}
	
						// --- end ---
					
						break;
					
					case insn_call:
						// add an out edge if possible.

						tmp = (U8)s.icode->imm + (U8)offset + (U8)s.size + (U8)module.image_base();
						tmp &= (U8)(U4)0xffffffff;
						if(tmp < module.image_base())
						{
							std::cerr << " error!" << std::endl;
							std::cerr << "CALL to " << std::hex << tmp << std::dec << " is below image!" << std::endl;
							return false;
						}
						tmp -= module.image_base();
						if(tmp >= module.image_size())
						{
							std::cerr << " error!" << std::endl;
							std::cerr << "CALL to " << std::hex << tmp + module.image_base() << std::dec << " is beyond image!" << std::endl;
							return false;
						}
						//bugfix: if we call the following instruction, we DO want an out edge to it still.
						//if(tmp == (U8)(offset) + (U8)(scanx.meta[offset].length))
						//	;	// call to following insn
						//else
						{
							// *** add out edge here.
							proc.calls.insert(tmp);
						}

						// bugfix--done if noreturn.
						if(noreturn_call(module, tmp))
						{
							done = true;
						}
						break;

					case insn__jcc:
					case insn__jrcxz:
					case insn__loopnz:
					case insn__loopz:
					case insn__loop:
						// for these, add target to list if applicable.

						tmp = (U8)s.icode->imm + (U8)offset + (U8)s.size + (U8)module.image_base();
						tmp &= (U8)(U4)0xffffffff;
						if(tmp < module.image_base())
						{
							std::cerr << " error!" << std::endl;
							std::cerr << "Control transfer to " << std::hex << tmp << std::dec << " is below image!" << std::endl;
							return false;
						}
						tmp -= module.image_base();
						if(tmp >= module.image_size())
						{
							std::cerr << " error!" << std::endl;
							std::cerr << "Control transfer to " << std::hex << tmp + module.image_base() << std::dec << " is beyond image!" << std::endl;
							return false;
						}

if(!bb->out_edges.empty())
{
std::cerr << std::hex << offset + module.image_base() << " - BB at " << bb->offset + module.image_base() << std::dec << " already has " << bb->out_edges.size() << " out edges!" << std::endl;
}

						assert(bb->out_edges.empty());
						bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));	// for accounting purposes only

						// begin fix 2.
						// 'next' will be executed next. the branch will be added to the 'targets' list if necessary (???)
						next = offset + (U8)(scanx.meta[offset].length);
						if(scanx.meta[next].procedure)
						{
							bb->invokation = next;		// invoke it.
							proc.calls.insert(next);	// it's "called" by this procedure.
							done = true;
							// Now deal with 'tmp'. If it's == to next, we will have only one out edge (the
							// default out edge).
							if(tmp != next)
							{
								// We CAN visit tmp too - if it's not next!
								// If we have jecxz AnotherProc when AnotherProc is not the next insn, then
								// we WILL REENTER AnotherProc.
								bb->out_edges.push_back(tmp);								// actual target!
								if(fcn[tmp] != procnum)
								{
									fcn[tmp] = procnum;
									// This is a nonstandard case. We have ONE REAL OUT EDGE. It's taken if the condition
									// IS MET. The other out edge is missing, because if the condition IS NOT MET, then
									// we must perform the invokation.
									targets.push_front(tmp);
								}
							}
							break;
						}
						// end fix 2.

						bb->out_edges.push_back(tmp);										// actual target!
						bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));	// next insn - a real out edge!

						if(tmp == (U8)(offset) + (U8)(scanx.meta[offset].length))
							;	// branch to following insn. don't add to targets list, we'll visit it next!
						else
						if(fcn[tmp] != procnum)
						{
							// conditional branches to another procedure are not supported--just reenter it!
							//if(scanx.meta[tmp].procedure)
							{
								// This is ANOTHER procedure's entrypoint.
								;;; // fixme--add out edge.
							}
							//else
							{
								// we're reentering it.
								fcn[tmp] = procnum;
								targets.push_front(tmp);
							}
						}
												
						// begin fix
						// Switch to new BB.
						offset += (U8)(scanx.meta[offset].length);

						if(fcn[offset] == procnum)
						{
							done = true;
							break;
						}
						fcn[offset] = procnum;
#if 0
						if(scanx.meta[offset].branch)
						{
							// we've never been at 'offset' and it's fcn[] is already set - but
							// we must STOP HERE because of an in-label.
							targets.push_front(offset);
							break;
						}
#endif

						if(proc.blocks.find(offset) != proc.blocks.end())
							bb = &proc.blocks[offset];
						else
						{
							// We're creating a new basic block.
							bb = &proc.blocks[offset];
							bb->offset = offset;
							bb->stop = offset;
assert(bb->stop < module.image_size());
							bb->out_edges.clear();
							bb->invokation = (U8)(-1ll);
						}
						continue;
						// end fix

						//break;
					default:
						break;
				}
				
				if(done)	// if done, bb out edges already added.
					break;
				
				offset += (U8)(scanx.meta[offset].length);	// go to next insn in memory
				
				if(scanx.meta[offset].branch)
				{
					// next insn in memory has an in-label. we will stop here, but before
					// we do, add an out edge.
					bb->out_edges.push_back(offset);	// next insn in memory
					bb->out_edges.push_back(offset);	// fall thru to next block
					
					// --- begin bugfix ---
#if 0
					if(proc.blocks.find(offset) != proc.blocks.end())
						bb = &proc.blocks[offset];
					else
					{
						// We're creating a new basic block.
						bb = &proc.blocks[offset];
						bb->offset = offset;
						bb->stop = offset;
assert(bb->stop < module.image_size());
						bb->out_edges.clear();
						bb->invokation = (U8)(-1ll);
					}
#endif
					// --- end bugfix ---
				}
				
				if(fcn[offset] == procnum)
					break;
				fcn[offset] = procnum;
				if(scanx.meta[offset].branch)
				{
					// we've never been at 'offset' and its fcn[] is already set - but
					// we must STOP HERE because of an in-label.
					targets.push_front(offset);
					break;
				}
			}
		}
		
		// See if we need to set no_decompile flag.
		if(proc.u.s.no_decompile == 1)
		{
//std::cerr << "[+]";
			// This will be slow... fortunately it does not seem to happen very often.
			// The culprit is probably unhandled JMP <indirect>'s.
			for(U8 u = 0; u < module.image_size(); ++u)
			{
				if(fcn[u] == procnum)
					no_decompile[u] = 1;
			}
		}
	}
	
	//---
	for(U8 u = 0; u < module.image_size(); ++u)
	{
		scanx.meta[u].procedure = 0;
		if(no_decompile[u] == 1)
			scanx.meta[u].procedure = 1;
	}
	//---

// This is an ugly hack!!
#if 1
	for(std::map<U8, scanproc_t>::iterator iter = procs.procs.begin(); iter != procs.procs.end(); ++iter)
	{
		if(iter->second.u.s.no_decompile)
			continue;
		for(std::map<U8, scanbb_t>::iterator j = iter->second.blocks.begin(); j != iter->second.blocks.end(); ++j)
		{
			if(j->second.out_edges.size() < 2)
				continue;
			std::vector<U4>::iterator k = j->second.out_edges.begin();
			++k;
			for(; k != j->second.out_edges.end(); ++k)
			{
				if(iter->second.blocks.find(*k) == iter->second.blocks.end())
				{
					//std::cerr << "Out Edge from " << module.get_name(iter->first) << " goes to neverland and back again." << std::endl;
					goto No;
				}
			}
		}
		continue;
		No:
		// If we found an out edge to neverland and back, then don't even try to decompile it!
		// The problem is, sometimes an export is DATA and not CODE.
		// If anyone is dumb enough to call DATA, then we'll make them put up with "beta" output
		// e.g. output where signatures are not available....
		std::cerr << std::hex << iter->first + module.image_base() << std::dec << " (" << module.get_name(iter->first) << ") can\'t be decompile because of bad edges - is it data?" << std::endl;
		iter->second.u.s.no_decompile = 1;
	}
#endif

#if 0
//--- debug
std::cerr << "[begin]";
	for(std::map<U8, scanproc_t>::iterator iter = procs.procs.begin(); iter != procs.procs.end(); ++iter)
	{
std::cerr << std::hex << iter->first + module.image_base() << std::dec << std::endl;
std::list<U8> dfs_out;
dfsbb(iter->first, iter->second.blocks, dfs_out, module.image_size(), module.image_base());
	}
std::cerr << "[end]";
//--- end debug
#endif

	// Last thing to do is write the data to disk--the scanobj_t vector and the call graph. The
	// module metadata and image is still available to the user, we simply will let the vector
	// fall out of scope, in case the user wishes to scan many files before re-reading from disk
	// and processing them in sets. NOTE--if we have a large set of modules (e.g. DLLs) to process,
	// we might read & parse them one at a time. This is generally not a problem. Also note that
	// we might discover new procedure entrypoints, as we go along.
	// Now if a JUMP or FALL THRU is made to another procedure, that new procedure appears as an
	// out edge in the call graph and it is understood that we INVOKE it after restoring the stack.
	// However, if the stack is not restored at the time and a common entrypoint is used, the new
	// invokee is INLINED, e.g. code is duplicated.
	// Now if prior to discovery of a new function we execute some code which is later revealed to
	// be part of an indirect procedure, then the new function will duplicate the old code too.
	// Thus if one has int (*f)(); ... f(); ... f(); ... and we know the 1st one calls fn_1() and
	// the 2nd calls fn_2 which, it is learned, shares some nodes with fn_1... then we duplicate
	// code.
	// Basically, all indirect procedures are assumed to have certain heuristic input/outputs. They
	// may input ecx and edx and may output eax and edx. ecx may be destroyed on output but it must
	// not be used upon output. ebx, esp, ebp, esi, edi must be preserved/restored by the callee.
	// Because of this special convention, functions that are called indirectly (which are decompiled
	// after the code discovered via static analysis) . . . can be decompiled in any order without
	// regard to dependencies.
	// Also note that procedures imported from another module, when called, have a known calling
	// convention as well. It is not necessary to have decompiled a procedure imported from another
	// module before decompiling a procedure in the current module that calls the imported procedure.
	//
	// File format is as follows:
	// U8 - size of source binary (loaded program, for synchronization purposes)
	// bytes, (size of src binary) * sizeof(scanobj_t) - scanobj_t's.
	
// [[see quickcom for deleted code.]]
	
	return true;
}

}	// namespace ceres
上一页 1 23
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -