📄 scan.cpp
字号:
//std::cerr << std::hex << value + module.image_base() << std::dec << std::endl;
/*if(!obj[value].meta.target)
{
obj[value].meta.target = 1;
targets.push_front(value);
}*/
if(fcn[value] != procnum)
{
// Target is not a part of current procedure we've already visited.
// Because this is a jmp [<target>], we will reenter it.
fcn[value] = procnum;
targets.push_front(value);
}
assert(bb->out_edges.empty());
bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));
bb->out_edges.push_back(value);
done = true;
break;
}
}
}
}
}
}
// unhandled jmp indirect.
// *** fixme--don't decompile this procedure.
//std::cerr << "---";
proc.u.s.no_decompile = 1;
done = true;
break;
//---end---
case insn_jmp:
//done = true;
//break;
// fixme--follow this code if possible.
tmp = (U8)s.icode->imm + (U8)offset + (U8)s.size + (U8)module.image_base();
tmp &= (U8)(U4)0xffffffff;
if(tmp < module.image_base())
{
std::cerr << " error!" << std::endl;
//std::cerr << std::hex << offset + module.image_base() << std::dec << std::endl;
std::cerr << "[4] JMP to " << std::hex << tmp << std::dec << " is below image!" << std::endl;
return false;
}
tmp -= module.image_base();
if(tmp >= module.image_size())
{
std::cerr << " error!" << std::endl;
std::cerr << "JMP to " << std::hex << tmp + module.image_base() << std::dec << " is beyond image!" << std::endl;
return false;
}
//std::cout << "jmp to " << std::hex << tmp + module.image_base() << std::dec << std::endl;
assert(bb->out_edges.empty());
//begin changes
// if we JMP to a call target, invoke it. We don't want an empty "invokation" block (synthetic jump),
// because we only support basic blocks that have at least one instruction in it - the last instruction
// determines the type of basic block.
if(scanx.meta[tmp].procedure)
{
// In this case, we produce only ONE out edge, but have an invokation.
bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));
bb->invokation = tmp;
proc.calls.insert(tmp);
done = true;
break;
}
//end changes
bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length));
bb->out_edges.push_back(tmp);
// bugfix--if we branch to following insn, don't do anything special.
/*if(tmp == (U8)(offset) + (U8)(scanx.meta[offset].length))
{
// branch to following insn
break;
}
else*/
if(fcn[tmp] == procnum)
{
done = true;
break;
}
// Always reenter from JMP--except for the case above, which checks
// whether scanx.meta[tmp].procedure != 0.
fcn[tmp] = procnum;
offset = tmp;
// begin fix [fixme--check this!]
// Switch to new BB.
if(proc.blocks.find(offset) != proc.blocks.end())
bb = &proc.blocks[offset];
else
{
// We're creating a new basic block.
bb = &proc.blocks[offset];
bb->offset = offset;
bb->stop = offset;
assert(bb->stop < module.image_size());
bb->out_edges.clear();
bb->invokation = (U8)(-1ll);
}
// end fix
continue;
// *** add out edges.
// *** you can either call yourself or a known procedure, or it can be an unhandled indirect call.
case insn__calli:
// note: calls do not end a basic block.
// --- begin ---
if(target_is_import(module, offset, s))
{
// add out edge to the imported procedure here.
// *** what about JMP indirect to an import ???
// as it is, the next layer must recognize a
// basic block that simply jmp's to an import.
proc.imports.insert(module.lt_int.target->names[s.icode->disp - module.image_base()].import_name);
break;
}
if(get_argtype_lo(s.icode->argtype[0]) == argtype_mem)
{
// call [<target>]
// check for known target.
if(s.icode->has_disp && !s.icode->ea.disp8)
{
U4 disp = s.icode->disp;
if(s.icode->ea.base == 31 &&s.icode->ea.index == 31 && disp >= module.image_base())
{
disp -= module.image_base();
if(disp + 4 - 1 < module.image_size())
{
// We already made sure the target isn't an import name.
U4 value = module.get_dword(disp);
if(value >= module.image_base())
{
value -= module.image_base();
if(value < module.image_size())
{
// 'value' is a default call [<value>] target. we do not care whether there's
// a relocation there or not. we will visit it too.
// *** add out edge to 'value' here.
proc.fixed_indirects.insert(value);
break;
}
}
}
}
}
}
// --- end ---
break;
case insn_call:
// add an out edge if possible.
tmp = (U8)s.icode->imm + (U8)offset + (U8)s.size + (U8)module.image_base();
tmp &= (U8)(U4)0xffffffff;
if(tmp < module.image_base())
{
std::cerr << " error!" << std::endl;
std::cerr << "CALL to " << std::hex << tmp << std::dec << " is below image!" << std::endl;
return false;
}
tmp -= module.image_base();
if(tmp >= module.image_size())
{
std::cerr << " error!" << std::endl;
std::cerr << "CALL to " << std::hex << tmp + module.image_base() << std::dec << " is beyond image!" << std::endl;
return false;
}
//bugfix: if we call the following instruction, we DO want an out edge to it still.
//if(tmp == (U8)(offset) + (U8)(scanx.meta[offset].length))
// ; // call to following insn
//else
{
// *** add out edge here.
proc.calls.insert(tmp);
}
// bugfix--done if noreturn.
if(noreturn_call(module, tmp))
{
done = true;
}
break;
case insn__jcc:
case insn__jrcxz:
case insn__loopnz:
case insn__loopz:
case insn__loop:
// for these, add target to list if applicable.
tmp = (U8)s.icode->imm + (U8)offset + (U8)s.size + (U8)module.image_base();
tmp &= (U8)(U4)0xffffffff;
if(tmp < module.image_base())
{
std::cerr << " error!" << std::endl;
std::cerr << "Control transfer to " << std::hex << tmp << std::dec << " is below image!" << std::endl;
return false;
}
tmp -= module.image_base();
if(tmp >= module.image_size())
{
std::cerr << " error!" << std::endl;
std::cerr << "Control transfer to " << std::hex << tmp + module.image_base() << std::dec << " is beyond image!" << std::endl;
return false;
}
if(!bb->out_edges.empty())
{
std::cerr << std::hex << offset + module.image_base() << " - BB at " << bb->offset + module.image_base() << std::dec << " already has " << bb->out_edges.size() << " out edges!" << std::endl;
}
assert(bb->out_edges.empty());
bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length)); // for accounting purposes only
// begin fix 2.
// 'next' will be executed next. the branch will be added to the 'targets' list if necessary (???)
next = offset + (U8)(scanx.meta[offset].length);
if(scanx.meta[next].procedure)
{
bb->invokation = next; // invoke it.
proc.calls.insert(next); // it's "called" by this procedure.
done = true;
// Now deal with 'tmp'. If it equals 'next', we will have only one out edge (the
// default out edge).
if(tmp != next)
{
// We CAN visit tmp too - if it's not next!
// If we have jecxz AnotherProc when AnotherProc is not the next insn, then
// we WILL REENTER AnotherProc.
bb->out_edges.push_back(tmp); // actual target!
if(fcn[tmp] != procnum)
{
fcn[tmp] = procnum;
// This is a nonstandard case. We have ONE REAL OUT EDGE. It's taken if the condition
// IS MET. The other out edge is missing, because if the condition IS NOT MET, then
// we must perform the invokation.
targets.push_front(tmp);
}
}
break;
}
// end fix 2.
bb->out_edges.push_back(tmp); // actual target!
bb->out_edges.push_back(offset + (U8)(scanx.meta[offset].length)); // next insn - a real out edge!
if(tmp == (U8)(offset) + (U8)(scanx.meta[offset].length))
; // branch to following insn. don't add to targets list, we'll visit it next!
else
if(fcn[tmp] != procnum)
{
// conditional branches to another procedure are not supported--just reenter it!
//if(scanx.meta[tmp].procedure)
{
// This is ANOTHER procedure's entrypoint.
;;; // fixme--add out edge.
}
//else
{
// we're reentering it.
fcn[tmp] = procnum;
targets.push_front(tmp);
}
}
// begin fix
// Switch to new BB.
offset += (U8)(scanx.meta[offset].length);
if(fcn[offset] == procnum)
{
done = true;
break;
}
fcn[offset] = procnum;
#if 0
if(scanx.meta[offset].branch)
{
// we've never been at 'offset' and its fcn[] is already set - but
// we must STOP HERE because of an in-label.
targets.push_front(offset);
break;
}
#endif
if(proc.blocks.find(offset) != proc.blocks.end())
bb = &proc.blocks[offset];
else
{
// We're creating a new basic block.
bb = &proc.blocks[offset];
bb->offset = offset;
bb->stop = offset;
assert(bb->stop < module.image_size());
bb->out_edges.clear();
bb->invokation = (U8)(-1ll);
}
continue;
// end fix
//break;
default:
break;
}
if(done) // if done, bb out edges already added.
break;
offset += (U8)(scanx.meta[offset].length); // go to next insn in memory
if(scanx.meta[offset].branch)
{
// next insn in memory has an in-label. we will stop here, but before
// we do, add an out edge.
bb->out_edges.push_back(offset); // next insn in memory
bb->out_edges.push_back(offset); // fall thru to next block
// --- begin bugfix ---
#if 0
if(proc.blocks.find(offset) != proc.blocks.end())
bb = &proc.blocks[offset];
else
{
// We're creating a new basic block.
bb = &proc.blocks[offset];
bb->offset = offset;
bb->stop = offset;
assert(bb->stop < module.image_size());
bb->out_edges.clear();
bb->invokation = (U8)(-1ll);
}
#endif
// --- end bugfix ---
}
if(fcn[offset] == procnum)
break;
fcn[offset] = procnum;
if(scanx.meta[offset].branch)
{
// we've never been at 'offset' and its fcn[] is already set - but
// we must STOP HERE because of an in-label.
targets.push_front(offset);
break;
}
}
}
// See if we need to set no_decompile flag.
if(proc.u.s.no_decompile == 1)
{
//std::cerr << "[+]";
// This will be slow... fortunately it does not seem to happen very often.
// The culprit is probably unhandled JMP <indirect>'s.
for(U8 u = 0; u < module.image_size(); ++u)
{
if(fcn[u] == procnum)
no_decompile[u] = 1;
}
}
}
//---
for(U8 u = 0; u < module.image_size(); ++u)
{
scanx.meta[u].procedure = 0;
if(no_decompile[u] == 1)
scanx.meta[u].procedure = 1;
}
//---
// This is an ugly hack!!
#if 1
for(std::map<U8, scanproc_t>::iterator iter = procs.procs.begin(); iter != procs.procs.end(); ++iter)
{
if(iter->second.u.s.no_decompile)
continue;
for(std::map<U8, scanbb_t>::iterator j = iter->second.blocks.begin(); j != iter->second.blocks.end(); ++j)
{
if(j->second.out_edges.size() < 2)
continue;
std::vector<U4>::iterator k = j->second.out_edges.begin();
++k;
for(; k != j->second.out_edges.end(); ++k)
{
if(iter->second.blocks.find(*k) == iter->second.blocks.end())
{
//std::cerr << "Out Edge from " << module.get_name(iter->first) << " goes to neverland and back again." << std::endl;
goto No;
}
}
}
continue;
No:
// If we found an out edge to neverland and back, then don't even try to decompile it!
// The problem is, sometimes an export is DATA and not CODE.
// If anyone is dumb enough to call DATA, then we'll make them put up with "beta" output
// e.g. output where signatures are not available....
std::cerr << std::hex << iter->first + module.image_base() << std::dec << " (" << module.get_name(iter->first) << ") can\'t be decompile because of bad edges - is it data?" << std::endl;
iter->second.u.s.no_decompile = 1;
}
#endif
#if 0
//--- debug
std::cerr << "[begin]";
for(std::map<U8, scanproc_t>::iterator iter = procs.procs.begin(); iter != procs.procs.end(); ++iter)
{
std::cerr << std::hex << iter->first + module.image_base() << std::dec << std::endl;
std::list<U8> dfs_out;
dfsbb(iter->first, iter->second.blocks, dfs_out, module.image_size(), module.image_base());
}
std::cerr << "[end]";
//--- end debug
#endif
// Last thing to do is write the data to disk--the scanobj_t vector and the call graph. The
// module metadata and image is still available to the user, we simply will let the vector
// fall out of scope, in case the user wishes to scan many files before re-reading from disk
// and processing them in sets. NOTE--if we have a large set of modules (e.g. DLLs) to process,
// we might read & parse them one at a time. This is generally not a problem. Also note that
// we might discover new procedure entrypoints, as we go along.
// Now if a JUMP or FALL THRU is made to another procedure, that new procedure appears as an
// out edge in the call graph and it is understood that we INVOKE it after restoring the stack.
// However, if the stack is not restored at the time and a common entrypoint is used, the new
// invokee is INLINED, e.g. code is duplicated.
// Now if prior to discovery of a new function we execute some code which is later revealed to
// be part of an indirect procedure, then the new function will duplicate the old code too.
// Thus if one has int (*f)(); ... f(); ... f(); ... and we know the 1st one calls fn_1() and
// the 2nd calls fn_2 which, it is learned, shares some nodes with fn_1... then we duplicate
// code.
// Basically, all indirect procedures are assumed to have certain heuristic input/outputs. They
// may input ecx and edx and may output eax and edx. ecx may be destroyed on output but it must
// not be used upon output. ebx, esp, ebp, esi, edi must be preserved/restored by the callee.
// Because of this special convention, functions that are called indirectly (which are decompiled
// after the code discovered via static analysis) . . . can be decompiled in any order without
// regard to dependencies.
// Also note that procedures imported from another module, when called, have a known calling
// convention as well. It is not necessary to have decompiled a procedure imported from another
// module before decompiling a procedure in the current module that calls the imported procedure.
//
// File format is as follows:
// U8 - size of source binary (loaded program, for synchronization purposes)
// bytes, (size of src binary) * sizeof(scanobj_t) - scanobj_t's.
// [[see quickcom for deleted code.]]
return true;
}
} // namespace ceres
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -