📄 scan.cpp
字号:
// scan.cpp
// Copyright (C) 2008,2009 Willow Schlanger
#include <iostream>
#include <list>
/*
Note--the code that checks whether a procedure exits or not needs work--right now
we go by the (unqualified) function name, ignoring the module it's in. This is bad
since imports will always be qualified.
QuickCom EXE/DLL Decompiler Copyright (C) 2008,2009 Willow Schlanger
msvcr80.dll:
loading... done
scanning... error!
781c28f0
[2] JMP to 5094426a is below image!
Error scanning msvcr80.dll!
---
Can't scan msvcrt.dll either! [try this again.]
---
Need to recognize this:
-----------------------
cmp dword [ebp+0xffffff30],byte +0x09
ja loc_411429
mov edx,[ebp+0xffffff30]
jmp dword near [edx*4+0x00411434] ; (*)
It's unoptimized MSVC code that confuses the decompiler at present.
This code needs to be recognized!!!
77bc7daf cmp eax,byte +0x0c
77bc7db2 push esi
77bc7db3 ja 0x77bc7d62
77bc7db5 movzx eax,byte [eax+0x77bc7d84]
77bc7dbc jmp dword near [eax*4+0x77bc7d6c] -- indirect JUMP .
*/
#include "scan.h"
#include <cstring>
#include <cstdio>
#include <cassert>
// What if there is this code:
// call fn_x
// fn_x:
// call A
// ret
// This should decompile into:
// fn_x();
// return INVOKE(fn_x)();
// That is, the first CALL is normal, then there's an INVOCATION of the target.
// It should work - but is not yet tested.
// What about jmp short $+2 ?? That should terminate a basic block, because there's
// an in-label. What about Jecxz $+2 ???
// This should work but has not been tested either.
namespace ceres
{
// This is required! Add to this as needed.
// NOTE: This is an ugly hack and what is worse, it won't work if it's an IMPORT NAME...
// FIXME!
// --- if the target is a JMP to an IMPORT, see WHAT IMPORT IT IS (fixme, do this!)
// --- RtlExitUserProcess --- this is called. It does not return. However, we want
// to find out what the return depth of the procedure is... it's -1 presently.
// NEED TO SET A FLAG IF it IS a call that does not return!!!!
// Reports whether the entry at `offset` is one this scanner assumes never
// returns to its caller.
//
// Only offsets for which module.is_special() is true are considered. The name
// obtained from module.get_name() is then matched against a fixed list of
// guesses. The match is on the bare name alone, so an entry with one of these
// names is treated as non-returning whichever module it comes from.
//
// module: module being scanned.
// offset: offset handed to module.is_special() / module.get_name().
// Returns true when the name is on the list, false otherwise.
bool noreturn_call(module_t &module, U8 offset)
{
    if(!module.is_special(offset))
        return false;

    // Best guesses only; extend as needed.
    static const char *const no_return_names[] =
    {
        "RpcRaiseException",
        "RaiseException",
        "ExitProcess",
        "ExitThread",
        "RtlExitUserProcess",
        "RtlExitUserThread",
        "FatalExit",
        "FreeLibraryAndExitThread"
    };
    const unsigned name_count = sizeof(no_return_names) / sizeof(no_return_names[0]);

    const std::string entry_name = module.get_name(offset);
    for(unsigned k = 0; k < name_count; ++k)
    {
        if(entry_name == no_return_names[k])
            return true;
    }
    return false;
}
// Recognises a switch that goes through a byte index table before the jump
// table, ending in the indirect jump currently decoded in `s`:
//
//     cmp   reg,<imm>                 ; icode_3
//     ja    <target>                  ; icode_2
//     movzx reg,byte [reg+disp1]      ; icode_1  (byte index table)
//     jmp   dword [reg*4+disp2]       ; icode_0  (dword target table)
//
// module:  image being scanned; supplies image_base(), image_size(),
//          get_byte() and get_dword().
// offset:  not used by this routine.
// s:       decode state whose s.icode is the jmp instruction.
// scanx:   decoded instructions, indexed through the icode_list history ring.
// targets: cleared on entry; on a match receives the dword read from the
//          target table for every byte-table entry 0..<imm> that lies inside
//          the image. Values are pushed exactly as read (callers subtract the
//          image base themselves).
//
// Returns true when the four-instruction shape matches, false otherwise.
// Entries that cannot be read are skipped, so a true result may come with
// fewer targets than <imm>+1 (possibly none).
//
// NOTE(review): unlike handle_switch(), the operand kinds of the cmp and the
// registers used by cmp/movzx/jmp are not cross-checked here -- confirm
// whether that is intentional.
bool scanner_t::handle_switch_zx(module_t &module, U8 offset, decode_state_t &s, scan_target_t &scanx, std::list<U8> &targets)
{
    targets.clear();
    if(icode_size < 4 || get_argtype_lo(s.icode->argtype[0]) != argtype_mem)
        return false;

    icode_t &icode_0 = *s.icode;                                        // jmp dword [reg*4+disp2]
    icode_t &icode_1 = scanx.icode[icode_list[(icode_index - 1) & 15]]; // movzx reg,byte [reg+disp1]
    icode_t &icode_2 = scanx.icode[icode_list[(icode_index - 2) & 15]]; // ja <target>
    icode_t &icode_3 = scanx.icode[icode_list[(icode_index - 3) & 15]]; // cmp reg,<imm>

    const U2 base = icode_0.ea.base;
    const U2 index = icode_0.ea.index;
    const U2 index_scale = icode_0.ea.index_scale;

    // 31 is presumably the "no register" encoding and index_scale 2 the *4
    // scale from the pattern above -- TODO confirm against the decoder.
    const bool shape_matches =
        base == 31 && index != 31 && index_scale == 2 &&
        icode_3.insn == insn_cmp &&
        icode_2.insn == insn__jcc &&
        icode_1.insn == insn_movzx &&
        icode_2.argvalue[1] == 7 /* ja */;
    if(!shape_matches)
        return false;

    const U4 limit = icode_3.imm;

    // A displacement below the image base cannot address anything in this
    // image. Previously the subtraction below simply wrapped around; now the
    // pattern is still reported as matched, but with no targets.
    if(icode_1.disp < module.image_base() || icode_0.disp < module.image_base())
        return true;

    const U4 table = icode_1.disp - module.image_base();
    const U4 xtargets = icode_0.disp - module.image_base();
    const U8 image_size = module.image_size();

    // Walk byte-table entries 0..limit. The walk stops at the first entry
    // outside the image: every later entry is further out, and stopping there
    // (together with the explicit x == limit exit) keeps the loop finite even
    // when the cmp immediate is 0xFFFFFFFF, where "x <= limit" can never fail.
    for(U4 x = 0; table + x < image_size; ++x)
    {
        const U1 c = module.get_byte(table + x);
        const U4 entry = static_cast<U4>(c) * 4;

        // Wrap-free form of "xtargets + entry + 3 < image_size".
        if(xtargets < image_size &&
            image_size - xtargets >= 4 &&
            entry <= image_size - xtargets - 4)
        {
            const U4 e = module.get_dword(xtargets + entry);
            targets.push_back(e);
        }

        if(x == limit)
            break;
    }
    return true;
}
// Recognises a plain jump-table switch ending in the indirect jump currently
// decoded in `s`:
//
//     cmp reg,imm
//     [pop's... - but not to reg]
//     ja  OutLabel
//     jmp dword near [reg*4+base]
//
// module:  image being scanned; supplies image_base(), image_size() and
//          get_dword().
// offset:  not used by this routine.
// s:       decode state whose s.icode is the jmp instruction.
// scanx:   decoded instructions, indexed through the icode_list history ring.
// targets: cleared on entry; on success receives the imm+1 dwords of the
//          jump table, exactly as read (image base not subtracted).
//
// Returns true only when the pattern matches and the whole table lies inside
// the image; otherwise returns false.
bool scanner_t::handle_switch(module_t &module, U8 offset, decode_state_t &s, scan_target_t &scanx, std::list<U8> &targets)
{
    targets.clear();
    if(icode_size < 3 || get_argtype_lo(s.icode->argtype[0]) != argtype_mem)
        return false;

    icode_t &icode_1 = scanx.icode[icode_list[(icode_index - 1) & 15]]; // jcc

    // Walk backwards from the instruction before the jcc until the cmp is
    // found, stepping over pops (MSVC may interleave POP's before jcc).
    UINT num = 2;
    for(;;)
    {
        icode_t &earlier = scanx.icode[icode_list[(icode_index - num) & 15]];
        if(earlier.insn == insn_cmp)
            break;
        if(earlier.insn != insn_pop)
            return false;
        // We have a pop - make sure it's not to the register in question!
        if(earlier.argvalue[0] == s.icode->ea.index)
            return false; // uh oh, can't be a switch
        ++num;
        // The history holds icode_size entries counting the jmp itself, so
        // only distances 0..icode_size-1 are valid. The previous test
        // (icode_size < num) let num == icode_size through, which indexes a
        // ring slot that was not written during this visit.
        if(num >= icode_size)
            return false;
    }
    icode_t &icode_2 = scanx.icode[icode_list[(icode_index - num) & 15]]; // cmp

    const U2 base = s.icode->ea.base;
    const U2 index = s.icode->ea.index;
    const U2 index_scale = s.icode->ea.index_scale;

    // 31 is presumably the "no register" encoding and index_scale 2 the *4
    // scale from the pattern above -- TODO confirm against the decoder.
    if(base != 31 || index == 31 || index_scale != 2 || icode_1.insn != insn__jcc)
        return false;

    // The cmp must be "reg32, imm" on the same register that indexes the table.
    const bool cmp_matches =
        get_argtype_lo(icode_2.argtype[0]) == argtype_reg &&
        get_argtype_lo(icode_2.argtype[1]) == argtype_imm &&
        index == icode_2.argvalue[0] && s.icode->asz == argsize_32 &&
        get_argsize_lo(icode_2.argsize[0]) == argsize_32;
    if(!cmp_matches)
        return false;

    if(icode_1.argvalue[1] != 7) // jnbe e.g. ja
        return false;

    const U4 limit = icode_2.imm;
    if(s.icode->disp < module.image_base())
        return false;
    const U4 table_base = s.icode->disp - module.image_base();

    // Require table_base + 4 * (limit + 1) <= image_size. The test is phrased
    // with subtraction and division because the former 32-bit expression
    // "table_base + 4 * limit + 3" wraps for large immediates, which let an
    // oversized table through and sent the read loop past the image.
    const U8 image_size = module.image_size();
    if(table_base >= image_size || image_size - table_base < 4)
        return false;
    if(limit > (image_size - table_base - 4) / 4)
        return false;

    for(U4 i = 0; i <= limit; ++i)
    {
        const U4 x = module.get_dword(table_base + 4 * i);
        targets.push_back(x);
    }
    return true;
}
// pre: s.icode has decoded a calli or jmpi insn.
// returns true if target is an import, precisely; else returns false.
// note--while scanning, if we jmp [import] or call [import] directly, then we do not
// need to follow that particular target--as it's an import.
// Tests whether the memory operand of the instruction in `s` names an import.
//
// module: image being scanned.
// offset: not used by this routine.
// s:      decode state; s.icode is the instruction under test.
//
// Returns true only if every step below holds, otherwise false:
//   - the operand has a displacement and ea.disp8 is not set,
//   - ea.base and ea.index are both 31 (presumably "no register" -- TODO
//     confirm against the decoder),
//   - the displacement, less the image base, leaves room for a 4-byte read
//     inside the image,
//   - module.is_special() accepts that image-relative offset, and
//   - the name recorded for it has a non-empty import_name.
bool scanner_t::target_is_import(module_t &module, U8 offset, decode_state_t &s)
{
    if(!s.icode->has_disp || s.icode->ea.disp8)
        return false;

    U4 slot = s.icode->disp;
    if(s.icode->ea.base != 31 || s.icode->ea.index != 31 || slot < module.image_base())
        return false;

    // From here on `slot` is relative to the image base.
    slot -= module.image_base();
    if(slot + 4 - 1 >= module.image_size())
        return false;

    if(!module.is_special(slot))
        return false;

    name_t &entry = module.lt_int.target->names[slot];
    return !entry.import_name.empty();
}
// This does a depth-first-search on all basic blocks in the procedure, writing the
// result to dfs_out (in reverse order). It then makes sure each BB has been visited,
// e.g. that there are no orphans.
// Recursive worker for dfsbb(): visits `node` and then every out edge of its
// basic block except the first.
//
// node:    key of the basic block to visit.
// blocks:  all basic blocks, keyed the same way as `node`.
// dfs_out: each node is pushed to the FRONT of this list once the edges it
//          follows have been processed.
// visited: keys already seen; a node found here is not processed again.
//
// A key that is missing from `blocks` is reported on std::cerr and then trips
// the assert.
void do_dfsbb(U8 node, std::map<U8, scanbb_t> &blocks, std::list<U8> &dfs_out, std::set<U8> &visited)
{
    if(visited.count(node) != 0)
        return; // already been here
    visited.insert(node);

    std::map<U8, scanbb_t>::iterator self = blocks.find(node);
    const bool known = (self != blocks.end());
    if(!known)
        std::cerr << std::hex << node << std::dec << " [+]" << std::endl;
    assert(known);
    // With asserts compiled out an unknown node still gets an entry via
    // operator[], exactly as a direct blocks[node] lookup would give.
    scanbb_t &bb = known ? self->second : blocks[node];

    // We ignore the first out edge, which is always SIMPLY the next insn in
    // memory, for accounting purposes -- hence the walk starts at index 1.
    for(std::vector<U4>::size_type k = 1; k < bb.out_edges.size(); ++k)
    {
        const U4 successor = bb.out_edges[k];
        if(blocks.count(successor) == 0)
            std::cerr << std::hex << node << std::dec << " [xref]" << std::endl;
        do_dfsbb(successor, blocks, dfs_out, visited);
    }

    dfs_out.push_front(node);
}
// Runs do_dfsbb() from `root` and then checks that every basic block in
// `blocks` was reached.
//
// root:        key of the block the search starts from.
// blocks:      all basic blocks of the procedure.
// dfs_out:     cleared, then filled by do_dfsbb().
// module_size: upper bound asserted on each block's `stop` and `offset`.
// module_base: not used by the live code (only a disabled diagnostic
//              referred to it).
//
// Returns true if every block was visited. On the first unvisited block an
// "[orphan: ...]" line is written to std::cerr and false is returned.
bool dfsbb(U8 root, std::map<U8, scanbb_t> &blocks, std::list<U8> &dfs_out, U8 module_size, U8 module_base)
{
    dfs_out.clear();
    std::set<U8> reached;
    do_dfsbb(root, blocks, dfs_out, reached);

    std::map<U8, scanbb_t>::iterator it = blocks.begin();
    while(it != blocks.end())
    {
        const scanbb_t &bb = it->second;
        assert(bb.stop < module_size && bb.offset < module_size);

        if(reached.count(it->first) == 0)
        {
            std::cerr << "[orphan: " << std::hex << it->first << std::dec << "]" << std::endl;
            return false;
        }
        ++it;
    }
    return true;
}
//was: bool scanner_t::scan(ceres::target_t &target, std::string mod)
bool scanner_t::scan(scan_target_t &scanx, std::set<U8> &entrypoints, bool dots, scanprocs_t &procs)
{
//mod = make_lowercase(mod);
//module_t &module = target.data[mod];
// search for: target, module, obj[].
//std::vector<scanobj_t> obj(module.image_size());
module_t module(scanx); // adaptor from old QuickCom code
std::vector<U4> fcn(module.image_size()); // 0 means this byte not part of a procedure
memset(&fcn[0], 0, module.image_size() * sizeof(U4));
std::vector<U1> no_decompile(module.image_size()); // =1 if no decompile
memset(&no_decompile[0], 0, module.image_size() * sizeof(U1));
UINT sz = module.image_size();
decode_state_t s;
U8 bytesleft;
int status;
std::list<U8> calls;
// *** Why do we want to re-decode it? We don't!!!
// *** Take it out. Also use 'entrypoints' instead of making our own list.
std::list<U8> targets;
for(std::set<U8>::iterator i = entrypoints.begin(); i != entrypoints.end(); ++i)
{
scanx.meta[*i].target = 1;
targets.push_front(*i);
scanx.meta[*i].procedure = 1;
calls.push_front(*i);
}
U8 offset, tmp;
bool done;
while(!targets.empty())
{
offset = targets.back();
targets.pop_back();
icode_size = 0;
icode_index = 15;
for(;;)
{
if(offset >= module.image_size())
{
std::cerr << " error!" << std::endl;
std::cerr << "Tried to scan insn at " << std::hex << (offset + module.image_base()) << std::dec << " which is beyond image!" << std::endl;
return false;
}
icode_index = (icode_index + 1) & 15;
if(icode_size < 16)
++icode_size;
icode_list[icode_index] = offset;
s.icode = &scanx.icode[offset];
s.size = scanx.meta[offset].length;
scanx.meta[offset].target = 1;
done = false; // if false, go to next insn in memory when done.
switch(s.icode->insn)
{
// for any type of return, we're done with the current visit.
case insn__ret:
case insn__retf:
case insn__retfnum:
case insn__retnum:
case insn__iret:
done = true;
break;
// unhandled stuff.
case insn__callfd:
case insn__callfi:
break;
// unhandled stuff.
case insn__jmpfd:
case insn__jmpfi:
done = true;
break;
case insn__jmpi:
if(target_is_import(module, offset, s))
{
done = true;
break;
}
if(get_argtype_lo(s.icode->argtype[0]) == argtype_mem)
{
// jmp [<target>]
std::list<U8> swtargets;
if(handle_switch(module, offset, s, scanx, swtargets) || handle_switch_zx(module, offset, s, scanx, swtargets))
{
U8 x;
for(std::list<U8>::iterator i = swtargets.begin(); i != swtargets.end(); ++i)
{
x = *i;
if(x < module.image_base())
{
std::cerr << " error!" << std::endl;
std::cerr << "Insn at " << std::hex << (offset + module.image_base()) << std::dec << " is switch with target to " << std::hex << x << std::dec << " which is below image base!" << std::endl;
return false;
}
x -= module.image_base();
if(x >= module.image_size())
{
std::cerr << " error!" << std::endl;
std::cerr << "Insn at " << std::hex << (offset + module.image_base()) << std::dec << " is switch with target to " << std::hex << (x + module.image_base()) << std::dec << " which is beyond image!" << std::endl;
return false;
}
scanx.meta[x].branch = 1;
if(!scanx.meta[x].target)
{
//std::cerr << std::hex << (offset + module.image_base()) << " -> " << (x + module.image_base()) << std::dec << std::endl;
scanx.meta[x].target = 1;
targets.push_front(x);
}
}
//std::cout << "[case 1]";
done = true;
break;
}
// check for known target. if found, we need to visit it... but whose procedure is it?
if(s.icode->has_disp && !s.icode->ea.disp8)
{
U4 disp = s.icode->disp;
if(s.icode->ea.base == 31 &&s.icode->ea.index == 31 && disp >= module.image_base())
{
disp -= module.image_base();
if(disp + 4 - 1 < module.image_size())
{
// We already made sure the target isn't an import name.
U4 value = module.get_dword(disp);
if(value >= module.image_base())
{
value -= module.image_base();
if(value < module.image_size())
{
// 'value' is a default jmp [<value>] target. we do not care whether there's
// a relocation there or not. we will visit it too.
//std::cerr << std::hex << value + module.image_base() << std::dec << std::endl;
scanx.meta[value].branch = 1;
if(!scanx.meta[value].target)
{
scanx.meta[value].target = 1;
targets.push_front(value);
}
//std::cout << "[case 2]";
done = true;
break;
}
}
}
}
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -