// File: beta.h
// (Code-viewer residue that preceded the source: a "font size:" control label.)
// beta.h
// Copyright (C) 2008 Willow Schlanger
#ifndef l_crudcom__beta_h__included
#define l_crudcom__beta_h__included
#include <list>
#include <map>
#include <stdexcept>
#include <cstddef>
#include <fstream>
#include <iostream>
#include "../x86s/types.h"
#include "../x86s/x86s_common.h"
#include "../x86s/x86s_decode.h"
#include "../crudasm/asmwriter.h"
#include <set>
#include <cstdio>
#include "parser.h"
#include "semantics.h"
namespace x86s {
// One output line of a basic block as assembled by beta_writer_t::go().
// A single decoded instruction may yield several of these when it is
// rendered as high-level statements instead of assembly.
struct beta_line_t
{
    // Offset of the originating instruction inside the memory image
    // (the image base is added only when the address is printed).
    U8 address;

    // Label to print before this line:
    //   0   - none
    //   'c' - this address is a call target (printed as "fn_...")
    //   'b' - this address is a branch target but not a call target
    //         (printed as "loc_...")
    int label;

    // True when 'line' holds disassembly text, false when it holds a
    // generated high-level statement.
    bool is_asm;

    // Flags read by this line (no__begin_x86_flags..no__end_x86_flags).
    std::set<int> flags_in;

    // Flags written by this line (same range as flags_in). The writer
    // replaces the contents with the single value -1 to mark a line whose
    // only output was found to be dead; such lines are not printed.
    std::set<int> flags_out;

    // Text of the line itself.
    std::string line;

    // Optional trailing comment; empty for none.
    std::string comment;
};
class beta_writer_t
{
std::ostream &of;
parser_t &parser;
memory_t &memory;
public:
beta_writer_t(parser_t &parser_tag, memory_t &memory_tag, std::ostream &of_t) :
of(of_t),
parser(parser_tag),
memory(memory_tag)
{
}
~beta_writer_t()
{
of << std::endl;
}
void go(bool skip_decompiled, bool hll_style)
{
bool pure = true; // set to false if an unhandled instruction is encountered in the basic block
std::list<beta_line_t> basic_block;
icode_t icode;
decode_state_t s;
s.icode = &icode;
s.dsz = parser.dsz;
int x;
char line[1024];
NasmWriter nasmdis;
U8 offset, target;
//tnode tscratch[X86S_SEMANTICS_MAX_SIZE];
//U4 tscratch_size;
specialized_tcode_t sp;
of << std::hex << "org 0x" << memory.image_base << std::dec << std::endl;
for(U8 i = 0; i < memory.image_size; )
{
if(memory.meta[i].size == 0)
{
//
int linecount = -1;
bool yet = false;
do
{
if(linecount == -1 || linecount == 15)
{
linecount = 0;
of << std::endl;
of << "loc_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db";
yet = false;
}
else
++linecount;
if(yet)
of << ",";
of << " 0x" << std::hex << (U2)memory.image[i] << std::dec;
yet = true;
++i;
} while(i != memory.image_size && memory.meta[i].size == 0);
of << std::endl;
if(i != memory.image_size)
{
if(memory.meta[i].entry == 0)
{
of << std::endl;
}
}
continue;
}
// memory.meta[i].size != 0. check for overlapping instructions.
// if not overlapping, see if it's been decompiled and skip_decompiled == true.
// else, decode it and check for decoder error.
//---
bool overlaps = false;
for(U8 j = 1; j < memory.meta[i].size; ++j)
{
if(memory.meta[i + j].entry || memory.meta[i + j].branch)
{
overlaps = true;
break;
}
}
bool dump = overlaps;
if(!overlaps)
{
if(skip_decompiled && memory.meta[i].decompiled == 1)
{
if(memory.meta[i].entry == 1)
{
of << std::endl;
of << (hll_style ? "//" : ";");
of << " fn_" << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << std::dec << " - is decompiled!" << std::endl;
if(memory.meta[i].branch == 1)
{
of << (hll_style ? "//" : ";");
of << " There are non-call branches here too." << std::endl;
}
of << std::endl;
}
i += memory.meta[i].size;
continue;
}
s.insn = memory.image + i;
s.end = s.insn + 15;
x = decode(s);
if(x != 0)
dump = true;
}
//---
if(dump)
{
of << std::endl;
for(;;)
{
of << memory.meta[i].entry ? "fn_" : "loc_";
of << std::hex << (U4)((i + memory.image_base) & memory.rip_mask) << ": db 0x" << (U2)memory.image[i] << std::dec << std::endl;
++i;
if(i == memory.image_size)
return;
if(memory.meta[i].size != 0)
break;
}
continue;
}
// decode success.
basic_block.clear();
bool done = false;
bool star = false;
pure = true;
sp.clear();
do
{
// The instruction at 'i' has been decoded and is now in s.icode.
// It is not an overlapping instruction.
basic_block.push_back(beta_line_t());
basic_block.back().address = i;
basic_block.back().label = 0;
if(memory.meta[i].entry == 1)
basic_block.back().label = 'c';
else
if(memory.meta[i].branch == 1)
basic_block.back().label = 'b';
line[0] = '\0';
const char *imm = NULL;
const char *disp = NULL;
char buf[1024];
buf[0] = '\0';
target = i;
basic_block.back().is_asm = true;
switch(encodings[s.encoding].insn)
{
case insn_int:
case insn__int3:
done = true;
break;
case insn__ret:
case insn__retf:
case insn__retfnum:
case insn__retnum:
case insn__iret:
done = true;
break;
case insn_jmp:
case insn__jrcxz:
case insn__loopnz:
case insn__loopz:
case insn__loop:
case insn_call:
done = true;
case insn__jcc:
if(encodings[s.encoding].insn == insn__jcc)
{
U1 cc = s.icode->argvalue[1];
if(cc == 0 || cc == 1)
basic_block.back().flags_in.insert(no_x86_of);
else
if(cc == 2 || cc == 3)
basic_block.back().flags_in.insert(no_x86_cf);
else
if(cc == 4 || cc == 5)
basic_block.back().flags_in.insert(no_x86_zf);
else
if(cc == 6 || cc == 7)
{
basic_block.back().flags_in.insert(no_x86_cf);
basic_block.back().flags_in.insert(no_x86_zf);
}
else
if(cc == 8 || cc == 9)
basic_block.back().flags_in.insert(no_x86_sf);
else
if(cc == 0xa || cc == 0xb)
basic_block.back().flags_in.insert(no_x86_pf);
else
if(cc == 0xc || cc == 0xd)
{
basic_block.back().flags_in.insert(no_x86_sf);
basic_block.back().flags_in.insert(no_x86_of);
}
else
if(cc == 0xe || cc == 0xf)
{
basic_block.back().flags_in.insert(no_x86_zf);
basic_block.back().flags_in.insert(no_x86_sf);
basic_block.back().flags_in.insert(no_x86_of);
}
else
done = true;
}
// fixme: add 64-bit support here. Also do something about memory.cs_base.
offset = (U8)s.icode->imm + (U8)target + (U8)s.size + (U8)memory.image_base;
offset &= memory.rip_mask;
if(offset < (U8)memory.image_base)
throw std::runtime_error("jmp above image base");
target = offset - (U8)memory.image_base;
if(memory.meta[target].entry)
{
std::sprintf(buf, "fn_%x", offset);
imm = buf;
}
else
if(memory.meta[target].branch)
{
std::sprintf(buf, "loc_%x", offset);
imm = buf;
}
break;
case insn__calli:
case insn__callfd:
case insn__callfi:
case insn__jmpfd:
case insn__jmpfi:
case insn__jmpi:
star = true;
done = true;
break;
default:
//pure = false;
#if 1
if(s.icode->lockrep == 1)
pure = false;
else
if(hll_style)
{
tcode_element &te = x86_tcode_table[encodings[s.encoding].insn];
if(te.index != 0)
{
//basic_block.back().is_asm = false if we recognize the instruction.
// write to basic_block.back(). if need many lines, add them--all with is_asm==false.
//basic_block.back().comment = "high level code";
//tnode tscratch[X86S_SEMANTICS_MAX_SIZE];
//U4 tscratch_size;
//tscratch_size = 0;
sp.accept(s.encoding, *s.icode, s.dsz);
// Go thru all asgn sp.tnodes and push the resulting string onto basic_block,
// additionally setting flags_in and flags_out.
// std::set<int>.
std::string output;
//std::cout << "(" << std::hex << te.index[2] << std::dec << ")" << std::endl;
for(U4 index = 0; te.index[2 + index] != 0xffffffff; ++index)
{
U4 *ptr = te.index + 2 + index;
if(index != 0)
{
basic_block.push_back(beta_line_t());
basic_block.back().is_asm = false;
basic_block.back().address = i;
basic_block.back().label = 0;
}
sp.get_text(te.index[2 + index] - te.index[1],
output,
&basic_block.back().flags_in,
&basic_block.back().flags_out,
*s.icode
);
basic_block.back().is_asm = false;
basic_block.back().line = output;
}
/*basic_block.back().is_asm = false;
basic_block.back().line = "// line 1";
basic_block.push_back(beta_line_t());
basic_block.back().is_asm = false;
basic_block.back().address = i;
basic_block.back().label = 0;
basic_block.back().line = "// line 2";*/
}
else
{
pure = false;
}
}
#endif
}
if(basic_block.back().is_asm)
{
if(star)
basic_block.back().comment = "(*) not followed";
nasmdis.disasm(line, &s, imm, disp, i + (U8)s.size + memory.image_base);
basic_block.back().line = line;
}
// Go to the next instruction.
i += memory.meta[i].size;
if(i == memory.image_size)
break; // was return -- bug fix
if(memory.meta[i].size == 0)
break;
//if(was_branch)
// break;
//---
overlaps = false;
for(U8 j = 1; j < memory.meta[i].size; ++j)
{
if(memory.meta[i + j].entry || memory.meta[i + j].branch)
{
overlaps = true;
break;
}
}
dump = overlaps;
if(!overlaps)
{
if(skip_decompiled && memory.meta[i].decompiled == 1)
{
//i += memory.meta[i].size;
break;
}
s.insn = memory.image + i;
s.end = s.insn + 15;
x = decode(s);
if(x != 0)
dump = true;
}
//---
if(memory.meta[i].entry == 1 || memory.meta[i].branch == 1)
done = true;
} while(!done && !dump);
if(pure)
{
// elliminate dead registers.
// e.g. if you have
// x86_of = _x86_sub_of(d[x86_bx + x86_si + 0x55aa], 0x1);
// x86_af = _x86_sub_af(trunc<byte>(d[x86_bx + x86_si + 0x55aa]), 0x1);
// x86_of = _x86_add_of(x86_bx, 0x1);
// x86_af = _x86_add_af(trunc<byte>(x86_bx), 0x1);
// then you can remove the first two of, af outputs because there are subsequent outputs without
// subsequent inputs.
for(std::list<beta_line_t>::iterator j = basic_block.begin(); j != basic_block.end();)
{
int outflag = -1;
if(j->flags_out.size() == 1)
outflag = *j->flags_out.begin();
std::list<beta_line_t>::iterator tmp = j;
++j;
if(outflag != -1)
{
for(std::list<beta_line_t>::iterator k = j; k != basic_block.end(); ++k)
{
if(k->flags_in.find(outflag) != k->flags_in.end())
break; // someone inputs it.
if(k->flags_out.find(outflag) != k->flags_out.end())
{
tmp->flags_out.clear();
tmp->flags_out.insert(-1);
}
}
}
}
}
// Print output here.
if(!basic_block.empty())
{
if(basic_block.begin()->label == 'c')
of << std::endl;
}
if(hll_style)
{
of << "{" << std::endl;
}
for(std::list<beta_line_t>::iterator j = basic_block.begin(); j != basic_block.end(); ++j)
{
bool nl = false;
if(j->label == 'c')
{
//of << std::endl;
// look up j->address and print list of callees here.
if(!parser.xgraph[j->address].calls.empty())
{
of << (hll_style ? "//" : ";");
of << " Calls:";
for(std::set<U8>::iterator jj = parser.xgraph[j->address].calls.begin(); jj != parser.xgraph[j->address].calls.end(); ++jj)
of << " " << std::hex << "fn_" << (U4)((*jj + memory.image_base) & memory.rip_mask) << std::dec;
of << std::endl;
}
if(memory.meta[j->address].branch == 1)
{
of << (hll_style ? "//" : ";");
of << " There are non-call branches here too." << std::endl;
}
of << "fn_";
nl = true;
}
else
if(j->label != 0)
{
of << "loc_";
nl = true;
}
if(j->label != 0)
{
of << std::hex << (U4)((j->address + memory.image_base) & memory.rip_mask) << std::dec;
of << ":" << std::endl;
nl = true;
}
if(j->is_asm)
{
if(hll_style)
of << " asm " << j->line << ";";
else
of << " " << j->line;
nl = true;
}
else
{
bool skip = false;
if(j->flags_out.size() == 1)
if(*j->flags_out.begin() == -1)
skip = true;
if(!skip)
{
of << " " << j->line;
nl = true;
}
}
if(!j->comment.empty())
{
nl = true;
if(hll_style)
of << " // " << j->comment;
else
of << " ; " << j->comment;
}
if(nl)
of << std::endl;
}
basic_block.clear();
if(hll_style)
of << "}" << std::endl;
}
}
};
} // namespace x86s
#endif // l_crudcom__beta_h__included
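// (Code-viewer residue that followed the source, not part of beta.h:
// keyboard shortcut help - copy code: Ctrl + C; search code: Ctrl + F;
// fullscreen mode: F11; toggle theme: Ctrl + Shift + D; show shortcuts: ?;
// increase font size: Ctrl + =; decrease font size: Ctrl + -.)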