📄 read_script.cpp

📁 当前支持 16-bit, 32-bit and 64-bit 的二进制文件
💻 CPP
字号:
// read_script.cpp
// Copyright (C) 2008 Willow Schlanger

#include "x86s_script.h"

#include "types.h"
#include "x86s_common.h"

#include <list>
#include <set>
#include <string>
#include <sstream>
#include <vector>
#include <iostream>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <fstream>

namespace x86s
{

#if 0
// Disabled: everything up to the matching #endif is compiled out.
//
// Returns the smallest k such that 2^k > s, i.e. floor(log2(s)) + 1 for
// s >= 1 (1 -> 1, 2..3 -> 2, 4..7 -> 3, ...).  s == 0 is special-cased to 1.
// NOTE(review): this was previously described as ceil(log2(s)); the code does
// not agree with that for s == 1 or for exact powers of two (s == 4 gives 3,
// not 2).  Decide which is intended before re-enabling.
//
// Returns static_cast<UINT>(-1) if the doubling of x wraps around to 0 before
// x exceeds s -- presumably UINT is an unsigned integer type; confirm in
// types.h.
UINT logsize(UINT s)
{
	if(s == 0)
		return 1;
	// x walks the powers of two 1, 2, 4, ...; log is one more than x's exponent.
	for(UINT x = 1, log = 1; x != 0; x += x, ++log)
	{
		if(x > s)
			return log - 1;
	}
	return static_cast<UINT>(-1);
}
#endif

// Splits a string on a single delimiter character.
// Empty pieces (produced by leading, trailing or repeated delimiters) are
// dropped, so `tokens` only ever holds non-empty strings, in input order.
struct tokenize
{
	std::vector<std::string> tokens;
	
	tokenize(std::string s, char delim)
	{
		const std::string::size_type len = s.size();
		std::string::size_type from = 0;
		
		while(from < len)
		{
			// The current piece ends at the next delimiter, or at end of input.
			std::string::size_type to = s.find(delim, from);
			if(to == std::string::npos)
				to = len;
			
			if(to > from)
				tokens.push_back(s.substr(from, to - from));
			
			from = to + 1;	// step over the delimiter
		}
	}
};

// Advances past leading whitespace (as classified by std::isspace) in
// [begin, end) and returns the position of the first non-whitespace
// character, or `end` if the range is empty or entirely whitespace.
std::string::iterator skip_space(std::string::iterator begin, std::string::iterator end)
{
	// std::isspace requires a value representable as unsigned char (or EOF);
	// handing it a plain char with the high bit set is undefined behaviour
	// where char is signed, so convert first.
	while(begin != end && std::isspace(static_cast<unsigned char>(*begin)))
		++begin;
	return begin;
}

// Extracts the next word from [begin, end).
//
// Leading whitespace is skipped first.  The word then runs up to (but not
// including) the first whitespace character or the first character that
// appears in `delim`, or to `end` if neither occurs.
//
//   out    receives the word; it is empty when nothing is left or when the
//          first non-blank character is itself a delimiter.
//   delim  extra characters that terminate a word (none by default).
//
// Returns the position just past the word, i.e. at the terminating character
// or at `end`.  The terminator itself is not consumed.
std::string::iterator getword(std::string &out, std::string::iterator begin, std::string::iterator end, std::string delim="")
{
	begin = skip_space(begin, end);
	const std::string::iterator word_start = begin;
	for(; begin != end; ++begin)
	{
		const char c = *begin;
		// Cast before std::isspace: a negative char value is undefined
		// behaviour for the <cctype> classification functions.
		if(delim.find(c) != std::string::npos || std::isspace(static_cast<unsigned char>(c)))
			break;
	}
	out.assign(word_start, begin);
	return begin;
}

// Finishes the instruction currently being collected in `g`, if there is one.
//
// When g.insn_name is non-empty, a new insn_t is appended to g.data holding
// copies of the pending name, encodings, disassembly code, semantics and
// instruction suffixes; g.insn_name is then cleared.  When g.insn_name is
// empty the call does nothing.
//
// If the pending instruction has no encodings, a message is printed to
// std::cout and an int (the __LINE__ of the throw) is thrown.
void next_insn(globals_t &g)
{
	if(g.insn_name.empty())
		return;		// nothing pending

	if(g.encodings.empty())
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": no encodings for instruction \'" << g.insn_name << "\'" << std::endl;
		throw __LINE__;
	}

	// Append a blank record, then fill it in place.
	g.data.push_back(insn_t());
	insn_t &done = g.data.back();

	done.insn_name = g.insn_name;
	done.encodings.clear();
	done.disasm_code.clear();
	done.semantics.clear();
	done.insn_suffix.clear();

	for(std::vector<tmp_encoding_t>::iterator enc = g.encodings.begin(); enc != g.encodings.end(); ++enc)
		done.encodings.push_back(*enc);

	for(std::list<std::string>::iterator text = g.disasm_code.begin(); text != g.disasm_code.end(); ++text)
		done.disasm_code.push_back(*text);

	for(std::list<std::string>::iterator text = g.semantics.begin(); text != g.semantics.end(); ++text)
		done.semantics.push_back(*text);

	for(std::set<std::string>::iterator sfx = g.insn_suffix.begin(); sfx != g.insn_suffix.end(); ++sfx)
		done.insn_suffix.insert(*sfx);

#if 0
	// Disabled debug dump of the instruction and its encodings.
	// ---
	std::cout << "// $ " << g.insn_name;
	if(!g.insn_suffix.empty())
	{
		std::cout << " ! ";
		for(std::set<std::string>::iterator i = g.insn_suffix.begin(); i != g.insn_suffix.end(); ++i)
		{
			if(i != g.insn_suffix.begin())
				std::cout << ",";
			std::cout << *i;
		}
	}
	std::cout << std::endl;
	std::cout << "#define insn_" << g.insn_name << " 0" << std::endl;
	std::string s;
	for(std::vector<tmp_encoding_t>::iterator i = g.encodings.begin(); i != g.encodings.end(); ++i)
	{
		encoding_to_string(s, *i, g);
		std::cout << std::endl;
		std::cout << s << "," << std::endl;
	}
	std::cout << std::endl;
	// ---
#endif

	// An empty name marks "no instruction pending".
	g.insn_name.clear();
}

// Parses an instruction line of the form
//     $name [! suffix[,suffix...]] [# comment]
// On entry g.begin points at the '$'.
//
// Stores the name in g.insn_name, records it in g.insns, resets the
// per-instruction state (g.insn_suffix, g.encodings, g.disasm_code,
// g.semantics) and fills g.insn_suffix with the listed suffixes.
//
// Each failure prints a message to std::cout and throws an int (the __LINE__
// of the throw): missing name or the name "_", a name already in g.insns, a
// suffix for which g.ilist.do_find returns -1, or trailing text that is not a
// '#' comment.
void parse_insn(globals_t &g)
{
	const std::string::iterator eol = g.line.end();

	++g.begin;		// skip '$'
	g.begin = getword(g.insn_name, g.begin, eol, "!#");

	const bool name_ok = !g.insn_name.empty() && g.insn_name != "_";
	if(!name_ok)
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": valid instruction name expected" << std::endl;
		throw __LINE__;
	}

	if(g.insns.find(g.insn_name) != g.insns.end())
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": duplicate instruction name \'" << g.insn_name << "\'" << std::endl;
		throw __LINE__;
	}
	g.insns.insert(g.insn_name);

	g.begin = skip_space(g.begin, eol);

	// Start this instruction with a clean slate.
	g.insn_suffix.clear();
	g.encodings.clear();
	g.disasm_code.clear();
	g.semantics.clear();

	if(g.begin != eol && *g.begin == '!')
	{
		++g.begin;			// skip '!'

		// getword skips leading blanks itself, so no separate skip is needed.
		std::string list;
		g.begin = getword(list, g.begin, eol, "#");

		// An empty list yields no tokens, so the loop body never runs.
		tokenize names(list, ',');
		for(std::vector<std::string>::iterator sfx = names.tokens.begin(); sfx != names.tokens.end(); ++sfx)
		{
			const SINT found = g.ilist.do_find(*sfx);
			if(found == -1)
			{
				std::cout << "Error: " << g.filename << " line " << g.linenum << ": unrecognized instruction suffix: " << *sfx << std::endl;
				throw __LINE__;
			}
			g.insn_suffix.insert(*sfx);
		}
	}

	// Only blanks or a '#' comment may remain on the line.
	g.begin = skip_space(g.begin, eol);
	if(g.begin != eol && *g.begin != '#')
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": expected end of line" << std::endl;
		throw __LINE__;
	}
}

// Parses the whole of `s` as a hexadecimal number (std::strtol, base 16, so an
// optional "0x" prefix is accepted).
//
// Returns the value, which is always >= 0, or -1 on any failure:
//   - `s` is empty or contains no digits (strtol converted nothing),
//   - characters remain after the number,
//   - the parsed value is negative (a leading '-' was given).
// Callers can therefore test the result with either `== -1` or `< 0`.
long gethexvalue(const char *s)
{
	char *endptr;
	const long x = std::strtol(s, &endptr, 16);

	// endptr == s means no conversion took place.  This also covers the empty
	// string, which would otherwise pass the "*endptr is NUL" test and be
	// reported as 0.
	if(endptr == s || *endptr != '\0' || x < 0)
		return -1;
	return x;
}

// Parses one encoding line (first non-blank character ':') and appends the
// resulting tmp_encoding_t to g.encodings.  On entry g.begin points at the ':'.
//
// Layout, in whitespace-separated words, as consumed below:
//     : [66|f2|f3]... [0f [0f]] <opcode> [<nextbyte>] [/0../7|/r|/z]
//       <args>|void [! <suffix>[,<suffix>...]] [# comment]
// <opcode> and <nextbyte> are hex bytes.  <args> is a comma-separated list
// (no embedded blanks) of  type:size[.size_hi][:hexvalue].
//
// Prefix words become suffixes "op66_op66", "fx_f2" / "fx_f3"; a reg/op
// specifier "/k" becomes suffix "ro_k"; each "0f" adds 0x100 to basecode.
//
// NOTE(review): the '!' introducer is recognised by its first character only,
// and the suffix list is always taken from the *following* word, so in
// "!foo bar" the text "foo" is ignored.  Left as found; confirm whether the
// script format relies on this.
//
// Every failure prints a message to std::cout and throws an int (the __LINE__
// of the throw).
void parse_encoding(globals_t &g)
{
	if(g.insn_name.empty())
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": instruction required before encodings" << std::endl;
		throw __LINE__;
	}

	// Start from the defaults used for "nothing specified".
	tmp_encoding_t tmp;
	tmp.suffix.clear();
	tmp.insn = g.insn_name;
	tmp.basecode = 0;
	tmp.nextbyte = 511;		// outside 0..0xff; presumably "no fixed second byte" -- confirm with users of nextbyte
	for(int i = 0; i < MAX_ARGS; ++i)
	{
		tmp.argtype[i] = "void";
		tmp.argtype_hi[i] = "0";
		tmp.argsize[i] = "16";
		tmp.argsize_hi[i] = "none";
		tmp.argvalue[i] = 0xff;
	}

	std::string s;		// the word currently being examined

	++g.begin;				// skip ':'
	g.begin = getword(s, g.begin, g.line.end());

	// --- prefixes: any mix of 66 / f2 / f3, except that f2 and f3 exclude each other ---
	bool op66 = false;
	int fx = 0;		// 0=none, 2=f2, 3=f3

	while(s == "66" || s == "f2" || s == "f3")
	{
		if(s == "66")
			op66 = true;
		else
		{
			const int wanted = (s == "f2") ? 2 : 3;
			if(fx != 0 && fx != wanted)
			{
				std::cout << "Error: " << g.filename << " line " << g.linenum << ": f2 and f3 can not both be used!" << std::endl;
				throw __LINE__;
			}
			fx = wanted;
		}
		g.begin = getword(s, g.begin, g.line.end());
	}

	// --- opcode: up to two 0f escape words, then one hex byte ---
	for(int escapes = 0; escapes < 2 && s == "0f"; ++escapes)
	{
		tmp.basecode += 0x100;
		g.begin = getword(s, g.begin, g.line.end());
	}

	long x = gethexvalue(s.c_str());
	if(x < 0 || x > 0xff)
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": bad opcode byte \'" << s << "\'" << std::endl;
		throw __LINE__;
	}
	tmp.basecode += static_cast<U4>(x);

	g.begin = getword(s, g.begin, g.line.end());

	// --- optional fixed next byte ---
	// Only a word that cannot be an argument (no ':') or a reg/op specifier
	// (no leading '/') is tried as a hex byte.  If it does not parse as one
	// it is left in s for the stages below.
	if(!s.empty() && s[0] != '/' && s.find(':') == std::string::npos)
	{
		x = gethexvalue(s.c_str());
		if(x >= 0 && x < 0x100)
		{
			tmp.nextbyte = static_cast<U1>(x);
			g.begin = getword(s, g.begin, g.line.end());
		}
	}

	// --- optional reg/op specifier: /0../7, /r or /z ---
	// s may have become empty above, so test that before looking at s[0].
	if(!s.empty() && s[0] == '/')
	{
		const std::string kinds("01234567rz");
		if(s.size() != 2 || kinds.find(s[1]) == std::string::npos)
		{
			std::cout << "Error: " << g.filename << " line " << g.linenum << ": unrecognized reg/op specifier \'" << s << "\'" << std::endl;
			throw __LINE__;
		}
		tmp.suffix.insert(std::string("ro_") + s[1]);
		g.begin = getword(s, g.begin, g.line.end());
	}

	// --- arguments ---
	if(s.empty())
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": argument list expected - use void for no arguments" << std::endl;
		throw __LINE__;
	}

	if(s != "void")
	{
		tokenize args(s, ',');
		int argnum = 0;
		for(std::vector<std::string>::iterator i = args.tokens.begin(); i != args.tokens.end(); ++i, ++argnum)
		{
			if(argnum >= MAX_ARGS)
			{
				std::cout << "Error: " << g.filename << " line " << g.linenum << ": too many arguments (max " << MAX_ARGS << ")" << std::endl;
				throw __LINE__;
			}

			// type:size[.size_hi][:value]
			tokenize fields(*i, ':');
			if(fields.tokens.size() < 2 || fields.tokens.size() > 3)
			{
				std::cout << "Error: " << g.filename << " line " << g.linenum << ": bad argument \'" << *i << "\'" << std::endl;
				throw __LINE__;
			}

			tmp.argtype[argnum] = get_prefix(fields.tokens[0]);
			tmp.argtype_hi[argnum] = fields.tokens[0];

			if(fields.tokens.size() == 3)
			{
				// Reject every negative result, not just the -1 error code.
				const long value = gethexvalue(fields.tokens[2].c_str());
				if(value < 0)
				{
					std::cout << "Error: " << g.filename << " line " << g.linenum << ": bad argument \'" << *i << "\'" << std::endl;
					throw __LINE__;
				}
				tmp.argvalue[argnum] = static_cast<U2>(value);
			}

			// size[.size_hi]
			tokenize sz(fields.tokens[1], '.');
			if(sz.tokens.empty() || sz.tokens.size() > 2)
			{
				std::cout << "Error: " << g.filename << " line " << g.linenum << ": bad argument \'" << *i << "\'" << std::endl;
				throw __LINE__;
			}
			if(sz.tokens[0] == "0")
				tmp.argsize[argnum] = "16";		// size not applicable -- use default
			else
				tmp.argsize[argnum] = sz.tokens[0];
			if(sz.tokens.size() == 2)
				tmp.argsize_hi[argnum] = sz.tokens[1];
		}
	}

	// --- optional "! suffix,list", then end of line or a '#' comment ---
	g.begin = getword(s, g.begin, g.line.end());
	if(!s.empty() && s[0] == '!')
	{
		g.begin = getword(s, g.begin, g.line.end());

		// A word starting with '#' opens a comment: there is no suffix list
		// and the rest of the line is ignored.
		if(s.empty() || s[0] != '#')
		{
			tokenize su(s, ',');
			for(std::vector<std::string>::iterator i = su.tokens.begin(); i != su.tokens.end(); ++i)
			{
				// See if *i is a valid attribute.
				const SINT j = g.elist.do_find(*i);
				if(j == -1)
				{
					std::cout << "Error: " << g.filename << " line " << g.linenum << ": bad encoding suffix \'" << *i << "\'" << std::endl;
					throw __LINE__;
				}
				// Add suffix.
				tmp.suffix.insert(*i);
			}

			g.begin = getword(s, g.begin, g.line.end());
		}
	}

	// Anything left must be a comment.
	if(!s.empty() && s[0] != '#')
	{
		std::cout << "Error: " << g.filename << " line " << g.linenum << ": end-of-line expected" << std::endl;
		throw __LINE__;
	}

	if(op66)
		tmp.suffix.insert("op66_op66");

	if(fx == 2)
		tmp.suffix.insert("fx_f2");
	else
	if(fx == 3)
		tmp.suffix.insert("fx_f3");

	// add encoding to vector.
	g.encodings.push_back(tmp);
}

// Classifies the line held in g.line and dispatches to the matching parser.
// g.begin is left at the first non-blank character for the parser to use.
//
// out: g.type =
//    '$' if it's an instruction line (the pending instruction is finished
//        with next_insn, then parse_insn runs)
//    ':' if it's an encoding line (parse_encoding runs)
//    '#' if the line is just a comment or is empty
//    '.' otherwise
void parse_line(globals_t &g)
{
	g.begin = skip_space(g.line.begin(), g.line.end());

	if(g.begin == g.line.end())
	{
		g.type = '#';	// blank lines count as comments
		return;
	}

	g.type = *g.begin;
	switch(g.type)
	{
	case '$':
		next_insn(g);
		parse_insn(g);
		break;

	case ':':
		parse_encoding(g);
		break;

	case '#':
		break;

	default:
		g.type = '.';
		break;
	}
}

}	// namespace x86s

using namespace x86s;

// --- main program ---

int main()
{
	try
	{
		std::string infilename = "in_script.txt";
		globals_t g;
		g.filename = infilename;
		g.linenum = 0;
		
		std::ifstream inf(infilename.c_str());

		if(inf == NULL)
		{
			std::cout << infilename << ": unable to open file for reading" << std::endl;
			throw __LINE__;
		}
		
		while(getline(inf, g.line) != NULL)
		{
			++g.linenum;
			//std::cout << "<<" << g.line << ">>" << std::endl;
			parse_line(g);
			// fixme -- add support for g.type == '.' -- semantics and disassembly special case code.
			if(g.type == '.')
			{
				if(g.insn_name.empty())
				{
					std::cout << infilename << " line " << g.linenum << ": code has no associated instruction!" << std::endl;
					throw __LINE__;
				}
				if(g.encodings.empty())
					g.disasm_code.push_back(g.line);
				else
					g.semantics.push_back(g.line);
			}
		}
		next_insn(g);
		
		write_files(g);
	}
	catch(int line)
	{
		std::cout << "Error: line " << line << std::endl;
		return 1;
	}
	return 0;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -