📄 c2asm.cpp

📁 将C语言转换成汇编语言
💻 CPP
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/*********************************************************************************************
*																							 *						
* Program Name:C2ASM,Ver 1.0,9th July,2003.													 *	
*																							 *
* Category:Cross Compiler(From "C" to "Assembly Language").									 *
*																							 *
* Input:c_code.txt(Place the "C" code u want to convert into "ASSEMBLY" Language)			 * 
*																							 *
* Output:asm_code.txt("ASSEMBLY" code for INPUT "C" program)								 *
*																							 *
* Requirements:Microsoft Windows(95/98/Me/2000/XP)/DOS.										 * 
*			 Microsoft Visual C++(Ver 6.0)/Microsoft Visual C++ .NET						 *
*			 Microsoft MASM(Ver 6.11)														 *	
*																							 *
*		NOTE:All Names Mentioned in "Requirements" Section are Registered trade marks of	 *
*			 Microsoft Corporation.															 *
*																							 *
* Programmed By:Muhammad Owais Khan Afridi.													 *
*			  BS(Computer Science),Department Of Computer Science,							 *
*			  Karachi University,Pakistan.													 *
*																							 *			
*			  http://www.csku.edu.pk														 * 	
*																							 *
* Notes for Readers:																		 *
*	1)Sure to read documentation supplied with this code.It'll clear many confusions.		 *
*	2)Reader Shld know programming in C,Assembly,Use of <Vectors> and <Stack> of STL(C++),	 *
*	  to understand this code.																 *
*	3)This program doesn't use any Object Code Optimization,Register Allocation Algorithms.  *
*	4)It converts STANDARD "C",See Sample Programs supplied to get some idea.				 *
*	5)It doesn't Convert all the "C" statements like Switch-Case.It uses a Sub-Part of "C"   *
*	  Language,See GRAMMAR,TOKEN SET of Language in Documentation Supplied to get idea abt it*
*																							 *	
* Disclaimer:																				 *
*			The Author of this PROGRAM shall not be liable in any event for incidental or    *
*			consequential damages in connection with,or arising out of,the furnishing,       *
*			performance,or use of this program.												 *
*********************************************************************************************/
#pragma warning (disable:4786)
#include<conio.h>
#include<string>
#include<iostream>
#include<fstream>
#include<vector>
#include<stack>
using namespace std;

//Number of characters "lexemer" sets aside for the text of a single LEXEME.
#define LEXEME_SIZE 30			
#define FILE_READ "c_code.txt"	//This is the INPUT "C" file,which is to be converted into 
								//Assembly. 

#define FILE_WRITE "asm_code.txt"  //This is the OUTPUT "ASSEMBLY" file For the input "C" file

//Globals Used By Lexical Box,Syntax Box,Code Generator.
//"tokenizer" stores every distinct numeric constant once;the index part of a "long_const" or
//"int_const" TOKEN is the position of the constant inside the matching vector below.
vector<long> number_long;	//This vector will hold Long Constants occurring in the Program
vector<long> number_int;	//This vector will hold Int Constants occurring in the Program

//////////////////////////////
//
//Globals Used By Lexical Box.
//
/////////////////////////////

//This Vector is used to hold IDENTIFIER(s) for Lexical Box ONLY!
//The index part of an "id" TOKEN is the position of the identifier's name in this vector.
vector<string> lex_identifier;	

//Number of entries in the keyword array below.
#define TOTAL_KEYWORDS 10
//Words that "tokenizer" must NOT treat as identifiers.
string keywrd[TOTAL_KEYWORDS]={"void","main","int","long","if","else","for","while","do","return"};

//This Vector will Pick Keywords from the above Array(it is filled at the start of "main").
vector<string> keywords;

//Structure Of a Token
//A TOKEN as produced by the Lexical Box:a (class,value) pair.
struct to{
	string clas;	//Class part of the Token,e.g. "id","relop","semicolon"
	int index;		//Value part of the Token
};

//Scratch Token:it is filled in and then PUSHed on the "tokens" Vector.
to tok;

//This Vector will hold ALL Tokens Generated by Lexical Box.
vector<to> tokens;

///////////////////////////////////
//
//Lexical Box Function Declaration
//
///////////////////////////////////

//Lexemer will break input stream into LEXEMES using D.F.A approach as discussed in ULMAN's
//Compiler Construction Book
//"input" must be positioned on the first character of the next Lexeme;"line" is the current
//line number of the INPUT file.One call reads one Lexeme and hands it to "tokenizer".
void lexemer(ifstream& input,int &line);

//If we follow a particular D.F.A to recognize input stream,In case that FAILS we need to check
//Some Other D.F.A,"lexemer_fail" will provide this functionality.
//It rewinds "input" to "lexeme_begining",stores the first state of the next D.F.A to try in
//"starting_state" and returns that state.
int lexemer_fail(ifstream& input,char& faulty_character,int& line,int& starting_state,streampos& lexeme_begining);

//If we're unable to identify a Lexeme as a particular TOKEN of Our Language,Then we need to 
//Show some error message."lexemer_error" will do this.
void lexemer_error(char& faulty_character,int &line);

//It makes Token for a LEXEME
//"lexeme_buffer" is the text of the Lexeme,"starting_state" is the first state of the D.F.A
//that recognized it(this tells "tokenizer" which kind of Lexeme it is).
void tokenizer(char* lexeme_buffer,int& starting_state);

void syntax_box();		//Syntax Box Declaration
void code_generator();	//Code Generator Declaration

//Entry point of C2ASM.
//Runs the phases in order:Lexical Box(INPUT file -> "tokens"),Syntax Box,Code Generator.
//Returns 0 when all phases were run,-1 when the INPUT file could not be opened.
int main()
{

	//Loading KEYWORDS LIST.It's used in Differentiating Keywords From Identifiers.
	for(unsigned int i=0;i<TOTAL_KEYWORDS;i++)
		keywords.push_back(keywrd[i]);

	char file_read='\0';
	int linecount=1;
	ifstream input(FILE_READ);

	//Without the INPUT file there is nothing to convert.Stop with a clear message instead of
	//running the remaining phases on an empty TOKEN STREAM.
	if(!input)
	{
		cout<<"\n\"C2ASM\" is unable to open the INPUT file("<<FILE_READ<<")"<<endl;
		getch();
		return(-1);
	}

	//Start Reading INPUT File.
	while(input.get(file_read))
	{
		if(file_read=='\n') linecount++;

		//White space only separates Lexemes.Any other character starts a Lexeme:give it
		//back to the stream so that "lexemer" sees the Lexeme from its first character.
		if(file_read!='\n'&&file_read!='\t'&&file_read!=' ')
		{
			input.seekg(-1,ios::cur);
			lexemer(input,linecount);	//Start Making Lexemes
		}//end of if

	}//end reading file

	input.close();

	//If u wanna See Which TOKENs are generated,Remove Comments From the Following Code,It'll 
	//Show u ALL TOKENs on CONSOLE.
	/*
    for(unsigned int i=0;i<tokens.size();i++)
	{
		cout<<"\n";
		cout<<tokens[i].clas;
		cout<<"\t"<<tokens[i].index;
	}
	*/

	//Pushing An Error Token At the END of TOKEN STREAM.
	//So that we've an "End Marker"
	tok.clas="error";
	tok.index=-10;
	tokens.push_back(tok);

	//Start Parsing,Type Checking,Intermediate Code Generation
	syntax_box();

	//If u wanna See What Intermediate Code is generated,Remove Comments From the Following 
	//Code,It'll Show u ALL ATOMS on CONSOLE.

/*		cout<<"\n\nFollowing Intermediate Code is Generated\n";

		cout<<"INDEX----DATATYPE----TABLE";
		for(i=0;i<atoms.size();i++)
			cout<<endl<<atoms[i].op<<" type="<<atoms[i].type
			    <<"  arg1= "<<atoms[i].arg1.index <<" "<< atoms[i].arg1.datatype <<" "<< atoms[i].arg1.whichtable
				<<"  arg2= "<<atoms[i].arg2.index <<" "<< atoms[i].arg2.datatype <<" "<< atoms[i].arg2.whichtable
				<<"  result= "<<atoms[i].result.index <<" "<< atoms[i].result.datatype <<" "<< atoms[i].result.whichtable;
*/

	//Start CODE GENERATION.
	code_generator();

	cout<<"\n\n\"C2ASM\" has sucessfully converted \"C\" code(c_code.txt)"
		<<endl<<"to \"ASSEMBLY\" code(asm_code.txt)";

	cout<<"\n";
	getch();	//Wait for a key so that the message stays visible on the CONSOLE
	return(0);
}


///**********************///
///**********************///
///						 ///
/// LEXICAL BOX STARTED. ///
///						 ///
///**********************///
///**********************///

void lexemer(ifstream& input,int& line)
{
	char character_read='\0';
	char lexeme_buffer[LEXEME_SIZE];
	memset(lexeme_buffer,'\0',LEXEME_SIZE);
	int counter=0;
	int current_state=0,starting_state=0;
	bool read_flag=true;
	streampos lexeme_begining=input.tellg();
	while(read_flag){
			switch(current_state)
			{
			//D.F.A for Identifier And Keyword
			case 0:input.get(character_read);
				if(isalpha(character_read)||character_read=='_') {current_state=1;lexeme_buffer[counter]=character_read;counter++;starting_state=0;}
				else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
				break;
			case 1:input.get(character_read);
				if(isalpha(character_read)||character_read=='_'||isdigit(character_read)) {current_state=1;lexeme_buffer[counter]=character_read;counter++;}
				else current_state=2;
				break;
			case 2:
				input.seekg(-1,ios::cur);
				read_flag=false;
				break;
			//D.F.A for Relational And Assignment Operators
			case 3:input.get(character_read);
				if(character_read=='=') {current_state=4;lexeme_buffer[counter]=character_read;counter++;starting_state=3;}
				else if(character_read=='<') {current_state=7;lexeme_buffer[counter]=character_read;counter++;starting_state=3;}
				else if(character_read=='>') {current_state=11;lexeme_buffer[counter]=character_read;counter++;starting_state=3;}
				else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);				
				break;
			case 4:input.get(character_read);
				if(character_read=='=') {current_state=5;lexeme_buffer[counter]=character_read;counter++;}
				else current_state=6;
				break;
			case 5:
				read_flag=false;
				break;
			case 6:
				input.seekg(-1,ios::cur);
				read_flag=false;
				break;
			case 7:input.get(character_read);
				if(character_read=='=') {current_state=8;lexeme_buffer[counter]=character_read;counter++;}
				else if(character_read=='>') {current_state=9;lexeme_buffer[counter]=character_read;counter++;}
				else current_state=10;
				break;
			case 8:
				read_flag=false;
				break;
			case 9:
				read_flag=false;
				break;
			case 10:
				input.seekg(-1,ios::cur);
				read_flag=false;
				break;
			case 11:input.get(character_read);
				if(character_read=='=') {current_state=12;lexeme_buffer[counter]=character_read;counter++;}
				else current_state=13;
				break;
			case 12:
				read_flag=false;
				break;
			case 13:
				input.seekg(-1,ios::cur);
				read_flag=false;
				break;
			//D.F.A for Arithmetic Operators
			case 14:
				//MAULA ALI!
				input.get(character_read);
				if(character_read=='+'|| character_read=='-'||character_read=='*'||character_read=='/'||character_read=='%') 
				{current_state=15;lexeme_buffer[counter]=character_read;counter++;starting_state=14;}
				else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);				
				break;
			case 15:
				read_flag=false;
				break;
			//D.F.A for Punctuations
			case 16:
				input.get(character_read);
				if(character_read=='('||character_read==')'||character_read=='{'||character_read=='}'||character_read==','||character_read==';'||character_read=='['||character_read==']')
				{current_state=17;lexeme_buffer[counter]=character_read;counter++;starting_state=16;}
				else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
				break;
			case 17:
				read_flag=false;								
				break;
			//D.F.A for LONG Numbers
			case 18:
				input.get(character_read);
				if(isdigit(character_read)) {current_state=19;lexeme_buffer[counter]=character_read;counter++;starting_state=18;}
				else current_state=lexemer_fail(input,character_read,line,starting_state,lexeme_begining);
				break;
			case 19:
				input.get(character_read);
				if(isdigit(character_read)) {current_state=19;lexeme_buffer[counter]=character_read;counter++;}
				else if(character_read=='L'||character_read=='l') {current_state=20;lexeme_buffer[counter]=character_read;counter++;}
				else current_state=21;				
				break;
			case 20:
				read_flag=false;								
				break;
			case 21:
				input.seekg(-1,ios::cur);
				read_flag=false;								
				break;
			//Error TOKEN
			case 100:
				input.get(character_read);
				{current_state=100;lexeme_buffer[counter]=character_read;counter++;starting_state=100;}
				read_flag=false;	
				break;

			}//End of switch
				}//End Reading One LEXEME
		lexeme_buffer[counter]='\0';

		//A Lexeme Is Made Sucessfully,Call Tokenizer to Make It's Coresponding TOKEN
		tokenizer(lexeme_buffer,starting_state);
}

//Called when the first character of a Lexeme does not fit the D.F.A that was just tried.
//"starting_state" holds the first state of that D.F.A.The stream is moved back to
//"lexeme_begining","starting_state" is replaced by the first state of the next D.F.A to try
//and that state is returned.The D.F.As are tried in this order:
//    0(Identifier/Keyword) -> 3(Relational/Assignment) -> 14(Arithmetic) -> 16(Punctuation)
//    -> 18(Number) -> 100(Error TOKEN)
//For any other value of "starting_state" nothing is changed and the value is returned as is.
//"faulty_character" and "line" are not used at the moment(see the note inside).
int lexemer_fail(ifstream& input,char& faulty_character,int& line,int& starting_state,streampos& lexeme_begining)
{
	int next_dfa;

	if(starting_state==0) next_dfa=3;
	else if(starting_state==3) next_dfa=14;
	else if(starting_state==14) next_dfa=16;
	else if(starting_state==16) next_dfa=18;
	//We're Generating ERROR token for those things which are NOT part of OUR LANGUAGE.
	//If U wanna stop with an error message instead,call
	//"lexemer_error(faulty_character,line)" after the seekg below when the Number D.F.A(18)
	//has failed,rather than going on to state 100.
	else if(starting_state==18) next_dfa=100;
	else return starting_state;		//No More States to try

	input.seekg(lexeme_begining);
	starting_state=next_dfa;
	return starting_state;
}

//Reports a Lexeme that is not part of Our Language and stops the program.
//  faulty_character : first character of the offending Lexeme
//  line             : line number shown in the message
//The message is written to the CONSOLE,then the function waits for a key press and ends the
//program with exit code -1.It never returns to the caller.
void lexemer_error(char& faulty_character,int &line)
{
	//Each part of the message starts on its own line.
	cout<<"\nSome Ir-Recoverable Error Occured in LEXICAL ANALYZER!---Possibly"
		<<"\nAn Invalid Lexeme Caused it,Which is Not Part Of Our Language"
		<<"\nThe Starting SYMBOL Of Lexeme is "<<faulty_character<<" Line Number Is "<<line<<endl; 
	getch();
	exit(-1);
}

void tokenizer(char* lexeme_buffer,int& starting_state)
{
	tok.clas=" ";
	tok.index=-10;

	string lexeme;
	lexeme.assign(lexeme_buffer);

	unsigned int j=0,k=0;
	

	long num;

	switch(starting_state)
	{
	//Lexeme is a KEYWORD/IDENTIFIER
	case 0:
		//If LEXEME is a KEYWORD
		for(k=0;k<keywords.size();k++)
		{
			if(lexeme==keywords[k])
			{
				if(lexeme=="int"||lexeme=="long")
				{
					tok.clas="dt";
					tok.index=(lexeme=="int")?0:1;
				}
				else{
					tok.clas=lexeme;
					tok.index=-10;
				}
			tokens.push_back(tok);
			return;
			}
		}
		
		//If LEXEME is an IDENTIFIER
		for(j=0;j<lex_identifier.size();j++)
			//If identifier is already THERE!		
			if(lex_identifier[j]==lexeme)
			{
			tok.clas="id";
			tok.index=j;
			tokens.push_back(tok);
			return;
			}

			//If Identifier is NOT THERE!
			if(lex_identifier.size()==0||lex_identifier.size()==j)
			{
			//Enter into IDENTIFIER SYMBOL TABLE!
			lex_identifier.push_back(lexeme);
			//Enter into TOKEN's TABLE!
			tok.clas="id";
			tok.index=j;
			tokens.push_back(tok);
			return;
			}
		break;
	
		//Lexeme is a RELATIONAL/ASSIGNMENT OPERATOR
	case 3:
		if(lexeme=="=")	//An Assignment Operator
			tok.clas="assignop";
		else	//A Relational Operator
			tok.clas="relop";
	
		if(lexeme=="==")
			tok.index=8;
		else if(lexeme=="<>")	
			tok.index=9;
		else if(lexeme==">=")	
		tok.index=10;
		else if(lexeme=="<=")	
		tok.index=11;
		else if(lexeme==">")	
		tok.index=12;
		else if(lexeme=="<")	
		tok.index=13;
		else if(lexeme=="=")	
		tok.index=14;

		//Enter into TOKEN's TABLE!
			tokens.push_back(tok);
		break;
	
	//Lexeme is an ARITHMENTIC OPERATOR
	case 14:

		tok.clas=(lexeme=="+" ||lexeme=="-") ? "add_sub" : "mul_div_mod"; 
		
		if(lexeme=="+")
		tok.index=15;
		else if(lexeme=="-")	
		tok.index=16;
		else if(lexeme=="*")	
		tok.index=17;
		else if(lexeme=="/")	
		tok.index=18;
		else if(lexeme=="%")	
		tok.index=19;
		//Enter into TOKEN's TABLE!
			tokens.push_back(tok);
		break;

	//Lexeme is a PUNCTUATION CHARACTIER
	case 16:
		if(lexeme=="{")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="braces_open";
			tok.index=2;
			tokens.push_back(tok);
		}
		
		else if(lexeme=="}")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="braces_close";
			tok.index=3;
			tokens.push_back(tok);
		}
	
		else if(lexeme=="(")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="parenthesis_open";
			tok.index=4;
			tokens.push_back(tok);
		}	

		else if(lexeme==")")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="parenthesis_close";
			tok.index=5;
			tokens.push_back(tok);
		}	
	
		else if(lexeme==",")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="comma";
			tok.index=6;
			tokens.push_back(tok);
		}	
			
		else if(lexeme==";")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="semicolon";
			tok.index=7;
			tokens.push_back(tok);
		}

		else if(lexeme=="[")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="square_open";
			tok.index=20;
			tokens.push_back(tok);
		}

		else if(lexeme=="]")
		{
			//Enter into TOKEN's TABLE!
			tok.clas="square_close";
			tok.index=21;
			tokens.push_back(tok);
		}
		break;

		//Lexeme is a NUMBER,It may be an INTEGER or a LONG 
	case 18:
		
		//Checking Whehter the GIVEN NUM is a LONG?
		num=lexeme.find('L');
		if (num==-1) num=lexeme.find('l');
		
		//IF GIVEN NUM is a LONG!
		if(num!=-1)
		{
			//Convert String To LONG INTEGER!...
			lexeme.erase(num,1);	//Remove 'L' or 'l' from the END
			num=0;
			const char *temp=lexeme.c_str();
			num=atol(temp);

		for(unsigned int m=0;m<number_long.size();m++)
			//Number is already there!
			if(number_long[m]==num)
			{
			tok.clas="long_const";
			tok.index=m;
			tokens.push_back(tok);
			return;
			}
			//Number is NOT there!
			if(number_long.size()==0||number_long.size()==m)
			{
			//Enter into NUMBER's SYMBOL TABLE!
			number_long.push_back(num);
			//Enter into TOKEN's TABLE!
			tok.clas="long_const";
			tok.index=m;
			tokens.push_back(tok);
			return;
			}
		}
		//IF GIVEN NUM is an INTEGER!
		else
		{
			int num=0;
			num=atoi(lexeme_buffer);

		for(unsigned int m=0;m<number_int.size();m++)
			//Number is already there!
			if(number_int[m]==num)
			{
			tok.clas="int_const";
			tok.index=m;
			tokens.push_back(tok);
			return;
			}
			//Number is NOT there!
			if(number_int.size()==0||number_int.size()==m)
			{
			//Enter into NUMBER's SYMBOL TABLE!
			number_int.push_back(num);
			//Enter into TOKEN's TABLE!
			tok.clas="int_const";
			tok.index=m;
			tokens.push_back(tok);
			return;
			}
		}
		break;
12 3 4 5 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -