⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fscan.cpp

📁 编译原理--词法扫描
💻 CPP
字号:
//#include <windows.h>
#include <iostream.h>
#include <stdio.h>
#include <string.h>

// Maximum number of characters kept for a symbol's name and type strings.
// The type is spelled out: implicit int is not valid in standard C++ (or C99 and later).
const int namelen=40;
// Symbol-table entry: one node of a binary tree whose search key is `name`
// (search_symtab and enter_symtab both order nodes with strcmp on the name).
typedef struct symtab_node           // node structure of the binary tree
{
  struct symtab_node *left;          // left child
  struct symtab_node *right;         // right child
  char type[namelen+1];              // category label of the entry, copied in by enter_symtab
  char name[namelen+1];              // the identifier's name string; the tree's search key
  int lineno;               // line number recorded for the entry
}SYMTAB_NODE,*SYMTAB_NODE_PTR;

SYMTAB_NODE_PTR symtab_root=NULL;          // root node of the symbol table; starts out empty

// Look up the identifier `name` in the tree rooted at `np`.
// Returns the first node on the search path whose name equals `name`,
// or NULL when the path runs out without a match.
SYMTAB_NODE_PTR search_symtab(char *name,SYMTAB_NODE_PTR np)
{
  for (SYMTAB_NODE_PTR cur = np; cur != NULL; )
  {
    const int order = strcmp(name, cur->name);
    if (order == 0)
      return cur;
    // smaller keys live in the left subtree, everything else in the right
    if (order < 0)
      cur = cur->left;
    else
      cur = cur->right;
  }
  return NULL;
}

void visit_symtab(const SYMTAB_NODE_PTR np)
{
	if(np)
	{
		visit_symtab(np->left);
		cout<<np->name<<endl;
		visit_symtab(np->right);
	}
}

// Add a new node to the tree whose root pointer is `npp`.
//   name - key of the new entry; at most namelen characters are kept
//   type - category label of the new entry; at most namelen characters are kept
//   line - line number stored in the node
//   npp  - root pointer of the tree; set to the new node when the tree is empty
// Returns the newly allocated node. The node is created with `new` and is
// owned by the tree; nothing in this function ever frees it.
// A name equal to one already present is not rejected: the new node is
// placed in the right subtree of the existing one.
SYMTAB_NODE_PTR enter_symtab(const char *name,const char *type , int line,SYMTAB_NODE_PTR &npp)
{
  SYMTAB_NODE_PTR new_nodep = new symtab_node;

  // strncpy writes no terminator when the source has namelen or more
  // characters, and memory from `new` is uninitialized, so terminate by hand.
  strncpy(new_nodep->name, name, namelen);
  new_nodep->name[namelen] = '\0';
  strncpy(new_nodep->type, type, namelen);
  new_nodep->type[namelen] = '\0';
  new_nodep->lineno = line;
  new_nodep->left = new_nodep->right = NULL;

  // Follow the search path down to the empty link where the node belongs.
  // The stored (possibly truncated) key is compared, so the ordering agrees
  // with what search_symtab later sees in the nodes.
  SYMTAB_NODE_PTR *link = &npp;
  while (*link != NULL)
  {
    if (strcmp(new_nodep->name, (*link)->name) < 0)
      link = &(*link)->left;
    else
      link = &(*link)->right;
  }
  *link = new_nodep;
  return new_nodep;
}

/*
void main2()
{
	//下面定义一个二叉树作为实例
 SYMTAB_NODE_PTR root1,root2,root3,root4;
  root1=new symtab_node;
  root2=new symtab_node;
  root3=new symtab_node;
  root4=new symtab_node;

  strncpy(root1->name,"y",namelen);
  root1->value=7;
  root1->left=root2;
  root1->right=root3;

  strncpy(root2->name,"x",namelen);
  root2->value=8;
  root2->left=NULL;
  root2->right=NULL;

  strncpy(root3->name,"z",namelen);
  root3->value=9;
  root3->left=root3->right=NULL;

  //作为实例的二叉树
 // char t[40+1];
 // int value;
  cout<<"请输入一个标识符:";
 // cin>>t;
  cout<<"它的值为:";
//  cin>>value;
//  cout<<search_symtab(t,root1)<<endl;
//  SYMTAB_NODE_PTR p=search_symtab(t,root1);
//  if(p!=NULL)
    cout<<enter_symtab("y",10,root1);
  }
*/

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>

/* Boolean constants; each is defined only if no earlier definition exists */
#ifndef FALSE
#define FALSE 0
#endif

#ifndef TRUE
#define TRUE 1
#endif

/* MAXRESERVED = the number of reserved words
 * (the size of the reservedWords lookup table) */
#define MAXRESERVED 8

/* TokenType lists every kind of token that getToken can return */
typedef enum 
    /* book-keeping tokens: end of input, and unrecognized input */
   {ENDFILE,ERROR,
    /* reserved words */
    IF,THEN,ELSE,END,REPEAT,UNTIL,READ,WRITE,
    /* multicharacter tokens: runs of letters and runs of digits */
    ID,NUM,
    /* special symbols: assignment, equality, less-than, plus, minus,
       times, divide, left and right parenthesis, semicolon */
    ASSIGN,EQ,LT,PLUS,MINUS,TIMES,OVER,LPAREN,RPAREN,SEMI
   } TokenType;

FILE* source; /* source code text file; the stream getNextChar reads lines from */
FILE* listing; /* listing output text file; getToken reports scanner bugs to it */
FILE* code; /* code text file for TM simulator */

int lineno; /* source line number for listing; incremented by getNextChar before each line read */

/**************************************************/
/***********   Syntax tree for parsing ************/
/**************************************************/

/* Category of a syntax-tree node (stored in TreeNode.nodekind); presumably
 * selects which member of TreeNode.kind is meaningful - confirm in the parser */
typedef enum {StmtK,ExpK} NodeKind;
/* Statement kinds (type of TreeNode.kind.stmt) */
typedef enum {IfK,RepeatK,AssignK,ReadK,WriteK} StmtKind;
/* Expression kinds (type of TreeNode.kind.exp) */
typedef enum {OpK,ConstK,IdK} ExpKind;

/* ExpType is used for type checking */
typedef enum {Void,Integer,Boolean} ExpType;

/* MAXCHILDREN = number of child slots in every syntax-tree node */
#define MAXCHILDREN 3

/* Syntax-tree node. Nothing visible in this file creates or reads these
 * nodes; the field notes below describe the declaration only. */
typedef struct treeNode
   { struct treeNode * child[MAXCHILDREN]; /* child subtrees */
     struct treeNode * sibling;            /* link to another node at the same level */
     int lineno;                           /* line number associated with the node */
     NodeKind nodekind;                    /* statement node or expression node */
     union { StmtKind stmt; ExpKind exp;} kind; /* specific statement or expression kind */
     union { TokenType op;                 /* a token, */
             int val;                      /* or an integer value, */
             char * name; } attr;          /* or a name string; one member at a time */
     ExpType type; /* for type checking of exps */
   } TreeNode;

/**************************************************/
/***********   Flags for tracing       ************/
/**************************************************/

/* NOTE(review): none of the code visible in this file reads these flags;
 * the only uses of EchoSource (in getNextChar) and TraceScan (in getToken)
 * are commented out. They may still be used by code outside this view.
 */

/* EchoSource = TRUE causes the source program to
 * be echoed to the listing file with line numbers
 * during parsing
 */
int EchoSource;

/* TraceScan = TRUE causes token information to be
 * printed to the listing file as each token is
 * recognized by the scanner
 */
int TraceScan;

/* TraceParse = TRUE causes the syntax tree to be
 * printed to the listing file in linearized form
 * (using indents for children)
 */
int TraceParse;

/* TraceAnalyze = TRUE causes symbol table inserts
 * and lookups to be reported to the listing file
 */
int TraceAnalyze;

/* TraceCode = TRUE causes comments to be written
 * to the TM code file as code is generated
 */
int TraceCode;

/* Error = TRUE prevents further passes if an error occurs */
int Error; 
/* MAXTOKENLEN is the maximum size of a token */
#define MAXTOKENLEN 40

/* tokenString array stores the lexeme of each token;
 * sized for MAXTOKENLEN characters plus the terminating '\0' */
char tokenString[MAXTOKENLEN+1];

/* function getToken returns the 
 * next token in source file
 */
TokenType getToken(void);


/* states in scanner DFA (as used by getToken):
 *   START     - between tokens; the next character decides what begins
 *   INASSIGN  - a ':' has been read; only '=' completes the token
 *   INCOMMENT - inside a comment opened by '{', until '}' or end of input
 *   INNUM     - reading a run of digits
 *   INID      - reading a run of letters
 *   DONE      - the token is complete
 */
typedef enum
   { START,INASSIGN,INCOMMENT,INNUM,INID,DONE }
   StateType;

/* lexeme of identifier or reserved word */
//char tokenString[MAXTOKENLEN+1];

/* BUFLEN = length of the input buffer for
   source code lines */
#define BUFLEN 256

static char lineBuf[BUFLEN]; /* holds the current line, as filled by fgets in getNextChar */
static int linepos = 0; /* index in lineBuf of the next character getNextChar returns */
static int bufsize = 0; /* length of the string currently held in lineBuf */
static int EOF_flag = FALSE; /* set by getNextChar at end of input; makes ungetNextChar a no-op */

/* getNextChar returns the next character of the source text (blanks and
   newlines included), reading a new line into lineBuf when the current one
   is exhausted. It returns EOF, and sets EOF_flag, once fgets reports that
   no more input is available. lineno is incremented before every read
   attempt.
   Characters are returned as unsigned char values, so a byte >= 0x80 is
   never negative: a negative value is not valid input for the <ctype.h>
   classifiers used by getToken, and byte 0xFF would compare equal to EOF. */
static int getNextChar(void)
{ if (linepos < bufsize)
    return (unsigned char) lineBuf[linepos++];

  /* current line used up: fetch the next one */
  lineno++;
  if (fgets(lineBuf,BUFLEN-1,source) == NULL)
  { EOF_flag = TRUE;
    return EOF;
  }
//		if (EchoSource) fprintf(listing,"%4d: %s",lineno,lineBuf);
  bufsize = (int) strlen(lineBuf);
  linepos = 0;
  return (unsigned char) lineBuf[linepos++];
}

/* ungetNextChar moves the read position in lineBuf back by one
   character; once EOF_flag is set it leaves the position alone */
static void ungetNextChar(void)
{
  if (EOF_flag)
    return;
  --linepos;
}

/* lookup table of reserved words: each entry pairs a keyword's spelling
   with the token it scans as. The spellings are string literals, which
   must not be modified, so the pointer is declared const. */
static struct
    { const char* str;
      TokenType tok;
    } reservedWords[MAXRESERVED]
   = {{"if",IF},{"then",THEN},{"else",ELSE},{"end",END},
      {"repeat",REPEAT},{"until",UNTIL},{"read",READ},
      {"write",WRITE}};

/* Map an identifier spelling to its token: the token of the matching
   reservedWords entry when s is a reserved word, ID otherwise */
/* the table is scanned front to back */
static TokenType reservedLookup (char * s)
{ int idx = 0;
  while (idx < MAXRESERVED)
  { if (strcmp(s,reservedWords[idx].str) == 0)
      return reservedWords[idx].tok;
    ++idx;
  }
  return ID;
}

/****************************************/
/* the primary function of the scanner  */
/****************************************/
/* getToken runs the scanner DFA over the characters delivered by
 * getNextChar and returns the next token of the source file.
 * The token's lexeme is left in tokenString, truncated to MAXTOKENLEN
 * characters. White space and { ... } comments are skipped. A run of
 * letters that spells a reserved word is returned as that word's token.
 * Returns ENDFILE at end of input (also when a comment is still open),
 * and ERROR for an unrecognized character or a ':' not followed by '='.
 */
TokenType getToken(void)
{  /* index for storing into tokenString */
   int tokenStringIndex = 0;
   /* holds current token to be returned; every path that reaches DONE
      assigns it, the initial value only guards against future edits */
   TokenType currentToken = ERROR;
   /* current state - always begins at START */
   StateType state = START;
   /* flag to indicate save to tokenString */
   int save;
   while (state != DONE)
   { int c = getNextChar();
     /* the <ctype.h> classifiers accept only EOF or an unsigned char
        value, so never hand them a negative character code */
     int cc = (c == EOF) ? EOF : (unsigned char) c;
     save = TRUE;
     switch (state)
     { case START:
         if (isdigit(cc))
           state = INNUM;
         else if (isalpha(cc))
           state = INID;
         else if (c == ':')
           state = INASSIGN;
         else if ((c == ' ') || (c == '\t') || (c == '\n'))
           save = FALSE;
         else if (c == '{')
         { save = FALSE;
           state = INCOMMENT;
         }
         else
         { /* every remaining character is a complete token by itself */
           state = DONE;
           switch (c)
           { case EOF:
               save = FALSE;
               currentToken = ENDFILE;
               break;
             case '=':
               currentToken = EQ;
               break;
             case '<':
               currentToken = LT;
               break;
             case '+':
               currentToken = PLUS;
               break;
             case '-':
               currentToken = MINUS;
               break;
             case '*':
               currentToken = TIMES;
               break;
             case '/':
               currentToken = OVER;
               break;
             case '(':
               currentToken = LPAREN;
               break;
             case ')':
               currentToken = RPAREN;
               break;
             case ';':
               currentToken = SEMI;
               break;
             default:
               currentToken = ERROR;
               break;
           }
         }
         break;
       case INCOMMENT:
         save = FALSE;
         if (c == EOF)
         { state = DONE;
           currentToken = ENDFILE;
         }
         else if (c == '}') state = START;
         break;
       case INASSIGN:
         state = DONE;
         if (c == '=')
           currentToken = ASSIGN;
         else
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           currentToken = ERROR;
         }
         break;
       case INNUM:
         if (!isdigit(cc))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = NUM;
         }
         break;
       case INID:
         if (!isalpha(cc))
         { /* backup in the input */
           ungetNextChar();
           save = FALSE;
           state = DONE;
           currentToken = ID;
         }
         break;
       case DONE:
       default: /* should never happen */
         fprintf(listing,"Scanner Bug: state= %d\n",state);
         state = DONE;
         currentToken = ERROR;
         break;
     }
     /* Keep at most MAXTOKENLEN characters: tokenString has exactly one
        more element, reserved for the terminator written below. (Testing
        with <= here stored a 41st character and pushed the terminator
        one element past the end of the array.) */
     if ((save) && (tokenStringIndex < MAXTOKENLEN))
       tokenString[tokenStringIndex++] = (char) c;
     if (state == DONE)
     { tokenString[tokenStringIndex] = '\0';
       if (currentToken == ID)
         currentToken = reservedLookup(tokenString);
     }
   }
/*   if (TraceScan) {
     fprintf(listing,"\t%d: ",lineno);
     printToken(currentToken,tokenString);
   }
*/
   return currentToken;
} /* end getToken */


void main2()
{
	source=fopen("test.l","r");
	TokenType tem;
	symtab_node* root=NULL;

	while(ENDFILE != (tem=getToken()) )
	{
		switch(tem)
		{
		case  IF:
		case  THEN:
		case  ELSE:
		case  END:
		case  REPEAT:
		case  UNTIL:
		case  READ:
		case  WRITE:
		  cout<<lineno<<" "<<"reserved words:"<<tokenString<<endl;
		  enter_symtab(tokenString,"key words",lineno,root);
		  break;
		case  ID:
    	  cout<<lineno<<" "<<"identifier:"<<tokenString<<endl;
		  enter_symtab(tokenString,"identifier",lineno,root);
		  break;
		case  NUM:
    	  cout<<lineno<<" "<<"number:"<<tokenString<<endl;
		  enter_symtab(tokenString,"number",lineno,root);
		  break;
		case  ASSIGN:
		case  EQ:
		case  LT:
		case  PLUS:
		case  MINUS:
		case  TIMES:
		case  OVER:
		case  LPAREN:
		case  RPAREN:
		case  SEMI:
    	  cout<<lineno<<" "<<"operant:"<<tokenString<<endl;
		  enter_symtab(tokenString,"operant",lineno,root);
		  break;
		default:
			break;
		}

 	}
visit_symtab(root);

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -