📄 scan.c

📁 参照清华大学《编译原理》写的cmm语言语法分析程序
💻 C
字号:
/**
 * @file scan.c
 * @brief  Scanner (lexical analyzer) implementation for the cmm language
 * @author Shiquan Ye, yeshiquan@gmail.com
 * @date 2008-11-12
 */

/*
 * Copyright (C) 2008 - Shiquan Ye, yeshiquan@gmail.com
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

#include "globals.h"
#include "util.h"
#include "scan.h"

/* States of the scanner DFA */
typedef enum {
    START,
    INID,
    INEQ,
    INNEQ,
    INLEQ,
    INGEQ,
    ININTORREAL,
    INREAL,
    INSLASHORCOMMENT,
    INCOMMENT,
    DONE
} StateType;

/* Size of the line buffer; longer source lines are read in several chunks */
#define BUFFLEN 256

static char lineBuf[BUFFLEN];   /* holds the current line (or chunk of it) */
static int linepos = 0;         /* index of the next unread char in lineBuf */
static int bufsize = 0;         /* number of characters currently in lineBuf */
static int EOF_flag = FALSE;    /* TRUE once end of input has been reached;
                                   makes ungetNextChar() a no-op */

/**
 * @brief  Fetch the next character of the input, refilling lineBuf from
 *         `source` when the buffered line is exhausted.
 *
 * Every successful refill increments `lineno`.
 * NOTE(review): a physical line longer than the buffer is read in several
 * chunks and therefore bumps `lineno` more than once.
 *
 * @returns the character as a non-negative int (unsigned char value), or
 *          EOF when no more input is available
 */
static int getNextChar(void)
{
    if (linepos < bufsize) {
        return (unsigned char)lineBuf[linepos++];
    }

    /* Buffer exhausted: load the next line. fgets() already reserves room
       for the terminating NUL, so the full buffer size can be passed. */
    if (fgets(lineBuf, (int)sizeof lineBuf, source) == NULL) {
        /* Remember EOF so a later ungetNextChar() does not step back into
           the last buffered character and cause it to be scanned again. */
        EOF_flag = TRUE;
        return EOF;
    }

    lineno++;
    bufsize = (int)strlen(lineBuf);
    linepos = 0;
    /* Cast through unsigned char so a byte such as 0xFF is never mistaken
       for EOF on platforms where plain char is signed. */
    return (unsigned char)lineBuf[linepos++];
}

/**
 * @brief  Push the most recently read character back onto the input.
 *
 * Does nothing once end of input has been reached, because EOF itself was
 * never taken from the buffer.
 */
static void ungetNextChar(void)
{
    if (!EOF_flag) {
        linepos--;
    }
}

/* Lookup table of reserved words; sized by its initialisers so it can never
   contain uninitialised (NULL) entries. */
static const struct {
    const char *str;
    TokenType tok;
} reservedWords[] = {
    {"if", IF}, {"else", ELSE}, {"while", WHILE}, {"write", WRITE},
    {"read", READ}, {"int", INT}, {"real", REAL}
};

/**
 * @brief  Check whether an identifier is a reserved word.
 *
 * @param s NUL-terminated identifier text
 *
 * @returns the token type of the matching reserved word, or ID if `s`
 *          is not reserved
 */
static TokenType reservedLookup(const char *s)
{
    size_t count = sizeof reservedWords / sizeof reservedWords[0];
    size_t i;

    for (i = 0; i < count; i++) {
        if (strcmp(s, reservedWords[i].str) == 0) {
            return reservedWords[i].tok;
        }
    }
    return ID;
}

/**
 * @brief  Scan and return the next token of the input.
 *
 * Runs the DFA from START until DONE, skipping whitespace and block
 * comments. The lexeme of the returned token is left NUL-terminated in
 * the global `tokenString`.
 *
 * @returns the token type; ENDFILE at end of input, ERROR for an
 *          unrecognised character, a lone '!', or an unterminated comment
 */
TokenType getToken(void)
{
    /* index into tokenString for the next lexeme character */
    int tokenStringIndex = 0;
    /* token to return; always assigned before the DFA reaches DONE */
    TokenType currentToken = ERROR;
    /* current DFA state; every scan begins in START */
    StateType state = START;
    /* whether the current character is appended to tokenString */
    int save;

    while (state != DONE) {
        /* int, not char: EOF must stay distinguishable from every byte */
        int c = getNextChar();

        save = TRUE;
        switch (state) {
        case START:
            if (c >= '0' && c <= '9') {
                state = ININTORREAL;
            } else if (c >= 'a' && c <= 'z') {
                state = INID;
            } else if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                /* whitespace (including CR of CRLF line endings) */
                save = FALSE;
            } else if (c == '!') {
                state = INNEQ;
            } else if (c == '<') {
                state = INLEQ;
            } else if (c == '>') {
                state = INGEQ;
            } else if (c == '=') {
                state = INEQ;
            } else if (c == '/') {
                state = INSLASHORCOMMENT;
            } else {
                state = DONE;
                switch (c) {
                case EOF:   /* end of input */
                    save = FALSE;
                    currentToken = ENDFILE;
                    break;
                case '+':
                    currentToken = PLUS;
                    break;
                case '-':
                    currentToken = MINUS;
                    break;
                case '*':
                    currentToken = TIMES;
                    break;
                case '(':
                    currentToken = LPAREN;
                    break;
                case ')':
                    currentToken = RPAREN;
                    break;
                case '[':
                    currentToken = LBRACKET;
                    break;
                case ']':
                    currentToken = RBRACKET;
                    break;
                case ',':
                    currentToken = COMMA;
                    break;
                case ';':
                    currentToken = SEMI;
                    break;
                case '{':
                    currentToken = LBRACE;
                    break;
                case '}':
                    currentToken = RBRACE;
                    break;
                default:
                    currentToken = ERROR;
                    break;
                }
            }
            break;

        case INID:
            /* identifiers continue with lowercase letters or '_' only */
            if ((c > 'z' || c < 'a') && c != '_') {
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = ID;
            }
            break;

        case INNEQ:
            if (c == '=') {
                state = DONE;
                currentToken = NEQ;
            } else {
                /* a lone '!' is not a token of the language */
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = ERROR;
            }
            break;

        case INLEQ:
            if (c == '=') {
                state = DONE;
                currentToken = LEQ;
            } else {
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = LSS;
            }
            break;

        case INGEQ:
            if (c == '=') {
                state = DONE;
                currentToken = GEQ;
            } else {
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = GTR;
            }
            break;

        case INEQ:
            if (c == '=') {
                state = DONE;
                currentToken = EQL;
            } else {
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = ASSIGN;
            }
            break;

        case ININTORREAL:
            if (c == '.') {
                state = INREAL;
            } else if (c < '0' || c > '9') {
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = INTNUM;
            }
            break;

        case INSLASHORCOMMENT:
            save = FALSE;
            if (c == '*') {
                /* start of a block comment: discard the '/' saved so far */
                tokenStringIndex = 0;
                tokenString[0] = '\0';
                state = INCOMMENT;
            } else {
                ungetNextChar();
                state = DONE;
                currentToken = SLASH;
            }
            break;

        case INREAL:
            if (c > '9' || c < '0') {
                ungetNextChar();
                save = FALSE;
                state = DONE;
                currentToken = REALNUM;
            }
            break;

        case INCOMMENT:
            save = FALSE;
            if (c == EOF) {
                /* unterminated comment */
                state = DONE;
                currentToken = ERROR;
            } else if (c == '*') {
                int next = getNextChar();

                if (next == '/') {
                    /* comment closed: restart the DFA for the next token */
                    tokenStringIndex = 0;
                    state = START;
                } else {
                    /* push back so that the second '*' of a closing
                       sequence like the one ending in two stars and a
                       slash is examined again (no-op at EOF, which is
                       then reported on the next iteration) */
                    ungetNextChar();
                }
            }
            break;

        case DONE:
        default:
            /* unreachable: the loop exits as soon as state becomes DONE */
            state = DONE;
            currentToken = ERROR;
            break;
        }

        /* Leave room for the terminator; over-long lexemes are truncated.
           Assumes tokenString holds at least MAXTOKENLEN + 1 bytes --
           TODO confirm against its declaration. */
        if (save && tokenStringIndex < MAXTOKENLEN) {
            tokenString[tokenStringIndex++] = (char)c;
        }

        if (state == DONE) {
            tokenString[tokenStringIndex] = '\0';
            if (currentToken == ID) {
                currentToken = reservedLookup(tokenString);
            }
        }
    }

    return currentToken;
}
💿 文件大小 32 K
👤 上传用户 LIBIN200788
📂 所属分类编译器/解释器
🏷️ 相关标签

#cmm #清华大学 #分 #编译原理
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -