/* File: lexer.c */
/*
* Author: zhangdi
* Date: 2008-11-10
* Description: lexer that recognizes the tokens of a simple C-like grammar
*/
#include "global.h"
#include "error.h"
#include "symbol.h"
#include "token.h"
/* Scratch buffer into which lexan() assembles the text of the lexeme it is
 * currently scanning, before passing it to lookup()/insert(). */
char lexbuf[BSIZE];
/* Current source line, starting at 1; lexan() increments it for each '\n'
 * it reads and prints it in its diagnostics. */
int lineno = 1;
/* Initialised to 0; lexan() as written here does not update it. */
int tokenval = 0;
/* Status of the most recent lexan() call: -1 before any call, 1 when a token
 * (or DONE) was recognised, 0 when lexan() reports an error such as an
 * unrecognised character. */
int lexsignal = -1;
/* Set by lexan() while scanning a numeric literal: 1 if the literal contains
 * a '.', 0 otherwise. */
int numflag = 0;
int lexan()
{
int ch, ch2;
int p, b, k;
int comment = 0;
int i = 0;
while (1)
{
ch = fgetc(fp);
if (ch == ' ' || ch == '\t')
; //wipe of blank
else if (ch == '\n')
lineno += 1;
else if (ch == '/' || ch == '*')
{
comment = 0;
ch2 = fgetc(fp);
if (ch == '/' && ch2 == '*')
{
ch = fgetc(fp);
while (comment != 2)
{
if (ch == '*')
{
comment++;
ch = fgetc(fp);
if (ch == '/')
comment++;
else
{
comment--;
ungetc(ch, fp);
}
}
else
ch = fgetc(fp);
}
}
else
{
ungetc(ch2, fp);
lexbuf[0] = ch;
lexbuf[1] = EOS;
p = lookup(lexbuf);
if (p == 0)
p = insert(lexbuf, MULOP);
addtoken("mulop", p);
lexsignal = 1;
return MULOP;
}
}
else if (isalpha(ch)) //distinguish a identifier
{
p = 0;
b = 0;
k = -1;
while (isalnum(ch)) {
lexbuf[b] = ch;
ch = fgetc(fp);
b += 1;
if (b >= BSIZE)
error("compiler error");
}
lexbuf[b] = EOS;
if (ch != EOF)
ungetc(ch, fp);
p = lookup(lexbuf);
// this identifier is not in the symtable
if (p == 0)
{
p = insert(lexbuf, ID);
addtoken("id", p);
}//judge the identifier is a key word
else
{
k = searchkeyword(lexbuf);
if (k == -1)
addtoken("id", p);
else
addtoken(lexbuf, p);
}
lexsignal = 1;
return symtable[p].token;
}
//distinguish a number
else if (isdigit(ch))
{
//tokenval = 0;
numflag = 0;
i = 0;
while (isdigit(ch))
{
lexbuf[i] = ch;
//tokenval = tokenval * 10 + ch - '0';
ch = fgetc(fp);
i++;
}
if (ch == '.')
{
// this number is float
numflag = 1;
lexbuf[i] = ch;
ch = fgetc(fp);
i++;
while (isdigit(ch))
{
lexbuf[i] = ch;
ch = fgetc(fp);
i++;
}
}
lexbuf[i] = EOS;
ungetc(ch, fp);
// exchange number to a char array
//itoa(tokenval, lexbuf, 10);
p = lookup(lexbuf);
if (p == 0)
if (numflag == 1)
p = insert(lexbuf, FNUM);
else
p = insert(lexbuf, NUM);
if (numflag == 1)
addtoken("fnum", p);
else
addtoken("num", p);
lexsignal = 1;
return NUM;
}
// distinguish <> <= < and >= > ==, they are all relop
else if(ch == '<' || ch == '>' || ch == '=')
{
lexbuf[0] = ch;
ch2 = fgetc(fp);
if (ch == '=' && ch2 != '=')
{
lexbuf[1] = EOS;
ungetc(ch2, fp);
p = lookup(lexbuf);
if (p == 0)
p = insert(lexbuf, ch);
addtoken(lexbuf, p);
lexsignal = 1;
return symtable[p].token;
}
else if ((ch == '<'&&ch2 == '>') || ch2 == '=' )
{
lexbuf[1] = ch2;
lexbuf[2] = EOS;
}
else
{
ungetc(ch2, fp);
lexbuf[1] = EOS;
}
p = lookup(lexbuf);
if (p == 0)
p = insert(lexbuf, RELOP);
addtoken("relop", p);
lexsignal = 1;
return RELOP;
}
//distinguish addop
else if (ch == '+' || ch == '-')
{
lexbuf[0] = ch;
lexbuf[1] = EOS;
p = lookup(lexbuf);
if (p == 0)
p = insert(lexbuf, ADDOP);
addtoken("addop", p);
lexsignal = 1;
return ADDOP;
}
//distinguish some key words
else if (ch == '(' || ch == ')' || ch == '{' || ch == '}' || ch == '!' || ch == ';')
{
lexbuf[0] = ch;
lexbuf[1] = EOS;
p = lookup(lexbuf);
if (p == 0)
p = insert(lexbuf, ch);
addtoken(lexbuf, p);
lexsignal = 1;
return symtable[p].token;
}
// end of source file
else if (ch == EOF)
{
lexsignal = 1;
return DONE;
}
//error, source file has character this lexer can not distinguish
else {
lexsignal = 0;
fprintf(stderr, "ERROR: line>> %d--Can't distinghuish character: %c\n", lineno, ch);
return ch;
}
}
}
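/*
 * Keyboard shortcuts (from the code viewer this file was copied from):
 *   Copy code:            Ctrl + C
 *   Search code:          Ctrl + F
 *   Full-screen mode:     F11
 *   Toggle theme:         Ctrl + Shift + D
 *   Show shortcuts:       ?
 *   Increase font size:   Ctrl + =
 *   Decrease font size:   Ctrl + -
 */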