📄 lex.c

📁 源码漏洞检查
💻 C
📖 第 1 页 / 共 3 页
字号:
12 3 下一页
/*
 *  A lexical analyzer for the C language.
 *  John Viega
 *
 *  Jul 28, 1999
 *
 *  Lexical Analyzer based on the reference manual in K+R, 2nd Edition.
 *  Should handle all ANSI C just fine.  I think this also happens to
 *  scan all valid ANSI C++.
 *
 *  This program is a big state machine.  Often we will jump from
 *  state to state with a goto.  IMHO, the gotos aren't a big deal.
 *  However, this file remains pretty unreadable for other reasons.
 *  I'm basically implementing a fully compliant ANSI C(++) lexer
 *  (minus some non-important stuff) in a single pass.  Usually it's
 *  done in multiple passes, probably for clarity's sake...
 *  I was going for minimizing time of implementation, myself.
 *  Multiple passes would have taken me a lot longer to get right wrt
 *  line numbers in some situations.
 *  As a result, there are some gross things such as checking for a
 *  backslash followed by a newline EVERYWHERE.
 *
 *  What isn't done:
 *    <><><> Necessary for "correct" functionality for this app <><><>
 *
 *  - Trigraphs need to be handled.
 *      ??=  #
 *      ??/  (backslash)
 *      ??'  ^
 *      ??(  [
 *      ??)  ]
 *      ??!  |
 *      ??<  {
 *      ??>  }
 *      ??-  ~
 *      ???  supposedly goes to ?
 *  - trigraphs are supposed to work in a string, bigraphs no.
 *  - I sometimes make an assumption that EOF is going to come after a \n.
 *
 *    <><><> Desirable, but not necessary <><><>
 *  - Wide strings are processed, but treated like regular strings.
 *  - Keywords are treated as identifiers.  That's OK for my purposes, tho.
 *  - See also "TODO:" items in place.
 *
 *
 *  Jan 25, 2000:
 *  Apparently, gcc allows $ in identifiers.  Might as well recognize it.
 *
 *  Feb 1,  2000:
 *  Go ahead and treat \r as whitespace in the same context as \t, \n, etc.
 */

#include "lex.H"
#include "fatal.H"
#include "config.H"
#include <ctype.h>   // For isalnum(x)
#include <errno.h>   // For errno
#include <stdio.h>
#include <stdlib.h>  // For exit()
#include <string.h>  // For memcpy(), strerror()

#define STD_TOKENS     0
#define PREPROC_TOKENS 1

/*
 * Value (0..15) of a hexadecimal digit character.  The caller is expected
 * to pass only characters that are valid hex digits; anything that is
 * neither a decimal digit nor lower case is treated as an upper case digit.
 * The casts keep the <ctype.h> calls defined when plain char is signed.
 */
static int HexDigitValue(char c)
{
  const unsigned char uc = (unsigned char)c;

  if(isdigit(uc))
    return c - '0';
  if(islower(uc))
    return c - 'a' + 10;
  return c - 'A' + 10;
}

/*
 * Releases the scratch buffer and both token containers.  The input
 * buffer is released only when this object allocated it itself
 * (free_input is set by the FILE* constructor and cleared by the
 * in-memory constructor).
 */
Lex::~Lex()
{
  if(free_input)
    delete[] input;
  // delete / delete[] on a null pointer is a no-op, so no guards needed.
  delete[] str;
  delete token_box;
  delete comment_box;
}

/*
 * Reads everything that is left in f into a heap buffer and scans it.
 *
 *   f      - stream to read; it is not closed here.
 *   srcid  - stored as source_id (used in error messages).
 *   cpp    - stored as cpp_mode.
 *
 * A read error is reported on stderr and whatever was read so far is
 * scanned anyway.  Line numbering starts at 1.
 */
Lex::Lex(FILE *f, char *srcid, int cpp)
{
  return_on_error = 0;
  free_input = 1;

  const size_t file_incr = 1024*10; // Start with 10K, then double.
  size_t capacity = file_incr;
  size_t size = 0;
  char *buf = new char[capacity];
  if(!buf)
    OutOfMemory();

  while(1)
    {
      const size_t want = capacity - size;

      errno = 0;
      const size_t got = fread(buf + size, sizeof(char), want, f);
      // Grab errno right away, before another library call can change it.
      const int read_errno = errno;
      size += got;

      if(got != want)
        {
          // Short read: either end of file or an I/O error.
          if(!feof(f))
            {
              // ferror() only returns a flag, so the message has to come
              // from errno.
              fprintf(stderr, "%s" NEWLINE, strerror(read_errno));
              fprintf(stderr, "End of file not reached.  Progressing anyway."
                      NEWLINE);
            }
          Init(buf, static_cast<long>(size), srcid, 1, cpp);
          return;
        }

      // Buffer is full and there may be more data.  Doubling keeps the
      // total amount of copying linear in the file size.
      const size_t new_capacity = capacity * 2;
      char *tmp = new char[new_capacity];
      if(!tmp)
        OutOfMemory();
      memcpy(tmp, buf, size);
      delete[] buf;
      buf = tmp;
      capacity = new_capacity;
    }
}

/*
 * Scans a caller-owned buffer of len bytes.  The buffer is not freed by
 * the destructor, but note that UngetChar() may write into it.
 *
 *   l    - line number assigned to the first line.
 *   err  - stored as return_on_error.
 */
Lex::Lex(char* s, long len, char *srcid, unsigned int l, int err, int cpp)
{
  free_input = 0;
  return_on_error = err;
  Init(s, len, srcid, l, cpp);
}

/*
 * Common constructor tail: records the input, creates the two token
 * containers, resets all scanner state and then runs Scan(), so the
 * whole input is tokenized before the constructor returns.
 */
void Lex::Init(char *s, long len, char *srcid, int l, int cpp)
{
  cpp_mode = cpp;
  input = s;
  input_size = len;
  source_id = srcid;

  token_box = new TokenContainer();
  if(!token_box)
    OutOfMemory();
  comment_box = new TokenContainer();
  if(!comment_box)
    OutOfMemory();

  str = NULL;
  str_pos = 0;
  str_len = 0;
  pos = 0;
  lineno = l;
  lineno_offset = 0;
  comment_lineno_offset = 0;

  Scan();
}

/*
 * Consumes the body of a C-style comment up to and including the closing
 * star-slash.  Called once the opening slash-star has been read.
 * Returns 1 when the comment was closed (a comment token has been
 * emitted), 0 when EOF was hit first (nothing is emitted).
 */
int Lex::LexCComment()
{
  int t;

  StartCComment();
  while(1)
    {
      t = GetChar();
      switch(t)
        {
        case EOF:
          return 0;
        case '\n':
          AddCharToComment(t);
          lineno++;
          comment_lineno_offset++;
          lineno_offset++;
          continue;
        case '*':
          if((t = GetChar()) == '/')
            {
              EndComment();
              return 1;
            }
          // Lone '*': keep it and re-examine the following character,
          // which may itself be a '*' or a newline.
          AddCharToComment('*');
          UngetChar(t);
          continue;
        default:
          AddCharToComment(t);
          continue;
        }
    }
}

/*
 * Consumes the rest of a // comment.  Called once the "//" has been read.
 * A backslash-newline continues the comment onto the next line (neither
 * character is stored).  The comment ends at the first unescaped newline,
 * or at EOF.
 */
void Lex::LexCPPComment()
{
  int t;

  StartCPPComment();
  while(1)
    {
      switch(t = GetChar())
        {
        case EOF:
          // The file ended without a trailing newline.  Emit what we have
          // instead of silently dropping the comment.
          EndComment();
          return;
        case '\\':
          switch(t = GetChar())
            {
            case '\n':
              lineno++;
              comment_lineno_offset++;
              lineno_offset++;
              continue;
            default:
              UngetChar(t);
              AddCharToComment('\\');
              continue;
            }
        case '\n':
          EndComment();
          lineno++;
          /* Do this for the following:
           * x = x + // foo
           * #if 1
           * 2;
           * #endif
           */
          return;
        default:
          AddCharToComment(t);
          continue;
        }
    }
}

/* Starts a hexadecimal character escape with its first digit. */
void Lex::StartHexChr(char c)
{
  chr_val = HexDigitValue(c);
}

/* Appends one more hexadecimal digit to the character escape. */
void Lex::AddHexChr(char c)
{
  chr_val = (chr_val << 4) + HexDigitValue(c);
}

/* Emits the accumulated hexadecimal escape as a character token. */
void Lex::EndHexChr()
{
  GenChr(chr_val);
}

/* Starts an octal character escape with its first digit. */
void Lex::StartOctChr(char c)
{
  chr_val = c - '0';
}

/* Appends one more octal digit to the character escape. */
void Lex::AddOctChr(char c)
{
  chr_val = (chr_val << 3) + (c - '0');
}

/* Emits the accumulated octal escape as a character token. */
void Lex::EndOctChr()
{
  GenChr(chr_val);
}

/* Stores the first character of an identifier in the scratch buffer. */
void Lex::StartIdentifier(char ch)
{
  AddCharToStr(ch);
}

/* Stores a further character of the current identifier. */
void Lex::ContinueIdentifier(char ch)
{
  AddCharToStr(ch);
}

/*
 * Terminates the identifier in the scratch buffer, hands a private copy
 * to a new IdTok and resets the scratch buffer.  The length passed to
 * IdTok includes the terminating NUL, as it always has.
 */
void Lex::EndIdentifier()
{
  // Go through AddCharToStr so the buffer grows when it is exactly full;
  // writing str[str_pos] directly could land one byte past the end.
  AddCharToStr('\0');

  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();
  memcpy(tmp, str, str_pos);

  IdTok *tok = new IdTok(tmp, str_pos, lineno-lineno_offset, lineno);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  str_pos = 0;
}

/* Resets the numeric state for a hexadecimal integer literal. */
void Lex::StartHexNum()
{
  real = 0;
  num_val = 0;
  looks_octal = 0;
  unsigned_flag = 0;
  long_flag = 0;
}

/* Appends one hexadecimal digit to the integer being accumulated. */
void Lex::AddHexDigit(char c)
{
  num_val = (num_val << 4) + HexDigitValue(c);
}

/*
 * Emits the number accumulated so far: an IntegerTok when no '.' or
 * exponent made it real, a RealTok otherwise.  For integers that started
 * with '0' the octal interpretation (oct_val) is used.
 */
void Lex::EndNum()
{
  if(!real)
    {
      IntegerTok *tok = new IntegerTok(looks_octal ? oct_val : num_val,
                                       unsigned_flag, long_flag,
                                       lineno-lineno_offset);
      if(!tok)
        OutOfMemory();
      token_box->Add(tok);
      lineno_offset = 0;
    }
  else
    {
      RealTok *tok = new RealTok(num_val, mant_val,
                                 exp_neg_flag ? -exp : exp,
                                 float_flag ? FLOAT : (long_flag ? LONG_DOUBLE
                                                       : DOUBLE),
                                 lineno - lineno_offset);
      if(!tok)
        OutOfMemory();
      token_box->Add(tok);
      lineno_offset = 0;
    }
}

/*
 * Resets the numeric state for a literal that is not hexadecimal and
 * consumes its first character c: '.' starts a real number, '0' marks a
 * possible octal constant, any other digit starts a decimal value.
 */
void Lex::StartBase10OrLowerNum(char c)
{
  real = 0;
  mant_val = 0;
  exp = 0;
  unsigned_flag = 0;
  long_flag = 0;
  float_flag = 0;
  exp_neg_flag = 0;
  num_val = 0;

  switch(c)
    {
    case '.':
      real = 1;
      return;
    case '0':
      looks_octal = 1;
      oct_val = 0;
      return;
    default:
      looks_octal = 0;
      num_val = c - '0';
      return;
    }
}

/*
 * Consumes the first character after the exponent marker: a sign, or
 * the first exponent digit.
 */
void Lex::BeginExponent(char c)
{
  switch(c)
    {
    case '+':
      return;
    case '-':
      exp_neg_flag = 1;
      return;
    default:
      exp = c - '0';
    }
}

/* Appends one decimal digit to the exponent. */
void Lex::AddExponent(char c)
{
  exp = exp * 10 + (c - '0');
}

/*
 * Appends a digit to a literal that started with '0'.  Both readings are
 * tracked: num_val keeps the decimal reading in case it turns out to be
 * a float, oct_val keeps the octal reading.
 */
void Lex::AddOctDigit(char c)
{
  // In case it turns out to be a float.
  num_val = (num_val*10) + (c - '0');
  oct_val = (oct_val << 3) + (c - '0');
}

/*
 * Appends a character to a decimal literal.  A '.' switches to real
 * mode; digits go to num_val before the '.' and to mant_val after it.
 */
void Lex::AddDecDigit(char c)
{
  if(c == '.')
    {
      real = 1;
      return;
    }

  long int *which_val = real ? &mant_val : &num_val;
  *which_val = *which_val * 10 + (c - '0');
}

/* Records a long suffix on the current number. */
void Lex::MakeLong()
{
  long_flag = 1;
}

/* Records an unsigned suffix on the current number. */
void Lex::MakeUnsigned()
{
  unsigned_flag = 1;
}

/* Records a float suffix on the current number. */
void Lex::MakeFloat()
{
  float_flag = 1;
}

/* Emits a character token with value c at the current token's line. */
void Lex::GenChr(long c)
{
  CharTok *tok = new CharTok(c, lineno-lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
}

/* Comment text shares the scratch buffer used for all other tokens. */
void Lex::AddCharToComment(char c)
{
  AddCharToStr(c);
}

/*
 * Lexes a preprocessing number whose first character c has already been
 * read.  Accepts letters, digits, '.', '_' and a sign directly after
 * 'e' / 'E'.  A backslash-newline inside the number is skipped.  Emits a
 * PreprocNumTok; the length passed to it excludes the terminating NUL.
 */
void Lex::LexPreprocNumber(char c)
{
  AddCharToStr(c);

  while(1)
    {
      // Keep the result in an int so EOF stays distinguishable, and cast
      // for isalnum(), which is undefined for negative values other
      // than EOF.
      const int ch = GetChar();

      if(ch == EOF ||
         (!isalnum((unsigned char)ch) && (ch != '.') && (ch != '_') &&
          (ch != '\\')))
        {
          UngetChar(ch);
          break;
        }

      if(ch == '\\')
        {
          const int next = GetChar();
          if(next == '\n')
            {
              // Line continuation in the middle of the number.
              lineno++;
              lineno_offset++;
              continue;
            }
          // Not a continuation: the backslash ends the number.
          UngetChar(next);
          UngetChar('\\');
          break;
        }

      AddCharToStr(ch);

      if(ch == 'e' || ch == 'E')
        {
          // An exponent marker may be followed directly by a sign.
          const int sign = GetChar();
          if(sign == '+' || sign == '-')
            AddCharToStr(sign);
          else
            UngetChar(sign);
        }
    }

  // AddCharToStr grows the buffer when it is exactly full, so the
  // terminator can never be written past the end.
  AddCharToStr('\0');

  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();
  memcpy(tmp, str, str_pos);

  PreprocNumTok *tok = new PreprocNumTok(tmp, str_pos-1,
                                         lineno-lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = str_pos = 0;
}

/*
 * Appends c to the scratch buffer, growing the buffer by BUFFER_SIZE
 * bytes whenever it is full (this also covers the very first call, when
 * no buffer exists yet).
 */
void Lex::AddCharToStr(char c)
{
  if(str_pos == str_len)
    {
      char *tmp = new char[str_len + BUFFER_SIZE];
      if(!tmp)
        OutOfMemory();
      if(str)
        {
          // Only copy when there is an old buffer; memcpy must not be
          // handed a null source, even for zero bytes.
          memcpy(tmp, str, str_pos);
          delete[] str;
        }
      str_len += BUFFER_SIZE;
      str = tmp;
    }
  str[str_pos++] = c;
}

/*
 * Terminates the string in the scratch buffer, hands a private copy to a
 * new StringTok and resets the scratch buffer.  The length passed to
 * StringTok excludes the terminating NUL.
 */
void Lex::EndStr()
{
  // Also handles the empty string with no buffer allocated yet, and a
  // buffer that is exactly full.
  AddCharToStr('\0');

  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();
  // memcpy, not strncpy: strncpy would zero out everything after an
  // embedded NUL even though the length is passed along with the buffer.
  memcpy(tmp, str, str_pos);

  StringTok *tok = new StringTok(tmp, str_pos-1, lineno - lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  str_pos = 0;
}

/*
 * Terminates the comment text in the scratch buffer and records it
 * twice: once in the main token stream and once in comment_box.  Both
 * tokens point at the same copy of the text.  The length passed to
 * CommentTok includes the terminating NUL, as it always has.
 */
void Lex::EndComment()
{
  AddCharToStr('\0');

  char *tmp = new char[str_pos];
  if(!tmp)
    OutOfMemory();
  memcpy(tmp, str, str_pos);

  CommentTok *tok = new CommentTok(tmp, str_pos, lineno-lineno_offset,
                                   cpp_comment, token_box->GetCurrentSize(),
                                   lineno, 0);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);

  // In the comment box, we don't free the string, so we pass the 1 param
  // to say this.  GetCurrentSize() is evaluated after the Add() above,
  // exactly as before.
  tok = new CommentTok(tmp, str_pos, lineno-lineno_offset, cpp_comment,
                       token_box->GetCurrentSize(), lineno, 1);
  if(!tok)
    OutOfMemory();
  comment_box->Add(tok);

  comment_lineno_offset = 0;
  str_pos = 0;
}

/* Emits an operator token for the spelling s at the current token's line. */
void Lex::GenOp(char *s)
{
  OperatorTok *tok = new OperatorTok(s, lineno - lineno_offset);
  if(!tok)
    OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
}

/*
 * Returns the next input character, or EOF once the input is exhausted.
 *
 * NOTE(review): the character is returned as a plain char, so where char
 * is signed a 0xFF byte is indistinguishable from EOF.  Left as is
 * because code outside this view may rely on the current value range.
 */
int Lex::GetChar()
{
  if(input_size <= pos)
    {
      return EOF;
    }
  return input[pos++];
}

/*
 * Steps the read position back by one and stores c there.  EOF and an
 * attempt to step back before the start of the input are ignored.
 * Because c is written into the buffer, callers can (and do) push back
 * a character different from the one that was read.
 */
void Lex::UngetChar(int c)
{
  if(c == EOF)
    return;
  if(pos <= 0)
    {
      return;
    }
  input[--pos] = c;
}

/*
 * Looks at the start of a line.  Skips horizontal whitespace, comments
 * and backslash-newlines; if the first real character is '#', the rest
 * of the line is scanned as a preprocessor directive, bracketed by a
 * PreprocStartToken / PreprocEndToken pair.
 *
 * Return 1 if we found junk (a directive was consumed), 0 otherwise.
 * When 0 is returned the characters that were looked at have been
 * pushed back.
 */
int Lex::LexPreprocessorStuff()
{
  int t;

  while(1)
    {
      switch(t = GetChar())
        {
        case '/':
        try_again:
          switch(t = GetChar())
            {
            case '\\':
              switch(t = GetChar())
                {
                case '\n':
                  lineno++;
                  // Don't lineno_offset++; not in a token yet.
                  goto try_again;
                default:
                  UngetChar(t);
                  UngetChar('\\');
                  UngetChar('/');
                  return 0;
                }
            case '/':
              LexCPPComment();
              continue;
            case '*':
              if(!LexCComment())
                {
                  // TODO: Make sure this is ok behavior.
                  if(return_on_error)
                    return 0;
                  fprintf(stderr, "%s: Error: Unterminated comment." NEWLINE,
                          source_id);
                  // NOTE(review): exits with status 0 although this is an
                  // error path; kept unchanged.
                  exit(0);
                }
              else
                {
                  continue;
                }
            default:
              UngetChar(t);
              UngetChar('/');
              return 0;
            }
        case '\\':
          switch(t = GetChar())
            {
            case '\n':
              lineno++;
              // Don't lineno_offset++; not in a token yet.
              continue;
            default:
              UngetChar(t);
              UngetChar('\\');
              return 0;
            }
        case '#':
          goto remove_directive;
        case ' ':
        case '\t':
        case '\v':
        case '\r':
        case '\f':
          continue;
        default:
          UngetChar(t);
          return 0;
        }
    }

 remove_directive:
  Token *tok = new PreprocStartToken(lineno-lineno_offset);
  if(!tok) OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;

  // Errors inside the directive line must return rather than exit, so
  // return_on_error is forced on for the duration of the scan.
  int old_return_on_error = return_on_error;
  return_on_error = 1;
  ScanLine(PREPROC_TOKENS);
  return_on_error = old_return_on_error;

  // Preprocessor guy ended on the previous line, since ScanLine
  // bumped it up by 1.
  tok = new PreprocEndToken(lineno-lineno_offset-1);
  if(!tok) OutOfMemory();
  token_box->Add(tok);
  lineno_offset = 0;
  return 1;
}

/*
 * Top-level driver: before every ordinary line, consume any number of
 * preprocessor lines; stop when ScanLine reports the end of the input.
 */
void Lex::Scan()
{
  do
    {
      while(LexPreprocessorStuff());
    }while(ScanLine(STD_TOKENS));
}

/*
 * Scans tokens up to the end of the current line.  In the part visible
 * here it returns 0 at EOF and 1 after a newline or a // comment.
 *
 * NOTE(review): this listing is page 1 of 3 and stops in the middle of
 * this function; the visible part is reproduced unchanged.
 */
int Lex::ScanLine(int preproc)
{
  int t, t2, saved_char;
 next:
  t = GetChar();
  switch(t){
  case EOF:
    return 0;
  case '\n':
    lineno++;
    return 1;
  case '\t':
  case '\v':
  case '\r':
  case '\f':
  case ' ':
    goto next;
  case ';':
    GenOp(";");
    goto next;
  case '/':
  slash_start:
    switch(t = GetChar())
      {
      case '\\':
        switch(t = GetChar())
          {
          case '\n':
            lineno++;
            lineno_offset++;
            goto slash_start;
          default:
            UngetChar(t);
            UngetChar('\\');
            GenOp("/");
            goto next;
          }
      case '*':
        if(!LexCComment())
          goto unterminatedCommentError;
        else
          {
            lineno_offset = 0;
            goto next;
          }
      case '/':
        LexCPPComment();
        lineno_offset = 0;
        return 1;
      case '=':
        GenOp("/=");
        goto next;
      default:
        UngetChar(t);
        GenOp("/");
        goto next;
      }
  case '-':
  minus_start:
    switch(t = GetChar())
      {
      case '\\':
        switch(t = GetChar())
          {
          case '\n':
            lineno++;
            lineno_offset++;
            goto minus_start;
          default:
            UngetChar(t);
            UngetChar('\\');
            GenOp("-");
            goto next;
          }
      case '-':
        GenOp("--");
        goto next;
      case '>':
        GenOp("->");
        goto next;
      case '=':
        GenOp("-=");
        goto next;
      default:
        UngetChar(t);
        GenOp("-");
        goto next;
      }
  case '+':
  plus_start:
    switch(t = GetChar())
      {
      case '\\':
        switch(t = GetChar())
          {
          case '\n':
            lineno++;
            lineno_offset++;
            goto plus_start;
12 3 下一页
💿 文件大小 59 K
👤 上传用户 gdmichael
📂 所属分类编译器/解释器
🏷️ 相关标签

#源码 #漏洞
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -