⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regexpcompiler.cpp

📁 用于词法分析的词法分析器
💻 CPP
字号:
/*  $Id: RegexpCompiler.cpp,v 1.4 1997/02/02 02:01:08 matt Exp $  Regular expression compiler class.    (c) Apr 95 Matt Phillips.  */#include <ctype.h>#include <util/DiscreteSet.h>#include "RegexpCompiler.h"#define ArraySize(a) (sizeof (a) / sizeof (*(a)))typedef Range<int> SymRange;typedef DiscreteSet<uchar, 0, 255> UCharSet;UCharRange RegexpCompiler::DotRange (0, 255);RegexpCompiler::RegexpCompiler (BufferedInputStream &input,				int prod, Symbols &syms)  : symbols (syms), scanner (input, errorList){  // start the ball rolling  scanner.getNext ();  FsmState &end = getRegexp (fsm.addState ());  end.setProd (prod);  // check for end  mustbe (RegexpScanner::SymEOF);}FsmState &RegexpCompiler::getRegexp (FsmState &start){  FsmState &theEnd = fsm.addState ();  do  {    FsmState &end = getOrTerm (start);    end.addEdge (theEnd);  } while (have (RegexpScanner::SymBar));  return theEnd;}FsmState &RegexpCompiler::getOrTerm (FsmState &start){  static const SymRange termFirst (RegexpScanner::SymChar, RegexpScanner::SymTilde);  FsmState *head = &start;  do  {    head = &getTerm (*head);  } while (termFirst.isIn (scanner.getSymbol ()));  return *head;}FsmState &RegexpCompiler::getTerm (FsmState &start){  FsmState &end = getFactor (start);  if (have (RegexpScanner::SymStar))  {    start.addEdge (end);	// skip regexp edge    end.addEdge (start);	// loop edge  } else if (have (RegexpScanner::SymPlus))  {    end.addEdge (start);	// loop edge  } else if (have (RegexpScanner::SymQuestion))  {    start.addEdge (end);	// skip edge  }  return end;}FsmState &RegexpCompiler::getFactor (FsmState &start){  switch (scanner.getSymbol ())  {  case RegexpScanner::SymDot:    scanner.getNext ();    return createRange (start, DotRange);  case RegexpScanner::SymChar:    return createRange (start, readRange ());  case RegexpScanner::SymTilde:    return getCIStr (start);  case RegexpScanner::SymVariable:    return getVariable (start);  case RegexpScanner::SymLSBracket:    return getSet (start);  case RegexpScanner::SymLBracket:    {      scanner.getNext ();      FsmState &end = getRegexp (start);      mustbe (RegexpScanner::SymRBracket);      return end;    }  default:    error ("factor expected");  }     return start;}FsmState &RegexpCompiler::getCIStr (FsmState &start){  scanner.getNext ();		// chuck ~  FsmState *s = &start;     for (;;)			// break in loop  {    if (scanner.getSymbol () == RegexpScanner::SymChar)    {      FsmState &end = fsm.addState ();      s->addEdge (UCharRange (tolower (scanner.getText (0))), end);      s->addEdge (UCharRange (toupper (scanner.getText (0))), end);      s = &end;      scanner.getNext ();    } else if (have (RegexpScanner::SymDot))      s = &createRange (*s, DotRange);    else      break;			// abort loop on invalid token also  }     mustbe (RegexpScanner::SymTilde);  return *s;}FsmState &RegexpCompiler::getSet (FsmState &start){  int invert = 0;		// invert set flag  UCharSet set;		// the set  FsmState &end = fsm.addState ();  UCharRange range;  scanner.getNext ();		// dump [     if (have (RegexpScanner::SymCaret))  {    set.addAll ();		// include all items    invert = 1;  }     for (;;)			// read set contents  {    if (scanner.getSymbol () == RegexpScanner::SymChar)      set.set (readRange (), !invert);    else      break;  }  mustbe (RegexpScanner::SymRSBracket);  // generate edges  uchar c;  int lastIn;  for (c = 0, lastIn = 0; c < set.getUpper (); lastIn = set.isIn (c), c++)  {    if (lastIn)    {      if (!set.isIn (c))      {	range.upper = c - 1;	range.normalize ();	start.addEdge (range, end);      }    } else    {      if (set.isIn (c))	range.lower = c;    }  }     // finish off  if (lastIn)  {    range.upper = c;    range.normalize ();    start.addEdge (range, end);  }  return end;}FsmState &RegexpCompiler::getVariable (FsmState &start){  const string varName (scanner.getText ());  const Fsm *varFsm = symbols.get (varName);  if (varFsm)  {    Fsm fsmCopy (*varFsm);	// copy fsm    FsmState &tail = fsmCopy.tail ();    start.addEdge (fsmCopy.head ()); // connect with epsilon edge    fsmCopy.mergeWith (fsm);	// merge copy with main    scanner.getNext ();    return tail;  } else  {    error ("undefined variable");    scanner.getNext ();    return start;		// null op on error  }}UCharRange RegexpCompiler::readRange (){  UCharRange range (scanner.getText (0));  scanner.getNext ();  if (have (RegexpScanner::SymMinus))  {    if (scanner.getSymbol () == RegexpScanner::SymChar)    {      range.upper = scanner.getText (0);      range.normalize ();      scanner.getNext ();    } else      mustbeErr (RegexpScanner::SymChar);  }     return range;}void RegexpCompiler::mustbe (int sym){  if (scanner.getSymbol () != sym)    mustbeErr (sym);  else    scanner.getNext ();}int RegexpCompiler::have (int sym){  if (scanner.getSymbol () != sym)    return 0;  else  {    scanner.getNext ();    return 1;  }}void RegexpCompiler::mustbeErr (int sym){  static char *notUsed = "**should not occur**";  char *errMsg [] =  {    "character expected"/*char*/, notUsed/*dot*/,    notUsed/*variable*/,  notUsed/*lbracket*/, notUsed/*lsbracket*/,    "closing '~' expected"/*tilde*/, notUsed/*question*/, notUsed/*caret*/,    notUsed/*star*/, notUsed/*plus*/, notUsed/*minus*/,    "closing ')' expected"/*rbracket*/,    "closing ']' expected"/*rsbracket*/, notUsed/*bar*/  };  if (sym == RegexpScanner::SymEOF)    error ("end of expression expected");  else  {    CHECK (sym >= 0 && sym < ArraySize (errMsg),	   "mustbeError symbol out of range");       error (errMsg [sym]);  }}void RegexpCompiler::error (const char *msg){  errorList.error (string (msg), scanner.getFilename (),		   scanner.getSymbolLine (), scanner.getSymbolColumn ());}void RegexpCompiler::warning (const char *msg){  errorList.warning (string (msg), scanner.getFilename (),		     scanner.getSymbolLine (), scanner.getSymbolColumn ());}FsmState &RegexpCompiler::createRange (FsmState &start, UCharRange r){  FsmState &end = fsm.addState ();  start.addEdge (r, end);     return end;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -