📄 regexpcompiler.cpp
字号:
/* $Id: RegexpCompiler.cpp,v 1.4 1997/02/02 02:01:08 matt Exp $ Regular expression compiler class. (c) Apr 95 Matt Phillips. */#include <ctype.h>#include <util/DiscreteSet.h>#include "RegexpCompiler.h"#define ArraySize(a) (sizeof (a) / sizeof (*(a)))typedef Range<int> SymRange;typedef DiscreteSet<uchar, 0, 255> UCharSet;UCharRange RegexpCompiler::DotRange (0, 255);RegexpCompiler::RegexpCompiler (BufferedInputStream &input, int prod, Symbols &syms) : symbols (syms), scanner (input, errorList){ // start the ball rolling scanner.getNext (); FsmState &end = getRegexp (fsm.addState ()); end.setProd (prod); // check for end mustbe (RegexpScanner::SymEOF);}FsmState &RegexpCompiler::getRegexp (FsmState &start){ FsmState &theEnd = fsm.addState (); do { FsmState &end = getOrTerm (start); end.addEdge (theEnd); } while (have (RegexpScanner::SymBar)); return theEnd;}FsmState &RegexpCompiler::getOrTerm (FsmState &start){ static const SymRange termFirst (RegexpScanner::SymChar, RegexpScanner::SymTilde); FsmState *head = &start; do { head = &getTerm (*head); } while (termFirst.isIn (scanner.getSymbol ())); return *head;}FsmState &RegexpCompiler::getTerm (FsmState &start){ FsmState &end = getFactor (start); if (have (RegexpScanner::SymStar)) { start.addEdge (end); // skip regexp edge end.addEdge (start); // loop edge } else if (have (RegexpScanner::SymPlus)) { end.addEdge (start); // loop edge } else if (have (RegexpScanner::SymQuestion)) { start.addEdge (end); // skip edge } return end;}FsmState &RegexpCompiler::getFactor (FsmState &start){ switch (scanner.getSymbol ()) { case RegexpScanner::SymDot: scanner.getNext (); return createRange (start, DotRange); case RegexpScanner::SymChar: return createRange (start, readRange ()); case RegexpScanner::SymTilde: return getCIStr (start); case RegexpScanner::SymVariable: return getVariable (start); case RegexpScanner::SymLSBracket: return getSet (start); case RegexpScanner::SymLBracket: { scanner.getNext (); FsmState &end = getRegexp (start); mustbe (RegexpScanner::SymRBracket); return end; } default: error ("factor expected"); } return start;}FsmState &RegexpCompiler::getCIStr (FsmState &start){ scanner.getNext (); // chuck ~ FsmState *s = &start; for (;;) // break in loop { if (scanner.getSymbol () == RegexpScanner::SymChar) { FsmState &end = fsm.addState (); s->addEdge (UCharRange (tolower (scanner.getText (0))), end); s->addEdge (UCharRange (toupper (scanner.getText (0))), end); s = &end; scanner.getNext (); } else if (have (RegexpScanner::SymDot)) s = &createRange (*s, DotRange); else break; // abort loop on invalid token also } mustbe (RegexpScanner::SymTilde); return *s;}FsmState &RegexpCompiler::getSet (FsmState &start){ int invert = 0; // invert set flag UCharSet set; // the set FsmState &end = fsm.addState (); UCharRange range; scanner.getNext (); // dump [ if (have (RegexpScanner::SymCaret)) { set.addAll (); // include all items invert = 1; } for (;;) // read set contents { if (scanner.getSymbol () == RegexpScanner::SymChar) set.set (readRange (), !invert); else break; } mustbe (RegexpScanner::SymRSBracket); // generate edges uchar c; int lastIn; for (c = 0, lastIn = 0; c < set.getUpper (); lastIn = set.isIn (c), c++) { if (lastIn) { if (!set.isIn (c)) { range.upper = c - 1; range.normalize (); start.addEdge (range, end); } } else { if (set.isIn (c)) range.lower = c; } } // finish off if (lastIn) { range.upper = c; range.normalize (); start.addEdge (range, end); } return end;}FsmState &RegexpCompiler::getVariable (FsmState &start){ const string varName (scanner.getText ()); const Fsm *varFsm = symbols.get (varName); if (varFsm) { Fsm fsmCopy (*varFsm); // copy fsm FsmState &tail = fsmCopy.tail (); start.addEdge (fsmCopy.head ()); // connect with epsilon edge fsmCopy.mergeWith (fsm); // merge copy with main scanner.getNext (); return tail; } else { error ("undefined variable"); scanner.getNext (); return start; // null op on error }}UCharRange RegexpCompiler::readRange (){ UCharRange range (scanner.getText (0)); scanner.getNext (); if (have (RegexpScanner::SymMinus)) { if (scanner.getSymbol () == RegexpScanner::SymChar) { range.upper = scanner.getText (0); range.normalize (); scanner.getNext (); } else mustbeErr (RegexpScanner::SymChar); } return range;}void RegexpCompiler::mustbe (int sym){ if (scanner.getSymbol () != sym) mustbeErr (sym); else scanner.getNext ();}int RegexpCompiler::have (int sym){ if (scanner.getSymbol () != sym) return 0; else { scanner.getNext (); return 1; }}void RegexpCompiler::mustbeErr (int sym){ static char *notUsed = "**should not occur**"; char *errMsg [] = { "character expected"/*char*/, notUsed/*dot*/, notUsed/*variable*/, notUsed/*lbracket*/, notUsed/*lsbracket*/, "closing '~' expected"/*tilde*/, notUsed/*question*/, notUsed/*caret*/, notUsed/*star*/, notUsed/*plus*/, notUsed/*minus*/, "closing ')' expected"/*rbracket*/, "closing ']' expected"/*rsbracket*/, notUsed/*bar*/ }; if (sym == RegexpScanner::SymEOF) error ("end of expression expected"); else { CHECK (sym >= 0 && sym < ArraySize (errMsg), "mustbeError symbol out of range"); error (errMsg [sym]); }}void RegexpCompiler::error (const char *msg){ errorList.error (string (msg), scanner.getFilename (), scanner.getSymbolLine (), scanner.getSymbolColumn ());}void RegexpCompiler::warning (const char *msg){ errorList.warning (string (msg), scanner.getFilename (), scanner.getSymbolLine (), scanner.getSymbolColumn ());}FsmState &RegexpCompiler::createRange (FsmState &start, UCharRange r){ FsmState &end = fsm.addState (); start.addEdge (r, end); return end;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -