📄 nfa.java
字号:
* Constructs an NFA accepting the complement of the language * of a given NFA. * * Converts the NFA into a DFA, then negates that DFA. * Exponential state blowup possible and common. * * @param the NFA to construct the complement for. * * @return a pair of integers denoting the index of start * and end state of the complement NFA. */ private IntPair complement(IntPair nfa) { if (Options.DEBUG) { Out.debug("complement for "+nfa); Out.debug("NFA is :"+Out.NL+this); } int dfaStart = nfa.end+1; // FIXME: only need epsilon closure of states reachable from nfa.start epsilonFill(); Hashtable dfaStates = new Hashtable(numStates); Vector dfaVector = new Vector(numStates); int numDFAStates = 0; int currentDFAState = 0; StateSet currentState, newState; newState = epsilon[nfa.start]; dfaStates.put(newState, new Integer(numDFAStates)); dfaVector.addElement(newState); if (Options.DEBUG) Out.debug("pos DFA start state is :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector); currentDFAState = 0; while ( currentDFAState <= numDFAStates ) { currentState = (StateSet) dfaVector.elementAt(currentDFAState); for (char input = 0; input < numInput; input++) { newState = DFAEdge(currentState, input); if ( newState.containsElements() ) { // Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState); // Out.debug("Looking for state set "+newState); Integer nextDFAState = (Integer) dfaStates.get(newState); if ( nextDFAState != null ) { // Out.debug("FOUND!"); addTransition(dfaStart+currentDFAState, input, dfaStart+nextDFAState.intValue()); } else { if (Options.dump) Out.print("+"); // Out.debug("NOT FOUND!"); // Out.debug("Table was "+dfaStates); numDFAStates++; dfaStates.put(newState, new Integer(numDFAStates)); dfaVector.addElement(newState); addTransition(dfaStart+currentDFAState, input, dfaStart+numDFAStates); } } } currentDFAState++; } // We have a dfa accepting the positive regexp. // Now the complement: if (Options.DEBUG) Out.debug("dfa finished, nfa is now :"+Out.NL+this); int start = dfaStart+numDFAStates+1; int error = dfaStart+numDFAStates+2; int end = dfaStart+numDFAStates+3; addEpsilonTransition(start, dfaStart); for (int i = 0; i < numInput; i++) addTransition(error, i, error); addEpsilonTransition(error, end); for (int s = 0; s <= numDFAStates; s++) { currentState = (StateSet) dfaVector.elementAt(s); currentDFAState = dfaStart+s; // if it was not a final state, it is now in the complement if (!currentState.isElement(nfa.end)) addEpsilonTransition(currentDFAState, end); // all inputs not present (formerly leading to an implicit error) // now lead to an explicit (final) state accepting everything. for (int i = 0; i < numInput; i++) if (table[currentDFAState][i] == null) addTransition(currentDFAState, i, error); } // eliminate transitions leading to dead states if (live == null || live.length < numStates) { live = new boolean [2*numStates]; visited = new boolean [2*numStates]; } removeDead(dfaStart, end); if (Options.DEBUG) Out.debug("complement finished, nfa ("+start+","+end+") is now :"+this); return new IntPair(start, end); } // "global" data for use in method removeDead only: // live[s] == false <=> no final state can be reached from s private boolean [] live; // = new boolean [estSize]; private boolean [] visited; // = new boolean [estSize]; private void removeDead(int start, int end) { // Out.debug("removeDead ("+start+")"); if ( visited[start] || live[start] ) return; visited[start] = true; // Out.debug("not yet visited"); if (closure(start).isElement(end)) live[start] = true; // Out.debug("is final :"+live[start]); for (int i = 0; i < numInput; i++) { StateSet nextState = closure(table[start][i]); StateSetEnumerator states = nextState.states(); while (states.hasMoreElements()) { int next = states.nextElement(); if (next != start) { removeDead(next,end); if (live[next]) live[start] = true; else table[start][i] = null; } } } StateSet nextState = closure(epsilon[start]); StateSetEnumerator states = nextState.states(); while (states.hasMoreElements()) { int next = states.nextElement(); if (next != start) { removeDead(next,end); if (live[next]) live[start] = true; } } // Out.debug("state "+start+" is live :"+live[start]); } /** * Constructs a two state NFA for char class regexps, * such that the NFA has * * exactly one start state, * exactly one end state, * no transitions leading out of the end state * no transitions leading into the start state * * Assumes that regExp.isCharClass(macros) == true * * @param regExp the regular expression to construct the * NFA for * * @return a pair of integers denoting the index of start * and end state of the NFA. */ private void insertCCLNFA(RegExp regExp, int start, int end) { switch (regExp.type) { case sym.BAR: RegExp2 r = (RegExp2) regExp; insertCCLNFA(r.r1, start, end); insertCCLNFA(r.r2, start, end); return; case sym.CCLASS: insertClassNFA( (Vector) ((RegExp1) regExp).content, start, end); return; case sym.CCLASSNOT: insertNotClassNFA( (Vector) ((RegExp1) regExp).content, start, end); return; case sym.CHAR: insertLetterNFA( false, ((Character) ((RegExp1) regExp).content).charValue(), start, end); return; case sym.CHAR_I: insertLetterNFA( true, ((Character) ((RegExp1) regExp).content).charValue(), start, end); return; case sym.MACROUSE: insertCCLNFA(macros.getDefinition((String) ((RegExp1) regExp).content), start, end); return; } throw new Error("Unknown expression type "+regExp.type+" in NFA construction"); } /** * Constructs an NFA for regExp such that the NFA has * * exactly one start state, * exactly one end state, * no transitions leading out of the end state * no transitions leading into the start state * * @param regExp the regular expression to construct the * NFA for * * @return a pair of integers denoting the index of start * and end state of the NFA. */ public IntPair insertNFA(RegExp regExp) { IntPair nfa1, nfa2; int start, end; RegExp2 r; if (Options.DEBUG) Out.debug("Inserting RegExp : "+regExp); if (regExp.isCharClass(macros)) { start = numStates; end = numStates+1; ensureCapacity(end+1); if (end+1 > numStates) numStates = end+1; insertCCLNFA(regExp, start, end); return new IntPair(start, end); } switch (regExp.type) { case sym.BAR: r = (RegExp2) regExp; nfa1 = insertNFA(r.r1); nfa2 = insertNFA(r.r2); start = nfa2.end+1; end = nfa2.end+2; addEpsilonTransition(start, nfa1.start); addEpsilonTransition(start, nfa2.start); addEpsilonTransition(nfa1.end, end); addEpsilonTransition(nfa2.end, end); return new IntPair(start, end); case sym.CONCAT: r = (RegExp2) regExp; nfa1 = insertNFA(r.r1); nfa2 = insertNFA(r.r2); addEpsilonTransition(nfa1.end, nfa2.start); return new IntPair(nfa1.start, nfa2.end); case sym.STAR: nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content ); start = nfa1.end+1; end = nfa1.end+2; addEpsilonTransition(nfa1.end, end); addEpsilonTransition(start, nfa1.start); addEpsilonTransition(start, end); addEpsilonTransition(nfa1.end, nfa1.start); return new IntPair(start, end); case sym.PLUS: nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content ); start = nfa1.end+1; end = nfa1.end+2; addEpsilonTransition(nfa1.end, end); addEpsilonTransition(start, nfa1.start); addEpsilonTransition(nfa1.end, nfa1.start); return new IntPair(start, end); case sym.QUESTION: nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content ); addEpsilonTransition(nfa1.start, nfa1.end); return new IntPair(nfa1.start, nfa1.end); case sym.BANG: return complement(insertNFA((RegExp) ((RegExp1) regExp).content)); case sym.TILDE: return insertNFA(regExp.resolveTilde(macros)); case sym.STRING: return insertStringNFA(false, (String) ((RegExp1) regExp).content ); case sym.STRING_I: return insertStringNFA(true, (String) ((RegExp1) regExp).content ); case sym.MACROUSE: return insertNFA(macros.getDefinition((String) ((RegExp1) regExp).content)); } throw new Error("Unknown expression type "+regExp.type+" in NFA construction"); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -