📄 dfa.cs
字号:
//---------- String handling
static char Hex2Char(string s) {
int val = 0;
for (int i = 0; i < s.Length; i++) {
char ch = s[i];
if ('0' <= ch && ch <= '9') val = 16 * val + (ch - '0');
else if ('a' <= ch && ch <= 'f') val = 16 * val + (10 + ch - 'a');
else if ('A' <= ch && ch <= 'F') val = 16 * val + (10 + ch - 'A');
else Parser.SemErr("bad escape sequence in string or character");
}
if (val > CharClass.charSetSize) /* pdt */
Parser.SemErr("bad escape sequence in string or character");
return (char) (val % CharClass.charSetSize);
}
static string Char2Hex(char ch) {
StringWriter w = new StringWriter();
w.Write("\\u{0:x4}", (int)ch);
return w.ToString();
}
public static string Unescape (string s) {
/* replaces escape sequences in s by their Unicode values. */
StringBuilder buf = new StringBuilder();
int i = 0;
while (i < s.Length) {
if (s[i] == '\\') {
switch (s[i+1]) {
case '\\': buf.Append('\\'); i += 2; break;
case '\'': buf.Append('\''); i += 2; break;
case '\"': buf.Append('\"'); i += 2; break;
case 'r': buf.Append('\r'); i += 2; break;
case 'n': buf.Append('\n'); i += 2; break;
case 't': buf.Append('\t'); i += 2; break;
case '0': buf.Append('\0'); i += 2; break;
case 'a': buf.Append('\a'); i += 2; break;
case 'b': buf.Append('\b'); i += 2; break;
case 'f': buf.Append('\f'); i += 2; break;
case 'v': buf.Append('\v'); i += 2; break;
case 'u': case 'x':
if (i + 6 <= s.Length) {
buf.Append(Hex2Char(s.Substring(i+2, 4))); i += 6; break;
} else {
Parser.SemErr("bad escape sequence in string or character"); i = s.Length; break;
}
default: Parser.SemErr("bad escape sequence in string or character"); i += 2; break;
}
} else {
buf.Append(s[i]);
i++;
}
}
return buf.ToString();
}
public static string Escape (string s) {
StringBuilder buf = new StringBuilder();
foreach (char ch in s) {
switch(ch) {
case '\\': buf.Append("\\\\"); break;
case '\'': buf.Append("\\'"); break;
case '\"': buf.Append("\\\""); break;
case '\t': buf.Append("\\t"); break;
case '\r': buf.Append("\\r"); break;
case '\n': buf.Append("\\n"); break;
default:
if (ch < ' ' || ch > '\u007f') buf.Append(Char2Hex(ch));
else buf.Append(ch);
break;
}
}
return buf.ToString();
}
//---------- State handling
static State NewState() {
State s = new State();
if (firstState == null) firstState = s; else lastState.next = s;
lastState = s;
return s;
}
static void NewTransition(State from, State to, int typ, int sym, int tc) {
if (to == firstState) Parser.SemErr("token must not start with an iteration");
Target t = new Target(to);
Action a = new Action(typ, sym, tc); a.target = t;
from.AddAction(a);
if (typ == Node.clas) curSy.tokenKind = Symbol.classToken;
}
static void CombineShifts() {
State state;
Action a, b, c;
BitArray seta, setb;
for (state = firstState; state != null; state = state.next) {
for (a = state.firstAction; a != null; a = a.next) {
b = a.next;
while (b != null)
if (a.target.state == b.target.state && a.tc == b.tc) {
seta = a.Symbols(); setb = b.Symbols();
seta.Or(setb);
a.ShiftWith(seta);
c = b; b = b.next; state.DetachAction(c);
} else b = b.next;
}
}
}
static void FindUsedStates(State state, BitArray used) {
if (used[state.nr]) return;
used[state.nr] = true;
for (Action a = state.firstAction; a != null; a = a.next)
FindUsedStates(a.target.state, used);
}
static void DeleteRedundantStates() {
State[] newState = new State[State.lastNr + 1];
BitArray used = new BitArray(State.lastNr + 1);
FindUsedStates(firstState, used);
// combine equal final states
for (State s1 = firstState.next; s1 != null; s1 = s1.next) // firstState cannot be final
if (used[s1.nr] && s1.endOf != null && s1.firstAction == null && !s1.ctx)
for (State s2 = s1.next; s2 != null; s2 = s2.next)
if (used[s2.nr] && s1.endOf == s2.endOf && s2.firstAction == null & !s2.ctx) {
used[s2.nr] = false; newState[s2.nr] = s1;
}
for (State state = firstState; state != null; state = state.next)
if (used[state.nr])
for (Action a = state.firstAction; a != null; a = a.next)
if (!used[a.target.state.nr])
a.target.state = newState[a.target.state.nr];
// delete unused states
lastState = firstState; State.lastNr = 0; // firstState has number 0
for (State state = firstState.next; state != null; state = state.next)
if (used[state.nr]) {state.nr = ++State.lastNr; lastState = state;}
else lastState.next = state.next;
}
static State TheState(Node p) {
State state;
if (p == null) {state = NewState(); state.endOf = curSy; return state;}
else return p.state;
}
static void Step(State from, Node p, BitArray stepped) {
if (p == null) return;
stepped[p.n] = true;
switch (p.typ) {
case Node.clas: case Node.chr: {
NewTransition(from, TheState(p.next), p.typ, p.val, p.code);
break;
}
case Node.alt: {
Step(from, p.sub, stepped); Step(from, p.down, stepped);
break;
}
case Node.iter: case Node.opt: {
if (p.next != null && !stepped[p.next.n]) Step(from, p.next, stepped);
Step(from, p.sub, stepped);
break;
}
}
}
static void NumberNodes(Node p, State state) {
/* Assigns a state n.state to every node n. There will be a transition from
n.state to n.next.state triggered by n.val. All nodes in an alternative
chain are represented by the same state.
*/
if (p == null) return;
if (p.state != null) return; // already visited;
if (state == null) state = NewState();
p.state = state;
if (Node.DelGraph(p)) state.endOf = curSy;
switch (p.typ) {
case Node.clas: case Node.chr: {
NumberNodes(p.next, null);
break;
}
case Node.opt: {
NumberNodes(p.next, null); NumberNodes(p.sub, state);
break;
}
case Node.iter: {
NumberNodes(p.next, state); NumberNodes(p.sub, state);
break;
}
case Node.alt: {
NumberNodes(p.sub, state); NumberNodes(p.down, state);
break;
}
}
}
static void FindTrans (Node p, bool start, BitArray marked) {
if (p == null || marked[p.n]) return;
marked[p.n] = true;
if (start) Step(p.state, p, new BitArray(Node.nodes.Count)); // start of group of equally numbered nodes
switch (p.typ) {
case Node.clas: case Node.chr: {
FindTrans(p.next, true, marked);
break;
}
case Node.opt: {
FindTrans(p.next, true, marked); FindTrans(p.sub, false, marked);
break;
}
case Node.iter: {
FindTrans(p.next, false, marked); FindTrans(p.sub, false, marked);
break;
}
case Node.alt: {
FindTrans(p.sub, false, marked); FindTrans(p.down, false, marked);
break;
}
}
}
public static void ConvertToStates(Node p, Symbol sym) {
curGraph = p; curSy = sym;
if (Node.DelGraph(curGraph)) Parser.SemErr("token might be empty");
NumberNodes(curGraph, firstState);
FindTrans(curGraph, true, new BitArray(Node.nodes.Count));
}
// match string against current automaton; store it either as a fixedToken or as a litToken
public static void MatchLiteral(string s, Symbol sym) {
s = Unescape(s.Substring(1, s.Length-2));
int i, len = s.Length;
State state = firstState;
Action a = null;
for (i = 0; i < len; i++) { // try to match s against existing DFA
a = state.TheAction(s[i]);
if (a == null) break;
state = a.target.state;
}
// if s was not totally consumed or leads to a non-final state => make new DFA from it
if (i != len || state.endOf == null) {
state = firstState; i = 0; a = null;
dirtyDFA = true;
}
for (; i < len; i++) { // make new DFA for s[i..len-1]
State to = NewState();
NewTransition(state, to, Node.chr, s[i], Node.normalTrans);
state = to;
}
Symbol matchedSym = state.endOf;
if (state.endOf == null) {
state.endOf = sym;
} else if (matchedSym.tokenKind == Symbol.fixedToken || (a != null && a.tc == Node.contextTrans)) {
// s matched a token with a fixed definition or a token with an appendix that will be cut off
Parser.SemErr("tokens " + sym.name + " and " + matchedSym.name + " cannot be distinguished");
} else { // matchedSym == classToken || classLitToken
matchedSym.tokenKind = Symbol.classLitToken;
sym.tokenKind = Symbol.litToken;
}
}
static void SplitActions(State state, Action a, Action b) {
Action c; BitArray seta, setb, setc;
seta = a.Symbols(); setb = b.Symbols();
if (Sets.Equals(seta, setb)) {
a.AddTargets(b);
state.DetachAction(b);
} else if (Sets.Includes(seta, setb)) {
setc = (BitArray)seta.Clone(); Sets.Subtract(setc, setb);
b.AddTargets(a);
a.ShiftWith(setc);
} else if (Sets.Includes(setb, seta)) {
setc = (BitArray)setb.Clone(); Sets.Subtract(setc, seta);
a.AddTargets(b);
b.ShiftWith(setc);
} else {
setc = (BitArray)seta.Clone(); setc.And(setb);
Sets.Subtract(seta, setc);
Sets.Subtract(setb, setc);
a.ShiftWith(seta);
b.ShiftWith(setb);
c = new Action(0, 0, Node.normalTrans); // typ and sym are set in ShiftWith
c.AddTargets(a);
c.AddTargets(b);
c.ShiftWith(setc);
state.AddAction(c);
}
}
private static bool Overlap(Action a, Action b) {
BitArray seta, setb;
if (a.typ == Node.chr)
if (b.typ == Node.chr) return a.sym == b.sym;
else {setb = CharClass.Set(b.sym); return setb[a.sym];}
else {
seta = CharClass.Set(a.sym);
if (b.typ ==Node.chr) return seta[b.sym];
else {setb = CharClass.Set(b.sym); return Sets.Intersect(seta, setb);}
}
}
static bool MakeUnique(State state) { // return true if actions were split
bool changed = false;
for (Action a = state.firstAction; a != null; a = a.next)
for (Action b = a.next; b != null; b = b.next)
if (Overlap(a, b)) {SplitActions(state, a, b); changed = true;}
return changed;
}
static void MeltStates(State state) {
bool changed, ctx;
BitArray targets;
Symbol endOf;
for (Action action = state.firstAction; action != null; action = action.next) {
if (action.target.next != null) {
action.GetTargetStates(out targets, out endOf, out ctx);
Melted melt = Melted.StateWithSet(targets);
if (melt == null) {
State s = NewState(); s.endOf = endOf; s.ctx = ctx;
for (Target targ = action.target; targ != null; targ = targ.next)
s.MeltWith(targ.state);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -