/// <summary>
/// Adds every entry of <paramref name="fa"/>'s transition graph to this automaton's graph.
/// </summary>
/// <remarks>
/// The transition lists are added by reference, not copied. Only the transition graph is
/// touched; this method does not read or change start or accepting states.
/// NOTE(review): presumably <c>tg</c> is a Dictionary, in which case a state that is
/// already present in this graph makes <c>Add</c> throw - confirm against the FA class.
/// </remarks>
/// <param name="fa">Automaton whose transitions are added to this one.</param>
/// <returns>This automaton, so calls can be chained.</returns>
public FA merge(FA fa)
{
    foreach (var entry in fa.tg)
    {
        int fromState = entry.Key;
        List<Tuple<Trans, int>> outgoing = entry.Value;
        tg.Add(fromState, outgoing);
    }

    return this;
}
/// <summary>
/// Returns the single accepting state of <paramref name="a"/>.
/// </summary>
/// <param name="a">Automaton expected to have exactly one accepting state.</param>
/// <returns>The accepting state of <paramref name="a"/>.</returns>
/// <exception cref="ArgumentException">
/// Thrown when the number of accepting states is not exactly one.
/// </exception>
private int onlyAcceptingState(FA a)
{
    bool hasSingleAcceptingState = a.acceptingStates.Count == 1;
    if (!hasSingleAcceptingState)
    {
        throw new ArgumentException("NFAs should be prepared with exactly one accepting state");
    }

    return a.acceptingStates.First;
}
/// <summary>
/// Renders an automaton as a Graphviz "digraph" description.
/// </summary>
/// <remarks>
/// Nodes are emitted for the states reachable from the start state; accepting states are
/// drawn as double circles. Edges are emitted for every entry of the transition graph.
/// Epsilon transitions are labelled "ε" and character ranges "{from..to}".
/// </remarks>
/// <param name="fa">Automaton to render.</param>
/// <returns>The dot source text.</returns>
/// <exception cref="System.ArgumentException">
/// Thrown when a transition is neither an Epsilon nor a CharRange.
/// </exception>
public static String ToDot(FA fa)
{
    const string acceptingNodeFormat =
        "s{0}[fontsize=11, label=\"{1}\", shape=doublecircle, fixedsize=true, width=.6];";
    const string plainNodeFormat =
        "s{0}[fontsize=11, label=\"{1}\", shape=circle, fixedsize=true, width=.55, peripheries=1];";
    const string edgeFormat =
        "s{0} -> s{1} [fontsize=11, fontname=\"Courier\", arrowsize=.7, label = \"{2}\", arrowhead = normal];";

    // Every state reachable from the start state by following outgoing transitions.
    // States without an entry in the transition graph simply have no successors.
    Set<int> reachable = Utils.make_set(
        Utils.transitiveClosure(
            fa.startState,
            (node) => Utils.make_set(
                Utils.GetOrDefault(fa.tg, node, new List<Tuple<Trans, int>>())
                     .Select(edge => edge.Item2))));

    StringBuilder dot = new StringBuilder();
    p(dot, "digraph NFA {");
    p(dot, "rankdir=LR;");

    // Node declarations.
    foreach (int node in reachable)
    {
        string nodeFormat = fa.acceptingStates.Contains(node)
            ? acceptingNodeFormat
            : plainNodeFormat;
        p(dot, string.Format(nodeFormat, node, node));
    }

    // Edge declarations.
    foreach (var entry in fa.tg)
    {
        foreach (Tuple<Trans, int> edge in entry.Value)
        {
            Trans condition = edge.Item1;
            string label;

            if (condition is Epsilon)
            {
                label = "ε";
            }
            else if (condition is CharRange)
            {
                CharRange range = (CharRange)condition;
                label = string.Format("{{{0}..{1}}}", range.From, range.To);
            }
            else
            {
                throw new System.ArgumentException(condition.ToString());
            }

            p(dot, String.Format(edgeFormat, entry.Key, edge.Item2, label));
        }
    }

    p(dot, "}");
    return dot.ToString();
}
/// <summary>
/// Computes the union of the epsilon closures of all states in <paramref name="state"/>.
/// </summary>
/// <param name="state">Set of automaton states to start from.</param>
/// <param name="fa">Automaton whose epsilon transitions are followed.</param>
/// <returns>A new set holding the closure of every member of <paramref name="state"/>.</returns>
private Set<int> epsilonClosure(Set<int> state, FA fa)
{
    Set<int> closure = new Set<int>();

    foreach (int member in state)
    {
        Set<int> reachable = epsilonClosure(member, fa);
        closure.AddRange(reachable);
    }

    return closure;
}
/// <summary>
/// Collects the states that can be reached from <paramref name="state"/> by a single
/// epsilon transition.
/// </summary>
/// <param name="state">State whose outgoing transitions are inspected.</param>
/// <param name="fa">Automaton that owns the transitions.</param>
/// <returns>A new set with the targets of the outgoing epsilon transitions.</returns>
private Set<int> epsilons(int state, FA fa)
{
    Set<int> targets = new Set<int>();

    foreach (Tuple<Trans, int> edge in fa.outTrans(state))
    {
        // Only epsilon edges contribute; anything else consumes input.
        if (!(edge.Item1 is Epsilon))
        {
            continue;
        }

        targets.Add(edge.Item2);
    }

    return targets;
}
/// <summary>
/// Builds a new automaton in which the outgoing transition list of every state has been
/// passed through <c>removeCommonTransitions</c>.
/// </summary>
/// <remarks>
/// The start state and the accepting-state set are taken over from <paramref name="fa"/>
/// unchanged (the accepting set is passed on by reference, not copied).
/// </remarks>
/// <param name="fa">Source automaton; its transition graph is read but not replaced.</param>
/// <returns>A new automaton with the rewritten transition graph.</returns>
public FA nfaWithNoCommonTransitions(FA fa)
{
    var rewritten = new Dictionary<int, List<Tuple<Trans, int>>>();

    foreach (var entry in fa.tg)
    {
        rewritten[entry.Key] = removeCommonTransitions(entry.Value);
    }

    return new FA(fa.startState, fa.acceptingStates, rewritten);
}
/// <summary>
/// Recursively translates a regular-expression tree into an NFA.
/// </summary>
/// <remarks>
/// Character classes get two fresh states joined by one transition. Alternation gets a
/// fresh start and end state wired to every option with epsilon transitions. Sequences
/// chain their parts with epsilon transitions. Plus, Star and Question add epsilon
/// transitions directly between the start and accepting state of the sub-automaton
/// instead of allocating new states. A string literal is expanded into a sequence of
/// single-character classes.
/// </remarks>
/// <param name="_e">Root of the regular expression to translate.</param>
/// <returns>The automaton built for <paramref name="_e"/>.</returns>
/// <exception cref="System.NotImplementedException">For ByName expressions.</exception>
/// <exception cref="Exception">For any expression kind not handled here.</exception>
public FA nfaFromRegEx(RExpr _e)
{
    if (_e is ByName)
    {
        throw new System.NotImplementedException("nfaFromRegEx/ByName not implemented");
    }

    CharClass charClass = _e as CharClass;
    if (charClass != null)
    {
        CharClass canon = canonicalized(charClass);
        int start = newState();
        int end = newState();
        return fa(start, end).trans(start, end, charRange(canon));
    }

    NotCharClass negated = _e as NotCharClass;
    if (negated != null)
    {
        CharClass canon = canonicalized(negated);
        int start = newState();
        int end = newState();
        return fa(start, end).trans(start, end, charRange(canon));
    }

    Oring oring = _e as Oring;
    if (oring != null)
    {
        List<FA> alternatives = Utils.list(oring.Exprs.Select(sub => nfaFromRegEx(sub)));
        List<int> entryStates = Utils.list(alternatives.Select(alt => alt.startState));
        List<int> exitStates = Utils.list(alternatives.Select(alt => onlyAcceptingState(alt)));

        int start = newState();
        int end = newState();
        FA combined = fa(start, end).merge(alternatives);

        // Fan out from the new start into each option and back into the new end.
        for (int i = 0; i < alternatives.Count; ++i)
        {
            combined.trans(start, entryStates[i], epsilon());
            combined.trans(exitStates[i], end, epsilon());
        }

        return combined;
    }

    Plus plus = _e as Plus;
    if (plus != null)
    {
        FA inner = nfaFromRegEx(plus.Expr);
        // Loop back from the accepting state to allow repetition.
        inner.trans(onlyAcceptingState(inner), inner.startState, epsilon());
        return inner;
    }

    RXSeq sequence = _e as RXSeq;
    if (sequence != null)
    {
        if (sequence.Exprs.Count == 1)
        {
            return nfaFromRegEx(sequence.Exprs[0]);
        }

        List<FA> parts = Utils.list(sequence.Exprs.Select(sub => nfaFromRegEx(sub)));
        FA chained = fa(parts[0].startState, onlyAcceptingState(Utils.last(parts))).merge(parts);

        // Link the accepting state of each part to the start state of its successor.
        for (int i = 0; i + 1 < parts.Count; ++i)
        {
            int prev = onlyAcceptingState(parts[i]);
            int next = parts[i + 1].startState;
            chained.trans(prev, next, epsilon());
        }

        return chained;
    }

    Star star = _e as Star;
    if (star != null)
    {
        FA inner = nfaFromRegEx(star.Expr);
        // Loop back for repetition, and skip forward for the empty match.
        inner.trans(onlyAcceptingState(inner), inner.startState, epsilon());
        inner.trans(inner.startState, onlyAcceptingState(inner), epsilon());
        return inner;
    }

    Question question = _e as Question;
    if (question != null)
    {
        FA inner = nfaFromRegEx(question.Expr);
        // Skip forward so the sub-expression may be omitted.
        inner.trans(inner.startState, onlyAcceptingState(inner), epsilon());
        return inner;
    }

    Str str = _e as Str;
    if (str != null)
    {
        List<RExpr> singles = new List<RExpr>();

        // todo: iterating over a string, this would
        // not work with surrogate pairs...etc
        foreach (char c in str.Value)
        {
            List<CharClassPart> part = Utils.list1((CharClassPart) new CharPartRange(c, c));
            singles.Add(new CharClass(part));
        }

        return nfaFromRegEx(new RXSeq(singles));
    }

    throw new Exception("OptionNotHandledException(_e)");
}
/// <summary>
/// Computes the epsilon closure of a single state: the transitive closure of
/// <paramref name="state"/> under the one-step epsilon successors given by <c>epsilons</c>.
/// </summary>
/// <param name="state">State to start from.</param>
/// <param name="fa">Automaton whose epsilon transitions are followed.</param>
/// <returns>The set produced by <c>Utils.transitiveClosure</c> for that step function.</returns>
private Set<int> epsilonClosure(int state, FA fa)
{
    Set<int> closure = Utils.transitiveClosure(state, current => epsilons(current, fa));
    return closure;
}
/// <summary>
/// Converts an NFA into a deterministic automaton by subset construction.
/// </summary>
/// <remarks>
/// Each state of the result stands for a set of NFA states. The start set is the epsilon
/// closure of the NFA start state. A set is accepting when it shares at least one state
/// with the NFA's accepting states. Sets are numbered through a <c>LabellerInt</c> once
/// the whole graph has been explored.
/// </remarks>
/// <param name="nfa">Automaton to convert; it is only read.</param>
/// <returns>A new automaton whose states are integer labels of NFA state sets.</returns>
public FA dfaFromNfa(FA nfa)
{
    var subsetEdges = new Dictionary<Set<int>, Set<Tuple<Trans, Set<int>>>>();
    Set<int> initial = epsilonClosure(nfa.startState, nfa);
    var acceptingSubsets = new Set<Set<int>>();
    var pending = new Stack<Set<int>>();
    var visited = new Set<Set<int>>();

    pending.Push(initial);

    while (pending.Count != 0)
    {
        Set<int> current = pending.Pop();
        visited.Add(current);

        var outgoing = new List<Tuple<Trans, int>>();

        if (nfa.acceptingStates.intersect(current).Count != 0)
        {
            acceptingSubsets.Add(current);
        }

        foreach (int member in current)
        {
            outgoing.AddRange(nfa.outTrans(member));
        }

        // Bucket the outgoing transitions of the whole subset by their condition.
        var byCondition = new Dictionary<Trans, List<Tuple<Trans, int>>>();
        foreach (IGrouping<Trans, Tuple<Trans, int>> bucket in outgoing.GroupBy(edge => edge.Item1))
        {
            byCondition.Add(bucket.Key, bucket.ToList());
        }

        foreach (var entry in byCondition)
        {
            Trans condition = entry.Key;

            // Epsilon moves are already folded into the closures.
            if (condition is Epsilon)
            {
                continue;
            }

            List<int> rawTargets = Utils.list(entry.Value.Select(edge => edge.Item2));
            Set<int> target = epsilonClosure(Utils.make_set(rawTargets), nfa);

            Utils.Add(subsetEdges, current, new Tuple<Trans, Set<int>>(condition, target));

            if (!visited.Contains(target))
            {
                pending.Push(target);
            }
        }
    }

    // Replace every subset by an integer label.
    LabellerInt<Set<int>> labels = new LabellerInt<Set<int>>();
    FA dfa = new FA(labels.labelFor(initial), -1);

    foreach (var entry in subsetEdges)
    {
        int from = labels.labelFor(entry.Key);
        Set<Tuple<Trans, int>> labelled = Utils.make_set(
            entry.Value.Select(edge => new Tuple<Trans, int>(edge.Item1, labels.labelFor(edge.Item2))));
        dfa.tg[from] = Utils.list(labelled);
    }

    dfa.acceptingStates = Utils.make_set(acceptingSubsets.Select(subset => labels.labelFor(subset)));
    return dfa;
}
// Longest match wins
// in case of tie, first-in-rule-listing wins
/// <summary>
/// Scans the next token starting at the current position, trying every rule automaton
/// registered for the current scope (the top of <c>scopes</c>, or "" when it is empty).
/// </summary>
/// <remarks>
/// For each rule the automaton is run as far as it will go while remembering the last
/// position at which it was in an accepting state, so a rule still matches when the run
/// continues past an accepting state and then gets stuck. The longest non-empty match
/// across all rules is accepted; on equal length the rule listed first wins.
///
/// A rule that consumes the rest of the input and ends in an accepting state is accepted
/// immediately: no longer match is possible, and any earlier rule with the same length
/// would already have returned.
///
/// A rule that runs out of input in a non-accepting state no longer aborts the scan;
/// the remaining rules are still tried. When nothing matches and there is input left, the
/// error rule (if one is configured) consumes a single character.
/// </remarks>
/// <returns>The token produced by <c>accept</c> for the winning rule.</returns>
/// <exception cref="Exception">
/// Thrown when no rule matches and no error rule applies.
/// </exception>
public Token nextToken() // throws LexerError
{
    string scope = "";
    if (scopes.Count != 0)
    {
        scope = scopes.Peek();
    }

    string bestRule = "";
    int bestPos = pos, bestLine = -1, bestCol = -1;
    bool ranOutOfInput = false;

    foreach (Tuple<string, FA> rule in dfasByScope[scope])
    {
        string ruleName = rule.Item1;
        FA fa = rule.Item2;

        int state = fa.startState;
        int p = pos, curLine = line, curCol = col;

        // Last position (with its line/column) at which this rule was in an accepting state.
        int lastPos = -1, lastLine = -1, lastCol = -1;

        while (true)
        {
            bool accepting = fa.acceptingStates.Contains(state);

            if (p == inputText.Length)
            {
                if (accepting)
                {
                    return accept(ruleName, p, curLine, curCol);
                }

                // Fall back to the last accepting position of this rule, if any,
                // and let the remaining rules have their turn.
                ranOutOfInput = true;
                break;
            }

            if (accepting)
            {
                lastPos = p;
                lastLine = curLine;
                lastCol = curCol;
            }

            char c = inputText[p];
            bool moved = false;
            foreach (Tuple<Trans, int> t in fa.outTrans(state))
            {
                if (t.Item1.match(c))
                {
                    state = t.Item2;
                    moved = true;
                    break;
                }
            }

            if (!moved)
            {
                break;
            }

            p++;
            curCol++;
            if (c == '\n')
            {
                curLine++;
                curCol = 0;
            }
        }

        // Strict comparison keeps the first-listed rule on ties and rejects empty matches.
        if (lastPos > bestPos)
        {
            bestPos = lastPos;
            bestRule = ruleName;
            bestLine = lastLine;
            bestCol = lastCol;
        }
    }

    if (bestPos > pos)
    {
        return accept(bestRule, bestPos, bestLine, bestCol);
    }

    bool atEnd = pos >= inputText.Length;

    // The error rule swallows exactly one character, so it needs one to be available.
    if (!atEnd && errorRuleName != "")
    {
        int advLine, advCol;
        advance(pos, line, col, out advLine, out advCol);
        Token t = accept(errorRuleName, pos + 1, advLine, advCol);
        return t;
    }

    if (atEnd || ranOutOfInput)
    {
        throw new Exception("No recognizable token at end of file");
    }

    string ex = "Cannot process input near: " + formatLiteral(Utils.Mid(inputText, pos, 15)) + "";
    throw new Exception(ex);
}
/// <summary>
/// Builds one automaton per lexer rule, keyed by rule name.
/// </summary>
/// <remarks>
/// Pipeline per rule: regular expression to NFA (<c>nfaFromRegEx</c>), NFA to DFA
/// (<c>dfaFromNfa</c>), <c>nfaWithNoCommonTransitions</c> on that DFA, and a second
/// <c>dfaFromNfa</c> pass whose result is what gets returned. When <c>writeDotFiles</c>
/// is set, every stage is also written as a dot file under "./dots".
/// </remarks>
/// <param name="rules">Lexer rules; they are passed to <c>validate</c> before any automaton is built.</param>
/// <returns>Map from rule name to the automaton of the final stage.</returns>
private Dictionary<string, FA> initAutomata(List<LexerRule> rules)
{
    var nfaByRule = new Dictionary<string, FA>();
    FAAlgo algo = new FAAlgo();

    validate(rules);

    if (writeDotFiles)
    {
        Utils.EnsureDirExists("./dots");
    }

    // Stage 1: regular expression -> NFA.
    foreach (LexerRule rule in rules)
    {
        nfaByRule[rule.Name] = algo.nfaFromRegEx(rule.Expr);
    }

    if (writeDotFiles)
    {
        foreach (var entry in nfaByRule)
        {
            Utils.WriteToFile("./dots/" + entry.Key + ".dot", TestLexer.ToDot(entry.Value));
        }
    }

    // Stage 2: NFA -> DFA.
    var dfaByRule = new Dictionary<string, FA>();
    foreach (var entry in nfaByRule)
    {
        FA firstDfa = algo.dfaFromNfa(entry.Value);
        dfaByRule[entry.Key] = firstDfa;

        if (writeDotFiles)
        {
            Utils.WriteToFile("./dots/" + entry.Key + "_dfa.dot", TestLexer.ToDot(firstDfa));
        }
    }

    // Stage 3: remove common transitions.
    var noCommonByRule = new Dictionary<string, FA>();
    foreach (var entry in dfaByRule)
    {
        FA split = algo.nfaWithNoCommonTransitions(entry.Value);
        noCommonByRule[entry.Key] = split;

        if (writeDotFiles)
        {
            Utils.WriteToFile("./dots/" + entry.Key + "_noCommon.dot", TestLexer.ToDot(split));
        }
    }

    // Stage 4: determinise again; this result replaces the stage 2 automaton.
    foreach (var entry in noCommonByRule)
    {
        FA finalDfa = algo.dfaFromNfa(entry.Value);
        dfaByRule[entry.Key] = finalDfa;

        if (writeDotFiles)
        {
            Utils.WriteToFile("./dots/" + entry.Key + "_dfa2.dot", TestLexer.ToDot(finalDfa));
        }
    }

    return dfaByRule;
}