/// <summary>
/// Describes the graph of <paramref name="data"/> through a
/// <c>GvGraphView</c> constructed with the name "tdfa.gv".
/// The view is disposed when the description is finished, even on failure.
/// </summary>
/// <param name="data">Automaton data whose graph is described.</param>
private void DescribeTdfa(ITdfaData data)
{
    using (IGraphView graph = new GvGraphView("tdfa.gv"))
    {
        data.DescribeGraph(graph);
    }
}
/// <summary>
/// Searches the states of the automaton, in index order, for the one whose
/// position set equals <paramref name="positionSet"/>.
/// </summary>
/// <param name="this">Automaton data to search.</param>
/// <param name="positionSet">Position set to look for.</param>
/// <returns>Index of the first matching state, or -1 when no state matches.</returns>
internal static int IndexOfState(this ITdfaData @this, IntSet positionSet)
{
    // The state count is read once up front, exactly as many states are inspected.
    int stateCount = @this.StateCount;
    int index = 0;

    while (index != stateCount)
    {
        var candidate = @this.GetState(index);
        if (candidate.Positions.Equals(positionSet))
        {
            return index;
        }

        ++index;
    }

    return -1;
}
/// <summary>
/// Emits one static, Int32-returning scan method per grammar condition.
/// Each method takes a single <c>ScanCursor</c> argument and its body is
/// produced by a <c>ScannerGenerator</c> built over the condition's TDFA data.
/// A verbose log entry is written before and after the whole pass.
/// </summary>
/// <param name="context">Class syntax to which the methods are added.</param>
/// <returns>The class syntax returned after the last emitted method body.</returns>
private ClassSyntax BuildMethod_Scan1(ClassSyntax context)
{
    var startedEntry = new LogEntry
    {
        Severity = Severity.Verbose,
        Message  = string.Format("Started compiling Scan1 modes for {0} language", languageName.LanguageName)
    };
    logging.Write(startedEntry);

    foreach (var condition in data.Grammar.Conditions)
    {
        // The TDFA is looked up on the condition's Joint.
        ITdfaData tdfa = condition.Joint.The<ITdfaData>();
        var scannerGenerator = new ScannerGenerator(new DfaSerialization(tdfa));
        var scanMethodName = ConditionMethods.GetMethodName(condition.Index);

        // Method header: static <Int32> <scanMethodName>(...)
        var signature = context
            .Method()
                .Static
                .Returning(context.Types.Int32)
                .Named(scanMethodName)
            .BeginArgs();

        // Single argument: the scan cursor (input).
        var cursorType = context.Types.Import(typeof(ScanCursor));
        var cursorArg = signature.Args.Generate("cursor");

        var body = signature
            .Argument(cursorType, cursorArg)
            .EndArgs()
            .NoInlining
            .NoOptimization
            .BeginBody();

        scannerGenerator.Build(body);

        // Closing the body yields the class syntax used for the next method.
        context = body.EndBody();
    }

    var doneEntry = new LogEntry
    {
        Severity = Severity.Verbose,
        Message  = string.Format("Done compiling Scan1 modes for {0} language", languageName.LanguageName)
    };
    logging.Write(doneEntry);

    return context;
}
/// <summary>
/// Builds the TDFA data for a condition from its matchers and, on success,
/// adds the result to the condition's Joint.
/// </summary>
/// <param name="logging">Logging sink passed to the scanner descriptor factory.</param>
/// <param name="condition">Condition whose matchers are compiled.</param>
/// <param name="outcome">
/// The built TDFA data, or <c>null</c> when the descriptor produced no AST.
/// </param>
/// <returns><c>true</c> when TDFA data was built; otherwise <c>false</c>.</returns>
private static bool CompileTdfa(ILogging logging, Condition condition, out ITdfaData outcome)
{
    var descriptor = ScannerDescriptor.FromScanRules(condition.Matchers, logging);

    // Filled by MakeAst and then passed on to the TDFA algorithm.
    var literalActions = new Dictionary<string, int>();
    var syntaxTree = descriptor.MakeAst(literalActions);

    if (syntaxTree != null)
    {
        var regularTree = new RegularTree(syntaxTree);
        var algorithm = new RegularToTdfaAlgorithm(regularTree, literalActions);

        outcome = algorithm.Data;
        condition.Joint.Add(outcome);
        return true;
    }

    outcome = null;
    return false;
}
/// <summary>
/// Initializes the instance by storing <paramref name="data"/> in its
/// <c>data</c> field. No validation or copying is performed here.
/// </summary>
/// <param name="data">Automaton data kept by this instance.</param>
public DfaSimulation(ITdfaData data) { this.data = data; }
// Builds tunnel-automaton (TDFA) data from a regular tree of the non-constant
// regular expressions plus a map of literal (constant) strings to actions.
//
// Outline:
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData
//
// noConstRegTree  - regular tree of the non-constant expressions.
// literalToAction - literal text mapped to the action of that literal; every
//                   character of every key is registered in the alphabet.
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string, Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence sets plus the
    // NewLines set, so newline symbols can be encoded further below.
    var equivClasses = noConstRegTree
                        .GetEquivalenceCsets()
                        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    // Register each literal character as its own input set.
    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling: make sure the target of a newline transition is
    // a state flagged with IsNewline.
    // NOTE(review): states added inside this loop rely on EnumerateStates()
    // tolerating AddState during enumeration - confirm.
    foreach (var state in data.EnumerateStates())
    {
        // Only the first outgoing transition carrying a newline symbol is considered.
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        TdfaState newlineState;
        if (to.IsNewline)
        {
            // Target is already flagged; nothing to do.
            continue;
        }
        else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition of the target passes the
            // HasSingleSymbolFrom(newlines) check, so the target itself is flagged.
            newlineState = to;
        }
        else
        {
            // Otherwise insert a dedicated state that tunnels to the original
            // target, copies its accepting flag and actions, and redirect this
            // state's newline transition to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}
/// <summary>
/// Initializes the instance by storing <paramref name="data"/> in its
/// <c>data</c> field. No validation or copying is performed here.
/// </summary>
/// <param name="data">Automaton data kept by this instance.</param>
public DfaSerialization(ITdfaData data) { this.data = data; }
// Builds DFA data for a regular tree by subset construction over position sets:
// the start state holds regTree.FirstPos, and for every state and every
// transition symbol the follow positions of the matching positions form the
// target state (reused when a state with the same position set already exists).
//
// regTree  - regular tree providing first/follow positions, position symbols
//            and position actions.
// alphabet - alphabet used to encode position symbol sets into symbols.
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): states added below are expected to be visited by this same
    // enumeration (work-list behavior of EnumerateStates) - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // Collect actions keyed by position so they are added in ascending
        // position order.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbol set of each position once per state instead of
        // once per (symbol, position) pair; the result does not depend on the
        // symbol being processed.
        var encodedPositions = S
            .Select(position => new
            {
                Position = position,
                Symbols  = alphabet.Encode(regTree.GetPosSymbols(position))
            })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
                                    encodedPositions.Select(entry => entry.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            // No transition is created for the end-of-input symbol.
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U = union of follow positions of all positions in S that match the symbol.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                // Reuse an existing state with the same position set, if any.
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
// Builds DFA data for a regular tree by subset construction over position sets:
// the start state holds regTree.FirstPos, and for every state and every
// transition symbol the follow positions of the matching positions form the
// target state (reused when a state with the same position set already exists).
//
// regTree  - regular tree providing first/follow positions, position symbols
//            and position actions.
// alphabet - alphabet used to encode position symbol sets into symbols.
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): states added below are expected to be visited by this same
    // enumeration (work-list behavior of EnumerateStates) - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // Collect actions keyed by position so they are added in ascending
        // position order.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbol set of each position once per state instead of
        // once per (symbol, position) pair; the result does not depend on the
        // symbol being processed.
        var encodedPositions = S
            .Select(position => new
            {
                Position = position,
                Symbols  = alphabet.Encode(regTree.GetPosSymbols(position))
            })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
                                    encodedPositions.Select(entry => entry.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            // No transition is created for the end-of-input symbol.
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U = union of follow positions of all positions in S that match the symbol.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                // Reuse an existing state with the same position set, if any.
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
/// <summary>
/// Initializes the state by storing <paramref name="container"/> in its
/// <c>container</c> field. The state is not added to the container here.
/// </summary>
/// <param name="container">Automaton data kept as this state's container.</param>
public TdfaState(ITdfaData container) { this.container = container; }
// Builds tunnel-automaton (TDFA) data from a regular tree of the non-constant
// regular expressions plus a map of literal (constant) strings to actions.
//
// Outline:
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData
//
// noConstRegTree  - regular tree of the non-constant expressions.
// literalToAction - literal text mapped to the action of that literal; every
//                   character of every key is registered in the alphabet.
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence sets plus the
    // NewLines set, so newline symbols can be encoded further below.
    var equivClasses = noConstRegTree
                        .GetEquivalenceCsets()
                        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    // Register each literal character as its own input set.
    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling: make sure the target of a newline transition is
    // a state flagged with IsNewline.
    // NOTE(review): states added inside this loop rely on EnumerateStates()
    // tolerating AddState during enumeration - confirm.
    foreach (var state in data.EnumerateStates())
    {
        // Only the first outgoing transition carrying a newline symbol is considered.
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        TdfaState newlineState;
        if (to.IsNewline)
        {
            // Target is already flagged; nothing to do.
            continue;
        }
        else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition of the target passes the
            // HasSingleSymbolFrom(newlines) check, so the target itself is flagged.
            newlineState = to;
        }
        else
        {
            // Otherwise insert a dedicated state that tunnels to the original
            // target, copies its accepting flag and actions, and redirect this
            // state's newline transition to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}