/// <summary>
/// Builds the DFA data for <paramref name="regTree"/> over <paramref name="alphabet"/>.
/// Every state is identified by a set of tree positions: the first state holds
/// <c>regTree.FirstPos</c>, and for each transition symbol the target state is the
/// union of the follow positions of all positions whose encoded symbols contain it.
/// </summary>
/// <param name="regTree">Tree supplying first/follow positions, the end-of-input position, and per-position symbols and actions.</param>
/// <param name="alphabet">Alphabet used to encode position symbols into transition symbols.</param>
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): states are added to `data` inside this loop; this relies on
    // EnumerateStates() also yielding states appended during enumeration - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // SortedList is keyed by position, so the actions are appended to the
        // state in ascending position order.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbols of every position exactly once per state; the result
        // is reused both for the transition-symbol union and for the per-symbol
        // membership tests below.
        var encodedPositions = S
            .Select(position => new
            {
                Position = position,
                Symbols = alphabet.Encode(regTree.GetPosSymbols(position))
            })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
            encodedPositions.Select(entry => entry.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            // The end-of-input symbol never produces a transition.
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U collects the follow positions of all positions matching `symbol`.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                // Reuse the state with the same position set, or register a new one.
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData

/// <summary>
/// Builds the tunnel automaton: a DFA is created for <paramref name="noConstRegTree"/>,
/// every state's tunnel is reset to <c>NoState</c>, the ambiguous states are computed,
/// the automaton is extended with each literal of <paramref name="literalToAction"/>,
/// and finally the targets of newline transitions are marked as newline states.
/// </summary>
/// <param name="noConstRegTree">Regular tree of the non-constant regular expressions.</param>
/// <param name="literalToAction">Literal texts paired with the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string, Action> literalToAction)
{
    // The alphabet covers the tree's equivalence csets, the newline set and
    // every single character that occurs in a literal.
    var equivClasses = noConstRegTree
        .GetEquivalenceCsets()
        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);
    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling.
    // NOTE(review): states may be added to `data` inside this loop; this relies on
    // EnumerateStates() tolerating additions during enumeration - confirm.
    foreach (var state in data.EnumerateStates())
    {
        // Only the first outgoing transition carrying any newline symbol is handled.
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        TdfaState newlineState;
        if (to.IsNewline)
        {
            // Target is already marked as a newline state.
            continue;
        }
        else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition passes the HasSingleSymbolFrom(newlines)
            // check, so the target itself is marked.
            newlineState = to;
        }
        else
        {
            // The target is shared with other transitions: create a separate
            // state that tunnels to the original target, copy its accepting flag
            // and actions, and redirect this state's newline transition to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}
/// <summary>
/// Builds the DFA data for <paramref name="regTree"/> using an
/// <c>EquivalenceClassesAlphabet</c> created from the tree's own equivalence csets.
/// </summary>
/// <param name="regTree">Tree to convert; also the source of the alphabet.</param>
public RegularToDfaAlgorithm(RegularTree regTree)
    : this(regTree, CreateEquivalenceAlphabet(regTree))
{
    // Alternative alphabet, kept for reference:
    //   this(regTree, new RegularAlphabet(regTree.Positions.Select(node => node.Characters)))
}

// Creates the alphabet from the equivalence csets reported by the tree.
private static EquivalenceClassesAlphabet CreateEquivalenceAlphabet(RegularTree regTree)
{
    return new EquivalenceClassesAlphabet(regTree.GetEquivalenceCsets());
}
/// <summary>
/// Builds the DFA data for <paramref name="regTree"/> over <paramref name="alphabet"/>.
/// Every state is identified by a set of tree positions: the first state holds
/// <c>regTree.FirstPos</c>, and for each transition symbol the target state is the
/// union of the follow positions of all positions whose encoded symbols contain it.
/// </summary>
/// <param name="regTree">Tree supplying first/follow positions, the end-of-input position, and per-position symbols and actions.</param>
/// <param name="alphabet">Alphabet used to encode position symbols into transition symbols.</param>
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): states are added to `data` inside this loop; this relies on
    // EnumerateStates() also yielding states appended during enumeration - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // SortedList is keyed by position, so the actions are appended to the
        // state in ascending position order.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbols of every position exactly once per state; the result
        // is reused both for the transition-symbol union and for the per-symbol
        // membership tests below.
        var encodedPositions = S
            .Select(position => new
            {
                Position = position,
                Symbols = alphabet.Encode(regTree.GetPosSymbols(position))
            })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
            encodedPositions.Select(entry => entry.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            // The end-of-input symbol never produces a transition.
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U collects the follow positions of all positions matching `symbol`.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                // Reuse the state with the same position set, or register a new one.
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
// Alternative alphabet, kept for reference:
//   this(regTree, new RegularAlphabet(regTree.Positions.Select(node => node.Characters)))

/// <summary>
/// Builds the DFA data for <paramref name="regTree"/> using an
/// <c>EquivalenceClassesAlphabet</c> created from the tree's own equivalence csets.
/// </summary>
/// <param name="regTree">Tree to convert; also the source of the alphabet.</param>
public RegularToDfaAlgorithm(RegularTree regTree)
    : this(regTree, CreateEquivalenceAlphabet(regTree))
{
}

// Creates the alphabet from the equivalence csets reported by the tree.
private static EquivalenceClassesAlphabet CreateEquivalenceAlphabet(RegularTree regTree)
{
    return new EquivalenceClassesAlphabet(regTree.GetEquivalenceCsets());
}
/// <summary>
/// Wraps <paramref name="astNode"/> in a new <c>RegularTree</c> and stores it in
/// the <c>regularTree</c> member.
/// </summary>
/// <param name="astNode">AST node passed to the <c>RegularTree</c> constructor.</param>
private void GivenRegularExpression(AstNode astNode)
{
    var tree = new RegularTree(astNode);
    regularTree = tree;
}
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData

/// <summary>
/// Builds the tunnel automaton: a DFA is created for <paramref name="noConstRegTree"/>,
/// every state's tunnel is reset to <c>NoState</c>, the ambiguous states are computed,
/// the automaton is extended with each literal of <paramref name="literalToAction"/>,
/// and finally the targets of newline transitions are marked as newline states.
/// </summary>
/// <param name="noConstRegTree">Regular tree of the non-constant regular expressions.</param>
/// <param name="literalToAction">Literal texts paired with the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string, Action> literalToAction)
{
    // The alphabet covers the tree's equivalence csets, the newline set and
    // every single character that occurs in a literal.
    var equivClasses = noConstRegTree
        .GetEquivalenceCsets()
        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);
    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling.
    // NOTE(review): states may be added to `data` inside this loop; this relies on
    // EnumerateStates() tolerating additions during enumeration - confirm.
    foreach (var state in data.EnumerateStates())
    {
        // Only the first outgoing transition carrying any newline symbol is handled.
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        TdfaState newlineState;
        if (to.IsNewline)
        {
            // Target is already marked as a newline state.
            continue;
        }
        else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition passes the HasSingleSymbolFrom(newlines)
            // check, so the target itself is marked.
            newlineState = to;
        }
        else
        {
            // The target is shared with other transitions: create a separate
            // state that tunnels to the original target, copy its accepting flag
            // and actions, and redirect this state's newline transition to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}