/// <summary>
/// Computes the set of ambiguous states of the automaton stored in <c>data</c>.
/// </summary>
/// <remarks>
/// Every state is initially assumed to be ambiguous. The start state is then taken out of
/// that set, and the removal is propagated along each transition whose target is entered by
/// exactly one symbol in total (summed over all of its incoming transitions). The states
/// that are never reached by this propagation are returned.
/// </remarks>
/// <returns>The states that were not removed by the propagation described above.</returns>
private IntSet FindAmbiguousStates()
{
    // incomingSymbolCount[s] = total number of symbols on all transitions entering state s.
    var incomingSymbolCount = new int[data.StateCount];
    foreach (var source in data.EnumerateStates())
    {
        foreach (var transition in source.Outgoing)
        {
            incomingSymbolCount[transition.To] += transition.Symbols.Count;
        }
    }

    var ambiguous = StateSetType.Range(0, data.StateCount - 1).EditCopy();
    var pending = StateSetType.Of(data.Start).EditCopy();

    while (!pending.IsEmpty)
    {
        var state = pending.PopAny();
        ambiguous.Remove(state);

        foreach (var transition in data.GetState(state).Outgoing)
        {
            int successor = transition.To;

            // Only queue states that are still in 'ambiguous', i.e. that have not been
            // processed yet. Without this check a state that was already popped (for
            // example the start state when it is re-entered by a single symbol) would be
            // queued again and again, and the loop would never terminate.
            if (incomingSymbolCount[successor] == 1
                && ambiguous.Contains(successor)
                && !pending.Contains(successor))
            {
                pending.Add(successor);
            }
        }
    }

    return ambiguous.CompleteAndDestroy();
}
/// <summary>
/// Renders the automaton as text, one line per transition.
/// </summary>
/// <returns>
/// For every outgoing transition of every state: the positions of the source state, the
/// decoded transition symbols and the positions of the target state, followed by
/// <c> [CAN ACCEPT] </c> when the target state is accepting.
/// </returns>
public override string ToString()
{
    var text = new StringBuilder();

    foreach (var source in data.EnumerateStates())
    {
        foreach (var edge in source.Outgoing)
        {
            var origin = data.GetState(edge.From).Positions;
            var label = alphabet.Decode(edge.Symbols);
            var target = data.GetState(edge.To);

            text.AppendFormat("{0} --{1}--> {2}", origin, label, target.Positions);

            if (target.IsAccepting)
            {
                text.Append(" [CAN ACCEPT] ");
            }

            text.AppendLine();
        }
    }

    return text.ToString();
}
/// <summary>
/// Builds the tunnel automaton: a DFA for the non-constant regular expressions in
/// <paramref name="noConstRegTree"/>, extended with every literal in
/// <paramref name="literalToAction"/> and with dedicated newline states.
/// </summary>
/// <remarks>
/// Overall plan:
/// 1. build full RegularTree (don't build first, following for positions)
/// 2. build regular alphabet (equivalence classes)
/// 3. build first, following for non-literal part of regular tree
/// 4. build non-literal TdfaData
/// </remarks>
/// <param name="noConstRegTree">Regular tree of the non-constant (non-literal) expressions.</param>
/// <param name="literalToAction">Literal text mapped to the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string, Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence classes plus the newline set,
    // and then every single character used by a literal is registered as an input set.
    var equivClasses = noConstRegTree
        .GetEquivalenceCsets()
        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling: the target of a newline transition is flagged IsNewline.
    // NOTE(review): states may be added to 'data' while EnumerateStates() is being
    // iterated; this presumably relies on EnumerateStates() tolerating appends - confirm.
    foreach (var state in data.EnumerateStates())
    {
        // Only the first outgoing transition carrying any newline symbol is considered.
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);
        if (to.IsNewline)
        {
            // Target is already flagged as a newline state.
            continue;
        }

        TdfaState newlineState;
        if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every transition entering the target satisfies HasSingleSymbolFrom(newlines),
            // so the target itself is flagged.
            newlineState = to;
        }
        else
        {
            // Otherwise a new state is added that copies the target's accepting flag and
            // actions and tunnels to the original target; the newline transition of
            // 'state' is redirected to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}
/// <summary>
/// Enumerates the states of the automaton.
/// </summary>
/// <returns>The sequence produced by <c>data.EnumerateStates()</c>.</returns>
public IEnumerable<TdfaState> EnumerateStates()
{
    var states = data.EnumerateStates();
    return states;
}
/// <summary>
/// Builds the automaton data for <paramref name="regTree"/> over <paramref name="alphabet"/>.
/// Each state is identified by a set of regular-tree positions; the first state holds
/// <c>regTree.FirstPos</c> and further states are derived from follow positions.
/// </summary>
/// <param name="regTree">Regular tree providing first/follow positions, position symbols and position actions.</param>
/// <param name="alphabet">Alphabet used to encode position symbols into transition symbols.</param>
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): new states are added to 'data' inside this loop; the construction
    // presumably relies on EnumerateStates() also yielding those states - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // Collect the actions attached to the positions of this state; the sorted list
        // keeps them ordered by position.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbols of every position once per state instead of once per
        // (symbol, position) pair.
        var encodedPositions = S
            .Select(p => new { Position = p, Symbols = alphabet.Encode(regTree.GetPosSymbols(p)) })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
            encodedPositions.Select(e => e.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U = union of the follow positions of every position that can consume 'symbol'.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                // Reuse the state with the same position set when one exists.
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
/// <summary>
/// Builds the automaton data for <paramref name="regTree"/> over <paramref name="alphabet"/>.
/// Each state is identified by a set of regular-tree positions; the first state holds
/// <c>regTree.FirstPos</c> and further states are derived from follow positions.
/// </summary>
/// <param name="regTree">Regular tree providing first/follow positions, position symbols and position actions.</param>
/// <param name="alphabet">Alphabet used to encode position symbols into transition symbols.</param>
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): new states are added to 'data' inside this loop; the construction
    // presumably relies on EnumerateStates() also yielding those states - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // Collect the actions attached to the positions of this state; the sorted list
        // keeps them ordered by position.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbols of every position once per state instead of once per
        // (symbol, position) pair.
        var encodedPositions = S
            .Select(p => new { Position = p, Symbols = alphabet.Encode(regTree.GetPosSymbols(p)) })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
            encodedPositions.Select(e => e.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U = union of the follow positions of every position that can consume 'symbol'.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                // Reuse the state with the same position set when one exists.
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
/// <summary>
/// Builds the tunnel automaton: a DFA for the non-constant regular expressions in
/// <paramref name="noConstRegTree"/>, extended with every literal in
/// <paramref name="literalToAction"/> and with dedicated newline states.
/// </summary>
/// <remarks>
/// Overall plan:
/// 1. build full RegularTree (don't build first, following for positions)
/// 2. build regular alphabet (equivalence classes)
/// 3. build first, following for non-literal part of regular tree
/// 4. build non-literal TdfaData
/// </remarks>
/// <param name="noConstRegTree">Regular tree of the non-constant (non-literal) expressions.</param>
/// <param name="literalToAction">Literal text mapped to the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence classes plus the newline set,
    // and then every single character used by a literal is registered as an input set.
    var equivClasses = noConstRegTree
        .GetEquivalenceCsets()
        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling: the target of a newline transition is flagged IsNewline.
    // NOTE(review): states may be added to 'data' while EnumerateStates() is being
    // iterated; this presumably relies on EnumerateStates() tolerating appends - confirm.
    foreach (var state in data.EnumerateStates())
    {
        // Only the first outgoing transition carrying any newline symbol is considered.
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);
        if (to.IsNewline)
        {
            // Target is already flagged as a newline state.
            continue;
        }

        TdfaState newlineState;
        if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every transition entering the target satisfies HasSingleSymbolFrom(newlines),
            // so the target itself is flagged.
            newlineState = to;
        }
        else
        {
            // Otherwise a new state is added that copies the target's accepting flag and
            // actions and tunnels to the original target; the newline transition of
            // 'state' is redirected to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}