/// <summary>
/// Produces a textual dump of the automaton: one line per transition in the
/// form <c>source-positions --symbols--> target-positions</c>, with a
/// trailing marker when the target state is accepting.
/// </summary>
/// <returns>The multi-line description of all transitions.</returns>
public override string ToString()
{
    var text = new StringBuilder();

    foreach (var source in data.EnumerateStates())
    {
        foreach (var edge in source.Outgoing)
        {
            // Resolve the pieces in the same order they appear in the line.
            var fromPositions = data.GetState(edge.From).Positions;
            var decodedSymbols = alphabet.Decode(edge.Symbols);
            var toPositions = data.GetState(edge.To).Positions;

            text.AppendFormat("{0} --{1}--> {2}", fromPositions, decodedSymbols, toPositions);

            bool targetAccepts = data.GetState(edge.To).IsAccepting;
            if (targetAccepts)
            {
                text.Append(" [CAN ACCEPT] ");
            }

            text.AppendLine();
        }
    }

    return text.ToString();
}
/// <summary>
/// Converts an automaton transition into a scanner transition: the encoded
/// symbol set is decoded through the container's alphabet, split into
/// intervals, and each interval is mapped with <c>MakeCharRange</c>.
/// </summary>
/// <param name="transition">The automaton transition to convert.</param>
/// <returns>A scanner transition leading to the container state for <c>transition.To</c>.</returns>
private IScannerTransition MakeScannerTransition(TdfaTransition transition)
{
    var decoded = container.Alphabet.Decode(transition.Symbols);
    var ranges = decoded.EnumerateIntervals().Select(MakeCharRange);
    var destination = container.GetState(transition.To);

    return new ScannerTransition(ranges, destination);
}
/// <summary>
/// Looks up the successor of <paramref name="state"/> for a raw input value.
/// The input is first encoded into an alphabet symbol; the first outgoing
/// transition whose symbol set contains that symbol wins.
/// </summary>
/// <param name="state">Index of the current state.</param>
/// <param name="input">Raw (unencoded) input value.</param>
/// <param name="next">Receives the target state index, or -1 when no transition matches.</param>
/// <returns><c>true</c> when a matching transition exists; otherwise <c>false</c>.</returns>
bool IDfaSimulation.TryNext(int state, int input, out int next)
{
    int encoded = data.Alphabet.Encode(input);

    // Start from the "no transition" answer and overwrite it on a match.
    next = -1;

    foreach (var edge in data.GetState(state).Outgoing)
    {
        if (!edge.Symbols.Contains(encoded))
        {
            continue;
        }

        next = edge.To;
        return true;
    }

    return false;
}
/// <summary>
/// Finds the lowest state index whose position set equals
/// <paramref name="positionSet"/>, using a linear scan over all states.
/// </summary>
/// <param name="this">The automaton data to search.</param>
/// <param name="positionSet">The position set to look for.</param>
/// <returns>The index of the first matching state, or -1 when none matches.</returns>
internal static int IndexOfState(this ITdfaData @this, IntSet positionSet)
{
    // The state count is read once, before the scan starts.
    int total = @this.StateCount;
    int index = 0;

    while (index < total)
    {
        var candidate = @this.GetState(index);
        if (candidate.Positions.Equals(positionSet))
        {
            return index;
        }

        index++;
    }

    return -1;
}
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData
/// <summary>
/// Builds a tunnel automaton: a DFA is first constructed for the non-constant
/// regular expressions, then extended with one path per literal, and finally
/// post-processed so that targets of newline transitions are flagged with
/// <c>IsNewline</c>.
/// </summary>
/// <param name="noConstRegTree">Regular tree of the non-constant (non-literal) expressions.</param>
/// <param name="literalToAction">Maps each literal text to the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string, Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence sets plus the newline
    // set, and then refined with every individual character of every literal.
    var equivClasses = noConstRegTree
                        .GetEquivalenceCsets()
                        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling: the target of a newline transition gets flagged
    // with IsNewline. Only the first outgoing transition that reports any
    // newline symbol is considered for each state.
    // NOTE(review): states may be appended inside this loop; presumably
    // EnumerateStates tolerates growth during enumeration - confirm.
    foreach (var state in data.EnumerateStates())
    {
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        TdfaState newlineState;
        if (to.IsNewline)
        {
            // Target is already flagged.
            continue;
        }
        else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition passes the HasSingleSymbolFrom(newlines)
            // check, so the existing target is flagged in place.
            newlineState = to;
        }
        else
        {
            // Otherwise a fresh state is created that copies the target's
            // accepting flag and actions and tunnels to the original target;
            // the newline transition is redirected to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}
/// <summary>
/// Extends the tunnel automaton so that it recognizes <paramref name="literal"/>.
/// The literal's symbols are traced from the start state in three phases:
/// existing transitions are followed while they lead to non-ambiguous states,
/// then the remaining existing path is duplicated into fresh states that
/// tunnel back to the originals, and finally brand-new states are appended
/// for any symbols that are left. The last state reached is marked accepting
/// and <paramref name="scanAction"/> is inserted at the front of its actions.
/// </summary>
/// <param name="literal">The constant text to add to the automaton.</param>
/// <param name="scanAction">Action inserted first in the final state's action list.</param>
private void ExtendAutomatonWithLiteral(string literal, int scanAction)
{
    var state = data.Start;

    // The enumerator is disposable; the using block releases it even when
    // one of the automaton operations below throws.
    using (var symbols = EnumerateLiteralSymbols(data.Alphabet, literal).GetEnumerator())
    {
        bool hasSymbol;

        // trace and do nothing
        while ((hasSymbol = symbols.MoveNext()))
        {
            State next = Control(state, symbols.Current);
            if (next == NoState || ambiguous.Contains(next))
            {
                // The current symbol stays pending for the next phase.
                break;
            }

            state = next;
        }

        int previous = state;

        // trace and duplicate the path
        while (hasSymbol)
        {
            State next = Control(state, symbols.Current);
            if (next != NoState)
            {
                state = next;

                // Redirect the transition from the duplicated path to a fresh
                // state that mirrors the existing one and tunnels back to it.
                var newStateInfo = new TdfaState(data);
                int newState = data.AddState(newStateInfo);

                data.DeleteTransition(from: previous, symbol: symbols.Current);
                data.AddTransition(from: previous, symbol: symbols.Current, to: newState);

                var S = data.GetState(state);
                newStateInfo.IsAccepting = S.IsAccepting;
                newStateInfo.Actions.AddRange(S.Actions);
                newStateInfo.Tunnel = state;

                previous = newState;
                hasSymbol = symbols.MoveNext();
            }
            else
            {
                // No transition on this symbol: retry from the tunnel target,
                // or give up duplicating when there is no tunnel.
                var S = data.GetState(state);
                if (S.Tunnel == NoState)
                {
                    break;
                }

                state = S.Tunnel;
            }
        }

        // extend the path
        for (; hasSymbol; hasSymbol = symbols.MoveNext())
        {
            var newStateInfo = new TdfaState(data);
            int newState = data.AddState(newStateInfo);

            data.AddTransition(from: previous, symbol: symbols.Current, to: newState);
            newStateInfo.Tunnel = NoState;

            previous = newState;
        }

        // process new final state
        var finalState = data.GetState(previous);
        finalState.IsAccepting = true;
        finalState.Actions.Insert(0, scanAction);
    }
}
/// <summary>
/// Returns the state stored under the given index by delegating to the
/// underlying automaton data.
/// </summary>
/// <param name="state">Index of the requested state.</param>
/// <returns>The state object for <paramref name="state"/>.</returns>
public TdfaState GetState(int state)
{
    var found = data.GetState(state);
    return found;
}
/// <summary>
/// Builds a DFA from a regular tree by subset construction over tree
/// positions. The start state holds <c>regTree.FirstPos</c>; for every state
/// and every symbol occurring at its positions, the follow positions are
/// united into a target position set, which is either matched to an existing
/// state or added as a new one.
/// </summary>
/// <param name="regTree">Regular tree supplying first/follow positions, position symbols and actions.</param>
/// <param name="alphabet">Alphabet used to encode position symbol sets into symbols.</param>
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): states are appended inside this loop; presumably
    // EnumerateStates also visits states added during enumeration - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // Collect actions keyed by position so they are added in position order.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbol set of each position once per state; the result
        // is reused both for the union below and for every symbol in the loop.
        var encodedPositions = S
            .Select(position => new
            {
                Position = position,
                Symbols = alphabet.Encode(regTree.GetPosSymbols(position))
            })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
            encodedPositions.Select(entry => entry.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            // The end-of-input symbol never produces a transition.
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U = union of follow positions of all positions matching the symbol.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
/// <summary>
/// Builds a DFA from a regular tree by subset construction over tree
/// positions. The start state holds <c>regTree.FirstPos</c>; for every state
/// and every symbol occurring at its positions, the follow positions are
/// united into a target position set, which is either matched to an existing
/// state or added as a new one.
/// </summary>
/// <param name="regTree">Regular tree supplying first/follow positions, position symbols and actions.</param>
/// <param name="alphabet">Alphabet used to encode position symbol sets into symbols.</param>
public RegularToDfaAlgorithm(RegularTree regTree, IRegularAlphabet alphabet)
{
    this.alphabet = alphabet;

    data = new TdfaData(alphabet);
    data.AddState(new TdfaState(data) { Positions = regTree.FirstPos });

    // NOTE(review): states are appended inside this loop; presumably
    // EnumerateStates also visits states added during enumeration - confirm.
    foreach (var st in data.EnumerateStates())
    {
        int Sindex = st.Index;
        var S = st.Positions;

        // A state containing the end-of-input position is accepting.
        if (S.Contains(regTree.EoiPosition))
        {
            data.GetState(Sindex).IsAccepting = true;
        }

        // Collect actions keyed by position so they are added in position order.
        var actionPosList = new SortedList<int, int>();
        foreach (var position in S)
        {
            var action = regTree.GetPosAction(position);
            if (action.HasValue)
            {
                actionPosList[position] = action.Value;
            }
        }

        if (actionPosList.Count != 0)
        {
            st.Actions.AddRange(actionPosList.Values);
        }

        // Encode the symbol set of each position once per state; the result
        // is reused both for the union below and for every symbol in the loop.
        var encodedPositions = S
            .Select(position => new
            {
                Position = position,
                Symbols = alphabet.Encode(regTree.GetPosSymbols(position))
            })
            .ToList();

        var transitionSymbols = alphabet.SymbolSetType.Union(
            encodedPositions.Select(entry => entry.Symbols));

        foreach (var symbol in transitionSymbols)
        {
            // The end-of-input symbol never produces a transition.
            if (symbol == alphabet.EoiSymbol)
            {
                continue;
            }

            // U = union of follow positions of all positions matching the symbol.
            var U = TdfaData.PositionSetType.Mutable();
            foreach (var entry in encodedPositions)
            {
                if (entry.Symbols.Contains(symbol))
                {
                    U.AddAll(regTree.GetFollowPos(entry.Position));
                }
            }

            if (!U.IsEmpty)
            {
                int Uindex = data.IndexOfState(U);
                if (Uindex < 0)
                {
                    Uindex = data.AddState(new TdfaState(data) { Positions = U });
                }

                data.AddTransition(from: Sindex, symbol: symbol, to: Uindex);
            }
        }
    }
}
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData
/// <summary>
/// Builds a tunnel automaton: a DFA is first constructed for the non-constant
/// regular expressions, then extended with one path per literal, and finally
/// post-processed so that targets of newline transitions are flagged with
/// <c>IsNewline</c>.
/// </summary>
/// <param name="noConstRegTree">Regular tree of the non-constant (non-literal) expressions.</param>
/// <param name="literalToAction">Maps each literal text to the action passed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence sets plus the newline
    // set, and then refined with every individual character of every literal.
    var equivClasses = noConstRegTree
                        .GetEquivalenceCsets()
                        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling: the target of a newline transition gets flagged
    // with IsNewline. Only the first outgoing transition that reports any
    // newline symbol is considered for each state.
    // NOTE(review): states may be appended inside this loop; presumably
    // EnumerateStates tolerates growth during enumeration - confirm.
    foreach (var state in data.EnumerateStates())
    {
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        TdfaState newlineState;
        if (to.IsNewline)
        {
            // Target is already flagged.
            continue;
        }
        else if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition passes the HasSingleSymbolFrom(newlines)
            // check, so the existing target is flagged in place.
            newlineState = to;
        }
        else
        {
            // Otherwise a fresh state is created that copies the target's
            // accepting flag and actions and tunnels to the original target;
            // the newline transition is redirected to it.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}