// Construction outline:
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData

/// <summary>
/// Builds the tunnel automaton: a DFA is created from the non-constant
/// regular tree, every literal is then added through
/// <c>ExtendAutomatonWithLiteral</c>, and finally states entered on a
/// newline symbol are flagged with <c>IsNewline</c>.
/// </summary>
/// <param name="noConstRegTree">Regular tree of the non-constant REs; supplies the equivalence csets and is passed to <c>RegularToDfaAlgorithm</c>.</param>
/// <param name="literalToAction">Constant REs (literals) paired with the action handed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string, Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence csets plus the
    // newline set, which is encoded again further down.
    var equivClasses = noConstRegTree
                        .GetEquivalenceCsets()
                        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    // Register every character of every literal as an input set of the alphabet.
    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling.
    // The loop body may call data.AddState, so iterate over a snapshot instead
    // of the live state sequence; growing a collection while enumerating it is
    // unsafe for most enumerators.
    // NOTE(review): assumes a freshly created TdfaState has no outgoing
    // transitions and therefore needs no newline handling itself -- confirm.
    var statesToScan = data.EnumerateStates().ToList();
    foreach (var state in statesToScan)
    {
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            // No transition of this state carries a newline symbol.
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        if (to.IsNewline)
        {
            // Target is already flagged as a newline state.
            continue;
        }

        TdfaState newlineState;
        if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition satisfies HasSingleSymbolFrom(newlines),
            // so the target itself can be flagged.
            newlineState = to;
        }
        else
        {
            // Otherwise insert a dedicated state that copies the target's
            // acceptance and actions, tunnels to the original target, and
            // takes over this state's newline transition.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}
// Construction outline:
// 1. build full RegularTree (don't build first, following for positions)
// 2. build regular alphabet (equivalence classes)
// 3. build first, following for non-literal part of regular tree
// 4. build non-literal TdfaData

/// <summary>
/// Builds the tunnel automaton: a DFA is created from the non-constant
/// regular tree, every literal is then added through
/// <c>ExtendAutomatonWithLiteral</c>, and finally states entered on a
/// newline symbol are flagged with <c>IsNewline</c>.
/// </summary>
/// <param name="noConstRegTree">Regular tree of the non-constant REs; supplies the equivalence csets and is passed to <c>RegularToDfaAlgorithm</c>.</param>
/// <param name="literalToAction">Constant REs (literals) paired with the action handed to <c>ExtendAutomatonWithLiteral</c>.</param>
public RegularToTdfaAlgorithm(RegularTree noConstRegTree, Dictionary<string,Action> literalToAction)
{
    // The alphabet is built from the tree's equivalence csets plus the
    // newline set, which is encoded again further down.
    var equivClasses = noConstRegTree
                        .GetEquivalenceCsets()
                        .Union(new [] { NewLines });
    var alphabet = new EquivalenceClassesAlphabet(equivClasses);

    // Register every character of every literal as an input set of the alphabet.
    foreach (var literal in literalToAction.Keys)
    {
        foreach (char ch in literal)
        {
            alphabet.AddInputSet(SparseIntSetType.Instance.Of(ch));
        }
    }

    // Step 1. Convert the NFA for non-constant REs to a DFA using the usual
    // algorithms for subset construction and state minimization [ASU86, WaG84].
    var initialDfa = new RegularToDfaAlgorithm(noConstRegTree, alphabet);
    this.data = initialDfa.Data;

#if false
    using (var view = new IronText.Diagnostics.GvGraphView(Guid.NewGuid() + ".gv"))
    {
        data.DescribeGraph(view);
    }
#endif

    // Step 2. Extend the DFA to a tunnel automaton by setting Tunnel (s) to NoState
    // for every state s.
    foreach (var S in data.EnumerateStates())
    {
        S.Tunnel = NoState;
    }

    // Step 3: Compute the set of ambiguous states of the tunnel automaton.
    this.ambiguous = FindAmbiguousStates();

    // Step 4: For every constant RE execute Step 5 which incrementally extends
    // the tunnel automaton. Continue with Step 6.
    foreach (var pair in literalToAction)
    {
        ExtendAutomatonWithLiteral(pair.Key, pair.Value);
    }

    var newlines = alphabet.Encode(NewLines);

    // Add new line handling.
    // The loop body may call data.AddState, so iterate over a snapshot instead
    // of the live state sequence; growing a collection while enumerating it is
    // unsafe for most enumerators.
    // NOTE(review): assumes a freshly created TdfaState has no outgoing
    // transitions and therefore needs no newline handling itself -- confirm.
    var statesToScan = data.EnumerateStates().ToList();
    foreach (var state in statesToScan)
    {
        var i = state.Outgoing.FindIndex(t => t.HasAnySymbol(newlines));
        if (i < 0)
        {
            // No transition of this state carries a newline symbol.
            continue;
        }

        var newlineTransition = state.Outgoing[i];
        var to = data.GetState(newlineTransition.To);

        if (to.IsNewline)
        {
            // Target is already flagged as a newline state.
            continue;
        }

        TdfaState newlineState;
        if (data.EnumerateIncoming(to.Index).All(t => t.HasSingleSymbolFrom(newlines)))
        {
            // Every incoming transition satisfies HasSingleSymbolFrom(newlines),
            // so the target itself can be flagged.
            newlineState = to;
        }
        else
        {
            // Otherwise insert a dedicated state that copies the target's
            // acceptance and actions, tunnels to the original target, and
            // takes over this state's newline transition.
            newlineState = new TdfaState(data);
            data.AddState(newlineState);
            newlineState.Tunnel = newlineTransition.To;
            newlineState.IsAccepting = to.IsAccepting;
            newlineState.Actions.AddRange(to.Actions);

            data.DeleteTransition(state.Index, newlines);
            data.AddTransition(state.Index, newlines, newlineState.Index);
        }

        newlineState.IsNewline = true;
    }
}