/// <summary>
/// Returns an automaton that accepts the union of the languages of the given
/// automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="a1"> First automaton. </param>
/// <param name="a2"> Second automaton. </param>
/// <returns>
/// The result of <c>a1.CloneIfRequired()</c> when both arguments are the same instance or
/// are singletons holding the same string (ordinal comparison); otherwise the result of
/// <c>a1.CloneExpandedIfRequired()</c> with a new initial state that has epsilon
/// transitions to both original initial states.
/// </returns>
public static Automaton Union(Automaton a1, Automaton a2)
{
    // Trivial union: both operands describe the same language, so a1 alone is the answer.
    if ((a1.IsSingleton && a2.IsSingleton && a1.singleton.Equals(a2.singleton, StringComparison.Ordinal)) || a1 == a2)
    {
        return a1.CloneIfRequired();
    }

    // a1 and a2 are known to be distinct instances here (the a1 == a2 case returned
    // above), so the aliasing branch that forced CloneExpanded() could never run and
    // has been removed.
    a1 = a1.CloneExpandedIfRequired();
    a2 = a2.CloneExpandedIfRequired();

    // New start state that branches into both operands.
    State s = new State();
    s.AddEpsilon(a1.initial);
    s.AddEpsilon(a2.initial);
    a1.initial = s;
    a1.deterministic = false;
    a1.ClearNumberedStates();
    a1.CheckMinimizeAlways();
    return a1;
}
/// <summary>
/// Creates a copy of this automaton. For a non-singleton automaton every numbered
/// state is duplicated together with its accept flag and its transitions, so the
/// copy does not share <see cref="State"/> objects with this instance.
/// </summary>
/// <returns> The copied automaton, typed as <see cref="object"/>. </returns>
public virtual object Clone()
{
    Automaton copy = (Automaton)base.MemberwiseClone();
    if (IsSingleton)
    {
        // Nothing to duplicate besides the member-wise copy.
        copy.ClearNumberedStates();
        return copy;
    }

    Dictionary<State, State> twins = new Dictionary<State, State>();
    State[] originals = GetNumberedStates();

    // Pass 1: allocate one fresh state per original state.
    foreach (State original in originals)
    {
        twins[original] = new State();
    }

    // Pass 2: copy accept flags and re-target transitions to the fresh states.
    foreach (State original in originals)
    {
        State twin = twins[original];
        twin.accept = original.accept;
        if (original == initial)
        {
            copy.initial = twin;
        }
        foreach (Transition edge in original.GetTransitions())
        {
            twin.AddTransition(new Transition(edge.min, edge.max, twins[edge.to]));
        }
    }

    copy.ClearNumberedStates();
    return copy;
}
/// <summary>
/// Builds an automaton accepting the empty string in addition to everything the
/// given automaton accepts.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="a"> Automaton to make optional. </param>
/// <returns>
/// The result of <c>a.CloneExpandedIfRequired()</c>, given a new accepting initial
/// state with an epsilon transition to the previous initial state.
/// </returns>
public static Automaton Optional(Automaton a)
{
    Automaton result = a.CloneExpandedIfRequired();

    // New entry point: reaches the old initial state and is itself accepting.
    State entry = new State();
    entry.AddEpsilon(result.initial);
    entry.accept = true;

    result.initial = entry;
    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Builds an automaton accepting the Kleene star (zero or more concatenated
/// repetitions) of the language of the given automaton. The input is always
/// copied via <c>CloneExpanded()</c> before any change is made.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="a"> Automaton to repeat. </param>
/// <returns> The expanded clone, rewired with a new accepting initial state. </returns>
public static Automaton Repeat(Automaton a)
{
    Automaton result = a.CloneExpanded();

    // The hub accepts the empty repetition and leads into one pass of the language.
    State hub = new State { accept = true };
    hub.AddEpsilon(result.initial);

    // Every accepting state loops back to the hub to allow another repetition.
    foreach (State accepting in result.GetAcceptStates())
    {
        accepting.AddEpsilon(hub);
    }

    result.initial = hub;
    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Reverses the language of the given automaton in place (a singleton is expanded
/// first) and returns the set of states that were accepting before the reversal;
/// these are the states the new initial state has epsilon transitions to.
/// </summary>
/// <param name="a"> Automaton to reverse; it is modified. </param>
/// <returns> The formerly accepting states, i.e. the new set of initial states. </returns>
public static ISet<State> Reverse(Automaton a)
{
    a.ExpandSingleton();

    Dictionary<State, ISet<Transition>> incoming = new Dictionary<State, ISet<Transition>>();
    State[] all = a.GetNumberedStates();

    // Remember which states accept before the flags are wiped.
    ISet<State> formerAccepts = new JCG.HashSet<State>();
    foreach (State candidate in all)
    {
        if (candidate.Accept)
        {
            formerAccepts.Add(candidate);
        }
    }

    // Prepare an empty reversed-edge set per state and clear every accept flag.
    foreach (State state in all)
    {
        incoming[state] = new JCG.HashSet<Transition>();
        state.accept = false;
    }

    // Flip each edge: source --[min,max]--> target becomes target --[min,max]--> source.
    foreach (State source in all)
    {
        foreach (Transition edge in source.GetTransitions())
        {
            incoming[edge.to].Add(new Transition(edge.min, edge.max, source));
        }
    }

    // Install the reversed edges.
    foreach (State state in all)
    {
        ISet<Transition> reversed = incoming[state];
        state.SetTransitions(reversed.ToArray());
    }

    // The old initial state becomes accepting; a fresh initial state fans out to
    // all formerly accepting states so each of them stays reachable.
    a.initial.accept = true;
    a.initial = new State();
    foreach (State start in formerAccepts)
    {
        a.initial.AddEpsilon(start);
    }

    a.deterministic = false;
    a.ClearNumberedStates();
    return formerAccepts;
}
/// <summary>
/// Builds an automaton accepting the union of the languages of all automata in
/// the given collection. Automata for which <c>BasicOperations.IsEmpty</c> is true
/// are skipped.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="l"> Automata to combine. </param>
/// <returns>
/// A new automaton whose initial state has an epsilon transition to the initial
/// state of each (cloned or expanded) non-empty member.
/// </returns>
public static Automaton Union(ICollection<Automaton> l)
{
    // Fewer distinct hash codes than entries is treated as "some entries may be the
    // same instance"; in that case every member is cloned unconditionally.
    JCG.HashSet<int> seenHashes = new JCG.HashSet<int>();
    foreach (Automaton candidate in l)
    {
        seenHashes.Add(candidate.GetHashCode());
    }
    bool mayShareInstances = seenHashes.Count != l.Count;

    State root = new State();
    foreach (Automaton member in l)
    {
        if (!BasicOperations.IsEmpty(member))
        {
            Automaton branch = mayShareInstances
                ? member.CloneExpanded()
                : member.CloneExpandedIfRequired();
            root.AddEpsilon(branch.initial);
        }
    }

    Automaton result = new Automaton();
    result.initial = root;
    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Builds an automaton accepting the concatenation of the languages of the two
/// given automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
/// <param name="a1"> Automaton for the first part. </param>
/// <param name="a2"> Automaton for the second part. </param>
/// <returns>
/// A string automaton when both inputs are singletons, the empty automaton when
/// either input is empty, otherwise the (cloned or expanded) first automaton with
/// its accepting states linked by epsilon transitions to the second one.
/// </returns>
public static Automaton Concatenate(Automaton a1, Automaton a2)
{
    bool bothSingletons = a1.IsSingleton && a2.IsSingleton;
    if (bothSingletons)
    {
        return BasicAutomata.MakeString(a1.singleton + a2.singleton);
    }
    if (IsEmpty(a1) || IsEmpty(a2))
    {
        return BasicAutomata.MakeEmpty();
    }

    // adding epsilon transitions with the NFA concatenation algorithm
    // in this case always produces a resulting DFA, preventing expensive
    // redundant determinize() calls for this common case.
    bool resultIsDeterministic = a1.IsSingleton && a2.IsDeterministic;

    Automaton head;
    Automaton tail;
    if (a1 == a2)
    {
        // Same instance on both sides: two independent copies are needed.
        head = a1.CloneExpanded();
        tail = a2.CloneExpanded();
    }
    else
    {
        head = a1.CloneExpandedIfRequired();
        tail = a2.CloneExpandedIfRequired();
    }

    // Each accepting state of the head stops accepting and continues into the tail.
    foreach (State end in head.GetAcceptStates())
    {
        end.accept = false;
        end.AddEpsilon(tail.initial);
    }

    head.deterministic = resultIsDeterministic;
    head.ClearNumberedStates();
    head.CheckMinimizeAlways();
    return head;
}
/// <summary>
/// Minimizes the given automaton using Hopcroft's algorithm.
/// <para/>
/// The automaton is modified in place: it is determinized and totalized, its states
/// are partitioned into blocks that are refined until no further split is pending,
/// and it is then rebuilt with one new state per block. Dead transitions are removed
/// at the end.
/// </summary>
/// <param name="a"> Automaton to minimize; it is modified. </param>
public static void MinimizeHopcroft(Automaton a)
{
    a.Determinize();
    // Shortcut: the initial state has a single self-loop covering the whole code point
    // range, so there is nothing to refine.
    if (a.initial.numTransitions == 1)
    {
        Transition t = a.initial.TransitionsArray[0];
        if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
        {
            return;
        }
    }
    // NOTE(review): Step(...) results are dereferenced below without a null check, which
    // presumably relies on Totalize() giving every state a transition for every symbol - confirm.
    a.Totalize();
    // initialize data structures
    // sigma: start points of the automaton; each index x below stands for the symbol sigma[x].
    int[] sigma = a.GetStartPoints();
    State[] states = a.GetNumberedStates();
    int sigmaLen = sigma.Length, statesLen = states.Length;
    // reverse[r, x]: states that step to the state numbered r on sigma[x] (null if none).
    JCG.List <State>[,] reverse = new JCG.List <State> [statesLen, sigmaLen];
    // partition[j]: the states currently in block j.
    ISet <State>[] partition = new JCG.HashSet <State> [statesLen];
    // splitblock[j]: states of block j marked for splitting during the current round.
    JCG.List <State>[] splitblock = new JCG.List <State> [statesLen];
    // block[i]: index of the block that the state numbered i currently belongs to.
    int[] block = new int[statesLen];
    // active[j, x]: states of block j that have at least one incoming transition on sigma[x].
    StateList[,] active = new StateList[statesLen, sigmaLen];
    // active2[i, x]: the list node of state i inside an active[.., x] list (null if absent).
    StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
    // pending: work queue of (block, symbol index) pairs still to be processed.
    Queue <Int32Pair> pending = new Queue <Int32Pair>(); // LUCENENET specific - Queue is much more performant than LinkedList
    // pending2: membership flags for the queue, indexed by symbolIndex * statesLen + block.
    OpenBitSet pending2 = new OpenBitSet(sigmaLen * statesLen);
    // split: states marked this round; refine / refine2: blocks that received marked states.
    OpenBitSet split = new OpenBitSet(statesLen), refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);
    for (int q = 0; q < statesLen; q++)
    {
        splitblock[q] = new JCG.List <State>();
        partition[q] = new JCG.HashSet <State>();
        for (int x = 0; x < sigmaLen; x++)
        {
            active[q, x] = new StateList();
        }
    }
    // find initial partition and reverse edges
    for (int q = 0; q < statesLen; q++)
    {
        State qq = states[q];
        // Block 0 starts with the accepting states, block 1 with the non-accepting ones.
        int j = qq.accept ? 0 : 1;
        partition[j].Add(qq);
        block[q] = j;
        for (int x = 0; x < sigmaLen; x++)
        {
            //JCG.List<State>[] r = reverse[qq.Step(sigma[x]).number];
            // r is the number of the state reached from qq on sigma[x].
            var r = qq.Step(sigma[x]).number;
            if (reverse[r, x] == null)
            {
                reverse[r, x] = new JCG.List <State>();
            }
            reverse[r, x].Add(qq);
        }
    }
    // initialize active sets
    for (int j = 0; j <= 1; j++)
    {
        for (int x = 0; x < sigmaLen; x++)
        {
            foreach (State qq in partition[j])
            {
                if (reverse[qq.number, x] != null)
                {
                    active2[qq.number, x] = active[j, x].Add(qq);
                }
            }
        }
    }
    // initialize pending
    for (int x = 0; x < sigmaLen; x++)
    {
        // For each symbol, queue whichever of the two initial blocks has the smaller active list.
        int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
        pending.Enqueue(new Int32Pair(j, x));
        pending2.Set(x * statesLen + j);
    }
    // process pending until fixed point
    // k is the number of blocks in use, and therefore the index of the next block to create.
    int k = 2;
    while (pending.Count > 0)
    {
        Int32Pair ip = pending.Dequeue();
        int p = ip.n1;
        int x = ip.n2;
        pending2.Clear(x * statesLen + p);
        // find states that need to be split off their blocks
        for (StateListNode m = active[p, x].First; m != null; m = m.Next)
        {
            JCG.List <State> r = reverse[m.Q.number, x];
            if (r != null)
            {
                foreach (State s in r)
                {
                    int i = s.number;
                    if (!split.Get(i))
                    {
                        split.Set(i);
                        int j = block[i];
                        splitblock[j].Add(s);
                        // Record each affected block only once.
                        if (!refine2.Get(j))
                        {
                            refine2.Set(j);
                            refine.Set(j);
                        }
                    }
                }
            }
        }
        // refine blocks
        for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
        {
            JCG.List <State> sb = splitblock[j];
            // Split only when some, but not all, states of block j were marked.
            if (sb.Count < partition[j].Count)
            {
                ISet <State> b1 = partition[j];
                ISet <State> b2 = partition[k];
                foreach (State s in sb)
                {
                    // Move the marked state from block j to the new block k.
                    b1.Remove(s);
                    b2.Add(s);
                    block[s.number] = k;
                    for (int c = 0; c < sigmaLen; c++)
                    {
                        StateListNode sn = active2[s.number, c];
                        if (sn != null && sn.Sl == active[j, c])
                        {
                            sn.Remove();
                            active2[s.number, c] = active[k, c].Add(s);
                        }
                    }
                }
                // update pending
                for (int c = 0; c < sigmaLen; c++)
                {
                    int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                    // Queue block j when it is not already pending, is non-empty for this symbol and
                    // is no larger than block k; otherwise queue the new block k.
                    if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                    {
                        pending2.Set(ofs + j);
                        pending.Enqueue(new Int32Pair(j, c));
                    }
                    else
                    {
                        pending2.Set(ofs + k);
                        pending.Enqueue(new Int32Pair(k, c));
                    }
                }
                k++;
            }
            // Reset the per-round marks for this block.
            refine2.Clear(j);
            foreach (State s in sb)
            {
                split.Clear(s.number);
            }
            sb.Clear();
        }
        refine.Clear(0, refine.Length);
    }
    // make a new state for each equivalence class, set initial state
    State[] newstates = new State[k];
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = new State();
        newstates[n] = s;
        foreach (State q in partition[n])
        {
            if (q == a.initial)
            {
                a.initial = s;
            }
            s.accept = q.accept;
            // The new state temporarily keeps the old number of one member of its block, while
            // every old state's number is overwritten with the index of its block.
            s.number = q.number; // select representative
            q.number = n;
        }
    }
    // build transitions and set acceptance
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = newstates[n];
        // states[s.number] is the representative old state; t.to.number now holds a block index.
        s.accept = states[s.number].accept;
        foreach (Transition t in states[s.number].GetTransitions())
        {
            s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
        }
    }
    a.ClearNumberedStates();
    a.RemoveDeadTransitions();
}
/// <summary>
/// Builds an automaton accepting the concatenation, in list order, of the
/// languages of the given automata.
/// <para/>
/// Complexity: linear in total number of states.
/// </summary>
/// <param name="l"> Automata to concatenate. </param>
/// <returns>
/// The empty-string automaton for an empty list, a string automaton when every
/// element is a singleton, the empty automaton when any element is empty, and
/// otherwise the (cloned or expanded) first element chained to the remaining
/// elements through epsilon transitions.
/// </returns>
public static Automaton Concatenate(IList<Automaton> l)
{
    if (l.Count == 0)
    {
        return BasicAutomata.MakeEmptyString();
    }

    // Fast path: all elements are singletons, so their strings can simply be joined.
    bool onlySingletons = true;
    foreach (Automaton item in l)
    {
        if (!item.IsSingleton)
        {
            onlySingletons = false;
            break;
        }
    }
    if (onlySingletons)
    {
        StringBuilder joined = new StringBuilder();
        foreach (Automaton item in l)
        {
            joined.Append(item.singleton);
        }
        return BasicAutomata.MakeString(joined.ToString());
    }

    // One empty language anywhere makes the whole concatenation empty.
    foreach (Automaton item in l)
    {
        if (BasicOperations.IsEmpty(item))
        {
            return BasicAutomata.MakeEmpty();
        }
    }

    // Fewer distinct hash codes than entries is treated as "some entries may be the
    // same instance"; in that case every element is cloned unconditionally.
    JCG.HashSet<int> seenHashes = new JCG.HashSet<int>();
    foreach (Automaton item in l)
    {
        seenHashes.Add(item.GetHashCode());
    }
    bool mayShareInstances = seenHashes.Count != l.Count;

    Automaton result = mayShareInstances
        ? l[0].CloneExpanded()
        : l[0].CloneExpandedIfRequired();

    // States at which the automaton built so far accepts.
    ISet<State> openEnds = result.GetAcceptStates();
    for (int index = 1; index < l.Count; index++)
    {
        Automaton next = l[index];
        if (next.IsEmptyString)
        {
            continue;
        }

        Automaton part = mayShareInstances
            ? next.CloneExpanded()
            : next.CloneExpandedIfRequired();
        ISet<State> partAccepts = part.GetAcceptStates();
        foreach (State end in openEnds)
        {
            end.accept = false;
            end.AddEpsilon(part.initial);
            // The flag is re-read after AddEpsilon on purpose; presumably AddEpsilon can
            // mark the state accepting again (verify against State.AddEpsilon).
            if (end.accept)
            {
                partAccepts.Add(end);
            }
        }
        openEnds = partAccepts;
    }

    result.deterministic = false;
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Adds epsilon transitions to the given automaton. This method adds extra
/// character interval transitions that are equivalent to the given set of
/// epsilon transitions.
/// <para/>
/// The automaton is modified in place, and <paramref name="pairs"/> is extended with
/// the additional pairs discovered while chaining the given ones together.
/// </summary>
/// <param name="a"> Automaton. </param>
/// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
/// source/destination states where epsilon transitions should be
/// added. The collection must be modifiable, because new pairs are added to it. </param>
public static void AddEpsilons(Automaton a, ICollection<StatePair> pairs)
{
    a.ExpandSingleton();

    // forward[s]: states reachable from s through one recorded pair;
    // back[s]: states from which s is reachable through one recorded pair.
    Dictionary<State, JCG.HashSet<State>> forward = new Dictionary<State, JCG.HashSet<State>>();
    Dictionary<State, JCG.HashSet<State>> back = new Dictionary<State, JCG.HashSet<State>>();
    foreach (StatePair p in pairs)
    {
        if (!forward.TryGetValue(p.s1, out JCG.HashSet<State> to))
        {
            to = new JCG.HashSet<State>();
            forward[p.s1] = to;
        }
        to.Add(p.s2);
        if (!back.TryGetValue(p.s2, out JCG.HashSet<State> from))
        {
            from = new JCG.HashSet<State>();
            back[p.s2] = from;
        }
        from.Add(p.s1);
    }

    // calculate epsilon closure
    // worklist keeps processing order; workset mirrors its content for membership tests.
    LinkedList<StatePair> worklist = new LinkedList<StatePair>(pairs);
    JCG.HashSet<StatePair> workset = new JCG.HashSet<StatePair>(pairs);
    while (worklist.Count > 0)
    {
        StatePair p = worklist.First.Value;
        // p is always the head node, so remove it directly instead of searching by value.
        worklist.RemoveFirst();
        workset.Remove(p);

        if (!forward.TryGetValue(p.s2, out JCG.HashSet<State> to))
        {
            continue;
        }

        foreach (State s in to)
        {
            // p.s1 -> p.s2 and p.s2 -> s together imply p.s1 -> s.
            StatePair pp = new StatePair(p.s1, s);
            if (pairs.Contains(pp))
            {
                continue;
            }

            pairs.Add(pp);
            forward[p.s1].Add(s);
            back[s].Add(p.s1);
            worklist.AddLast(pp);
            workset.Add(pp);

            // Pairs that end at p.s1 must be revisited, because p.s1 now reaches more states.
            if (back.TryGetValue(p.s1, out JCG.HashSet<State> from))
            {
                foreach (State q in from)
                {
                    StatePair qq = new StatePair(q, p.s1);
                    if (!workset.Contains(qq))
                    {
                        worklist.AddLast(qq);
                        workset.Add(qq);
                    }
                }
            }
        }
    }

    // add transitions
    foreach (StatePair p in pairs)
    {
        p.s1.AddEpsilon(p.s2);
    }

    a.deterministic = false;
    a.ClearNumberedStates();
    a.CheckMinimizeAlways();
}