/// <summary>
/// Instance convenience form of <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/>:
/// this automaton is passed as the first argument and <paramref name="a"/> as the second.
/// </summary>
/// <param name="a">Automaton forwarded as the second argument of the static operation.</param>
/// <returns>The result of <see cref="BasicOperations.SubsetOf(Automaton, Automaton)"/> for the pair.</returns>
public virtual bool SubsetOf(Automaton a) => BasicOperations.SubsetOf(this, a);
/// <summary>
/// Instance convenience form of <see cref="BasicOperations.Minus(Automaton, Automaton)"/>:
/// this automaton is passed as the first argument and <paramref name="a"/> as the second.
/// </summary>
/// <param name="a">Automaton forwarded as the second argument of the static operation.</param>
/// <returns>The automaton produced by <see cref="BasicOperations.Minus(Automaton, Automaton)"/>.</returns>
public virtual Automaton Minus(Automaton a) => BasicOperations.Minus(this, a);
/// <summary>
/// Instance convenience form of <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>:
/// this automaton is passed as the first argument and <paramref name="a"/> as the second.
/// </summary>
/// <param name="a">Automaton forwarded as the second argument of the static operation.</param>
/// <returns>The automaton produced by <see cref="BasicOperations.Intersection(Automaton, Automaton)"/>.</returns>
public virtual Automaton Intersection(Automaton a) => BasicOperations.Intersection(this, a);
/// <summary>
/// Minimizes the given automaton using Hopcroft's algorithm.
/// <para/>
/// The automaton is determinized and totalized first, then its states are partitioned into
/// blocks that are refined until no pending (block, start point) pair remains. Finally one new
/// state is created per block and <paramref name="a"/> is rewired in place.
/// </summary>
/// <param name="a">Automaton to minimize; it is modified in place.</param>
public static void MinimizeHopcroft(Automaton a)
{
    a.Determinize();

    // Shortcut: the initial state has a single self-loop spanning the whole code point range,
    // so there is nothing to refine.
    if (a.initial.numTransitions == 1)
    {
        Transition t = a.initial.TransitionsArray[0];
        if (t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint)
        {
            return;
        }
    }
    a.Totalize();

    // initialize data structures
    int[] sigma = a.GetStartPoints();
    State[] states = a.GetNumberedStates();
    int sigmaLen = sigma.Length, statesLen = states.Length;
    List<State>[,] reverse = new List<State>[statesLen, sigmaLen];
    ISet<State>[] partition = new JCG.HashSet<State>[statesLen];
    List<State>[] splitblock = new List<State>[statesLen];
    int[] block = new int[statesLen];
    StateList[,] active = new StateList[statesLen, sigmaLen];
    StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
    LinkedList<Int32Pair> pending = new LinkedList<Int32Pair>();
    OpenBitSet pending2 = new OpenBitSet(sigmaLen * statesLen);
    OpenBitSet split = new OpenBitSet(statesLen), refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);
    for (int q = 0; q < statesLen; q++)
    {
        splitblock[q] = new List<State>();
        partition[q] = new JCG.HashSet<State>();
        for (int x = 0; x < sigmaLen; x++)
        {
            active[q, x] = new StateList();
        }
    }

    // find initial partition and reverse edges
    // (block 0 holds the accepting states, block 1 the non-accepting ones)
    for (int q = 0; q < statesLen; q++)
    {
        State qq = states[q];
        int j = qq.accept ? 0 : 1;
        partition[j].Add(qq);
        block[q] = j;
        for (int x = 0; x < sigmaLen; x++)
        {
            int r = qq.Step(sigma[x]).number;
            if (reverse[r, x] == null)
            {
                reverse[r, x] = new List<State>();
            }
            reverse[r, x].Add(qq);
        }
    }

    // initialize active sets
    for (int j = 0; j <= 1; j++)
    {
        for (int x = 0; x < sigmaLen; x++)
        {
            foreach (State qq in partition[j])
            {
                if (reverse[qq.number, x] != null)
                {
                    active2[qq.number, x] = active[j, x].Add(qq);
                }
            }
        }
    }

    // initialize pending: for every start point queue the block with the smaller active list
    for (int x = 0; x < sigmaLen; x++)
    {
        int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
        pending.AddLast(new Int32Pair(j, x));
        pending2.Set(x * statesLen + j);
    }

    // process pending until fixed point
    int k = 2; // number of blocks created so far
    while (pending.Count > 0)
    {
        // Pop the head of the work list. RemoveFirst() unlinks the head node directly instead of
        // searching the list by value equality as Remove(item) does.
        Int32Pair ip = pending.First.Value;
        pending.RemoveFirst();
        int p = ip.N1;
        int x = ip.N2;
        pending2.Clear(x * statesLen + p);

        // find states that need to be split off their blocks
        for (StateListNode m = active[p, x].First; m != null; m = m.Next)
        {
            List<State> r = reverse[m.Q.number, x];
            if (r != null)
            {
                foreach (State s in r)
                {
                    int i = s.number;
                    if (!split.Get(i))
                    {
                        split.Set(i);
                        int j = block[i];
                        splitblock[j].Add(s);
                        if (!refine2.Get(j))
                        {
                            refine2.Set(j);
                            refine.Set(j);
                        }
                    }
                }
            }
        }

        // refine blocks
        for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
        {
            List<State> sb = splitblock[j];
            if (sb.Count < partition[j].Count)
            {
                // only part of block j is affected: move that part into the new block k
                ISet<State> b1 = partition[j];
                ISet<State> b2 = partition[k];
                foreach (State s in sb)
                {
                    b1.Remove(s);
                    b2.Add(s);
                    block[s.number] = k;
                    for (int c = 0; c < sigmaLen; c++)
                    {
                        StateListNode sn = active2[s.number, c];
                        if (sn != null && sn.Sl == active[j, c])
                        {
                            sn.Remove();
                            active2[s.number, c] = active[k, c].Add(s);
                        }
                    }
                }

                // update pending
                for (int c = 0; c < sigmaLen; c++)
                {
                    int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                    if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                    {
                        pending2.Set(ofs + j);
                        pending.AddLast(new Int32Pair(j, c));
                    }
                    else
                    {
                        pending2.Set(ofs + k);
                        pending.AddLast(new Int32Pair(k, c));
                    }
                }
                k++;
            }
            refine2.Clear(j);
            foreach (State s in sb)
            {
                split.Clear(s.number);
            }
            sb.Clear();
        }

        // Reset the whole refine set for the next round. The end index of the range overload is
        // documented as one past the last bit to clear, so the range must extend to Length;
        // stopping at Length - 1 could leave the highest bit set and re-process a stale block.
        // NOTE(review): relies on OpenBitSet.Clear(start, end) being end-exclusive - confirm.
        refine.Clear(0, refine.Length);
    }

    // make a new state for each equivalence class, set initial state
    State[] newstates = new State[k];
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = new State();
        newstates[n] = s;
        foreach (State q in partition[n])
        {
            if (q == a.initial)
            {
                a.initial = s;
            }
            s.accept = q.accept;
            s.number = q.number; // select representative
            q.number = n;        // old state now records the index of its block
        }
    }

    // build transitions and set acceptance
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = newstates[n];
        s.accept = states[s.number].accept;
        foreach (Transition t in states[s.number].GetTransitions())
        {
            s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
        }
    }
    a.ClearNumberedStates();
    a.RemoveDeadTransitions();
}
/// <summary>
/// Instance convenience form of <see cref="BasicOperations.Concatenate(Automaton, Automaton)"/>:
/// this automaton is passed as the first argument and <paramref name="a"/> as the second.
/// </summary>
/// <param name="a">Automaton forwarded as the second argument of the static operation.</param>
/// <returns>The automaton produced by <see cref="BasicOperations.Concatenate(Automaton, Automaton)"/>.</returns>
public virtual Automaton Concatenate(Automaton a) => BasicOperations.Concatenate(this, a);
/// <summary>
/// Creates a compiled automaton by chaining to the three-argument constructor with
/// <c>finite</c> set to <c>null</c> and <c>simplify</c> set to <c>true</c>.
/// </summary>
/// <param name="automaton">Automaton to compile.</param>
public CompiledAutomaton(Automaton automaton)
    : this(automaton, finite: null, simplify: true)
{
}
/// <summary>
/// Creates a run automaton for <paramref name="a"/> by forwarding it to the base constructor
/// together with <see cref="Character.MaxCodePoint"/> and <c>false</c>.
/// </summary>
/// <param name="a">Automaton handed to the base constructor.</param>
public CharacterRunAutomaton(Automaton a)
    : base(a, Character.MaxCodePoint, false)
{
}
/// <summary>
/// Makes the given automaton deterministic, in place, using subset construction.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <param name="a">Automaton to determinize. Nothing happens when it already reports itself as
/// deterministic or as a singleton.</param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] sourceStates = a.GetNumberedStates();

    // subset construction: a fresh initial state represents the set { old initial state }
    bool startAccepts = a.initial.accept;
    int startNumber = a.initial.number;
    a.initial = new State();
    var startSet = new SortedInt32Set.FrozenInt32Set(startNumber, a.initial);

    // LUCENENET specific - Queue is much more performant than LinkedList
    var pendingSets = new Queue<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> stateForSet = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    pendingSets.Enqueue(startSet);
    a.initial.accept = startAccepts;
    stateForSet[startSet] = a.initial;

    int createdCount = 0;
    State[] createdStates = new State[5];
    createdStates[createdCount] = a.initial;
    a.initial.number = createdCount;
    createdCount++;

    // like Set<Integer,PointTransitions>
    PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set liveTargets = new SortedInt32Set(5);

    while (pendingSets.Count > 0)
    {
        SortedInt32Set.FrozenInt32Set current = pendingSets.Dequeue();

        // Collate all outgoing transitions by min/1+max:
        for (int v = 0; v < current.values.Length; v++)
        {
            State member = sourceStates[current.values[v]];
            for (int ti = 0; ti < member.numTransitions; ti++)
            {
                points.Add(member.TransitionsArray[ti]);
            }
        }

        if (points.count == 0)
        {
            // No outgoing transitions -- skip it
            continue;
        }

        points.Sort();

        int previousPoint = -1;
        int acceptingTargets = 0;
        State origin = current.state;

        for (int pi = 0; pi < points.count; pi++)
        {
            int point = points.points[pi].point;

            if (liveTargets.upto > 0)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(previousPoint != -1);
                }

                liveTargets.ComputeHash();

                State target;
                if (!stateForSet.TryGetValue(liveTargets.ToFrozenInt32Set(), out target) || target is null)
                {
                    // first time this set of target states is seen: create and register its state
                    target = new State();
                    SortedInt32Set.FrozenInt32Set frozen = liveTargets.Freeze(target);
                    pendingSets.Enqueue(frozen);
                    if (createdCount == createdStates.Length)
                    {
                        // LUCENENET: Resize rather than copy
                        Array.Resize(ref createdStates, ArrayUtil.Oversize(1 + createdCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                    }
                    createdStates[createdCount] = target;
                    target.number = createdCount;
                    createdCount++;
                    target.accept = acceptingTargets > 0;
                    stateForSet[frozen] = target;
                }
                else
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((acceptingTargets > 0) == target.accept, "accCount={0} vs existing accept={1} states={2}", acceptingTargets, target.accept, liveTargets);
                    }
                }

                origin.AddTransition(new Transition(previousPoint, point - 1, target));
            }

            // process transitions that end on this point
            // (closes an overlapping interval)
            Transition[] edge = points.points[pi].ends.transitions;
            int edgeCount = points.points[pi].ends.count;
            for (int e = 0; e < edgeCount; e++)
            {
                Transition closing = edge[e];
                int targetNumber = closing.to.number;
                liveTargets.Decr(targetNumber);
                if (closing.to.accept)
                {
                    acceptingTargets--;
                }
            }
            points.points[pi].ends.count = 0;

            // process transitions that start on this point
            // (opens a new interval)
            edge = points.points[pi].starts.transitions;
            edgeCount = points.points[pi].starts.count;
            for (int e = 0; e < edgeCount; e++)
            {
                Transition opening = edge[e];
                int targetNumber = opening.to.number;
                liveTargets.Incr(targetNumber);
                if (opening.to.accept)
                {
                    acceptingTargets++;
                }
            }
            previousPoint = point;
            points.points[pi].starts.count = 0;
        }

        points.Reset();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(liveTargets.upto == 0, "upto={0}", liveTargets.upto);
        }
    }

    a.deterministic = true;
    a.SetNumberedStates(createdStates, createdCount);
}
/// <summary>
/// Adds epsilon transitions to the given automaton. This method adds extra
/// character interval transitions that are equivalent to the given set of
/// epsilon transitions.
/// </summary>
/// <param name="a"> Automaton. It is modified in place and marked as non-deterministic. </param>
/// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
///          source/destination states where epsilon transitions should be
///          added. Pairs discovered while computing the closure are added to this collection. </param>
public static void AddEpsilons(Automaton a, ICollection<StatePair> pairs)
{
    a.ExpandSingleton();

    // forward: source -> destinations, back: destination -> sources
    Dictionary<State, JCG.HashSet<State>> forward = new Dictionary<State, JCG.HashSet<State>>();
    Dictionary<State, JCG.HashSet<State>> back = new Dictionary<State, JCG.HashSet<State>>();
    foreach (StatePair p in pairs)
    {
        if (!forward.TryGetValue(p.s1, out JCG.HashSet<State> to))
        {
            to = new JCG.HashSet<State>();
            forward[p.s1] = to;
        }
        to.Add(p.s2);

        if (!back.TryGetValue(p.s2, out JCG.HashSet<State> from))
        {
            from = new JCG.HashSet<State>();
            back[p.s2] = from;
        }
        from.Add(p.s1);
    }

    // calculate epsilon closure
    LinkedList<StatePair> worklist = new LinkedList<StatePair>(pairs);
    JCG.HashSet<StatePair> workset = new JCG.HashSet<StatePair>(pairs);
    while (worklist.Count > 0)
    {
        // Pop the head of the work list. RemoveFirst() unlinks the head node directly instead of
        // searching the list by value equality as Remove(item) does.
        StatePair p = worklist.First.Value;
        worklist.RemoveFirst();
        workset.Remove(p);

        if (forward.TryGetValue(p.s2, out JCG.HashSet<State> to))
        {
            foreach (State s in to)
            {
                StatePair pp = new StatePair(p.s1, s);
                if (!pairs.Contains(pp))
                {
                    pairs.Add(pp);
                    forward[p.s1].Add(s);
                    back[s].Add(p.s1);
                    worklist.AddLast(pp);
                    workset.Add(pp);

                    // every pair that ends in p.s1 must be revisited now that p.s1 reaches s
                    if (back.TryGetValue(p.s1, out JCG.HashSet<State> from))
                    {
                        foreach (State q in from)
                        {
                            StatePair qq = new StatePair(q, p.s1);
                            if (!workset.Contains(qq))
                            {
                                worklist.AddLast(qq);
                                workset.Add(qq);
                            }
                        }
                    }
                }
            }
        }
    }

    // add transitions
    foreach (StatePair p in pairs)
    {
        p.s1.AddEpsilon(p.s2);
    }
    a.deterministic = false;
    //a.clearHashCode();
    a.ClearNumberedStates();
    a.CheckMinimizeAlways();
}
/// <summary>
/// Builds an automaton accepting exactly the strings accepted by both
/// <paramref name="a1"/> and <paramref name="a2"/>. Never modifies the input automata languages.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
/// <param name="a1">First operand.</param>
/// <param name="a2">Second operand.</param>
/// <returns>The product automaton, or a (possibly cloned) operand / empty automaton in the
/// singleton and same-instance shortcuts.</returns>
public static Automaton Intersection(Automaton a1, Automaton a2)
{
    // Singleton shortcuts: the result is the singleton itself or nothing at all.
    if (a1.IsSingleton)
    {
        return BasicOperations.Run(a2, a1.singleton) ? a1.CloneIfRequired() : BasicAutomata.MakeEmpty();
    }

    if (a2.IsSingleton)
    {
        return BasicOperations.Run(a1, a2.singleton) ? a2.CloneIfRequired() : BasicAutomata.MakeEmpty();
    }

    if (a1 == a2)
    {
        return a1.CloneIfRequired();
    }

    Transition[][] sortedLeft = a1.GetSortedTransitions();
    Transition[][] sortedRight = a2.GetSortedTransitions();
    Automaton product = new Automaton();

    // LUCENENET specific - Queue is much more performant than LinkedList
    var pendingPairs = new Queue<StatePair>();
    var knownPairs = new Dictionary<StatePair, StatePair>();

    StatePair current = new StatePair(product.initial, a1.initial, a2.initial);
    pendingPairs.Enqueue(current);
    knownPairs[current] = current;

    while (pendingPairs.Count > 0)
    {
        current = pendingPairs.Dequeue();
        current.s.accept = current.s1.accept && current.s2.accept;

        Transition[] left = sortedLeft[current.s1.number];
        Transition[] right = sortedRight[current.s2.number];

        int firstCandidate = 0;
        for (int li = 0; li < left.Length; li++)
        {
            // skip right-hand transitions that end before the current left-hand one starts
            while (firstCandidate < right.Length && right[firstCandidate].max < left[li].min)
            {
                firstCandidate++;
            }

            for (int ri = firstCandidate; ri < right.Length && left[li].max >= right[ri].min; ri++)
            {
                if (right[ri].max < left[li].min)
                {
                    continue;
                }

                StatePair candidate = new StatePair(left[li].to, right[ri].to);
                StatePair resolved;
                if (!knownPairs.TryGetValue(candidate, out resolved) || resolved is null)
                {
                    candidate.s = new State();
                    pendingPairs.Enqueue(candidate);
                    knownPairs[candidate] = candidate;
                    resolved = candidate;
                }

                // the new transition covers the overlap of the two intervals
                int min = left[li].min > right[ri].min ? left[li].min : right[ri].min;
                int max = left[li].max < right[ri].max ? left[li].max : right[ri].max;
                current.s.AddTransition(new Transition(min, max, resolved.s));
            }
        }
    }

    product.deterministic = a1.deterministic && a2.deterministic;
    product.RemoveDeadTransitions();
    product.CheckMinimizeAlways();
    return product;
}
/// <summary>
/// Checks whether every string accepted by <paramref name="a1"/> is also accepted by
/// <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
/// not already marked as deterministic.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
/// <param name="a1">Automaton whose language is tested for containment.</param>
/// <param name="a2">Automaton whose language is the candidate superset.</param>
/// <returns><c>true</c> if no reachable state pair or transition interval contradicts containment.</returns>
public static bool SubsetOf(Automaton a1, Automaton a2)
{
    if (a1 == a2)
    {
        return true;
    }

    if (a1.IsSingleton)
    {
        return a2.IsSingleton
            ? a1.singleton.Equals(a2.singleton, StringComparison.Ordinal)
            : BasicOperations.Run(a2, a1.singleton);
    }

    a2.Determinize();
    Transition[][] sortedLeft = a1.GetSortedTransitions();
    Transition[][] sortedRight = a2.GetSortedTransitions();

    // LUCENENET specific - Queue is much more performant than LinkedList
    var pendingPairs = new Queue<StatePair>();
    var seen = new JCG.HashSet<StatePair>();

    StatePair current = new StatePair(a1.initial, a2.initial);
    pendingPairs.Enqueue(current);
    seen.Add(current);

    while (pendingPairs.Count > 0)
    {
        current = pendingPairs.Dequeue();
        if (current.s1.accept && !current.s2.accept)
        {
            return false;
        }

        Transition[] left = sortedLeft[current.s1.number];
        Transition[] right = sortedRight[current.s2.number];

        int firstCandidate = 0;
        for (int li = 0; li < left.Length; li++)
        {
            while (firstCandidate < right.Length && right[firstCandidate].max < left[li].min)
            {
                firstCandidate++;
            }

            // [uncoveredMin, uncoveredMax] is the part of the left interval not yet matched
            int uncoveredMin = left[li].min;
            int uncoveredMax = left[li].max;

            for (int ri = firstCandidate; ri < right.Length && left[li].max >= right[ri].min; ri++)
            {
                if (right[ri].min > uncoveredMin)
                {
                    // gap between consecutive right-hand intervals
                    return false;
                }

                if (right[ri].max < Character.MaxCodePoint)
                {
                    uncoveredMin = right[ri].max + 1;
                }
                else
                {
                    // right interval reaches the top of the range: mark everything as covered
                    uncoveredMin = Character.MaxCodePoint;
                    uncoveredMax = Character.MinCodePoint;
                }

                StatePair next = new StatePair(left[li].to, right[ri].to);
                if (!seen.Contains(next))
                {
                    pendingPairs.Enqueue(next);
                    seen.Add(next);
                }
            }

            if (uncoveredMin <= uncoveredMax)
            {
                return false;
            }
        }
    }

    return true;
}
/// <summary>
/// Returns <c>true</c> if the given string is accepted by the automaton.
/// <para/>
/// Complexity: linear in the length of the string.
/// <para/>
/// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
/// </summary>
/// <param name="a">Automaton to run. Singleton, deterministic and non-deterministic automata
/// are each handled by a dedicated branch.</param>
/// <param name="s">Input string; it is consumed code point by code point.</param>
/// <returns><c>true</c> if <paramref name="s"/> is accepted, otherwise <c>false</c>.</returns>
public static bool Run(Automaton a, string s)
{
    if (a.IsSingleton)
    {
        return s.Equals(a.singleton, StringComparison.Ordinal);
    }

    if (a.deterministic)
    {
        // Deterministic case: follow the single possible path.
        State p = a.initial;
        int cp; // LUCENENET: Removed unnecessary assignment
        for (int i = 0; i < s.Length; i += Character.CharCount(cp))
        {
            State q = p.Step(cp = Character.CodePointAt(s, i));
            if (q is null)
            {
                return false;
            }
            p = q;
        }
        return p.accept;
    }
    else
    {
        // Non-deterministic case: track the set of states reachable after each code point.
        // pp / bb hold the current set, pp_other / bb_other the set being built for the next step.
        State[] states = a.GetNumberedStates();
        LinkedList<State> pp = new LinkedList<State>();
        LinkedList<State> pp_other = new LinkedList<State>();
        OpenBitSet bb = new OpenBitSet(states.Length);
        OpenBitSet bb_other = new OpenBitSet(states.Length);
        pp.AddLast(a.initial);
        JCG.List<State> dest = new JCG.List<State>();
        bool accept = a.initial.accept;
        int c; // LUCENENET: Removed unnecessary assignment
        for (int i = 0; i < s.Length; i += Character.CharCount(c))
        {
            c = Character.CodePointAt(s, i);
            accept = false;
            pp_other.Clear();

            // Reset the whole "next" membership set. The end index of the range overload is
            // documented as one past the last bit to clear, so the range must extend to Length;
            // stopping at Length - 1 could leave the highest bit set from an earlier step, and a
            // state with that number would then be skipped when it is reached again.
            // NOTE(review): relies on OpenBitSet.Clear(start, end) being end-exclusive - confirm.
            bb_other.Clear(0, bb_other.Length);

            foreach (State p in pp)
            {
                dest.Clear();
                p.Step(c, dest);
                foreach (State q in dest)
                {
                    if (q.accept)
                    {
                        accept = true;
                    }
                    if (!bb_other.Get(q.number))
                    {
                        bb_other.Set(q.number);
                        pp_other.AddLast(q);
                    }
                }
            }

            // swap current and next
            LinkedList<State> tp = pp;
            pp = pp_other;
            pp_other = tp;
            OpenBitSet tb = bb;
            bb = bb_other;
            bb_other = tb;
        }
        return accept;
    }
}
/// <summary>
/// Compute a DFA that accepts all strings within an edit distance of <paramref name="n"/>.
/// <para>
/// All automata have the following properties:
/// <list type="bullet">
///     <item><description>They are deterministic (DFA).</description></item>
///     <item><description>There are no transitions to dead states.</description></item>
///     <item><description>They are not minimal (some transitions could be combined).</description></item>
/// </list>
/// </para>
/// </summary>
/// <param name="n">Edit distance. <c>0</c> yields the automaton for the word itself.</param>
/// <returns>The automaton, or <c>null</c> when <paramref name="n"/> is not smaller than the
/// number of available parametric descriptions.</returns>
public virtual Automaton ToAutomaton(int n)
{
    if (n == 0)
    {
        return BasicAutomata.MakeString(word, 0, word.Length);
    }

    if (n >= descriptions.Length)
    {
        return null;
    }

    int range = 2 * n + 1;
    ParametricDescription description = descriptions[n];

    // the number of states is based on the length of the word and n
    State[] states = new State[description.Count];

    // create all states, and mark as accept states if appropriate
    for (int index = 0; index < states.Length; index++)
    {
        State created = new State();
        created.number = index;
        created.accept = description.IsAccept(index);
        states[index] = created;
    }

    // create transitions from state to state
    for (int source = 0; source < states.Length; source++)
    {
        int xpos = description.GetPosition(source);
        if (xpos >= 0)
        {
            int end = xpos + Math.Min(word.Length - xpos, range);

            for (int ai = 0; ai < alphabet.Length; ai++)
            {
                int ch = alphabet[ai];
                // get the characteristic vector at this position wrt ch
                int cvec = GetVector(ch, xpos, end);
                int target = description.Transition(source, xpos, cvec);
                if (target >= 0)
                {
                    states[source].AddTransition(new Transition(ch, states[target]));
                }
            }

            // add transitions for all other chars in unicode
            // by definition, their characteristic vectors are always 0,
            // because they do not exist in the input string.
            int fallback = description.Transition(source, xpos, 0); // by definition
            if (fallback >= 0)
            {
                for (int ri = 0; ri < numRanges; ri++)
                {
                    states[source].AddTransition(new Transition(rangeLower[ri], rangeUpper[ri], states[fallback]));
                }
            }
        }
    }

    Automaton result = new Automaton(states[0]);
    result.deterministic = true;

    // we create some useless unconnected states, and its a net-win overall to remove these,
    // as well as to combine any adjacent transitions (it makes later algorithms more efficient).
    // so, while we could set our numberedStates here, its actually best not to, and instead to
    // force a traversal in reduce, pruning the unconnected states while we combine adjacent transitions.
    //a.setNumberedStates(states);
    result.Reduce();

    // we need not trim transitions to dead states, as they are not created.
    //a.restoreInvariant();
    return result;
}
/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <param name="a">Automaton to determinize in place. Nothing happens when it already reports
/// itself as deterministic or as a singleton.</param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] allStates = a.GetNumberedStates();

    // subset construction
    bool initAccept = a.initial.accept;
    int initNumber = a.initial.number;
    a.initial = new State();
    SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

    // FIFO work list. A Queue dequeues the head directly; the previous
    // LinkedList + Remove(value) combination searched the list by value equality on every pop.
    Queue<SortedInt32Set.FrozenInt32Set> worklist = new Queue<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    worklist.Enqueue(initialset);

    a.initial.accept = initAccept;
    newstate[initialset] = a.initial;

    int newStateUpto = 0;
    State[] newStatesArray = new State[5];
    newStatesArray[newStateUpto] = a.initial;
    a.initial.number = newStateUpto;
    newStateUpto++;

    // like Set<Integer,PointTransitions>
    PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set statesSet = new SortedInt32Set(5);

    // LUCENENET NOTE: The problem here is almost certainly
    // due to the conversion to FrozenIntSet along with its
    // differing equality checking.
    while (worklist.Count > 0)
    {
        SortedInt32Set.FrozenInt32Set s = worklist.Dequeue();

        // Collate all outgoing transitions by min/1+max:
        for (int i = 0; i < s.values.Length; i++)
        {
            State s0 = allStates[s.values[i]];
            for (int j = 0; j < s0.numTransitions; j++)
            {
                points.Add(s0.TransitionsArray[j]);
            }
        }

        if (points.count == 0)
        {
            // No outgoing transitions -- skip it
            continue;
        }

        points.Sort();

        int lastPoint = -1;
        int accCount = 0; // number of currently open transitions that lead to an accept state

        State r = s.state;
        for (int i = 0; i < points.count; i++)
        {
            int point = points.points[i].point;

            if (statesSet.upto > 0)
            {
                Debug.Assert(lastPoint != -1);

                statesSet.ComputeHash();

                State q;
                if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                {
                    q = new State();
                    SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                    worklist.Enqueue(p);
                    if (newStateUpto == newStatesArray.Length)
                    {
                        // grow in place; all existing entries are preserved
                        Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                    }
                    newStatesArray[newStateUpto] = q;
                    q.number = newStateUpto;
                    newStateUpto++;
                    q.accept = accCount > 0;
                    newstate[p] = q;
                }
                else
                {
                    Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                }

                r.AddTransition(new Transition(lastPoint, point - 1, q));
            }

            // process transitions that end on this point
            // (closes an overlapping interval)
            Transition[] transitions = points.points[i].ends.transitions;
            int limit = points.points[i].ends.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Decr(num);
                accCount -= t.to.accept ? 1 : 0;
            }
            points.points[i].ends.count = 0;

            // process transitions that start on this point
            // (opens a new interval)
            transitions = points.points[i].starts.transitions;
            limit = points.points[i].starts.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Incr(num);
                accCount += t.to.accept ? 1 : 0;
            }
            lastPoint = point;
            points.points[i].starts.count = 0;
        }
        points.Reset();
        Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
    }
    a.deterministic = true;
    a.SetNumberedStates(newStatesArray, newStateUpto);
}
/// <summary>
/// Instance convenience form of <see cref="BasicOperations.Union(Automaton, Automaton)"/>:
/// this automaton is passed as the first argument and <paramref name="a"/> as the second.
/// </summary>
/// <param name="a">Automaton forwarded as the second argument of the static operation.</param>
/// <returns>The automaton produced by <see cref="BasicOperations.Union(Automaton, Automaton)"/>.</returns>
public virtual Automaton Union(Automaton a) => BasicOperations.Union(this, a);
/// <summary>
/// Returns an automaton that accepts the concatenation of the languages of the
/// given automata.
/// <para/>
/// Complexity: linear in total number of states.
/// </summary>
/// <param name="l">Automata to concatenate, in order. An empty list yields the automaton
/// returned by <c>BasicAutomata.MakeEmptyString()</c>.</param>
/// <returns>The concatenated automaton. When the same instance occurs more than once in
/// <paramref name="l"/>, every operand is cloned before it is wired in.</returns>
public static Automaton Concatenate(IList<Automaton> l)
{
    if (l.Count == 0)
    {
        return BasicAutomata.MakeEmptyString();
    }

    bool all_singleton = true;
    foreach (Automaton a in l)
    {
        if (!a.IsSingleton)
        {
            all_singleton = false;
            break;
        }
    }

    if (all_singleton)
    {
        // Only fixed strings: the result is simply the joined string.
        StringBuilder joined = new StringBuilder();
        foreach (Automaton a in l)
        {
            joined.Append(a.singleton);
        }
        return BasicAutomata.MakeString(joined.ToString());
    }

    // Concatenating with an empty language yields the empty language.
    foreach (Automaton a in l)
    {
        if (BasicOperations.IsEmpty(a))
        {
            return BasicAutomata.MakeEmpty();
        }
    }

    // Detect whether the same instance appears more than once. This must be based on object
    // identity, so use the runtime identity hash rather than the virtual GetHashCode(), which a
    // type may override. A hash collision only causes an unnecessary clone, never a wrong result.
    JCG.HashSet<int> ids = new JCG.HashSet<int>();
    foreach (Automaton a in l)
    {
        ids.Add(System.Runtime.CompilerServices.RuntimeHelpers.GetHashCode(a));
    }
    bool has_aliases = ids.Count != l.Count;

    Automaton result = l[0];
    if (has_aliases)
    {
        result = result.CloneExpanded();
    }
    else
    {
        result = result.CloneExpandedIfRequired();
    }

    // ac holds the accept states of the automaton built so far
    ISet<State> ac = result.GetAcceptStates();
    bool first = true;
    foreach (Automaton a in l)
    {
        if (first)
        {
            // l[0] is already the base of the result
            first = false;
        }
        else
        {
            if (a.IsEmptyString)
            {
                continue;
            }

            Automaton aa = a;
            if (has_aliases)
            {
                aa = aa.CloneExpanded();
            }
            else
            {
                aa = aa.CloneExpandedIfRequired();
            }

            ISet<State> ns = aa.GetAcceptStates();
            foreach (State s in ac)
            {
                s.accept = false;
                s.AddEpsilon(aa.initial);
                // if the flag is set again after adding the epsilon, s stays an accept state
                if (s.accept)
                {
                    ns.Add(s);
                }
            }
            ac = ns;
        }
    }

    result.deterministic = false;
    //b.clearHashCode();
    result.ClearNumberedStates();
    result.CheckMinimizeAlways();
    return result;
}
/// <summary>
/// Runs <see cref="MinimizationOperations.Minimize(Automaton)"/> on <paramref name="a"/> and
/// hands the same instance back, so the call can be used inside an expression.
/// </summary>
/// <param name="a">Automaton passed to the minimization routine.</param>
/// <returns>The automaton given as argument.</returns>
public static Automaton Minimize(Automaton a)
{
    MinimizationOperations.Minimize(a);
    return a;
}
/// <summary>
/// Builds the automaton for this expression node, recursing into sub-expressions as needed.
/// </summary>
/// <param name="automata">Optional map of named automata; may be <c>null</c>.</param>
/// <param name="automaton_provider">Optional provider consulted for a named automaton that is
/// not found in <paramref name="automata"/>; may be <c>null</c>.</param>
/// <returns>The automaton for this node, or <c>null</c> if the node kind is not handled by any case.</returns>
/// <exception cref="ArgumentException">A named automaton could not be found in either source,
/// or the provider failed with an <see cref="IOException"/>.</exception>
private Automaton ToAutomaton(IDictionary<string, Automaton> automata, IAutomatonProvider automaton_provider)
{
    IList<Automaton> list;
    Automaton a = null;
    switch (kind)
    {
        case Kind.REGEXP_UNION:
            // flatten nested unions into one list so they are combined in a single step
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
            a = BasicOperations.Union(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CONCATENATION:
            // flatten nested concatenations into one list so they are combined in a single step
            list = new List<Automaton>();
            FindLeaves(exp1, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            FindLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider);
            a = BasicOperations.Concatenate(list);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_INTERSECTION:
            a = exp1.ToAutomaton(automata, automaton_provider).Intersection(exp2.ToAutomaton(automata, automaton_provider));
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_OPTIONAL:
            a = exp1.ToAutomaton(automata, automaton_provider).Optional();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MIN:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_REPEAT_MINMAX:
            a = exp1.ToAutomaton(automata, automaton_provider).Repeat(min, max);
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_COMPLEMENT:
            a = exp1.ToAutomaton(automata, automaton_provider).Complement();
            MinimizationOperations.Minimize(a);
            break;

        case Kind.REGEXP_CHAR:
            a = BasicAutomata.MakeChar(c);
            break;

        case Kind.REGEXP_CHAR_RANGE:
            a = BasicAutomata.MakeCharRange(from, to);
            break;

        case Kind.REGEXP_ANYCHAR:
            a = BasicAutomata.MakeAnyChar();
            break;

        case Kind.REGEXP_EMPTY:
            a = BasicAutomata.MakeEmpty();
            break;

        case Kind.REGEXP_STRING:
            a = BasicAutomata.MakeString(s);
            break;

        case Kind.REGEXP_ANYSTRING:
            a = BasicAutomata.MakeAnyString();
            break;

        case Kind.REGEXP_AUTOMATON:
            Automaton aa = null;
            if (automata != null)
            {
                // TryGetValue leaves aa null when the name is absent. The dictionary indexer
                // would throw KeyNotFoundException instead, which skipped the provider fallback
                // and the "not found" error below.
                automata.TryGetValue(s, out aa);
            }
            if (aa == null && automaton_provider != null)
            {
                try
                {
                    aa = automaton_provider.GetAutomaton(s);
                }
                catch (IOException e)
                {
                    throw new ArgumentException(e.ToString(), e);
                }
            }
            if (aa == null)
            {
                throw new ArgumentException("'" + s + "' not found");
            }
            a = (Automaton)aa.Clone(); // always clone here (ignore allow_mutate)
            break;

        case Kind.REGEXP_INTERVAL:
            a = BasicAutomata.MakeInterval(min, max, digits);
            break;
    }
    return a;
}
/// <summary>
/// Compiles <paramref name="automaton"/>. When <paramref name="simplify"/> is set, the automaton
/// is first checked against a few simple forms (matches nothing, matches everything, a single
/// fixed string, a constant prefix); if one applies, only the type and term are recorded and no
/// run automaton is built. Otherwise the automaton is converted with <c>UTF32ToUTF8</c> and a
/// <c>ByteRunAutomaton</c> plus sorted transitions are created.
/// </summary>
/// <param name="automaton">Automaton to compile.</param>
/// <param name="finite">Whether the automaton is finite, or <c>null</c> to have it computed via
/// <c>SpecialOperations.IsFinite</c>. Only consulted for the general (NORMAL) case.</param>
/// <param name="simplify">Whether to test for the simple forms. Note that on a large automaton
/// these tests could be costly.</param>
public CompiledAutomaton(Automaton automaton, bool? finite, bool simplify)
{
    if (simplify)
    {
        // Test whether the automaton is a "simple" form and
        // if so, don't create a runAutomaton.  Note that on a
        // large automaton these tests could be costly:
        if (BasicOperations.IsEmpty(automaton))
        {
            // matches nothing
            Type = AUTOMATON_TYPE.NONE;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        if (BasicOperations.IsTotal(automaton))
        {
            // matches all possible strings
            Type = AUTOMATON_TYPE.ALL;
            Term = null;
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        string commonPrefix;
        string singleton;
        if (automaton.Singleton is null)
        {
            // expanded representation: it is a single string exactly when it has the same
            // language as its own non-empty common prefix
            commonPrefix = SpecialOperations.GetCommonPrefix(automaton);
            bool isPrefixOnly = commonPrefix.Length > 0
                && BasicOperations.SameLanguage(automaton, BasicAutomata.MakeString(commonPrefix));
            singleton = isPrefixOnly ? commonPrefix : null;
        }
        else
        {
            commonPrefix = null;
            singleton = automaton.Singleton;
        }

        if (singleton != null)
        {
            // matches a fixed string in singleton or expanded
            // representation
            Type = AUTOMATON_TYPE.SINGLE;
            Term = new BytesRef(singleton);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }

        Automaton prefixThenAnything = BasicOperations.Concatenate(BasicAutomata.MakeString(commonPrefix), BasicAutomata.MakeAnyString());
        if (BasicOperations.SameLanguage(automaton, prefixThenAnything))
        {
            // matches a constant prefix
            Type = AUTOMATON_TYPE.PREFIX;
            Term = new BytesRef(commonPrefix);
            CommonSuffixRef = null;
            RunAutomaton = null;
            sortedTransitions = null;
            this.Finite = null;
            return;
        }
    }

    // General case: keep the automaton itself.
    Type = AUTOMATON_TYPE.NORMAL;
    Term = null;
    this.Finite = finite ?? SpecialOperations.IsFinite(automaton);

    Automaton utf8 = (new UTF32ToUTF8()).Convert(automaton);

    // a common suffix is only computed when the automaton is not known to be finite
    CommonSuffixRef = this.Finite == true
        ? null
        : SpecialOperations.GetCommonSuffixBytesRef(utf8);

    RunAutomaton = new ByteRunAutomaton(utf8, true);
    sortedTransitions = utf8.GetSortedTransitions();
}
/// <summary>
/// Returns true if the language of <paramref name="a1"/> is a subset of the language
/// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
/// not already marked as deterministic.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
/// <param name="a1">Automaton whose language is tested for containment.</param>
/// <param name="a2">Automaton whose language is the candidate superset.</param>
/// <returns><c>true</c> if no reachable state pair or transition interval contradicts containment.</returns>
public static bool SubsetOf(Automaton a1, Automaton a2)
{
    if (a1 == a2)
    {
        return true;
    }

    if (a1.IsSingleton)
    {
        if (a2.IsSingleton)
        {
            return a1.singleton.Equals(a2.singleton, StringComparison.Ordinal);
        }
        return BasicOperations.Run(a2, a1.singleton);
    }

    a2.Determinize();
    Transition[][] transitions1 = a1.GetSortedTransitions();
    Transition[][] transitions2 = a2.GetSortedTransitions();

    // FIFO work list of state pairs still to be examined. A Queue dequeues the head directly;
    // the previous LinkedList + Remove(value) combination searched the list by value equality
    // on every pop.
    Queue<StatePair> worklist = new Queue<StatePair>();
    HashSet<StatePair> visited = new HashSet<StatePair>();
    StatePair p = new StatePair(a1.initial, a2.initial);
    worklist.Enqueue(p);
    visited.Add(p);

    while (worklist.Count > 0)
    {
        p = worklist.Dequeue();
        if (p.S1.accept && !p.S2.accept)
        {
            return false;
        }

        Transition[] t1 = transitions1[p.S1.number];
        Transition[] t2 = transitions2[p.S2.number];
        for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
        {
            // skip transitions of a2 that end before the current transition of a1 starts
            while (b2 < t2.Length && t2[b2].max < t1[n1].min)
            {
                b2++;
            }

            // [min1, max1] is the part of the current a1 interval not yet covered by a2
            int min1 = t1[n1].min, max1 = t1[n1].max;

            for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
            {
                if (t2[n2].min > min1)
                {
                    // gap between consecutive a2 intervals
                    return false;
                }

                if (t2[n2].max < Character.MAX_CODE_POINT)
                {
                    min1 = t2[n2].max + 1;
                }
                else
                {
                    // a2 interval reaches the top of the range: mark everything as covered
                    min1 = Character.MAX_CODE_POINT;
                    max1 = Character.MIN_CODE_POINT;
                }

                StatePair q = new StatePair(t1[n1].to, t2[n2].to);
                if (!visited.Contains(q))
                {
                    worklist.Enqueue(q);
                    visited.Add(q);
                }
            }

            if (min1 <= max1)
            {
                return false;
            }
        }
    }
    return true;
}