/// <summary>
/// Stores <paramref name="a"/> and, unless it is a singleton automaton,
/// builds the <c>leadsToAccept</c> lookup by walking the transition graph
/// backwards from every accept state.
/// </summary>
/// <param name="a">The automaton to analyze.</param>
public RandomAcceptedStrings(Automaton a)
{
    this.a = a;
    if (a.IsSingleton)
    {
        // Singleton automata are handled without a transition lookup.
        leadsToAccept = null;
        return;
    }

    // The lookup is keyed by reference identity because two Transitions
    // with different start nodes can be considered the same.
    leadsToAccept = new JCG.Dictionary<Transition, bool?>(IdentityEqualityComparer<Transition>.Default);

    IDictionary<State, IList<ArrivingTransition>> incomingByTarget = new Dictionary<State, IList<ArrivingTransition>>();
    Queue<State> frontier = new Queue<State>();
    ISet<State> visited = new JCG.HashSet<State>();

    // Build the reverse map (target state -> arriving transitions) and
    // seed the search with every accept state.
    foreach (State state in a.GetNumberedStates())
    {
        for (int index = 0; index < state.numTransitions; index++)
        {
            Transition transition = state.TransitionsArray[index];
            State target = transition.to;
            if (!incomingByTarget.TryGetValue(target, out IList<ArrivingTransition> incoming) || incoming == null)
            {
                incoming = new List<ArrivingTransition>();
                incomingByTarget[target] = incoming;
            }
            incoming.Add(new ArrivingTransition(state, transition));
        }

        if (state.Accept)
        {
            frontier.Enqueue(state);
            visited.Add(state);
        }
    }

    // Breadth-first search, from the accept states, backwards.
    while (frontier.Count > 0)
    {
        State current = frontier.Dequeue();
        if (!incomingByTarget.TryGetValue(current, out IList<ArrivingTransition> arrivals) || arrivals == null)
        {
            continue;
        }

        foreach (ArrivingTransition arrival in arrivals)
        {
            State source = arrival.from;
            // Add returns false when the state was already visited; only the
            // transition that first discovers a state is recorded.
            if (visited.Add(source))
            {
                frontier.Enqueue(source);
                leadsToAccept[arrival.t] = true;
            }
        }
    }
}
/// <summary>
/// Converts an incoming utf32 <see cref="Automaton"/> to an equivalent
/// utf8 one. The incoming automaton need not be deterministic. The
/// returned automaton is flagged as non-deterministic, so callers must
/// determinize it themselves if they need that.
/// </summary>
/// <param name="utf32">The utf32 automaton to convert; a singleton automaton is expanded into a clone first.</param>
/// <returns>A newly created utf8 automaton.</returns>
public Automaton Convert(Automaton utf32)
{
    if (utf32.IsSingleton)
    {
        utf32 = utf32.CloneExpanded();
    }

    // Indexed by utf32 state number; null until that state has been given a utf8 counterpart.
    State[] utf8ByUtf32Number = new State[utf32.GetNumberedStates().Length];
    JCG.List<State> stack = new JCG.List<State>();

    State sourceState = utf32.GetInitialState();
    stack.Add(sourceState);

    Automaton utf8 = new Automaton { IsDeterministic = false };
    State targetState = utf8.GetInitialState();

    // The utf8 initial state occupies slot 0 of the state table.
    utf8States = new State[5];
    targetState.number = 0;
    utf8States[0] = targetState;
    utf8StateCount = 1;

    targetState.Accept = sourceState.Accept;
    utf8ByUtf32Number[sourceState.number] = targetState;

    // Process pending utf32 states in last-in, first-out order.
    while (stack.Count != 0)
    {
        int top = stack.Count - 1;
        sourceState = stack[top];
        stack.RemoveAt(top);
        targetState = utf8ByUtf32Number[sourceState.number];

        for (int index = 0; index < sourceState.numTransitions; index++)
        {
            Transition edge = sourceState.TransitionsArray[index];
            State utf32Dest = edge.to;
            State utf8Dest = utf8ByUtf32Number[utf32Dest.number];
            if (utf8Dest == null)
            {
                // First time this destination is seen: create its counterpart and schedule it.
                utf8Dest = NewUTF8State();
                utf8Dest.accept = utf32Dest.accept;
                utf8ByUtf32Number[utf32Dest.number] = utf8Dest;
                stack.Add(utf32Dest);
            }
            ConvertOneEdge(targetState, utf8Dest, edge.min, edge.max);
        }
    }

    utf8.SetNumberedStates(utf8States, utf8StateCount);
    return utf8;
}
/// <summary>
/// Returns a (deterministic) automaton that accepts the complement of the
/// language of the given automaton.
/// <para/>
/// Complexity: linear in number of states (if already deterministic).
/// </summary>
/// <param name="a">The automaton to complement; the work is done on the result of <c>CloneExpandedIfRequired()</c>.</param>
/// <returns>The automaton with every state's accept flag inverted.</returns>
public static Automaton Complement(Automaton a)
{
    Automaton result = a.CloneExpandedIfRequired();
    result.Determinize();
    result.Totalize();

    // Invert the accept flag of every numbered state.
    State[] states = result.GetNumberedStates();
    for (int index = 0; index < states.Length; index++)
    {
        State state = states[index];
        state.accept = !state.accept;
    }

    result.RemoveDeadTransitions();
    return result;
}
/// <summary>
/// Constructs a new <code>RunAutomaton</code> from a deterministic
/// <code>Automaton</code>. The automaton is determinized first, then its
/// accept flags and transitions are flattened into arrays indexed by
/// state number and start-point index.
/// </summary>
/// <param name="a"> an automaton </param>
/// <param name="maxInterval"> highest value covered by the class map when <paramref name="tableize"/> is <c>true</c> </param>
/// <param name="tableize"> if <c>true</c>, build a lookup table mapping every value in <c>0..maxInterval</c> to its start-point index; otherwise no table is kept </param>
public RunAutomaton(Automaton a, int maxInterval, bool tableize)
{
    _maxInterval = maxInterval;
    a.Determinize();
    _points = a.GetStartPoints();
    State[] numbered = a.GetNumberedStates();
    m_initial = a.initial.Number;
    _size = numbered.Length;
    m_accept = new bool[_size];

    int pointCount = _points.Length;
    int cellCount = _size * pointCount;

    // -1 marks "no transition" for a (state, start point) pair.
    m_transitions = new int[cellCount];
    for (int cell = 0; cell < cellCount; cell++)
    {
        m_transitions[cell] = -1;
    }

    foreach (State state in numbered)
    {
        int stateNumber = state.number;
        m_accept[stateNumber] = state.accept;

        int rowStart = stateNumber * pointCount;
        for (int column = 0; column < pointCount; column++)
        {
            State target = state.Step(_points[column]);
            if (target != null)
            {
                m_transitions[rowStart + column] = target.number;
            }
        }
    }

    if (!tableize)
    {
        _classmap = null;
        return;
    }

    // Set alphabet table for optimal run performance: each value maps to
    // the index of the last start point that is less than or equal to it.
    int[] classes = new int[maxInterval + 1];
    int current = 0;
    for (int value = 0; value <= maxInterval; value++)
    {
        int next = current + 1;
        if (next < pointCount && value == _points[next])
        {
            current = next;
        }
        classes[value] = current;
    }
    _classmap = classes;
}
/// <summary>
/// Reverses the language of the given (non-singleton) automaton while returning
/// the set of new initial states.
/// </summary>
/// <param name="a">The automaton to reverse in place; a singleton is expanded first.</param>
/// <returns>The states that were accepting before the reversal.</returns>
public static ISet<State> Reverse(Automaton a)
{
    a.ExpandSingleton();

    State[] states = a.GetNumberedStates();
    Dictionary<State, ISet<Transition>> reversedEdges = new Dictionary<State, ISet<Transition>>();
    ISet<State> formerlyAccepting = new JCG.HashSet<State>();

    // Remember which states accepted, give every state an empty set of
    // reversed edges, and clear its accept flag.
    foreach (State state in states)
    {
        if (state.Accept)
        {
            formerlyAccepting.Add(state);
        }
        reversedEdges[state] = new JCG.HashSet<Transition>();
        state.accept = false;
    }

    // Reverse all edges: an edge source -> target becomes target -> source.
    foreach (State source in states)
    {
        foreach (Transition edge in source.GetTransitions())
        {
            reversedEdges[edge.to].Add(new Transition(edge.min, edge.max, source));
        }
    }

    foreach (State state in states)
    {
        state.SetTransitions(reversedEdges[state].ToArray());
    }

    // Make new initial+final states: the old initial state becomes
    // accepting and a fresh initial state takes its place.
    a.initial.accept = true;
    a.initial = new State();
    foreach (State state in formerlyAccepting)
    {
        a.initial.AddEpsilon(state); // ensures that all initial states are reachable
    }

    a.deterministic = false;
    a.ClearNumberedStates();
    return formerlyAccepting;
}
/// <summary>
/// Minimizes the given automaton using Hopcroft's algorithm.
/// <para/>
/// The automaton is determinized and totalized first and is modified in
/// place: its states are replaced by one new state per equivalence class.
/// </summary>
/// <param name="a">The automaton to minimize.</param>
public static void MinimizeHopcroft(Automaton a)
{
    a.Determinize();
    if (a.initial.numTransitions == 1)
    {
        // Nothing to do when the initial state's only transition loops
        // back to itself over the whole code point range.
        Transition t = a.initial.TransitionsArray[0];
        if (t.to == a.initial && t.min == Character.MIN_CODE_POINT && t.max == Character.MAX_CODE_POINT)
        {
            return;
        }
    }
    a.Totalize();

    // initialize data structures
    int[] sigma = a.GetStartPoints();
    State[] states = a.GetNumberedStates();
    int sigmaLen = sigma.Length, statesLen = states.Length;
    List<State>[,] reverse = new List<State>[statesLen, sigmaLen];     // [target state, symbol] -> source states
    ISet<State>[] partition = new EquatableSet<State>[statesLen];      // block number -> member states
    List<State>[] splitblock = new List<State>[statesLen];             // block number -> members marked for splitting
    int[] block = new int[statesLen];                                  // state number -> block number
    StateList[,] active = new StateList[statesLen, sigmaLen];
    StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
    LinkedList<Int32Pair> pending = new LinkedList<Int32Pair>();       // (block, symbol) pairs still to process
    OpenBitSet pending2 = new OpenBitSet(sigmaLen * statesLen);        // membership bits for 'pending'
    OpenBitSet split = new OpenBitSet(statesLen), refine = new OpenBitSet(statesLen), refine2 = new OpenBitSet(statesLen);
    for (int q = 0; q < statesLen; q++)
    {
        splitblock[q] = new List<State>();
        partition[q] = new EquatableSet<State>();
        for (int x = 0; x < sigmaLen; x++)
        {
            active[q, x] = new StateList();
        }
    }

    // find initial partition and reverse edges
    for (int q = 0; q < statesLen; q++)
    {
        State qq = states[q];
        int j = qq.accept ? 0 : 1; // block 0 = accepting, block 1 = non-accepting
        partition[j].Add(qq);
        block[q] = j;
        for (int x = 0; x < sigmaLen; x++)
        {
            var r = qq.Step(sigma[x]).number;
            if (reverse[r, x] == null)
            {
                reverse[r, x] = new List<State>();
            }
            reverse[r, x].Add(qq);
        }
    }

    // initialize active sets
    for (int j = 0; j <= 1; j++)
    {
        for (int x = 0; x < sigmaLen; x++)
        {
            foreach (State qq in partition[j])
            {
                if (reverse[qq.number, x] != null)
                {
                    active2[qq.number, x] = active[j, x].Add(qq);
                }
            }
        }
    }

    // initialize pending: for every symbol, queue the block with the smaller active list
    for (int x = 0; x < sigmaLen; x++)
    {
        int j = (active[0, x].Count <= active[1, x].Count) ? 0 : 1;
        pending.AddLast(new Int32Pair(j, x));
        pending2.Set(x * statesLen + j);
    }

    // process pending until fixed point
    int k = 2; // number of blocks created so far
    while (pending.Count > 0)
    {
        // Dequeue the head directly instead of searching for it by value.
        Int32Pair ip = pending.First.Value;
        pending.RemoveFirst();
        int p = ip.N1;
        int x = ip.N2;
        pending2.Clear(x * statesLen + p);

        // find states that need to be split off their blocks
        for (StateListNode m = active[p, x].First; m != null; m = m.Next)
        {
            List<State> r = reverse[m.Q.number, x];
            if (r != null)
            {
                foreach (State s in r)
                {
                    int i = s.number;
                    if (!split.Get(i))
                    {
                        split.Set(i);
                        int j = block[i];
                        splitblock[j].Add(s);
                        if (!refine2.Get(j))
                        {
                            refine2.Set(j);
                            refine.Set(j);
                        }
                    }
                }
            }
        }

        // refine blocks
        for (int j = refine.NextSetBit(0); j >= 0; j = refine.NextSetBit(j + 1))
        {
            List<State> sb = splitblock[j];
            if (sb.Count < partition[j].Count)
            {
                // Only part of block j was marked: move the marked states into new block k.
                ISet<State> b1 = partition[j];
                ISet<State> b2 = partition[k];
                foreach (State s in sb)
                {
                    b1.Remove(s);
                    b2.Add(s);
                    block[s.number] = k;
                    for (int c = 0; c < sigmaLen; c++)
                    {
                        StateListNode sn = active2[s.number, c];
                        if (sn != null && sn.Sl == active[j, c])
                        {
                            sn.Remove();
                            active2[s.number, c] = active[k, c].Add(s);
                        }
                    }
                }

                // update pending
                for (int c = 0; c < sigmaLen; c++)
                {
                    int aj = active[j, c].Count, ak = active[k, c].Count, ofs = c * statesLen;
                    if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                    {
                        pending2.Set(ofs + j);
                        pending.AddLast(new Int32Pair(j, c));
                    }
                    else
                    {
                        pending2.Set(ofs + k);
                        pending.AddLast(new Int32Pair(k, c));
                    }
                }
                k++;
            }
            refine2.Clear(j);
            foreach (State s in sb)
            {
                split.Clear(s.number);
            }
            sb.Clear();
        }

        // Clear(start, end) treats 'end' as one past the last bit to clear,
        // so the full capacity is passed. Passing Length - 1 left the highest
        // bit set, which could make a later pass refine a stale block.
        refine.Clear(0, refine.Length);
    }

    // make a new state for each equivalence class, set initial state
    State[] newstates = new State[k];
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = new State();
        newstates[n] = s;
        foreach (State q in partition[n])
        {
            if (q == a.initial)
            {
                a.initial = s;
            }
            s.accept = q.accept;
            s.number = q.number; // select representative
            q.number = n;        // old state now carries its block number
        }
    }

    // build transitions and set acceptance
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = newstates[n];
        s.accept = states[s.number].accept;
        foreach (Transition t in states[s.number].GetTransitions())
        {
            s.AddTransition(new Transition(t.min, t.max, newstates[t.to.number]));
        }
    }
    a.ClearNumberedStates();
    a.RemoveDeadTransitions();
}
/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <param name="a">The automaton to determinize in place; returns immediately if it is already deterministic or a singleton.</param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] allStates = a.GetNumberedStates();

    // subset construction
    bool initAccept = a.initial.accept;
    int initNumber = a.initial.number;
    a.initial = new State();
    SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

    LinkedList<SortedInt32Set.FrozenInt32Set> worklist = new LinkedList<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    worklist.AddLast(initialset);

    a.initial.accept = initAccept;
    newstate[initialset] = a.initial;

    int newStateUpto = 0;
    State[] newStatesArray = new State[5];
    newStatesArray[newStateUpto] = a.initial;
    a.initial.number = newStateUpto;
    newStateUpto++;

    // like Set<Integer,PointTransitions>
    PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set statesSet = new SortedInt32Set(5);

    // LUCENENET NOTE: The problem here is almost certainly
    // due to the conversion to FrozenIntSet along with its
    // differing equality checking.
    while (worklist.Count > 0)
    {
        // Take the head of the work list. RemoveFirst unlinks the head node
        // directly, so dequeuing does not depend on FrozenInt32Set equality
        // the way a value-based Remove(s) search does.
        SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
        worklist.RemoveFirst();

        // Collate all outgoing transitions by min/1+max:
        for (int i = 0; i < s.values.Length; i++)
        {
            State s0 = allStates[s.values[i]];
            for (int j = 0; j < s0.numTransitions; j++)
            {
                points.Add(s0.TransitionsArray[j]);
            }
        }

        if (points.count == 0)
        {
            // No outgoing transitions -- skip it
            continue;
        }

        points.Sort();

        int lastPoint = -1;
        int accCount = 0; // number of currently open transitions whose target accepts

        State r = s.state;
        for (int i = 0; i < points.count; i++)
        {
            int point = points.points[i].point;

            if (statesSet.upto > 0)
            {
                Debug.Assert(lastPoint != -1);

                statesSet.ComputeHash();

                State q;
                if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                {
                    // First time this subset is seen: create its state and schedule it.
                    q = new State();
                    SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                    worklist.AddLast(p);
                    if (newStateUpto == newStatesArray.Length)
                    {
                        State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                        Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
                        newStatesArray = newArray;
                    }
                    newStatesArray[newStateUpto] = q;
                    q.number = newStateUpto;
                    newStateUpto++;
                    q.accept = accCount > 0;
                    newstate[p] = q;
                }
                else
                {
                    Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                }

                r.AddTransition(new Transition(lastPoint, point - 1, q));
            }

            // process transitions that end on this point
            // (closes an overlapping interval)
            Transition[] transitions = points.points[i].ends.transitions;
            int limit = points.points[i].ends.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Decr(num);
                accCount -= t.to.accept ? 1 : 0;
            }
            points.points[i].ends.count = 0;

            // process transitions that start on this point
            // (opens a new interval)
            transitions = points.points[i].starts.transitions;
            limit = points.points[i].starts.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Incr(num);
                accCount += t.to.accept ? 1 : 0;
            }
            lastPoint = point;
            points.points[i].starts.count = 0;
        }
        points.Reset();
        Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
    }
    a.deterministic = true;
    a.SetNumberedStates(newStatesArray, newStateUpto);
}
/// <summary>
/// Returns <c>true</c> if the given string is accepted by the automaton.
/// <para/>
/// Complexity: linear in the length of the string.
/// <para/>
/// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
/// </summary>
/// <param name="a">The automaton to run; singleton, deterministic and non-deterministic automata are each handled separately.</param>
/// <param name="s">The input string, consumed one code point at a time.</param>
/// <returns><c>true</c> if <paramref name="s"/> is accepted by <paramref name="a"/>; otherwise <c>false</c>.</returns>
public static bool Run(Automaton a, string s)
{
    if (a.IsSingleton)
    {
        return s.Equals(a.singleton, StringComparison.Ordinal);
    }

    if (a.deterministic)
    {
        // Follow the single possible path; a missing transition rejects.
        State p = a.initial;
        for (int i = 0, cp = 0; i < s.Length; i += Character.CharCount(cp))
        {
            State q = p.Step(cp = Character.CodePointAt(s, i));
            if (q == null)
            {
                return false;
            }
            p = q;
        }
        return p.accept;
    }
    else
    {
        // Track the set of states reached after each code point.
        // pp holds the current states; pp_other and bb_other collect the next set.
        State[] states = a.GetNumberedStates();
        LinkedList<State> pp = new LinkedList<State>();
        LinkedList<State> pp_other = new LinkedList<State>();
        OpenBitSet bb = new OpenBitSet(states.Length);
        OpenBitSet bb_other = new OpenBitSet(states.Length);
        pp.AddLast(a.initial);
        List<State> dest = new List<State>();
        bool accept = a.initial.accept;
        for (int i = 0, c = 0; i < s.Length; i += Character.CharCount(c))
        {
            c = Character.CodePointAt(s, i);
            accept = false;
            pp_other.Clear();
            // Clear(start, end) treats 'end' as one past the last bit to clear,
            // so the full capacity is passed. Passing Length - 1 left the highest
            // bit set, which would make a state with that number look already
            // visited and drop it from the next state set.
            bb_other.Clear(0, bb_other.Length);
            foreach (State p in pp)
            {
                dest.Clear();
                p.Step(c, dest);
                foreach (State q in dest)
                {
                    if (q.accept)
                    {
                        accept = true;
                    }
                    if (!bb_other.Get(q.number))
                    {
                        bb_other.Set(q.number);
                        pp_other.AddLast(q);
                    }
                }
            }

            // Swap the current and next buffers for the following code point.
            LinkedList<State> tp = pp;
            pp = pp_other;
            pp_other = tp;
            OpenBitSet tb = bb;
            bb = bb_other;
            bb_other = tb;
        }
        return accept;
    }
}
/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <param name="a">The automaton to determinize in place; returns immediately if it is already deterministic or a singleton.</param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] originalStates = a.GetNumberedStates();

    // Subset construction: the new initial state stands for the set
    // holding only the old initial state.
    bool initialAccepts = a.initial.accept;
    int initialNumber = a.initial.number;
    a.initial = new State();
    SortedInt32Set.FrozenInt32Set initialSubset = new SortedInt32Set.FrozenInt32Set(initialNumber, a.initial);

    // LUCENENET specific - Queue is much more performant than LinkedList
    Queue<SortedInt32Set.FrozenInt32Set> pendingSubsets = new Queue<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> stateBySubset = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    pendingSubsets.Enqueue(initialSubset);
    a.initial.accept = initialAccepts;
    stateBySubset[initialSubset] = a.initial;

    // The new initial state takes number 0.
    State[] newStatesArray = new State[5];
    newStatesArray[0] = a.initial;
    a.initial.number = 0;
    int newStateCount = 1;

    // like Set<Integer,PointTransitions>
    PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set openStates = new SortedInt32Set(5);

    while (pendingSubsets.Count > 0)
    {
        SortedInt32Set.FrozenInt32Set subset = pendingSubsets.Dequeue();

        // Collate all outgoing transitions by min/1+max:
        foreach (int memberNumber in subset.values)
        {
            State member = originalStates[memberNumber];
            for (int index = 0; index < member.numTransitions; index++)
            {
                points.Add(member.TransitionsArray[index]);
            }
        }

        if (points.count == 0)
        {
            // No outgoing transitions -- skip it
            continue;
        }

        points.Sort();

        int previousPoint = -1;
        int acceptingOpen = 0; // number of currently open transitions whose target accepts
        State source = subset.state;

        for (int i = 0; i < points.count; i++)
        {
            int point = points.points[i].point;

            if (openStates.upto > 0)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(previousPoint != -1);
                }

                openStates.ComputeHash();

                if (newstateLookup(stateBySubset, openStates, out State target))
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((acceptingOpen > 0) == target.accept, "accCount={0} vs existing accept={1} states={2}", acceptingOpen, target.accept, openStates);
                    }
                }
                else
                {
                    // First time this subset is seen: create its state and schedule it.
                    target = new State();
                    SortedInt32Set.FrozenInt32Set frozen = openStates.Freeze(target);
                    pendingSubsets.Enqueue(frozen);
                    if (newStateCount == newStatesArray.Length)
                    {
                        // LUCENENET: Resize rather than copy
                        Array.Resize(ref newStatesArray, ArrayUtil.Oversize(1 + newStateCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                    }
                    newStatesArray[newStateCount] = target;
                    target.number = newStateCount;
                    newStateCount++;
                    target.accept = acceptingOpen > 0;
                    stateBySubset[frozen] = target;
                }

                source.AddTransition(new Transition(previousPoint, point - 1, target));
            }

            // process transitions that end on this point
            // (closes an overlapping interval)
            Transition[] boundary = points.points[i].ends.transitions;
            int boundaryCount = points.points[i].ends.count;
            for (int j = 0; j < boundaryCount; j++)
            {
                Transition closing = boundary[j];
                openStates.Decr(closing.to.number);
                acceptingOpen -= closing.to.accept ? 1 : 0;
            }
            points.points[i].ends.count = 0;

            // process transitions that start on this point
            // (opens a new interval)
            boundary = points.points[i].starts.transitions;
            boundaryCount = points.points[i].starts.count;
            for (int j = 0; j < boundaryCount; j++)
            {
                Transition opening = boundary[j];
                openStates.Incr(opening.to.number);
                acceptingOpen += opening.to.accept ? 1 : 0;
            }
            previousPoint = point;
            points.points[i].starts.count = 0;
        }

        points.Reset();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(openStates.upto == 0, "upto={0}", openStates.upto);
        }
    }

    a.deterministic = true;
    a.SetNumberedStates(newStatesArray, newStateCount);

    // Looks up the state already created for the current contents of 'current';
    // returns false when there is none (or the stored value is null).
    static bool newstateLookup(IDictionary<SortedInt32Set.FrozenInt32Set, State> known, SortedInt32Set current, out State found)
    {
        return known.TryGetValue(current.ToFrozenInt32Set(), out found) && found != null;
    }
}