/// <summary>
/// Minimizes the given automaton using Hopcroft's algorithm.
/// <para/>
/// The automaton is first determinized and totalized, then its states are
/// split into blocks by iterative partition refinement, and finally one new
/// state is created per block and wired up from a representative member.
/// </summary>
/// <param name="a">The automaton to minimize; it is modified in place.</param>
public static void MinimizeHopcroft(Automaton a)
{
    a.Determinize();
    // Shortcut: if the initial state has exactly one transition, and that
    // transition loops back to the initial state over the whole code point
    // range, nothing further is done.
    if (a.Initial.numTransitions == 1)
    {
        Transition t = a.Initial.TransitionsArray[0];
        if (t.To == a.Initial && t.Min_Renamed == Character.MIN_CODE_POINT && t.Max_Renamed == Character.MAX_CODE_POINT)
        {
            return;
        }
    }
    // NOTE(review): presumably makes the transition function total so that
    // Step(...) below never returns null -- confirm against Totalize().
    a.Totalize();

    // initialize data structures
    //   sigma          : start points of the automaton, used as the alphabet
    //   reverse[r, x]  : states whose Step(sigma[x]) is the state numbered r
    //   partition[j]   : set of states currently in block j
    //   splitblock[j]  : states of block j collected for splitting in the current round
    //   block[q]       : index of the block that state number q belongs to
    //   active[j, x]   : states of block j that have a non-null reverse[., x] entry
    //   active2[q, x]  : node of state q inside its active[., x] list (for removal)
    //   pending        : work queue of (block, symbol index) pairs
    //   pending2       : bit (x * statesLen + j) flags pair (j, x) as queued
    //   split          : states already placed in a splitblock this round
    //   refine/refine2 : blocks that received at least one split candidate this round
    int[] sigma = a.StartPoints;
    State[] states = a.NumberedStates;
    int sigmaLen = sigma.Length, statesLen = states.Length;
    List <State>[,] reverse = new List <State> [statesLen, sigmaLen];
    HashSet <State>[] partition = new HashSet <State> [statesLen];
    List <State>[] splitblock = new List <State> [statesLen];
    int[] block = new int[statesLen];
    StateList[,] active = new StateList[statesLen, sigmaLen];
    StateListNode[,] active2 = new StateListNode[statesLen, sigmaLen];
    LinkedList <IntPair> pending = new LinkedList <IntPair>();
    BitArray pending2 = new BitArray(sigmaLen * statesLen);
    BitArray split = new BitArray(statesLen), refine = new BitArray(statesLen), refine2 = new BitArray(statesLen);
    for (int q = 0; q < statesLen; q++)
    {
        splitblock[q] = new List <State>();
        partition[q] = new HashSet <State>();
        for (int x = 0; x < sigmaLen; x++)
        {
            active[q, x] = new StateList();
        }
    }

    // find initial partition and reverse edges:
    // accepting states go to block 0, all other states to block 1
    for (int q = 0; q < statesLen; q++)
    {
        State qq = states[q];
        int j = qq.accept ? 0 : 1;
        partition[j].Add(qq);
        block[q] = j;
        for (int x = 0; x < sigmaLen; x++)
        {
            // r is the number of the state reached from qq on sigma[x];
            // the reverse list for (r, x) is created lazily.
            var r = qq.Step(sigma[x]).number;
            if (reverse[r, x] == null)
            {
                reverse[r, x] = new List <State>();
            }
            reverse[r, x].Add(qq);
        }
    }

    // initialize active sets: a state is registered for symbol x only when
    // at least one state reaches it on sigma[x]
    for (int j = 0; j <= 1; j++)
    {
        for (int x = 0; x < sigmaLen; x++)
        {
            foreach (State qq in partition[j])
            {
                if (reverse[qq.number, x] != null)
                {
                    active2[qq.number, x] = active[j, x].Add(qq);
                }
            }
        }
    }

    // initialize pending: for every symbol, queue whichever of the two
    // initial blocks has the smaller active list (block 0 on a tie)
    for (int x = 0; x < sigmaLen; x++)
    {
        int j = (active[0, x].Size <= active[1, x].Size) ? 0 : 1;
        pending.AddLast(new IntPair(j, x));
        pending2.Set(x * statesLen + j, true);
    }

    // process pending until fixed point
    int k = 2; // index of the next unused block; blocks 0 and 1 are taken
    while (pending.Count > 0)
    {
        IntPair ip = pending.First.Value;
        pending.RemoveFirst();
        int p = ip.N1; // block index
        int x = ip.N2; // symbol index into sigma
        pending2.Set(x * statesLen + p, false);

        // find states that need to be split off their blocks:
        // every predecessor (on symbol x) of an active state of block p
        for (StateListNode m = active[p, x].First; m != null; m = m.Next)
        {
            List <State> r = reverse[m.q.number, x];
            if (r != null)
            {
                foreach (State s in r)
                {
                    int i = s.number;
                    if (!split.Get(i))
                    {
                        split.Set(i, true);
                        int j = block[i];
                        splitblock[j].Add(s);
                        if (!refine2.Get(j))
                        {
                            refine2.Set(j, true);
                            refine.Set(j, true);
                        }
                    }
                }
            }
        }

        // refine blocks
        for (int j = Number.NextSetBit(refine, 0); j >= 0; j = Number.NextSetBit(refine, j + 1))
        {
            List <State> sb = splitblock[j];
            // Only split when the candidates are a proper subset of block j;
            // otherwise the whole block moved together and stays as it is.
            if (sb.Count < partition[j].Count)
            {
                HashSet <State> b1 = partition[j];
                HashSet <State> b2 = partition[k];
                foreach (State s in sb)
                {
                    // move s from block j to the new block k
                    b1.Remove(s);
                    b2.Add(s);
                    block[s.number] = k;
                    for (int c = 0; c < sigmaLen; c++)
                    {
                        // relocate the state's active-list node as well
                        StateListNode sn = active2[s.number, c];
                        if (sn != null && sn.Sl == active[j, c])
                        {
                            sn.Remove();
                            active2[s.number, c] = active[k, c].Add(s);
                        }
                    }
                }

                // update pending: per symbol, queue block j when it is not
                // already queued, is non-empty and is no larger than block k;
                // otherwise queue block k
                for (int c = 0; c < sigmaLen; c++)
                {
                    int aj = active[j, c].Size, ak = active[k, c].Size, ofs = c * statesLen;
                    if (!pending2.Get(ofs + j) && 0 < aj && aj <= ak)
                    {
                        pending2.Set(ofs + j, true);
                        pending.AddLast(new IntPair(j, c));
                    }
                    else
                    {
                        pending2.Set(ofs + k, true);
                        pending.AddLast(new IntPair(k, c));
                    }
                }
                k++;
            }
            // reset the per-round bookkeeping for block j
            refine2.Set(j, false);
            foreach (State s in sb)
            {
                split.Set(s.number, false);
            }
            sb.Clear();
        }
        refine.SetAll(false);
    }

    // make a new state for each equivalence class, set initial state
    State[] newstates = new State[k];
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = new State();
        newstates[n] = s;
        foreach (State q in partition[n])
        {
            if (q == a.Initial)
            {
                a.Initial = s;
            }
            s.accept = q.accept;
            s.number = q.number; // select representative
            // from here on the old state's number is the index of its block
            q.number = n;
        }
    }

    // build transitions and set acceptance: copy the representative's
    // transitions, redirecting each target to the new state of its block
    for (int n = 0; n < newstates.Length; n++)
    {
        State s = newstates[n];
        s.accept = states[s.number].accept;
        foreach (Transition t in states[s.number].Transitions)
        {
            s.AddTransition(new Transition(t.Min_Renamed, t.Max_Renamed, newstates[t.To.number]));
        }
    }
    a.ClearNumberedStates();
    a.RemoveDeadTransitions();
}
/// <summary>
/// Produces a random string, as an array of code points, by walking the
/// automaton from its initial state and choosing transitions at random.
/// </summary>
/// <param name="r">Source of randomness for every choice made during the walk.</param>
/// <returns>The code points collected along the walk.</returns>
/// <exception cref="Exception">
/// Thrown when the walk reaches a non-accepting state without transitions.
/// </exception>
public int[] GetRandomAcceptedString(Random r)
{
    IList<int?> codePoints = new List<int?>();

    if (a.IsSingleton)
    {
        // accepts only one string: decode it one code point at a time
        var text = a.Singleton;
        for (int pos = 0; pos < text.Length;)
        {
            int cp = text.CodePointAt(pos);
            pos += Character.CharCount(cp);
            codePoints.Add(cp);
        }
        return ArrayUtil.ToInt32Array(codePoints);
    }

    var state = a.initial;
    while (true)
    {
        // In an accept state we must stop when there is nowhere left to go;
        // otherwise a coin flip decides. The || short-circuits, so the coin
        // is only consumed when outgoing transitions exist.
        if (state.accept && (state.numTransitions == 0 || r.NextBoolean()))
        {
            break;
        }

        if (state.numTransitions == 0)
        {
            throw new Exception("this automaton has dead states");
        }

        // "Cheating" restricts the choice to transitions registered in
        // leadsToAccept, i.e. ones known to be on the fastest path to an
        // accept state.
        bool cheat = r.NextBoolean();
        var shortcuts = new List<Transition>();
        if (cheat)
        {
            for (int i = 0; i < state.numTransitions; i++)
            {
                Transition candidate = state.TransitionsArray[i];
                if (leadsToAccept.ContainsKey(candidate))
                {
                    shortcuts.Add(candidate);
                }
            }
        }

        // No shortcut available (not cheating, or we jumped into a cycle,
        // which is OK): pick uniformly among all outgoing transitions.
        Transition picked = shortcuts.Count > 0
            ? shortcuts[r.Next(shortcuts.Count)]
            : state.TransitionsArray[r.Next(state.numTransitions)];

        codePoints.Add(AutomatonTestUtil.GetRandomCodePoint(r, picked));
        state = picked.to;
    }

    return ArrayUtil.ToInt32Array(codePoints);
}
/// <summary>
/// Initializes the instance by storing both arguments in the fields of the
/// same name.
/// </summary>
/// <param name="from">State to record (presumably the source state of <paramref name="t"/>).</param>
/// <param name="t">Transition to record.</param>
public ArrivingTransition(State from, Transition t)
{
    this.t = t;
    this.from = from;
}
/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <remarks>
/// Uses subset construction: every state of the result stands for a set of
/// original state numbers. For each such set, the outgoing transitions of its
/// members are collated into sorted interval end points, and a sweep over
/// those points emits one transition per range to the state that represents
/// the set of targets reachable on that range.
/// </remarks>
/// <param name="a">
/// The automaton to determinize; modified in place. Nothing is done when it
/// already reports itself as deterministic or as a singleton.
/// </param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] allStates = a.GetNumberedStates();

    // subset construction
    bool initAccept = a.initial.accept;
    int initNumber = a.initial.number;
    a.initial = new State();
    SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

    LinkedList<SortedInt32Set.FrozenInt32Set> worklist = new LinkedList<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    worklist.AddLast(initialset);

    a.initial.accept = initAccept;
    newstate[initialset] = a.initial;

    // The new states are collected in a growable array and numbered in
    // creation order; the new initial state is number 0.
    int newStateUpto = 0;
    State[] newStatesArray = new State[5];
    newStatesArray[newStateUpto] = a.initial;
    a.initial.number = newStateUpto;
    newStateUpto++;

    // like Set<Integer,PointTransitions>
    PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set statesSet = new SortedInt32Set(5);

    // LUCENENET NOTE: The problem here is almost certainly
    // due to the conversion to FrozenIntSet along with its
    // differing equality checking.
    while (worklist.Count > 0)
    {
        // Pop the head of the queue. RemoveFirst() unlinks the head node
        // directly; the value-based Remove(s) overload would instead search
        // the list using FrozenInt32Set equality.
        SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
        worklist.RemoveFirst();

        // Collate all outgoing transitions by min/1+max:
        for (int i = 0; i < s.values.Length; i++)
        {
            State s0 = allStates[s.values[i]];
            for (int j = 0; j < s0.numTransitions; j++)
            {
                points.Add(s0.TransitionsArray[j]);
            }
        }

        if (points.count == 0)
        {
            // No outgoing transitions -- skip it
            continue;
        }

        points.Sort();

        int lastPoint = -1;
        int accCount = 0; // number of currently open transitions whose target accepts

        State r = s.state;
        for (int i = 0; i < points.count; i++)
        {
            int point = points.points[i].point;

            if (statesSet.upto > 0)
            {
                // Some intervals are open over [lastPoint, point - 1]:
                // find or create the state for the current target set.
                Debug.Assert(lastPoint != -1);

                statesSet.ComputeHash();

                State q;
                if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                {
                    q = new State();
                    SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                    worklist.AddLast(p);
                    if (newStateUpto == newStatesArray.Length)
                    {
                        State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                        Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
                        newStatesArray = newArray;
                    }
                    newStatesArray[newStateUpto] = q;
                    q.number = newStateUpto;
                    newStateUpto++;
                    q.accept = accCount > 0;
                    newstate[p] = q;
                }
                else
                {
                    Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                }

                r.AddTransition(new Transition(lastPoint, point - 1, q));
            }

            // process transitions that end on this point
            // (closes an overlapping interval)
            Transition[] transitions = points.points[i].ends.transitions;
            int limit = points.points[i].ends.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Decr(num);
                accCount -= t.to.accept ? 1 : 0;
            }
            points.points[i].ends.count = 0;

            // process transitions that start on this point
            // (opens a new interval)
            transitions = points.points[i].starts.transitions;
            limit = points.points[i].starts.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Incr(num);
                accCount += t.to.accept ? 1 : 0;
            }
            lastPoint = point;
            points.points[i].starts.count = 0;
        }
        points.Reset();
        // every interval opened during the sweep must have been closed again
        Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
    }
    a.deterministic = true;
    a.SetNumberedStates(newStatesArray, newStateUpto);
}
/// <summary>
/// Registers a transition under its two end points: as a start at
/// <c>t.min</c> and as an end at <c>1 + t.max</c>.
/// </summary>
/// <param name="t">The transition to register.</param>
public void Add(Transition t)
{
    // Look up the opening point first, then the closing one, so that the
    // two Find calls happen in the same order as before.
    var opening = Find(t.min);
    opening.starts.Add(t);

    var closing = Find(1 + t.max);
    closing.ends.Add(t);
}
//private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;

/// <summary>
/// Writes a floor label at <paramref name="idx"/> and then keeps appending
/// the maximum label of the last transition of each state visited, until a
/// state without transitions is reached.
/// </summary>
/// <param name="state">State whose transitions are searched for the floor label.</param>
/// <param name="term">Buffer that is written to (and grown when needed); also the return value.</param>
/// <param name="idx">Position in <paramref name="term"/> at which writing starts.</param>
/// <param name="leadLabel">Label that the appended floor label must stay below.</param>
/// <returns><paramref name="term"/>, with its length set to the position after the last byte written.</returns>
private BytesRef AddTail(int state, BytesRef term, int idx, int leadLabel)
{
    // Find biggest transition that's < label: the last entry whose min is
    // below leadLabel wins.
    // TODO: use binary search here
    Transition[] candidates = sortedTransitions[state];
    Transition best = null;
    for (int i = 0; i < candidates.Length; i++)
    {
        if (candidates[i].min < leadLabel)
        {
            best = candidates[i];
        }
    }

    Debug.Assert(best != null);

    // Append floorLabel: the transition's max, capped at leadLabel - 1
    int floorLabel = best.max > leadLabel - 1 ? leadLabel - 1 : best.max;
    if (term.Bytes.Length <= idx)
    {
        term.Grow(1 + idx);
    }
    term.Bytes[idx] = (byte)floorLabel;
    idx++;
    state = best.to.Number;

    // Push down to last accept state: we are pushing "top", so always take
    // the last label of the last transition.
    for (Transition[] outgoing = sortedTransitions[state];
         outgoing.Length != 0;
         outgoing = sortedTransitions[state])
    {
        Transition last = outgoing[outgoing.Length - 1];
        if (term.Bytes.Length <= idx)
        {
            term.Grow(1 + idx);
        }
        term.Bytes[idx] = (byte)last.max;
        idx++;
        state = last.to.Number;
    }

    // Reached a state with no outgoing transitions.
    Debug.Assert(RunAutomaton.IsAccept(state));
    term.Length = idx;
    return term;
}