/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <remarks>
/// The automaton is modified in place: <c>a.initial</c> is replaced by a freshly
/// created state, the numbered states are replaced via <c>SetNumberedStates</c>,
/// and <c>a.deterministic</c> is set to <c>true</c>. Nothing is done when the
/// automaton already reports itself as deterministic or as a singleton.
/// </remarks>
/// <param name="a">The automaton to determinize.</param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] allStates = a.GetNumberedStates();

    // subset construction
    bool initAccept = a.initial.accept;
    int initNumber = a.initial.number;
    a.initial = new State();
    SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);

    LinkedList<SortedInt32Set.FrozenInt32Set> worklist = new LinkedList<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    worklist.AddLast(initialset);

    a.initial.accept = initAccept;
    newstate[initialset] = a.initial;

    // The new initial state always gets number 0.
    int newStateUpto = 0;
    State[] newStatesArray = new State[5];
    newStatesArray[newStateUpto] = a.initial;
    a.initial.number = newStateUpto;
    newStateUpto++;

    // like Set<Integer,PointTransitions>
    PointTransitionSet points = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set statesSet = new SortedInt32Set(5);

    // LUCENENET NOTE (carried over from the port): a problem was suspected in this
    // loop, attributed to the conversion to FrozenInt32Set and its differing
    // equality checking. The worklist pop below does not depend on that equality;
    // the newstate dictionary lookup still does.
    while (worklist.Count > 0)
    {
        SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
        // Unlink the head node directly. Remove(s) would locate the node through
        // value equality, and if that comparison ever failed to match, nothing
        // would be removed and this loop would never terminate.
        worklist.RemoveFirst();

        // Collate all outgoing transitions by min/1+max:
        for (int i = 0; i < s.values.Length; i++)
        {
            State s0 = allStates[s.values[i]];
            for (int j = 0; j < s0.numTransitions; j++)
            {
                points.Add(s0.TransitionsArray[j]);
            }
        }

        if (points.count == 0)
        {
            // No outgoing transitions -- skip it
            continue;
        }

        points.Sort();

        int lastPoint = -1;
        int accCount = 0; // incremented/decremented below as accepting targets open/close

        State r = s.state;
        for (int i = 0; i < points.count; i++)
        {
            int point = points.points[i].point;

            if (statesSet.upto > 0)
            {
                Debug.Assert(lastPoint != -1);

                statesSet.ComputeHash();

                State q;
                if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
                {
                    // First time this subset is seen: create its state and queue it.
                    q = new State();
                    SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
                    worklist.AddLast(p);
                    if (newStateUpto == newStatesArray.Length)
                    {
                        State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                        Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
                        newStatesArray = newArray;
                    }
                    newStatesArray[newStateUpto] = q;
                    q.number = newStateUpto;
                    newStateUpto++;
                    q.accept = accCount > 0;
                    newstate[p] = q;
                }
                else
                {
                    Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
                }

                // The interval since the previous point leads to the subset's state.
                r.AddTransition(new Transition(lastPoint, point - 1, q));
            }

            // process transitions that end on this point
            // (closes an overlapping interval)
            Transition[] transitions = points.points[i].ends.transitions;
            int limit = points.points[i].ends.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Decr(num);
                accCount -= t.to.accept ? 1 : 0;
            }
            points.points[i].ends.count = 0;

            // process transitions that start on this point
            // (opens a new interval)
            transitions = points.points[i].starts.transitions;
            limit = points.points[i].starts.count;
            for (int j = 0; j < limit; j++)
            {
                Transition t = transitions[j];
                int num = t.to.number;
                statesSet.Incr(num);
                accCount += t.to.accept ? 1 : 0;
            }
            lastPoint = point;
            points.points[i].starts.count = 0;
        }
        points.Reset();
        Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
    }

    a.deterministic = true;
    a.SetNumberedStates(newStatesArray, newStateUpto);
}
/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
/// <remarks>
/// Works in place: the automaton receives a new initial state, a new numbered
/// state array, and has its <c>deterministic</c> flag set. Automata that already
/// report themselves as deterministic or singleton are left untouched.
/// </remarks>
/// <param name="a">The automaton to determinize.</param>
public static void Determinize(Automaton a)
{
    if (a.IsDeterministic || a.IsSingleton)
    {
        return;
    }

    State[] numberedStates = a.GetNumberedStates();

    // Subset construction: capture the old initial state's data, then swap in a new one.
    bool startAccepts = a.initial.accept;
    int startNumber = a.initial.number;
    a.initial = new State();
    SortedInt32Set.FrozenInt32Set startSet = new SortedInt32Set.FrozenInt32Set(startNumber, a.initial);

    // LUCENENET specific - the worklist is a Queue rather than a LinkedList
    Queue<SortedInt32Set.FrozenInt32Set> pending = new Queue<SortedInt32Set.FrozenInt32Set>();
    IDictionary<SortedInt32Set.FrozenInt32Set, State> subsetToState = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();

    pending.Enqueue(startSet);

    a.initial.accept = startAccepts;
    subsetToState[startSet] = a.initial;

    // Slot 0 of the output array belongs to the new initial state.
    State[] detStates = new State[5];
    detStates[0] = a.initial;
    a.initial.number = 0;
    int detStateCount = 1;

    // like Set<Integer,PointTransitions>
    PointTransitionSet boundaries = new PointTransitionSet();

    // like SortedMap<Integer,Integer>
    SortedInt32Set activeSet = new SortedInt32Set(5);

    while (pending.Count > 0)
    {
        SortedInt32Set.FrozenInt32Set current = pending.Dequeue();

        // Gather every outgoing transition of every member state (by min/1+max).
        for (int member = 0; member < current.values.Length; member++)
        {
            State memberState = numberedStates[current.values[member]];
            for (int k = 0; k < memberState.numTransitions; k++)
            {
                boundaries.Add(memberState.TransitionsArray[k]);
            }
        }

        if (boundaries.count == 0)
        {
            // Nothing leaves this subset, so there is nothing to emit.
            continue;
        }

        boundaries.Sort();

        int intervalStart = -1;
        int acceptingTargets = 0;
        State source = current.state;

        for (int i = 0; i < boundaries.count; i++)
        {
            int point = boundaries.points[i].point;

            if (activeSet.upto > 0)
            {
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(intervalStart != -1);
                }

                activeSet.ComputeHash();

                bool known = subsetToState.TryGetValue(activeSet.ToFrozenInt32Set(), out State target) && target != null;
                if (known)
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert((acceptingTargets > 0) == target.accept, "accCount={0} vs existing accept={1} states={2}", acceptingTargets, target.accept, activeSet);
                    }
                }
                else
                {
                    // Unseen subset: allocate its state, schedule it, and register it.
                    target = new State();
                    SortedInt32Set.FrozenInt32Set frozen = activeSet.Freeze(target);
                    pending.Enqueue(frozen);

                    if (detStateCount == detStates.Length)
                    {
                        // LUCENENET: Resize rather than copy
                        Array.Resize(ref detStates, ArrayUtil.Oversize(1 + detStateCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
                    }

                    detStates[detStateCount] = target;
                    target.number = detStateCount;
                    detStateCount++;
                    target.accept = acceptingTargets > 0;
                    subsetToState[frozen] = target;
                }

                source.AddTransition(new Transition(intervalStart, point - 1, target));
            }

            // Transitions ending at this point close an overlapping interval.
            Transition[] edge = boundaries.points[i].ends.transitions;
            int edgeCount = boundaries.points[i].ends.count;
            for (int k = 0; k < edgeCount; k++)
            {
                Transition closing = edge[k];
                activeSet.Decr(closing.to.number);
                if (closing.to.accept)
                {
                    acceptingTargets--;
                }
            }
            boundaries.points[i].ends.count = 0;

            // Transitions starting at this point open a new interval.
            edge = boundaries.points[i].starts.transitions;
            edgeCount = boundaries.points[i].starts.count;
            for (int k = 0; k < edgeCount; k++)
            {
                Transition opening = edge[k];
                activeSet.Incr(opening.to.number);
                if (opening.to.accept)
                {
                    acceptingTargets++;
                }
            }
            intervalStart = point;
            boundaries.points[i].starts.count = 0;
        }

        boundaries.Reset();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(activeSet.upto == 0, "upto={0}", activeSet.upto);
        }
    }

    a.deterministic = true;
    a.SetNumberedStates(detStates, detStateCount);
}