/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <param name="maxStatesBeforeStop">
/// The maximum number of states the resulting automaton can have. If the number of states exceeds the value
/// of this parameter during determinization, the process is aborted.
/// </param>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// Epsilon transitions are removed before any other check, so the automaton may be modified
/// even when <see langword="false"/> is returned.
/// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
/// </remarks>
public bool TryDeterminize(int maxStatesBeforeStop)
{
    Argument.CheckIfInRange(
        maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
        nameof(maxStatesBeforeStop),
        "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

    this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

    if (this.UsesGroups())
    {
        // Determinization would result in loss of group information, which we cannot allow
        return false;
    }

    // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
    // Such sets correspond to states of the resulting automaton.
    var weightedStateSetQueue = new Queue<Determinization.WeightedStateSet>();
    var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();
    var builder = new Builder();

    // The start state of the result corresponds to the original start state taken with unit weight
    var startWeightedStateSet = new Determinization.WeightedStateSet { { this.Start.Index, Weight.One } };
    weightedStateSetQueue.Enqueue(startWeightedStateSet);
    weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
    builder.Start.SetEndWeight(this.Start.EndWeight);

    while (weightedStateSetQueue.Count > 0)
    {
        // Take one unprocessed state of the resulting automaton
        Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
        var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
        var currentState = builder[currentStateIndex];

        // Find out what transitions we should add for this state
        var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

        // For each transition to add
        foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet) outgoingTransitionInfo in outgoingTransitionInfos)
        {
            TElementDistribution elementDistribution = outgoingTransitionInfo.Item1;
            Weight weight = outgoingTransitionInfo.Item2;
            Determinization.WeightedStateSet destWeightedStateSet = outgoingTransitionInfo.Item3;

            if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out int destinationStateIndex))
            {
                if (builder.StatesCount == maxStatesBeforeStop)
                {
                    // Too many states, determinization attempt failed
                    return false;
                }

                // Add new state to the result
                var destinationState = builder.AddState();
                weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);
                weightedStateSetQueue.Enqueue(destWeightedStateSet);

                // Compute its ending weight: the sum over the set of (weight in set) * (end weight of original state)
                destinationState.SetEndWeight(Weight.Zero);
                foreach (KeyValuePair<int, Weight> stateIdWithWeight in destWeightedStateSet)
                {
                    destinationState.SetEndWeight(Weight.Sum(
                        destinationState.EndWeight,
                        Weight.Product(stateIdWithWeight.Value, this.States[stateIdWithWeight.Key].EndWeight)));
                }

                destinationStateIndex = destinationState.Index;
            }

            // Add transition to the destination state
            currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
        }
    }

    var simplification = new Simplification(builder, this.PruneTransitionsWithLogWeightLessThan);
    simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

    // Build the result, carry over the settings of this automaton and replace this automaton's content with it
    var result = builder.GetAutomaton();
    result.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
    result.LogValueOverride = this.LogValueOverride;
    this.SwapWith(result);

    return true;
}
/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// The outcome is stored in <c>Data.IsDeterminized</c>; when that value is already set it is returned
/// without repeating the attempt. The number of states of the result is limited to three times the
/// current number of states (and to <c>MaxStateCount</c>).
/// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
/// </remarks>
public bool TryDeterminize()
{
    if (this.Data.IsDeterminized != null)
    {
        // The outcome of a previous attempt is already known
        return this.Data.IsDeterminized == true;
    }

    int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

    this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

    if (this.UsesGroups)
    {
        // Determinization would result in loss of group information, which we cannot allow
        this.Data = this.Data.With(isDeterminized: false);
        return false;
    }

    var builder = new Builder();
    builder.Start.SetEndWeight(this.Start.EndWeight);

    // Main traversal stack. An (enter: true, set) entry means "expand the outgoing transitions of set",
    // an (enter: false, set) entry means "set is no longer on the current path, forget it".
    var weightedStateSetStack = new Stack<(bool enter, Determinization.WeightedStateSet set)>();

    // Scratch stack: destinations found while expanding one set are collected here
    // and moved onto the main stack once the whole set has been expanded.
    var enqueuedWeightedStateSetStack = new Stack<(bool enter, Determinization.WeightedStateSet set)>();

    var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();

    // This dictionary is used to track sets currently in path from root. If we've found a set of states
    // that we have already seen during current path from root, but weights are different, that means
    // we've found a non-converging loop - infinite number of weighted sets will be generated if
    // we continue traversal and determinization will fail. For performance reasons we want to fail
    // fast if such loop is found.
    var stateSetsInPath = new Dictionary<Determinization.WeightedStateSet, Determinization.WeightedStateSet>(
        Determinization.WeightedStateSetOnlyStateComparer.Instance);

    var startWeightedStateSet = new Determinization.WeightedStateSet(this.Start.Index);
    weightedStateSetStack.Push((true, startWeightedStateSet));
    weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);

    while (weightedStateSetStack.Count > 0)
    {
        // Take one unprocessed state of the resulting automaton
        var (enter, currentWeightedStateSet) = weightedStateSetStack.Pop();

        if (enter)
        {
            if (currentWeightedStateSet.Count > 1)
            {
                // Only sets with more than 1 state can lead to infinite loops with different weights.
                // Because if there's only 1 state, then its weight is always Weight.One.
                if (!stateSetsInPath.ContainsKey(currentWeightedStateSet))
                {
                    stateSetsInPath.Add(currentWeightedStateSet, currentWeightedStateSet);
                }

                // Schedule removal of the set from the path; the entry is popped after everything pushed above it
                weightedStateSetStack.Push((false, currentWeightedStateSet));
            }

            if (!EnqueueOutgoingTransitions(currentWeightedStateSet))
            {
                this.Data = this.Data.With(isDeterminized: false);
                return false;
            }
        }
        else
        {
            stateSetsInPath.Remove(currentWeightedStateSet);
        }
    }

    var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);
    simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

    // PruneStatesWithLogEndWeightLessThan and LogValueOverride are read from and written to this same
    // instance, so only Data has to be replaced.
    this.Data = builder.GetData().With(isDeterminized: true);

    return true;

    // Adds to the state of the result that corresponds to currentWeightedStateSet all of its outgoing
    // transitions and schedules newly discovered destination sets for processing.
    // Returns false if determinization has failed.
    bool EnqueueOutgoingTransitions(Determinization.WeightedStateSet currentWeightedStateSet)
    {
        var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
        var currentState = builder[currentStateIndex];

        // Common special-case: definitely deterministic transitions from single state.
        // In this case no complicated determinization procedure is needed.
        if (currentWeightedStateSet.Count == 1 &&
            AllDestinationsAreSame(currentWeightedStateSet[0].Index))
        {
            Debug.Assert(currentWeightedStateSet[0].Weight == Weight.One);

            var sourceState = this.States[currentWeightedStateSet[0].Index];
            foreach (var transition in sourceState.Transitions)
            {
                var destinationStates = new Determinization.WeightedStateSet(transition.DestinationStateIndex);
                var outgoingTransitionInfo = new Determinization.OutgoingTransition(
                    transition.ElementDistribution.Value, transition.Weight, destinationStates);
                if (!TryAddTransition(enqueuedWeightedStateSetStack, outgoingTransitionInfo, currentState))
                {
                    return false;
                }
            }
        }
        else
        {
            // Find out what transitions we should add for this state
            var outgoingTransitions = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);
            foreach (var outgoingTransition in outgoingTransitions)
            {
                if (!TryAddTransition(enqueuedWeightedStateSetStack, outgoingTransition, currentState))
                {
                    return false;
                }
            }
        }

        // Move the collected entries onto the main stack; popping one stack into the other reverses their order
        while (enqueuedWeightedStateSetStack.Count > 0)
        {
            weightedStateSetStack.Push(enqueuedWeightedStateSetStack.Pop());
        }

        return true;
    }

    // Checks that all transitions from state end up in the same destination. This is used
    // as a very fast "is deterministic" check, that doesn't care about distributions.
    // State can have deterministic transitions with different destinations. This case will be
    // handled by slow path.
    bool AllDestinationsAreSame(int stateIndex)
    {
        var transitions = this.States[stateIndex].Transitions;
        if (transitions.Count <= 1)
        {
            return true;
        }

        var destination = transitions[0].DestinationStateIndex;
        for (var i = 1; i < transitions.Count; ++i)
        {
            if (transitions[i].DestinationStateIndex != destination)
            {
                return false;
            }
        }

        return true;
    }

    // Adds transition from currentState into state corresponding to weighted state set from
    // outgoingTransitionInfo. If that state does not exist yet it is created and is put into stack
    // for further processing. This function returns false if determinization has failed.
    // That can happen because of 2 reasons:
    // - Too many states were created and it's not feasible to continue trying to determinize
    //   automaton further
    // - An infinite loop with non-converging weights was found. It leads to infinite number of states.
    //   So determinization is aborted early.
    bool TryAddTransition(
        Stack<(bool enter, Determinization.WeightedStateSet set)> destinationStack,
        Determinization.OutgoingTransition transition,
        Builder.StateBuilder currentState)
    {
        var destinations = transition.Destinations;
        if (!weightedStateSetToNewState.TryGetValue(destinations, out var destinationStateIndex))
        {
            if (builder.StatesCount == maxStatesBeforeStop)
            {
                // Too many states, determinization attempt failed
                return false;
            }

            // stateSetsInPath compares state indices only, so a hit may carry different weights
            var visitedWeightedStateSet = default(Determinization.WeightedStateSet);
            var sameSetVisited =
                destinations.Count > 1 &&
                stateSetsInPath.TryGetValue(destinations, out visitedWeightedStateSet);

            if (sameSetVisited && !destinations.Equals(visitedWeightedStateSet))
            {
                // We arrived into the same state set as before, but with different weights.
                // This is an infinite non-converging loop. Determinization has failed
                return false;
            }

            // Add new state to the result
            var destinationState = builder.AddState();
            weightedStateSetToNewState.Add(destinations, destinationState.Index);
            destinationStack.Push((true, destinations));

            if (destinations.Count > 1 && !sameSetVisited)
            {
                destinationStack.Push((false, destinations));
            }

            // Compute its ending weight: the sum over the set of (weight in set) * (end weight of original state)
            destinationState.SetEndWeight(Weight.Zero);
            for (var i = 0; i < destinations.Count; ++i)
            {
                var weightedState = destinations[i];
                var addedWeight = weightedState.Weight * this.States[weightedState.Index].EndWeight;
                destinationState.SetEndWeight(destinationState.EndWeight + addedWeight);
            }

            destinationStateIndex = destinationState.Index;
        }

        // Add transition to the destination state
        currentState.AddTransition(transition.ElementDistribution, transition.Weight, destinationStateIndex);
        return true;
    }
}
/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// The outcome is stored in <c>Data.DeterminizationState</c>; when that state is not
/// <c>DeterminizationState.Unknown</c> the stored outcome is returned without repeating the attempt.
/// The number of states of the result is limited to three times the current number of states
/// (and to <c>MaxStateCount</c>).
/// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
/// </remarks>
public bool TryDeterminize()
{
    if (this.Data.DeterminizationState != DeterminizationState.Unknown)
    {
        // The outcome of a previous attempt is already known
        return this.Data.DeterminizationState == DeterminizationState.IsDeterminized;
    }

    int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

    this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

    if (this.UsesGroups)
    {
        // Determinization would result in loss of group information, which we cannot allow
        this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
        return false;
    }

    // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
    // Such sets correspond to states of the resulting automaton.
    var weightedStateSetQueue = new Queue<Determinization.WeightedStateSet>();
    var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();
    var builder = new Builder();

    // The start state of the result corresponds to the original start state taken with unit weight
    var startWeightedStateSet = new Determinization.WeightedStateSet { { this.Start.Index, Weight.One } };
    weightedStateSetQueue.Enqueue(startWeightedStateSet);
    weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
    builder.Start.SetEndWeight(this.Start.EndWeight);

    while (weightedStateSetQueue.Count > 0)
    {
        // Take one unprocessed state of the resulting automaton
        Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
        var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
        var currentState = builder[currentStateIndex];

        // Find out what transitions we should add for this state
        var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

        // For each transition to add
        foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet) outgoingTransitionInfo in outgoingTransitionInfos)
        {
            TElementDistribution elementDistribution = outgoingTransitionInfo.Item1;
            Weight weight = outgoingTransitionInfo.Item2;
            Determinization.WeightedStateSet destWeightedStateSet = outgoingTransitionInfo.Item3;

            if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out int destinationStateIndex))
            {
                if (builder.StatesCount == maxStatesBeforeStop)
                {
                    // Too many states, determinization attempt failed.
                    // Record the failure so that subsequent calls return immediately
                    // instead of repeating the whole attempt.
                    this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
                    return false;
                }

                // Add new state to the result
                var destinationState = builder.AddState();
                weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);
                weightedStateSetQueue.Enqueue(destWeightedStateSet);

                // Compute its ending weight: the sum over the set of (weight in set) * (end weight of original state)
                destinationState.SetEndWeight(Weight.Zero);
                foreach (KeyValuePair<int, Weight> stateIdWithWeight in destWeightedStateSet)
                {
                    var addedWeight = stateIdWithWeight.Value * this.States[stateIdWithWeight.Key].EndWeight;
                    destinationState.SetEndWeight(destinationState.EndWeight + addedWeight);
                }

                destinationStateIndex = destinationState.Index;
            }

            // Add transition to the destination state
            currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
        }
    }

    var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);
    simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

    // PruneStatesWithLogEndWeightLessThan and LogValueOverride are read from and written to this same
    // instance, so only Data has to be replaced.
    this.Data = builder.GetData().WithDeterminizationState(DeterminizationState.IsDeterminized);

    return true;
}