Exemplo n.º 1
0
        /// <summary>
        /// Tries to turn this automaton into a deterministic one: an automaton without epsilon transitions in which,
        /// for every state and every element, at most one outgoing transition allows that element.
        /// </summary>
        /// <param name="maxStatesBeforeStop">
        /// Upper limit on the number of states of the determinized automaton. Must be positive and must not exceed
        /// <c>MaxStateCount</c>. The attempt is given up as soon as one more state than this would be required.
        /// </param>
        /// <returns>
        /// <see langword="true"/> if determinization succeeded and this automaton now holds the deterministic result,
        /// <see langword="false"/> if the automaton uses groups or the state limit was reached.
        /// </returns>
        /// <remarks>
        /// <c>MakeEpsilonFree</c> is invoked before any of the failure checks, so it runs even when the method
        /// ends up returning <see langword="false"/>.
        /// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
        /// </remarks>
        public bool TryDeterminize(int maxStatesBeforeStop)
        {
            Argument.CheckIfInRange(
                maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
                "maxStatesBeforeStop",
                "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

            // A deterministic automaton has no epsilon transitions, so remove them up front.
            this.MakeEpsilonFree();

            // Group information would be lost by determinization, which is not acceptable.
            if (this.UsesGroups())
            {
                return false;
            }

            var builder = new Builder();
            builder.Start.SetEndWeight(this.Start.EndWeight);

            // A weighted state set is a collection of (state index, weight) pairs over the states of the
            // original automaton. Every distinct set becomes exactly one state of the automaton being built.
            var initialSet = new Determinization.WeightedStateSet { { this.Start.Index, Weight.One } };
            var newStateIndexBySet = new Dictionary<Determinization.WeightedStateSet, int>
            {
                { initialSet, builder.StartStateIndex },
            };

            // Sets that already have a state in the builder but whose outgoing transitions are still missing.
            var pendingSets = new Queue<Determinization.WeightedStateSet>();
            pendingSets.Enqueue(initialSet);

            while (pendingSets.Count != 0)
            {
                Determinization.WeightedStateSet sourceSet = pendingSets.Dequeue();
                var sourceState = builder[newStateIndexBySet[sourceSet]];

                var transitionInfos = this.GetOutgoingTransitionsForDeterminization(sourceSet);
                foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet) transitionInfo in transitionInfos)
                {
                    // Item1 = element distribution, Item2 = transition weight, Item3 = destination set.
                    Determinization.WeightedStateSet targetSet = transitionInfo.Item3;

                    if (!newStateIndexBySet.TryGetValue(targetSet, out int targetIndex))
                    {
                        // The destination set has not been seen yet, so it needs a brand new state.
                        if (builder.StatesCount == maxStatesBeforeStop)
                        {
                            // The state budget is exhausted: give up.
                            return false;
                        }

                        var targetState = builder.AddState();
                        targetIndex = targetState.Index;
                        newStateIndexBySet.Add(targetSet, targetIndex);
                        pendingSets.Enqueue(targetSet);

                        // End weight of the new state: sum over the set of (member weight * member end weight).
                        targetState.SetEndWeight(Weight.Zero);
                        foreach (KeyValuePair<int, Weight> member in targetSet)
                        {
                            Weight contribution = Weight.Product(member.Value, this.States[member.Key].EndWeight);
                            targetState.SetEndWeight(Weight.Sum(targetState.EndWeight, contribution));
                        }
                    }

                    sourceState.AddTransition(transitionInfo.Item1, transitionInfo.Item2, targetIndex);
                }
            }

            // Determinization emits a separate transition for each segment, so merge the parallel ones.
            var simplification = new Simplification(builder, this.PruneTransitionsWithLogWeightLessThan);
            simplification.MergeParallelTransitions();

            // Carry the settings of this automaton over to the result, then take over its contents.
            var determinized = builder.GetAutomaton();
            determinized.LogValueOverride = this.LogValueOverride;
            determinized.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
            this.SwapWith(determinized);

            return true;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Attempts to determinize the automaton,
        /// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
        /// and there are no epsilon transitions.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// The outcome (success or failure) is stored in <c>Data.IsDeterminized</c>; once it is set, later calls
        /// return the stored answer without repeating the work. The attempt fails if the automaton uses groups,
        /// if more than <c>min(3 * States.Count, MaxStateCount)</c> states would be needed, or if a loop with
        /// non-converging weights is detected.
        /// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
        /// </remarks>
        public bool TryDeterminize()
        {
            // Reuse the recorded outcome of an earlier attempt, if there is one.
            if (this.Data.IsDeterminized != null)
            {
                return this.Data.IsDeterminized == true;
            }

            // State budget, derived from the state count before epsilon removal.
            int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

            this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

            if (this.UsesGroups)
            {
                // Determinization will result in loss of group information, which we cannot allow
                this.Data = this.Data.With(isDeterminized: false);
                return false;
            }

            var builder = new Builder();

            builder.Start.SetEndWeight(this.Start.EndWeight);

            // Depth-first traversal stack. Each entry is a weighted state set plus a flag telling whether
            // the set is being entered (true) or left (false).
            var weightedStateSetStack = new Stack<(bool enter, Determinization.WeightedStateSet set)>();

            // Scratch stack that collects the destinations found while processing one set, so that they can
            // be moved onto the main stack in reverse order afterwards.
            var enqueuedWeightedStateSetStack = new Stack<(bool enter, Determinization.WeightedStateSet set)>();

            // Maps every weighted state set seen so far to the index of its state in the builder.
            var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();

            // This dictionary is used to track sets currently in path from root. If we've found a set of states
            // that we have already seen during current path from root, but weights are different, that means
            // we've found a non-converging loop - infinite number of weighted sets will be generated if
            // we continue traversal and determinization will fail. For performance reasons we want to fail
            // fast if such loop is found.
            var stateSetsInPath = new Dictionary<Determinization.WeightedStateSet, Determinization.WeightedStateSet>(
                Determinization.WeightedStateSetOnlyStateComparer.Instance);

            var startWeightedStateSet = new Determinization.WeightedStateSet(this.Start.Index);

            weightedStateSetStack.Push((true, startWeightedStateSet));
            weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);

            while (weightedStateSetStack.Count > 0)
            {
                // Take one unprocessed state of the resulting automaton
                var (enter, currentWeightedStateSet) = weightedStateSetStack.Pop();

                if (enter)
                {
                    if (currentWeightedStateSet.Count > 1)
                    {
                        // Only sets with more than 1 state can lead to infinite loops with different weights.
                        // Because if there's only 1 state, then its weight is always Weight.One.
                        if (!stateSetsInPath.ContainsKey(currentWeightedStateSet))
                        {
                            stateSetsInPath.Add(currentWeightedStateSet, currentWeightedStateSet);
                        }

                        // Schedule the matching "leave" entry; it is popped after everything pushed on top of it.
                        weightedStateSetStack.Push((false, currentWeightedStateSet));
                    }

                    if (!EnqueueOutgoingTransitions(currentWeightedStateSet))
                    {
                        // Remember the failure so that later calls do not repeat the attempt.
                        this.Data = this.Data.With(isDeterminized: false);
                        return false;
                    }
                }
                else
                {
                    // Leaving the set: it is no longer on the current path from the root.
                    stateSetsInPath.Remove(currentWeightedStateSet);
                }
            }

            var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);

            simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

            // Only Data is replaced. PruneStatesWithLogEndWeightLessThan and LogValueOverride are read from
            // and kept on this instance, so they need no copying.
            this.Data = builder.GetData().With(isDeterminized: true);

            return true;

            // Adds to the builder all transitions leaving the state that corresponds to currentWeightedStateSet
            // and schedules newly discovered destination sets for processing.
            // Returns false if determinization has to be aborted (see TryAddTransition).
            bool EnqueueOutgoingTransitions(Determinization.WeightedStateSet currentWeightedStateSet)
            {
                var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
                var currentState = builder[currentStateIndex];

                // Common special-case: definitely deterministic transitions from single state.
                // In this case no complicated determinization procedure is needed.
                if (currentWeightedStateSet.Count == 1 &&
                    AllDestinationsAreSame(currentWeightedStateSet[0].Index))
                {
                    Debug.Assert(currentWeightedStateSet[0].Weight == Weight.One);

                    // Copy the transitions of the single source state as they are.
                    var sourceState = this.States[currentWeightedStateSet[0].Index];
                    foreach (var transition in sourceState.Transitions)
                    {
                        var destinationStates = new Determinization.WeightedStateSet(transition.DestinationStateIndex);
                        var outgoingTransitionInfo = new Determinization.OutgoingTransition(
                            transition.ElementDistribution.Value, transition.Weight, destinationStates);
                        if (!TryAddTransition(enqueuedWeightedStateSetStack, outgoingTransitionInfo, currentState))
                        {
                            return false;
                        }
                    }
                }
                else
                {
                    // Find out what transitions we should add for this state
                    var outgoingTransitions =
                        this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);
                    foreach (var outgoingTransition in outgoingTransitions)
                    {
                        if (!TryAddTransition(enqueuedWeightedStateSetStack, outgoingTransition, currentState))
                        {
                            return false;
                        }
                    }
                }

                // Move the collected entries onto the main stack. Popping reverses their order, so the
                // destination found first ends up on top and is processed first.
                while (enqueuedWeightedStateSetStack.Count > 0)
                {
                    weightedStateSetStack.Push(enqueuedWeightedStateSetStack.Pop());
                }

                return true;
            }

            // Checks that all transitions from state end up in the same destination. This is used
            // as a very fast "is deterministic" check, that doesn't care about distributions.
            // State can have deterministic transitions with different destinations. This case will be
            // handled by slow path.
            bool AllDestinationsAreSame(int stateIndex)
            {
                var transitions = this.States[stateIndex].Transitions;

                if (transitions.Count <= 1)
                {
                    return true;
                }

                var destination = transitions[0].DestinationStateIndex;

                for (var i = 1; i < transitions.Count; ++i)
                {
                    if (transitions[i].DestinationStateIndex != destination)
                    {
                        return false;
                    }
                }

                return true;
            }

            // Adds transition from currentState into state corresponding to weighted state set from
            // the given transition. If that state does not exist yet it is created and is put into
            // destinationStack for further processing. This function returns false if determinization has failed.
            // That can happen because of 2 reasons:
            // - Too many states were created and it's not feasible to continue trying to determinize
            //   automaton further
            // - An infinite loop with not converging weights was found. It leads to infinite number of states.
            //   So determinization is aborted early.
            bool TryAddTransition(
                Stack<(bool enter, Determinization.WeightedStateSet set)> destinationStack,
                Determinization.OutgoingTransition transition,
                Builder.StateBuilder currentState)
            {
                var destinations = transition.Destinations;

                if (!weightedStateSetToNewState.TryGetValue(destinations, out var destinationStateIndex))
                {
                    if (builder.StatesCount == maxStatesBeforeStop)
                    {
                        // Too many states, determinization attempt failed
                        return false;
                    }

                    // Look for a set on the current path that stateSetsInPath's comparer treats as the same key.
                    var visitedWeightedStateSet = default(Determinization.WeightedStateSet);
                    var sameSetVisited =
                        destinations.Count > 1 &&
                        stateSetsInPath.TryGetValue(destinations, out visitedWeightedStateSet);

                    if (sameSetVisited && !destinations.Equals(visitedWeightedStateSet))
                    {
                        // We arrived into the same state set as before, but with different weights.
                        // This is an infinite non-converging loop. Determinization has failed
                        return false;
                    }

                    // Add new state to the result
                    var destinationState = builder.AddState();
                    weightedStateSetToNewState.Add(destinations, destinationState.Index);
                    destinationStack.Push((true, destinations));

                    if (destinations.Count > 1 && !sameSetVisited)
                    {
                        destinationStack.Push((false, destinations));
                    }

                    // Compute its ending weight: sum over the set of (member weight * member end weight)
                    destinationState.SetEndWeight(Weight.Zero);
                    for (var i = 0; i < destinations.Count; ++i)
                    {
                        var weightedState = destinations[i];
                        var addedWeight = weightedState.Weight * this.States[weightedState.Index].EndWeight;
                        destinationState.SetEndWeight(destinationState.EndWeight + addedWeight);
                    }

                    destinationStateIndex = destinationState.Index;
                }

                // Add transition to the destination state
                currentState.AddTransition(transition.ElementDistribution, transition.Weight, destinationStateIndex);
                return true;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Attempts to determinize the automaton,
        /// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
        /// and there are no epsilon transitions.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// The outcome is stored in <c>Data.DeterminizationState</c>; once it differs from
        /// <c>DeterminizationState.Unknown</c>, later calls return the stored answer without repeating the work.
        /// The attempt fails if the automaton uses groups or if more than
        /// <c>min(3 * States.Count, MaxStateCount)</c> states would be needed.
        /// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
        /// </remarks>
        public bool TryDeterminize()
        {
            // Reuse the recorded outcome of an earlier attempt, if there is one.
            if (this.Data.DeterminizationState != DeterminizationState.Unknown)
            {
                return this.Data.DeterminizationState == DeterminizationState.IsDeterminized;
            }

            // State budget, derived from the state count before epsilon removal.
            int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

            this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

            if (this.UsesGroups)
            {
                // Determinization will result in loss of group information, which we cannot allow
                this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
                return false;
            }

            // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
            // Such sets correspond to states of the resulting automaton.
            var weightedStateSetQueue = new Queue<Determinization.WeightedStateSet>();
            var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();
            var builder = new Builder();

            var startWeightedStateSet = new Determinization.WeightedStateSet
            {
                { this.Start.Index, Weight.One }
            };

            weightedStateSetQueue.Enqueue(startWeightedStateSet);
            weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
            builder.Start.SetEndWeight(this.Start.EndWeight);

            while (weightedStateSetQueue.Count > 0)
            {
                // Take one unprocessed state of the resulting automaton
                Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
                var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
                var currentState = builder[currentStateIndex];

                // Find out what transitions we should add for this state
                var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

                // For each transition to add
                foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet) outgoingTransitionInfo in outgoingTransitionInfos)
                {
                    TElementDistribution elementDistribution = outgoingTransitionInfo.Item1;
                    Weight weight = outgoingTransitionInfo.Item2;
                    Determinization.WeightedStateSet destWeightedStateSet = outgoingTransitionInfo.Item3;

                    if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out int destinationStateIndex))
                    {
                        if (builder.StatesCount == maxStatesBeforeStop)
                        {
                            // Too many states, determinization attempt failed. Record the failure, exactly as
                            // the group check above does, so that later calls return immediately instead of
                            // repeating the whole attempt.
                            this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
                            return false;
                        }

                        // Add new state to the result
                        var destinationState = builder.AddState();
                        weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);
                        weightedStateSetQueue.Enqueue(destWeightedStateSet);

                        // Compute its ending weight: sum over the set of (member weight * member end weight)
                        destinationState.SetEndWeight(Weight.Zero);
                        foreach (KeyValuePair<int, Weight> stateIdWithWeight in destWeightedStateSet)
                        {
                            var addedWeight = stateIdWithWeight.Value * this.States[stateIdWithWeight.Key].EndWeight;
                            destinationState.SetEndWeight(destinationState.EndWeight + addedWeight);
                        }

                        destinationStateIndex = destinationState.Index;
                    }

                    // Add transition to the destination state
                    currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
                }
            }

            var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);

            simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

            // Only Data is replaced. PruneStatesWithLogEndWeightLessThan and LogValueOverride are read from
            // and kept on this instance, so they need no copying.
            this.Data = builder.GetData().WithDeterminizationState(DeterminizationState.IsDeterminized);

            return true;
        }