Ejemplo n.º 1
0
        /// <summary>
        /// Enumerates the transitions that leave one state of the determinized automaton.
        /// </summary>
        /// <param name="sourceStateSet">The state of the determinized automaton, given as a weighted set
        /// of states of the original automaton.</param>
        /// <returns>
        /// One <see cref="Determinization.OutgoingTransition"/> per non-empty character segment during which
        /// the destination builder holds at least one state. Each transition carries the character range of
        /// the segment, a weight equal to the segment length multiplied by the weight reported by the builder,
        /// and the destination state set reported by the builder.
        /// </returns>
        /// <remarks>
        /// This is an iterator method: the sweep over the segment bounds advances only while the
        /// returned sequence is being enumerated.
        /// </remarks>
        protected override IEnumerable<Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceStateSet)
        {
            var activeDestinations = Determinization.WeightedStateSetBuilder.Create();
            var bounds = CollectCharSegmentBounds(sourceStateSet);
            var segmentStart = 0;

            foreach (var bound in bounds)
            {
                // A segment is emitted only when it spans at least one character
                // and some destination state is currently active.
                var segmentIsNonEmpty = segmentStart != bound.Position;
                if (segmentIsNonEmpty && activeDestinations.StatesCount() > 0)
                {
                    var (destinations, destinationsWeight) = activeDestinations.Get();
                    var charCount = bound.Position - segmentStart;
                    var segmentChars = ImmutableDiscreteChar.InRange((char)segmentStart, (char)(bound.Position - 1));
                    var transitionWeight = Weight.FromValue(charCount) * destinationsWeight;

                    yield return new Determinization.OutgoingTransition(segmentChars, transitionWeight, destinations);
                }

                // The next segment begins at this bound; an end bound retires its destination,
                // any other bound activates it.
                segmentStart = bound.Position;
                if (!bound.IsEnd)
                {
                    activeDestinations.Add(bound.DestinationStateId, bound.Weight);
                }
                else
                {
                    activeDestinations.Remove(bound.DestinationStateId, bound.Weight);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Gathers the character segment bounds of every transition that leaves a state
        /// contained in the given weighted state set.
        /// </summary>
        /// <param name="sourceStateSet">Weighted set of states of this automaton whose transitions are inspected.</param>
        /// <returns>The collected bounds after sorting them with the list's default ordering.</returns>
        private List<TransitionCharSegmentBound> CollectCharSegmentBounds(
            Determinization.WeightedStateSet sourceStateSet)
        {
            var bounds = new List<TransitionCharSegmentBound>();

            var position = 0;
            while (position < sourceStateSet.Count)
            {
                var weightedState = sourceStateSet[position];

                // Every transition contributes its bounds, tagged with the weight of its source state.
                foreach (var transition in this.States[weightedState.Index].Transitions)
                {
                    AddTransitionCharSegmentBounds(transition, weightedState.Weight, bounds);
                }

                ++position;
            }

            bounds.Sort();
            return bounds;
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Determinization hook for an automaton type that does not provide determinization.
 /// </summary>
 /// <param name="sourceState">The source state of the determinized automaton, represented as
 /// a set of (stateId, weight) pairs. It is not inspected.</param>
 /// <returns>This method never returns normally.</returns>
 /// <exception cref="NotImplementedException">Always thrown, as soon as the method is called.</exception>
 protected override IEnumerable<Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
     Determinization.WeightedStateSet sourceState) =>
     throw new NotImplementedException("Determinization is not yet supported for this type of automata.");
Ejemplo n.º 4
0
        /// <summary>
        /// Attempts to determinize the automaton,
        /// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
        /// and there are no epsilon transitions.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// <para>
        /// The outcome is cached in <c>Data.IsDeterminized</c>: once that value is set, later calls return it
        /// without repeating the work.
        /// </para>
        /// <para>
        /// The attempt fails when the automaton uses groups, when the number of states created for the result
        /// reaches min(3 * current state count, <c>MaxStateCount</c>), or when a loop with non-converging
        /// weights is detected.
        /// </para>
        /// <para>See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.</para>
        /// </remarks>
        public bool TryDeterminize()
        {
            // Reuse the cached outcome of a previous attempt, if there is one.
            if (this.Data.IsDeterminized != null)
            {
                return(this.Data.IsDeterminized == true);
            }

            // State budget for the result. Note that it is taken from the state count
            // as it is before MakeEpsilonFree() is called.
            int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

            this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

            if (this.UsesGroups)
            {
                // Determinization would result in a loss of group information, which we cannot allow
                this.Data = this.Data.With(isDeterminized: false);
                return(false);
            }

            var builder = new Builder();

            builder.Start.SetEndWeight(this.Start.EndWeight);

            // Main depth-first traversal stack. Entries with enter == true are sets that still have to be
            // expanded; entries with enter == false mark the point where a set leaves the current path.
            var weightedStateSetStack         = new Stack <(bool enter, Determinization.WeightedStateSet set)>();
            // Temporary stack that collects the sets discovered while expanding a single state.
            var enqueuedWeightedStateSetStack = new Stack <(bool enter, Determinization.WeightedStateSet set)>();
            // Maps every weighted state set seen so far to the index of its state in the builder.
            var weightedStateSetToNewState    = new Dictionary <Determinization.WeightedStateSet, int>();
            // This dictionary is used to track sets currently in the path from the root. It is keyed by the
            // states only (weights are ignored by the comparer). If we find a set of states that we have
            // already seen on the current path from the root, but with different weights, that means
            // we've found a non-converging loop - an infinite number of weighted sets would be generated if
            // we continued the traversal, and determinization would fail. For performance reasons we want
            // to fail fast if such a loop is found.
            var stateSetsInPath = new Dictionary <Determinization.WeightedStateSet, Determinization.WeightedStateSet>(
                Determinization.WeightedStateSetOnlyStateComparer.Instance);

            var startWeightedStateSet = new Determinization.WeightedStateSet(this.Start.Index);

            weightedStateSetStack.Push((true, startWeightedStateSet));
            weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);

            while (weightedStateSetStack.Count > 0)
            {
                // Take one unprocessed state of the resulting automaton
                var(enter, currentWeightedStateSet) = weightedStateSetStack.Pop();

                if (enter)
                {
                    if (currentWeightedStateSet.Count > 1)
                    {
                        // Only sets with more than 1 state can lead to infinite loops with different weights,
                        // because if there's only 1 state, then its weight is always Weight.One.
                        if (!stateSetsInPath.ContainsKey(currentWeightedStateSet))
                        {
                            stateSetsInPath.Add(currentWeightedStateSet, currentWeightedStateSet);
                        }

                        // Schedule the matching "leave" marker; it is popped after everything pushed
                        // on top of it by the expansion below has been processed.
                        weightedStateSetStack.Push((false, currentWeightedStateSet));
                    }

                    if (!EnqueueOutgoingTransitions(currentWeightedStateSet))
                    {
                        // Remember the failure so that later calls return immediately.
                        this.Data = this.Data.With(isDeterminized: false);
                        return(false);
                    }
                }
                else
                {
                    // The set is no longer on the current path from the root.
                    stateSetsInPath.Remove(currentWeightedStateSet);
                }
            }

            var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);

            simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

            // Replace the automaton data with the determinized result and cache the success.
            this.Data = builder.GetData().With(isDeterminized: true);
            // NOTE(review): the next two statements assign each property to itself. They look like no-ops
            // unless the setters have side effects - confirm before removing them.
            this.PruneStatesWithLogEndWeightLessThan = this.PruneStatesWithLogEndWeightLessThan;
            this.LogValueOverride = this.LogValueOverride;

            return(true);

            // Adds to the builder all transitions leaving the state that corresponds to
            // currentWeightedStateSet and schedules every newly discovered state set for processing.
            // Returns false as soon as TryAddTransition reports a failure.
            bool EnqueueOutgoingTransitions(Determinization.WeightedStateSet currentWeightedStateSet)
            {
                var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
                var currentState      = builder[currentStateIndex];

                // Common special-case: definitely deterministic transitions from a single state.
                // In this case no complicated determinization procedure is needed.
                if (currentWeightedStateSet.Count == 1 &&
                    AllDestinationsAreSame(currentWeightedStateSet[0].Index))
                {
                    Debug.Assert(currentWeightedStateSet[0].Weight == Weight.One);

                    // Copy every transition as is; its destination becomes a single-state set.
                    var sourceState = this.States[currentWeightedStateSet[0].Index];
                    foreach (var transition in sourceState.Transitions)
                    {
                        var destinationStates      = new Determinization.WeightedStateSet(transition.DestinationStateIndex);
                        var outgoingTransitionInfo = new Determinization.OutgoingTransition(
                            transition.ElementDistribution.Value, transition.Weight, destinationStates);
                        if (!TryAddTransition(enqueuedWeightedStateSetStack, outgoingTransitionInfo, currentState))
                        {
                            return(false);
                        }
                    }
                }
                else
                {
                    // Find out what transitions we should add for this state
                    var outgoingTransitions =
                        this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);
                    foreach (var outgoingTransition in outgoingTransitions)
                    {
                        if (!TryAddTransition(enqueuedWeightedStateSetStack, outgoingTransition, currentState))
                        {
                            return(false);
                        }
                    }
                }

                // Move the newly discovered entries onto the main stack. Popping from one stack and
                // pushing onto the other reverses their order.
                while (enqueuedWeightedStateSetStack.Count > 0)
                {
                    weightedStateSetStack.Push(enqueuedWeightedStateSetStack.Pop());
                }

                return(true);
            }

            // Checks that all transitions from the state end up in the same destination. This is used
            // as a very fast "is deterministic" check that doesn't care about distributions.
            // A state can have deterministic transitions with different destinations. That case is
            // handled by the slow path.
            bool AllDestinationsAreSame(int stateIndex)
            {
                var transitions = this.States[stateIndex].Transitions;

                // Zero or one transition: trivially a single destination.
                if (transitions.Count <= 1)
                {
                    return(true);
                }

                var destination = transitions[0].DestinationStateIndex;

                for (var i = 1; i < transitions.Count; ++i)
                {
                    if (transitions[i].DestinationStateIndex != destination)
                    {
                        return(false);
                    }
                }

                return(true);
            }

            // Adds a transition from currentState into the state corresponding to the weighted state set
            // from the given transition. If that state does not exist yet, it is created and pushed onto
            // destinationStack for further processing. This function returns false if determinization
            // has failed. That can happen for 2 reasons:
            // - Too many states were created and it's not feasible to continue trying to determinize
            //   the automaton further.
            // - An infinite loop with non-converging weights was found. It leads to an infinite number
            //   of states, so determinization is aborted early.
            bool TryAddTransition(
                Stack <(bool enter, Determinization.WeightedStateSet set)> destinationStack,
                Determinization.OutgoingTransition transition,
                Builder.StateBuilder currentState)
            {
                var destinations = transition.Destinations;

                if (!weightedStateSetToNewState.TryGetValue(destinations, out var destinationStateIndex))
                {
                    if (builder.StatesCount == maxStatesBeforeStop)
                    {
                        // Too many states, determinization attempt failed
                        return(false);
                    }

                    // Look for a set with the same states on the current path (single-state sets are skipped).
                    var visitedWeightedStateSet = default(Determinization.WeightedStateSet);
                    var sameSetVisited          =
                        destinations.Count > 1 &&
                        stateSetsInPath.TryGetValue(destinations, out visitedWeightedStateSet);

                    if (sameSetVisited && !destinations.Equals(visitedWeightedStateSet))
                    {
                        // We arrived at the same state set as before, but with different weights.
                        // This is an infinite non-converging loop. Determinization has failed.
                        return(false);
                    }

                    // Add a new state to the result
                    var destinationState = builder.AddState();
                    weightedStateSetToNewState.Add(destinations, destinationState.Index);
                    destinationStack.Push((true, destinations));

                    if (destinations.Count > 1 && !sameSetVisited)
                    {
                        destinationStack.Push((false, destinations));
                    }

                    // Compute its ending weight: the sum over the set of
                    // (weight of the state in the set) * (end weight of that state in this automaton).
                    destinationState.SetEndWeight(Weight.Zero);
                    for (var i = 0; i < destinations.Count; ++i)
                    {
                        var weightedState = destinations[i];
                        var addedWeight   = weightedState.Weight * this.States[weightedState.Index].EndWeight;
                        destinationState.SetEndWeight(destinationState.EndWeight + addedWeight);
                    }

                    destinationStateIndex = destinationState.Index;
                }

                // Add the transition to the destination state
                currentState.AddTransition(transition.ElementDistribution, transition.Weight, destinationStateIndex);
                return(true);
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Overridden in the derived classes to compute a set of outgoing transitions
 /// from a given state of the determinization result.
 /// </summary>
 /// <param name="sourceState">The source state of the determinized automaton represented as
 /// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
 /// <returns>
 /// A sequence of <see cref="Determinization.OutgoingTransition"/> values, one for each outgoing
 /// transition from <paramref name="sourceState"/>.
 /// Each value describes the element distribution and the weight of the transition,
 /// together with the weighted state set that forms the destination state.
 /// </returns>
 protected abstract IEnumerable <Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
     Determinization.WeightedStateSet sourceState);
Ejemplo n.º 6
0
        /// <summary>
        /// Builds the outgoing transitions of one state of the determinization result.
        /// </summary>
        /// <param name="sourceState">The source state of the determinized automaton, represented as
        /// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
        /// <returns>
        /// A list of (element distribution, weight, weighted state set) tuples, one per character segment
        /// whose accumulated weight is above the numerical threshold. The distribution covers the characters
        /// of the segment, the weight is the segment length multiplied by the accumulated weight, and the
        /// state set holds the destination states with their weights divided by the accumulated weight.
        /// </returns>
        protected override List<(DiscreteChar, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceState)
        {
            // Weights whose log-value does not exceed this threshold are treated as
            // products of numerical inaccuracies and are not turned into transitions.
            const double LogEps = -35;

            // Collect the numbered segment bounds of all transitions leaving the states of the set.
            var indexedBounds = new List<(int, TransitionCharSegmentBound)>();
            var transitionCount = 0;

            foreach (KeyValuePair<int, Weight> weightedState in sourceState)
            {
                var stateTransitions = this.States[weightedState.Key].Transitions;
                foreach (var transition in stateTransitions)
                {
                    AddTransitionCharSegmentBounds(transition, weightedState.Value, indexedBounds);
                }

                transitionCount += stateTransitions.Count;
            }

            // Order a copy of the bounds by the bounds' own comparison; the unsorted list is still
            // needed below to look bounds up by their number.
            var sortedBounds = indexedBounds.ToArray();
            if (transitionCount > 1)
            {
                Array.Sort(sortedBounds, (left, right) => left.Item2.CompareTo(right.Item2));
            }

            var outgoing = new List<(DiscreteChar, Weight, Determinization.WeightedStateSet)>();
            var totalWeight = Weight.Zero;

            // Every destination state starts with zero accumulated weight.
            var weightByDestination = new Dictionary<int, Weight>();
            foreach (var indexedBound in indexedBounds)
            {
                weightByDestination[indexedBound.Item2.DestinationStateId] = Weight.Zero;
            }

            var openSegments = new HashSet<TransitionCharSegmentBound>();
            int segmentStart = char.MinValue;

            foreach (var (boundNumber, bound) in sortedBounds)
            {
                if (totalWeight.LogValue > LogEps && segmentStart < bound.Bound)
                {
                    // Emit the segment that ends right before this bound.
                    var lastChar = (char)(bound.Bound - 1);
                    var charCount = lastChar - segmentStart + 1;
                    var segmentChars = DiscreteChar.InRange((char)segmentStart, lastChar);

                    var destinations = new Determinization.WeightedStateSet();
                    foreach (KeyValuePair<int, Weight> entry in weightByDestination)
                    {
                        if (entry.Value.LogValue > LogEps)
                        {
                            destinations.Add(entry.Key, Weight.Product(entry.Value, Weight.Inverse(totalWeight)));
                        }
                    }

                    outgoing.Add((segmentChars, Weight.Product(Weight.FromValue(charCount), totalWeight), destinations));
                }

                // The next segment begins at this bound.
                segmentStart = bound.Bound;

                if (bound.IsStart)
                {
                    openSegments.Add(bound);
                    totalWeight = Weight.Sum(totalWeight, bound.Weight);
                    weightByDestination[bound.DestinationStateId] =
                        Weight.Sum(weightByDestination[bound.DestinationStateId], bound.Weight);
                    continue;
                }

                Debug.Assert(weightByDestination.ContainsKey(bound.DestinationStateId), "We shouldn't exit a state we didn't enter.");

                // The bound number minus one is used as the index of the matching start bound in the
                // unsorted list (the original code notes that an end bound follows its start bound there).
                openSegments.Remove(indexedBounds[boundNumber - 1].Item2);

                if (double.IsInfinity(bound.Weight.Value))
                {
                    // Infinite weights cannot be subtracted, so both sums are rebuilt
                    // from the segments that are still open.
                    var rebuiltTotal = Weight.Zero;
                    var rebuiltForDestination = Weight.Zero;
                    foreach (var open in openSegments)
                    {
                        rebuiltTotal = Weight.Sum(rebuiltTotal, open.Weight);
                        if (open.DestinationStateId == bound.DestinationStateId)
                        {
                            rebuiltForDestination = Weight.Sum(rebuiltForDestination, open.Weight);
                        }
                    }

                    totalWeight = rebuiltTotal;
                    weightByDestination[bound.DestinationStateId] = rebuiltForDestination;
                }
                else
                {
                    // With no open segments left the total is reset to exactly zero.
                    totalWeight = openSegments.Count == 0
                        ? Weight.Zero
                        : Weight.AbsoluteDifference(totalWeight, bound.Weight);

                    weightByDestination[bound.DestinationStateId] = Weight.AbsoluteDifference(
                        weightByDestination[bound.DestinationStateId], bound.Weight);
                }
            }

            return outgoing;
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Enumerates the outgoing transitions of one state of the determinization result.
        /// </summary>
        /// <param name="sourceStateSet">The source state of the determinized automaton, represented as
        /// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
        /// <returns>
        /// One <see cref="Determinization.OutgoingTransition"/> per non-empty character segment that is
        /// covered by at least one open transition segment. The transition weight is the product of the
        /// segment length, the accumulated weight of the segment and the weight reported by the
        /// destination state set builder.
        /// </returns>
        /// <remarks>
        /// This is an iterator method: nothing is computed until the returned sequence is enumerated.
        /// </remarks>
        protected override IEnumerable<Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceStateSet)
        {
            // Collect the segment bounds of all transitions leaving the states of the set, then sort them.
            var bounds = new List<TransitionCharSegmentBound>();
            for (var position = 0; position < sourceStateSet.Count; ++position)
            {
                var weightedState = sourceStateSet[position];
                foreach (var transition in this.States[weightedState.Index].Transitions)
                {
                    AddTransitionCharSegmentBounds(transition, weightedState.Weight, bounds);
                }
            }

            bounds.Sort();

            // Sweep over the bounds, keeping the total weight and the per-destination weight
            // of the segments that are currently open.
            var totalWeight = WeightSum.Zero();
            var weightByDestination = new Dictionary<int, WeightSum>();
            var segmentStart = (int)char.MinValue;
            var destinationsBuilder = Determinization.WeightedStateSetBuilder.Create();

            foreach (var bound in bounds)
            {
                if (totalWeight.Count != 0 && segmentStart < bound.Bound)
                {
                    // Emit the segment that ends right before this bound.
                    var lastChar = (char)(bound.Bound - 1);
                    var charCount = lastChar - segmentStart + 1;
                    var segmentChars = DiscreteChar.InRange((char)segmentStart, lastChar);
                    var inverseTotal = Weight.Inverse(totalWeight.Sum);

                    destinationsBuilder.Reset();
                    foreach (var entry in weightByDestination)
                    {
                        destinationsBuilder.Add(entry.Key, entry.Value.Sum * inverseTotal);
                    }

                    var (destinations, destinationsWeight) = destinationsBuilder.Get();
                    var transitionWeight = Weight.Product(
                        Weight.FromValue(charCount),
                        totalWeight.Sum,
                        destinationsWeight);

                    yield return new Determinization.OutgoingTransition(segmentChars, transitionWeight, destinations);
                }

                // The next segment begins at this bound.
                segmentStart = bound.Bound;

                if (bound.IsStart)
                {
                    totalWeight += bound.Weight;
                    if (!weightByDestination.TryGetValue(bound.DestinationStateId, out var accumulated))
                    {
                        weightByDestination[bound.DestinationStateId] = new WeightSum(bound.Weight);
                    }
                    else
                    {
                        weightByDestination[bound.DestinationStateId] = accumulated + bound.Weight;
                    }
                }
                else
                {
                    Debug.Assert(weightByDestination.ContainsKey(bound.DestinationStateId), "We shouldn't exit a state we didn't enter.");
                    Debug.Assert(!bound.Weight.IsInfinity);
                    totalWeight -= bound.Weight;

                    // Drop the destination once none of its segments remain open.
                    var remaining = weightByDestination[bound.DestinationStateId] - bound.Weight;
                    if (remaining.Count != 0)
                    {
                        weightByDestination[bound.DestinationStateId] = remaining;
                    }
                    else
                    {
                        weightByDestination.Remove(bound.DestinationStateId);
                    }
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Attempts to determinize the automaton, i.e. to rebuild it so that for every state and every element
        /// at most one transition allows for that element, and no epsilon transitions remain.
        /// </summary>
        /// <param name="maxStatesBeforeStop">
        /// The maximum number of states the resulting automaton can have. It must be positive and must not
        /// exceed the maximum number of states allowed in an automaton. When the result reaches this many
        /// states and one more is needed, the process is aborted.
        /// </param>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> if the automaton uses groups or the state limit was hit.
        /// </returns>
        /// <remarks>
        /// Epsilon transitions are removed before the group check and before the state limit can be hit.
        /// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
        /// </remarks>
        public bool TryDeterminize(int maxStatesBeforeStop)
        {
            Argument.CheckIfInRange(
                maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
                nameof(maxStatesBeforeStop),
                "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

            // Deterministic automata cannot have epsilon-transitions.
            this.MakeEpsilonFree();

            if (this.UsesGroups())
            {
                // Determinization would lose the group information, which is not acceptable.
                return false;
            }

            // Each state of the result corresponds to a weighted state set: a set of (stateId, weight)
            // pairs whose state ids refer to states of this automaton.
            var determinized = new Builder();
            var pendingSets = new Queue<Determinization.WeightedStateSet>();
            var newStateBySet = new Dictionary<Determinization.WeightedStateSet, int>();

            var startSet = new Determinization.WeightedStateSet();
            startSet.Add(this.Start.Index, Weight.One);

            pendingSets.Enqueue(startSet);
            newStateBySet.Add(startSet, determinized.StartStateIndex);
            determinized.Start.SetEndWeight(this.Start.EndWeight);

            while (pendingSets.Count > 0)
            {
                // Expand the next state of the result that has not been processed yet.
                var sourceSet = pendingSets.Dequeue();
                var sourceState = determinized[newStateBySet[sourceSet]];

                foreach (var (elementDistribution, weight, destinationSet) in this.GetOutgoingTransitionsForDeterminization(sourceSet))
                {
                    if (!newStateBySet.TryGetValue(destinationSet, out var destinationIndex))
                    {
                        if (determinized.StatesCount == maxStatesBeforeStop)
                        {
                            // The state limit has been reached: give up.
                            return false;
                        }

                        // First time this set is seen: create its state and schedule it for expansion.
                        var destinationState = determinized.AddState();
                        newStateBySet.Add(destinationSet, destinationState.Index);
                        pendingSets.Enqueue(destinationSet);

                        // Its end weight is the weighted sum of the end weights of the member states.
                        destinationState.SetEndWeight(Weight.Zero);
                        foreach (KeyValuePair<int, Weight> member in destinationSet)
                        {
                            var contribution = Weight.Product(member.Value, this.States[member.Key].EndWeight);
                            destinationState.SetEndWeight(Weight.Sum(destinationState.EndWeight, contribution));
                        }

                        destinationIndex = destinationState.Index;
                    }

                    sourceState.AddTransition(elementDistribution, weight, destinationIndex);
                }
            }

            // Determinization produces a separate transition for each segment; merge the parallel ones.
            var simplification = new Simplification(determinized, this.PruneTransitionsWithLogWeightLessThan);
            simplification.MergeParallelTransitions();

            // Carry the settings over to the new automaton and swap it in.
            var determinizedAutomaton = determinized.GetAutomaton();
            determinizedAutomaton.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
            determinizedAutomaton.LogValueOverride = this.LogValueOverride;
            this.SwapWith(determinizedAutomaton);

            return true;
        }
Ejemplo n.º 9
0
 /// <summary>
 /// Implemented by derived classes to compute the outgoing transitions
 /// of a single state of the determinization result.
 /// </summary>
 /// <param name="sourceState">The source state of the determinized automaton, represented as
 /// a set of (stateId, weight) pairs, where state ids refer to states of the original automaton.</param>
 /// <returns>
 /// A list of (element distribution, weight, weighted state set) triples, one per outgoing transition
 /// of <paramref name="sourceState"/>.
 /// The first two elements of a triple are the element distribution and the weight of the transition.
 /// The third element is the weighted state set that identifies the destination state.
 /// </returns>
 /// <remarks>
 /// <c>TryDeterminize</c> calls this method once for every weighted state set it takes from its work queue
 /// and adds one transition to the result per returned triple.
 /// </remarks>
 protected abstract List <(TElementDistribution, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
     Determinization.WeightedStateSet sourceState);
Ejemplo n.º 10
0
 /// <summary>
 /// Determinization hook for this automaton type. Determinization is not implemented here,
 /// so the method never produces any transitions and always throws.
 /// </summary>
 /// <param name="sourceState">The source state of the determinized automaton, represented as
 /// a set of (stateId, weight) pairs. The value is not inspected.</param>
 /// <returns>This method never returns normally.</returns>
 /// <exception cref="NotImplementedException">Thrown on every call.</exception>
 protected override List <(TPairDistribution, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
     Determinization.WeightedStateSet sourceState) =>
     throw new NotImplementedException("Determinization is not yet supported for this type of automata.");
Ejemplo n.º 11
0
        /// <summary>
        /// Attempts to determinize the automaton,
        /// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
        /// and there are no epsilon transitions.
        /// </summary>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
        /// <para>
        /// If the determinization state stored in the automaton data is already known, it is returned without any work.
        /// <c>MakeEpsilonFree</c> is invoked before the attempt, so it takes effect even when the method returns
        /// <see langword="false"/>.
        /// </para>
        /// <para>
        /// The attempt is abandoned once the result would need more than
        /// <c>Math.Min(States.Count * 3, MaxStateCount)</c> states, where the state count is taken before
        /// epsilon removal. That failure is not recorded in the automaton data, whereas the failure caused by
        /// group usage is recorded as <c>IsNonDeterminizable</c>.
        /// </para>
        /// </remarks>
        public bool TryDeterminize()
        {
            if (this.Data.DeterminizationState != DeterminizationState.Unknown)
            {
                // The outcome of an earlier attempt is cached in the automaton data.
                return this.Data.DeterminizationState == DeterminizationState.IsDeterminized;
            }

            // The state budget is derived from the state count before epsilon removal.
            int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

            this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

            if (this.UsesGroups)
            {
                // Determinization would result in loss of group information, which we cannot allow
                this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
                return false;
            }

            // A weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
            // Each distinct weighted state set corresponds to one state of the resulting automaton.
            var weightedStateSetQueue = new Queue<Determinization.WeightedStateSet>();
            var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();
            var builder = new Builder();

            var startWeightedStateSet = new Determinization.WeightedStateSet
            {
                { this.Start.Index, Weight.One }
            };

            weightedStateSetQueue.Enqueue(startWeightedStateSet);
            weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
            builder.Start.SetEndWeight(this.Start.EndWeight);

            while (weightedStateSetQueue.Count > 0)
            {
                // Take one unprocessed state of the resulting automaton
                Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
                var currentState = builder[weightedStateSetToNewState[currentWeightedStateSet]];

                // Find out what transitions we should add for this state and add each of them
                foreach ((TElementDistribution elementDistribution, Weight weight, Determinization.WeightedStateSet destWeightedStateSet)
                         in this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet))
                {
                    if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out int destinationStateIndex))
                    {
                        if (builder.StatesCount == maxStatesBeforeStop)
                        {
                            // Too many states, determinization attempt failed
                            return false;
                        }

                        // The destination has not been seen yet: add a new state to the result
                        // and schedule its weighted state set for processing.
                        var destinationState = builder.AddState();
                        destinationStateIndex = destinationState.Index;
                        weightedStateSetToNewState.Add(destWeightedStateSet, destinationStateIndex);
                        weightedStateSetQueue.Enqueue(destWeightedStateSet);

                        // Its ending weight is the sum over the set of (weight in the set) * (end weight of the original state)
                        Weight endWeight = Weight.Zero;
                        foreach (KeyValuePair<int, Weight> stateIdWithWeight in destWeightedStateSet)
                        {
                            endWeight += stateIdWithWeight.Value * this.States[stateIdWithWeight.Key].EndWeight;
                        }

                        destinationState.SetEndWeight(endWeight);
                    }

                    // Add transition to the destination state
                    currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
                }
            }

            var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);

            simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

            this.Data = builder.GetData().WithDeterminizationState(DeterminizationState.IsDeterminized);

            // NOTE(review): PruneStatesWithLogEndWeightLessThan and LogValueOverride are deliberately not reassigned:
            // assigning a property of this instance to itself has no effect. Confirm that neither value is stored
            // inside Data; if one is, it must be carried over to the new Data explicitly.
            return true;
        }