Beispiel #1
            /// <summary>
            /// For each state in <paramref name="topologicalOrder"/>, computes the total weight of all paths starting at that state.
            /// Ending weights are taken into account: every accumulator is seeded with the state's <c>EndWeight</c>.
            /// </summary>
            /// <param name="nStates">Passed to <c>CreateZeroWeights</c> when allocating the weights array.</param>
            /// <param name="topologicalOrder">The states to process; the list is walked from its last element to its first.</param>
            /// <param name="group">The group id that each transition's <c>Group</c> is compared against.</param>
            /// <remarks>The weights are computed using dynamic programming, going up from leaves to the root.</remarks>
            private static Weight[] ComputeWeightsToEnd(int nStates, IReadOnlyList <State> topologicalOrder, int group)
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.
                var weights = CreateZeroWeights(nStates);

                // Iterate in the reverse topological order
                for (var stateIndex = topologicalOrder.Count - 1; stateIndex >= 0; stateIndex--)
                    var state = topologicalOrder[stateIndex];
                    // Aggregate weights of all the outgoing transitions from this state
                    var weightToAdd = state.EndWeight;
                    for (var transitionIndex = 0; transitionIndex < state.TransitionCount; ++transitionIndex)
                        var transition = state.GetTransition(transitionIndex);

                        // NOTE(review): no statement follows this condition in the listing; presumably the
                        // transition was skipped here (e.g. `continue`) - confirm.
                        if (transition.Group == group)

                        // NOTE(review): Weight.Sum is given a single operand, so as written the running
                        // weightToAdd (seeded with EndWeight) is not part of the sum; a `weightToAdd,`
                        // argument looks like it was dropped - confirm.
                        weightToAdd = Weight.Sum(
                            Weight.Product(transition.Weight, weights[transition.DestinationStateIndex]));

                    // The result for a state is stored under its own Index, not under its position in the order
                    weights[state.Index] = weightToAdd;

                // NOTE(review): no `return` is visible although the method is declared to return Weight[].

Beispiel #2
            /// <summary>
            /// For each state in <paramref name="topologicalOrder"/>, computes the total weight of all paths starting at the root
            /// and ending at that state. Ending weights are not taken into account.
            /// </summary>
            /// <param name="nStates">Passed to <c>CreateZeroWeights</c> when allocating the weights array.</param>
            /// <param name="topologicalOrder">
            /// The states to process, walked from first to last. The first element is treated as the root:
            /// its weight is initialized to <c>Weight.One</c>. Assumes the list is non-empty - TODO confirm with callers.
            /// </param>
            /// <param name="group">The group id that each transition's <c>Group</c> is compared against.</param>
            /// <remarks>The weights are computed using dynamic programming, going down from the root to leaves.</remarks>
            private static Weight[] ComputeWeightsFromRoot(int nStates, IReadOnlyList <State> topologicalOrder, int group)
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.
                var weights = CreateZeroWeights(nStates);

                weights[topologicalOrder[0].Index] = Weight.One;

                // Iterate in the topological order
                for (var i = 0; i < topologicalOrder.Count; i++)
                    var srcState  = topologicalOrder[i];
                    var srcWeight = weights[srcState.Index];
                    // NOTE(review): no statement follows this condition in the listing; presumably states
                    // with zero accumulated weight were skipped (e.g. `continue`) - confirm.
                    if (srcWeight.IsZero)

                    // Aggregate weights of all the outgoing transitions from this state
                    for (var transitionIndex = 0; transitionIndex < srcState.TransitionCount; transitionIndex++)
                        var transition = srcState.GetTransition(transitionIndex);

                        // NOTE(review): empty body in the listing; presumably the transition was skipped - confirm.
                        if (transition.Group == group)

                        // Push the source weight along the transition and add it to whatever the
                        // destination has accumulated so far
                        var destWeight = weights[transition.DestinationStateIndex];
                        var weight     = Weight.Sum(destWeight, Weight.Product(srcWeight, transition.Weight));

                        weights[transition.DestinationStateIndex] = weight;

                // NOTE(review): no `return` is visible although the method is declared to return Weight[].

Beispiel #3
            /// <summary>
            /// Recursively computes the value of the automaton on a given sequence, starting from this state
            /// and from position <paramref name="sequencePosition"/>.
            /// </summary>
            /// <param name="sequence">The sequence to compute the value on.</param>
            /// <param name="sequencePosition">The current position in the sequence.</param>
            /// <param name="valueCache">A lookup table for memoization, keyed by (state index, sequence position).</param>
            /// <returns>The value computed from the current state.</returns>
            private Weight DoGetValue(
                TSequence sequence, int sequencePosition, Dictionary <IntPair, Weight> valueCache)
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.
                var    stateIndexPair = new IntPair(this.Index, sequencePosition);
                Weight cachedValue;

                // NOTE(review): the body of this cache-hit branch is missing in the listing; presumably it
                // returned cachedValue - confirm.
                if (valueCache.TryGetValue(stateIndexPair, out cachedValue))

                EpsilonClosure closure = this.GetEpsilonClosure();

                Weight value = Weight.Zero;
                int    count = Automaton <TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> .SequenceManipulator.GetLength(sequence);

                // True while there is still an element to consume at sequencePosition
                bool isCurrent = sequencePosition < count;

                if (isCurrent)
                    TElement element = Automaton <TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> .SequenceManipulator.GetElement(sequence, sequencePosition);

                    // Try every non-epsilon transition leaving any state of the epsilon closure
                    for (int closureStateIndex = 0; closureStateIndex < closure.Size; ++closureStateIndex)
                        State  closureState       = closure.GetStateByIndex(closureStateIndex);
                        Weight closureStateWeight = closure.GetStateWeightByIndex(closureStateIndex);

                        for (int transitionIndex = 0; transitionIndex < closureState.transitionCount; transitionIndex++)
                            Transition transition = closureState.transitions[transitionIndex];
                            if (transition.IsEpsilon)
                                continue; // The destination is a part of the closure anyway

                            State  destState  = this.Owner.states[transition.DestinationStateIndex];
                            // Weight of the current element under the transition's element distribution
                            Weight distWeight = Weight.FromLogValue(transition.ElementDistribution.GetLogProb(element));
                            // Recurse only when the transition can contribute a non-zero weight
                            if (!distWeight.IsZero && !transition.Weight.IsZero)
                                Weight destValue = destState.DoGetValue(sequence, sequencePosition + 1, valueCache);
                                if (!destValue.IsZero)
                                    // NOTE(review): Weight.Sum is given a single operand, so as written the
                                    // previously accumulated `value` is not part of the sum; a `value,`
                                    // argument looks like it was dropped - confirm.
                                    value = Weight.Sum(
                                        Weight.Product(closureStateWeight, transition.Weight, distWeight, destValue));
                    // NOTE(review): presumably this assignment belongs to an `else` branch of `if (isCurrent)`
                    // (end of sequence reached) whose keyword is missing from the listing - confirm.
                    value = closure.EndWeight;

                valueCache.Add(stateIndexPair, value);
                // NOTE(review): no `return` is visible although the method is declared to return Weight.
            /// <summary>
            /// Computes the total weights between each pair of states in the component
            /// using the generalized Floyd's algorithm and stores them in <c>pairwiseWeights</c>.
            /// </summary>
            /// <remarks>
            /// The matrix is first filled with the summed weights of direct transitions that pass
            /// <c>transitionFilter</c> and whose destination belongs to the component. Each state <c>k</c> is then
            /// processed in turn as an intermediate state, with the weight of looping at <c>k</c> given by a closure
            /// of <c>pairwiseWeights[k, k]</c> (approximate or exact, depending on <c>useApproximateClosure</c>).
            /// </remarks>
            private void ComputePairwiseWeightsMatrix()
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.
                this.pairwiseWeights = Util.ArrayInit(this.Size, this.Size, (i, j) => Weight.Zero);
                for (int srcStateIndexInComponent = 0; srcStateIndexInComponent < this.Size; ++srcStateIndexInComponent)
                    State state = this.statesInComponent[srcStateIndexInComponent];
                    for (int transitionIndex = 0; transitionIndex < state.TransitionCount; ++transitionIndex)
                        Transition transition = state.GetTransition(transitionIndex);
                        State      destState  = state.Owner.States[transition.DestinationStateIndex];
                        int        destStateIndexInComponent;
                        // GetIndexByState returning -1 is treated as "destination is not in this component"
                        if (this.transitionFilter(transition) && (destStateIndexInComponent = this.GetIndexByState(destState)) != -1)
                            this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent] = Weight.Sum(
                                this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent], transition.Weight);

                for (int k = 0; k < this.Size; ++k)
                    Weight loopWeight =
                        this.useApproximateClosure ? Weight.ApproximateClosure(this.pairwiseWeights[k, k]) : Weight.Closure(this.pairwiseWeights[k, k]);

                    for (int i = 0; i < this.Size; ++i)
                        // NOTE(review): empty body in the listing; presumably rows with i == k or with no
                        // weight into k were skipped (e.g. `continue`) - confirm.
                        if (i == k || this.pairwiseWeights[i, k].IsZero)

                        for (int j = 0; j < this.Size; ++j)
                            // NOTE(review): empty body in the listing; presumably skipped as above - confirm.
                            if (j == k || this.pairwiseWeights[k, j].IsZero)

                            // Weight of going i -> k, looping at k, then k -> j
                            Weight additionalWeight = Weight.Product(
                                this.pairwiseWeights[i, k], loopWeight, this.pairwiseWeights[k, j]);
                            this.pairwiseWeights[i, j] = Weight.Sum(this.pairwiseWeights[i, j], additionalWeight);

                    // Fold the loop at k into column k and row k. For i == k the entry is multiplied by
                    // both statements, but it is overwritten with loopWeight right after this loop.
                    for (int i = 0; i < this.Size; ++i)
                        this.pairwiseWeights[i, k] = Weight.Product(this.pairwiseWeights[i, k], loopWeight);
                        this.pairwiseWeights[k, i] = Weight.Product(this.pairwiseWeights[k, i], loopWeight);

                    this.pairwiseWeights[k, k] = loopWeight;
Beispiel #5
        /// <summary>
        /// Given a transition and the residual weight of its source state, adds weighted non-zero probability character segments
        /// associated with the transition to the list.
        /// </summary>
        /// <param name="transition">The transition.</param>
        /// <param name="sourceStateResidualWeight">
        /// The residual weight of the source state of the transition. It is multiplied with the transition weight
        /// and used as the base weight of every emitted segment.
        /// NOTE(review): earlier documentation called this "the logarithm of" the residual weight, but the value is
        /// used here as a plain <c>Weight</c> operand - confirm the intended wording.
        /// </param>
        /// <param name="bounds">
        /// The list for storing numbered segment bounds. Every segment appends two entries (an opening and a closing
        /// bound carrying the same weight); each entry is numbered with the list's count at the time it is added.
        /// </param>
        private static void AddTransitionCharSegmentBounds(
            Transition transition, Weight sourceStateResidualWeight, List <ValueTuple <int, TransitionCharSegmentBound> > bounds)
            // NOTE(review): this listing contains no braces; block nesting is conveyed by indentation only.
            var    distribution     = transition.ElementDistribution.Value;
            var    ranges           = distribution.Ranges;
            // Start of the next stretch of characters that is not covered by any range
            int    commonValueStart = char.MinValue;
            // Probability value that applies outside of the explicit ranges
            Weight commonValue      = Weight.FromValue(distribution.ProbabilityOutsideRanges);
            Weight weightBase       = Weight.Product(transition.Weight, sourceStateResidualWeight);
            TransitionCharSegmentBound newSegmentBound;

            ////if (double.IsInfinity(weightBase.Value))
            ////    Console.WriteLine("Weight base infinity");

            foreach (var range in ranges)
                // A gap between the previous range and this one is covered by the common value
                if (range.StartInclusive > commonValueStart && !commonValue.IsZero)
                    // Add endpoints for the common value
                    Weight segmentWeight = Weight.Product(commonValue, weightBase);
                    newSegmentBound = new TransitionCharSegmentBound(commonValueStart, transition.DestinationStateIndex, segmentWeight, true);
                    bounds.Add(new ValueTuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
                    newSegmentBound = new TransitionCharSegmentBound(range.StartInclusive, transition.DestinationStateIndex, segmentWeight, false);
                    bounds.Add(new ValueTuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));

                // Add segment endpoints
                Weight pieceValue = Weight.FromValue(range.Probability);
                if (!pieceValue.IsZero)
                    Weight segmentWeight = Weight.Product(pieceValue, weightBase);
                    newSegmentBound = new TransitionCharSegmentBound(range.StartInclusive, transition.DestinationStateIndex, segmentWeight, true);
                    bounds.Add(new ValueTuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
                    newSegmentBound = new TransitionCharSegmentBound(range.EndExclusive, transition.DestinationStateIndex, segmentWeight, false);
                    bounds.Add(new ValueTuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));

                commonValueStart = range.EndExclusive;

            // Cover the tail after the last range (or the whole character span when there are no ranges)
            if (!commonValue.IsZero && (ranges.Count == 0 || ranges[ranges.Count - 1].EndExclusive != DiscreteChar.CharRangeEndExclusive))
                // Add endpoints for the last common value segment
                Weight segmentWeight = Weight.Product(commonValue, weightBase);
                newSegmentBound = new TransitionCharSegmentBound(commonValueStart, transition.DestinationStateIndex, segmentWeight, true);
                bounds.Add(new ValueTuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
                newSegmentBound = new TransitionCharSegmentBound(char.MaxValue + 1, transition.DestinationStateIndex, segmentWeight, false);
                bounds.Add(new ValueTuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
Beispiel #6
        /// <summary>
        /// Given a transition and the residual weight of its source state, adds weighted non-zero probability character segments
        /// associated with the transition to the list.
        /// </summary>
        /// <param name="transition">The transition.</param>
        /// <param name="sourceStateResidualWeight">
        /// The residual weight of the source state of the transition. It is multiplied with the transition weight
        /// and used as the base weight of every emitted segment.
        /// NOTE(review): earlier documentation called this "the logarithm of" the residual weight, but the value is
        /// used here as a plain <c>Weight</c> operand - confirm the intended wording.
        /// </param>
        /// <param name="bounds">
        /// The list for storing numbered segment bounds. Every segment appends two entries (an opening and a closing
        /// bound carrying the same weight); each entry is numbered with the list's count at the time it is added.
        /// </param>
        private static void AddTransitionCharSegmentBounds(
            Transition transition, Weight sourceStateResidualWeight, List <Tuple <int, TransitionCharSegmentBound> > bounds)
            // NOTE(review): this listing contains no braces; block nesting is conveyed by indentation only.
            // The probabilities are cast to PiecewiseVector without a check, so the distribution is assumed
            // to always return that type - TODO confirm.
            var    probs            = (PiecewiseVector)transition.ElementDistribution.GetProbs();
            // Start of the next stretch of characters that is not covered by any piece
            int    commonValueStart = char.MinValue;
            Weight commonValue      = Weight.FromValue(probs.CommonValue);
            Weight weightBase       = Weight.Product(transition.Weight, sourceStateResidualWeight);
            TransitionCharSegmentBound newSegmentBound;

            ////if (double.IsInfinity(weightBase.Value))
            ////    Console.WriteLine("Weight base infinity");

            for (int i = 0; i < probs.Pieces.Count; ++i)
                ConstantVector piece = probs.Pieces[i];
                // A gap between the previous piece and this one is covered by the common value
                if (piece.Start > commonValueStart && !commonValue.IsZero)
                    // Add endpoints for the common value
                    Weight segmentWeight = Weight.Product(commonValue, weightBase);
                    newSegmentBound = new TransitionCharSegmentBound(commonValueStart, transition.DestinationStateIndex, segmentWeight, true);
                    bounds.Add(new Tuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
                    newSegmentBound = new TransitionCharSegmentBound(piece.Start, transition.DestinationStateIndex, segmentWeight, false);
                    bounds.Add(new Tuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));

                // Add segment endpoints
                Weight pieceValue = Weight.FromValue(piece.Value);
                if (!pieceValue.IsZero)
                    Weight segmentWeight = Weight.Product(pieceValue, weightBase);
                    newSegmentBound = new TransitionCharSegmentBound(piece.Start, transition.DestinationStateIndex, segmentWeight, true);
                    bounds.Add(new Tuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
                    // piece.End + 1 is used as the closing bound, so piece.End appears to be inclusive
                    newSegmentBound = new TransitionCharSegmentBound(piece.End + 1, transition.DestinationStateIndex, segmentWeight, false);
                    bounds.Add(new Tuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));

                commonValueStart = piece.End + 1;

            // Cover the tail after the last piece (or the whole character span when there are no pieces)
            if (!commonValue.IsZero && (probs.Pieces.Count == 0 || probs.Pieces[probs.Pieces.Count - 1].End != char.MaxValue))
                // Add endpoints for the last common value segment
                Weight segmentWeight = Weight.Product(commonValue, weightBase);
                newSegmentBound = new TransitionCharSegmentBound(commonValueStart, transition.DestinationStateIndex, segmentWeight, true);
                bounds.Add(new Tuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
                newSegmentBound = new TransitionCharSegmentBound(char.MaxValue + 1, transition.DestinationStateIndex, segmentWeight, false);
                bounds.Add(new Tuple <int, TransitionCharSegmentBound>(bounds.Count, newSegmentBound));
Beispiel #7
            /// <summary>
            /// Computes the total weights between each pair of states in the component
            /// using the generalized Floyd's algorithm and stores them in <c>pairwiseWeights</c>.
            /// </summary>
            /// <remarks>
            /// The matrix is first filled with the summed weights of direct transitions that pass
            /// <c>transitionFilter</c> and whose destination belongs to the component. Each state <c>k</c> is then
            /// processed in turn as an intermediate state, with the weight of looping at <c>k</c> given by a closure
            /// of <c>pairwiseWeights[k, k]</c> (approximate or exact, depending on <c>useApproximateClosure</c>).
            /// </remarks>
            private void ComputePairwiseWeightsMatrix()
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.
                this.pairwiseWeights = Util.ArrayInit(this.Size, this.Size, (i, j) => Weight.Zero);
                for (int srcStateIndexInComponent = 0; srcStateIndexInComponent < this.Size; ++srcStateIndexInComponent)
                    State state = this.statesInComponent[srcStateIndexInComponent];
                    foreach (var transition in state.Transitions)
                        State destState = this.Automaton.States[transition.DestinationStateIndex];
                        int   destStateIndexInComponent;
                        // GetIndexByState returning -1 is treated as "destination is not in this component"
                        if (this.transitionFilter(transition) && (destStateIndexInComponent = this.GetIndexByState(destState)) != -1)
                            this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent] += transition.Weight;

                for (int k = 0; k < this.Size; ++k)
                    Weight loopWeight =
                        this.useApproximateClosure ? Weight.ApproximateClosure(this.pairwiseWeights[k, k]) : Weight.Closure(this.pairwiseWeights[k, k]);

                    for (int i = 0; i < this.Size; ++i)
                        // NOTE(review): empty body in the listing; presumably rows with i == k or with no
                        // weight into k were skipped (e.g. `continue`) - confirm.
                        if (i == k || this.pairwiseWeights[i, k].IsZero)

                        for (int j = 0; j < this.Size; ++j)
                            // NOTE(review): empty body in the listing; presumably skipped as above - confirm.
                            if (j == k || this.pairwiseWeights[k, j].IsZero)

                            // Add the weight of going i -> k, looping at k, then k -> j
                            this.pairwiseWeights[i, j] += Weight.Product(
                                this.pairwiseWeights[i, k], loopWeight, this.pairwiseWeights[k, j]);

                    // Fold the loop at k into column k and row k. For i == k the entry is multiplied by
                    // both statements, but it is overwritten with loopWeight right after this loop.
                    for (int i = 0; i < this.Size; ++i)
                        this.pairwiseWeights[i, k] *= loopWeight;
                        this.pairwiseWeights[k, i] *= loopWeight;

                    this.pairwiseWeights[k, k] = loopWeight;
Beispiel #8
            /// <summary>
            /// Initializes a new instance of the <see cref="EpsilonClosure"/> class.
            /// </summary>
            /// <param name="state">The state whose epsilon closure this instance will represent. Must not be a null state.</param>
            /// <remarks>
            /// Fills <c>weightedStates</c> with (state, weight) pairs and sets <c>EndWeight</c>. A closure that consists
            /// of the state alone is computed directly from its epsilon self-loops; the other code path goes through
            /// a condensation restricted to epsilon transitions.
            /// </remarks>
            internal EpsilonClosure(State state)
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.
                Argument.CheckIfValid(!state.IsNull, nameof(state));

                // Optimize for a very common case: a single-node closure
                bool   singleNodeClosure = true;
                Weight selfLoopWeight    = Weight.Zero;

                for (int i = 0; i < state.TransitionCount; ++i)
                    Transition transition = state.GetTransition(i);
                    if (transition.IsEpsilon)
                        // An epsilon transition to another state means the closure has more than one node
                        // NOTE(review): an early exit from the loop may have followed the assignment below
                        // in the original; none is visible here - confirm.
                        if (transition.DestinationStateIndex != state.Index)
                            singleNodeClosure = false;

                        selfLoopWeight = Weight.Sum(selfLoopWeight, transition.Weight);

                if (singleNodeClosure)
                    Weight stateWeight = Weight.ApproximateClosure(selfLoopWeight);
                    this.weightedStates.Add(Pair.Create(state, stateWeight));
                    this.EndWeight = Weight.Product(stateWeight, state.EndWeight);
                    // NOTE(review): presumably the statements from here on belong to an `else` branch of
                    // `if (singleNodeClosure)` whose keyword is missing from the listing - confirm.
                    Condensation condensation = state.Owner.ComputeCondensation(state, tr => tr.IsEpsilon, true);
                    // Record every state of every component together with its weight from the root
                    for (int i = 0; i < condensation.ComponentCount; ++i)
                        StronglyConnectedComponent component = condensation.GetComponent(i);
                        for (int j = 0; j < component.Size; ++j)
                            State componentState = component.GetStateByIndex(j);
                            this.weightedStates.Add(Pair.Create(componentState, condensation.GetWeightFromRoot(componentState)));

                    this.EndWeight = condensation.GetWeightToEnd(state);
            /// <summary>
            /// For each state of every component, computes the total weight of all paths starting at that state
            /// and stores it in the <c>WeightToEnd</c> field of the state's entry in <c>stateIdToInfo</c>.
            /// Ending weights are taken into account.
            /// </summary>
            /// <remarks>
            /// The weights are computed using dynamic programming, going up from leaves to the root.
            /// Sets <c>weightsToEndComputed</c> to <see langword="true"/> when done.
            /// </remarks>
            private void ComputeWeightsToEnd()
                // NOTE(review): this listing contains no braces and appears to have lost some statement lines;
                // the spots that look incomplete are flagged below - confirm against the upstream source.

                // Iterate in the reverse topological order
                // NOTE(review): the loop walks `components` from index 0 upwards, so this presumably relies on
                // the list already being stored in reverse topological order - confirm.
                for (int currentComponentIndex = 0; currentComponentIndex < this.components.Count; ++currentComponentIndex)
                    StronglyConnectedComponent currentComponent = this.components[currentComponentIndex];

                    // Update end weights in this component based on outgoing transitions to downward components
                    for (int stateIndex = 0; stateIndex < currentComponent.Size; ++stateIndex)
                        State state = currentComponent.GetStateByIndex(stateIndex);

                        // Aggregate weights of all the outgoing transitions from this state
                        Weight weightToAdd = state.EndWeight;
                        for (int transitionIndex = 0; transitionIndex < state.TransitionCount; ++transitionIndex)
                            Transition transition = state.GetTransition(transitionIndex);
                            State      destState  = state.Owner.states[transition.DestinationStateIndex];
                            // Only transitions accepted by the filter that leave the current component count here
                            if (this.transitionFilter(transition) && !currentComponent.HasState(destState))
                                // NOTE(review): Weight.Sum is given a single operand, so as written the
                                // running weightToAdd is not part of the sum; a `weightToAdd,` argument
                                // looks like it was dropped - confirm.
                                weightToAdd = Weight.Sum(
                                    Weight.Product(transition.Weight, this.stateIdToInfo[transition.DestinationStateIndex].WeightToEnd));

                        // We can go from any state of the component to the current state
                        if (!weightToAdd.IsZero)
                            for (int updatedStateIndex = 0; updatedStateIndex < currentComponent.Size; ++updatedStateIndex)
                                State updatedState = currentComponent.GetStateByIndex(updatedStateIndex);
                                // The info is read into a local, modified and written back
                                CondensationStateInfo updatedStateInfo = this.stateIdToInfo[updatedState.Index];
                                // NOTE(review): single-operand Weight.Sum again; the previous
                                // updatedStateInfo.WeightToEnd is not part of the sum as written - confirm.
                                updatedStateInfo.WeightToEnd = Weight.Sum(
                                    Weight.Product(currentComponent.GetWeight(updatedStateIndex, stateIndex), weightToAdd));
                                this.stateIdToInfo[updatedState.Index] = updatedStateInfo;

                this.weightsToEndComputed = true;
            /// <summary>
            /// Recursively increases the value of this automaton on <paramref name="sequence"/> by <paramref name="weight"/>.
            /// </summary>
            /// <param name="stateIndex">Index of currently traversed state.</param>
            /// <param name="isNewState">Indicates whether state <paramref name="stateIndex"/> was just created.</param>
            /// <param name="selfLoopAlreadyMatched">Indicates whether self-loop on state <paramref name="stateIndex"/> was just matched.</param>
            /// <param name="firstAllowedStateIndex">The minimum index of an existing state that can be used for the sequence.</param>
            /// <param name="currentSequencePos">The current position in the generalized sequence.</param>
            /// <param name="sequence">The generalized sequence.</param>
            /// <param name="weight">The weight of the sequence.</param>
            /// <returns>
            /// <see langword="true"/> if the subsequence starting at <paramref name="currentSequencePos"/> has been successfully merged in,
            /// <see langword="false"/> otherwise.
            /// </returns>
            /// <remarks>
            /// This function attempts to add as few new states and transitions as possible.
            /// Its implementation is conceptually similar to adding a string to a trie.
            /// </remarks>
            private bool DoAddGeneralizedSequence(
                int stateIndex,
                bool isNewState,
                bool selfLoopAlreadyMatched,
                int firstAllowedStateIndex,
                int currentSequencePos,
                GeneralizedSequence sequence,
                Weight weight)
                // NOTE(review): this listing contains no braces and has visibly lost statement lines: several
                // `if` bodies are empty, some recursive calls show only part of their argument list, one
                // condition is cut off and no `return` statement is visible anywhere. The notes below mark
                // these spots; confirm every one of them against the upstream source before relying on it.
                bool success;
                var  builder = this.builder;
                var  state   = builder[stateIndex];

                // The whole sequence has been consumed: try to finish in the current state
                if (currentSequencePos == sequence.Count)
                    if (!selfLoopAlreadyMatched)
                        // We can't finish in a state with a self-loop
                        for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                            // NOTE(review): empty body in the listing; presumably reported failure - confirm.
                            if (iterator.Value.DestinationStateIndex == state.Index)

                    state.SetEndWeight(Weight.Sum(state.EndWeight, weight));

                var element = sequence[currentSequencePos];

                // Treat self-loops elements separately
                if (element.LoopWeight.HasValue)
                    if (selfLoopAlreadyMatched)
                        // Previous element was also a self-loop, we should try to find an epsilon transition
                        for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                            var transition = iterator.Value;
                            // NOTE(review): the recursive call below shows only its last argument and the
                            // body of the inner `if` is missing - confirm.
                            if (transition.DestinationStateIndex != state.Index &&
                                transition.IsEpsilon &&
                                transition.DestinationStateIndex >= firstAllowedStateIndex)
                                if (this.DoAddGeneralizedSequence(
                                        Weight.Product(weight, Weight.Inverse(transition.Weight))))

                        // Epsilon transition not found, let's create a new one
                        var destination = state.AddEpsilonTransition(Weight.One);
                        success = this.DoAddGeneralizedSequence(
                            destination.Index, true, false, firstAllowedStateIndex, currentSequencePos, sequence, weight);
                        Debug.Assert(success, "This call must always succeed.");

                    // Find a matching self-loop
                    for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                        var transition = iterator.Value;

                        if (transition.IsEpsilon && transition.DestinationStateIndex != state.Index && transition.DestinationStateIndex >= firstAllowedStateIndex)
                            // Try this epsilon transition
                            // NOTE(review): the body of the `if` below is missing in the listing - confirm.
                            if (this.DoAddGeneralizedSequence(
                                    transition.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))

                        // Is it a self-loop?
                        if (transition.DestinationStateIndex == state.Index)
                            // Do self-loops match?
                            if ((transition.Weight == element.LoopWeight.Value) &&
                                (element.Group == transition.Group) &&
                                ((transition.IsEpsilon && element.IsEpsilonSelfLoop) || (!transition.IsEpsilon && !element.IsEpsilonSelfLoop && transition.ElementDistribution.Equals(element.ElementDistribution))))
                                // Skip the element in the sequence, remain in the same state
                                success = this.DoAddGeneralizedSequence(
                                    stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                                Debug.Assert(success, "This call must always succeed.");

                            // StateIndex also has a self-loop, but the two don't match

                    // NOTE(review): empty body in the listing; the comment below suggests failure was reported here - confirm.
                    if (!isNewState)
                        // Can't add self-loop to an existing state, it will change the language accepted by the state

                    // Add a new self-loop
                    state.AddTransition(element.ElementDistribution, element.LoopWeight.Value, stateIndex, element.Group);
                    success = this.DoAddGeneralizedSequence(stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                    Debug.Assert(success, "This call must always succeed.");

                // Try to find a transition for the element
                for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                    var transition = iterator.Value;

                    if (transition.IsEpsilon && transition.DestinationStateIndex != state.Index && transition.DestinationStateIndex >= firstAllowedStateIndex)
                        // Try this epsilon transition
                        // NOTE(review): the body of the `if` below is missing in the listing - confirm.
                        if (this.DoAddGeneralizedSequence(
                                transition.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))

                    // Is it a self-loop?
                    if (transition.DestinationStateIndex == state.Index)
                        if (selfLoopAlreadyMatched)
                            // The self-loop was checked or added by the caller

                        // Can't go through an existing self-loop, it will allow undesired sequences to be accepted

                    // NOTE(review): the condition below is cut off after the second `||` and its body is missing - confirm.
                    if (transition.DestinationStateIndex < firstAllowedStateIndex ||
                        element.Group != transition.Group ||

                    // Skip the element in the sequence, move to the destination state
                    // Weight of the existing transition must be taken into account
                    // This case can fail if the next element is a self-loop and the destination state already has a different one
                    // NOTE(review): the recursive call below shows only two of its arguments and the body of the `if` is missing - confirm.
                    if (this.DoAddGeneralizedSequence(
                            currentSequencePos + 1,
                            Weight.Product(weight, Weight.Inverse(transition.Weight))))

                // Add a new transition
                var newChild = state.AddTransition(element.ElementDistribution, Weight.One, null, element.Group);

                success = this.DoAddGeneralizedSequence(
                    newChild.Index, true, false, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                Debug.Assert(success, "This call must always succeed.");
Beispiel #11
        /// <summary>
        /// Computes a set of outgoing transitions from a given state of the determinization result.
        /// </summary>
        /// <param name="sourceState">The source state of the determinized automaton represented as
        /// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
        /// <returns>
        /// A collection of (element distribution, weight, weighted state set) triples corresponding to outgoing transitions from <paramref name="sourceState"/>.
        /// The first two elements of a tuple define the element distribution and the weight of a transition.
        /// The third element defines the outgoing state.
        /// </returns>
        // NOTE(review): brace-only lines (and some short statements) are absent from this listing,
        // so the nesting described in the comments below is inferred from indentation.
        protected override List <(DiscreteChar, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceState)
            const double LogEps = -35; // Don't add transitions with log-weight less than this as they have been produced by numerical inaccuracies

            // Build a list of numbered non-zero probability character segment bounds (they are numbered here due to perf. reasons)
            var segmentBounds        = new List <ValueTuple <int, TransitionCharSegmentBound> >();
            int transitionsProcessed = 0;

            // Visit every (stateId, weight) pair of the source set and hand each outgoing transition,
            // together with the weight of its source state, to AddTransitionCharSegmentBounds.
            foreach (KeyValuePair <int, Weight> stateIdWeight in sourceState)
                var state = this.States[stateIdWeight.Key];
                foreach (var transition in state.Transitions)
                    AddTransitionCharSegmentBounds(transition, stateIdWeight.Value, segmentBounds);

                transitionsProcessed += state.Transitions.Count;

            // Sort segment bounds left-to-right, start-to-end
            // The sort works on an array copy; segmentBounds itself keeps its insertion order,
            // which the sweep below relies on when it indexes segmentBounds by tup.Item1.
            var sortedIndexedSegmentBounds = segmentBounds.ToArray();

            // Sorting is skipped when at most one transition contributed bounds.
            if (transitionsProcessed > 1)
                Array.Sort(sortedIndexedSegmentBounds, CompareSegmentBounds);

                // NOTE(review): the comparison expression of this local function is not visible in this listing.
                int CompareSegmentBounds((int, TransitionCharSegmentBound) a, (int, TransitionCharSegmentBound) b) =>

            // Produce an outgoing transition for each unique subset of overlapping segments
            var    result = new List <(DiscreteChar, Weight, Determinization.WeightedStateSet)>();
            // Running sum of the weights of all segments that have started but not yet ended.
            Weight currentSegmentStateWeightSum = Weight.Zero;

            // Same running sum, split per destination state id.
            var currentSegmentStateWeights = new Dictionary <int, Weight>();

            // Pre-seed every destination state with zero so the indexer reads in the sweep never miss a key.
            foreach (var sb in segmentBounds)
                currentSegmentStateWeights[sb.Item2.DestinationStateId] = Weight.Zero;

            var activeSegments      = new HashSet <TransitionCharSegmentBound>();
            int currentSegmentStart = char.MinValue;

            // Sweep over the bounds in sorted order; each bound closes the segment that began at currentSegmentStart.
            foreach (var tup in sortedIndexedSegmentBounds)
                TransitionCharSegmentBound segmentBound = tup.Item2;

                // Emit a transition only if the accumulated weight is above the noise threshold
                // and the segment [currentSegmentStart, segmentBound.Bound) is non-empty.
                if (currentSegmentStateWeightSum.LogValue > LogEps && currentSegmentStart < segmentBound.Bound)
                    // Flush previous segment
                    char         segmentEnd    = (char)(segmentBound.Bound - 1);
                    int          segmentLength = segmentEnd - currentSegmentStart + 1;
                    DiscreteChar elementDist   = DiscreteChar.InRange((char)currentSegmentStart, segmentEnd);

                    // The destination set holds each contributing state with its weight divided by the total,
                    // dropping states whose weight is below the threshold.
                    var destinationState = new Determinization.WeightedStateSet();
                    foreach (KeyValuePair <int, Weight> stateIdWithWeight in currentSegmentStateWeights)
                        if (stateIdWithWeight.Value.LogValue > LogEps)
                            Weight stateWeight = Weight.Product(stateIdWithWeight.Value, Weight.Inverse(currentSegmentStateWeightSum));
                            destinationState.Add(stateIdWithWeight.Key, stateWeight);

                    // Transition weight = number of characters in the segment times the total accumulated weight.
                    Weight transitionWeight = Weight.Product(Weight.FromValue(segmentLength), currentSegmentStateWeightSum);
                    result.Add((elementDist, transitionWeight, destinationState));

                // Update current segment
                currentSegmentStart = segmentBound.Bound;

                // A start bound adds its weight to the total and to its destination state's entry.
                if (segmentBound.IsStart)
                    currentSegmentStateWeightSum = Weight.Sum(currentSegmentStateWeightSum, segmentBound.Weight);
                    currentSegmentStateWeights[segmentBound.DestinationStateId] = Weight.Sum(currentSegmentStateWeights[segmentBound.DestinationStateId], segmentBound.Weight);
                    // NOTE(review): the statements from here on read like the handling of an END bound
                    // (presumably an `else` branch whose keyword/braces are missing from this listing) - confirm.
                    Debug.Assert(currentSegmentStateWeights.ContainsKey(segmentBound.DestinationStateId), "We shouldn't exit a state we didn't enter.");
                    activeSegments.Remove(segmentBounds[tup.Item1 - 1].Item2);  // End follows start in original.
                    if (double.IsInfinity(segmentBound.Weight.Value))
                        // Cannot subtract because of the infinities involved.
                        // Instead, both sums are recomputed from the segments that are still active.
                        currentSegmentStateWeightSum = activeSegments.Select(sb => sb.Weight).Aggregate(Weight.Zero, (acc, w) => Weight.Sum(acc, w));
                        currentSegmentStateWeights[segmentBound.DestinationStateId] =
                            activeSegments.Where(sb => sb.DestinationStateId == segmentBound.DestinationStateId).Select(sb => sb.Weight).Aggregate(Weight.Zero, (acc, w) => Weight.Sum(acc, w));
                        // NOTE(review): the subtraction below presumably belongs to the finite-weight alternative
                        // of the check above; as listed it would overwrite the recomputed sums - confirm.
                        // The total is reset to exactly zero when no segment is active, otherwise the ended weight is subtracted.
                        currentSegmentStateWeightSum = activeSegments.Count == 0 ? Weight.Zero : Weight.AbsoluteDifference(currentSegmentStateWeightSum, segmentBound.Weight);

                        Weight prevStateWeight = currentSegmentStateWeights[segmentBound.DestinationStateId];
                        currentSegmentStateWeights[segmentBound.DestinationStateId] = Weight.AbsoluteDifference(
                            prevStateWeight, segmentBound.Weight);
            // NOTE(review): the statement returning `result` is not visible in this listing.

            /// <summary>
            /// For each state of the component, computes the total weight of all paths starting at the root
            /// and ending at that state. Ending weights are not taken into account.
            /// </summary>
            /// <remarks>
            /// The weights are computed using dynamic programming, going down from the root to leafs.
            /// Results are written into <c>stateIdToInfo</c> (<c>UpwardWeightFromRoot</c> and <c>WeightFromRoot</c>),
            /// and <c>weightsFromRootComputed</c> is set when the pass finishes.
            /// NOTE(review): brace-only lines and some short statements are absent from this listing.
            /// </remarks>
            private void ComputeWeightsFromRoot()
                // The info entry is read, modified and stored back (here and below), so it is
                // presumably a value type - TODO confirm.
                CondensationStateInfo rootInfo = this.stateIdToInfo[this.Root.Index];

                // Seed the recursion: the root is reached with weight one.
                rootInfo.UpwardWeightFromRoot       = Weight.One;
                this.stateIdToInfo[this.Root.Index] = rootInfo;

                // Iterate in the topological order
                // (the component list is walked from its last index down to 0; presumably it is stored
                // in reverse topological order - TODO confirm).
                for (int currentComponentIndex = this.components.Count - 1; currentComponentIndex >= 0; --currentComponentIndex)
                    StronglyConnectedComponent currentComponent = this.components[currentComponentIndex];

                    // Propagate weights inside the component
                    for (int srcStateIndex = 0; srcStateIndex < currentComponent.Size; ++srcStateIndex)
                        State srcState = currentComponent.GetStateByIndex(srcStateIndex);
                        CondensationStateInfo srcStateInfo = this.stateIdToInfo[srcState.Index];
                        // NOTE(review): no statement follows this check in the listing; presumably states with
                        // no incoming weight are skipped (`continue`) - confirm.
                        if (srcStateInfo.UpwardWeightFromRoot.IsZero)

                        // Weight entering the component at srcState is spread to every state of the component
                        // through the component's pairwise weight GetWeight(src, dest).
                        for (int destStateIndex = 0; destStateIndex < currentComponent.Size; ++destStateIndex)
                            State destState = currentComponent.GetStateByIndex(destStateIndex);
                            CondensationStateInfo destStateInfo = this.stateIdToInfo[destState.Index];
                            // NOTE(review): only one operand of Weight.Sum is visible; the other is presumably
                            // the current destStateInfo.WeightFromRoot - confirm.
                            destStateInfo.WeightFromRoot = Weight.Sum(
                                Weight.Product(srcStateInfo.UpwardWeightFromRoot, currentComponent.GetWeight(srcStateIndex, destStateIndex)));
                            this.stateIdToInfo[destState.Index] = destStateInfo;

                    // Compute weight contributions to downward components
                    for (int srcStateIndex = 0; srcStateIndex < currentComponent.Size; ++srcStateIndex)
                        State srcState = currentComponent.GetStateByIndex(srcStateIndex);
                        CondensationStateInfo srcStateInfo = this.stateIdToInfo[srcState.Index];
                        // NOTE(review): no statement follows this check in the listing; presumably states
                        // with zero weight are skipped (`continue`) - confirm.
                        if (srcStateInfo.WeightFromRoot.IsZero)

                        // Aggregate weights of all the outgoing transitions from this state
                        for (int transitionIndex = 0; transitionIndex < srcState.TransitionCount; ++transitionIndex)
                            Transition transition = srcState.GetTransition(transitionIndex);
                            State      destState  = srcState.Owner.states[transition.DestinationStateIndex];
                            // Only transitions accepted by the filter that leave the current component contribute.
                            if (this.transitionFilter(transition) && !currentComponent.HasState(destState))
                                CondensationStateInfo destStateInfo = this.stateIdToInfo[destState.Index];
                                // NOTE(review): only one operand of Weight.Sum is visible; the other is presumably
                                // the current destStateInfo.UpwardWeightFromRoot - confirm.
                                destStateInfo.UpwardWeightFromRoot = Weight.Sum(
                                    Weight.Product(srcStateInfo.WeightFromRoot, transition.Weight));
                                this.stateIdToInfo[transition.DestinationStateIndex] = destStateInfo;

                // Record that the pass has been completed.
                this.weightsFromRootComputed = true;
            /// <summary>
            /// Builds a complete list of generalized sequences accepted by the simplifiable part of the automaton.
            /// </summary>
            /// <remarks>
            /// The traversal is depth-first and driven by an explicit stack of items rather than by recursive calls.
            /// State items carry a state index with the weight accumulated so far; element items are used to
            /// maintain <paramref name="currentSequenceElements"/> while the traversal moves through the states.
            /// NOTE(review): brace-only lines and some short statements are absent from this listing.
            /// </remarks>
            /// <param name="stateIndex">The currently traversed state.</param>
            /// <param name="generalizedTreeNodes">The state labels obtained from <see cref="FindGeneralizedTrees"/>.</param>
            /// <param name="weightedSequences">The sequence list being built.</param>
            /// <param name="currentSequenceElements">The list of elements of the sequence currently being built.</param>
            /// <param name="currentWeight">The weight of the sequence currently being built.</param>
            private void DoBuildAcceptedSequenceList(
                int stateIndex,
                bool[] generalizedTreeNodes,
                List <WeightedSequence> weightedSequences,
                List <GeneralizedElement> currentSequenceElements,
                Weight currentWeight)
                var stack = new Stack <StackItem>();

                // Start the traversal from the given state with the given weight.
                stack.Push(new StateWeight(stateIndex, currentWeight));

                while (stack.Count > 0)
                    var stackItem = stack.Pop();

                    // Element items only edit the current sequence.
                    // NOTE(review): the visible code removes the last element when Element is non-null; an
                    // add/else branch and a `continue` are presumably missing from this listing - confirm.
                    if (stackItem is ElementItem elementItem)
                        if (elementItem.Element != null)
                            currentSequenceElements.RemoveAt(currentSequenceElements.Count - 1);

                    // Anything that was not handled above is treated as a state item.
                    var stateAndWeight = stackItem as StateWeight;

                    // The parameters stateIndex and currentWeight are reused as per-iteration locals.
                    stateIndex = stateAndWeight.StateIndex;
                    var state = this.builder[stateIndex];
                    currentWeight = stateAndWeight.Weight;

                    // Find a non-epsilon self-loop if there is one
                    Transition?selfLoop = null;
                    for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                        var transition = iterator.Value;
                        if (transition.DestinationStateIndex == stateIndex)
                                // NOTE(review): the next two lines are the arguments of an assertion whose
                                // opening call is not visible in this listing.
                                selfLoop == null,
                                "Multiple self-loops should have been merged by MergeParallelTransitions()");
                            selfLoop = transition;

                    // Push the found self-loop to the end of the current sequence
                    if (selfLoop != null)
                        currentSequenceElements.Add(new GeneralizedElement(
                                                        selfLoop.Value.ElementDistribution, selfLoop.Value.Group, selfLoop.Value.Weight));
                        // Schedule an element item with a null element to be processed after this state's subtree.
                        stack.Push(new ElementItem(null));

                    // Can this state produce a sequence?
                    if (state.CanEnd && generalizedTreeNodes[stateIndex])
                        var sequence = new GeneralizedSequence(currentSequenceElements);
                        // TODO: use immutable data structure instead of copying sequences
                        // The sequence weight is the accumulated path weight times the state's end weight.
                        weightedSequences.Add(new WeightedSequence(sequence, Weight.Product(currentWeight, state.EndWeight)));

                    // Traverse the outgoing transitions
                    for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                        var transition = iterator.Value;
                        // Skip self-loops & disallowed states
                        // NOTE(review): the second half of this condition and the skip statement are not visible.
                        if (transition.DestinationStateIndex == stateIndex ||

                        // Items are pushed in the reverse of the order in which they will be popped.
                        if (!transition.IsEpsilon)
                            // Non-epsilon transitions contribute to the sequence
                            stack.Push(new ElementItem(null));

                            // NOTE(review): the enclosing push call and the first constructor argument
                            // (presumably the destination state index) are not visible - confirm.
                            new StateWeight(
                                Weight.Product(currentWeight, transition.Weight)));

                        if (!transition.IsEpsilon)
                                // NOTE(review): the enclosing push call is not visible in this listing.
                                // The element is created with a null third argument, unlike the self-loop
                                // element above, which passes the loop weight.
                                new ElementItem(
                                    new GeneralizedElement(transition.ElementDistribution, transition.Group, null)));
Beispiel #14
            /// <summary>
            /// Builds a new automaton with one state per index in <paramref name="subgraph"/>, connecting its start
            /// state to those states by epsilon transitions weighted with the total weight of paths from the root.
            /// </summary>
            /// <param name="states">The states of the original automaton, indexed by state index.</param>
            /// <param name="topologicalOrder">The states in topological order; its first element is used as the root.</param>
            /// <param name="group">The transition group the subautomaton is built for.</param>
            /// <param name="subgraph">The indices of the original states to include.</param>
            /// <remarks>
            /// NOTE(review): brace-only lines, several statements (transition copying, end-weight assignment)
            /// and the return statement are absent from this listing.
            /// </remarks>
            private static TThis BuildSubautomaton(IReadOnlyList <State> states, IReadOnlyList <State> topologicalOrder, int group, HashSet <int> subgraph)
                var weightsFromRoot          = ComputeWeightsFromRoot(states.Count, topologicalOrder, group);
                var weightsToEnd             = ComputeWeightsToEnd(states.Count, topologicalOrder, group);
                var subautomaton             = new TThis();
                // Maps each original state index to a freshly added state of the subautomaton.
                var stateMapping             = subgraph.ToDictionary(x => x, _ => subautomaton.AddState());
                // Starts out containing every subgraph state; entries are presumably removed as incoming
                // transitions are discovered in the loop below (removal is not visible here) - confirm.
                var hasNoIncomingTransitions = new HashSet <int>(subgraph);

                // copy the automaton and find states without incoming transitions.
                foreach (var stateIndex in subgraph)
                    var newSourceState = stateMapping[stateIndex];

                    for (int i = 0; i < states[stateIndex].TransitionCount; i++)
                        var transition = states[stateIndex].GetTransition(i);
                        // NOTE(review): the body of this check and the rest of the loop are not visible in this listing.
                        if (transition.Group != group)

                // Accumulates, over all subgraph states, weightFromRoot * weightToEnd as computed below.
                var correctionFactor = Weight.Zero;

                // mark start and end states, modulo paths bypassing the automaton.
                foreach (var stateIndex in subgraph)
                    var newSourceState = stateMapping[stateIndex];

                    // consider start states
                    // A state without outgoing transitions in the subautomaton is not treated as a start state.
                    var weightFromRoot = newSourceState.TransitionCount > 0 ? weightsFromRoot[stateIndex] : Weight.Zero;
                    if (!weightFromRoot.IsZero)
                        subautomaton.Start.AddEpsilonTransition(weightFromRoot, newSourceState);

                    // consider end states
                    // A state still listed in hasNoIncomingTransitions is not treated as an end state.
                    var weightToEnd = !hasNoIncomingTransitions.Contains(stateIndex) ? weightsToEnd[stateIndex] : Weight.Zero;
                    // NOTE(review): the body of this check (presumably setting the end weight) is not visible.
                    if (!weightToEnd.IsZero)

                    correctionFactor = Weight.Sum(correctionFactor, Weight.Product(weightFromRoot, weightToEnd));

                // A non-zero correction factor is an untested case and is rejected outright.
                if (!correctionFactor.IsZero)
                    throw new Exception("Write a unit test for this case. Code should be fine.");
                // Difference between the root's total weight to the end and the correction factor.
                var epsilonWeight = Weight.AbsoluteDifference(weightsToEnd[topologicalOrder[0].Index], correctionFactor);


Beispiel #15
        /// <summary>
        /// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
        /// </summary>
        /// <param name="srcSequence">The sequence to project.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
        /// to the automaton representation of a projected sequence.
        /// NOTE(review): brace-only lines and some short statements (early returns, the final return)
        /// are absent from this listing.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
            Argument.CheckIfNotNull(srcSequence, "srcSequence");

            var result = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Builder();

            // NOTE(review): no statement follows this check in the listing; presumably an early return
            // for a zero transducer - confirm.
            if (this.sequencePairToWeight.IsCanonicZero())

            // Maps (mapping state index, position in srcSequence) to the index of the created destination state.
            var destStateCache = new Dictionary <(int, int), int>();

            // Build the whole projection starting from the transducer's start state and sequence position 0.
            result.StartStateIndex = BuildProjectionOfSequence(this.sequencePairToWeight.Start, 0);

            // NOTE(review): how `simplification` is used and what is returned is not visible in this listing.
            var simplification = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Simplification(result, null);



            // Recursively builds the projection of a given sequence onto this transducer.
            // Returns the index of the destination state for (mappingState, srcSequenceIndex).
            int BuildProjectionOfSequence(PairListAutomaton.State mappingState, int srcSequenceIndex)
                //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
                //// Unfortunately, it's not clear how to avoid the duplication in the current design.

                var sourceSequenceManipulator =
                    Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .SequenceManipulator;

                var statePair = (mappingState.Index, srcSequenceIndex);

                // NOTE(review): no statement follows this lookup in the listing; presumably the cached
                // index is returned on a hit - confirm.
                if (destStateCache.TryGetValue(statePair, out var destStateIndex))

                var destState = result.AddState();

                // The state is cached before its transitions are built, so recursive calls that reach
                // the same pair find the cache entry instead of creating another state.
                destStateCache.Add(statePair, destState.Index);

                var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

                // Enumerate transitions from the current mapping state
                foreach (var mappingTransition in mappingState.Transitions)
                    var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    // Epsilon transition case
                    // The sequence position is not advanced; only the transducer state changes.
                    if (IsSrcEpsilon(mappingTransition))
                        var destElementWeights =
                                // NOTE(review): the condition of this conditional expression is not visible.
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = BuildProjectionOfSequence(destMappingState, srcSequenceIndex);
                        destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                        // NOTE(review): presumably followed by `continue` so the normal case below is skipped - confirm.

                    // Normal transition case - Find epsilon-reachable states
                    // Only applicable while there are sequence elements left to consume.
                    if (srcSequenceIndex < srcSequenceLength)
                        var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                        var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcSequenceElement, out var destElementDistribution);
                        // NOTE(review): no statement follows this check; presumably transitions whose projection
                        // has log-scale of negative infinity are skipped - confirm.
                        if (double.IsNegativeInfinity(projectionLogScale))

                        // The projection scale is folded into the transition weight; the sequence position advances by one.
                        var weight         = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                        var childDestState = BuildProjectionOfSequence(destMappingState, srcSequenceIndex + 1);
                        destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);

                // A destination state can end only when the whole source sequence has been consumed.
                destState.SetEndWeight(srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero);
Beispiel #16
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">The automaton to project.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// NOTE(review): brace-only lines and some short statements (early returns, the final return)
        /// are absent from this listing.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var result = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Builder();

            // NOTE(review): no statement follows this check in the listing; presumably an early return
            // when either operand is zero - confirm.
            if (srcAutomaton.IsCanonicZero() || this.sequencePairToWeight.IsCanonicZero())

            // The projected automaton must be epsilon-free
            // NOTE(review): the statement that enforces this is not visible in this listing.

            // Maps (mapping state index, source state index) to the index of the created destination state.
            var destStateCache = new Dictionary <(int, int), int>();

            // Build the whole projection starting from the two start states.
            result.StartStateIndex = BuildProjectionOfAutomaton(this.sequencePairToWeight.Start, srcAutomaton.Start);

            // NOTE(review): how `simplification` is used and what is returned is not visible in this listing.
            var simplification = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Simplification(result, null);



            // Recursively builds the projection of a given automaton onto this transducer.
            // The projected automaton must be epsilon-free.
            // Returns the index of the destination state for (mappingState, srcState).
            int BuildProjectionOfAutomaton(
                PairListAutomaton.State mappingState,
                Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .State srcState)
                //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
                //// Unfortunately, it's not clear how to avoid the duplication in the current design.

                // State already exists, return its index
                var statePair = (mappingState.Index, srcState.Index);

                // NOTE(review): the return on a cache hit is not visible in this listing.
                if (destStateCache.TryGetValue(statePair, out var destStateIndex))

                var destState = result.AddState();

                // The state is cached before its transitions are built, so recursive calls that reach
                // the same pair find the cache entry instead of creating another state.
                destStateCache.Add(statePair, destState.Index);

                // Iterate over transitions from mappingState
                foreach (var mappingTransition in mappingState.Transitions)
                    var childMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    // Epsilon transition case
                    // The source state is kept; only the transducer state changes.
                    if (IsSrcEpsilon(mappingTransition))
                        var destElementDistribution =
                                // NOTE(review): the condition of this conditional expression is not visible.
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = BuildProjectionOfAutomaton(childMappingState, srcState);
                        destState.AddTransition(destElementDistribution, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                        // NOTE(review): presumably followed by `continue` so the loop below is skipped - confirm.

                    // Iterate over states and transitions in the closure of srcState
                    foreach (var srcTransition in srcState.Transitions)
                        Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var srcChildState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                        var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcTransition.ElementDistribution.Value, out var destElementDistribution);
                        // NOTE(review): no statement follows this check; presumably pairs whose projection
                        // has log-scale of negative infinity are skipped - confirm.
                        if (double.IsNegativeInfinity(projectionLogScale))

                        // Destination weight = transducer weight * source weight * projection scale.
                        var destWeight          = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));
                        var childDestStateIndex = BuildProjectionOfAutomaton(childMappingState, srcChildState);
                        destState.AddTransition(destElementDistribution, destWeight, childDestStateIndex, mappingTransition.Group);

                // The end weight of a paired state is the product of the two end weights.
                destState.SetEndWeight(Weight.Product(mappingState.EndWeight, srcState.EndWeight));
        /// <summary>
        /// Recursively builds the projection of a given automaton onto this transducer.
        /// The projected automaton must be epsilon-free.
        /// </summary>
        /// <param name="destAutomaton">The projection being built.</param>
        /// <param name="mappingState">The currently traversed state of the transducer.</param>
        /// <param name="srcState">The currently traversed state of the automaton being projected.</param>
        /// <param name="destStateCache">The cache of the created projection states, keyed by (mapping state index, source state index).</param>
        /// <returns>The state of the projection corresponding to the given mapping state and the given state of the projected automaton.</returns>
        /// <remarks>
        /// NOTE(review): brace-only lines and some short statements (returns, skips) are absent from this listing.
        /// </remarks>
        private Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .State BuildProjectionOfAutomaton(
            TDestAutomaton destAutomaton,
            PairListAutomaton.State mappingState,
            Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .State srcState,
            Dictionary <IntPair, Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .State> destStateCache)
            Debug.Assert(mappingState != null && srcState != null, "Valid states must be provided.");
            Debug.Assert(!ReferenceEquals(srcState.Owner, destAutomaton), "Cannot build a projection in place.");

            //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
            //// Unfortunately, it's not clear how to avoid the duplication in the current design.

            // State already exists, return its index
            var statePair = new IntPair(mappingState.Index, srcState.Index);

            Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .State destState;
            // NOTE(review): the return on a cache hit is not visible in this listing.
            if (destStateCache.TryGetValue(statePair, out destState))

            // The new state is cached before its transitions are built, so recursive calls that reach
            // the same pair find the cache entry instead of creating another state.
            destState = destAutomaton.AddState();
            destStateCache.Add(statePair, destState);

            // Iterate over transitions from mappingState
            for (int mappingTransitionIndex = 0; mappingTransitionIndex < mappingState.TransitionCount; mappingTransitionIndex++)
                var mappingTransition = mappingState.GetTransition(mappingTransitionIndex);
                var childMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                // Epsilon transition case
                // The source state is kept; only the transducer state changes.
                if (IsSrcEpsilon(mappingTransition))
                    TDestElementDistribution destElementDistribution = mappingTransition.ElementDistribution == null ? null : mappingTransition.ElementDistribution.Second;
                    var childDestState = this.BuildProjectionOfAutomaton(destAutomaton, childMappingState, srcState, destStateCache);
                    destState.AddTransition(destElementDistribution, mappingTransition.Weight, childDestState, mappingTransition.Group);
                    // NOTE(review): presumably followed by `continue` so the loop below is skipped - confirm.

                // Iterate over states and transitions in the closure of srcState
                for (int srcTransitionIndex = 0; srcTransitionIndex < srcState.TransitionCount; srcTransitionIndex++)
                    var srcTransition = srcState.GetTransition(srcTransitionIndex);
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var srcChildState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                    TDestElementDistribution destElementDistribution;
                    double projectionLogScale = mappingTransition.ElementDistribution.ProjectFirst(
                        srcTransition.ElementDistribution, out destElementDistribution);
                    // NOTE(review): no statement follows this check; presumably pairs whose projection
                    // has log-scale of negative infinity are skipped - confirm.
                    if (double.IsNegativeInfinity(projectionLogScale))

                    // Destination weight = transducer weight * source weight * projection scale.
                    Weight destWeight     = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));
                    var    childDestState = this.BuildProjectionOfAutomaton(destAutomaton, childMappingState, srcChildState, destStateCache);
                    destState.AddTransition(destElementDistribution, destWeight, childDestState, mappingTransition.Group);

            // The end weight of a paired state is the product of the two end weights.
            destState.EndWeight = Weight.Product(mappingState.EndWeight, srcState.EndWeight);
        /// <summary>
        /// Recursively builds the projection of a given sequence onto this transducer.
        /// </summary>
        /// <param name="destAutomaton">The projection being built.</param>
        /// <param name="mappingState">The currently traversed state of the transducer.</param>
        /// <param name="srcSequence">The sequence being projected.</param>
        /// <param name="srcSequenceIndex">The current index in the sequence being projected.</param>
        /// <param name="destStateCache">The cache of the created projection states, keyed by (mapping state index, sequence index).</param>
        /// <returns>The state of the projection corresponding to the given mapping state and the position in the projected sequence.</returns>
        /// <remarks>
        /// NOTE(review): brace-only lines and some short statements (returns, skips) are absent from this listing.
        /// </remarks>
        private Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .State BuildProjectionOfSequence(
            TDestAutomaton destAutomaton,
            PairListAutomaton.State mappingState,
            TSrcSequence srcSequence,
            int srcSequenceIndex,
            Dictionary <IntPair, Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .State> destStateCache)
            //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
            //// Unfortunately, it's not clear how to avoid the duplication in the current design.

            var sourceSequenceManipulator =
                Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .SequenceManipulator;

            var statePair = new IntPair(mappingState.Index, srcSequenceIndex);

            Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .State destState;
            // NOTE(review): the return on a cache hit is not visible in this listing.
            if (destStateCache.TryGetValue(statePair, out destState))

            // The new state is cached before its transitions are built, so recursive calls that reach
            // the same pair find the cache entry instead of creating another state.
            destState = destAutomaton.AddState();
            destStateCache.Add(statePair, destState);

            int srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

            // Enumerate transitions from the current mapping state
            for (int i = 0; i < mappingState.TransitionCount; i++)
                var mappingTransition = mappingState.GetTransition(i);
                var destMappingState  = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                // Epsilon transition case
                // The sequence position is not advanced; only the transducer state changes.
                if (IsSrcEpsilon(mappingTransition))
                    TDestElementDistribution destElementWeights = mappingTransition.ElementDistribution == null ? null : mappingTransition.ElementDistribution.Second;
                    var childDestState = this.BuildProjectionOfSequence(
                        destAutomaton, destMappingState, srcSequence, srcSequenceIndex, destStateCache);
                    destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestState, mappingTransition.Group);
                    // NOTE(review): presumably followed by `continue` so the normal case below is skipped - confirm.

                // Normal transition case - Find epsilon-reachable states
                // Only applicable while there are sequence elements left to consume.
                if (srcSequenceIndex < srcSequenceLength)
                    var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                    TDestElementDistribution destElementDistribution;
                    double projectionLogScale = mappingTransition.ElementDistribution.ProjectFirst(
                        srcSequenceElement, out destElementDistribution);
                    // NOTE(review): no statement follows this check; presumably transitions whose projection
                    // has log-scale of negative infinity are skipped - confirm.
                    if (double.IsNegativeInfinity(projectionLogScale))

                    // The projection scale is folded into the transition weight; the sequence position advances by one.
                    Weight weight         = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                    var    childDestState = this.BuildProjectionOfSequence(
                        destAutomaton, destMappingState, srcSequence, srcSequenceIndex + 1, destStateCache);
                    destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);

            // A destination state can end only when the whole source sequence has been consumed.
            destState.EndWeight = srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero;
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">The automaton to project.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var mappingAutomaton = this.sequencePairToWeight;

            if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
                return(Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Zero());

            // The projected automaton must be epsilon-free

            var result = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Builder();

            var destStateCache = new Dictionary <(int, int), int>();
            var stack          = new Stack <(int state1, int state2, int destStateIndex)>();

            // Creates destination state and schedules projection computation for it.
            // If computation is already scheduled or done the state index is simply taken from cache
            int CreateDestState(
                PairListAutomaton.State mappingState,
                Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .State srcState)
                var destPair = (mappingState.Index, srcState.Index);

                if (!destStateCache.TryGetValue(destPair, out var destStateIndex))
                    var destState = result.AddState();
                    destState.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
                    stack.Push((mappingState.Index, srcState.Index, destState.Index));
                    destStateCache[destPair] = destState.Index;
                    destStateIndex           = destState.Index;


            // Populate the stack with start destination state
            result.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);
            var stringAutomaton = srcAutomaton as StringAutomaton;
            var sourceDistributionHasLogProbabilityOverrides = stringAutomaton?.HasElementLogValueOverrides ?? false;

            while (stack.Count > 0)
                var(mappingStateIndex, srcStateIndex, destStateIndex) = stack.Pop();

                var mappingState = mappingAutomaton.States[mappingStateIndex];
                var srcState     = srcAutomaton.States[srcStateIndex];
                var destState    = result[destStateIndex];

                // Iterate over transitions from mappingState
                foreach (var mappingTransition in mappingState.Transitions)
                    var childMappingState = mappingAutomaton.States[mappingTransition.DestinationStateIndex];

                    // Epsilon transition case
                    if (IsSrcEpsilon(mappingTransition))
                        var destElementDistribution =
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = CreateDestState(childMappingState, srcState);
                        destState.AddTransition(destElementDistribution, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);

                    // Iterate over states and transitions in the closure of srcState
                    foreach (var srcTransition in srcState.Transitions)
                        Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var srcChildState = srcAutomaton.States[srcTransition.DestinationStateIndex];

                        var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcTransition.ElementDistribution.Value, out var destElementDistribution);
                        if (double.IsNegativeInfinity(projectionLogScale))

                        // In the special case of a log probability override in a DiscreteChar element distribution,
                        // we need to compensate for the fact that the distribution is not normalized.
                        if (destElementDistribution.HasValue && sourceDistributionHasLogProbabilityOverrides)
                            var discreteChar =
                                (DiscreteChar)(IDistribution <char>)srcTransition.ElementDistribution.Value;
                            if (discreteChar.HasLogProbabilityOverride)
                                var totalMass = discreteChar.Ranges.EnumerableSum(rng =>
                                                                                  rng.Probability.Value * (rng.EndExclusive - rng.StartInclusive));
                                projectionLogScale -= System.Math.Log(totalMass);

                        var destWeight =
                            sourceDistributionHasLogProbabilityOverrides && destElementDistribution.HasNoValue
                                ? Weight.One
                                : Weight.Product(mappingTransition.Weight, srcTransition.Weight,

                        // We don't want an unnormalizable distribution to become normalizable due to a rounding error.
                        if (Math.Abs(destWeight.LogValue) < 1e-12)
                            destWeight = Weight.One;

                        var childDestStateIndex = CreateDestState(childMappingState, srcChildState);
                        destState.AddTransition(destElementDistribution, destWeight, childDestStateIndex, mappingTransition.Group);

            var simplification = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Simplification(result, null);


        /// <summary>
        /// Attempts to determinize the automaton,
        /// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
        /// and there are no epsilon transitions.
        /// </summary>
        /// <param name="maxStatesBeforeStop">
        /// The maximum number of states the resulting automaton can have. Must be positive and not greater than
        /// <c>MaxStateCount</c>. Before a new state is added, the builder's state count is compared against this value;
        /// reaching it is treated as a failed determinization attempt.
        /// </param>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// See <a href=""/> for algorithm details.
        /// The visible code is a worklist construction: every state of the result corresponds to a weighted set of
        /// states of this automaton, and a dictionary maps each such set to the index of the state created for it.
        /// NOTE(review): the link target above is empty in this copy of the source - restore the intended URL.
        /// NOTE(review): this excerpt has lost its braces and several statements (the head of the argument check,
        /// early returns, queue insertions, the final return); the inline notes below mark each gap.
        /// </remarks>
        public bool TryDeterminize(int maxStatesBeforeStop)
                maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
                "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

            this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

            if (this.UsesGroups())
                // Determinization will result in loss of group information, which we cannot allow
                // NOTE(review): the statement guarded by this check is not present in the excerpt;
                // presumably the method gives up and returns false here - confirm.

            // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
            // Such pairs correspond to states of the resulting automaton.
            // The queue holds the sets still to be expanded; the dictionary maps every set seen so far
            // to the index of the state created for it in the builder.
            var weightedStateSetQueue      = new Queue <Determinization.WeightedStateSet>();
            var weightedStateSetToNewState = new Dictionary <Determinization.WeightedStateSet, int>();
            var builder = new Builder();

            // The start state of the result corresponds to the original start state taken with weight one.
            var startWeightedStateSet = new Determinization.WeightedStateSet {
                { this.Start.Index, Weight.One }

            // NOTE(review): no Enqueue of startWeightedStateSet is visible, yet the loop below only runs
            // while the queue is non-empty - the enqueue has presumably been dropped from this excerpt.
            weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);

            while (weightedStateSetQueue.Count > 0)
                // Take one unprocessed state of the resulting automaton
                Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
                var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
                var currentState      = builder[currentStateIndex];

                // Find out what transitions we should add for this state
                var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

                // For each transition to add: unpack (element distribution, weight, destination weighted state set)
                foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet)outgoingTransitionInfo in outgoingTransitionInfos)
                    TElementDistribution elementDistribution = outgoingTransitionInfo.Item1;
                    Weight weight = outgoingTransitionInfo.Item2;
                    Determinization.WeightedStateSet destWeightedStateSet = outgoingTransitionInfo.Item3;

                    // Reuse the state already created for this destination set, if there is one
                    int destinationStateIndex;
                    if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out destinationStateIndex))
                        if (builder.StatesCount == maxStatesBeforeStop)
                            // Too many states, determinization attempt failed
                            // NOTE(review): the statement for this branch is missing from the excerpt (presumably "return false").

                        // Add new state to the result
                        var destinationState = builder.AddState();
                        weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);

                        // Compute its ending weight
                        // NOTE(review): the start of the statement inside this loop is cut off; the visible part
                        // multiplies each member's weight by the end weight of the corresponding original state.
                        foreach (KeyValuePair <int, Weight> stateIdWithWeight in destWeightedStateSet)
                                                              Weight.Product(stateIdWithWeight.Value, this.States[stateIdWithWeight.Key].EndWeight)));

                        // NOTE(review): no Enqueue of destWeightedStateSet is visible here either - confirm against the full source.
                        destinationStateIndex = destinationState.Index;

                    // Add transition to the destination state
                    currentState.AddTransition(elementDistribution, weight, destinationStateIndex);

            // Post-process the built automaton, reusing this automaton's pruning threshold
            var simplification = new Simplification(builder, this.PruneTransitionsWithLogWeightLessThan);

            simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

            var result = builder.GetAutomaton();

            // Carry the pruning threshold and the log-value override over to the determinized automaton
            result.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
            result.LogValueOverride = this.LogValueOverride;

Beispiel #21
        /// <summary>
        /// Computes a set of outgoing transitions from a given state of the determinization result.
        /// </summary>
        /// <param name="sourceStateSet">The source state of the determinized automaton represented as
        /// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
        /// <returns>
        /// A lazily produced sequence of <c>Determinization.OutgoingTransition</c> values corresponding to outgoing
        /// transitions from <paramref name="sourceStateSet"/>.
        /// Each value is constructed from the element distribution of the transition (a character range),
        /// the weight of the transition and the weighted state set that defines the destination state.
        /// </returns>
        /// <remarks>
        /// The method collects the character segment bounds of every transition leaving a member of
        /// <paramref name="sourceStateSet"/> and then walks over the bounds, emitting one transition for each
        /// stretch of characters covered by at least one segment.
        /// NOTE(review): braces and several statements (else branches, part of a Weight.Product call) are missing
        /// from this excerpt; the inline notes below mark each gap.
        /// </remarks>
        protected override IEnumerable <Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceStateSet)
            // Build a list of numbered non-zero probability character segment bounds (they are numbered here due to perf. reasons)
            var segmentBounds = new List <TransitionCharSegmentBound>();

            // Every transition of every member state contributes its bounds, scaled by that member's weight
            for (var i = 0; i < sourceStateSet.Count; ++i)
                var sourceState = sourceStateSet[i];
                var state       = this.States[sourceState.Index];
                foreach (var transition in state.Transitions)
                    AddTransitionCharSegmentBounds(transition, sourceState.Weight, segmentBounds);

            // NOTE(review): the walk below compares each bound with the previous one, which presumably requires
            // segmentBounds to be ordered by Bound; no sort is visible in this excerpt - confirm.

            // Produce an outgoing transition for each unique subset of overlapping segments
            // Running total of the weights of all segments that are currently open
            var currentSegmentTotal = WeightSum.Zero();

            // Per destination state: running total of the weights of the open segments leading to it
            var currentSegmentStateWeights = new Dictionary <int, WeightSum>();
            var currentSegmentStart        = (int)char.MinValue;
            var destinationStateSetBuilder = Determinization.WeightedStateSetBuilder.Create();

            foreach (var segmentBound in segmentBounds)
                // Emit a transition only if some segment is open and the stretch up to this bound is non-empty
                if (currentSegmentTotal.Count != 0 && currentSegmentStart < segmentBound.Bound)
                    // Flush previous segment
                    var segmentEnd     = (char)(segmentBound.Bound - 1);
                    var segmentLength  = segmentEnd - currentSegmentStart + 1;
                    var elementDist    = DiscreteChar.InRange((char)currentSegmentStart, segmentEnd);
                    var invTotalWeight = Weight.Inverse(currentSegmentTotal.Sum);

                    // Divide each destination weight by the total weight before adding it to the destination set
                    foreach (var stateIdWithWeight in currentSegmentStateWeights)
                        var stateWeight = stateIdWithWeight.Value.Sum * invTotalWeight;
                        destinationStateSetBuilder.Add(stateIdWithWeight.Key, stateWeight);

                    var(destinationStateSet, destinationStateSetWeight) = destinationStateSetBuilder.Get();

                    // NOTE(review): the arguments of this Weight.Product call are cut off in the excerpt; segmentLength
                    // and destinationStateSetWeight are not used anywhere else in the visible code, so they are
                    // presumably among the factors - confirm.
                    var transitionWeight = Weight.Product(
                    yield return(new Determinization.OutgoingTransition(
                                     elementDist, transitionWeight, destinationStateSet));

                // Update current segment
                currentSegmentStart = segmentBound.Bound;

                if (segmentBound.IsStart)
                    // A segment opens here: add its weight to the total and to its destination state's entry
                    currentSegmentTotal += segmentBound.Weight;
                    if (currentSegmentStateWeights.TryGetValue(segmentBound.DestinationStateId, out var stateWeight))
                        currentSegmentStateWeights[segmentBound.DestinationStateId] =
                            stateWeight + segmentBound.Weight;
                        // NOTE(review): an "else" has presumably been dropped here - the next assignment would
                        // start a fresh entry for a destination state that has no entry yet.
                        currentSegmentStateWeights[segmentBound.DestinationStateId] = new WeightSum(segmentBound.Weight);
                    // NOTE(review): an "else" has presumably been dropped here - the statements below handle
                    // a bound that closes a segment.
                    Debug.Assert(currentSegmentStateWeights.ContainsKey(segmentBound.DestinationStateId), "We shouldn't exit a state we didn't enter.");
                    currentSegmentTotal -= segmentBound.Weight;

                    var prevStateWeight = currentSegmentStateWeights[segmentBound.DestinationStateId];
                    var newStateWeight  = prevStateWeight - segmentBound.Weight;
                    // NOTE(review): the statement taken when no open segment remains for this destination state is
                    // missing (presumably the entry is removed), as is the "else" before the assignment below.
                    if (newStateWeight.Count == 0)
                        currentSegmentStateWeights[segmentBound.DestinationStateId] = newStateWeight;
Beispiel #22
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">The automaton to project. Must not be null.</param>
        /// <returns>
        /// The projection. The zero automaton is returned immediately when either <paramref name="srcAutomaton"/>
        /// or the transducer's underlying pair automaton is the canonic zero.
        /// </returns>
        /// <remarks>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// The visible code explores pairs (mapping state, source state) depth-first with an explicit stack,
        /// creating one destination state per distinct pair.
        /// NOTE(review): braces and several statements (returns, continues, a ternary condition) are missing
        /// from this excerpt; the inline notes below mark each gap.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var mappingAutomaton = this.sequencePairToWeight;

            // Projecting a zero automaton, or projecting through a zero transducer, yields zero
            if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
                return(Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Zero());

            // The projected automaton must be epsilon-free
            // NOTE(review): no statement that enforces this is visible in the excerpt; the loop below only
            // asserts it in debug builds - confirm against the full source.

            var result = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Builder();

            // Maps a (mapping state index, source state index) pair to the destination state created for it
            var destStateCache = new Dictionary <(int, int), int>();
            // Pairs whose outgoing transitions still have to be computed
            var stack          = new Stack <(int state1, int state2, int destStateIndex)>();

            // Creates destination state and schedules projection computation for it.
            // If computation is already scheduled or done the state index is simply taken from cache
            int CreateDestState(
                PairListAutomaton.State mappingState,
                Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .State srcState)
                var destPair = (mappingState.Index, srcState.Index);

                if (!destStateCache.TryGetValue(destPair, out var destStateIndex))
                    // First visit of this pair: the new state's end weight is the product of both end weights
                    var destState = result.AddState();
                    destState.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
                    stack.Push((mappingState.Index, srcState.Index, destState.Index));
                    destStateCache[destPair] = destState.Index;
                    destStateIndex           = destState.Index;

                // NOTE(review): the return statement of this local function is missing from the excerpt
                // (presumably it returns destStateIndex).

            // Populate the stack with start destination state
            result.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

            while (stack.Count > 0)
                var(mappingStateIndex, srcStateIndex, destStateIndex) = stack.Pop();

                var mappingState = mappingAutomaton.States[mappingStateIndex];
                var srcState     = srcAutomaton.States[srcStateIndex];
                var destState    = result[destStateIndex];

                // Iterate over transitions from mappingState
                foreach (var mappingTransition in mappingState.Transitions)
                    var childMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    // Epsilon transition case: only the mapping automaton advances, the source state stays the same
                    // NOTE(review): the condition of the ternary below is cut off; it selects between the second
                    // component of the pair distribution and no distribution at all.
                    if (IsSrcEpsilon(mappingTransition))
                        var destElementDistribution =
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = CreateDestState(childMappingState, srcState);
                        destState.AddTransition(destElementDistribution, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                        // NOTE(review): presumably a "continue" follows here so that the loop below is skipped
                        // for source-epsilon mapping transitions - not visible in the excerpt.

                    // Iterate over the transitions leaving srcState (they are asserted to be non-epsilon below)
                    foreach (var srcTransition in srcState.Transitions)
                        Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var srcChildState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                        // Project the source element distribution through the pair distribution;
                        // the returned log-scale becomes a factor of the destination transition weight below
                        var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcTransition.ElementDistribution.Value, out var destElementDistribution);
                        // NOTE(review): the statement for this branch is missing (presumably "continue",
                        // skipping projections that have zero weight).
                        if (double.IsNegativeInfinity(projectionLogScale))

                        var destWeight          = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));
                        var childDestStateIndex = CreateDestState(childMappingState, srcChildState);
                        destState.AddTransition(destElementDistribution, destWeight, childDestStateIndex, mappingTransition.Group);

            // NOTE(review): the calls made on this simplification object and the final return statement
            // are not visible in the excerpt.
            var simplification = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Simplification(result, null);


Beispiel #23
            /// <summary>
            /// Creates an automaton <c>f'(s) = sum_{tu=s} f(t)g(u)</c>, where <c>f(t)</c> is the current
            /// automaton (in builder) and <c>g(u)</c> is the given automaton.
            /// The resulting automaton is also known as the Cauchy product of two automata.
            /// </summary>
            /// <param name="automaton">The automaton <c>g(u)</c> whose states are copied into this builder.</param>
            /// <param name="group">
            /// When non-zero, the group assigned to the copied transitions and to the connecting transitions.
            /// When zero, copied transitions keep their own group.
            /// </param>
            /// <param name="avoidEpsilonTransitions">
            /// When <see langword="true"/> and, in addition, either no existing end state has outgoing transitions
            /// or the start state of <paramref name="automaton"/> has no incoming transitions, the transitions of the
            /// appended start state are processed for each existing end state directly. Otherwise the existing end
            /// states are connected to the appended start state with epsilon transitions.
            /// </param>
            /// <remarks>
            /// NOTE(review): braces and several statements (adding the copied transitions, else branches, continues,
            /// the returns of the local function) are missing from this excerpt; the inline notes below mark each gap.
            /// </remarks>
            public void Append(
                Automaton <TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> automaton,
                int group = 0,
                bool avoidEpsilonTransitions = true)
                // States with an index below this value belong to the automaton that was already in the builder
                var oldStateCount = this.states.Count;

                // Copy every state of the appended automaton, shifting transition targets by oldStateCount
                foreach (var state in automaton.States)
                    var stateBuilder = this.AddState();
                    foreach (var transition in state.Transitions)
                        var updatedTransition = transition;
                        updatedTransition.DestinationStateIndex += oldStateCount;
                        if (group != 0)
                            updatedTransition.Group = group;

                        // NOTE(review): the statements that add updatedTransition to stateBuilder (and any copying
                        // of the state's end weight) are not visible in the excerpt.

                // The copy of the appended automaton's start state
                var secondStartState = this[oldStateCount + automaton.Start.Index];

                if (avoidEpsilonTransitions &&
                    (AllEndStatesHaveNoTransitions() || !automaton.Start.HasIncomingTransitions))
                    // Remove start state of appended automaton and copy all its transitions to previous end states
                    for (var i = 0; i < oldStateCount; ++i)
                        var endState = this[i];
                        // NOTE(review): the statement for this check is missing (presumably "continue",
                        // so that only states that can end are processed).
                        if (!endState.CanEnd)

                        for (var iterator = secondStartState.TransitionIterator; iterator.Ok; iterator.Next())
                            var transition = iterator.Value;

                            if (group != 0)
                                transition.Group = group;

                            // A self-loop on the appended start state is redirected to the end state itself.
                            // NOTE(review): with braces stripped it is unclear whether the weight update below
                            // belongs to this branch or to a dropped "else"; the statement that adds the
                            // transition to endState is also not visible - confirm against the full source.
                            if (transition.DestinationStateIndex == secondStartState.Index)
                                transition.DestinationStateIndex = endState.Index;
                                transition.Weight = Weight.Product(transition.Weight, endState.EndWeight);


                        // The end weight becomes the product of the old end weight and the appended start state's end weight
                        endState.SetEndWeight(Weight.Product(endState.EndWeight, secondStartState.EndWeight));

                    // NOTE(review): an "else" has presumably been dropped here; the loop below is the
                    // epsilon-transition fallback.
                    // Just connect all end states with start state of appended automaton
                    for (var i = 0; i < oldStateCount; i++)
                        var state = this[i];
                        if (state.CanEnd)
                            state.AddEpsilonTransition(state.EndWeight, secondStartState.Index, group);

                // Looks for a pre-existing state that can end and has at least one outgoing transition
                // (FirstTransition != -1).
                // NOTE(review): the return statements are not visible in the excerpt; judging by the name, such
                // a state presumably yields false and its absence true - confirm.
                bool AllEndStatesHaveNoTransitions()
                    for (var i = 0; i < oldStateCount; ++i)
                        var state = this.states[i];
                        if (state.CanEnd && state.FirstTransition != -1)
