示例#1
0
            /// <summary>
            /// Computes, for every state, the total weight of all paths that start at that state.
            /// The ending weight of each state is included in its total.
            /// </summary>
            /// <param name="nStates">The size passed to <c>CreateZeroWeights</c> when allocating the result.</param>
            /// <param name="topologicalOrder">The states to process; they are visited from the last element to the first.</param>
            /// <param name="group">Transitions whose group equals this value are ignored.</param>
            /// <returns>The computed weights, addressed by state index.</returns>
            /// <remarks>
            /// Dynamic programming from the leaves towards the root: a state's total is written only after
            /// all states that follow it in <paramref name="topologicalOrder"/> have been processed.
            /// </remarks>
            private static Weight[] ComputeWeightsToEnd(int nStates, IReadOnlyList<State> topologicalOrder, int group)
            {
                var weightsToEnd = CreateZeroWeights(nStates);

                // Walk the order back to front, so destinations are handled before their sources.
                var position = topologicalOrder.Count;
                while (position-- > 0)
                {
                    var current = topologicalOrder[position];

                    // Start from the state's own ending weight and add every admissible continuation.
                    var accumulated = current.EndWeight;
                    for (var t = 0; t < current.TransitionCount; t++)
                    {
                        var outgoing = current.GetTransition(t);
                        if (outgoing.Group != group)
                        {
                            var viaTransition = Weight.Product(outgoing.Weight, weightsToEnd[outgoing.DestinationStateIndex]);
                            accumulated = Weight.Sum(accumulated, viaTransition);
                        }
                    }

                    weightsToEnd[current.Index] = accumulated;
                }

                return weightsToEnd;
            }
示例#2
0
            /// <summary>
            /// Computes, for every state, the total weight of all paths that lead from the root to that state.
            /// Ending weights do not contribute to the result.
            /// </summary>
            /// <param name="nStates">The size passed to <c>CreateZeroWeights</c> when allocating the result.</param>
            /// <param name="topologicalOrder">
            /// The states to process, visited first to last. The first element is treated as the root
            /// and receives the weight <c>Weight.One</c>.
            /// </param>
            /// <param name="group">Transitions whose group equals this value are ignored.</param>
            /// <returns>The computed weights, addressed by state index.</returns>
            /// <remarks>
            /// Dynamic programming from the root towards the leaves: each state pushes its weight
            /// along its outgoing transitions to the destination states.
            /// </remarks>
            private static Weight[] ComputeWeightsFromRoot(int nStates, IReadOnlyList<State> topologicalOrder, int group)
            {
                var weightsFromRoot = CreateZeroWeights(nStates);
                weightsFromRoot[topologicalOrder[0].Index] = Weight.One;

                foreach (var source in topologicalOrder)
                {
                    // The source weight is captured once, before any of its transitions is followed.
                    var sourceWeight = weightsFromRoot[source.Index];
                    if (!sourceWeight.IsZero)
                    {
                        for (var t = 0; t < source.TransitionCount; t++)
                        {
                            var outgoing = source.GetTransition(t);
                            if (outgoing.Group == group)
                            {
                                continue;
                            }

                            // Add the contribution of this transition to whatever the destination has so far.
                            var destination = outgoing.DestinationStateIndex;
                            weightsFromRoot[destination] = Weight.Sum(
                                weightsFromRoot[destination],
                                Weight.Product(sourceWeight, outgoing.Weight));
                        }
                    }
                }

                return weightsFromRoot;
            }
示例#3
0
            /// <summary>
            /// Recursively evaluates the automaton on a sequence, starting from this state
            /// and from a given position in the sequence.
            /// </summary>
            /// <param name="sequence">The sequence to compute the value on.</param>
            /// <param name="sequencePosition">The position in the sequence at which evaluation continues.</param>
            /// <param name="valueCache">
            /// Memoization table keyed by (state index, sequence position). A value found here is returned
            /// as is; a newly computed value is added before returning.
            /// </param>
            /// <returns>The value computed from the current state.</returns>
            private Weight DoGetValue(
                TSequence sequence, int sequencePosition, Dictionary<IntPair, Weight> valueCache)
            {
                var cacheKey = new IntPair(this.Index, sequencePosition);
                Weight result;
                if (valueCache.TryGetValue(cacheKey, out result))
                {
                    return result;
                }

                EpsilonClosure closure = this.GetEpsilonClosure();
                int sequenceLength =
                    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetLength(sequence);

                if (sequencePosition >= sequenceLength)
                {
                    // Nothing left to consume: the value is the end weight of the epsilon closure.
                    result = closure.EndWeight;
                }
                else
                {
                    result = Weight.Zero;
                    TElement element =
                        Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetElement(sequence, sequencePosition);

                    for (int closureIndex = 0; closureIndex < closure.Size; closureIndex++)
                    {
                        State closureState = closure.GetStateByIndex(closureIndex);
                        Weight closureWeight = closure.GetStateWeightByIndex(closureIndex);

                        for (int t = 0; t < closureState.transitionCount; t++)
                        {
                            Transition transition = closureState.transitions[t];
                            if (transition.IsEpsilon)
                            {
                                // The destination is a part of the closure anyway
                                continue;
                            }

                            State destination = this.Owner.states[transition.DestinationStateIndex];
                            Weight elementWeight = Weight.FromLogValue(transition.ElementDistribution.GetLogProb(element));
                            if (elementWeight.IsZero || transition.Weight.IsZero)
                            {
                                // This transition cannot contribute, so the recursion is skipped.
                                continue;
                            }

                            Weight destinationValue = destination.DoGetValue(sequence, sequencePosition + 1, valueCache);
                            if (destinationValue.IsZero)
                            {
                                continue;
                            }

                            result = Weight.Sum(
                                result,
                                Weight.Product(closureWeight, transition.Weight, elementWeight, destinationValue));
                        }
                    }
                }

                valueCache.Add(cacheKey, result);
                return result;
            }
            /// <summary>
            /// Computes the total weights between each pair of states in the component
            /// using the <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf">generalized Floyd's algorithm</a>
            /// and stores the resulting matrix in <c>pairwiseWeights</c>.
            /// </summary>
            /// <remarks>
            /// The matrix is built in a local variable and assigned to the field only once the computation
            /// has finished. If anything throws half-way (for example the transition filter), the field keeps
            /// its previous value instead of exposing a partially computed matrix to later lookups.
            /// </remarks>
            private void ComputePairwiseWeightsMatrix()
            {
                int size = this.Size;
                var weights = Util.ArrayInit(size, size, (row, column) => Weight.Zero);

                // Seed the matrix with the direct transitions that pass the filter and stay inside the component.
                for (int srcStateIndexInComponent = 0; srcStateIndexInComponent < size; ++srcStateIndexInComponent)
                {
                    State state = this.statesInComponent[srcStateIndexInComponent];
                    for (int transitionIndex = 0; transitionIndex < state.TransitionCount; ++transitionIndex)
                    {
                        Transition transition = state.GetTransition(transitionIndex);
                        State destState = state.Owner.States[transition.DestinationStateIndex];
                        if (!this.transitionFilter(transition))
                        {
                            continue;
                        }

                        // -1 means the destination does not belong to this component.
                        int destStateIndexInComponent = this.GetIndexByState(destState);
                        if (destStateIndexInComponent == -1)
                        {
                            continue;
                        }

                        weights[srcStateIndexInComponent, destStateIndexInComponent] = Weight.Sum(
                            weights[srcStateIndexInComponent, destStateIndexInComponent], transition.Weight);
                    }
                }

                for (int k = 0; k < size; ++k)
                {
                    // Weight of going around state k any number of times.
                    Weight loopWeight = this.useApproximateClosure
                        ? Weight.ApproximateClosure(weights[k, k])
                        : Weight.Closure(weights[k, k]);

                    // Add the paths i -> k -> (loops at k) -> j. Row k and column k are only read here,
                    // because both i == k and j == k are skipped.
                    for (int i = 0; i < size; ++i)
                    {
                        if (i == k || weights[i, k].IsZero)
                        {
                            continue;
                        }

                        for (int j = 0; j < size; ++j)
                        {
                            if (j == k || weights[k, j].IsZero)
                            {
                                continue;
                            }

                            Weight additionalWeight = Weight.Product(weights[i, k], loopWeight, weights[k, j]);
                            weights[i, j] = Weight.Sum(weights[i, j], additionalWeight);
                        }
                    }

                    // Account for the loops at k in every path that ends or starts at k.
                    for (int i = 0; i < size; ++i)
                    {
                        weights[i, k] = Weight.Product(weights[i, k], loopWeight);
                        weights[k, i] = Weight.Product(weights[k, i], loopWeight);
                    }

                    // The diagonal entry was scaled above as well; it is replaced by the loop weight itself.
                    weights[k, k] = loopWeight;
                }

                // Publish the finished matrix.
                this.pairwiseWeights = weights;
            }
示例#5
0
            /// <summary>
            /// Merges outgoing transitions of a state that share both the destination state and the group.
            /// </summary>
            /// <remarks>
            /// Two epsilon transitions are merged by summing their weights. Two non-epsilon transitions are merged
            /// by summing their weights and combining their element distributions via <c>SetToSum</c>, using the
            /// transition weights as coefficients (or 1.0 for both when both weights are infinite).
            /// An epsilon transition is never merged with a non-epsilon one.
            /// </remarks>
            public void MergeParallelTransitions()
            {
                for (var stateIndex = 0; stateIndex < this.builder.StatesCount; ++stateIndex)
                {
                    var state = this.builder[stateIndex];
                    for (var keptIterator = state.TransitionIterator; keptIterator.Ok; keptIterator.Next())
                    {
                        var kept = keptIterator.Value;

                        // The second iterator starts right after the first one.
                        // NOTE(review): this relies on the iterator being copied by value here - confirm it is a struct.
                        var candidateIterator = keptIterator;
                        for (candidateIterator.Next(); candidateIterator.Ok; candidateIterator.Next())
                        {
                            var candidate = candidateIterator.Value;

                            var isParallel =
                                kept.DestinationStateIndex == candidate.DestinationStateIndex &&
                                kept.Group == candidate.Group;
                            if (!isParallel)
                            {
                                continue;
                            }

                            // Only epsilon/epsilon and non-epsilon/non-epsilon pairs can be merged.
                            if (kept.IsEpsilon != candidate.IsEpsilon)
                            {
                                continue;
                            }

                            if (!kept.IsEpsilon)
                            {
                                // The distributions are combined using the weights as they are before summation.
                                var mergedDistribution = new TElementDistribution();
                                var bothInfinite =
                                    double.IsInfinity(kept.Weight.Value) && double.IsInfinity(candidate.Weight.Value);
                                if (bothInfinite)
                                {
                                    mergedDistribution.SetToSum(
                                        1.0, kept.ElementDistribution.Value, 1.0, candidate.ElementDistribution.Value);
                                }
                                else
                                {
                                    mergedDistribution.SetToSum(
                                        kept.Weight.Value,
                                        kept.ElementDistribution.Value,
                                        candidate.Weight.Value,
                                        candidate.ElementDistribution.Value);
                                }

                                kept.ElementDistribution = mergedDistribution;
                            }

                            // Write the merged transition back and drop the one that was absorbed.
                            kept.Weight = Weight.Sum(kept.Weight, candidate.Weight);
                            keptIterator.Value = kept;
                            candidateIterator.Remove();
                        }
                    }
                }
            }
示例#6
0
            /// <summary>
            /// Initializes a new instance of the <see cref="EpsilonClosure"/> class.
            /// </summary>
            /// <param name="state">The state whose epsilon closure this instance will represent. Must not be a null state.</param>
            /// <remarks>
            /// When every epsilon transition of <paramref name="state"/> is a self-loop, the closure consists of
            /// that single state and is computed directly. Otherwise the closure is obtained from the condensation
            /// of the graph restricted to epsilon transitions.
            /// </remarks>
            internal EpsilonClosure(State state)
            {
                Argument.CheckIfValid(!state.IsNull, nameof(state));

                // Scan the transitions until an epsilon transition to another state shows up,
                // summing the weights of epsilon self-loops along the way.
                bool leavesStateViaEpsilon = false;
                Weight epsilonSelfLoopWeight = Weight.Zero;
                int transitionIndex = 0;
                while (!leavesStateViaEpsilon && transitionIndex < state.TransitionCount)
                {
                    Transition candidate = state.GetTransition(transitionIndex++);
                    if (!candidate.IsEpsilon)
                    {
                        continue;
                    }

                    if (candidate.DestinationStateIndex == state.Index)
                    {
                        epsilonSelfLoopWeight = Weight.Sum(epsilonSelfLoopWeight, candidate.Weight);
                    }
                    else
                    {
                        leavesStateViaEpsilon = true;
                    }
                }

                if (leavesStateViaEpsilon)
                {
                    // General case: collect every state of every component together with its weight from the root.
                    Condensation condensation = state.Owner.ComputeCondensation(state, tr => tr.IsEpsilon, true);
                    for (int componentIndex = 0; componentIndex < condensation.ComponentCount; componentIndex++)
                    {
                        StronglyConnectedComponent component = condensation.GetComponent(componentIndex);
                        for (int indexInComponent = 0; indexInComponent < component.Size; indexInComponent++)
                        {
                            State member = component.GetStateByIndex(indexInComponent);
                            Weight memberWeight = condensation.GetWeightFromRoot(member);
                            this.weightedStates.Add(Pair.Create(member, memberWeight));
                        }
                    }

                    this.EndWeight = condensation.GetWeightToEnd(state);
                    return;
                }

                // Very common case: a single-node closure.
                Weight closureWeight = Weight.ApproximateClosure(epsilonSelfLoopWeight);
                this.weightedStates.Add(Pair.Create(state, closureWeight));
                this.EndWeight = Weight.Product(closureWeight, state.EndWeight);
            }
            /// <summary>
            /// Gets the total weight between two given states in the component.
            /// </summary>
            /// <param name="srcStateIndexInComponent">The index of the source state in the component.</param>
            /// <param name="destStateIndexInComponent">The index of the destination state in the component.</param>
            /// <returns>The total weight between the given states in the component.</returns>
            /// <remarks>
            /// The result is computed lazily and cached. A single-state component caches one weight (the closure
            /// of the summed self-loop weights that pass the transition filter); larger components cache the
            /// full pairwise matrix. The single-state weight is accumulated in a local and stored in the cache
            /// only when complete, so an exception during the computation cannot leave a partial value behind.
            /// </remarks>
            public Weight GetWeight(int srcStateIndexInComponent, int destStateIndexInComponent)
            {
                Argument.CheckIfInRange(
                    srcStateIndexInComponent >= 0 && srcStateIndexInComponent < this.Size,
                    "srcStateIndexInComponent",
                    "The given index is out of range.");
                Argument.CheckIfInRange(
                    destStateIndexInComponent >= 0 && destStateIndexInComponent < this.Size,
                    "destStateIndexInComponent",
                    "The given index is out of range.");

                if (this.Size == 1)
                {
                    // Optimize for a common case
                    if (!this.singleStatePairwiseWeight.HasValue)
                    {
                        State state = this.statesInComponent[0];

                        // Sum the weights of the self-loops accepted by the filter.
                        Weight selfLoopWeight = Weight.Zero;
                        for (int i = 0; i < state.TransitionCount; ++i)
                        {
                            Transition transition = state.GetTransition(i);
                            if (this.transitionFilter(transition) && transition.DestinationStateIndex == state.Index)
                            {
                                selfLoopWeight = Weight.Sum(selfLoopWeight, transition.Weight);
                            }
                        }

                        // Publish the cached value only after it has been fully computed.
                        this.singleStatePairwiseWeight =
                            this.useApproximateClosure
                                ? Weight.ApproximateClosure(selfLoopWeight)
                                : Weight.Closure(selfLoopWeight);
                    }

                    return this.singleStatePairwiseWeight.Value;
                }

                if (this.pairwiseWeights == null)
                {
                    this.ComputePairwiseWeightsMatrix();
                }

                return this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent];
            }
            /// <summary>
            /// Computes, for every state of the condensation, the total weight of all paths that start at that state.
            /// Ending weights are included. The results are stored in the <c>WeightToEnd</c> member of the
            /// per-state info, and <c>weightsToEndComputed</c> is set when the computation is done.
            /// </summary>
            /// <remarks>
            /// Dynamic programming from the leaves towards the root: components are visited in the order
            /// in which they are stored, which is expected to be the reverse topological order.
            /// </remarks>
            private void ComputeWeightsToEnd()
            {
                for (int componentIndex = 0; componentIndex < this.components.Count; componentIndex++)
                {
                    StronglyConnectedComponent component = this.components[componentIndex];

                    for (int exitIndex = 0; exitIndex < component.Size; exitIndex++)
                    {
                        State exitState = component.GetStateByIndex(exitIndex);

                        // Weight of finishing right at this state or leaving the component through it.
                        Weight exitWeight = exitState.EndWeight;
                        for (int t = 0; t < exitState.TransitionCount; t++)
                        {
                            Transition transition = exitState.GetTransition(t);
                            State destination = exitState.Owner.states[transition.DestinationStateIndex];

                            bool leavesComponent = this.transitionFilter(transition) && !component.HasState(destination);
                            if (!leavesComponent)
                            {
                                continue;
                            }

                            Weight destinationWeightToEnd = this.stateIdToInfo[transition.DestinationStateIndex].WeightToEnd;
                            exitWeight = Weight.Sum(exitWeight, Weight.Product(transition.Weight, destinationWeightToEnd));
                        }

                        if (exitWeight.IsZero)
                        {
                            continue;
                        }

                        // Every state of the component can reach the exit state, so each one gets a share.
                        for (int sourceIndex = 0; sourceIndex < component.Size; sourceIndex++)
                        {
                            State source = component.GetStateByIndex(sourceIndex);
                            CondensationStateInfo sourceInfo = this.stateIdToInfo[source.Index];

                            Weight contribution = Weight.Product(component.GetWeight(sourceIndex, exitIndex), exitWeight);
                            sourceInfo.WeightToEnd = Weight.Sum(sourceInfo.WeightToEnd, contribution);

                            // The info is written back explicitly after being modified.
                            this.stateIdToInfo[source.Index] = sourceInfo;
                        }
                    }
                }

                this.weightsToEndComputed = true;
            }
示例#9
0
            /// <summary>
            /// Builds a new automaton out of the transitions that belong to a given group.
            /// </summary>
            /// <param name="states">The states of the source automaton, addressed by state index.</param>
            /// <param name="topologicalOrder">The states in topological order; the first element is used as the root.</param>
            /// <param name="group">The group whose transitions are copied into the new automaton.</param>
            /// <param name="subgraph">The indices of the states to be represented in the new automaton.</param>
            /// <returns>The newly built automaton.</returns>
            /// <remarks>
            /// Each copied state that has outgoing transitions is entered from the new start state by an epsilon
            /// transition carrying its weight from the root. Each copied state that has incoming transitions gets
            /// its weight to the end as the end weight. Both weights are computed while ignoring the transitions of
            /// <paramref name="group"/>. An exception is thrown if some state ends up with both weights non-zero.
            /// </remarks>
            private static TThis BuildSubautomaton(IReadOnlyList<State> states, IReadOnlyList<State> topologicalOrder, int group, HashSet<int> subgraph)
            {
                var fromRoot = ComputeWeightsFromRoot(states.Count, topologicalOrder, group);
                var toEnd = ComputeWeightsToEnd(states.Count, topologicalOrder, group);
                var result = new TThis();
                var newStates = subgraph.ToDictionary(x => x, _ => result.AddState());

                // Starts as the whole subgraph; every state that turns out to be a destination is removed.
                var withoutIncoming = new HashSet<int>(subgraph);

                // Copy the transitions of the group.
                foreach (var oldIndex in subgraph)
                {
                    var oldState = states[oldIndex];
                    var newState = newStates[oldIndex];

                    for (int t = 0; t < oldState.TransitionCount; t++)
                    {
                        var transition = oldState.GetTransition(t);
                        if (transition.Group == group)
                        {
                            var destination = transition.DestinationStateIndex;
                            withoutIncoming.Remove(destination);
                            newState.AddTransition(
                                transition.ElementDistribution,
                                transition.Weight,
                                newStates[destination]);
                        }
                    }
                }

                // Mark start and end states, and accumulate the weight of paths bypassing the copied transitions.
                var bypassWeight = Weight.Zero;
                foreach (var oldIndex in subgraph)
                {
                    var newState = newStates[oldIndex];

                    // A state is an entry point only if something leaves it inside the new automaton.
                    var entryWeight = Weight.Zero;
                    if (newState.TransitionCount > 0)
                    {
                        entryWeight = fromRoot[oldIndex];
                    }

                    if (!entryWeight.IsZero)
                    {
                        result.Start.AddEpsilonTransition(entryWeight, newState);
                    }

                    // A state is an exit point only if something enters it inside the new automaton.
                    var exitWeight = Weight.Zero;
                    if (!withoutIncoming.Contains(oldIndex))
                    {
                        exitWeight = toEnd[oldIndex];
                    }

                    if (!exitWeight.IsZero)
                    {
                        newState.SetEndWeight(exitWeight);
                    }

                    bypassWeight = Weight.Sum(bypassWeight, Weight.Product(entryWeight, exitWeight));
                }

                if (!bypassWeight.IsZero)
                {
                    throw new Exception("Write a unit test for this case. Code should be fine.");
                }

                var rootIndex = topologicalOrder[0].Index;
                var startEndWeight = Weight.AbsoluteDifference(toEnd[rootIndex], bypassWeight);
                result.Start.SetEndWeight(startEndWeight);

                return result;
            }
示例#10
0
        /// <summary>
        /// Computes a set of outgoing transitions from a given state of the determinization result.
        /// </summary>
        /// <param name="sourceState">The source state of the determinized automaton represented as
        /// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
        /// <returns>
        /// A collection of (element distribution, weight, weighted state set) triples corresponding to outgoing transitions from <paramref name="sourceState"/>.
        /// The first two elements of a tuple define the element distribution and the weight of a transition.
        /// The third element defines the outgoing state.
        /// </returns>
        /// <remarks>
        /// The method sweeps over the character segment bounds of all transitions leaving the states of
        /// <paramref name="sourceState"/>. While sweeping it maintains the total weight of the currently open
        /// segments and the same total per destination state. Each time the sweep reaches a new bound and the
        /// total weight is above the <c>LogEps</c> threshold, one transition is emitted for the character range
        /// between the previous bound and the new one.
        /// </remarks>
        protected override List <(DiscreteChar, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceState)
        {
            const double LogEps = -35; // Don't add transitions with log-weight less than this as they have been produced by numerical inaccuracies

            // Build a list of numbered non-zero probability character segment bounds (they are numbered here due to perf. reasons)
            // NOTE(review): the list is filled by AddTransitionCharSegmentBounds, which is not visible here. The code below
            // assumes the number of a bound (Item1) lets the start bound of an end bound be found at
            // segmentBounds[Item1 - 1] - confirm against that helper.
            var segmentBounds        = new List <ValueTuple <int, TransitionCharSegmentBound> >();
            int transitionsProcessed = 0;

            foreach (KeyValuePair <int, Weight> stateIdWeight in sourceState)
            {
                var state = this.States[stateIdWeight.Key];
                foreach (var transition in state.Transitions)
                {
                    AddTransitionCharSegmentBounds(transition, stateIdWeight.Value, segmentBounds);
                }

                transitionsProcessed += state.Transitions.Count;
            }

            // Sort segment bounds left-to-right, start-to-end
            // The sort works on a copy, so segmentBounds keeps its original order for the index lookups below.
            var sortedIndexedSegmentBounds = segmentBounds.ToArray();

            // With at most one processed transition the copy is used as is.
            // NOTE(review): presumably the bounds of a single transition are already produced in order - confirm.
            if (transitionsProcessed > 1)
            {
                Array.Sort(sortedIndexedSegmentBounds, CompareSegmentBounds);

                // Orders the numbered bounds by the bound itself; the number is ignored.
                int CompareSegmentBounds((int, TransitionCharSegmentBound) a, (int, TransitionCharSegmentBound) b) =>
                a.Item2.CompareTo(b.Item2);
            }

            // Produce an outgoing transition for each unique subset of overlapping segments
            var    result = new List <(DiscreteChar, Weight, Determinization.WeightedStateSet)>();
            Weight currentSegmentStateWeightSum = Weight.Zero;

            // Total weight of the currently open segments, per destination state.
            var currentSegmentStateWeights = new Dictionary <int, Weight>();

            // Give every destination state a zero entry up front, so that the indexer reads
            // in the sweep below always find a value.
            foreach (var sb in segmentBounds)
            {
                currentSegmentStateWeights[sb.Item2.DestinationStateId] = Weight.Zero;
            }

            // Start bounds of the segments that are currently open.
            var activeSegments      = new HashSet <TransitionCharSegmentBound>();
            int currentSegmentStart = char.MinValue;

            foreach (var tup in sortedIndexedSegmentBounds)
            {
                TransitionCharSegmentBound segmentBound = tup.Item2;

                // A transition is emitted only if the accumulated weight is above the threshold
                // and the range between the previous bound and this one is not empty.
                if (currentSegmentStateWeightSum.LogValue > LogEps && currentSegmentStart < segmentBound.Bound)
                {
                    // Flush previous segment
                    // The flushed range ends right before the current bound.
                    char         segmentEnd    = (char)(segmentBound.Bound - 1);
                    int          segmentLength = segmentEnd - currentSegmentStart + 1;
                    DiscreteChar elementDist   = DiscreteChar.InRange((char)currentSegmentStart, segmentEnd);

                    // The destination contains every state whose weight is above the threshold,
                    // with the weight divided by the total weight of the segment.
                    var destinationState = new Determinization.WeightedStateSet();
                    foreach (KeyValuePair <int, Weight> stateIdWithWeight in currentSegmentStateWeights)
                    {
                        if (stateIdWithWeight.Value.LogValue > LogEps)
                        {
                            Weight stateWeight = Weight.Product(stateIdWithWeight.Value, Weight.Inverse(currentSegmentStateWeightSum));
                            destinationState.Add(stateIdWithWeight.Key, stateWeight);
                        }
                    }

                    // The transition weight is the total weight multiplied by the number of characters in the range.
                    Weight transitionWeight = Weight.Product(Weight.FromValue(segmentLength), currentSegmentStateWeightSum);
                    result.Add((elementDist, transitionWeight, destinationState));
                }

                // Update current segment
                currentSegmentStart = segmentBound.Bound;

                if (segmentBound.IsStart)
                {
                    // A segment opens: add its weight to the total and to its destination state.
                    activeSegments.Add(segmentBound);
                    currentSegmentStateWeightSum = Weight.Sum(currentSegmentStateWeightSum, segmentBound.Weight);
                    currentSegmentStateWeights[segmentBound.DestinationStateId] = Weight.Sum(currentSegmentStateWeights[segmentBound.DestinationStateId], segmentBound.Weight);
                }
                else
                {
                    // A segment closes: forget its start bound and take its weight out of the totals.
                    Debug.Assert(currentSegmentStateWeights.ContainsKey(segmentBound.DestinationStateId), "We shouldn't exit a state we didn't enter.");
                    activeSegments.Remove(segmentBounds[tup.Item1 - 1].Item2);  // The end bound follows its start bound in the original (unsorted) list.
                    if (double.IsInfinity(segmentBound.Weight.Value))
                    {
                        // Cannot subtract because of the infinities involved.
                        // Both totals are recomputed from the segments that are still open instead.
                        currentSegmentStateWeightSum = activeSegments.Select(sb => sb.Weight).Aggregate(Weight.Zero, (acc, w) => Weight.Sum(acc, w));
                        currentSegmentStateWeights[segmentBound.DestinationStateId] =
                            activeSegments.Where(sb => sb.DestinationStateId == segmentBound.DestinationStateId).Select(sb => sb.Weight).Aggregate(Weight.Zero, (acc, w) => Weight.Sum(acc, w));
                    }
                    else
                    {
                        // With no open segments left the total is reset to exactly zero rather than subtracted.
                        currentSegmentStateWeightSum = activeSegments.Count == 0 ? Weight.Zero : Weight.AbsoluteDifference(currentSegmentStateWeightSum, segmentBound.Weight);

                        Weight prevStateWeight = currentSegmentStateWeights[segmentBound.DestinationStateId];
                        currentSegmentStateWeights[segmentBound.DestinationStateId] = Weight.AbsoluteDifference(
                            prevStateWeight, segmentBound.Weight);
                    }
                }
            }

            return(result);
        }
            /// <summary>
            /// Fills in, for the states of the condensation, the accumulated weight of the paths that
            /// lead from the root to each state. Ending weights are not taken into account.
            /// </summary>
            /// <remarks>
            /// This is a dynamic programming pass that walks the components from the last one in
            /// <c>components</c> down to the first one. For every component it first turns the weight
            /// with which each member is entered (<c>UpwardWeightFromRoot</c>) into the total weight of
            /// reaching each member (<c>WeightFromRoot</c>) using the pairwise weights of the component,
            /// and then pushes those totals along the accepted transitions that leave the component.
            /// Sets <c>weightsFromRootComputed</c> once the pass is finished.
            /// </remarks>
            private void ComputeWeightsFromRoot()
            {
                // The root is entered with the unit weight
                int rootId = this.Root.Index;
                var infoOfRoot = this.stateIdToInfo[rootId];
                infoOfRoot.UpwardWeightFromRoot = Weight.One;
                this.stateIdToInfo[rootId] = infoOfRoot;

                // Walk the components from the last one to the first one
                int componentIndex = this.components.Count;
                while (--componentIndex >= 0)
                {
                    var component = this.components[componentIndex];

                    // Step 1: spread the entry weights over the members of the component
                    for (int from = 0; from < component.Size; ++from)
                    {
                        var entryWeight = this.stateIdToInfo[component.GetStateByIndex(from).Index].UpwardWeightFromRoot;
                        if (!entryWeight.IsZero)
                        {
                            for (int to = 0; to < component.Size; ++to)
                            {
                                int toId = component.GetStateByIndex(to).Index;
                                var toInfo = this.stateIdToInfo[toId];
                                var contribution = Weight.Product(entryWeight, component.GetWeight(from, to));
                                toInfo.WeightFromRoot = Weight.Sum(toInfo.WeightFromRoot, contribution);
                                this.stateIdToInfo[toId] = toInfo;
                            }
                        }
                    }

                    // Step 2: hand the accumulated weights over to the states outside of the component
                    for (int member = 0; member < component.Size; ++member)
                    {
                        var source = component.GetStateByIndex(member);
                        var reachWeight = this.stateIdToInfo[source.Index].WeightFromRoot;
                        if (!reachWeight.IsZero)
                        {
                            for (int t = 0; t < source.TransitionCount; ++t)
                            {
                                var outgoing = source.GetTransition(t);
                                var target = source.Owner.states[outgoing.DestinationStateIndex];

                                // Only accepted transitions that actually leave the component contribute
                                if (!this.transitionFilter(outgoing) || component.HasState(target))
                                {
                                    continue;
                                }

                                var targetInfo = this.stateIdToInfo[target.Index];
                                targetInfo.UpwardWeightFromRoot = Weight.Sum(
                                    targetInfo.UpwardWeightFromRoot,
                                    Weight.Product(reachWeight, outgoing.Weight));
                                this.stateIdToInfo[outgoing.DestinationStateIndex] = targetInfo;
                            }
                        }
                    }
                }

                this.weightsFromRootComputed = true;
            }
示例#12
0
        /// <summary>
        /// Attempts to determinize the automaton,
        /// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
        /// and there are no epsilon transitions.
        /// </summary>
        /// <param name="maxStatesBeforeStop">
        /// The maximum number of states the resulting automaton can have. If the number of states exceeds the value
        /// of this parameter during determinization, the process is aborted.
        /// </param>
        /// <returns>
        /// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
        /// <see langword="false"/> otherwise.
        /// </returns>
        /// <remarks>
        /// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
        /// Epsilon transitions are removed before any other check is made, so the automaton is made epsilon-free
        /// even when this method returns <see langword="false"/>. The determinized result replaces the contents
        /// of this automaton only on success.
        /// </remarks>
        public bool TryDeterminize(int maxStatesBeforeStop)
        {
            Argument.CheckIfInRange(
                maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
                nameof(maxStatesBeforeStop),
                "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

            this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

            if (this.UsesGroups())
            {
                // Determinization will result in loss of group information, which we cannot allow
                return false;
            }

            // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
            // Such pairs correspond to states of the resulting automaton.
            var weightedStateSetQueue      = new Queue <Determinization.WeightedStateSet>();
            var weightedStateSetToNewState = new Dictionary <Determinization.WeightedStateSet, int>();
            var builder = new Builder();

            var startWeightedStateSet = new Determinization.WeightedStateSet {
                { this.Start.Index, Weight.One }
            };

            weightedStateSetQueue.Enqueue(startWeightedStateSet);
            weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
            builder.Start.SetEndWeight(this.Start.EndWeight);

            while (weightedStateSetQueue.Count > 0)
            {
                // Take one unprocessed state of the resulting automaton
                Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
                var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
                var currentState      = builder[currentStateIndex];

                // Find out what transitions we should add for this state
                var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

                // For each transition to add
                foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet) outgoingTransitionInfo in outgoingTransitionInfos)
                {
                    var (elementDistribution, weight, destWeightedStateSet) = outgoingTransitionInfo;

                    if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out int destinationStateIndex))
                    {
                        if (builder.StatesCount == maxStatesBeforeStop)
                        {
                            // Too many states, determinization attempt failed
                            return false;
                        }

                        // Add new state to the result and schedule it for processing
                        var destinationState = builder.AddState();
                        weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);
                        weightedStateSetQueue.Enqueue(destWeightedStateSet);

                        // Compute its ending weight: the sum over the set of (weight in the set) * (ending weight of the original state)
                        destinationState.SetEndWeight(Weight.Zero);
                        foreach (KeyValuePair <int, Weight> stateIdWithWeight in destWeightedStateSet)
                        {
                            destinationState.SetEndWeight(Weight.Sum(
                                                              destinationState.EndWeight,
                                                              Weight.Product(stateIdWithWeight.Value, this.States[stateIdWithWeight.Key].EndWeight)));
                        }

                        destinationStateIndex = destinationState.Index;
                    }

                    // Add transition to the destination state
                    currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
                }
            }

            var simplification = new Simplification(builder, this.PruneTransitionsWithLogWeightLessThan);

            simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

            var result = builder.GetAutomaton();

            // Carry the settings of this automaton over to the result before taking its contents
            result.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
            result.LogValueOverride = this.LogValueOverride;
            this.SwapWith(result);

            return true;
        }
示例#13
0
            /// <summary>
            /// Recursively increases the value of this automaton on <paramref name="sequence"/> by <paramref name="weight"/>.
            /// </summary>
            /// <param name="stateIndex">Index of currently traversed state.</param>
            /// <param name="isNewState">Indicates whether state <paramref name="stateIndex"/> was just created.</param>
            /// <param name="selfLoopAlreadyMatched">Indicates whether self-loop on state <paramref name="stateIndex"/> was just matched.</param>
            /// <param name="firstAllowedStateIndex">The minimum index of an existing state that can be used for the sequence.</param>
            /// <param name="currentSequencePos">The current position in the generalized sequence.</param>
            /// <param name="sequence">The generalized sequence.</param>
            /// <param name="weight">The weight of the sequence.</param>
            /// <returns>
            /// <see langword="true"/> if the subsequence starting at <paramref name="currentSequencePos"/> has been successfully merged in,
            /// <see langword="false"/> otherwise.
            /// </returns>
            /// <remarks>
            /// This function attempts to add as few new states and transitions as possible.
            /// Its implementation is conceptually similar to adding string to a trie.
            /// <para>
            /// Every time an existing transition is traversed, its weight is divided out of the weight that remains
            /// to be added, so that the weight of the whole merged path equals the requested one.
            /// </para>
            /// <para>
            /// Every branch that adds a transition or changes an ending weight returns <see langword="true"/>,
            /// so a call that returns <see langword="false"/> has not modified the automaton.
            /// </para>
            /// </remarks>
            private bool DoAddGeneralizedSequence(
                int stateIndex,
                bool isNewState,
                bool selfLoopAlreadyMatched,
                int firstAllowedStateIndex,
                int currentSequencePos,
                GeneralizedSequence sequence,
                Weight weight)
            {
                bool success;
                var  builder = this.builder;
                var  state   = builder[stateIndex];

                // The whole sequence has been consumed: try to finish in the current state
                if (currentSequencePos == sequence.Count)
                {
                    if (!selfLoopAlreadyMatched)
                    {
                        // We can't finish in a state with a self-loop
                        for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                        {
                            if (iterator.Value.DestinationStateIndex == state.Index)
                            {
                                return false;
                            }
                        }
                    }

                    state.SetEndWeight(Weight.Sum(state.EndWeight, weight));
                    return true;
                }

                var element = sequence[currentSequencePos];

                // Treat self-loop elements separately
                if (element.LoopWeight.HasValue)
                {
                    if (selfLoopAlreadyMatched)
                    {
                        // Previous element was also a self-loop, we should try to find an epsilon transition
                        for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                        {
                            var transition = iterator.Value;
                            if (transition.DestinationStateIndex != state.Index &&
                                transition.IsEpsilon &&
                                transition.DestinationStateIndex >= firstAllowedStateIndex)
                            {
                                if (this.DoAddGeneralizedSequence(
                                        transition.DestinationStateIndex,
                                        false,
                                        false,
                                        firstAllowedStateIndex,
                                        currentSequencePos,
                                        sequence,
                                        Weight.Product(weight, Weight.Inverse(transition.Weight))))
                                {
                                    return true;
                                }
                            }
                        }

                        // Epsilon transition not found, let's create a new one
                        var destination = state.AddEpsilonTransition(Weight.One);
                        success = this.DoAddGeneralizedSequence(
                            destination.Index, true, false, firstAllowedStateIndex, currentSequencePos, sequence, weight);
                        Debug.Assert(success, "This call must always succeed.");
                        return true;
                    }

                    // Find a matching self-loop
                    for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                    {
                        var transition = iterator.Value;

                        if (transition.IsEpsilon && transition.DestinationStateIndex != state.Index && transition.DestinationStateIndex >= firstAllowedStateIndex)
                        {
                            // Try this epsilon transition.
                            // Its weight becomes a part of the path, so it must be divided out of the remaining weight,
                            // exactly as it is done for the other traversals of existing transitions.
                            if (this.DoAddGeneralizedSequence(
                                    transition.DestinationStateIndex,
                                    false,
                                    false,
                                    firstAllowedStateIndex,
                                    currentSequencePos,
                                    sequence,
                                    Weight.Product(weight, Weight.Inverse(transition.Weight))))
                            {
                                return true;
                            }
                        }

                        // Is it a self-loop?
                        if (transition.DestinationStateIndex == state.Index)
                        {
                            // Do self-loops match?
                            if ((transition.Weight == element.LoopWeight.Value) &&
                                (element.Group == transition.Group) &&
                                ((transition.IsEpsilon && element.IsEpsilonSelfLoop) || (!transition.IsEpsilon && !element.IsEpsilonSelfLoop && transition.ElementDistribution.Equals(element.ElementDistribution))))
                            {
                                // Skip the element in the sequence, remain in the same state
                                success = this.DoAddGeneralizedSequence(
                                    stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                                Debug.Assert(success, "This call must always succeed.");
                                return true;
                            }

                            // The state also has a self-loop, but the two don't match
                            return false;
                        }
                    }

                    if (!isNewState)
                    {
                        // Can't add self-loop to an existing state, it will change the language accepted by the state
                        return false;
                    }

                    // Add a new self-loop
                    state.AddTransition(element.ElementDistribution, element.LoopWeight.Value, stateIndex, element.Group);
                    success = this.DoAddGeneralizedSequence(stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                    Debug.Assert(success, "This call must always succeed.");
                    return true;
                }

                // Try to find a transition for the element
                for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
                {
                    var transition = iterator.Value;

                    if (transition.IsEpsilon && transition.DestinationStateIndex != state.Index && transition.DestinationStateIndex >= firstAllowedStateIndex)
                    {
                        // Try this epsilon transition.
                        // Its weight becomes a part of the path, so it must be divided out of the remaining weight,
                        // exactly as it is done for the other traversals of existing transitions.
                        if (this.DoAddGeneralizedSequence(
                                transition.DestinationStateIndex,
                                false,
                                false,
                                firstAllowedStateIndex,
                                currentSequencePos,
                                sequence,
                                Weight.Product(weight, Weight.Inverse(transition.Weight))))
                        {
                            return true;
                        }
                    }

                    // Is it a self-loop?
                    if (transition.DestinationStateIndex == state.Index)
                    {
                        if (selfLoopAlreadyMatched)
                        {
                            // The self-loop was checked or added by the caller
                            continue;
                        }

                        // Can't go through an existing self-loop, it will allow undesired sequences to be accepted
                        return false;
                    }

                    if (transition.DestinationStateIndex < firstAllowedStateIndex ||
                        element.Group != transition.Group ||
                        !element.ElementDistribution.Equals(transition.ElementDistribution))
                    {
                        continue;
                    }

                    // Skip the element in the sequence, move to the destination state
                    // Weight of the existing transition must be taken into account
                    // This case can fail if the next element is a self-loop and the destination state already has a different one
                    if (this.DoAddGeneralizedSequence(
                            transition.DestinationStateIndex,
                            false,
                            false,
                            firstAllowedStateIndex,
                            currentSequencePos + 1,
                            sequence,
                            Weight.Product(weight, Weight.Inverse(transition.Weight))))
                    {
                        return true;
                    }
                }

                // No existing transition could be reused: add a new transition leading to a new state
                var newChild = state.AddTransition(element.ElementDistribution, Weight.One, null, element.Group);

                success = this.DoAddGeneralizedSequence(
                    newChild.Index, true, false, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                Debug.Assert(success, "This call must always succeed.");
                return true;
            }