/// <summary>
/// Computes, for every state listed in <paramref name="topologicalOrder"/>, the accumulated weight of all paths
/// that start at that state. The end weight of each state is part of the accumulated value.
/// </summary>
/// <param name="nStates">Passed to <c>CreateZeroWeights</c> to allocate the result array, which is indexed by state index.</param>
/// <param name="topologicalOrder">The states to process. They are visited from the last element to the first.</param>
/// <param name="group">Transitions whose group equals this value are skipped.</param>
/// <returns>An array holding the computed weight of each processed state at position <c>state.Index</c>.</returns>
/// <remarks>
/// This is a dynamic programming pass: because the list is walked backwards, the value of a transition's destination
/// is read after it has been finalized, provided the list really is a topological order.
/// </remarks>
private static Weight[] ComputeWeightsToEnd(int nStates, IReadOnlyList<State> topologicalOrder, int group)
{
    var weightsToEnd = CreateZeroWeights(nStates);

    // Walk the order back to front
    int position = topologicalOrder.Count;
    while (position > 0)
    {
        --position;
        var current = topologicalOrder[position];

        // Start from the state's own end weight and add the contribution of every eligible transition
        var accumulated = current.EndWeight;
        for (var t = 0; t < current.TransitionCount; t++)
        {
            var outgoing = current.GetTransition(t);
            if (outgoing.Group != group)
            {
                var viaTransition = Weight.Product(outgoing.Weight, weightsToEnd[outgoing.DestinationStateIndex]);
                accumulated = Weight.Sum(accumulated, viaTransition);
            }
        }

        weightsToEnd[current.Index] = accumulated;
    }

    return weightsToEnd;
}
/// <summary>
/// Computes, for every state listed in <paramref name="topologicalOrder"/>, the accumulated weight of all paths
/// that lead from the root (the first element of the order) to that state. End weights are not included.
/// </summary>
/// <param name="nStates">Passed to <c>CreateZeroWeights</c> to allocate the result array, which is indexed by state index.</param>
/// <param name="topologicalOrder">The states to process, root first. Must contain at least one element.</param>
/// <param name="group">Transitions whose group equals this value are skipped.</param>
/// <returns>An array holding the computed weight of each state at position <c>state.Index</c>.</returns>
/// <remarks>
/// This is a dynamic programming pass in forward order: each state pushes its weight along its outgoing transitions,
/// so a state's value is complete by the time it is visited, provided the list really is a topological order.
/// </remarks>
private static Weight[] ComputeWeightsFromRoot(int nStates, IReadOnlyList<State> topologicalOrder, int group)
{
    var weightsFromRoot = CreateZeroWeights(nStates);

    // The root is reachable from itself with weight one
    var root = topologicalOrder[0];
    weightsFromRoot[root.Index] = Weight.One;

    foreach (var source in topologicalOrder)
    {
        var sourceWeight = weightsFromRoot[source.Index];
        if (!sourceWeight.IsZero)
        {
            // Push the weight of this state along each eligible outgoing transition
            for (var t = 0; t < source.TransitionCount; ++t)
            {
                var outgoing = source.GetTransition(t);
                if (outgoing.Group != group)
                {
                    var destinationIndex = outgoing.DestinationStateIndex;
                    var contribution = Weight.Product(sourceWeight, outgoing.Weight);
                    weightsFromRoot[destinationIndex] = Weight.Sum(weightsFromRoot[destinationIndex], contribution);
                }
            }
        }
    }

    return weightsFromRoot;
}
/// <summary>
/// Recursively evaluates the automaton on the part of <paramref name="sequence"/> that starts at
/// <paramref name="sequencePosition"/>, taking this state as the starting point.
/// </summary>
/// <param name="sequence">The sequence being evaluated.</param>
/// <param name="sequencePosition">The index of the next sequence element to consume.</param>
/// <param name="valueCache">
/// Memoization table keyed by (state index, sequence position). A computed value is stored here before returning.
/// </param>
/// <returns>The value of the remaining part of the sequence when started from this state.</returns>
private Weight DoGetValue(
    TSequence sequence, int sequencePosition, Dictionary<IntPair, Weight> valueCache)
{
    var cacheKey = new IntPair(this.Index, sequencePosition);
    Weight memoized;
    if (valueCache.TryGetValue(cacheKey, out memoized))
    {
        return memoized;
    }

    EpsilonClosure closure = this.GetEpsilonClosure();
    int sequenceLength =
        Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetLength(sequence);

    Weight result;
    if (sequencePosition >= sequenceLength)
    {
        // Nothing left to consume: the value is the end weight of the epsilon closure
        result = closure.EndWeight;
    }
    else
    {
        result = Weight.Zero;
        TElement element =
            Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetElement(sequence, sequencePosition);

        for (int closureIndex = 0; closureIndex < closure.Size; ++closureIndex)
        {
            State member = closure.GetStateByIndex(closureIndex);
            Weight memberWeight = closure.GetStateWeightByIndex(closureIndex);

            for (int t = 0; t < member.transitionCount; t++)
            {
                Transition outgoing = member.transitions[t];
                if (outgoing.IsEpsilon)
                {
                    // Epsilon destinations are already members of the closure
                    continue;
                }

                State destination = this.Owner.states[outgoing.DestinationStateIndex];
                Weight elementWeight = Weight.FromLogValue(outgoing.ElementDistribution.GetLogProb(element));
                if (elementWeight.IsZero || outgoing.Weight.IsZero)
                {
                    // The transition cannot contribute, so the recursion is skipped
                    continue;
                }

                Weight restValue = destination.DoGetValue(sequence, sequencePosition + 1, valueCache);
                if (restValue.IsZero)
                {
                    continue;
                }

                result = Weight.Sum(
                    result,
                    Weight.Product(memberWeight, outgoing.Weight, elementWeight, restValue));
            }
        }
    }

    valueCache.Add(cacheKey, result);
    return result;
}
/// <summary>
/// Fills <c>pairwiseWeights</c> with the total weight between every ordered pair of states of the component,
/// using the <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf">generalized Floyd's algorithm</a>.
/// </summary>
/// <remarks>
/// The matrix is first seeded with the weights of direct transitions that pass <c>transitionFilter</c> and stay
/// inside the component. Each state is then used in turn as an intermediate state, with the closure of its
/// self-weight (approximate or exact, depending on <c>useApproximateClosure</c>) accounting for loops through it.
/// </remarks>
private void ComputePairwiseWeightsMatrix()
{
    this.pairwiseWeights = Util.ArrayInit(this.Size, this.Size, (row, column) => Weight.Zero);
    var matrix = this.pairwiseWeights;

    // Seed the matrix with direct transitions between states of the component
    for (int source = 0; source < this.Size; ++source)
    {
        State sourceState = this.statesInComponent[source];
        for (int t = 0; t < sourceState.TransitionCount; ++t)
        {
            Transition outgoing = sourceState.GetTransition(t);
            State destinationState = sourceState.Owner.States[outgoing.DestinationStateIndex];
            if (!this.transitionFilter(outgoing))
            {
                continue;
            }

            int destination = this.GetIndexByState(destinationState);
            if (destination == -1)
            {
                // The destination lies outside of this component
                continue;
            }

            matrix[source, destination] = Weight.Sum(matrix[source, destination], outgoing.Weight);
        }
    }

    // Allow paths through each intermediate state in turn
    for (int via = 0; via < this.Size; ++via)
    {
        Weight loopWeight = this.useApproximateClosure
            ? Weight.ApproximateClosure(matrix[via, via])
            : Weight.Closure(matrix[via, via]);

        for (int from = 0; from < this.Size; ++from)
        {
            if (from == via || matrix[from, via].IsZero)
            {
                continue;
            }

            // This cell is not written by the loop below because 'to' never equals 'via'
            Weight incoming = matrix[from, via];
            for (int to = 0; to < this.Size; ++to)
            {
                if (to == via || matrix[via, to].IsZero)
                {
                    continue;
                }

                Weight throughVia = Weight.Product(incoming, loopWeight, matrix[via, to]);
                matrix[from, to] = Weight.Sum(matrix[from, to], throughVia);
            }
        }

        // Fold the loop weight into the row and the column of the intermediate state
        for (int other = 0; other < this.Size; ++other)
        {
            matrix[other, via] = Weight.Product(matrix[other, via], loopWeight);
            matrix[via, other] = Weight.Product(matrix[via, other], loopWeight);
        }

        matrix[via, via] = loopWeight;
    }
}
/// <summary>
/// Appends to <paramref name="bounds"/> the start and end bounds of every character segment to which
/// the given transition assigns a non-zero probability.
/// </summary>
/// <param name="transition">The transition whose element distribution is split into segments.</param>
/// <param name="sourceStateResidualWeight">The residual weight of the source state of the transition.</param>
/// <param name="bounds">
/// The list receiving the bounds. Each bound is paired with its position in the list at the time it is added,
/// and the end bound of a segment directly follows its start bound.
/// </param>
/// <remarks>
/// The weight of a segment is the segment's probability multiplied by the transition weight and by
/// <paramref name="sourceStateResidualWeight"/>. The gaps between the explicit ranges of the distribution
/// produce segments of their own when the probability outside the ranges is non-zero.
/// </remarks>
private static void AddTransitionCharSegmentBounds(
    Transition transition,
    Weight sourceStateResidualWeight,
    List<ValueTuple<int, TransitionCharSegmentBound>> bounds)
{
    var distribution = transition.ElementDistribution.Value;
    var ranges = distribution.Ranges;
    Weight outsideRangesWeight = Weight.FromValue(distribution.ProbabilityOutsideRanges);
    Weight baseWeight = Weight.Product(transition.Weight, sourceStateResidualWeight);

    // Emits the start bound and then the end bound of a single segment
    void AddSegment(int startInclusive, int endExclusive, Weight probability)
    {
        Weight segmentWeight = Weight.Product(probability, baseWeight);

        var startBound = new TransitionCharSegmentBound(
            startInclusive, transition.DestinationStateIndex, segmentWeight, true);
        bounds.Add(new ValueTuple<int, TransitionCharSegmentBound>(bounds.Count, startBound));

        var endBound = new TransitionCharSegmentBound(
            endExclusive, transition.DestinationStateIndex, segmentWeight, false);
        bounds.Add(new ValueTuple<int, TransitionCharSegmentBound>(bounds.Count, endBound));
    }

    int gapStart = char.MinValue;
    foreach (var range in ranges)
    {
        // The gap in front of this range carries the probability outside of the ranges
        if (range.StartInclusive > gapStart && !outsideRangesWeight.IsZero)
        {
            AddSegment(gapStart, range.StartInclusive, outsideRangesWeight);
        }

        // The range itself
        Weight rangeWeight = Weight.FromValue(range.Probability);
        if (!rangeWeight.IsZero)
        {
            AddSegment(range.StartInclusive, range.EndExclusive, rangeWeight);
        }

        gapStart = range.EndExclusive;
    }

    // The gap after the last range, unless that range already reaches the end of the character domain
    bool hasTrailingGap =
        ranges.Count == 0 || ranges[ranges.Count - 1].EndExclusive != DiscreteChar.CharRangeEndExclusive;
    if (!outsideRangesWeight.IsZero && hasTrailingGap)
    {
        AddSegment(gapStart, char.MaxValue + 1, outsideRangesWeight);
    }
}
/// <summary>
/// Appends to <paramref name="bounds"/> the start and end bounds of every character segment to which
/// the given transition assigns a non-zero probability.
/// </summary>
/// <param name="transition">The transition whose probability vector is split into segments.</param>
/// <param name="sourceStateResidualWeight">The residual weight of the source state of the transition.</param>
/// <param name="bounds">
/// The list receiving the bounds. Each bound is paired with its position in the list at the time it is added,
/// and the end bound of a segment directly follows its start bound.
/// </param>
/// <remarks>
/// The probabilities are read from a piecewise vector: explicit pieces carry their own value, everything
/// between them carries the common value. The weight of a segment is its probability value multiplied by
/// the transition weight and by <paramref name="sourceStateResidualWeight"/>.
/// </remarks>
private static void AddTransitionCharSegmentBounds(
    Transition transition,
    Weight sourceStateResidualWeight,
    List<Tuple<int, TransitionCharSegmentBound>> bounds)
{
    var probs = (PiecewiseVector)transition.ElementDistribution.GetProbs();
    Weight backgroundWeight = Weight.FromValue(probs.CommonValue);
    Weight baseWeight = Weight.Product(transition.Weight, sourceStateResidualWeight);
    TransitionCharSegmentBound bound;

    int backgroundStart = char.MinValue;
    foreach (ConstantVector piece in probs.Pieces)
    {
        int pieceEndExclusive = piece.End + 1;

        if (piece.Start > backgroundStart && !backgroundWeight.IsZero)
        {
            // The stretch in front of this piece carries the common value
            Weight backgroundSegmentWeight = Weight.Product(backgroundWeight, baseWeight);
            bound = new TransitionCharSegmentBound(
                backgroundStart, transition.DestinationStateIndex, backgroundSegmentWeight, true);
            bounds.Add(Tuple.Create(bounds.Count, bound));
            bound = new TransitionCharSegmentBound(
                piece.Start, transition.DestinationStateIndex, backgroundSegmentWeight, false);
            bounds.Add(Tuple.Create(bounds.Count, bound));
        }

        Weight pieceWeight = Weight.FromValue(piece.Value);
        if (!pieceWeight.IsZero)
        {
            // The piece itself
            Weight pieceSegmentWeight = Weight.Product(pieceWeight, baseWeight);
            bound = new TransitionCharSegmentBound(
                piece.Start, transition.DestinationStateIndex, pieceSegmentWeight, true);
            bounds.Add(Tuple.Create(bounds.Count, bound));
            bound = new TransitionCharSegmentBound(
                pieceEndExclusive, transition.DestinationStateIndex, pieceSegmentWeight, false);
            bounds.Add(Tuple.Create(bounds.Count, bound));
        }

        backgroundStart = pieceEndExclusive;
    }

    // The stretch after the last piece, unless that piece already ends at the last character
    bool hasTrailingBackground =
        probs.Pieces.Count == 0 || probs.Pieces[probs.Pieces.Count - 1].End != char.MaxValue;
    if (!backgroundWeight.IsZero && hasTrailingBackground)
    {
        Weight trailingSegmentWeight = Weight.Product(backgroundWeight, baseWeight);
        bound = new TransitionCharSegmentBound(
            backgroundStart, transition.DestinationStateIndex, trailingSegmentWeight, true);
        bounds.Add(Tuple.Create(bounds.Count, bound));
        bound = new TransitionCharSegmentBound(
            char.MaxValue + 1, transition.DestinationStateIndex, trailingSegmentWeight, false);
        bounds.Add(Tuple.Create(bounds.Count, bound));
    }
}
/// <summary>
/// Fills <c>pairwiseWeights</c> with the total weight between every ordered pair of states of the component,
/// using the <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf">generalized Floyd's algorithm</a>.
/// </summary>
/// <remarks>
/// Direct transitions that pass <c>transitionFilter</c> and stay inside the component seed the matrix.
/// Every state is then considered as an intermediate state; the closure of its self-weight (approximate or exact,
/// depending on <c>useApproximateClosure</c>) accounts for loops through that state.
/// </remarks>
private void ComputePairwiseWeightsMatrix()
{
    int size = this.Size;
    this.pairwiseWeights = Util.ArrayInit(size, size, (row, column) => Weight.Zero);

    // Step 1: weights of the direct transitions
    for (int source = 0; source < size; ++source)
    {
        State sourceState = this.statesInComponent[source];
        foreach (var transition in sourceState.Transitions)
        {
            State destinationState = this.Automaton.States[transition.DestinationStateIndex];
            if (!this.transitionFilter(transition))
            {
                continue;
            }

            int destination = this.GetIndexByState(destinationState);
            if (destination != -1)
            {
                this.pairwiseWeights[source, destination] += transition.Weight;
            }
        }
    }

    // Step 2: route paths through each intermediate state
    for (int via = 0; via < size; ++via)
    {
        Weight selfWeight = this.pairwiseWeights[via, via];
        Weight loopWeight = this.useApproximateClosure
            ? Weight.ApproximateClosure(selfWeight)
            : Weight.Closure(selfWeight);

        for (int from = 0; from < size; ++from)
        {
            if (from == via || this.pairwiseWeights[from, via].IsZero)
            {
                continue;
            }

            for (int to = 0; to < size; ++to)
            {
                if (to != via && !this.pairwiseWeights[via, to].IsZero)
                {
                    this.pairwiseWeights[from, to] += Weight.Product(
                        this.pairwiseWeights[from, via], loopWeight, this.pairwiseWeights[via, to]);
                }
            }
        }

        // Fold the loop weight into the row and the column of the intermediate state
        for (int other = 0; other < size; ++other)
        {
            this.pairwiseWeights[other, via] *= loopWeight;
            this.pairwiseWeights[via, other] *= loopWeight;
        }

        this.pairwiseWeights[via, via] = loopWeight;
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="EpsilonClosure"/> class for the given state.
/// </summary>
/// <param name="state">The state whose epsilon closure is represented by this instance. Must not be a null state.</param>
/// <remarks>
/// When the only epsilon transitions of <paramref name="state"/> are self-loops (or there are none), the closure
/// consists of the state alone and is built directly. Otherwise the closure is obtained from the condensation
/// of the epsilon-transition graph rooted at <paramref name="state"/>.
/// </remarks>
internal EpsilonClosure(State state)
{
    Argument.CheckIfValid(!state.IsNull, nameof(state));

    // Look for an epsilon transition that leaves the state, summing epsilon self-loop weights on the way
    bool hasOutgoingEpsilon = false;
    Weight epsilonSelfLoopWeight = Weight.Zero;
    for (int i = 0; i < state.TransitionCount; ++i)
    {
        Transition candidate = state.GetTransition(i);
        if (!candidate.IsEpsilon)
        {
            continue;
        }

        if (candidate.DestinationStateIndex != state.Index)
        {
            hasOutgoingEpsilon = true;
            break;
        }

        epsilonSelfLoopWeight = Weight.Sum(epsilonSelfLoopWeight, candidate.Weight);
    }

    if (hasOutgoingEpsilon)
    {
        // General case: collect every state of every component of the epsilon condensation
        Condensation condensation = state.Owner.ComputeCondensation(state, tr => tr.IsEpsilon, true);
        for (int componentIndex = 0; componentIndex < condensation.ComponentCount; ++componentIndex)
        {
            StronglyConnectedComponent component = condensation.GetComponent(componentIndex);
            for (int memberIndex = 0; memberIndex < component.Size; ++memberIndex)
            {
                State member = component.GetStateByIndex(memberIndex);
                var memberWeight = condensation.GetWeightFromRoot(member);
                this.weightedStates.Add(Pair.Create(member, memberWeight));
            }
        }

        this.EndWeight = condensation.GetWeightToEnd(state);
        return;
    }

    // Common case: the closure is the state itself, weighted by the closure of its epsilon self-loops
    Weight closureWeight = Weight.ApproximateClosure(epsilonSelfLoopWeight);
    this.weightedStates.Add(Pair.Create(state, closureWeight));
    this.EndWeight = Weight.Product(closureWeight, state.EndWeight);
}
/// <summary>
/// Computes, for every state of every component, the accumulated weight of all paths that start at that state.
/// End weights of states are part of the accumulated value. The results are stored in the
/// <c>WeightToEnd</c> field of the per-state info, and <c>weightsToEndComputed</c> is set afterwards.
/// </summary>
/// <remarks>
/// Components are visited in the order in which they are stored, which the algorithm relies on being the
/// reverse topological order: the <c>WeightToEnd</c> of a state in another component is read while the
/// current component is being processed.
/// </remarks>
private void ComputeWeightsToEnd()
{
    for (int componentIndex = 0; componentIndex < this.components.Count; ++componentIndex)
    {
        StronglyConnectedComponent component = this.components[componentIndex];

        for (int exitIndex = 0; exitIndex < component.Size; ++exitIndex)
        {
            State exitState = component.GetStateByIndex(exitIndex);

            // Weight of finishing at this state or leaving the component through it
            Weight outsideWeight = exitState.EndWeight;
            for (int t = 0; t < exitState.TransitionCount; ++t)
            {
                Transition outgoing = exitState.GetTransition(t);
                State target = exitState.Owner.states[outgoing.DestinationStateIndex];
                if (!this.transitionFilter(outgoing) || component.HasState(target))
                {
                    // Filtered out, or stays inside the component
                    continue;
                }

                Weight targetWeightToEnd = this.stateIdToInfo[outgoing.DestinationStateIndex].WeightToEnd;
                outsideWeight = Weight.Sum(outsideWeight, Weight.Product(outgoing.Weight, targetWeightToEnd));
            }

            if (outsideWeight.IsZero)
            {
                continue;
            }

            // Every state of the component can reach the exit state, so each of them receives a share
            for (int memberIndex = 0; memberIndex < component.Size; ++memberIndex)
            {
                State member = component.GetStateByIndex(memberIndex);
                CondensationStateInfo memberInfo = this.stateIdToInfo[member.Index];
                Weight insideWeight = component.GetWeight(memberIndex, exitIndex);
                memberInfo.WeightToEnd = Weight.Sum(
                    memberInfo.WeightToEnd,
                    Weight.Product(insideWeight, outsideWeight));
                this.stateIdToInfo[member.Index] = memberInfo;
            }
        }
    }

    this.weightsToEndComputed = true;
}
/// <summary>
/// Recursively increases the value of the automaton on <paramref name="sequence"/> by <paramref name="weight"/>,
/// continuing from the state <paramref name="stateIndex"/> and the sequence position <paramref name="currentSequencePos"/>.
/// </summary>
/// <param name="stateIndex">The index of the state currently being traversed.</param>
/// <param name="isNewState">Whether the state <paramref name="stateIndex"/> has just been created.</param>
/// <param name="selfLoopAlreadyMatched">Whether a self-loop on the state <paramref name="stateIndex"/> has just been matched.</param>
/// <param name="firstAllowedStateIndex">Existing states with a smaller index are never entered.</param>
/// <param name="currentSequencePos">The position of the next element of <paramref name="sequence"/> to merge in.</param>
/// <param name="sequence">The generalized sequence being added.</param>
/// <param name="weight">The weight that remains to be assigned to the sequence.</param>
/// <returns>
/// <see langword="true"/> if the rest of the sequence, starting at <paramref name="currentSequencePos"/>,
/// has been merged in; <see langword="false"/> if this state cannot host it.
/// </returns>
/// <remarks>
/// Existing states and transitions are reused where that is possible, similarly to inserting a string into a trie;
/// new ones are created only when no existing path accepts the rest of the sequence.
/// </remarks>
private bool DoAddGeneralizedSequence(
    int stateIndex,
    bool isNewState,
    bool selfLoopAlreadyMatched,
    int firstAllowedStateIndex,
    int currentSequencePos,
    GeneralizedSequence sequence,
    Weight weight)
{
    var automatonBuilder = this.builder;
    var state = automatonBuilder[stateIndex];

    // The whole sequence has been consumed: try to finish in this state
    if (currentSequencePos == sequence.Count)
    {
        if (!selfLoopAlreadyMatched)
        {
            // Finishing in a state that has a self-loop is not allowed
            for (var it = state.TransitionIterator; it.Ok; it.Next())
            {
                if (it.Value.DestinationStateIndex == state.Index)
                {
                    return false;
                }
            }
        }

        state.SetEndWeight(Weight.Sum(state.EndWeight, weight));
        return true;
    }

    var element = sequence[currentSequencePos];

    // Self-loop elements are handled separately from ordinary elements
    if (element.LoopWeight.HasValue)
    {
        if (selfLoopAlreadyMatched)
        {
            // Two self-loops in a row: the second one has to live behind an epsilon transition.
            // Try the existing epsilon transitions first, compensating for their weight.
            for (var it = state.TransitionIterator; it.Ok; it.Next())
            {
                var candidate = it.Value;
                bool usableEpsilon =
                    candidate.DestinationStateIndex != state.Index &&
                    candidate.IsEpsilon &&
                    candidate.DestinationStateIndex >= firstAllowedStateIndex;
                if (usableEpsilon &&
                    this.DoAddGeneralizedSequence(
                        candidate.DestinationStateIndex,
                        false,
                        false,
                        firstAllowedStateIndex,
                        currentSequencePos,
                        sequence,
                        Weight.Product(weight, Weight.Inverse(candidate.Weight))))
                {
                    return true;
                }
            }

            // None of them worked, so a fresh epsilon transition is created
            var epsilonTarget = state.AddEpsilonTransition(Weight.One);
            bool addedBehindEpsilon = this.DoAddGeneralizedSequence(
                epsilonTarget.Index, true, false, firstAllowedStateIndex, currentSequencePos, sequence, weight);
            Debug.Assert(addedBehindEpsilon, "This call must always succeed.");
            return true;
        }

        // Look for an existing self-loop that matches the element
        for (var it = state.TransitionIterator; it.Ok; it.Next())
        {
            var candidate = it.Value;

            bool usableEpsilon =
                candidate.IsEpsilon &&
                candidate.DestinationStateIndex != state.Index &&
                candidate.DestinationStateIndex >= firstAllowedStateIndex;
            if (usableEpsilon &&
                this.DoAddGeneralizedSequence(
                    candidate.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))
            {
                // The sequence has been merged in behind this epsilon transition
                return true;
            }

            if (candidate.DestinationStateIndex != state.Index)
            {
                continue;
            }

            // The candidate is a self-loop: it matches only if weight, group and label all agree
            bool loopsMatch =
                (candidate.Weight == element.LoopWeight.Value) &&
                (element.Group == candidate.Group) &&
                (candidate.IsEpsilon
                    ? element.IsEpsilonSelfLoop
                    : !element.IsEpsilonSelfLoop && candidate.ElementDistribution.Equals(element.ElementDistribution));
            if (!loopsMatch)
            {
                // The state already has a different self-loop
                return false;
            }

            // Consume the element and stay in the same state
            bool addedAfterExistingLoop = this.DoAddGeneralizedSequence(
                stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
            Debug.Assert(addedAfterExistingLoop, "This call must always succeed.");
            return true;
        }

        if (!isNewState)
        {
            // Adding a self-loop to an existing state would change the language accepted by that state
            return false;
        }

        // The state is new, so the self-loop can be added to it safely
        state.AddTransition(element.ElementDistribution, element.LoopWeight.Value, stateIndex, element.Group);
        bool addedAfterNewLoop = this.DoAddGeneralizedSequence(
            stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
        Debug.Assert(addedAfterNewLoop, "This call must always succeed.");
        return true;
    }

    // Ordinary element: look for an existing transition that accepts it
    for (var it = state.TransitionIterator; it.Ok; it.Next())
    {
        var candidate = it.Value;

        bool usableEpsilon =
            candidate.IsEpsilon &&
            candidate.DestinationStateIndex != state.Index &&
            candidate.DestinationStateIndex >= firstAllowedStateIndex;
        if (usableEpsilon &&
            this.DoAddGeneralizedSequence(
                candidate.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))
        {
            return true;
        }

        if (candidate.DestinationStateIndex == state.Index)
        {
            if (!selfLoopAlreadyMatched)
            {
                // Passing through an existing self-loop would accept undesired sequences
                return false;
            }

            // The self-loop has been checked or added by the caller
            continue;
        }

        bool acceptsElement =
            candidate.DestinationStateIndex >= firstAllowedStateIndex &&
            element.Group == candidate.Group &&
            element.ElementDistribution.Equals(candidate.ElementDistribution);
        if (!acceptsElement)
        {
            continue;
        }

        // Consume the element and move to the destination, compensating for the weight of the existing transition.
        // This can fail when the next element is a self-loop and the destination already has a different one.
        if (this.DoAddGeneralizedSequence(
            candidate.DestinationStateIndex,
            false,
            false,
            firstAllowedStateIndex,
            currentSequencePos + 1,
            sequence,
            Weight.Product(weight, Weight.Inverse(candidate.Weight))))
        {
            return true;
        }
    }

    // No existing transition could be reused: create a new one leading to a new state
    var newChild = state.AddTransition(element.ElementDistribution, Weight.One, null, element.Group);
    bool addedToNewChild = this.DoAddGeneralizedSequence(
        newChild.Index, true, false, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
    Debug.Assert(addedToNewChild, "This call must always succeed.");
    return true;
}
/// <summary>
/// Computes the outgoing transitions of a state of the determinization result.
/// </summary>
/// <param name="sourceState">
/// The source state of the determinized automaton, given as a set of (state id, weight) pairs
/// in which the ids refer to states of the original automaton.
/// </param>
/// <returns>
/// A list of (element distribution, weight, weighted state set) triples, one per outgoing transition of
/// <paramref name="sourceState"/>. The first two items are the element distribution and the weight of
/// the transition, the third item is its destination state.
/// </returns>
/// <remarks>
/// The character domain is swept from left to right over the sorted bounds of all character segments produced by
/// the transitions of the source states. Between two consecutive bounds the set of active segments is constant,
/// so each such stretch with a sufficiently large total weight yields one outgoing transition.
/// </remarks>
protected override List<(DiscreteChar, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
    Determinization.WeightedStateSet sourceState)
{
    // Log-weights not greater than this are treated as products of numerical inaccuracies and are dropped
    const double LogEps = -35;

    // Collect the segment bounds; each bound is paired with its position in the list (done for perf. reasons)
    var numberedBounds = new List<ValueTuple<int, TransitionCharSegmentBound>>();
    int transitionCount = 0;
    foreach (KeyValuePair<int, Weight> weightedState in sourceState)
    {
        var originalState = this.States[weightedState.Key];
        foreach (var transition in originalState.Transitions)
        {
            AddTransitionCharSegmentBounds(transition, weightedState.Value, numberedBounds);
        }

        transitionCount += originalState.Transitions.Count;
    }

    // Order the bounds for the sweep; the bounds of a single transition are already produced in order
    var sortedBounds = numberedBounds.ToArray();
    if (transitionCount > 1)
    {
        Array.Sort(sortedBounds, (left, right) => left.Item2.CompareTo(right.Item2));
    }

    var outgoing = new List<(DiscreteChar, Weight, Determinization.WeightedStateSet)>();

    // Running totals over the currently active segments: overall and per destination state
    Weight totalActiveWeight = Weight.Zero;
    var activeWeightByDestination = new Dictionary<int, Weight>();
    foreach (var numberedBound in numberedBounds)
    {
        activeWeightByDestination[numberedBound.Item2.DestinationStateId] = Weight.Zero;
    }

    var activeSegments = new HashSet<TransitionCharSegmentBound>();
    int stretchStart = char.MinValue;
    foreach (var numberedBound in sortedBounds)
    {
        TransitionCharSegmentBound bound = numberedBound.Item2;
        var destinationId = bound.DestinationStateId;

        if (totalActiveWeight.LogValue > LogEps && stretchStart < bound.Bound)
        {
            // Emit a transition for the stretch that ends right before this bound
            char stretchEnd = (char)(bound.Bound - 1);
            int stretchLength = stretchEnd - stretchStart + 1;
            DiscreteChar elementDistribution = DiscreteChar.InRange((char)stretchStart, stretchEnd);

            var destinationSet = new Determinization.WeightedStateSet();
            foreach (KeyValuePair<int, Weight> entry in activeWeightByDestination)
            {
                if (entry.Value.LogValue > LogEps)
                {
                    // Destination weights are expressed relative to the total weight of the stretch
                    destinationSet.Add(
                        entry.Key,
                        Weight.Product(entry.Value, Weight.Inverse(totalActiveWeight)));
                }
            }

            Weight stretchWeight = Weight.Product(Weight.FromValue(stretchLength), totalActiveWeight);
            outgoing.Add((elementDistribution, stretchWeight, destinationSet));
        }

        stretchStart = bound.Bound;

        if (bound.IsStart)
        {
            // A segment opens: add its weight to the running totals
            activeSegments.Add(bound);
            totalActiveWeight = Weight.Sum(totalActiveWeight, bound.Weight);
            activeWeightByDestination[destinationId] =
                Weight.Sum(activeWeightByDestination[destinationId], bound.Weight);
            continue;
        }

        // A segment closes: its start bound directly precedes the end bound in the unsorted list
        Debug.Assert(activeWeightByDestination.ContainsKey(destinationId), "We shouldn't exit a state we didn't enter.");
        activeSegments.Remove(numberedBounds[numberedBound.Item1 - 1].Item2);

        if (double.IsInfinity(bound.Weight.Value))
        {
            // Infinite weights cannot be subtracted, so the totals are rebuilt from the remaining segments
            Weight rebuiltTotal = Weight.Zero;
            Weight rebuiltForDestination = Weight.Zero;
            foreach (TransitionCharSegmentBound active in activeSegments)
            {
                rebuiltTotal = Weight.Sum(rebuiltTotal, active.Weight);
                if (active.DestinationStateId == destinationId)
                {
                    rebuiltForDestination = Weight.Sum(rebuiltForDestination, active.Weight);
                }
            }

            totalActiveWeight = rebuiltTotal;
            activeWeightByDestination[destinationId] = rebuiltForDestination;
        }
        else
        {
            totalActiveWeight = activeSegments.Count == 0
                ? Weight.Zero
                : Weight.AbsoluteDifference(totalActiveWeight, bound.Weight);
            activeWeightByDestination[destinationId] =
                Weight.AbsoluteDifference(activeWeightByDestination[destinationId], bound.Weight);
        }
    }

    return outgoing;
}
/// <summary>
/// Computes, for every state of every component, the accumulated weight of all paths that lead from the root
/// to that state. End weights are not included. The results are stored in the <c>WeightFromRoot</c> field of
/// the per-state info, and <c>weightsFromRootComputed</c> is set afterwards.
/// </summary>
/// <remarks>
/// Components are visited from the last stored one to the first, which the algorithm relies on being the
/// topological order. For each component the weight that entered it from earlier components
/// (<c>UpwardWeightFromRoot</c>) is first spread over its states, and the resulting weights are then pushed
/// along the transitions that leave the component.
/// </remarks>
private void ComputeWeightsFromRoot()
{
    // The root is entered with weight one
    CondensationStateInfo rootInfo = this.stateIdToInfo[this.Root.Index];
    rootInfo.UpwardWeightFromRoot = Weight.One;
    this.stateIdToInfo[this.Root.Index] = rootInfo;

    for (int componentIndex = this.components.Count - 1; componentIndex >= 0; --componentIndex)
    {
        StronglyConnectedComponent component = this.components[componentIndex];

        // Phase 1: spread the weight that entered the component over all of its states
        for (int entryIndex = 0; entryIndex < component.Size; ++entryIndex)
        {
            State entryState = component.GetStateByIndex(entryIndex);
            CondensationStateInfo entryInfo = this.stateIdToInfo[entryState.Index];
            if (entryInfo.UpwardWeightFromRoot.IsZero)
            {
                continue;
            }

            for (int memberIndex = 0; memberIndex < component.Size; ++memberIndex)
            {
                State member = component.GetStateByIndex(memberIndex);
                CondensationStateInfo memberInfo = this.stateIdToInfo[member.Index];
                memberInfo.WeightFromRoot = Weight.Sum(
                    memberInfo.WeightFromRoot,
                    Weight.Product(entryInfo.UpwardWeightFromRoot, component.GetWeight(entryIndex, memberIndex)));
                this.stateIdToInfo[member.Index] = memberInfo;
            }
        }

        // Phase 2: push the weights along the transitions that leave the component
        for (int sourceIndex = 0; sourceIndex < component.Size; ++sourceIndex)
        {
            State source = component.GetStateByIndex(sourceIndex);
            CondensationStateInfo sourceInfo = this.stateIdToInfo[source.Index];
            if (sourceInfo.WeightFromRoot.IsZero)
            {
                continue;
            }

            for (int t = 0; t < source.TransitionCount; ++t)
            {
                Transition outgoing = source.GetTransition(t);
                State target = source.Owner.states[outgoing.DestinationStateIndex];
                if (!this.transitionFilter(outgoing) || component.HasState(target))
                {
                    // Filtered out, or stays inside the component
                    continue;
                }

                CondensationStateInfo targetInfo = this.stateIdToInfo[target.Index];
                targetInfo.UpwardWeightFromRoot = Weight.Sum(
                    targetInfo.UpwardWeightFromRoot,
                    Weight.Product(sourceInfo.WeightFromRoot, outgoing.Weight));
                this.stateIdToInfo[outgoing.DestinationStateIndex] = targetInfo;
            }
        }
    }

    this.weightsFromRootComputed = true;
}
/// <summary>
/// Builds the complete list of generalized sequences accepted by the simplifiable part of the automaton.
/// The traversal is depth-first and uses an explicit stack instead of recursion.
/// </summary>
/// <param name="stateIndex">The state the traversal starts from.</param>
/// <param name="generalizedTreeNodes">The state labels obtained from <see cref="FindGeneralizedTrees"/>.</param>
/// <param name="weightedSequences">The sequence list being built.</param>
/// <param name="currentSequenceElements">The list of elements of the sequence currently being built.</param>
/// <param name="currentWeight">The weight of the sequence accumulated before the starting state.</param>
private void DoBuildAcceptedSequenceList(
    int stateIndex,
    bool[] generalizedTreeNodes,
    List<WeightedSequence> weightedSequences,
    List<GeneralizedElement> currentSequenceElements,
    Weight currentWeight)
{
    // The stack mixes two kinds of items: states still to visit, and markers that either
    // append an element to the current sequence or (when the element is null) drop the last one.
    var pending = new Stack<StackItem>();
    pending.Push(new StateWeight(stateIndex, currentWeight));

    while (pending.Count > 0)
    {
        var item = pending.Pop();

        if (item is ElementItem marker)
        {
            if (marker.Element == null)
            {
                currentSequenceElements.RemoveAt(currentSequenceElements.Count - 1);
            }
            else
            {
                currentSequenceElements.Add(marker.Element.Value);
            }

            continue;
        }

        var visit = item as StateWeight;
        var visitedIndex = visit.StateIndex;
        var state = this.builder[visitedIndex];
        var weightSoFar = visit.Weight;

        // Look for a self-loop on this state.
        // NOTE(review): the loop below does not check whether the self-loop is an epsilon transition.
        Transition? selfLoop = null;
        for (var loopSearch = state.TransitionIterator; loopSearch.Ok; loopSearch.Next())
        {
            var candidate = loopSearch.Value;
            if (candidate.DestinationStateIndex != visitedIndex)
            {
                continue;
            }

            Debug.Assert(
                selfLoop == null,
                "Multiple self-loops should have been merged by MergeParallelTransitions()");
            selfLoop = candidate;
        }

        // A self-loop becomes the next element of the current sequence. The removal marker is pushed
        // before anything else for this state, so it is popped only after all of its successors.
        if (selfLoop.HasValue)
        {
            var loop = selfLoop.Value;
            currentSequenceElements.Add(new GeneralizedElement(loop.ElementDistribution, loop.Group, loop.Weight));
            pending.Push(new ElementItem(null));
        }

        // Emit a sequence if this state can end and is allowed.
        if (state.CanEnd && generalizedTreeNodes[visitedIndex])
        {
            // TODO: use immutable data structure instead of copying sequences
            var sequence = new GeneralizedSequence(currentSequenceElements);
            weightedSequences.Add(new WeightedSequence(sequence, Weight.Product(weightSoFar, state.EndWeight)));
        }

        // Schedule the successors.
        for (var outgoing = state.TransitionIterator; outgoing.Ok; outgoing.Next())
        {
            var transition = outgoing.Value;
            var targetIndex = transition.DestinationStateIndex;

            // Self-loops were handled above; disallowed states are not traversed.
            if (targetIndex == visitedIndex || !generalizedTreeNodes[targetIndex])
            {
                continue;
            }

            var contributesElement = !transition.IsEpsilon;

            // Items pop in reverse push order: add element, visit target, remove element.
            if (contributesElement)
            {
                pending.Push(new ElementItem(null));
            }

            pending.Push(new StateWeight(targetIndex, Weight.Product(weightSoFar, transition.Weight)));

            if (contributesElement)
            {
                pending.Push(
                    new ElementItem(
                        new GeneralizedElement(transition.ElementDistribution, transition.Group, null)));
            }
        }
    }
}
/// <summary>
/// Builds a new automaton out of the transitions of <paramref name="states"/> that belong to
/// <paramref name="group"/>, restricted to the states listed in <paramref name="subgraph"/>.
/// </summary>
/// <param name="states">The states of the source automaton.</param>
/// <param name="topologicalOrder">The states in topological order; its first entry is used as the root.</param>
/// <param name="group">The transition group to extract.</param>
/// <param name="subgraph">Indices of the states that take part in the extracted automaton.</param>
/// <returns>The extracted automaton.</returns>
/// <remarks>
/// Weights of paths outside the group are folded into the result: the weight from the root to a copied
/// state becomes an epsilon transition from the new start state, and the weight from a copied state to
/// the end becomes its end weight.
/// </remarks>
private static TThis BuildSubautomaton(IReadOnlyList<State> states, IReadOnlyList<State> topologicalOrder, int group, HashSet<int> subgraph)
{
    var weightsFromRoot = ComputeWeightsFromRoot(states.Count, topologicalOrder, group);
    var weightsToEnd = ComputeWeightsToEnd(states.Count, topologicalOrder, group);

    var subautomaton = new TThis();
    var stateMapping = subgraph.ToDictionary(x => x, _ => subautomaton.AddState());

    // Starts as the whole subgraph; every state that turns out to be a transition target is removed.
    var hasNoIncomingTransitions = new HashSet<int>(subgraph);

    // Copy the in-group transitions into the new automaton.
    foreach (var oldIndex in subgraph)
    {
        var oldState = states[oldIndex];
        var copiedState = stateMapping[oldIndex];

        for (int t = 0; t < oldState.TransitionCount; t++)
        {
            var transition = oldState.GetTransition(t);
            if (transition.Group == group)
            {
                hasNoIncomingTransitions.Remove(transition.DestinationStateIndex);
                copiedState.AddTransition(
                    transition.ElementDistribution,
                    transition.Weight,
                    stateMapping[transition.DestinationStateIndex]);
            }
        }
    }

    // Mark start and end states, modulo paths bypassing the automaton.
    var correctionFactor = Weight.Zero;
    foreach (var oldIndex in subgraph)
    {
        var copiedState = stateMapping[oldIndex];

        // A state acts as an entry point only if it has outgoing in-group transitions.
        var weightFromRoot = Weight.Zero;
        if (copiedState.TransitionCount > 0)
        {
            weightFromRoot = weightsFromRoot[oldIndex];
        }

        if (!weightFromRoot.IsZero)
        {
            subautomaton.Start.AddEpsilonTransition(weightFromRoot, copiedState);
        }

        // A state acts as an exit point only if some in-group transition leads to it.
        var weightToEnd = hasNoIncomingTransitions.Contains(oldIndex) ? Weight.Zero : weightsToEnd[oldIndex];
        if (!weightToEnd.IsZero)
        {
            copiedState.SetEndWeight(weightToEnd);
        }

        correctionFactor = Weight.Sum(correctionFactor, Weight.Product(weightFromRoot, weightToEnd));
    }

    if (!correctionFactor.IsZero)
    {
        throw new Exception("Write a unit test for this case. Code should be fine.");
    }

    // Weight of the paths from the root to the end that never enter the extracted part.
    var epsilonWeight = Weight.AbsoluteDifference(weightsToEnd[topologicalOrder[0].Index], correctionFactor);
    subautomaton.Start.SetEndWeight(epsilonWeight);

    return subautomaton;
}
/// <summary>
/// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, nameof(srcSequence));

    var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    if (this.sequencePairToWeight.IsCanonicZero())
    {
        return result.GetAutomaton();
    }

    // The manipulator and the sequence length do not change during the traversal,
    // so they are obtained once here instead of once per created state.
    var sourceSequenceManipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

    // Maps (transducer state index, position in the sequence) to the index of the created result state.
    var destStateCache = new Dictionary<(int, int), int>();
    result.StartStateIndex = BuildProjectionOfSequence(this.sequencePairToWeight.Start, 0);

    var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return result.GetAutomaton();

    // Recursively builds the projection of the sequence onto this transducer and returns the index
    // of the result state that corresponds to (mappingState, srcSequenceIndex).
    int BuildProjectionOfSequence(PairListAutomaton.State mappingState, int srcSequenceIndex)
    {
        //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
        //// Unfortunately, it's not clear how to avoid the duplication in the current design.

        var statePair = (mappingState.Index, srcSequenceIndex);
        if (destStateCache.TryGetValue(statePair, out var destStateIndex))
        {
            return destStateIndex;
        }

        // Register the state before recursing so that cycles resolve to the cached index.
        var destState = result.AddState();
        destStateCache.Add(statePair, destState.Index);

        // Enumerate transitions from the current mapping state
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            // Source-epsilon transition: the position in the sequence does not advance.
            if (IsSrcEpsilon(mappingTransition))
            {
                var destElementWeights = mappingTransition.ElementDistribution.HasValue
                    ? mappingTransition.ElementDistribution.Value.Second
                    : Option.None;
                var childDestStateIndex = BuildProjectionOfSequence(destMappingState, srcSequenceIndex);
                destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                continue;
            }

            // Normal transition: consume the next element of the sequence, if there is one.
            if (srcSequenceIndex < srcSequenceLength)
            {
                var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcSequenceElement, out var destElementDistribution);
                if (double.IsNegativeInfinity(projectionLogScale))
                {
                    // The transition cannot produce this element.
                    continue;
                }

                var weight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                var childDestState = BuildProjectionOfSequence(destMappingState, srcSequenceIndex + 1);
                destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);
            }
        }

        // A result state can end only when the whole sequence has been consumed.
        destState.SetEndWeight(srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero);
        return destState.Index;
    }
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project. It is made epsilon-free in place before projecting.</param>
/// <returns>The projection.</returns>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, nameof(srcAutomaton));

    var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    if (srcAutomaton.IsCanonicZero() || this.sequencePairToWeight.IsCanonicZero())
    {
        return result.GetAutomaton();
    }

    // The traversal below assumes there are no epsilon transitions in the projected automaton.
    srcAutomaton.MakeEpsilonFree();

    // (transducer state index, source state index) -> index of the corresponding result state.
    var destStateCache = new Dictionary<(int, int), int>();
    result.StartStateIndex = ProjectStatePair(this.sequencePairToWeight.Start, srcAutomaton.Start);

    var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return result.GetAutomaton();

    // Recursively creates the result state for a (transducer state, source state) pair, together with
    // everything reachable from it, and returns its index. Similar in shape to Automaton<>.BuildProduct.
    int ProjectStatePair(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var pairKey = (mappingState.Index, srcState.Index);
        if (destStateCache.TryGetValue(pairKey, out var knownIndex))
        {
            // Already created (or currently being created further up the call chain).
            return knownIndex;
        }

        var projected = result.AddState();
        destStateCache.Add(pairKey, projected.Index);

        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // The transducer moves on its own; the source automaton stays where it is.
                var pairDistribution = mappingTransition.ElementDistribution;
                var emitted = pairDistribution.HasValue ? pairDistribution.Value.Second : Option.None;
                var epsilonTarget = ProjectStatePair(nextMappingState, srcState);
                projected.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
                continue;
            }

            // Pair the transducer transition with every transition of the source state.
            foreach (var srcTransition in srcState.Transitions)
            {
                Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcTransition.ElementDistribution.Value, out var emitted);
                if (double.IsNegativeInfinity(logScale))
                {
                    // The two transitions are incompatible.
                    continue;
                }

                var combinedWeight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                var target = ProjectStatePair(nextMappingState, nextSrcState);
                projected.AddTransition(emitted, combinedWeight, target, mappingTransition.Group);
            }
        }

        projected.SetEndWeight(Weight.Product(mappingState.EndWeight, srcState.EndWeight));
        return projected.Index;
    }
}
/// <summary>
/// Recursively builds the projection of a given automaton onto this transducer.
/// The projected automaton must be epsilon-free.
/// </summary>
/// <param name="destAutomaton">The projection being built.</param>
/// <param name="mappingState">The currently traversed state of the transducer.</param>
/// <param name="srcState">The currently traversed state of the automaton being projected.</param>
/// <param name="destStateCache">The cache of the created projection states, keyed by (transducer state index, source state index).</param>
/// <returns>The state of the projection corresponding to the given pair of states.</returns>
private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfAutomaton(
    TDestAutomaton destAutomaton,
    PairListAutomaton.State mappingState,
    Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState,
    Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
{
    Debug.Assert(mappingState != null && srcState != null, "Valid states must be provided.");
    Debug.Assert(!ReferenceEquals(srcState.Owner, destAutomaton), "Cannot build a projection in place.");

    // Similar in shape to Automaton<>.BuildProduct; the duplication is hard to avoid in the current design.

    // Reuse the state if this pair has been seen before.
    var pairKey = new IntPair(mappingState.Index, srcState.Index);
    Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projected;
    if (destStateCache.TryGetValue(pairKey, out projected))
    {
        return projected;
    }

    // Register the new state before recursing so that cycles find it in the cache.
    projected = destAutomaton.AddState();
    destStateCache.Add(pairKey, projected);

    for (int m = 0; m < mappingState.TransitionCount; m++)
    {
        var mappingTransition = mappingState.GetTransition(m);
        var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

        if (IsSrcEpsilon(mappingTransition))
        {
            // The transducer advances without consuming anything from the source automaton.
            TDestElementDistribution emitted;
            if (mappingTransition.ElementDistribution == null)
            {
                emitted = null;
            }
            else
            {
                emitted = mappingTransition.ElementDistribution.Second;
            }

            var epsilonTarget = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, srcState, destStateCache);
            projected.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
        }
        else
        {
            // Pair the transducer transition with every transition leaving the source state.
            for (int s = 0; s < srcState.TransitionCount; s++)
            {
                var srcTransition = srcState.GetTransition(s);
                Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                TDestElementDistribution emitted;
                double logScale = mappingTransition.ElementDistribution.ProjectFirst(
                    srcTransition.ElementDistribution, out emitted);

                // A scale of negative infinity means the two transitions are incompatible.
                if (!double.IsNegativeInfinity(logScale))
                {
                    Weight combinedWeight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                    var target = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, nextSrcState, destStateCache);
                    projected.AddTransition(emitted, combinedWeight, target, mappingTransition.Group);
                }
            }
        }
    }

    projected.EndWeight = Weight.Product(mappingState.EndWeight, srcState.EndWeight);
    return projected;
}
/// <summary>
/// Recursively builds the projection of a given sequence onto this transducer.
/// </summary>
/// <param name="destAutomaton">The projection being built.</param>
/// <param name="mappingState">The currently traversed state of the transducer.</param>
/// <param name="srcSequence">The sequence being projected.</param>
/// <param name="srcSequenceIndex">The current index in the sequence being projected.</param>
/// <param name="destStateCache">The cache of the created projection states, keyed by (transducer state index, sequence index).</param>
/// <returns>The state of the projection corresponding to the given mapping state and the position in the projected sequence.</returns>
private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfSequence(
    TDestAutomaton destAutomaton,
    PairListAutomaton.State mappingState,
    TSrcSequence srcSequence,
    int srcSequenceIndex,
    Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
{
    // Similar in shape to Automaton<>.BuildProduct; the duplication is hard to avoid in the current design.
    var manipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;

    // Reuse the state if this (transducer state, position) pair has been seen before.
    var pairKey = new IntPair(mappingState.Index, srcSequenceIndex);
    Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projected;
    if (destStateCache.TryGetValue(pairKey, out projected))
    {
        return projected;
    }

    // Register the new state before recursing so that cycles find it in the cache.
    projected = destAutomaton.AddState();
    destStateCache.Add(pairKey, projected);

    int sequenceLength = manipulator.GetLength(srcSequence);

    for (int m = 0; m < mappingState.TransitionCount; m++)
    {
        var mappingTransition = mappingState.GetTransition(m);
        var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

        if (IsSrcEpsilon(mappingTransition))
        {
            // Source-epsilon transition: the position in the sequence stays the same.
            TDestElementDistribution emitted;
            if (mappingTransition.ElementDistribution == null)
            {
                emitted = null;
            }
            else
            {
                emitted = mappingTransition.ElementDistribution.Second;
            }

            var epsilonTarget = this.BuildProjectionOfSequence(
                destAutomaton, nextMappingState, srcSequence, srcSequenceIndex, destStateCache);
            projected.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
        }
        else if (srcSequenceIndex < sequenceLength)
        {
            // Normal transition: try to consume the next element of the sequence.
            var element = manipulator.GetElement(srcSequence, srcSequenceIndex);

            TDestElementDistribution emitted;
            double logScale = mappingTransition.ElementDistribution.ProjectFirst(element, out emitted);

            // A scale of negative infinity means the transition cannot consume this element.
            if (!double.IsNegativeInfinity(logScale))
            {
                Weight combinedWeight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(logScale));
                var target = this.BuildProjectionOfSequence(
                    destAutomaton, nextMappingState, srcSequence, srcSequenceIndex + 1, destStateCache);
                projected.AddTransition(emitted, combinedWeight, target, mappingTransition.Group);
            }
        }
    }

    // The projection can end here only if the whole sequence has been consumed.
    if (srcSequenceIndex == sequenceLength)
    {
        projected.EndWeight = mappingState.EndWeight;
    }
    else
    {
        projected.EndWeight = Weight.Zero;
    }

    return projected;
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project. It is made epsilon-free in place before projecting.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// The traversal uses an explicit stack of pending (transducer state, source state) pairs.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, nameof(srcAutomaton));

    var mappingAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The traversal below assumes there are no epsilon transitions in the projected automaton.
    srcAutomaton.MakeEpsilonFree();

    var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    var destStateCache = new Dictionary<(int, int), int>();
    var pending = new Stack<(int mappingIndex, int srcIndex, int destIndex)>();

    // Returns the result state index for a pair of states. A pair seen for the first time gets a
    // fresh state with its end weight set, and is scheduled for expansion.
    int CreateDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var pairKey = (mappingState.Index, srcState.Index);
        if (destStateCache.TryGetValue(pairKey, out var knownIndex))
        {
            return knownIndex;
        }

        var created = result.AddState();
        created.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, created.Index));
        destStateCache[pairKey] = created.Index;
        return created.Index;
    }

    // Seed the traversal with the pair of start states.
    result.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

    var sourceDistributionHasLogProbabilityOverrides =
        (srcAutomaton as StringAutomaton)?.HasElementLogValueOverrides ?? false;

    while (pending.Count > 0)
    {
        var (mappingStateIndex, srcStateIndex, currentDestIndex) = pending.Pop();

        var mappingState = mappingAutomaton.States[mappingStateIndex];
        var srcState = srcAutomaton.States[srcStateIndex];
        var destState = result[currentDestIndex];

        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingAutomaton.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // The transducer moves on its own; the source automaton stays where it is.
                var pairDistribution = mappingTransition.ElementDistribution;
                var emitted = pairDistribution.HasValue ? pairDistribution.Value.Second : Option.None;
                var epsilonTarget = CreateDestState(nextMappingState, srcState);
                destState.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
                continue;
            }

            // Pair the transducer transition with every transition of the source state.
            foreach (var srcTransition in srcState.Transitions)
            {
                Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var nextSrcState = srcAutomaton.States[srcTransition.DestinationStateIndex];

                var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcTransition.ElementDistribution.Value, out var emitted);
                if (double.IsNegativeInfinity(projectionLogScale))
                {
                    // The two transitions are incompatible.
                    continue;
                }

                // In the special case of a log probability override in a DiscreteChar element distribution,
                // we need to compensate for the fact that the distribution is not normalized.
                if (emitted.HasValue && sourceDistributionHasLogProbabilityOverrides)
                {
                    var discreteChar = (DiscreteChar)(IDistribution<char>)srcTransition.ElementDistribution.Value;
                    if (discreteChar.HasLogProbabilityOverride)
                    {
                        var totalMass = discreteChar.Ranges.EnumerableSum(
                            range => range.Probability.Value * (range.EndExclusive - range.StartInclusive));
                        projectionLogScale -= System.Math.Log(totalMass);
                    }
                }

                Weight destWeight;
                if (sourceDistributionHasLogProbabilityOverrides && emitted.HasNoValue)
                {
                    destWeight = Weight.One;
                }
                else
                {
                    destWeight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));
                }

                // We don't want an unnormalizable distribution to become normalizable due to a rounding error,
                // so a weight whose log value is within 1e-12 of zero is snapped to exactly one.
                if (Math.Abs(destWeight.LogValue) < 1e-12)
                {
                    destWeight = Weight.One;
                }

                var target = CreateDestState(nextMappingState, nextSrcState);
                destState.AddTransition(emitted, destWeight, target, mappingTransition.Group);
            }
        }
    }

    var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return result.GetAutomaton();
}
/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <param name="maxStatesBeforeStop">
/// The maximum number of states the resulting automaton can have. If the number of states exceeds the value
/// of this parameter during determinization, the process is aborted.
/// </param>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
/// Epsilon transitions are removed from this automaton even when the attempt subsequently fails.
/// </remarks>
public bool TryDeterminize(int maxStatesBeforeStop)
{
    Argument.CheckIfInRange(
        maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
        nameof(maxStatesBeforeStop),
        "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

    // Deterministic automata cannot have epsilon-transitions
    this.MakeEpsilonFree();

    if (this.UsesGroups())
    {
        // Determinization would lose the group information, which is not allowed.
        return false;
    }

    // Every state of the result corresponds to a weighted state set: a set of (stateId, weight) pairs,
    // where the state ids refer to states of this (original) automaton.
    var unprocessed = new Queue<Determinization.WeightedStateSet>();
    var stateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();
    var builder = new Builder();

    var startStateSet = new Determinization.WeightedStateSet();
    startStateSet.Add(this.Start.Index, Weight.One);

    unprocessed.Enqueue(startStateSet);
    stateSetToNewState.Add(startStateSet, builder.StartStateIndex);
    builder.Start.SetEndWeight(this.Start.EndWeight);

    while (unprocessed.Count > 0)
    {
        // Take one unprocessed state of the resulting automaton
        Determinization.WeightedStateSet sourceStateSet = unprocessed.Dequeue();
        var sourceState = builder[stateSetToNewState[sourceStateSet]];

        // Find out what transitions should be added for this state
        var transitionInfos = this.GetOutgoingTransitionsForDeterminization(sourceStateSet);

        foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet) transitionInfo in transitionInfos)
        {
            var (elementDistribution, weight, targetStateSet) = transitionInfo;

            if (!stateSetToNewState.TryGetValue(targetStateSet, out var targetIndex))
            {
                if (builder.StatesCount == maxStatesBeforeStop)
                {
                    // Too many states, determinization attempt failed
                    return false;
                }

                // First time this state set is seen: create a state for it and schedule it.
                var targetState = builder.AddState();
                stateSetToNewState.Add(targetStateSet, targetState.Index);
                unprocessed.Enqueue(targetStateSet);

                // Its ending weight is the weighted sum of the ending weights of the original states.
                targetState.SetEndWeight(Weight.Zero);
                foreach (KeyValuePair<int, Weight> member in targetStateSet)
                {
                    var contribution = Weight.Product(member.Value, this.States[member.Key].EndWeight);
                    targetState.SetEndWeight(Weight.Sum(targetState.EndWeight, contribution));
                }

                targetIndex = targetState.Index;
            }

            sourceState.AddTransition(elementDistribution, weight, targetIndex);
        }
    }

    // Determinization produces a separate transition for each segment
    var simplification = new Simplification(builder, this.PruneTransitionsWithLogWeightLessThan);
    simplification.MergeParallelTransitions();

    // Carry the settings of this automaton over to the result, then replace this automaton with it.
    var result = builder.GetAutomaton();
    result.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
    result.LogValueOverride = this.LogValueOverride;
    this.SwapWith(result);

    return true;
}
/// <summary>
/// Computes a set of outgoing transitions from a given state of the determinization result.
/// </summary>
/// <param name="sourceStateSet">The source state of the determinized automaton represented as
/// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
/// <returns>
/// A lazily produced collection of outgoing transitions from <paramref name="sourceStateSet"/>.
/// Each one carries the element distribution (a character range), the weight of the transition,
/// and the weighted state set that represents the destination state.
/// </returns>
protected override IEnumerable<Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
    Determinization.WeightedStateSet sourceStateSet)
{
    // Collect the character segment bounds of all transitions leaving the states of the set
    // (they are numbered here due to perf. reasons), then process them in sorted order.
    var bounds = new List<TransitionCharSegmentBound>();
    for (var memberIndex = 0; memberIndex < sourceStateSet.Count; ++memberIndex)
    {
        var member = sourceStateSet[memberIndex];
        foreach (var transition in this.States[member.Index].Transitions)
        {
            AddTransitionCharSegmentBounds(transition, member.Weight, bounds);
        }
    }

    bounds.Sort();

    // Sweep state: the total weight and the per-destination weights of the segments currently open.
    var openTotal = WeightSum.Zero();
    var openWeightsByDestination = new Dictionary<int, WeightSum>();
    int segmentStart = char.MinValue;
    var destinationBuilder = Determinization.WeightedStateSetBuilder.Create();

    foreach (var bound in bounds)
    {
        // Emit a transition for the stretch between the previous bound and this one,
        // provided that something is open and the stretch is not empty.
        if (openTotal.Count != 0 && segmentStart < bound.Bound)
        {
            var segmentEnd = (char)(bound.Bound - 1);
            var segmentLength = segmentEnd - segmentStart + 1;
            var elementDist = DiscreteChar.InRange((char)segmentStart, segmentEnd);
            var inverseTotal = Weight.Inverse(openTotal.Sum);

            // The builder is reused between segments; destination weights are divided by the segment total.
            destinationBuilder.Reset();
            foreach (var entry in openWeightsByDestination)
            {
                destinationBuilder.Add(entry.Key, entry.Value.Sum * inverseTotal);
            }

            var (destinationStateSet, destinationStateSetWeight) = destinationBuilder.Get();

            var transitionWeight = Weight.Product(
                Weight.FromValue(segmentLength),
                openTotal.Sum,
                destinationStateSetWeight);
            yield return new Determinization.OutgoingTransition(elementDist, transitionWeight, destinationStateSet);
        }

        // Move the sweep position and account for the bound itself.
        segmentStart = bound.Bound;

        if (bound.IsStart)
        {
            // A segment opens: add its weight to the total and to its destination.
            var destinationId = bound.DestinationStateId;
            openTotal += bound.Weight;
            openWeightsByDestination[destinationId] =
                openWeightsByDestination.TryGetValue(destinationId, out var existing)
                    ? existing + bound.Weight
                    : new WeightSum(bound.Weight);
        }
        else
        {
            // A segment closes: subtract its weight and forget the destination once nothing is left.
            Debug.Assert(openWeightsByDestination.ContainsKey(bound.DestinationStateId), "We shouldn't exit a state we didn't enter.");
            Debug.Assert(!bound.Weight.IsInfinity);

            var destinationId = bound.DestinationStateId;
            openTotal -= bound.Weight;

            var remaining = openWeightsByDestination[destinationId] - bound.Weight;
            if (remaining.Count == 0)
            {
                openWeightsByDestination.Remove(destinationId);
            }
            else
            {
                openWeightsByDestination[destinationId] = remaining;
            }
        }
    }
}
/// <summary>
/// Projects an automaton through this transducer, i.e. computes <c>g(b) = sum_a f(a) T(a, b)</c>,
/// where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is the given automaton.
/// </summary>
/// <param name="srcAutomaton">
/// The automaton to project. Must not be null. Note that <c>MakeEpsilonFree()</c> is invoked on it
/// before the projection is built.
/// </param>
/// <returns>
/// The projection. If either the given automaton or the transducer's underlying automaton
/// is canonic zero, the zero automaton is returned.
/// </returns>
/// <remarks>
/// Every destination state stands for a (mapping state, source state) pair. Pairs are discovered
/// with an explicit stack, starting from the pair of start states.
/// This method shares a lot of code with Automaton.SetToProduct;
/// it is not clear how to remove the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var pairAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || pairAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The loop below reads the element distribution of every source transition,
    // so epsilon transitions have to be eliminated up front.
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    var pairToDestIndex = new Dictionary<(int, int), int>();
    var pending = new Stack<(int mappingIndex, int srcIndex, int destIndex)>();

    // Returns the destination state index for the given pair of states.
    // A pair seen for the first time gets a fresh destination state and is scheduled for processing;
    // otherwise the cached index is returned.
    int GetOrAddDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var key = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();
        created.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, created.Index));
        pairToDestIndex[key] = created.Index;
        return created.Index;
    }

    // Seed the traversal with the pair of start states
    builder.StartStateIndex = GetOrAddDestState(pairAutomaton.Start, srcAutomaton.Start);

    while (pending.Count > 0)
    {
        var (mappingIndex, srcIndex, destIndex) = pending.Pop();

        var mappingState = pairAutomaton.States[mappingIndex];
        var srcState = srcAutomaton.States[srcIndex];
        var destState = builder[destIndex];

        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Nothing is consumed from the source: only the mapping state advances
                var emitted = mappingTransition.ElementDistribution.HasValue
                    ? mappingTransition.ElementDistribution.Value.Second
                    : Option.None;
                var targetIndex = GetOrAddDestState(nextMappingState, srcState);
                destState.AddTransition(emitted, mappingTransition.Weight, targetIndex, mappingTransition.Group);
            }
            else
            {
                // Pair the mapping transition with every transition leaving the source state
                foreach (var srcTransition in srcState.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                    var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var projected);

                    // A log-scale of negative infinity means the pair contributes nothing
                    if (!double.IsNegativeInfinity(logScale))
                    {
                        var weight = Weight.Product(
                            mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                        var targetIndex = GetOrAddDestState(nextMappingState, nextSrcState);
                        destState.AddTransition(projected, weight, targetIndex, mappingTransition.Group);
                    }
                }
            }
        }
    }

    var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Appends the given automaton to the automaton under construction, producing
/// <c>f'(s) = sum_{tu=s} f(t)g(u)</c>, where <c>f(t)</c> is the current automaton (in builder)
/// and <c>g(u)</c> is the given automaton.
/// This is also known as the Cauchy product of two automata.
/// </summary>
/// <param name="automaton">The automaton to append.</param>
/// <param name="group">
/// When non-zero, the group assigned to every transition copied from <paramref name="automaton"/>.
/// It is also passed as the group of the connecting epsilon transitions, when those are created.
/// </param>
/// <param name="avoidEpsilonTransitions">
/// When <see langword="true"/>, the start state of the appended automaton is merged into the existing
/// end states instead of being linked by epsilon transitions. The merge happens only if no existing
/// end state has outgoing transitions or the appended start state has no incoming transitions.
/// </param>
public void Append(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> automaton,
    int group = 0,
    bool avoidEpsilonTransitions = true)
{
    // States copied from the appended automaton get their indices shifted by this amount
    var indexOffset = this.states.Count;
    var overrideGroup = group != 0;

    foreach (var sourceState in automaton.States)
    {
        var copy = this.AddState();
        copy.SetEndWeight(sourceState.EndWeight);

        foreach (var sourceTransition in sourceState.Transitions)
        {
            var shifted = sourceTransition;
            shifted.DestinationStateIndex += indexOffset;
            if (overrideGroup)
            {
                shifted.Group = group;
            }

            copy.AddTransition(shifted);
        }
    }

    var appendedStart = this[indexOffset + automaton.Start.Index];

    var mergeStartIntoEndStates =
        avoidEpsilonTransitions &&
        (NoEndStateHasTransitions() || !automaton.Start.HasIncomingTransitions);

    if (!mergeStartIntoEndStates)
    {
        // Link every former end state to the appended start state with an epsilon transition
        // carrying the former end weight; the state itself stops being an end state.
        for (var index = 0; index < indexOffset; index++)
        {
            var formerEnd = this[index];
            if (formerEnd.CanEnd)
            {
                formerEnd.AddEpsilonTransition(formerEnd.EndWeight, appendedStart.Index, group);
                formerEnd.SetEndWeight(Weight.Zero);
            }
        }

        return;
    }

    // Fold the appended start state into each former end state, then drop it
    var appendedStartIndex = appendedStart.Index;
    for (var index = 0; index < indexOffset; index++)
    {
        var endState = this[index];
        if (endState.CanEnd)
        {
            var cursor = appendedStart.TransitionIterator;
            while (cursor.Ok)
            {
                var moved = cursor.Value;
                if (overrideGroup)
                {
                    moved.Group = group;
                }

                if (moved.DestinationStateIndex == appendedStartIndex)
                {
                    // A self-loop on the appended start becomes a self-loop on the end state
                    moved.DestinationStateIndex = endState.Index;
                }
                else
                {
                    // Leaving the merged state accounts for the end weight of the first automaton
                    moved.Weight = Weight.Product(moved.Weight, endState.EndWeight);
                }

                endState.AddTransition(moved);
                cursor.Next();
            }

            endState.SetEndWeight(Weight.Product(endState.EndWeight, appendedStart.EndWeight));
        }
    }

    this.RemoveState(appendedStartIndex);

    // Checks that none of the pre-existing states that can end has an outgoing transition
    bool NoEndStateHasTransitions()
    {
        for (var index = 0; index < indexOffset; index++)
        {
            var candidate = this.states[index];
            if (!candidate.CanEnd || candidate.FirstTransition == -1)
            {
                continue;
            }

            return false;
        }

        return true;
    }
}