/// <summary>
/// Computes, for every state, the accumulated weight of all paths that start at that state,
/// including the ending weight of the state where each path stops.
/// </summary>
/// <param name="nStates">The size of the returned array; it is indexed by state index.</param>
/// <param name="topologicalOrder">The states to process, listed in topological order.</param>
/// <param name="group">Transitions belonging to this group are ignored.</param>
/// <returns>An array holding the computed weight at the index of each processed state.</returns>
/// <remarks>
/// A single dynamic-programming pass from the last state of the order towards the first one.
/// </remarks>
private static Weight[] ComputeWeightsToEnd(int nStates, IReadOnlyList<State> topologicalOrder, int group)
{
    var result = CreateZeroWeights(nStates);

    // Walk the order backwards, so the entry of a destination state is read
    // after that state has been handled (assuming the order really is topological).
    var position = topologicalOrder.Count;
    while (position-- > 0)
    {
        var current = topologicalOrder[position];

        // Start from the state's own ending weight and add every usable outgoing transition.
        var total = current.EndWeight;
        for (var t = 0; t < current.TransitionCount; ++t)
        {
            var outgoing = current.GetTransition(t);
            if (outgoing.Group != group)
            {
                var viaTransition = Weight.Product(outgoing.Weight, result[outgoing.DestinationStateIndex]);
                total = Weight.Sum(total, viaTransition);
            }
        }

        result[current.Index] = total;
    }

    return result;
}
/// <summary>
/// Computes, for every state, the accumulated weight of all paths that lead from the root
/// (the first state of <paramref name="topologicalOrder"/>) to that state.
/// Ending weights do not take part in the computation.
/// </summary>
/// <param name="nStates">The size of the returned array; it is indexed by state index.</param>
/// <param name="topologicalOrder">The states to process, listed in topological order; the first one is the root.</param>
/// <param name="group">Transitions belonging to this group are ignored.</param>
/// <returns>An array holding the computed weight at the index of each state.</returns>
/// <remarks>
/// A single dynamic-programming pass from the first state of the order towards the last one.
/// </remarks>
private static Weight[] ComputeWeightsFromRoot(int nStates, IReadOnlyList<State> topologicalOrder, int group)
{
    var result = CreateZeroWeights(nStates);

    // The root is reached by the empty path.
    result[topologicalOrder[0].Index] = Weight.One;

    foreach (var source in topologicalOrder)
    {
        var sourceWeight = result[source.Index];
        if (!sourceWeight.IsZero)
        {
            // Push the weight of the source along each usable outgoing transition.
            for (var t = 0; t < source.TransitionCount; t++)
            {
                var outgoing = source.GetTransition(t);
                if (outgoing.Group == group)
                {
                    continue;
                }

                var destination = outgoing.DestinationStateIndex;
                var alreadyThere = result[destination];
                result[destination] = Weight.Sum(alreadyThere, Weight.Product(sourceWeight, outgoing.Weight));
            }
        }
    }

    return result;
}
/// <summary>
/// Recursively evaluates the automaton on the part of <paramref name="sequence"/> that starts at
/// <paramref name="sequencePosition"/>, taking this state as the starting point.
/// </summary>
/// <param name="sequence">The sequence to compute the value on.</param>
/// <param name="sequencePosition">The current position in the sequence.</param>
/// <param name="valueCache">
/// Memoization table keyed by (state index, sequence position). It is consulted first and receives the computed value.
/// </param>
/// <returns>The value computed from the current state.</returns>
private Weight DoGetValue(TSequence sequence, int sequencePosition, Dictionary<IntPair, Weight> valueCache)
{
    var cacheKey = new IntPair(this.Index, sequencePosition);
    Weight memoized;
    if (valueCache.TryGetValue(cacheKey, out memoized))
    {
        return memoized;
    }

    EpsilonClosure closure = this.GetEpsilonClosure();
    int sequenceLength =
        Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetLength(sequence);

    Weight result;
    if (sequencePosition >= sequenceLength)
    {
        // The whole sequence has been consumed: only the ending weight of the closure matters.
        result = closure.EndWeight;
    }
    else
    {
        result = Weight.Zero;
        TElement element =
            Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetElement(sequence, sequencePosition);

        for (int memberIndex = 0; memberIndex < closure.Size; ++memberIndex)
        {
            State member = closure.GetStateByIndex(memberIndex);
            Weight memberWeight = closure.GetStateWeightByIndex(memberIndex);

            for (int t = 0; t < member.transitionCount; t++)
            {
                Transition transition = member.transitions[t];
                if (transition.IsEpsilon)
                {
                    // The destination is a part of the closure anyway
                    continue;
                }

                State destState = this.Owner.states[transition.DestinationStateIndex];
                Weight distWeight = Weight.FromLogValue(transition.ElementDistribution.GetLogProb(element));
                if (distWeight.IsZero || transition.Weight.IsZero)
                {
                    // This transition cannot contribute, so the recursion is skipped.
                    continue;
                }

                Weight destValue = destState.DoGetValue(sequence, sequencePosition + 1, valueCache);
                if (destValue.IsZero)
                {
                    continue;
                }

                Weight contribution = Weight.Product(memberWeight, transition.Weight, distWeight, destValue);
                result = Weight.Sum(result, contribution);
            }
        }
    }

    valueCache.Add(cacheKey, result);
    return result;
}
/// <summary>
/// Computes the total weights between each pair of states in the component
/// using the <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf">generalized Floyd's algorithm</a>.
/// </summary>
/// <remarks>
/// The result is stored in <c>pairwiseWeights</c>, a <c>Size</c> x <c>Size</c> matrix indexed by
/// state positions inside the component. The matrix is first seeded with the summed weights of the direct
/// transitions, then every state <c>k</c> is eliminated in turn as an intermediate state.
/// </remarks>
private void ComputePairwiseWeightsMatrix()
{
    // Start from an all-zero matrix.
    this.pairwiseWeights = Util.ArrayInit(this.Size, this.Size, (i, j) => Weight.Zero);

    // Seed the matrix with direct transitions. A transition is used only if it passes the filter
    // and GetIndexByState reports its destination as a member of the component (index other than -1).
    for (int srcStateIndexInComponent = 0; srcStateIndexInComponent < this.Size; ++srcStateIndexInComponent)
    {
        State state = this.statesInComponent[srcStateIndexInComponent];
        for (int transitionIndex = 0; transitionIndex < state.TransitionCount; ++transitionIndex)
        {
            Transition transition = state.GetTransition(transitionIndex);
            State destState = state.Owner.States[transition.DestinationStateIndex];
            int destStateIndexInComponent;
            if (this.transitionFilter(transition) && (destStateIndexInComponent = this.GetIndexByState(destState)) != -1)
            {
                // Parallel transitions between the same pair of states are summed.
                this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent] = Weight.Sum(
                    this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent], transition.Weight);
            }
        }
    }

    // Main loop: allow state k as an intermediate state.
    for (int k = 0; k < this.Size; ++k)
    {
        // Closure of the weight of getting from k back to k; exact or approximate depending on the component setting.
        Weight loopWeight = this.useApproximateClosure
            ? Weight.ApproximateClosure(this.pairwiseWeights[k, k])
            : Weight.Closure(this.pairwiseWeights[k, k]);

        // For every pair (i, j) with i != k and j != k add the weight of going i -> k, looping at k, then k -> j.
        // Zero entries are skipped because they cannot contribute.
        for (int i = 0; i < this.Size; ++i)
        {
            if (i == k || this.pairwiseWeights[i, k].IsZero)
            {
                continue;
            }

            for (int j = 0; j < this.Size; ++j)
            {
                if (j == k || this.pairwiseWeights[k, j].IsZero)
                {
                    continue;
                }

                Weight additionalWeight = Weight.Product(
                    this.pairwiseWeights[i, k], loopWeight, this.pairwiseWeights[k, j]);
                this.pairwiseWeights[i, j] = Weight.Sum(this.pairwiseWeights[i, j], additionalWeight);
            }
        }

        // Fold the loop at k into column k and row k. This must happen after the pair update above,
        // which reads the not-yet-scaled entries. The [k, k] entry is scaled here as well,
        // but it is overwritten right after the loop.
        for (int i = 0; i < this.Size; ++i)
        {
            this.pairwiseWeights[i, k] = Weight.Product(this.pairwiseWeights[i, k], loopWeight);
            this.pairwiseWeights[k, i] = Weight.Product(this.pairwiseWeights[k, i], loopWeight);
        }

        this.pairwiseWeights[k, k] = loopWeight;
    }
}
/// <summary>
/// Merges outgoing transitions with the same destination state.
/// </summary>
/// <remarks>
/// Two transitions of a state are merged only when they share both the destination state and the group,
/// and are either both epsilon or both non-epsilon. The merged transition replaces the first one,
/// the second one is removed. A pair consisting of an epsilon and a non-epsilon transition is left untouched.
/// </remarks>
public void MergeParallelTransitions()
{
    for (var stateIndex = 0; stateIndex < this.builder.StatesCount; ++stateIndex)
    {
        var state = this.builder[stateIndex];
        for (var iterator1 = state.TransitionIterator; iterator1.Ok; iterator1.Next())
        {
            var transition1 = iterator1.Value;

            // Scan the transitions that follow transition1.
            // NOTE(review): this relies on the assignment copying the iterator, so that advancing
            // iterator2 does not move iterator1 - presumably the iterator is a value type; confirm.
            var iterator2 = iterator1;
            iterator2.Next();
            for (; iterator2.Ok; iterator2.Next())
            {
                var transition2 = iterator2.Value;
                if (transition1.DestinationStateIndex == transition2.DestinationStateIndex && transition1.Group == transition2.Group)
                {
                    var removeTransition2 = false;
                    if (transition1.IsEpsilon && transition2.IsEpsilon)
                    {
                        // Two epsilon transitions: the weights simply add up.
                        transition1.Weight = Weight.Sum(transition1.Weight, transition2.Weight);
                        iterator1.Value = transition1; // write the modified copy back
                        removeTransition2 = true;
                    }
                    else if (!transition1.IsEpsilon && !transition2.IsEpsilon)
                    {
                        // Two element transitions: the merged distribution is the weighted sum of both distributions.
                        var newElementDistribution = new TElementDistribution();
                        if (double.IsInfinity(transition1.Weight.Value) && double.IsInfinity(transition2.Weight.Value))
                        {
                            // Both weights are infinite: mix the distributions with equal coefficients
                            // instead of passing infinities to SetToSum.
                            newElementDistribution.SetToSum(
                                1.0, transition1.ElementDistribution.Value, 1.0, transition2.ElementDistribution.Value);
                        }
                        else
                        {
                            newElementDistribution.SetToSum(
                                transition1.Weight.Value, transition1.ElementDistribution.Value, transition2.Weight.Value, transition2.ElementDistribution.Value);
                        }

                        transition1.ElementDistribution = newElementDistribution;
                        transition1.Weight = Weight.Sum(transition1.Weight, transition2.Weight);
                        iterator1.Value = transition1; // write the modified copy back
                        removeTransition2 = true;
                    }

                    if (removeTransition2)
                    {
                        // NOTE(review): the enclosing loop calls Next() right after Remove();
                        // this assumes Remove() leaves the iterator in a state where that is safe - confirm.
                        iterator2.Remove();
                    }
                }
            }
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="EpsilonClosure"/> class.
/// </summary>
/// <param name="state">The state whose epsilon closure this instance will represent.</param>
/// <remarks>
/// When the only epsilon transitions of <paramref name="state"/> are self-loops, the closure is built directly
/// from the summed self-loop weight. Otherwise the condensation of the epsilon-only graph reachable from the
/// state is computed and every state of every component is added with its weight from the root.
/// </remarks>
internal EpsilonClosure(State state)
{
    Argument.CheckIfValid(!state.IsNull, nameof(state));

    // Look for an epsilon transition that leaves the state; sum the epsilon self-loops on the way.
    Weight epsilonSelfLoopWeight = Weight.Zero;
    bool leavesState = false;
    for (int t = 0; !leavesState && t < state.TransitionCount; ++t)
    {
        Transition candidate = state.GetTransition(t);
        if (!candidate.IsEpsilon)
        {
            continue;
        }

        if (candidate.DestinationStateIndex == state.Index)
        {
            epsilonSelfLoopWeight = Weight.Sum(epsilonSelfLoopWeight, candidate.Weight);
        }
        else
        {
            leavesState = true;
        }
    }

    if (leavesState)
    {
        // General case: the closure spans several states.
        Condensation condensation = state.Owner.ComputeCondensation(state, tr => tr.IsEpsilon, true);
        for (int componentIndex = 0; componentIndex < condensation.ComponentCount; ++componentIndex)
        {
            StronglyConnectedComponent component = condensation.GetComponent(componentIndex);
            for (int memberIndex = 0; memberIndex < component.Size; ++memberIndex)
            {
                State member = component.GetStateByIndex(memberIndex);
                this.weightedStates.Add(Pair.Create(member, condensation.GetWeightFromRoot(member)));
            }
        }

        this.EndWeight = condensation.GetWeightToEnd(state);
        return;
    }

    // Very common case: the closure consists of the state itself.
    Weight closureWeight = Weight.ApproximateClosure(epsilonSelfLoopWeight);
    this.weightedStates.Add(Pair.Create(state, closureWeight));
    this.EndWeight = Weight.Product(closureWeight, state.EndWeight);
}
/// <summary>
/// Gets the total weight between two given states in the component.
/// </summary>
/// <param name="srcStateIndexInComponent">The index of the source state in the component.</param>
/// <param name="destStateIndexInComponent">The index of the destination state in the component.</param>
/// <returns>The total weight between the given states in the component.</returns>
/// <remarks>
/// The result is computed lazily and cached. A component with a single state needs only the closure
/// of its self-loop weight; larger components use the full pairwise weight matrix.
/// </remarks>
public Weight GetWeight(int srcStateIndexInComponent, int destStateIndexInComponent)
{
    Argument.CheckIfInRange(
        srcStateIndexInComponent >= 0 && srcStateIndexInComponent < this.Size,
        nameof(srcStateIndexInComponent),
        "The given index is out of range.");
    Argument.CheckIfInRange(
        destStateIndexInComponent >= 0 && destStateIndexInComponent < this.Size,
        nameof(destStateIndexInComponent),
        "The given index is out of range.");

    if (this.Size == 1)
    {
        // Optimize for a common case
        if (!this.singleStatePairwiseWeight.HasValue)
        {
            State state = this.statesInComponent[0];

            // Accumulate in a local and publish the cached value only once it is complete.
            // Otherwise an exception thrown half-way (e.g. by the transition filter) would leave
            // a partial, un-closed sum in the cache that later calls would return as the result.
            Weight selfLoopWeight = Weight.Zero;
            for (int i = 0; i < state.TransitionCount; ++i)
            {
                Transition transition = state.GetTransition(i);
                if (this.transitionFilter(transition) && transition.DestinationStateIndex == state.Index)
                {
                    selfLoopWeight = Weight.Sum(selfLoopWeight, transition.Weight);
                }
            }

            this.singleStatePairwiseWeight = this.useApproximateClosure
                ? Weight.ApproximateClosure(selfLoopWeight)
                : Weight.Closure(selfLoopWeight);
        }

        return this.singleStatePairwiseWeight.Value;
    }

    if (this.pairwiseWeights == null)
    {
        this.ComputePairwiseWeightsMatrix();
    }

    return this.pairwiseWeights[srcStateIndexInComponent, destStateIndexInComponent];
}
/// <summary>
/// Fills in <c>WeightToEnd</c> for the states of every component: the total weight of all paths
/// that start at the state, with ending weights taken into account.
/// </summary>
/// <remarks>
/// Dynamic programming over the components, visited in the order in which they are stored
/// (the reverse topological order, i.e. from the leaves up to the root).
/// </remarks>
private void ComputeWeightsToEnd()
{
    for (int componentIndex = 0; componentIndex < this.components.Count; ++componentIndex)
    {
        StronglyConnectedComponent component = this.components[componentIndex];

        // Treat every state of the component as a possible exit point.
        for (int exitIndex = 0; exitIndex < component.Size; ++exitIndex)
        {
            State exitState = component.GetStateByIndex(exitIndex);

            // Weight of leaving the component through this state: its own ending weight plus
            // all accepted transitions whose destination lies outside the component.
            Weight exitWeight = exitState.EndWeight;
            for (int t = 0; t < exitState.TransitionCount; ++t)
            {
                Transition outgoing = exitState.GetTransition(t);
                State target = exitState.Owner.states[outgoing.DestinationStateIndex];
                if (!this.transitionFilter(outgoing) || component.HasState(target))
                {
                    continue;
                }

                Weight targetWeightToEnd = this.stateIdToInfo[outgoing.DestinationStateIndex].WeightToEnd;
                exitWeight = Weight.Sum(exitWeight, Weight.Product(outgoing.Weight, targetWeightToEnd));
            }

            if (exitWeight.IsZero)
            {
                continue;
            }

            // Every state of the component contributes through the exit state,
            // scaled by the weight of getting from that state to the exit state.
            for (int memberIndex = 0; memberIndex < component.Size; ++memberIndex)
            {
                State member = component.GetStateByIndex(memberIndex);
                CondensationStateInfo memberInfo = this.stateIdToInfo[member.Index];
                Weight reachExit = component.GetWeight(memberIndex, exitIndex);
                memberInfo.WeightToEnd = Weight.Sum(memberInfo.WeightToEnd, Weight.Product(reachExit, exitWeight));
                this.stateIdToInfo[member.Index] = memberInfo;
            }
        }
    }

    this.weightsToEndComputed = true;
}
/// <summary>
/// Builds a new automaton out of the transitions that belong to <paramref name="group"/>.
/// </summary>
/// <param name="states">The states of the source automaton, indexed by state index.</param>
/// <param name="topologicalOrder">The states in topological order; the first one is treated as the root.</param>
/// <param name="group">The group whose transitions are copied into the new automaton.</param>
/// <param name="subgraph">Indices of the source states that get a counterpart in the new automaton.</param>
/// <returns>The newly built automaton.</returns>
/// <exception cref="Exception">
/// Thrown when the accumulated product of start and end weights over the copied states is not zero.
/// </exception>
private static TThis BuildSubautomaton(IReadOnlyList<State> states, IReadOnlyList<State> topologicalOrder, int group, HashSet<int> subgraph)
{
    var fromRoot = ComputeWeightsFromRoot(states.Count, topologicalOrder, group);
    var toEnd = ComputeWeightsToEnd(states.Count, topologicalOrder, group);

    var subautomaton = new TThis();
    var stateMapping = subgraph.ToDictionary(x => x, _ => subautomaton.AddState());

    // Every state is a candidate until a copied transition is found to enter it.
    var statesWithoutIncoming = new HashSet<int>(subgraph);

    // Copy the transitions of the group and find the states no copied transition leads to.
    foreach (var originalIndex in subgraph)
    {
        var copy = stateMapping[originalIndex];
        var original = states[originalIndex];
        for (int t = 0; t < original.TransitionCount; t++)
        {
            var transition = original.GetTransition(t);
            if (transition.Group == group)
            {
                statesWithoutIncoming.Remove(transition.DestinationStateIndex);
                copy.AddTransition(
                    transition.ElementDistribution,
                    transition.Weight,
                    stateMapping[transition.DestinationStateIndex]);
            }
        }
    }

    // Mark start and end states, modulo paths bypassing the automaton.
    var bypassWeight = Weight.Zero;
    foreach (var originalIndex in subgraph)
    {
        var copy = stateMapping[originalIndex];

        // A state acts as a start state only when it has outgoing transitions in the copy.
        var startWeight = Weight.Zero;
        if (copy.TransitionCount > 0)
        {
            startWeight = fromRoot[originalIndex];
        }

        if (!startWeight.IsZero)
        {
            subautomaton.Start.AddEpsilonTransition(startWeight, copy);
        }

        // A state acts as an end state only when some copied transition enters it.
        var endWeight = statesWithoutIncoming.Contains(originalIndex) ? Weight.Zero : toEnd[originalIndex];
        if (!endWeight.IsZero)
        {
            copy.SetEndWeight(endWeight);
        }

        bypassWeight = Weight.Sum(bypassWeight, Weight.Product(startWeight, endWeight));
    }

    if (!bypassWeight.IsZero)
    {
        throw new Exception("Write a unit test for this case. Code should be fine.");
    }

    var epsilonWeight = Weight.AbsoluteDifference(toEnd[topologicalOrder[0].Index], bypassWeight);
    subautomaton.Start.SetEndWeight(epsilonWeight);

    return subautomaton;
}
/// <summary>
/// Computes a set of outgoing transitions from a given state of the determinization result.
/// </summary>
/// <param name="sourceState">The source state of the determinized automaton represented as
/// a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.</param>
/// <returns>
/// A collection of (element distribution, weight, weighted state set) triples corresponding to outgoing transitions from <paramref name="sourceState"/>.
/// The first two elements of a tuple define the element distribution and the weight of a transition.
/// The third element defines the outgoing state.
/// </returns>
/// <remarks>
/// Implemented as a sweep over sorted character segment bounds: the running total weight and the
/// per-destination weights are updated at every bound, and one transition is emitted for each
/// non-empty character range between consecutive bounds whose total weight is above the threshold.
/// </remarks>
protected override List <(DiscreteChar, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
    Determinization.WeightedStateSet sourceState)
{
    const double LogEps = -35; // Don't add transitions with log-weight less than this as they have been produced by numerical inaccuracies

    // Build a list of numbered non-zero probability character segment bounds (they are numbered here due to perf. reasons)
    var segmentBounds = new List <ValueTuple <int, TransitionCharSegmentBound> >();
    int transitionsProcessed = 0;
    foreach (KeyValuePair <int, Weight> stateIdWeight in sourceState)
    {
        var state = this.States[stateIdWeight.Key];
        foreach (var transition in state.Transitions)
        {
            AddTransitionCharSegmentBounds(transition, stateIdWeight.Value, segmentBounds);
        }

        transitionsProcessed += state.Transitions.Count;
    }

    // Sort segment bounds left-to-right, start-to-end
    // (the sort is skipped when at most one transition was processed).
    var sortedIndexedSegmentBounds = segmentBounds.ToArray();
    if (transitionsProcessed > 1)
    {
        Array.Sort(sortedIndexedSegmentBounds, CompareSegmentBounds);

        // Orders the numbered bounds by the bound itself; the number does not take part in the comparison.
        int CompareSegmentBounds((int, TransitionCharSegmentBound) a, (int, TransitionCharSegmentBound) b) => a.Item2.CompareTo(b.Item2);
    }

    // Produce an outgoing transition for each unique subset of overlapping segments
    var result = new List <(DiscreteChar, Weight, Determinization.WeightedStateSet)>();
    Weight currentSegmentStateWeightSum = Weight.Zero;

    // Running weight per destination state; every destination mentioned by any bound starts at zero.
    var currentSegmentStateWeights = new Dictionary <int, Weight>();
    foreach (var sb in segmentBounds)
    {
        currentSegmentStateWeights[sb.Item2.DestinationStateId] = Weight.Zero;
    }

    // Start bounds of the segments that are currently open.
    var activeSegments = new HashSet <TransitionCharSegmentBound>();
    int currentSegmentStart = char.MinValue;
    foreach (var tup in sortedIndexedSegmentBounds)
    {
        TransitionCharSegmentBound segmentBound = tup.Item2;
        if (currentSegmentStateWeightSum.LogValue > LogEps && currentSegmentStart < segmentBound.Bound)
        {
            // Flush previous segment
            char segmentEnd = (char)(segmentBound.Bound - 1);
            int segmentLength = segmentEnd - currentSegmentStart + 1;
            DiscreteChar elementDist = DiscreteChar.InRange((char)currentSegmentStart, segmentEnd);
            var destinationState = new Determinization.WeightedStateSet();
            foreach (KeyValuePair <int, Weight> stateIdWithWeight in currentSegmentStateWeights)
            {
                if (stateIdWithWeight.Value.LogValue > LogEps)
                {
                    // Destination weights are normalized by the total weight of the segment.
                    Weight stateWeight = Weight.Product(stateIdWithWeight.Value, Weight.Inverse(currentSegmentStateWeightSum));
                    destinationState.Add(stateIdWithWeight.Key, stateWeight);
                }
            }

            // The transition weight is the total weight of the segment multiplied by the segment length.
            Weight transitionWeight = Weight.Product(Weight.FromValue(segmentLength), currentSegmentStateWeightSum);
            result.Add((elementDist, transitionWeight, destinationState));
        }

        // Update current segment
        currentSegmentStart = segmentBound.Bound;
        if (segmentBound.IsStart)
        {
            // A segment opens: its weight joins the running totals.
            activeSegments.Add(segmentBound);
            currentSegmentStateWeightSum = Weight.Sum(currentSegmentStateWeightSum, segmentBound.Weight);
            currentSegmentStateWeights[segmentBound.DestinationStateId] = Weight.Sum(currentSegmentStateWeights[segmentBound.DestinationStateId], segmentBound.Weight);
        }
        else
        {
            Debug.Assert(currentSegmentStateWeights.ContainsKey(segmentBound.DestinationStateId), "We shouldn't exit a state we didn't enter.");

            // A segment closes. Its start bound is looked up in the unsorted list right before this end bound.
            // NOTE(review): this assumes the number stored in Item1 is the bound's position in segmentBounds
            // and that the start bound was added immediately before the end bound - confirm in AddTransitionCharSegmentBounds.
            activeSegments.Remove(segmentBounds[tup.Item1 - 1].Item2); // End follows start in original.
            if (double.IsInfinity(segmentBound.Weight.Value))
            {
                // Cannot subtract because of the infinities involved.
                // The totals are recomputed from the segments that are still open instead.
                currentSegmentStateWeightSum = activeSegments.Select(sb => sb.Weight).Aggregate(Weight.Zero, (acc, w) => Weight.Sum(acc, w));
                currentSegmentStateWeights[segmentBound.DestinationStateId] = activeSegments.Where(sb => sb.DestinationStateId == segmentBound.DestinationStateId).Select(sb => sb.Weight).Aggregate(Weight.Zero, (acc, w) => Weight.Sum(acc, w));
            }
            else
            {
                // Remove the weight of the closed segment from the running totals.
                // With no open segments left the total is reset to exactly zero rather than computed by subtraction.
                currentSegmentStateWeightSum = activeSegments.Count == 0 ? Weight.Zero : Weight.AbsoluteDifference(currentSegmentStateWeightSum, segmentBound.Weight);
                Weight prevStateWeight = currentSegmentStateWeights[segmentBound.DestinationStateId];
                currentSegmentStateWeights[segmentBound.DestinationStateId] = Weight.AbsoluteDifference(
                    prevStateWeight, segmentBound.Weight);
            }
        }
    }

    return(result);
}
/// <summary>
/// Fills in <c>WeightFromRoot</c> for the states of every component: the total weight of all paths
/// that start at the root and end at the state. Ending weights are not taken into account.
/// </summary>
/// <remarks>
/// Dynamic programming over the components, visited from the last stored component to the first one
/// (the topological order, i.e. from the root down to the leaves). <c>UpwardWeightFromRoot</c> of a state
/// collects the weight that arrives at it from components processed earlier.
/// </remarks>
private void ComputeWeightsFromRoot()
{
    // The root is reached by the empty path.
    CondensationStateInfo rootInfo = this.stateIdToInfo[this.Root.Index];
    rootInfo.UpwardWeightFromRoot = Weight.One;
    this.stateIdToInfo[this.Root.Index] = rootInfo;

    for (int componentIndex = this.components.Count - 1; componentIndex >= 0; --componentIndex)
    {
        StronglyConnectedComponent component = this.components[componentIndex];

        // Step 1: spread the weight that entered the component over all of its states.
        for (int entryIndex = 0; entryIndex < component.Size; ++entryIndex)
        {
            State entryState = component.GetStateByIndex(entryIndex);
            Weight enteringWeight = this.stateIdToInfo[entryState.Index].UpwardWeightFromRoot;
            if (enteringWeight.IsZero)
            {
                continue;
            }

            for (int memberIndex = 0; memberIndex < component.Size; ++memberIndex)
            {
                State member = component.GetStateByIndex(memberIndex);
                CondensationStateInfo memberInfo = this.stateIdToInfo[member.Index];
                Weight viaEntry = Weight.Product(enteringWeight, component.GetWeight(entryIndex, memberIndex));
                memberInfo.WeightFromRoot = Weight.Sum(memberInfo.WeightFromRoot, viaEntry);
                this.stateIdToInfo[member.Index] = memberInfo;
            }
        }

        // Step 2: hand the weight over to the states of other components
        // along the accepted transitions that leave this component.
        for (int sourceIndex = 0; sourceIndex < component.Size; ++sourceIndex)
        {
            State source = component.GetStateByIndex(sourceIndex);
            Weight sourceWeight = this.stateIdToInfo[source.Index].WeightFromRoot;
            if (sourceWeight.IsZero)
            {
                continue;
            }

            for (int t = 0; t < source.TransitionCount; ++t)
            {
                Transition outgoing = source.GetTransition(t);
                State target = source.Owner.states[outgoing.DestinationStateIndex];
                if (!this.transitionFilter(outgoing) || component.HasState(target))
                {
                    continue;
                }

                CondensationStateInfo targetInfo = this.stateIdToInfo[target.Index];
                targetInfo.UpwardWeightFromRoot = Weight.Sum(
                    targetInfo.UpwardWeightFromRoot,
                    Weight.Product(sourceWeight, outgoing.Weight));
                this.stateIdToInfo[outgoing.DestinationStateIndex] = targetInfo;
            }
        }
    }

    this.weightsFromRootComputed = true;
}
/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <param name="maxStatesBeforeStop">
/// The maximum number of states the resulting automaton can have. If the number of states exceeds the value
/// of this parameter during determinization, the process is aborted.
/// </param>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
/// Note that epsilon transitions are removed from this automaton before any other check is made,
/// so the automaton is epsilon-free afterwards even when <see langword="false"/> is returned.
/// </remarks>
public bool TryDeterminize(int maxStatesBeforeStop)
{
    Argument.CheckIfInRange(
        maxStatesBeforeStop > 0 && maxStatesBeforeStop <= MaxStateCount,
        nameof(maxStatesBeforeStop),
        "The maximum number of states must be positive and not greater than the maximum number of states allowed in an automaton.");

    this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

    if (this.UsesGroups())
    {
        // Determinization would result in loss of group information, which we cannot allow
        return false;
    }

    // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
    // Such sets correspond to states of the resulting automaton.
    var weightedStateSetQueue = new Queue<Determinization.WeightedStateSet>();
    var weightedStateSetToNewState = new Dictionary<Determinization.WeightedStateSet, int>();
    var builder = new Builder();

    // The start state of the result corresponds to the start state of this automaton taken with unit weight.
    var startWeightedStateSet = new Determinization.WeightedStateSet { { this.Start.Index, Weight.One } };
    weightedStateSetQueue.Enqueue(startWeightedStateSet);
    weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
    builder.Start.SetEndWeight(this.Start.EndWeight);

    while (weightedStateSetQueue.Count > 0)
    {
        // Take one unprocessed state of the resulting automaton
        Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
        var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
        var currentState = builder[currentStateIndex];

        // Find out what transitions we should add for this state
        var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

        // For each transition to add
        foreach ((TElementDistribution elementDistribution, Weight weight, Determinization.WeightedStateSet destWeightedStateSet) in outgoingTransitionInfos)
        {
            if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out int destinationStateIndex))
            {
                if (builder.StatesCount == maxStatesBeforeStop)
                {
                    // Too many states, determinization attempt failed
                    return false;
                }

                // Add new state to the result
                var destinationState = builder.AddState();
                weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);
                weightedStateSetQueue.Enqueue(destWeightedStateSet);

                // Compute its ending weight: the weighted sum of the ending weights of the original states.
                // The sum is accumulated locally and stored once instead of being read back
                // from the builder and rewritten on every iteration.
                Weight endWeight = Weight.Zero;
                foreach (KeyValuePair<int, Weight> stateIdWithWeight in destWeightedStateSet)
                {
                    endWeight = Weight.Sum(
                        endWeight,
                        Weight.Product(stateIdWithWeight.Value, this.States[stateIdWithWeight.Key].EndWeight));
                }

                destinationState.SetEndWeight(endWeight);
                destinationStateIndex = destinationState.Index;
            }

            // Add transition to the destination state
            currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
        }
    }

    // Determinization produces a separate transition for each segment, so merge the parallel ones
    var simplification = new Simplification(builder, this.PruneTransitionsWithLogWeightLessThan);
    simplification.MergeParallelTransitions();

    // Replace the contents of this automaton with the result, carrying the settings over
    var result = builder.GetAutomaton();
    result.PruneTransitionsWithLogWeightLessThan = this.PruneTransitionsWithLogWeightLessThan;
    result.LogValueOverride = this.LogValueOverride;
    this.SwapWith(result);

    return true;
}
/// <summary>
/// Recursively increases the value of this automaton on <paramref name="sequence"/> by <paramref name="weight"/>.
/// </summary>
/// <param name="stateIndex">Index of currently traversed state.</param>
/// <param name="isNewState">Indicates whether state <paramref name="stateIndex"/> was just created.</param>
/// <param name="selfLoopAlreadyMatched">Indicates whether self-loop on state <paramref name="stateIndex"/> was just matched.</param>
/// <param name="firstAllowedStateIndex">The minimum index of an existing state that can be used for the sequence.</param>
/// <param name="currentSequencePos">The current position in the generalized sequence.</param>
/// <param name="sequence">The generalized sequence.</param>
/// <param name="weight">The weight of the sequence.</param>
/// <returns>
/// <see langword="true"/> if the subsequence starting at <paramref name="currentSequencePos"/> has been successfully merged in,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// This function attempts to add as few new states and transitions as possible.
/// Its implementation is conceptually similar to adding a string to a trie.
/// Three cases are handled in turn: the sequence is exhausted, the current element is a self-loop,
/// and the current element is an ordinary element.
/// </remarks>
private bool DoAddGeneralizedSequence(
    int stateIndex,
    bool isNewState,
    bool selfLoopAlreadyMatched,
    int firstAllowedStateIndex,
    int currentSequencePos,
    GeneralizedSequence sequence,
    Weight weight)
{
    bool success;
    var builder = this.builder;
    var state = builder[stateIndex];

    // Case 1: the whole sequence has been consumed, try to finish in this state.
    if (currentSequencePos == sequence.Count)
    {
        if (!selfLoopAlreadyMatched)
        {
            // We can't finish in a state with a self-loop
            for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
            {
                if (iterator.Value.DestinationStateIndex == state.Index)
                {
                    return(false);
                }
            }
        }

        // The remaining weight is added to the ending weight of the state.
        state.SetEndWeight(Weight.Sum(state.EndWeight, weight));
        return(true);
    }

    var element = sequence[currentSequencePos];

    // Case 2: the element is a self-loop. Treat self-loop elements separately.
    if (element.LoopWeight.HasValue)
    {
        if (selfLoopAlreadyMatched)
        {
            // Previous element was also a self-loop, we should try to find an epsilon transition
            for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
            {
                var transition = iterator.Value;
                if (transition.DestinationStateIndex != state.Index && transition.IsEpsilon && transition.DestinationStateIndex >= firstAllowedStateIndex)
                {
                    // The weight of the existing epsilon transition is divided out of the remaining weight.
                    if (this.DoAddGeneralizedSequence(
                        transition.DestinationStateIndex,
                        false,
                        false,
                        firstAllowedStateIndex,
                        currentSequencePos,
                        sequence,
                        Weight.Product(weight, Weight.Inverse(transition.Weight))))
                    {
                        return(true);
                    }
                }
            }

            // Epsilon transition not found, let's create a new one
            var destination = state.AddEpsilonTransition(Weight.One);
            success = this.DoAddGeneralizedSequence(
                destination.Index, true, false, firstAllowedStateIndex, currentSequencePos, sequence, weight);
            Debug.Assert(success, "This call must always succeed.");
            return(true);
        }

        // Find a matching self-loop
        for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
        {
            var transition = iterator.Value;
            if (transition.IsEpsilon && transition.DestinationStateIndex != state.Index && transition.DestinationStateIndex >= firstAllowedStateIndex)
            {
                // Try this epsilon transition
                // NOTE(review): unlike the branch above, the weight of the epsilon transition is not divided out here.
                // Presumably this is fine because epsilon transitions created by this method have unit weight - confirm.
                if (this.DoAddGeneralizedSequence(
                    transition.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))
                {
                    return(true);
                }
            }

            // Is it a self-loop?
            if (transition.DestinationStateIndex == state.Index)
            {
                // Do self-loops match? Weight and group must be equal, and the loops must be either
                // both epsilon or both non-epsilon with equal element distributions.
                if ((transition.Weight == element.LoopWeight.Value) &&
                    (element.Group == transition.Group) &&
                    ((transition.IsEpsilon && element.IsEpsilonSelfLoop) || (!transition.IsEpsilon && !element.IsEpsilonSelfLoop && transition.ElementDistribution.Equals(element.ElementDistribution))))
                {
                    // Skip the element in the sequence, remain in the same state
                    success = this.DoAddGeneralizedSequence(
                        stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                    Debug.Assert(success, "This call must always succeed.");
                    return(true);
                }

                // The state also has a self-loop, but the two don't match
                return(false);
            }
        }

        if (!isNewState)
        {
            // Can't add self-loop to an existing state, it will change the language accepted by the state
            return(false);
        }

        // Add a new self-loop
        state.AddTransition(element.ElementDistribution, element.LoopWeight.Value, stateIndex, element.Group);
        success = this.DoAddGeneralizedSequence(stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
        Debug.Assert(success, "This call must always succeed.");
        return(true);
    }

    // Case 3: an ordinary element. Try to find a transition for the element
    for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
    {
        var transition = iterator.Value;
        if (transition.IsEpsilon && transition.DestinationStateIndex != state.Index && transition.DestinationStateIndex >= firstAllowedStateIndex)
        {
            // Try this epsilon transition (the element is not consumed, the position stays the same)
            if (this.DoAddGeneralizedSequence(
                transition.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))
            {
                return(true);
            }
        }

        // Is it a self-loop?
        if (transition.DestinationStateIndex == state.Index)
        {
            if (selfLoopAlreadyMatched)
            {
                // The self-loop was checked or added by the caller
                continue;
            }

            // Can't go through an existing self-loop, it will allow undesired sequences to be accepted
            return(false);
        }

        // Skip transitions that lead to a disallowed state or do not match the element.
        if (transition.DestinationStateIndex < firstAllowedStateIndex ||
            element.Group != transition.Group ||
            !element.ElementDistribution.Equals(transition.ElementDistribution))
        {
            continue;
        }

        // Skip the element in the sequence, move to the destination state
        // Weight of the existing transition must be taken into account
        // This case can fail if the next element is a self-loop and the destination state already has a different one
        if (this.DoAddGeneralizedSequence(
            transition.DestinationStateIndex,
            false,
            false,
            firstAllowedStateIndex,
            currentSequencePos + 1,
            sequence,
            Weight.Product(weight, Weight.Inverse(transition.Weight))))
        {
            return(true);
        }
    }

    // No existing transition could be reused. Add a new transition with unit weight;
    // the remaining weight is carried on to the newly created state.
    var newChild = state.AddTransition(element.ElementDistribution, Weight.One, null, element.Group);
    success = this.DoAddGeneralizedSequence(
        newChild.Index, true, false, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
    Debug.Assert(success, "This call must always succeed.");
    return(true);
}