/// <summary>
/// Rescales the weights of the given states in place by the inverse of the largest weight among them.
/// </summary>
/// <param name="weightedStates">The list of weighted states to rescale. Its first element is read unconditionally.</param>
/// <returns>The largest weight that was present in the list before rescaling.</returns>
private static Weight NormalizeWeights(List<WeightedState> weightedStates)
{
    // The first entry seeds the search for the largest weight.
    var largest = weightedStates[0].Weight;
    for (var position = 1; position < weightedStates.Count; ++position)
    {
        var candidate = weightedStates[position].Weight;
        if (candidate > largest)
        {
            largest = candidate;
        }
    }

    // Replace every entry with a copy whose weight is multiplied by the inverse of the largest weight.
    var scale = Weight.Inverse(largest);
    for (var position = 0; position < weightedStates.Count; ++position)
    {
        var current = weightedStates[position];
        weightedStates[position] = new WeightedState(current.Index, current.Weight * scale);
    }

    return largest;
}
/// <summary>
/// Builds a weighted state set from a sorted, rescaled copy of the accumulated states.
/// </summary>
/// <returns>
/// A pair consisting of the resulting state set and the weight that was factored out of its entries
/// (the largest accumulated weight, or the only weight when there is a single state).
/// </returns>
public (WeightedStateSet, Weight) Get()
{
    Debug.Assert(this.weightedStates.Count > 0);

    // Work on a copy so the accumulated states themselves are left untouched.
    var states = this.weightedStates.ToArray();

    // A single state needs neither sorting nor division: its weight is factored out entirely
    // and the stored weight is set to exactly one.
    if (states.Length == 1)
    {
        var only = states[0];
        states[0] = new WeightedState(only.Index, Weight.One);
        return (new WeightedStateSet(states), only.Weight);
    }

    Array.Sort(states);

    // Find the largest weight; the first entry seeds the search.
    var largest = states[0].Weight;
    for (var position = 1; position < states.Length; ++position)
    {
        var candidate = states[position].Weight;
        if (candidate > largest)
        {
            largest = candidate;
        }
    }

    // Multiply every weight by the inverse of the largest one.
    var scale = Weight.Inverse(largest);
    for (var position = 0; position < states.Length; ++position)
    {
        var current = states[position];
        states[position] = new WeightedState(current.Index, current.Weight * scale);
    }

    return (new WeightedStateSet(states), largest);
}
/// <summary>
/// Merges compatible states in the tree-shaped part of the automaton that is reachable from the start state.
/// </summary>
/// <remarks>
/// States are visited from the start state using an explicit stack. For each visited state:
/// an epsilon transition to a tree node is folded into the state itself when the self-loops of both
/// states are compatible; otherwise later transitions with the same group and element distribution
/// (and compatible destination self-loops) have their destinations merged into the destination of the
/// earlier transition. Absorbed states are only flagged while traversing and are removed in a single
/// <c>builder.RemoveStates</c> call at the end.
/// </remarks>
public void MergeTrees()
{
    var builder = this.builder;
    // isRemovedNode[i] is set once state i has been absorbed into another state.
    var isRemovedNode = new bool[builder.StatesCount];
    var isTreeNode = FindTreeNodes();

    // Explicit stack of state indices that still have to be processed.
    var stack = new Stack <int>();
    stack.Push(builder.StartStateIndex);

    while (stack.Count > 0)
    {
        var stateIndex = stack.Pop();
        var state = builder[stateIndex];

        // Transitions to non-tree nodes and self-loops should be ignored
        bool IsMergeableTransition(Transition t) =>
            isTreeNode[t.DestinationStateIndex] && t.DestinationStateIndex != stateIndex;

        for (var iterator1 = state.TransitionIterator; iterator1.Ok; iterator1.Next())
        {
            var transition1 = iterator1.Value;

            // Ignore non-tree nodes and self-loops
            if (!IsMergeableTransition(transition1))
            {
                continue;
            }

            // If it is an epsilon transition then try to merge with current state first
            // Note: group doesn't matter for epsilon transitions (in generalized trees)
            if (transition1.IsEpsilon && CanMergeStates(stateIndex, transition1.DestinationStateIndex))
            {
                // All transitions from transition1.DestinationStateIndex will be inserted
                // into current state. And will be iterated by iterator1 without special treatment.
                MergeStates(stateIndex, transition1.DestinationStateIndex, transition1.Weight);
                isRemovedNode[transition1.DestinationStateIndex] = true;
                // NOTE(review): the loop still calls iterator1.Next() after Remove(); this relies on the
                // iterator's Remove/Next contract, which is not visible here - confirm no element is skipped.
                iterator1.Remove();
                continue;
            }

            // Try to find transitions with which this one can be merged.
            // iterator2 starts as a copy of iterator1 and is advanced once, so only transitions
            // after transition1 are examined.
            var iterator2 = iterator1;
            iterator2.Next();
            for (; iterator2.Ok; iterator2.Next())
            {
                var transition2 = iterator2.Value;
                Debug.Assert(
                    transition1.DestinationStateIndex != transition2.DestinationStateIndex,
                    "Parallel transitions must be merged earlier by MergeParallelTransitions()");

                // Ignore non-tree nodes and self-loops
                if (IsMergeableTransition(transition2) && CanMergeDestinations(transition1, transition2))
                {
                    // The destination of transition2 is folded into the destination of transition1.
                    // Its weights are rescaled by transition2.Weight / transition1.Weight because
                    // the merged state is reached through transition1 from now on.
                    MergeStates(
                        transition1.DestinationStateIndex,
                        transition2.DestinationStateIndex,
                        transition2.Weight * Weight.Inverse(transition1.Weight));
                    isRemovedNode[transition2.DestinationStateIndex] = true;
                    iterator2.Remove();
                }
            }

            // The (possibly enlarged) destination of transition1 is processed later.
            stack.Push(transition1.DestinationStateIndex);
        }
    }

    // Drop every state that was flagged as absorbed.
    // NOTE(review): the meaning of the second argument (true) is defined by RemoveStates, not shown here.
    builder.RemoveStates(isRemovedNode, true);
    return;

    // Returns a boolean array in which an "isTree" flag is stored for each automaton state.
    // A state is flagged if it is the start state, or if its in-degree (not counting self-loops) is 1
    // and it is reached through a transition from a flagged state.
    bool[] FindTreeNodes()
    {
        // Count incoming transitions per state.
        var inDegree = new int[builder.StatesCount];
        for (var i = 0; i < builder.StatesCount; ++i)
        {
            for (var iterator = builder[i].TransitionIterator; iterator.Ok; iterator.Next())
            {
                var destinationIndex = iterator.Value.DestinationStateIndex;
                // Ignore self-loops
                if (destinationIndex != i)
                {
                    ++inDegree[destinationIndex];
                }
            }
        }

        // Walk from the start state, following only transitions into states with in-degree 1.
        var result = new bool[builder.StatesCount];
        var treeSearchStack = new Stack <int>();
        treeSearchStack.Push(builder.StartStateIndex);
        while (treeSearchStack.Count > 0)
        {
            var stateIndex = treeSearchStack.Pop();
            result[stateIndex] = true;
            for (var iterator = builder[stateIndex].TransitionIterator; iterator.Ok; iterator.Next())
            {
                var destinationIndex = iterator.Value.DestinationStateIndex;
                if (destinationIndex != stateIndex && inDegree[destinationIndex] == 1)
                {
                    treeSearchStack.Push(destinationIndex);
                }
            }
        }

        return(result);
    }

    // Checks whether the self-loops of two states allow the states to be merged.
    bool CanMergeStates(int stateIndex1, int stateIndex2)
    {
        var selfLoop1 = TryFindSelfLoop(stateIndex1);
        var selfLoop2 = TryFindSelfLoop(stateIndex2);

        // Can merge only if both states don't have self-loops
        // or these loops are exactly the same (group, weight and element distribution).
        return ((!selfLoop1.HasValue && !selfLoop2.HasValue) ||
            (selfLoop1.HasValue &&
            selfLoop2.HasValue &&
            selfLoop1.Value.Group == selfLoop2.Value.Group &&
            selfLoop1.Value.Weight == selfLoop2.Value.Weight &&
            EqualDistributions(selfLoop1.Value.ElementDistribution, selfLoop2.Value.ElementDistribution)));
    }

    // Checks whether the destinations of two transitions leaving the same state can be merged.
    bool CanMergeDestinations(Transition transition1, Transition transition2)
    {
        // Check that group and element distribution match
        if (transition1.Group != transition2.Group ||
            !EqualDistributions(transition1.ElementDistribution, transition2.ElementDistribution))
        {
            return(false);
        }

        return(CanMergeStates(transition1.DestinationStateIndex, transition2.DestinationStateIndex));
    }

    // Compares the element distributions of two transitions. Two absent distributions
    // (epsilon transitions) are considered equal; an absent one never equals a present one.
    bool EqualDistributions(Option <TElementDistribution> dist1, Option <TElementDistribution> dist2) =>
        dist1.HasValue == dist2.HasValue &&
        (!dist1.HasValue || dist1.Value.Equals(dist2.Value));

    // Finds the transition which points to the state itself, or returns null if there is none.
    // It is assumed that there's only one such transition; the first one found is returned.
    Transition?TryFindSelfLoop(int stateIndex)
    {
        for (var iterator = builder[stateIndex].TransitionIterator; iterator.Ok; iterator.Next())
        {
            if (iterator.Value.DestinationStateIndex == stateIndex)
            {
                return(iterator.Value);
            }
        }

        return(null);
    }

    // Adds EndWeight and all non-self-loop transitions from state2 into state1.
    // All state2 weights are multiplied by state2WeightMultiplier.
    void MergeStates(int state1Index, int state2Index, Weight state2WeightMultiplier)
    {
        var state1 = builder[state1Index];
        var state2 = builder[state2Index];

        // Sum end weights (skipped when state2 has a zero end weight)
        if (!state2.EndWeight.IsZero)
        {
            var state2EndWeight = state2WeightMultiplier * state2.EndWeight;
            state1.SetEndWeight(state1.EndWeight + state2EndWeight);
        }

        // Copy all transitions except the self-loop
        for (var iterator = state2.TransitionIterator; iterator.Ok; iterator.Next())
        {
            var transition = iterator.Value;
            if (transition.DestinationStateIndex != state2Index)
            {
                // The self-loop of state2 is deliberately not copied: callers only merge states whose
                // self-loops passed CanMergeStates, so an identical self-loop (same distribution and
                // weight) is already present in state1.
                transition = transition.With(weight: transition.Weight * state2WeightMultiplier);
                state1.AddTransition(transition);
            }
        }
    }
}
/// <summary>
/// Computes the outgoing transitions of a state of the determinization result by sweeping
/// over the character segment bounds of all transitions that leave the source states.
/// </summary>
/// <param name="sourceState">The source state of the determinized automaton, given as
/// (stateId, weight) pairs where state ids refer to states of the original automaton.</param>
/// <returns>
/// A list of (element distribution, weight, weighted state set) triples, one per emitted character segment.
/// The first two elements describe the transition; the third is the destination state set.
/// </returns>
protected override List<(DiscreteChar, Weight, Determinization.WeightedStateSet)> GetOutgoingTransitionsForDeterminization(
    Determinization.WeightedStateSet sourceState)
{
    // Log-weights that do not exceed this threshold are regarded as numerical noise and are not emitted.
    const double LogEps = -35;

    // Gather the segment bounds of every transition that leaves a state of the source set.
    // Each bound is paired with an integer which is used further down to index back into this list.
    var indexedBounds = new List<ValueTuple<int, TransitionCharSegmentBound>>();
    int transitionCount = 0;
    foreach (KeyValuePair<int, Weight> sourceEntry in sourceState)
    {
        var originalState = this.States[sourceEntry.Key];
        foreach (var transition in originalState.Transitions)
        {
            AddTransitionCharSegmentBounds(transition, sourceEntry.Value, indexedBounds);
        }

        transitionCount += originalState.Transitions.Count;
    }

    // Order the bounds for the sweep; sorting is skipped when at most one transition contributed.
    var sortedBounds = indexedBounds.ToArray();
    if (transitionCount > 1)
    {
        Array.Sort(sortedBounds, (left, right) => left.Item2.CompareTo(right.Item2));
    }

    // Every destination that appears in any bound starts with zero accumulated weight.
    var weightByDestination = new Dictionary<int, Weight>();
    foreach (var indexed in indexedBounds)
    {
        weightByDestination[indexed.Item2.DestinationStateId] = Weight.Zero;
    }

    var outgoing = new List<(DiscreteChar, Weight, Determinization.WeightedStateSet)>();
    var openSegments = new HashSet<TransitionCharSegmentBound>();
    Weight totalWeight = Weight.Zero;
    int segmentStart = char.MinValue;

    foreach (var indexed in sortedBounds)
    {
        int originalPosition = indexed.Item1;
        TransitionCharSegmentBound bound = indexed.Item2;
        var destinationId = bound.DestinationStateId;

        // Emit the segment that ends right before this bound, provided it is non-empty
        // and carries a non-negligible total weight.
        if (totalWeight.LogValue > LogEps && segmentStart < bound.Bound)
        {
            char lastChar = (char)(bound.Bound - 1);
            int charCount = lastChar - segmentStart + 1;
            DiscreteChar range = DiscreteChar.InRange((char)segmentStart, lastChar);

            var destinationSet = new Determinization.WeightedStateSet();
            foreach (KeyValuePair<int, Weight> entry in weightByDestination)
            {
                if (entry.Value.LogValue > LogEps)
                {
                    // Destination weights are expressed relative to the total weight of the segment.
                    destinationSet.Add(entry.Key, Weight.Product(entry.Value, Weight.Inverse(totalWeight)));
                }
            }

            outgoing.Add((range, Weight.Product(Weight.FromValue(charCount), totalWeight), destinationSet));
        }

        // The next segment begins at this bound.
        segmentStart = bound.Bound;

        if (bound.IsStart)
        {
            openSegments.Add(bound);
            totalWeight = Weight.Sum(totalWeight, bound.Weight);
            weightByDestination[destinationId] = Weight.Sum(weightByDestination[destinationId], bound.Weight);
            continue;
        }

        Debug.Assert(weightByDestination.ContainsKey(destinationId), "We shouldn't exit a state we didn't enter.");

        // The start bound that belongs to this end bound directly precedes it in the unsorted list.
        openSegments.Remove(indexedBounds[originalPosition - 1].Item2);

        if (double.IsInfinity(bound.Weight.Value))
        {
            // Infinite weights cannot be subtracted, so both sums are rebuilt from the segments still open.
            Weight rebuiltTotal = Weight.Zero;
            Weight rebuiltForDestination = Weight.Zero;
            foreach (var open in openSegments)
            {
                rebuiltTotal = Weight.Sum(rebuiltTotal, open.Weight);
                if (open.DestinationStateId == bound.DestinationStateId)
                {
                    rebuiltForDestination = Weight.Sum(rebuiltForDestination, open.Weight);
                }
            }

            totalWeight = rebuiltTotal;
            weightByDestination[destinationId] = rebuiltForDestination;
        }
        else
        {
            // With no segment left open the total is reset to exactly zero instead of being subtracted.
            if (openSegments.Count == 0)
            {
                totalWeight = Weight.Zero;
            }
            else
            {
                totalWeight = Weight.AbsoluteDifference(totalWeight, bound.Weight);
            }

            weightByDestination[destinationId] =
                Weight.AbsoluteDifference(weightByDestination[destinationId], bound.Weight);
        }
    }

    return outgoing;
}
/// <summary>
/// Lazily computes the outgoing transitions of a state of the determinization result by sweeping
/// over the sorted character segment bounds of all transitions that leave the source states.
/// </summary>
/// <param name="sourceStateSet">The source state of the determinized automaton, given as
/// a set of (stateId, weight) pairs where state ids refer to states of the original automaton.</param>
/// <returns>
/// A sequence of outgoing transitions, each built from an element distribution, a transition weight
/// and the weighted state set it leads to.
/// </returns>
protected override IEnumerable<Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
    Determinization.WeightedStateSet sourceStateSet)
{
    // Gather the segment bounds of every transition that leaves a state of the source set.
    var bounds = new List<TransitionCharSegmentBound>();
    for (var position = 0; position < sourceStateSet.Count; ++position)
    {
        var weightedSource = sourceStateSet[position];
        var originalState = this.States[weightedSource.Index];
        foreach (var transition in originalState.Transitions)
        {
            AddTransitionCharSegmentBounds(transition, weightedSource.Weight, bounds);
        }
    }

    bounds.Sort();

    // Running totals for the segments that are currently open.
    var total = WeightSum.Zero();
    var sumByDestination = new Dictionary<int, WeightSum>();
    var setBuilder = Determinization.WeightedStateSetBuilder.Create();
    var segmentStart = (int)char.MinValue;

    foreach (var bound in bounds)
    {
        // Emit the segment that ends right before this bound, provided it is non-empty
        // and at least one segment is open.
        if (total.Count != 0 && segmentStart < bound.Bound)
        {
            var lastChar = (char)(bound.Bound - 1);
            var charCount = lastChar - segmentStart + 1;
            var range = DiscreteChar.InRange((char)segmentStart, lastChar);
            var inverseTotal = Weight.Inverse(total.Sum);

            // Destination weights are expressed relative to the total weight of the segment.
            setBuilder.Reset();
            foreach (var entry in sumByDestination)
            {
                setBuilder.Add(entry.Key, entry.Value.Sum * inverseTotal);
            }

            // The weight returned by the builder alongside the set is folded into the transition weight.
            var (targetSet, targetSetWeight) = setBuilder.Get();
            yield return new Determinization.OutgoingTransition(
                range,
                Weight.Product(Weight.FromValue(charCount), total.Sum, targetSetWeight),
                targetSet);
        }

        // The next segment begins at this bound.
        segmentStart = bound.Bound;
        var destinationId = bound.DestinationStateId;

        if (!bound.IsStart)
        {
            Debug.Assert(sumByDestination.ContainsKey(destinationId), "We shouldn't exit a state we didn't enter.");
            Debug.Assert(!bound.Weight.IsInfinity);

            // Closing a segment: take its weight out of the total and out of its destination's sum.
            total -= bound.Weight;
            var remaining = sumByDestination[destinationId] - bound.Weight;
            if (remaining.Count == 0)
            {
                sumByDestination.Remove(destinationId);
            }
            else
            {
                sumByDestination[destinationId] = remaining;
            }

            continue;
        }

        // Opening a segment: add its weight to the total and to its destination's sum.
        total += bound.Weight;
        sumByDestination[destinationId] = sumByDestination.TryGetValue(destinationId, out var existing)
            ? existing + bound.Weight
            : new WeightSum(bound.Weight);
    }
}
/// <summary>
/// Recursively increases the value of this automaton on <paramref name="sequence"/> by <paramref name="weight"/>.
/// </summary>
/// <param name="stateIndex">Index of currently traversed state.</param>
/// <param name="isNewState">Indicates whether state <paramref name="stateIndex"/> was just created.</param>
/// <param name="selfLoopAlreadyMatched">Indicates whether self-loop on state <paramref name="stateIndex"/> was just matched.</param>
/// <param name="firstAllowedStateIndex">The minimum index of an existing state that can be used for the sequence.</param>
/// <param name="currentSequencePos">The current position in the generalized sequence.</param>
/// <param name="sequence">The generalized sequence.</param>
/// <param name="weight">The weight of the sequence.</param>
/// <returns>
/// <see langword="true"/> if the subsequence starting at <paramref name="currentSequencePos"/> has been successfully merged in,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// This function attempts to add as few new states and transitions as possible.
/// Its implementation is conceptually similar to adding string to a trie.
/// Existing transitions are tried first (in iteration order, backtracking when a recursive call fails);
/// a new transition or self-loop is only added when no existing path accepts the rest of the sequence.
/// </remarks>
private bool DoAddGeneralizedSequence(
    int stateIndex,
    bool isNewState,
    bool selfLoopAlreadyMatched,
    int firstAllowedStateIndex,
    int currentSequencePos,
    GeneralizedSequence sequence,
    Weight weight)
{
    // NOTE(review): success is only read inside Debug.Assert calls; each asserted call is followed by an
    // unconditional return(true), so a failure of such a call would go unnoticed when assertions are disabled.
    bool success;
    var builder = this.builder;
    var state = builder[stateIndex];

    // Whole sequence consumed: try to finish in the current state.
    if (currentSequencePos == sequence.Count)
    {
        if (!selfLoopAlreadyMatched)
        {
            // We can't finish in a state with a self-loop
            for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
            {
                if (iterator.Value.DestinationStateIndex == state.Index)
                {
                    return(false);
                }
            }
        }

        // Accumulate the sequence weight into the end weight of this state.
        state.SetEndWeight(Weight.Sum(state.EndWeight, weight));
        return(true);
    }

    var element = sequence[currentSequencePos];

    // Treat self-loop elements separately
    if (element.LoopWeight.HasValue)
    {
        if (selfLoopAlreadyMatched)
        {
            // Previous element was also a self-loop, we should try to find an epsilon transition
            for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
            {
                var transition = iterator.Value;
                if (transition.DestinationStateIndex != state.Index &&
                    transition.IsEpsilon &&
                    transition.DestinationStateIndex >= firstAllowedStateIndex)
                {
                    // The weight of the existing epsilon transition is divided out of the remaining weight.
                    if (this.DoAddGeneralizedSequence(
                        transition.DestinationStateIndex,
                        false,
                        false,
                        firstAllowedStateIndex,
                        currentSequencePos,
                        sequence,
                        Weight.Product(weight, Weight.Inverse(transition.Weight))))
                    {
                        return(true);
                    }
                }
            }

            // Epsilon transition not found, let's create a new one (with unit weight)
            var destination = state.AddEpsilonTransition(Weight.One);
            success = this.DoAddGeneralizedSequence(
                destination.Index, true, false, firstAllowedStateIndex, currentSequencePos, sequence, weight);
            Debug.Assert(success, "This call must always succeed.");
            return(true);
        }

        // Find a matching self-loop
        for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
        {
            var transition = iterator.Value;

            if (transition.IsEpsilon &&
                transition.DestinationStateIndex != state.Index &&
                transition.DestinationStateIndex >= firstAllowedStateIndex)
            {
                // Try this epsilon transition
                // NOTE(review): unlike the selfLoopAlreadyMatched branch above, weight is passed on without
                // dividing by transition.Weight. This is equivalent only if the epsilon transition has unit
                // weight (as the ones created by this method do) - confirm this is intended.
                if (this.DoAddGeneralizedSequence(
                    transition.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))
                {
                    return(true);
                }
            }

            // Is it a self-loop?
            if (transition.DestinationStateIndex == state.Index)
            {
                // Do self-loops match? Weight and group must be equal, and either both are epsilon
                // or both are non-epsilon with equal element distributions.
                if ((transition.Weight == element.LoopWeight.Value) &&
                    (element.Group == transition.Group) &&
                    ((transition.IsEpsilon && element.IsEpsilonSelfLoop) ||
                    (!transition.IsEpsilon &&
                    !element.IsEpsilonSelfLoop &&
                    transition.ElementDistribution.Equals(element.ElementDistribution))))
                {
                    // Skip the element in the sequence, remain in the same state
                    success = this.DoAddGeneralizedSequence(
                        stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
                    Debug.Assert(success, "This call must always succeed.");
                    return(true);
                }

                // The state also has a self-loop, but the two don't match
                return(false);
            }
        }

        if (!isNewState)
        {
            // Can't add self-loop to an existing state, it will change the language accepted by the state
            return(false);
        }

        // Add a new self-loop
        state.AddTransition(element.ElementDistribution, element.LoopWeight.Value, stateIndex, element.Group);
        success = this.DoAddGeneralizedSequence(stateIndex, false, true, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
        Debug.Assert(success, "This call must always succeed.");
        return(true);
    }

    // Try to find a transition for the element
    for (var iterator = state.TransitionIterator; iterator.Ok; iterator.Next())
    {
        var transition = iterator.Value;

        if (transition.IsEpsilon &&
            transition.DestinationStateIndex != state.Index &&
            transition.DestinationStateIndex >= firstAllowedStateIndex)
        {
            // Try this epsilon transition
            // NOTE(review): weight is passed on without dividing by transition.Weight, whereas the
            // non-epsilon case below does divide - confirm existing epsilon transitions always have unit weight.
            if (this.DoAddGeneralizedSequence(
                transition.DestinationStateIndex, false, false, firstAllowedStateIndex, currentSequencePos, sequence, weight))
            {
                return(true);
            }
        }

        // Is it a self-loop?
        if (transition.DestinationStateIndex == state.Index)
        {
            if (selfLoopAlreadyMatched)
            {
                // The self-loop was checked or added by the caller
                continue;
            }

            // Can't go through an existing self-loop, it will allow undesired sequences to be accepted
            return(false);
        }

        // Skip transitions that lead to a disallowed state or do not match the element.
        if (transition.DestinationStateIndex < firstAllowedStateIndex ||
            element.Group != transition.Group ||
            !element.ElementDistribution.Equals(transition.ElementDistribution))
        {
            continue;
        }

        // Skip the element in the sequence, move to the destination state
        // Weight of the existing transition must be taken into account
        // This case can fail if the next element is a self-loop and the destination state already has a different one
        if (this.DoAddGeneralizedSequence(
            transition.DestinationStateIndex,
            false,
            false,
            firstAllowedStateIndex,
            currentSequencePos + 1,
            sequence,
            Weight.Product(weight, Weight.Inverse(transition.Weight))))
        {
            return(true);
        }
    }

    // Add a new transition (unit weight; null is passed where the self-loop case above passes a state index)
    var newChild = state.AddTransition(element.ElementDistribution, Weight.One, null, element.Group);
    success = this.DoAddGeneralizedSequence(
        newChild.Index, true, false, firstAllowedStateIndex, currentSequencePos + 1, sequence, weight);
    Debug.Assert(success, "This call must always succeed.");
    return(true);
}