/// <summary>
/// Builds a weighted state set from the states collected so far, with the weights rescaled
/// so that the largest one becomes <see cref="Weight.One"/>.
/// </summary>
/// <returns>
/// A pair of the rescaled state set and the weight that was factored out of it
/// (the largest weight among the collected states).
/// </returns>
public (WeightedStateSet, Weight) Get()
                {
                    Debug.Assert(this.weightedStates.Count > 0);

                    var states = this.weightedStates.ToArray();

                    // A single state is trivially ordered and its own maximum:
                    // hand its weight back to the caller and store exactly one in the set.
                    if (states.Length == 1)
                    {
                        var only = states[0];
                        states[0] = new WeightedState(only.Index, Weight.One);
                        return (new WeightedStateSet(states), only.Weight);
                    }

                    Array.Sort(states);

                    // Find the largest weight; it becomes the factor returned next to the set.
                    var largest = states[0].Weight;
                    for (var position = 1; position < states.Length; ++position)
                    {
                        var candidate = states[position].Weight;
                        if (candidate > largest)
                        {
                            largest = candidate;
                        }
                    }

                    // Multiply every weight by the inverse of the largest one.
                    var scale = Weight.Inverse(largest);
                    for (var position = 0; position < states.Length; ++position)
                    {
                        var current = states[position];
                        states[position] = new WeightedState(current.Index, current.Weight * scale);
                    }

                    return (new WeightedStateSet(states), largest);
                }
Beispiel #2
0
            /// <summary>
            /// Merges states that hang off the same parent in the tree-like part of the automaton.
            /// </summary>
            /// <remarks>
            /// The traversal starts at the builder's start state and only follows transitions into
            /// "tree nodes" (see <c>FindTreeNodes</c> below). For every visited state:
            /// an epsilon transition into a compatible child folds the child into the state itself;
            /// any two other transitions with equal group and element distribution whose destinations
            /// have compatible self-loops get their destinations merged into one.
            /// States that were merged away are only flagged during the traversal and are removed
            /// from the builder in one call at the end.
            /// </remarks>
            public void MergeTrees()
            {
                var builder       = this.builder;
                // isRemovedNode[i] is set once state i has been folded into another state.
                var isRemovedNode = new bool[builder.StatesCount];
                var isTreeNode    = FindTreeNodes();

                // Explicit stack of state indices that still have to be processed.
                var stack = new Stack <int>();

                stack.Push(builder.StartStateIndex);

                while (stack.Count > 0)
                {
                    var stateIndex = stack.Pop();
                    var state      = builder[stateIndex];

                    // Transitions to non-tree nodes and self-loops should be ignored
                    bool IsMergeableTransition(Transition t) =>
                    isTreeNode[t.DestinationStateIndex] && t.DestinationStateIndex != stateIndex;

                    for (var iterator1 = state.TransitionIterator; iterator1.Ok; iterator1.Next())
                    {
                        var transition1 = iterator1.Value;

                        // ignore non-tree nodes and self-loops
                        if (!IsMergeableTransition(transition1))
                        {
                            continue;
                        }

                        // If it is an epsilon transition then try to merge with current state first
                        // Note: group doesn't matter for epsilon transitions (in generalized trees)
                        if (transition1.IsEpsilon &&
                            CanMergeStates(stateIndex, transition1.DestinationStateIndex))
                        {
                            // All transitions from transition1.DestinationStateIndex will be inserted
                            // into the current state and will then be visited by iterator1 like any
                            // other transition, without special treatment.
                            // NOTE(review): this relies on the iterator seeing transitions added
                            // during iteration - confirm against the TransitionIterator implementation.
                            MergeStates(stateIndex, transition1.DestinationStateIndex, transition1.Weight);
                            isRemovedNode[transition1.DestinationStateIndex] = true;
                            iterator1.Remove();
                            continue;
                        }

                        // Try to find transitions with which this one can be merged.
                        // Only transitions after transition1 are examined: iterator2 starts as a copy
                        // of iterator1 and is advanced once before the scan.
                        // NOTE(review): the outer loop depends on this copy advancing independently
                        // of iterator1 (value-type iterator semantics) - confirm.
                        var iterator2 = iterator1;
                        iterator2.Next();
                        for (; iterator2.Ok; iterator2.Next())
                        {
                            var transition2 = iterator2.Value;

                            Debug.Assert(
                                transition1.DestinationStateIndex != transition2.DestinationStateIndex,
                                "Parallel transitions must be merged earlier by MergeParallelTransitions()");

                            // ignore non-tree nodes and self-loops
                            if (IsMergeableTransition(transition2) &&
                                CanMergeDestinations(transition1, transition2))
                            {
                                // transition2 is dropped and only transition1 remains, so everything
                                // taken over from transition2's destination is rescaled by
                                // weight(transition2) / weight(transition1).
                                MergeStates(
                                    transition1.DestinationStateIndex,
                                    transition2.DestinationStateIndex,
                                    transition2.Weight * Weight.Inverse(transition1.Weight));
                                isRemovedNode[transition2.DestinationStateIndex] = true;
                                iterator2.Remove();
                            }
                        }

                        // The (possibly enlarged) destination of transition1 is processed later.
                        stack.Push(transition1.DestinationStateIndex);
                    }
                }

                // Drop every state that was flagged as merged away.
                // NOTE(review): the meaning of the second argument (true) is defined by
                // builder.RemoveStates and is not visible here - confirm.
                builder.RemoveStates(isRemovedNode, true);
                return;

                // Returns a boolean array which stores an "isTree" flag for each automaton state.
                // A state is considered a tree node if its in-degree (self-loops not counted) is 1 and
                // its parent is also a tree node. The start state is always flagged, regardless of its
                // in-degree, because the search below begins there.
                bool[] FindTreeNodes()
                {
                    // Count incoming transitions of every state.
                    var inDegree = new int[builder.StatesCount];

                    for (var i = 0; i < builder.StatesCount; ++i)
                    {
                        for (var iterator = builder[i].TransitionIterator; iterator.Ok; iterator.Next())
                        {
                            var destinationIndex = iterator.Value.DestinationStateIndex;
                            // Ignore self-loops
                            if (destinationIndex != i)
                            {
                                ++inDegree[destinationIndex];
                            }
                        }
                    }

                    var result = new bool[builder.StatesCount];

                    // Walk from the start state, descending only into children with in-degree 1.
                    var treeSearchStack = new Stack <int>();

                    treeSearchStack.Push(builder.StartStateIndex);
                    while (treeSearchStack.Count > 0)
                    {
                        var stateIndex = treeSearchStack.Pop();
                        result[stateIndex] = true;
                        for (var iterator = builder[stateIndex].TransitionIterator; iterator.Ok; iterator.Next())
                        {
                            var destinationIndex = iterator.Value.DestinationStateIndex;
                            if (destinationIndex != stateIndex && inDegree[destinationIndex] == 1)
                            {
                                treeSearchStack.Push(destinationIndex);
                            }
                        }
                    }

                    return(result);
                }

                // Checks whether two states have compatible self-loops and can therefore be merged.
                bool CanMergeStates(int stateIndex1, int stateIndex2)
                {
                    var selfLoop1 = TryFindSelfLoop(stateIndex1);
                    var selfLoop2 = TryFindSelfLoop(stateIndex2);

                    // Can merge only if both states have no self-loop, or both have one and the two
                    // loops are exactly the same (group, weight and element distribution).
                    return
                        ((!selfLoop1.HasValue && !selfLoop2.HasValue) ||
                         (selfLoop1.HasValue &&
                          selfLoop2.HasValue &&
                          selfLoop1.Value.Group == selfLoop2.Value.Group &&
                          selfLoop1.Value.Weight == selfLoop2.Value.Weight &&
                          EqualDistributions(selfLoop1.Value.ElementDistribution, selfLoop2.Value.ElementDistribution)));
                }

                // Checks whether the destinations of two transitions can be merged: the transitions
                // must agree on group and element distribution (their weights may differ), and the
                // destination states must pass CanMergeStates.
                bool CanMergeDestinations(Transition transition1, Transition transition2)
                {
                    // Check that group and element distribution match
                    if (transition1.Group != transition2.Group ||
                        !EqualDistributions(transition1.ElementDistribution, transition2.ElementDistribution))
                    {
                        return(false);
                    }

                    return(CanMergeStates(transition1.DestinationStateIndex, transition2.DestinationStateIndex));
                }

                // Compares the element distributions of two transitions. Two absent distributions
                // (epsilon transitions) are considered equal; an absent one never equals a present one.
                bool EqualDistributions(Option <TElementDistribution> dist1, Option <TElementDistribution> dist2) =>
                dist1.HasValue == dist2.HasValue &&
                (!dist1.HasValue || dist1.Value.Equals(dist2.Value));

                // Finds the transition which points to the state itself and returns it, or null when
                // the state has none. It is assumed that there is only one such transition: the first
                // one found is returned.
                Transition?TryFindSelfLoop(int stateIndex)
                {
                    for (var iterator = builder[stateIndex].TransitionIterator; iterator.Ok; iterator.Next())
                    {
                        if (iterator.Value.DestinationStateIndex == stateIndex)
                        {
                            return(iterator.Value);
                        }
                    }

                    return(null);
                }

                // Adds EndWeight and all non-self-loop transitions from state2 into state1.
                // All state2 weights are multiplied by state2WeightMultiplier.
                // state2 itself is left untouched; the caller is responsible for flagging it as removed.
                void MergeStates(int state1Index, int state2Index, Weight state2WeightMultiplier)
                {
                    var state1 = builder[state1Index];
                    var state2 = builder[state2Index];

                    // Sum end weights (nothing to add when state2's end weight is zero)
                    if (!state2.EndWeight.IsZero)
                    {
                        var state2EndWeight = state2WeightMultiplier * state2.EndWeight;
                        state1.SetEndWeight(state1.EndWeight + state2EndWeight);
                    }

                    // Copy all transitions
                    for (var iterator = state2.TransitionIterator; iterator.Ok; iterator.Next())
                    {
                        // 'transition' is a local copy, so scaling its weight below does not modify state2.
                        var transition = iterator.Value;
                        if (transition.DestinationStateIndex != state2Index)
                        {
                            // The self-loop of state2 is excluded by the condition above and is not
                            // copied: callers merge only states whose self-loops are compatible
                            // (same group, distribution and weight), so state1 is expected to
                            // already carry an equivalent loop.
                            transition.Weight *= state2WeightMultiplier;
                            state1.AddTransition(transition);
                        }
                    }
                }
            }
Beispiel #3
0
        /// <summary>
        /// Enumerates the transitions that leave a single state of the determinized automaton.
        /// </summary>
        /// <param name="sourceStateSet">The determinized state to expand, given as a set of
        /// (stateId, weight) pairs whose state ids refer to states of the original automaton.</param>
        /// <returns>
        /// A lazily produced sequence of (element distribution, weight, weighted state set) triples,
        /// one per character range between consecutive distinct segment bounds for which the running
        /// weight total is non-empty. The distribution and the weight describe the transition itself;
        /// the weighted state set is the state the transition leads to.
        /// </returns>
        protected override IEnumerable <Determinization.OutgoingTransition> GetOutgoingTransitionsForDeterminization(
            Determinization.WeightedStateSet sourceStateSet)
        {
            // Gather the character segment bounds contributed by every transition that leaves
            // any state of the source set (bounds are collected into one list for perf. reasons).
            var bounds = new List<TransitionCharSegmentBound>();

            for (var stateNo = 0; stateNo < sourceStateSet.Count; ++stateNo)
            {
                var weightedState = sourceStateSet[stateNo];
                foreach (var transition in this.States[weightedState.Index].Transitions)
                {
                    AddTransitionCharSegmentBounds(transition, weightedState.Weight, bounds);
                }
            }

            bounds.Sort();

            // Sweep over the sorted bounds, tracking which destination states are currently
            // "open" and with what accumulated weight, and emit one transition per segment.
            var openTotal        = WeightSum.Zero();
            var openStateWeights = new Dictionary<int, WeightSum>();
            var openStart        = (int)char.MinValue;
            var targetSetBuilder = Determinization.WeightedStateSetBuilder.Create();

            foreach (var bound in bounds)
            {
                var segmentIsOpen = openTotal.Count != 0 && openStart < bound.Bound;
                if (segmentIsOpen)
                {
                    // Emit the segment that ends just before this bound.
                    var lastChar     = (char)(bound.Bound - 1);
                    var charCount    = lastChar - openStart + 1;
                    var charRange    = DiscreteChar.InRange((char)openStart, lastChar);
                    var totalWeight  = openTotal.Sum;
                    var inverseTotal = Weight.Inverse(totalWeight);

                    // Destination weights are expressed relative to the segment total.
                    targetSetBuilder.Reset();
                    foreach (var entry in openStateWeights)
                    {
                        targetSetBuilder.Add(entry.Key, entry.Value.Sum * inverseTotal);
                    }

                    var (targetSet, targetSetWeight) = targetSetBuilder.Get();

                    yield return new Determinization.OutgoingTransition(
                        charRange,
                        Weight.Product(Weight.FromValue(charCount), totalWeight, targetSetWeight),
                        targetSet);
                }

                // Whatever follows begins at this bound.
                openStart = bound.Bound;

                if (!bound.IsStart)
                {
                    // End bound: take the weight back out of the running totals.
                    Debug.Assert(openStateWeights.ContainsKey(bound.DestinationStateId), "We shouldn't exit a state we didn't enter.");
                    Debug.Assert(!bound.Weight.IsInfinity);
                    openTotal -= bound.Weight;

                    var remaining = openStateWeights[bound.DestinationStateId] - bound.Weight;
                    if (remaining.Count != 0)
                    {
                        openStateWeights[bound.DestinationStateId] = remaining;
                    }
                    else
                    {
                        // Nothing left for this destination: stop tracking it.
                        openStateWeights.Remove(bound.DestinationStateId);
                    }

                    continue;
                }

                // Start bound: add the weight to the running totals.
                openTotal += bound.Weight;
                if (!openStateWeights.TryGetValue(bound.DestinationStateId, out var accumulated))
                {
                    openStateWeights[bound.DestinationStateId] = new WeightSum(bound.Weight);
                }
                else
                {
                    openStateWeights[bound.DestinationStateId] = accumulated + bound.Weight;
                }
            }
        }