/// <inheritdoc/>
            public virtual TAutomaton AsAutomaton()
            {
                // Every entry with a non-zero weight gets a fresh state from which the transitions
                // for the entry's sequence are added. The entry's weight is carried by an epsilon
                // transition from the builder's start state, and the last state of the added
                // transitions ends with weight one.
                var builder = new Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TAutomaton>.Builder();

                foreach (var pair in Dictionary)
                {
                    var sequenceWeight = pair.Value;
                    if (sequenceWeight.IsZero)
                    {
                        // Entries with zero weight contribute nothing to the automaton.
                        continue;
                    }

                    var chainHead = builder.AddState();
                    var chainTail = chainHead.AddTransitionsForSequence(pair.Key);
                    chainTail.SetEndWeight(Weight.One);
                    builder.Start.AddEpsilonTransition(sequenceWeight, chainHead.Index);
                }

                return builder.GetAutomaton();
            }
Пример #2
0
        /// <summary>
        /// Projects a single source sequence through this transducer, i.e. computes
        /// <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
        /// </summary>
        /// <param name="srcSequence">The sequence to project.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
        /// to the automaton representation of a projected sequence.
        /// </remarks>
        /// <remarks>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// The destination states are discovered with an explicit stack rather than recursion;
        /// each destination state corresponds to a (transducer state, position in the sequence) pair.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
        {
            Argument.CheckIfNotNull(srcSequence, "srcSequence");

            var transducer = this.sequencePairToWeight;
            if (transducer.IsCanonicZero())
            {
                return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
            }

            var manipulator =
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
            var sequenceLength = manipulator.GetLength(srcSequence);

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            // Maps (transducer state index, sequence position) to the index of the destination state.
            var stateIndexByPair = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions still have to be generated.
            var pending = new Stack<(int state1, int state2, int destStateIndex)>();

            // Returns the destination state for the given pair, creating it and scheduling it
            // for processing the first time the pair is seen.
            int GetOrCreateDestState(PairListAutomaton.State transducerState, int position)
            {
                var key = (transducerState.Index, position);
                if (stateIndexByPair.TryGetValue(key, out var knownIndex))
                {
                    return knownIndex;
                }

                var created = builder.AddState();

                // The state can be an end state only once the whole source sequence is consumed.
                created.SetEndWeight(position == sequenceLength ? transducerState.EndWeight : Weight.Zero);
                pending.Push((transducerState.Index, position, created.Index));
                stateIndexByPair[key] = created.Index;
                return created.Index;
            }

            // Seed the traversal with the start state at the beginning of the sequence.
            builder.StartStateIndex = GetOrCreateDestState(transducer.Start, 0);

            while (pending.Count > 0)
            {
                var (currentTransducerIndex, position, currentDestIndex) = pending.Pop();

                var currentTransducerState = transducer.States[currentTransducerIndex];
                var currentDestState = builder[currentDestIndex];

                foreach (var transition in currentTransducerState.Transitions)
                {
                    var nextTransducerState = currentTransducerState.Owner.States[transition.DestinationStateIndex];

                    if (IsSrcEpsilon(transition))
                    {
                        // Source-epsilon transition: the position in the source sequence does not advance.
                        var pairDistribution = transition.ElementDistribution;
                        var emitted =
                            pairDistribution.HasValue
                                ? pairDistribution.Value.Second
                                : Option.None;
                        var epsilonTarget = GetOrCreateDestState(nextTransducerState, position);
                        currentDestState.AddTransition(emitted, transition.Weight, epsilonTarget, transition.Group);
                        continue;
                    }

                    if (position >= sequenceLength)
                    {
                        // No source element is left for a non-epsilon transition to consume.
                        continue;
                    }

                    var sourceElement = manipulator.GetElement(srcSequence, position);

                    var logScale = transition.ElementDistribution.Value.ProjectFirst(
                        sourceElement, out var projected);
                    if (double.IsNegativeInfinity(logScale))
                    {
                        // A log scale of negative infinity means the transition is skipped.
                        continue;
                    }

                    var scaledWeight = transition.Weight * Weight.FromLogValue(logScale);
                    var consumingTarget = GetOrCreateDestState(nextTransducerState, position + 1);
                    currentDestState.AddTransition(projected, scaledWeight, consumingTarget, transition.Group);
                }
            }

            var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplifier.RemoveDeadStates();
            simplifier.SimplifyIfNeeded();

            return builder.GetAutomaton();
        }
Пример #3
0
        /// <summary>
        /// Projects a source automaton through this transducer, i.e. computes
        /// <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">The automaton to project. <c>MakeEpsilonFree</c> is called on it before projecting.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// The destination states are discovered with an explicit stack rather than recursion;
        /// each destination state corresponds to a (transducer state, source automaton state) pair.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
        {
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var transducer = this.sequencePairToWeight;

            if (srcAutomaton.IsCanonicZero() || transducer.IsCanonicZero())
            {
                return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
            }

            // The loop below asserts that the source automaton has no epsilon transitions.
            srcAutomaton.MakeEpsilonFree();

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            // Maps (transducer state index, source state index) to the index of the destination state.
            var stateIndexByPair = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions still have to be generated.
            var pending = new Stack<(int state1, int state2, int destStateIndex)>();

            // Returns the destination state for the given pair, creating it and scheduling it
            // for processing the first time the pair is seen.
            int GetOrCreateDestState(
                PairListAutomaton.State transducerState,
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State sourceState)
            {
                var key = (transducerState.Index, sourceState.Index);
                if (stateIndexByPair.TryGetValue(key, out var knownIndex))
                {
                    return knownIndex;
                }

                var created = builder.AddState();
                created.SetEndWeight(transducerState.EndWeight * sourceState.EndWeight);
                pending.Push((transducerState.Index, sourceState.Index, created.Index));
                stateIndexByPair[key] = created.Index;
                return created.Index;
            }

            // Seed the traversal with the pair of start states.
            builder.StartStateIndex = GetOrCreateDestState(transducer.Start, srcAutomaton.Start);

            while (pending.Count > 0)
            {
                var (currentTransducerIndex, currentSourceIndex, currentDestIndex) = pending.Pop();

                var currentTransducerState = transducer.States[currentTransducerIndex];
                var currentSourceState = srcAutomaton.States[currentSourceIndex];
                var currentDestState = builder[currentDestIndex];

                foreach (var transition in currentTransducerState.Transitions)
                {
                    var nextTransducerState = currentTransducerState.Owner.States[transition.DestinationStateIndex];

                    if (IsSrcEpsilon(transition))
                    {
                        // Source-epsilon transition: the source automaton stays in the same state.
                        var pairDistribution = transition.ElementDistribution;
                        var emitted =
                            pairDistribution.HasValue
                                ? pairDistribution.Value.Second
                                : Option.None;
                        var epsilonTarget = GetOrCreateDestState(nextTransducerState, currentSourceState);
                        currentDestState.AddTransition(emitted, transition.Weight, epsilonTarget, transition.Group);
                        continue;
                    }

                    // Pair the transducer transition with every transition leaving the current source state.
                    foreach (var sourceTransition in currentSourceState.Transitions)
                    {
                        Debug.Assert(!sourceTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var nextSourceState = currentSourceState.Owner.States[sourceTransition.DestinationStateIndex];

                        var logScale = transition.ElementDistribution.Value.ProjectFirst(
                            sourceTransition.ElementDistribution.Value, out var projected);
                        if (double.IsNegativeInfinity(logScale))
                        {
                            // A log scale of negative infinity means this pairing is skipped.
                            continue;
                        }

                        var combinedWeight = Weight.Product(transition.Weight, sourceTransition.Weight, Weight.FromLogValue(logScale));
                        var pairedTarget = GetOrCreateDestState(nextTransducerState, nextSourceState);
                        currentDestState.AddTransition(projected, combinedWeight, pairedTarget, transition.Group);
                    }
                }
            }

            var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplifier.RemoveDeadStates();
            simplifier.SimplifyIfNeeded();

            return builder.GetAutomaton();
        }
Пример #4
0
        /// <summary>
        /// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
        /// </summary>
        /// <param name="srcSequence">The sequence to project.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
        /// to the automaton representation of a projected sequence.
        /// </remarks>
        /// <remarks>
        /// The projection is built with an explicit worklist instead of recursion, so the depth of the
        /// call stack does not grow with the length of <paramref name="srcSequence"/>.
        /// The code of this method has a lot in common with the code of Automaton&lt;&gt;.BuildProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
        {
            Argument.CheckIfNotNull(srcSequence, "srcSequence");

            var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            var mappingAutomaton = this.sequencePairToWeight;
            if (mappingAutomaton.IsCanonicZero())
            {
                return result.GetAutomaton();
            }

            var sourceSequenceManipulator =
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;

            // The length does not change during the projection, so it is queried only once.
            var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

            // Maps (mapping state index, position in the source sequence) to the destination state index.
            var destStateCache = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions have not been generated yet.
            var pendingStates = new Stack<(int mappingStateIndex, int srcSequenceIndex, int destStateIndex)>();

            result.StartStateIndex = GetOrAddDestState(mappingAutomaton.Start, 0);

            while (pendingStates.Count > 0)
            {
                var (mappingStateIndex, srcSequenceIndex, destStateIndex) = pendingStates.Pop();

                var mappingState = mappingAutomaton.States[mappingStateIndex];
                var destState = result[destStateIndex];

                // Enumerate transitions from the current mapping state
                foreach (var mappingTransition in mappingState.Transitions)
                {
                    var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    // Epsilon transition case: the position in the source sequence is not advanced
                    if (IsSrcEpsilon(mappingTransition))
                    {
                        var destElementWeights =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = GetOrAddDestState(destMappingState, srcSequenceIndex);
                        destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                        continue;
                    }

                    // Normal transition case: it needs a source element to consume
                    if (srcSequenceIndex >= srcSequenceLength)
                    {
                        continue;
                    }

                    var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                    var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcSequenceElement, out var destElementDistribution);
                    if (double.IsNegativeInfinity(projectionLogScale))
                    {
                        continue;
                    }

                    var weight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                    var childDestState = GetOrAddDestState(destMappingState, srcSequenceIndex + 1);
                    destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);
                }
            }

            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return result.GetAutomaton();

            // Returns the index of the destination state for the given (mapping state, sequence position) pair.
            // A pair seen for the first time gets a new state, which is also scheduled for processing.
            int GetOrAddDestState(PairListAutomaton.State mappingState, int srcSequenceIndex)
            {
                var statePair = (mappingState.Index, srcSequenceIndex);

                if (destStateCache.TryGetValue(statePair, out var cachedStateIndex))
                {
                    return cachedStateIndex;
                }

                var newState = result.AddState();

                // Only a state reached after consuming the whole source sequence may be an end state.
                newState.SetEndWeight(srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero);

                destStateCache.Add(statePair, newState.Index);
                pendingStates.Push((mappingState.Index, srcSequenceIndex, newState.Index));
                return newState.Index;
            }
        }
Пример #5
0
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">The automaton to project. <c>MakeEpsilonFree</c> is called on it before projecting.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// The projection is built with an explicit worklist instead of recursion, so the depth of the
        /// call stack does not grow with the size of the automata involved.
        /// The code of this method has a lot in common with the code of Automaton&lt;&gt;.BuildProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
        {
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            var mappingAutomaton = this.sequencePairToWeight;
            if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
            {
                return result.GetAutomaton();
            }

            // The projected automaton must be epsilon-free
            srcAutomaton.MakeEpsilonFree();

            // Maps (mapping state index, source state index) to the destination state index.
            var destStateCache = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions have not been generated yet.
            var pendingStates = new Stack<(int mappingStateIndex, int srcStateIndex, int destStateIndex)>();

            result.StartStateIndex = GetOrAddDestState(mappingAutomaton.Start, srcAutomaton.Start);

            while (pendingStates.Count > 0)
            {
                var (mappingStateIndex, srcStateIndex, destStateIndex) = pendingStates.Pop();

                var mappingState = mappingAutomaton.States[mappingStateIndex];
                var srcState = srcAutomaton.States[srcStateIndex];
                var destState = result[destStateIndex];

                // Iterate over transitions from mappingState
                foreach (var mappingTransition in mappingState.Transitions)
                {
                    var childMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    // Epsilon transition case: the source automaton stays in the same state
                    if (IsSrcEpsilon(mappingTransition))
                    {
                        var destElementDistribution =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = GetOrAddDestState(childMappingState, srcState);
                        destState.AddTransition(destElementDistribution, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                        continue;
                    }

                    // Pair the mapping transition with every transition leaving srcState
                    foreach (var srcTransition in srcState.Transitions)
                    {
                        Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var srcChildState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                        var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcTransition.ElementDistribution.Value, out var destElementDistribution);
                        if (double.IsNegativeInfinity(projectionLogScale))
                        {
                            continue;
                        }

                        var destWeight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));
                        var childDestStateIndex = GetOrAddDestState(childMappingState, srcChildState);
                        destState.AddTransition(destElementDistribution, destWeight, childDestStateIndex, mappingTransition.Group);
                    }
                }
            }

            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return result.GetAutomaton();

            // Returns the index of the destination state for the given (mapping state, source state) pair.
            // A pair seen for the first time gets a new state, which is also scheduled for processing.
            int GetOrAddDestState(
                PairListAutomaton.State mappingState,
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
            {
                var statePair = (mappingState.Index, srcState.Index);

                if (destStateCache.TryGetValue(statePair, out var cachedStateIndex))
                {
                    return cachedStateIndex;
                }

                var newState = result.AddState();
                newState.SetEndWeight(Weight.Product(mappingState.EndWeight, srcState.EndWeight));

                destStateCache.Add(statePair, newState.Index);
                pendingStates.Push((mappingState.Index, srcState.Index, newState.Index));
                return newState.Index;
            }
        }
Пример #6
0
        /// <summary>
        /// Projects a source automaton through this transducer, i.e. computes
        /// <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">The automaton to project. <c>MakeEpsilonFree</c> is called on it before projecting.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// When the source is a <c>StringAutomaton</c> reporting element log value overrides,
        /// the transition weights receive the special treatment described in the comments below.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
        {
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var transducer = this.sequencePairToWeight;

            if (srcAutomaton.IsCanonicZero() || transducer.IsCanonicZero())
            {
                return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
            }

            // The loop below asserts that the source automaton has no epsilon transitions.
            srcAutomaton.MakeEpsilonFree();

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            // Maps (transducer state index, source state index) to the index of the destination state.
            var stateIndexByPair = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions still have to be generated.
            var pending = new Stack<(int state1, int state2, int destStateIndex)>();

            // Returns the destination state for the given pair, creating it and scheduling it
            // for processing the first time the pair is seen.
            int GetOrCreateDestState(
                PairListAutomaton.State transducerState,
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State sourceState)
            {
                var key = (transducerState.Index, sourceState.Index);
                if (stateIndexByPair.TryGetValue(key, out var knownIndex))
                {
                    return knownIndex;
                }

                var created = builder.AddState();
                created.SetEndWeight(transducerState.EndWeight * sourceState.EndWeight);
                pending.Push((transducerState.Index, sourceState.Index, created.Index));
                stateIndexByPair[key] = created.Index;
                return created.Index;
            }

            // Seed the traversal with the pair of start states.
            builder.StartStateIndex = GetOrCreateDestState(transducer.Start, srcAutomaton.Start);

            // The override handling applies only when the source is a StringAutomaton that reports overrides.
            var sourceAsStringAutomaton = srcAutomaton as StringAutomaton;
            var sourceHasLogOverrides = sourceAsStringAutomaton?.HasElementLogValueOverrides ?? false;

            while (pending.Count > 0)
            {
                var (currentTransducerIndex, currentSourceIndex, currentDestIndex) = pending.Pop();

                var currentTransducerState = transducer.States[currentTransducerIndex];
                var currentSourceState = srcAutomaton.States[currentSourceIndex];
                var currentDestState = builder[currentDestIndex];

                foreach (var transition in currentTransducerState.Transitions)
                {
                    var nextTransducerState = transducer.States[transition.DestinationStateIndex];

                    if (IsSrcEpsilon(transition))
                    {
                        // Source-epsilon transition: the source automaton stays in the same state.
                        var pairDistribution = transition.ElementDistribution;
                        var emitted =
                            pairDistribution.HasValue
                                ? pairDistribution.Value.Second
                                : Option.None;
                        var epsilonTarget = GetOrCreateDestState(nextTransducerState, currentSourceState);
                        currentDestState.AddTransition(emitted, transition.Weight, epsilonTarget, transition.Group);
                        continue;
                    }

                    // Pair the transducer transition with every transition leaving the current source state.
                    foreach (var sourceTransition in currentSourceState.Transitions)
                    {
                        Debug.Assert(!sourceTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var nextSourceState = srcAutomaton.States[sourceTransition.DestinationStateIndex];

                        var logScale = transition.ElementDistribution.Value.ProjectFirst(
                            sourceTransition.ElementDistribution.Value, out var projected);
                        if (double.IsNegativeInfinity(logScale))
                        {
                            // A log scale of negative infinity means this pairing is skipped.
                            continue;
                        }

                        // A DiscreteChar with a log probability override is not normalized, so the
                        // projection scale is corrected by the log of the total mass of its ranges.
                        if (projected.HasValue && sourceHasLogOverrides)
                        {
                            var sourceChars =
                                (DiscreteChar)(IDistribution<char>)sourceTransition.ElementDistribution.Value;
                            if (sourceChars.HasLogProbabilityOverride)
                            {
                                var rangesMass = sourceChars.Ranges.EnumerableSum(
                                    range => range.Probability.Value * (range.EndExclusive - range.StartInclusive));
                                logScale -= System.Math.Log(rangesMass);
                            }
                        }

                        Weight combinedWeight;
                        if (sourceHasLogOverrides && projected.HasNoValue)
                        {
                            // With overrides present, a transition that emits nothing gets unit weight.
                            combinedWeight = Weight.One;
                        }
                        else
                        {
                            combinedWeight = Weight.Product(
                                transition.Weight,
                                sourceTransition.Weight,
                                Weight.FromLogValue(logScale));
                        }

                        // We don't want an unnormalizable distribution to become normalizable due to a rounding error,
                        // so a log weight within 1e-12 of zero is snapped to exactly one.
                        if (Math.Abs(combinedWeight.LogValue) < 1e-12)
                        {
                            combinedWeight = Weight.One;
                        }

                        var pairedTarget = GetOrCreateDestState(nextTransducerState, nextSourceState);
                        currentDestState.AddTransition(projected, combinedWeight, pairedTarget, transition.Group);
                    }
                }
            }

            var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplifier.RemoveDeadStates();
            simplifier.SimplifyIfNeeded();

            return builder.GetAutomaton();
        }