示例#1
0
            /// <summary>
            /// Evaluates the automaton, starting from this state, on the part of <paramref name="sequence"/>
            /// that begins at <paramref name="sequencePosition"/>. Results are stored in
            /// <paramref name="valueCache"/> under the (state index, position) pair and reused on later calls.
            /// </summary>
            /// <param name="sequence">The sequence being evaluated.</param>
            /// <param name="sequencePosition">Index of the next element of <paramref name="sequence"/> to consume.</param>
            /// <param name="valueCache">Memoization table keyed by (state index, sequence position); read and updated by this call.</param>
            /// <returns>The weight accumulated from this state over the remaining part of the sequence.</returns>
            private Weight DoGetValue(
                TSequence sequence, int sequencePosition, Dictionary<IntPair, Weight> valueCache)
            {
                var cacheKey = new IntPair(this.Index, sequencePosition);
                Weight memoized;
                if (valueCache.TryGetValue(cacheKey, out memoized))
                {
                    return memoized;
                }

                EpsilonClosure closure = this.GetEpsilonClosure();

                Weight total = Weight.Zero;
                int length = Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetLength(sequence);

                if (sequencePosition >= length)
                {
                    // Nothing is left to consume: the value is the end weight of the epsilon closure.
                    total = closure.EndWeight;
                }
                else
                {
                    TElement element = Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetElement(sequence, sequencePosition);

                    for (int closureIndex = 0; closureIndex < closure.Size; ++closureIndex)
                    {
                        State closureState = closure.GetStateByIndex(closureIndex);
                        Weight closureWeight = closure.GetStateWeightByIndex(closureIndex);

                        for (int t = 0; t < closureState.transitionCount; t++)
                        {
                            Transition transition = closureState.transitions[t];
                            if (transition.IsEpsilon)
                            {
                                // Epsilon destinations are already members of the closure being iterated.
                                continue;
                            }

                            State nextState = this.Owner.states[transition.DestinationStateIndex];
                            Weight elementWeight = Weight.FromLogValue(transition.ElementDistribution.GetLogProb(element));
                            if (elementWeight.IsZero || transition.Weight.IsZero)
                            {
                                // A zero factor makes the whole product zero; skip the recursive evaluation.
                                continue;
                            }

                            Weight tailValue = nextState.DoGetValue(sequence, sequencePosition + 1, valueCache);
                            if (tailValue.IsZero)
                            {
                                continue;
                            }

                            total = Weight.Sum(
                                total,
                                Weight.Product(closureWeight, transition.Weight, elementWeight, tailValue));
                        }
                    }
                }

                valueCache.Add(cacheKey, total);
                return total;
            }
            /// <inheritdoc/>
            public TThis SumLog(double logWeight1, TThis weightFunction, double logWeight2)
            {
                var ownScale = Weight.FromLogValue(logWeight1);
                var otherScale = Weight.FromLogValue(logWeight2);

                // Both projections are deferred; they are enumerated only when FromWeights consumes them.
                var ownEntries = Dictionary.Select(
                    entry => new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * ownScale));
                var otherEntries = weightFunction.Dictionary.Select(
                    entry => new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * otherScale));

                // Entries of this function come first, followed by the entries of the other function.
                return FromWeights(ownEntries.Concat(otherEntries));
            }
示例#3
0
                /// <inheritdoc/>
                public MultiRepresentationWeightFunction<TDictionary> ConstantOnSupportOfLog(double logValue, MultiRepresentationWeightFunction<TDictionary> weightFunction)
                {
                    if (!weightFunction.TryEnumerateSupportInternal(MaxDictionarySize, out var support))
                    {
                        // The support was not enumerated (the call returned false): go through the automaton form.
                        var constantAutomaton = weightFunction.AsAutomaton().ConstantOnSupportLog(logValue);
                        return FromAutomaton(constantAutomaton);
                    }

                    if (!support.Any())
                    {
                        // Empty support.
                        return Zero();
                    }

                    // Exactly one supported sequence with log-value 0 is returned as a point.
                    // The second-element probe runs only when logValue is exactly 0.
                    if (logValue == 0 && !support.Skip(1).Any())
                    {
                        return FromPoint(support.Single());
                    }

                    var weight = Weight.FromLogValue(logValue);
                    var entries = support.Select(sequence => new KeyValuePair<TSequence, Weight>(sequence, weight));
                    return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(entries));
                }
示例#4
0
        /// <summary>
        /// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
        /// </summary>
        /// <param name="srcSequence">The sequence to project. Checked with <c>Argument.CheckIfNotNull</c>.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// <para>
        /// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
        /// to the automaton representation of a projected sequence.
        /// </para>
        /// <para>
        /// Destination states are identified by (transducer state index, position in the source sequence) pairs
        /// and are processed from an explicit work stack, so the traversal itself is not recursive.
        /// </para>
        /// <para>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// </para>
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
        {
            Argument.CheckIfNotNull(srcSequence, "srcSequence");

            var transducer = this.sequencePairToWeight;
            if (transducer.IsCanonicZero())
            {
                return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
            }

            var manipulator =
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
            var sequenceLength = manipulator.GetLength(srcSequence);

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            // (transducer state index, sequence position) -> index of the destination state created for it.
            var knownStates = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions still have to be generated.
            var pending = new Stack<(int mappingStateIndex, int sequenceIndex, int destStateIndex)>();

            // Returns the destination state for the given pair, creating it and scheduling it
            // for processing on first use.
            int GetOrAddDestState(PairListAutomaton.State mappingState, int srcSequenceIndex)
            {
                var key = (mappingState.Index, srcSequenceIndex);
                if (destStateLookup(key, out var existingIndex))
                {
                    return existingIndex;
                }

                var created = builder.AddState();

                // A destination state can be accepting only once the whole source sequence has been consumed.
                var endWeight = srcSequenceIndex == sequenceLength
                    ? mappingState.EndWeight
                    : Weight.Zero;
                created.SetEndWeight(endWeight);

                pending.Push((mappingState.Index, srcSequenceIndex, created.Index));
                knownStates[key] = created.Index;
                return created.Index;
            }

            // Thin wrapper so the cache probe reads as a single named operation.
            bool destStateLookup((int, int) key, out int index)
            {
                return knownStates.TryGetValue(key, out index);
            }

            // Seed the work stack with the start state.
            builder.StartStateIndex = GetOrAddDestState(transducer.Start, 0);

            while (pending.Count > 0)
            {
                var (currentMappingIndex, currentSequenceIndex, currentDestIndex) = pending.Pop();

                var currentMappingState = transducer.States[currentMappingIndex];
                var currentDestState = builder[currentDestIndex];

                foreach (var mappingTransition in currentMappingState.Transitions)
                {
                    var nextMappingState = currentMappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    if (IsSrcEpsilon(mappingTransition))
                    {
                        // Source-epsilon transition: the position in the source sequence does not advance.
                        var destElementWeights =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var nextDestIndex = GetOrAddDestState(nextMappingState, currentSequenceIndex);
                        currentDestState.AddTransition(destElementWeights, mappingTransition.Weight, nextDestIndex, mappingTransition.Group);
                    }
                    else if (currentSequenceIndex < sequenceLength)
                    {
                        // Non-epsilon transition: project the current source element through it.
                        var sourceElement = manipulator.GetElement(srcSequence, currentSequenceIndex);

                        var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            sourceElement, out var projectedDistribution);

                        // A log-scale of negative infinity means the transition contributes nothing.
                        if (!double.IsNegativeInfinity(logScale))
                        {
                            var weight = mappingTransition.Weight * Weight.FromLogValue(logScale);
                            var nextDestIndex = GetOrAddDestState(nextMappingState, currentSequenceIndex + 1);
                            currentDestState.AddTransition(projectedDistribution, weight, nextDestIndex, mappingTransition.Group);
                        }
                    }
                }
            }

            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return builder.GetAutomaton();
        }
示例#5
0
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">
        /// The automaton to project. Checked with <c>Argument.CheckIfNotNull</c>.
        /// Unless either operand is the canonic zero, <c>MakeEpsilonFree()</c> is invoked on this argument.
        /// </param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// <para>
        /// Destination states are identified by (transducer state index, source state index) pairs
        /// and are processed from an explicit work stack, so the traversal itself is not recursive.
        /// </para>
        /// <para>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// </para>
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
        {
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var transducer = this.sequencePairToWeight;

            if (srcAutomaton.IsCanonicZero() || transducer.IsCanonicZero())
            {
                return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
            }

            // The traversal below only follows non-epsilon source transitions.
            srcAutomaton.MakeEpsilonFree();

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            // (transducer state index, source state index) -> index of the destination state created for it.
            var knownStates = new Dictionary<(int, int), int>();

            // Destination states whose outgoing transitions still have to be generated.
            var pending = new Stack<(int mappingStateIndex, int srcStateIndex, int destStateIndex)>();

            // Returns the destination state for the given pair, creating it and scheduling it
            // for processing on first use.
            int GetOrAddDestState(
                PairListAutomaton.State mappingState,
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
            {
                var key = (mappingState.Index, srcState.Index);
                if (knownStates.TryGetValue(key, out var existingIndex))
                {
                    return existingIndex;
                }

                var created = builder.AddState();
                created.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
                pending.Push((mappingState.Index, srcState.Index, created.Index));
                knownStates[key] = created.Index;
                return created.Index;
            }

            // Seed the work stack with the start state.
            builder.StartStateIndex = GetOrAddDestState(transducer.Start, srcAutomaton.Start);

            while (pending.Count > 0)
            {
                var (currentMappingIndex, currentSrcIndex, currentDestIndex) = pending.Pop();

                var currentMappingState = transducer.States[currentMappingIndex];
                var currentSrcState = srcAutomaton.States[currentSrcIndex];
                var currentDestState = builder[currentDestIndex];

                foreach (var mappingTransition in currentMappingState.Transitions)
                {
                    var nextMappingState = currentMappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    if (IsSrcEpsilon(mappingTransition))
                    {
                        // Source-epsilon transition: the source automaton stays in its current state.
                        var destElementDistribution =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var nextDestIndex = GetOrAddDestState(nextMappingState, currentSrcState);
                        currentDestState.AddTransition(destElementDistribution, mappingTransition.Weight, nextDestIndex, mappingTransition.Group);
                    }
                    else
                    {
                        // Pair this transducer transition with every transition leaving the source state.
                        foreach (var srcTransition in currentSrcState.Transitions)
                        {
                            Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                            var nextSrcState = currentSrcState.Owner.States[srcTransition.DestinationStateIndex];

                            var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                                srcTransition.ElementDistribution.Value, out var projectedDistribution);

                            // A log-scale of negative infinity means this pairing contributes nothing.
                            if (!double.IsNegativeInfinity(logScale))
                            {
                                var weight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                                var nextDestIndex = GetOrAddDestState(nextMappingState, nextSrcState);
                                currentDestState.AddTransition(projectedDistribution, weight, nextDestIndex, mappingTransition.Group);
                            }
                        }
                    }
                }
            }

            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return builder.GetAutomaton();
        }
            /// <inheritdoc/>
            public virtual bool TryNormalizeValues(out TThis normalizedFunction, out double logNormalizer)
            {
                // A plain local is needed because the lambda below cannot capture an out parameter.
                double normalizer = GetLogNormalizer();
                logNormalizer = normalizer;

                bool isFinite = !double.IsNaN(normalizer) && !double.IsInfinity(normalizer);

                // Two cases return this instance unchanged:
                //  - the normalizer is NaN or infinite, so normalization is reported as failed;
                //  - the log-normalizer is exactly 0, so the values need no change.
                if (!isFinite || normalizer == 0.0)
                {
                    normalizedFunction = (TThis)this;
                    return isFinite;
                }

                var shifted = Dictionary.Select(
                    entry => new KeyValuePair<TSequence, Weight>(entry.Key, Weight.FromLogValue(entry.Value.LogValue - normalizer)));
                normalizedFunction = FromDistinctWeights(shifted);
                return true;
            }
            /// <inheritdoc/>
            public TThis ScaleLog(double logScale)
            {
                var factor = Weight.FromLogValue(logScale);

                // Keys are left untouched; every weight is multiplied by the same factor.
                var scaledEntries = Dictionary.Select(
                    entry => new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * factor));

                return FromDistinctWeights(scaledEntries);
            }
示例#8
0
        /// <summary>
        /// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
        /// </summary>
        /// <param name="srcSequence">The sequence to project. Checked with <c>Argument.CheckIfNotNull</c>.</param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
        /// to the automaton representation of a projected sequence.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
        {
            Argument.CheckIfNotNull(srcSequence, "srcSequence");

            var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            if (this.sequencePairToWeight.IsCanonicZero())
            {
                return result.GetAutomaton();
            }

            // Neither the manipulator nor the length of the source sequence changes during the traversal,
            // so both are obtained once here rather than on every recursive call.
            var sourceSequenceManipulator =
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
            var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

            // (transducer state index, sequence position) -> index of the destination state created for it.
            var destStateCache = new Dictionary<(int, int), int>();

            result.StartStateIndex = BuildProjectionOfSequence(this.sequencePairToWeight.Start, 0);

            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return result.GetAutomaton();

            // Recursively builds the projection of the given sequence onto this transducer and returns
            // the index of the destination state for (mappingState, srcSequenceIndex).
            int BuildProjectionOfSequence(PairListAutomaton.State mappingState, int srcSequenceIndex)
            {
                //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
                //// Unfortunately, it's not clear how to avoid the duplication in the current design.

                var statePair = (mappingState.Index, srcSequenceIndex);

                if (destStateCache.TryGetValue(statePair, out var destStateIndex))
                {
                    return destStateIndex;
                }

                var destState = result.AddState();

                // Register the state before recursing so that cycles resolve to it instead of recursing forever.
                destStateCache.Add(statePair, destState.Index);

                // Enumerate transitions from the current mapping state
                foreach (var mappingTransition in mappingState.Transitions)
                {
                    var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    // Source-epsilon transition: the position in the source sequence does not advance.
                    if (IsSrcEpsilon(mappingTransition))
                    {
                        var destElementWeights =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var childDestStateIndex = BuildProjectionOfSequence(destMappingState, srcSequenceIndex);
                        destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                        continue;
                    }

                    // Non-epsilon transition: only possible while source elements remain.
                    if (srcSequenceIndex < srcSequenceLength)
                    {
                        var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                        var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcSequenceElement, out var destElementDistribution);

                        // A log-scale of negative infinity means the transition contributes nothing.
                        if (double.IsNegativeInfinity(projectionLogScale))
                        {
                            continue;
                        }

                        var weight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                        var childDestState = BuildProjectionOfSequence(destMappingState, srcSequenceIndex + 1);
                        destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);
                    }
                }

                // A destination state can be accepting only once the whole source sequence has been consumed.
                destState.SetEndWeight(srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero);
                return destState.Index;
            }
        }
示例#9
0
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">
        /// The automaton to project. Checked with <c>Argument.CheckIfNotNull</c>.
        /// Unless either operand is the canonic zero, <c>MakeEpsilonFree()</c> is invoked on this argument.
        /// </param>
        /// <returns>The projection.</returns>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
        {
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            if (srcAutomaton.IsCanonicZero() || this.sequencePairToWeight.IsCanonicZero())
            {
                return builder.GetAutomaton();
            }

            // The traversal below only follows non-epsilon source transitions.
            srcAutomaton.MakeEpsilonFree();

            // (transducer state index, source state index) -> index of the destination state created for it.
            var knownStates = new Dictionary<(int, int), int>();

            builder.StartStateIndex = BuildProjectionOfAutomaton(this.sequencePairToWeight.Start, srcAutomaton.Start);

            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return builder.GetAutomaton();

            // Recursively builds the projection of the given automaton onto this transducer and returns
            // the index of the destination state for (mappingState, srcState).
            // The projected automaton must be epsilon-free.
            int BuildProjectionOfAutomaton(
                PairListAutomaton.State mappingState,
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
            {
                //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
                //// Unfortunately, it's not clear how to avoid the duplication in the current design.

                var key = (mappingState.Index, srcState.Index);

                // Reuse the destination state if this pair has been seen already.
                if (knownStates.TryGetValue(key, out var existingIndex))
                {
                    return existingIndex;
                }

                var created = builder.AddState();

                // Register the state before recursing so that cycles resolve to it instead of recursing forever.
                knownStates.Add(key, created.Index);

                foreach (var mappingTransition in mappingState.Transitions)
                {
                    var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                    if (IsSrcEpsilon(mappingTransition))
                    {
                        // Source-epsilon transition: the source automaton stays in its current state.
                        var outputDistribution =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var nextDestIndex = BuildProjectionOfAutomaton(nextMappingState, srcState);
                        created.AddTransition(outputDistribution, mappingTransition.Weight, nextDestIndex, mappingTransition.Group);
                    }
                    else
                    {
                        // Pair this transducer transition with every transition leaving the source state.
                        foreach (var srcTransition in srcState.Transitions)
                        {
                            Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                            var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                            var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                                srcTransition.ElementDistribution.Value, out var projectedDistribution);

                            // A log-scale of negative infinity means this pairing contributes nothing.
                            if (!double.IsNegativeInfinity(logScale))
                            {
                                var weight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                                var nextDestIndex = BuildProjectionOfAutomaton(nextMappingState, nextSrcState);
                                created.AddTransition(projectedDistribution, weight, nextDestIndex, mappingTransition.Group);
                            }
                        }
                    }
                }

                created.SetEndWeight(Weight.Product(mappingState.EndWeight, srcState.EndWeight));
                return created.Index;
            }
        }
示例#10
0
        /// <summary>
        /// Recursively builds the projection of a given automaton onto this transducer.
        /// The projected automaton must be epsilon-free.
        /// </summary>
        /// <param name="destAutomaton">The projection being built; new states are added to it.</param>
        /// <param name="mappingState">The currently traversed state of the transducer.</param>
        /// <param name="srcState">The currently traversed state of the automaton being projected.</param>
        /// <param name="destStateCache">The cache of the created projection states, keyed by (transducer state index, source state index).</param>
        /// <returns>The state of the projection corresponding to the given mapping state and source state.</returns>
        private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfAutomaton(
            TDestAutomaton destAutomaton,
            PairListAutomaton.State mappingState,
            Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState,
            Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
        {
            Debug.Assert(mappingState != null && srcState != null, "Valid states must be provided.");
            Debug.Assert(!ReferenceEquals(srcState.Owner, destAutomaton), "Cannot build a projection in place.");

            //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
            //// Unfortunately, it's not clear how to avoid the duplication in the current design.

            var cacheKey = new IntPair(mappingState.Index, srcState.Index);

            // Reuse the projection state if this pair has been visited already.
            Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projectedState;
            if (destStateCache.TryGetValue(cacheKey, out projectedState))
            {
                return projectedState;
            }

            // Register the new state before recursing so that cycles resolve to it instead of recursing forever.
            projectedState = destAutomaton.AddState();
            destStateCache.Add(cacheKey, projectedState);

            for (int m = 0; m < mappingState.TransitionCount; m++)
            {
                var mappingTransition = mappingState.GetTransition(m);
                var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

                if (IsSrcEpsilon(mappingTransition))
                {
                    // Source-epsilon transition: the source automaton stays in its current state.
                    var pairDistribution = mappingTransition.ElementDistribution;
                    TDestElementDistribution outputDistribution = pairDistribution == null ? null : pairDistribution.Second;
                    var nextProjectedState = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, srcState, destStateCache);
                    projectedState.AddTransition(outputDistribution, mappingTransition.Weight, nextProjectedState, mappingTransition.Group);
                }
                else
                {
                    // Pair this transducer transition with every transition leaving the source state.
                    for (int s = 0; s < srcState.TransitionCount; s++)
                    {
                        var srcTransition = srcState.GetTransition(s);
                        Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                        TDestElementDistribution outputDistribution;
                        double logScale = mappingTransition.ElementDistribution.ProjectFirst(
                            srcTransition.ElementDistribution, out outputDistribution);

                        // A log-scale of negative infinity means this pairing contributes nothing.
                        if (!double.IsNegativeInfinity(logScale))
                        {
                            Weight weight = Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                            var nextProjectedState = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, nextSrcState, destStateCache);
                            projectedState.AddTransition(outputDistribution, weight, nextProjectedState, mappingTransition.Group);
                        }
                    }
                }
            }

            projectedState.EndWeight = Weight.Product(mappingState.EndWeight, srcState.EndWeight);
            return projectedState;
        }
示例#11
0
        /// <summary>
        /// Builds, by memoized recursion, the projection of a given sequence onto this transducer.
        /// </summary>
        /// <param name="destAutomaton">The automaton that receives the states of the projection.</param>
        /// <param name="mappingState">The transducer state that is currently being expanded.</param>
        /// <param name="srcSequence">The sequence being projected.</param>
        /// <param name="srcSequenceIndex">The position in <paramref name="srcSequence"/> reached so far.</param>
        /// <param name="destStateCache">
        /// Projection states created so far, keyed by the pair (transducer state index, sequence position).
        /// </param>
        /// <returns>
        /// The projection state that corresponds to <paramref name="mappingState"/> and <paramref name="srcSequenceIndex"/>.
        /// </returns>
        /// <remarks>
        /// This method has a lot in common with Automaton&lt;&gt;.BuildProduct;
        /// it is not clear how to avoid the duplication in the current design.
        /// </remarks>
        private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfSequence(
            TDestAutomaton destAutomaton,
            PairListAutomaton.State mappingState,
            TSrcSequence srcSequence,
            int srcSequenceIndex,
            Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
        {
            var srcManipulator =
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;

            // Each (transducer state, sequence position) pair is expanded at most once.
            var cacheKey = new IntPair(mappingState.Index, srcSequenceIndex);
            if (destStateCache.TryGetValue(cacheKey, out var knownState))
            {
                return knownState;
            }

            // Register the new state before recursing so that cycles resolve to it through the cache.
            Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projectedState =
                destAutomaton.AddState();
            destStateCache.Add(cacheKey, projectedState);

            int sequenceLength = srcManipulator.GetLength(srcSequence);
            bool hasCurrentElement = srcSequenceIndex < sequenceLength;

            for (int transitionIndex = 0; transitionIndex < mappingState.TransitionCount; ++transitionIndex)
            {
                var transition = mappingState.GetTransition(transitionIndex);
                var nextMappingState = mappingState.Owner.States[transition.DestinationStateIndex];

                if (IsSrcEpsilon(transition))
                {
                    // Source-epsilon transition: the position in the source sequence does not advance.
                    TDestElementDistribution emittedDistribution;
                    if (transition.ElementDistribution == null)
                    {
                        emittedDistribution = null;
                    }
                    else
                    {
                        emittedDistribution = transition.ElementDistribution.Second;
                    }

                    var epsilonTarget = this.BuildProjectionOfSequence(
                        destAutomaton, nextMappingState, srcSequence, srcSequenceIndex, destStateCache);
                    projectedState.AddTransition(emittedDistribution, transition.Weight, epsilonTarget, transition.Group);
                    continue;
                }

                // A non-epsilon transition needs a source element to project.
                if (!hasCurrentElement)
                {
                    continue;
                }

                var currentElement = srcManipulator.GetElement(srcSequence, srcSequenceIndex);

                TDestElementDistribution projectedDistribution;
                double logScale = transition.ElementDistribution.ProjectFirst(currentElement, out projectedDistribution);
                if (double.IsNegativeInfinity(logScale))
                {
                    // The projection has zero scale, so no transition is added.
                    continue;
                }

                Weight scaledWeight = Weight.Product(transition.Weight, Weight.FromLogValue(logScale));
                var advancedTarget = this.BuildProjectionOfSequence(
                    destAutomaton, nextMappingState, srcSequence, srcSequenceIndex + 1, destStateCache);
                projectedState.AddTransition(projectedDistribution, scaledWeight, advancedTarget, transition.Group);
            }

            // The state gets the transducer's end weight only at the end of the source sequence.
            if (srcSequenceIndex == sequenceLength)
            {
                projectedState.EndWeight = mappingState.EndWeight;
            }
            else
            {
                projectedState.EndWeight = Weight.Zero;
            }

            return projectedState;
        }
示例#12
0
        /// <summary>
        /// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
        /// </summary>
        /// <param name="srcAutomaton">
        /// The automaton to project. Must not be null. Note that <c>MakeEpsilonFree()</c> is invoked on this argument.
        /// </param>
        /// <returns>The projection.</returns>
        /// <remarks>
        /// The code of this method has a lot in common with the code of Automaton.SetToProduct.
        /// Unfortunately, it's not clear how to avoid the duplication in the current design.
        /// </remarks>
        public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
        {
            Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

            var mappingAutomaton = this.sequencePairToWeight;

            // If either operand is the canonic zero, the projection is zero as well.
            if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
            {
                return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
            }

            // The traversal below asserts that the projected automaton has no epsilon transitions.
            srcAutomaton.MakeEpsilonFree();

            var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

            // Maps (mapping state index, source state index) to the index of the destination state.
            var destStateCache = new Dictionary<(int, int), int>();

            // Pairs whose outgoing transitions still have to be computed.
            var pendingPairs = new Stack<(int state1, int state2, int destStateIndex)>();

            // Returns the destination state index for a pair of states.
            // A pair seen for the first time gets a new destination state and is scheduled for expansion.
            int CreateDestState(
                PairListAutomaton.State mappingState,
                Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
            {
                var pairKey = (mappingState.Index, srcState.Index);
                if (destStateCache.TryGetValue(pairKey, out var knownIndex))
                {
                    return knownIndex;
                }

                var createdState = builder.AddState();
                createdState.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
                pendingPairs.Push((mappingState.Index, srcState.Index, createdState.Index));
                destStateCache[pairKey] = createdState.Index;
                return createdState.Index;
            }

            // Seed the traversal with the pair of start states.
            builder.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

            var srcHasLogValueOverrides = (srcAutomaton as StringAutomaton)?.HasElementLogValueOverrides ?? false;

            while (pendingPairs.Count > 0)
            {
                var pending = pendingPairs.Pop();

                var mappingState = mappingAutomaton.States[pending.state1];
                var srcState = srcAutomaton.States[pending.state2];
                var destState = builder[pending.destStateIndex];

                foreach (var mappingTransition in mappingState.Transitions)
                {
                    var nextMappingState = mappingAutomaton.States[mappingTransition.DestinationStateIndex];

                    if (IsSrcEpsilon(mappingTransition))
                    {
                        // Source-epsilon transition: the source state does not change.
                        var emittedDistribution =
                            mappingTransition.ElementDistribution.HasValue
                                ? mappingTransition.ElementDistribution.Value.Second
                                : Option.None;
                        var epsilonTargetIndex = CreateDestState(nextMappingState, srcState);
                        destState.AddTransition(emittedDistribution, mappingTransition.Weight, epsilonTargetIndex, mappingTransition.Group);
                        continue;
                    }

                    // Pair the mapping transition with every transition leaving srcState.
                    foreach (var srcTransition in srcState.Transitions)
                    {
                        Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                        var nextSrcState = srcAutomaton.States[srcTransition.DestinationStateIndex];

                        var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                            srcTransition.ElementDistribution.Value, out var projectedDistribution);
                        if (double.IsNegativeInfinity(logScale))
                        {
                            // The projection has zero scale, so no transition is added.
                            continue;
                        }

                        // In the special case of a log probability override in a DiscreteChar element distribution,
                        // we need to compensate for the fact that the distribution is not normalized.
                        if (srcHasLogValueOverrides && projectedDistribution.HasValue)
                        {
                            var srcDiscreteChar =
                                (DiscreteChar)(IDistribution<char>)srcTransition.ElementDistribution.Value;
                            if (srcDiscreteChar.HasLogProbabilityOverride)
                            {
                                var totalMass = srcDiscreteChar.Ranges.EnumerableSum(
                                    rng => rng.Probability.Value * (rng.EndExclusive - rng.StartInclusive));
                                logScale -= System.Math.Log(totalMass);
                            }
                        }

                        Weight transitionWeight;
                        if (srcHasLogValueOverrides && projectedDistribution.HasNoValue)
                        {
                            transitionWeight = Weight.One;
                        }
                        else
                        {
                            transitionWeight = Weight.Product(
                                mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                        }

                        // We don't want an unnormalizable distribution to become normalizable due to a rounding error.
                        if (Math.Abs(transitionWeight.LogValue) < 1e-12)
                        {
                            transitionWeight = Weight.One;
                        }

                        var targetIndex = CreateDestState(nextMappingState, nextSrcState);
                        destState.AddTransition(projectedDistribution, transitionWeight, targetIndex, mappingTransition.Group);
                    }
                }
            }

            // Clean up the built automaton before materializing it.
            var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);

            simplification.RemoveDeadStates();
            simplification.SimplifyIfNeeded();

            return builder.GetAutomaton();
        }
示例#13
0
            /// <summary>
            /// Computes the product of this weight function and another one,
            /// picking a cheap representation-specific path where one applies.
            /// </summary>
            /// <param name="weightFunction">The weight function to multiply by.</param>
            /// <returns>
            /// Zero if either operand is the canonic zero or the product has no non-zero entries;
            /// otherwise a point mass, dictionary or automaton based weight function holding the product.
            /// </returns>
            public MultiRepresentationWeightFunction<TDictionary> Product(MultiRepresentationWeightFunction<TDictionary> weightFunction)
            {
                if (IsCanonicZero() || weightFunction.IsCanonicZero())
                {
                    return Zero();
                }

                var lhs = this.weightFunction;
                var rhs = weightFunction.weightFunction;

                // Case 1: one operand is a point mass. The product is then supported on that single point.
                PointMassWeightFunction pointMass = null;
                IWeightFunction other = null;

                if (lhs is PointMassWeightFunction lhsPoint)
                {
                    pointMass = lhsPoint;
                    other = rhs;
                }
                else if (rhs is PointMassWeightFunction rhsPoint)
                {
                    pointMass = rhsPoint;
                    other = lhs;
                }

                if (pointMass != null && !other.UsesGroups)
                {
                    var logValue = other.GetLogValue(pointMass.Point);

                    if (double.IsNegativeInfinity(logValue))
                    {
                        return Zero();
                    }

                    if (logValue == 0.0)
                    {
                        // The other operand has log-value 0 at the point, so the point mass is unchanged.
                        return FromPointMass(pointMass);
                    }

                    var scaledPoint = new KeyValuePair<TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logValue));
                    return FromDictionary(
                        DictionaryWeightFunction<TDictionary>.FromDistinctWeights(new[] { scaledPoint }));
                }

                // Case 2: at least one operand is a dictionary.
                TDictionary dictionary = null;

                if (lhs is TDictionary lhsDictionary)
                {
                    if (rhs is TDictionary rhsDictionary)
                    {
                        return FromDictionary(lhsDictionary.Product(rhsDictionary));
                    }

                    dictionary = lhsDictionary;
                    other = rhs;
                }
                else if (rhs is TDictionary rhsOnlyDictionary)
                {
                    dictionary = rhsOnlyDictionary;
                    other = lhs;
                }

                if (dictionary != null && !other.UsesGroups)
                {
                    // Evaluate the other operand on every non-zero dictionary entry.
                    var products = new List<KeyValuePair<TSequence, Weight>>(dictionary.Dictionary.Count);
                    foreach (var entry in dictionary.Dictionary)
                    {
                        if (entry.Value.IsZero)
                        {
                            continue;
                        }

                        var otherLogValue = other.GetLogValue(entry.Key);
                        if (double.IsNegativeInfinity(otherLogValue))
                        {
                            continue;
                        }

                        products.Add(new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * Weight.FromLogValue(otherLogValue)));
                    }

                    if (products.Count == 0)
                    {
                        return Zero();
                    }

                    if (products.Count == 1 && products[0].Value.LogValue == 0.0)
                    {
                        // A single entry with log-weight 0 is returned as a point.
                        return FromPoint(products[0].Key);
                    }

                    return FromDictionary(
                        DictionaryWeightFunction<TDictionary>.FromDistinctWeights(products));
                }

                // General case: multiply the automaton representations.
                return FromAutomaton(AsAutomaton().Product(weightFunction.AsAutomaton()));
            }
示例#14
0
            /// <summary>
            /// Scales this weight function by a factor given in the log domain.
            /// </summary>
            /// <param name="logScale">The logarithm of the scale factor.</param>
            /// <returns>The scaled weight function.</returns>
            /// <exception cref="InvalidOperationException">
            /// Thrown if the underlying representation is not one of the supported kinds.
            /// </exception>
            public MultiRepresentationWeightFunction<TDictionary> ScaleLog(double logScale)
            {
                // No underlying representation: the result is zero.
                if (weightFunction is null)
                {
                    return Zero();
                }

                if (weightFunction is PointMassWeightFunction pointMass)
                {
                    // The result is a single-entry dictionary holding the point with log-weight logScale.
                    var scaledPoint = new KeyValuePair<TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logScale));
                    return FromDictionary(
                        DictionaryWeightFunction<TDictionary>.FromDistinctWeights(new[] { scaledPoint }));
                }

                if (weightFunction is TDictionary dictionary)
                {
                    return FromDictionary(dictionary.ScaleLog(logScale));
                }

                if (weightFunction is TAutomaton automaton)
                {
                    return FromAutomaton(automaton.ScaleLog(logScale));
                }

                throw new InvalidOperationException("Current function has an invalid type");
            }
示例#15
0
            /// <summary>
            /// Returns a weight function whose representation is chosen based on the current contents:
            /// zero when there are no non-zero entries, a point for a single sequence,
            /// and otherwise a dictionary or the original representation.
            /// </summary>
            /// <returns>The weight function in the selected representation.</returns>
            public MultiRepresentationWeightFunction<TDictionary> NormalizeStructure()
            {
                if (weightFunction is TDictionary dictionary)
                {
                    // Two entries are enough to tell apart "empty", "single point" and "many".
                    var nonZeroEntries = dictionary.Dictionary.Where(kvp => !kvp.Value.IsZero).Take(2).ToList();
                    switch (nonZeroEntries.Count)
                    {
                    case 0:
                        return Zero();

                    case 1:
                        return FromPoint(nonZeroEntries[0].Key);

                    default:
                        return FromDictionary(dictionary.NormalizeStructure());
                    }
                }

                if (weightFunction is TAutomaton automaton && !automaton.UsesGroups)
                {
                    if (automaton.LogValueOverride == null &&
                        automaton.TryEnumerateSupport(MaxDictionarySize, out var support, false, 4 * MaxDictionarySize, true))
                    {
                        var sequences = support.ToList();
                        if (sequences.Count == 0)
                        {
                            return Zero();
                        }

                        if (sequences.Count == 1)
                        {
                            return FromPoint(sequences[0]);
                        }

                        // Create a dictionary only if we expect it to be smaller than the automaton.
                        // Approximation uses sizes corresponding to a string automaton, which is the most used one.
                        // We don't require this comparison to be always precise - most of the times is good enough.
                        var dictSizeApprox =
                            sequences.Sum(el => SequenceManipulator.GetLength(el)) * sizeof(char)
                            + (24 + 8 + sizeof(double)) * sequences.Count;

                        // Part of the automaton size that does not depend on the number of states or transitions.
                        int fixedAutomatonSize =
                            24                        // header
                            + 16 + 2 * sizeof(double) // 2 double? fields
                            + 2 * sizeof(int)         // data container: Flags and StartStateIndex
                            + 2 * 24;                 // headers of the states and transitions arrays

                        // Per non-epsilon transition: 40 is the size of a DiscreteChar filled with nulls,
                        // another 40 is the size of an array with a single char range, 80 in total.
                        // Any specific DiscreteChar can be larger or can be cached,
                        // but this seems an ok approximation for the average case.
                        var automatonSizeApprox =
                            fixedAutomatonSize
                            + automaton.Data.States.Count * (2 * sizeof(int) + sizeof(double)) // states
                            + automaton.Data.Transitions.Count * 24                            // one transition w/o storage for discrete char
                            + automaton.Data.Transitions.Count(tr => !tr.IsEpsilon) * 80;

                        if (dictSizeApprox < automatonSizeApprox)
                        {
                            return FromDictionary(
                                DictionaryWeightFunction<TDictionary>.FromDistinctWeights(
                                    sequences.Select(seq => new KeyValuePair<TSequence, Weight>(seq, Weight.FromLogValue(automaton.GetLogValue(seq))))));
                        }
                    }

                    // TryEnumerateSupport(..., maxTraversedPaths, ...) is allowed to quit early
                    // on complex automata, so we need to explicitly check for point mass
                    var point = automaton.TryComputePoint();
                    if (point != null)
                    {
                        return FromPoint(point);
                    }
                }

                return Clone(); // TODO: replace with `this` after making automata immutable
            }