/// <summary>
/// Evaluates the automaton on the part of a sequence that starts at a given position,
/// beginning from this state. Results are memoized per (state index, position) pair.
/// </summary>
/// <param name="sequence">The sequence to compute the value on.</param>
/// <param name="sequencePosition">The current position in the sequence.</param>
/// <param name="valueCache">A lookup table for memoization, keyed by (state index, sequence position).</param>
/// <returns>The value computed from the current state.</returns>
private Weight DoGetValue(
    TSequence sequence, int sequencePosition, Dictionary<IntPair, Weight> valueCache)
{
    var cacheKey = new IntPair(this.Index, sequencePosition);
    Weight memoized;
    if (valueCache.TryGetValue(cacheKey, out memoized))
    {
        return memoized;
    }

    EpsilonClosure epsilonClosure = this.GetEpsilonClosure();
    int sequenceLength =
        Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetLength(sequence);

    Weight total;
    if (sequencePosition >= sequenceLength)
    {
        // Nothing left to consume: the value is the end weight of the epsilon closure.
        total = epsilonClosure.EndWeight;
    }
    else
    {
        total = Weight.Zero;
        TElement currentElement =
            Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis>.SequenceManipulator.GetElement(sequence, sequencePosition);

        for (int closureIndex = 0; closureIndex < epsilonClosure.Size; ++closureIndex)
        {
            State reachableState = epsilonClosure.GetStateByIndex(closureIndex);
            Weight reachableWeight = epsilonClosure.GetStateWeightByIndex(closureIndex);

            for (int t = 0; t < reachableState.transitionCount; t++)
            {
                Transition transition = reachableState.transitions[t];
                if (transition.IsEpsilon)
                {
                    // The destination is a part of the closure anyway
                    continue;
                }

                State targetState = this.Owner.states[transition.DestinationStateIndex];
                Weight elementWeight = Weight.FromLogValue(transition.ElementDistribution.GetLogProb(currentElement));
                if (elementWeight.IsZero || transition.Weight.IsZero)
                {
                    // This transition cannot contribute, so skip the recursive evaluation.
                    continue;
                }

                Weight targetValue = targetState.DoGetValue(sequence, sequencePosition + 1, valueCache);
                if (targetValue.IsZero)
                {
                    continue;
                }

                total = Weight.Sum(
                    total,
                    Weight.Product(reachableWeight, transition.Weight, elementWeight, targetValue));
            }
        }
    }

    valueCache.Add(cacheKey, total);
    return total;
}
/// <inheritdoc/>
public TThis SumLog(double logWeight1, TThis weightFunction, double logWeight2)
{
    var ownFactor = Weight.FromLogValue(logWeight1);
    var otherFactor = Weight.FromLogValue(logWeight2);

    // Entries of this function, each scaled by the first factor.
    var ownScaled =
        from entry in Dictionary
        select new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * ownFactor);

    // Entries of the other function, each scaled by the second factor.
    var otherScaled =
        from entry in weightFunction.Dictionary
        select new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * otherFactor);

    // Both scaled sequences are concatenated and handed to FromWeights.
    return FromWeights(ownScaled.Concat(otherScaled));
}
/// <inheritdoc/>
public MultiRepresentationWeightFunction<TDictionary> ConstantOnSupportOfLog(
    double logValue, MultiRepresentationWeightFunction<TDictionary> weightFunction)
{
    // When the support cannot be enumerated within MaxDictionarySize, use the automaton representation.
    if (!weightFunction.TryEnumerateSupportInternal(MaxDictionarySize, out var support))
    {
        return FromAutomaton(weightFunction.AsAutomaton().ConstantOnSupportLog(logValue));
    }

    if (!support.Any())
    {
        return Zero();
    }

    // A single support element with log-value 0 is represented as a point.
    if (logValue == 0 && !support.Skip(1).Any())
    {
        return FromPoint(support.Single());
    }

    var constantWeight = Weight.FromLogValue(logValue);
    var entries = support.Select(sequence => new KeyValuePair<TSequence, Weight>(sequence, constantWeight));
    return FromDictionary(DictionaryWeightFunction<TDictionary>.FromDistinctWeights(entries));
}
/// <summary>
/// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// <para>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </para>
/// <para>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </para>
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, "srcSequence");

    var transducerAutomaton = this.sequencePairToWeight;
    if (transducerAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    var manipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var sequenceLength = manipulator.GetLength(srcSequence);

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (transducer state index, position in the source sequence) to the index of the result state.
    var indexByPair = new Dictionary<(int, int), int>();

    // Result states whose outgoing transitions still have to be built.
    var pending = new Stack<(int transducerStateIndex, int sequencePosition, int resultStateIndex)>();

    // Returns the result state for the given pair, creating it and scheduling it for processing on first request.
    int GetOrScheduleState(PairListAutomaton.State forMappingState, int atPosition)
    {
        var key = (forMappingState.Index, atPosition);
        if (indexByPair.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();

        // A result state can only be accepting once the whole source sequence has been consumed.
        var endWeight = atPosition == sequenceLength ? forMappingState.EndWeight : Weight.Zero;
        created.SetEndWeight(endWeight);

        pending.Push((forMappingState.Index, atPosition, created.Index));
        indexByPair[key] = created.Index;
        return created.Index;
    }

    // Seed the work stack with the start state
    builder.StartStateIndex = GetOrScheduleState(transducerAutomaton.Start, 0);

    while (pending.Count > 0)
    {
        var (transducerStateIndex, position, resultStateIndex) = pending.Pop();

        var transducerState = transducerAutomaton.States[transducerStateIndex];
        var resultState = builder[resultStateIndex];

        foreach (var transition in transducerState.Transitions)
        {
            var nextTransducerState = transducerState.Owner.States[transition.DestinationStateIndex];

            if (IsSrcEpsilon(transition))
            {
                // Source-epsilon transition: the position in the source sequence does not advance.
                var emitted = transition.ElementDistribution.HasValue
                    ? transition.ElementDistribution.Value.Second
                    : Option.None;
                var targetIndex = GetOrScheduleState(nextTransducerState, position);
                resultState.AddTransition(emitted, transition.Weight, targetIndex, transition.Group);
            }
            else if (position < sequenceLength)
            {
                // Regular transition: project the current source element through it.
                var element = manipulator.GetElement(srcSequence, position);
                var logScale = transition.ElementDistribution.Value.ProjectFirst(element, out var projected);
                if (!double.IsNegativeInfinity(logScale))
                {
                    var weight = transition.Weight * Weight.FromLogValue(logScale);
                    var targetIndex = GetOrScheduleState(nextTransducerState, position + 1);
                    resultState.AddTransition(projected, weight, targetIndex, transition.Group);
                }
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">
/// The automaton to project. <c>MakeEpsilonFree</c> is called on it before the projection is built.
/// </param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var transducerAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || transducerAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (transducer state index, source state index) to the index of the result state.
    var indexByPair = new Dictionary<(int, int), int>();

    // Result states whose outgoing transitions still have to be built.
    var pending = new Stack<(int transducerStateIndex, int sourceStateIndex, int resultStateIndex)>();

    // Returns the result state for the given pair, creating it and scheduling it for processing on first request.
    int GetOrScheduleState(
        PairListAutomaton.State forMappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State forSourceState)
    {
        var key = (forMappingState.Index, forSourceState.Index);
        if (indexByPair.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();
        created.SetEndWeight(forMappingState.EndWeight * forSourceState.EndWeight);
        pending.Push((forMappingState.Index, forSourceState.Index, created.Index));
        indexByPair[key] = created.Index;
        return created.Index;
    }

    // Seed the work stack with the start state
    builder.StartStateIndex = GetOrScheduleState(transducerAutomaton.Start, srcAutomaton.Start);

    while (pending.Count > 0)
    {
        var (transducerStateIndex, sourceStateIndex, resultStateIndex) = pending.Pop();

        var transducerState = transducerAutomaton.States[transducerStateIndex];
        var sourceState = srcAutomaton.States[sourceStateIndex];
        var resultState = builder[resultStateIndex];

        foreach (var transducerTransition in transducerState.Transitions)
        {
            var nextTransducerState = transducerState.Owner.States[transducerTransition.DestinationStateIndex];

            if (IsSrcEpsilon(transducerTransition))
            {
                // Source-epsilon transition: the source automaton stays in its current state.
                var emitted = transducerTransition.ElementDistribution.HasValue
                    ? transducerTransition.ElementDistribution.Value.Second
                    : Option.None;
                var epsilonTargetIndex = GetOrScheduleState(nextTransducerState, sourceState);
                resultState.AddTransition(emitted, transducerTransition.Weight, epsilonTargetIndex, transducerTransition.Group);
            }
            else
            {
                // Pair the transducer transition with every transition leaving the current source state.
                foreach (var sourceTransition in sourceState.Transitions)
                {
                    Debug.Assert(!sourceTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSourceState = sourceState.Owner.States[sourceTransition.DestinationStateIndex];

                    var logScale = transducerTransition.ElementDistribution.Value.ProjectFirst(
                        sourceTransition.ElementDistribution.Value, out var projected);
                    if (double.IsNegativeInfinity(logScale))
                    {
                        continue;
                    }

                    var combinedWeight = Weight.Product(
                        transducerTransition.Weight, sourceTransition.Weight, Weight.FromLogValue(logScale));
                    var targetIndex = GetOrScheduleState(nextTransducerState, nextSourceState);
                    resultState.AddTransition(projected, combinedWeight, targetIndex, transducerTransition.Group);
                }
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <inheritdoc/>
public virtual bool TryNormalizeValues(out TThis normalizedFunction, out double logNormalizer)
{
    // A local copy is needed because an out parameter cannot be captured by the query below.
    double normalizer = GetLogNormalizer();
    logNormalizer = normalizer;

    bool canNormalize = !double.IsNaN(normalizer) && !double.IsInfinity(normalizer);

    // This instance is returned unchanged both when normalization is impossible (NaN or infinite
    // normalizer) and when the function is already normalized (log-normalizer of zero).
    if (!canNormalize || normalizer == 0.0)
    {
        normalizedFunction = (TThis)this;
        return canNormalize;
    }

    normalizedFunction = FromDistinctWeights(
        from entry in Dictionary
        select new KeyValuePair<TSequence, Weight>(
            entry.Key, Weight.FromLogValue(entry.Value.LogValue - normalizer)));
    return true;
}
/// <inheritdoc/>
public TThis ScaleLog(double logScale)
{
    var factor = Weight.FromLogValue(logScale);

    // Keys are left untouched; every weight is multiplied by the factor.
    var scaledEntries =
        from entry in Dictionary
        select new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * factor);

    return FromDistinctWeights(scaledEntries);
}
/// <summary>
/// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, nameof(srcSequence));

    var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    if (this.sequencePairToWeight.IsCanonicZero())
    {
        return result.GetAutomaton();
    }

    // The manipulator and the sequence length do not change during the traversal,
    // so they are obtained once here instead of on every recursive call.
    var sourceSequenceManipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

    // Maps (transducer state index, position in the source sequence) to the index of the result state.
    var destStateCache = new Dictionary<(int, int), int>();
    result.StartStateIndex = BuildProjectionOfSequence(this.sequencePairToWeight.Start, 0);

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return result.GetAutomaton();

    // Recursively builds the projection of a given sequence onto this transducer.
    // Returns the index of the result state for the given transducer state and sequence position.
    int BuildProjectionOfSequence(PairListAutomaton.State mappingState, int srcSequenceIndex)
    {
        //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
        //// Unfortunately, it's not clear how to avoid the duplication in the current design.

        var statePair = (mappingState.Index, srcSequenceIndex);
        if (destStateCache.TryGetValue(statePair, out var destStateIndex))
        {
            return destStateIndex;
        }

        // Register the state before recursing so that cycles resolve to the cached index.
        var destState = result.AddState();
        destStateCache.Add(statePair, destState.Index);

        // Enumerate transitions from the current mapping state
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            // Source-epsilon transition: the position in the source sequence does not advance.
            if (IsSrcEpsilon(mappingTransition))
            {
                var destElementWeights = mappingTransition.ElementDistribution.HasValue
                    ? mappingTransition.ElementDistribution.Value.Second
                    : Option.None;
                var childDestStateIndex = BuildProjectionOfSequence(destMappingState, srcSequenceIndex);
                destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                continue;
            }

            // Regular transition: only applicable while there are source elements left to consume.
            if (srcSequenceIndex < srcSequenceLength)
            {
                var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcSequenceElement, out var destElementDistribution);
                if (double.IsNegativeInfinity(projectionLogScale))
                {
                    continue;
                }

                var weight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                var childDestState = BuildProjectionOfSequence(destMappingState, srcSequenceIndex + 1);
                destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);
            }
        }

        // A result state can only be accepting once the whole source sequence has been consumed.
        destState.SetEndWeight(srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero);
        return destState.Index;
    }
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">
/// The automaton to project. <c>MakeEpsilonFree</c> is called on it before the projection is built.
/// </param>
/// <returns>The projection.</returns>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    bool nothingToProject = srcAutomaton.IsCanonicZero() || this.sequencePairToWeight.IsCanonicZero();
    if (nothingToProject)
    {
        return builder.GetAutomaton();
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    // Maps (transducer state index, source state index) to the index of the result state.
    var indexByStatePair = new Dictionary<(int, int), int>();
    builder.StartStateIndex = ProjectStatePair(this.sequencePairToWeight.Start, srcAutomaton.Start);

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();

    // Recursively builds the result state for a (transducer state, source state) pair and returns its index.
    // The projected automaton must be epsilon-free.
    int ProjectStatePair(
        PairListAutomaton.State transducerState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State sourceState)
    {
        //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
        //// Unfortunately, it's not clear how to avoid the duplication in the current design.

        var key = (transducerState.Index, sourceState.Index);
        if (indexByStatePair.TryGetValue(key, out var knownIndex))
        {
            // The pair has been visited already.
            return knownIndex;
        }

        // Register the state before recursing so that cycles resolve to the cached index.
        var resultState = builder.AddState();
        indexByStatePair.Add(key, resultState.Index);

        foreach (var transducerTransition in transducerState.Transitions)
        {
            var nextTransducerState = transducerState.Owner.States[transducerTransition.DestinationStateIndex];

            if (IsSrcEpsilon(transducerTransition))
            {
                // Source-epsilon transition: the source automaton stays in its current state.
                var emitted = transducerTransition.ElementDistribution.HasValue
                    ? transducerTransition.ElementDistribution.Value.Second
                    : Option.None;
                var epsilonTargetIndex = ProjectStatePair(nextTransducerState, sourceState);
                resultState.AddTransition(emitted, transducerTransition.Weight, epsilonTargetIndex, transducerTransition.Group);
            }
            else
            {
                // Pair the transducer transition with every transition leaving the current source state.
                foreach (var sourceTransition in sourceState.Transitions)
                {
                    Debug.Assert(!sourceTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSourceState = sourceState.Owner.States[sourceTransition.DestinationStateIndex];

                    var logScale = transducerTransition.ElementDistribution.Value.ProjectFirst(
                        sourceTransition.ElementDistribution.Value, out var projected);
                    if (double.IsNegativeInfinity(logScale))
                    {
                        continue;
                    }

                    var combinedWeight = Weight.Product(
                        transducerTransition.Weight, sourceTransition.Weight, Weight.FromLogValue(logScale));
                    var targetIndex = ProjectStatePair(nextTransducerState, nextSourceState);
                    resultState.AddTransition(projected, combinedWeight, targetIndex, transducerTransition.Group);
                }
            }
        }

        resultState.SetEndWeight(Weight.Product(transducerState.EndWeight, sourceState.EndWeight));
        return resultState.Index;
    }
}
/// <summary>
/// Recursively builds the projection of a given automaton onto this transducer.
/// The projected automaton must be epsilon-free.
/// </summary>
/// <param name="destAutomaton">The projection being built.</param>
/// <param name="mappingState">The currently traversed state of the transducer.</param>
/// <param name="srcState">The currently traversed state of the automaton being projected.</param>
/// <param name="destStateCache">The cache of the created projection states, keyed by (mapping state index, source state index).</param>
/// <returns>The state of the projection corresponding to the given mapping state and source state.</returns>
private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfAutomaton(
    TDestAutomaton destAutomaton,
    PairListAutomaton.State mappingState,
    Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState,
    Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
{
    Debug.Assert(mappingState != null && srcState != null, "Valid states must be provided.");
    Debug.Assert(!ReferenceEquals(srcState.Owner, destAutomaton), "Cannot build a projection in place.");

    //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
    //// Unfortunately, it's not clear how to avoid the duplication in the current design.

    // If the pair has been visited already, return the state created for it.
    var cacheKey = new IntPair(mappingState.Index, srcState.Index);
    Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projectedState;
    if (destStateCache.TryGetValue(cacheKey, out projectedState))
    {
        return projectedState;
    }

    // Register the state before recursing so that cycles resolve to the cached state.
    projectedState = destAutomaton.AddState();
    destStateCache.Add(cacheKey, projectedState);

    for (int m = 0; m < mappingState.TransitionCount; m++)
    {
        var mappingTransition = mappingState.GetTransition(m);
        var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

        if (IsSrcEpsilon(mappingTransition))
        {
            // Source-epsilon transition: the source automaton stays in its current state.
            TDestElementDistribution emitted = mappingTransition.ElementDistribution == null
                ? null
                : mappingTransition.ElementDistribution.Second;
            var epsilonTarget = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, srcState, destStateCache);
            projectedState.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
        }
        else
        {
            // Pair the mapping transition with every transition leaving the current source state.
            for (int s = 0; s < srcState.TransitionCount; s++)
            {
                var srcTransition = srcState.GetTransition(s);
                Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                TDestElementDistribution projectedDistribution;
                double logScale = mappingTransition.ElementDistribution.ProjectFirst(
                    srcTransition.ElementDistribution, out projectedDistribution);
                if (double.IsNegativeInfinity(logScale))
                {
                    continue;
                }

                Weight combinedWeight = Weight.Product(
                    mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                var target = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, nextSrcState, destStateCache);
                projectedState.AddTransition(projectedDistribution, combinedWeight, target, mappingTransition.Group);
            }
        }
    }

    projectedState.EndWeight = Weight.Product(mappingState.EndWeight, srcState.EndWeight);
    return projectedState;
}
/// <summary>
/// Recursively builds the projection of a given sequence onto this transducer.
/// </summary>
/// <param name="destAutomaton">The projection being built.</param>
/// <param name="mappingState">The currently traversed state of the transducer.</param>
/// <param name="srcSequence">The sequence being projected.</param>
/// <param name="srcSequenceIndex">The current index in the sequence being projected.</param>
/// <param name="destStateCache">The cache of the created projection states, keyed by (mapping state index, sequence index).</param>
/// <returns>The state of the projection corresponding to the given mapping state and the position in the projected sequence.</returns>
private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfSequence(
    TDestAutomaton destAutomaton,
    PairListAutomaton.State mappingState,
    TSrcSequence srcSequence,
    int srcSequenceIndex,
    Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
{
    //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
    //// Unfortunately, it's not clear how to avoid the duplication in the current design.

    var manipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;

    // If the pair has been visited already, return the state created for it.
    var cacheKey = new IntPair(mappingState.Index, srcSequenceIndex);
    Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projectedState;
    if (destStateCache.TryGetValue(cacheKey, out projectedState))
    {
        return projectedState;
    }

    // Register the state before recursing so that cycles resolve to the cached state.
    projectedState = destAutomaton.AddState();
    destStateCache.Add(cacheKey, projectedState);

    int sequenceLength = manipulator.GetLength(srcSequence);

    for (int t = 0; t < mappingState.TransitionCount; t++)
    {
        var mappingTransition = mappingState.GetTransition(t);
        var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

        if (IsSrcEpsilon(mappingTransition))
        {
            // Source-epsilon transition: the position in the source sequence does not advance.
            TDestElementDistribution emitted = mappingTransition.ElementDistribution == null
                ? null
                : mappingTransition.ElementDistribution.Second;
            var epsilonTarget = this.BuildProjectionOfSequence(
                destAutomaton, nextMappingState, srcSequence, srcSequenceIndex, destStateCache);
            projectedState.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
        }
        else if (srcSequenceIndex < sequenceLength)
        {
            // Regular transition: project the current source element through it.
            var currentElement = manipulator.GetElement(srcSequence, srcSequenceIndex);

            TDestElementDistribution projectedDistribution;
            double logScale = mappingTransition.ElementDistribution.ProjectFirst(
                currentElement, out projectedDistribution);
            if (!double.IsNegativeInfinity(logScale))
            {
                Weight combinedWeight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(logScale));
                var target = this.BuildProjectionOfSequence(
                    destAutomaton, nextMappingState, srcSequence, srcSequenceIndex + 1, destStateCache);
                projectedState.AddTransition(projectedDistribution, combinedWeight, target, mappingTransition.Group);
            }
        }
    }

    // A projection state can only be accepting once the whole source sequence has been consumed.
    if (srcSequenceIndex == sequenceLength)
    {
        projectedState.EndWeight = mappingState.EndWeight;
    }
    else
    {
        projectedState.EndWeight = Weight.Zero;
    }

    return projectedState;
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">
/// The automaton to project. <c>MakeEpsilonFree</c> is called on it before the projection is built.
/// </param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var transducerAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || transducerAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (transducer state index, source state index) to the index of the result state.
    var indexByPair = new Dictionary<(int, int), int>();

    // Result states whose outgoing transitions still have to be built.
    var pending = new Stack<(int transducerStateIndex, int sourceStateIndex, int resultStateIndex)>();

    // Returns the result state for the given pair, creating it and scheduling it for processing on first request.
    int GetOrScheduleState(
        PairListAutomaton.State forMappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State forSourceState)
    {
        var key = (forMappingState.Index, forSourceState.Index);
        if (indexByPair.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();
        created.SetEndWeight(forMappingState.EndWeight * forSourceState.EndWeight);
        pending.Push((forMappingState.Index, forSourceState.Index, created.Index));
        indexByPair[key] = created.Index;
        return created.Index;
    }

    // Seed the work stack with the start state
    builder.StartStateIndex = GetOrScheduleState(transducerAutomaton.Start, srcAutomaton.Start);

    // Log-probability overrides are only looked for when the source is a StringAutomaton.
    var hasLogProbabilityOverrides = (srcAutomaton as StringAutomaton)?.HasElementLogValueOverrides ?? false;

    while (pending.Count > 0)
    {
        var (transducerStateIndex, sourceStateIndex, resultStateIndex) = pending.Pop();

        var transducerState = transducerAutomaton.States[transducerStateIndex];
        var sourceState = srcAutomaton.States[sourceStateIndex];
        var resultState = builder[resultStateIndex];

        foreach (var transducerTransition in transducerState.Transitions)
        {
            var nextTransducerState = transducerAutomaton.States[transducerTransition.DestinationStateIndex];

            if (IsSrcEpsilon(transducerTransition))
            {
                // Source-epsilon transition: the source automaton stays in its current state.
                var emitted = transducerTransition.ElementDistribution.HasValue
                    ? transducerTransition.ElementDistribution.Value.Second
                    : Option.None;
                var epsilonTargetIndex = GetOrScheduleState(nextTransducerState, sourceState);
                resultState.AddTransition(emitted, transducerTransition.Weight, epsilonTargetIndex, transducerTransition.Group);
                continue;
            }

            // Pair the transducer transition with every transition leaving the current source state.
            foreach (var sourceTransition in sourceState.Transitions)
            {
                Debug.Assert(!sourceTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var nextSourceState = srcAutomaton.States[sourceTransition.DestinationStateIndex];

                var logScale = transducerTransition.ElementDistribution.Value.ProjectFirst(
                    sourceTransition.ElementDistribution.Value, out var projected);
                if (double.IsNegativeInfinity(logScale))
                {
                    continue;
                }

                // In the special case of a log probability override in a DiscreteChar element distribution,
                // we need to compensate for the fact that the distribution is not normalized.
                if (projected.HasValue && hasLogProbabilityOverrides)
                {
                    var sourceChars = (DiscreteChar)(IDistribution<char>)sourceTransition.ElementDistribution.Value;
                    if (sourceChars.HasLogProbabilityOverride)
                    {
                        var totalMass = sourceChars.Ranges.EnumerableSum(
                            rng => rng.Probability.Value * (rng.EndExclusive - rng.StartInclusive));
                        logScale -= System.Math.Log(totalMass);
                    }
                }

                Weight combinedWeight;
                if (hasLogProbabilityOverrides && projected.HasNoValue)
                {
                    combinedWeight = Weight.One;
                }
                else
                {
                    combinedWeight = Weight.Product(
                        transducerTransition.Weight, sourceTransition.Weight, Weight.FromLogValue(logScale));
                }

                // We don't want an unnormalizable distribution to become normalizable due to a rounding error.
                if (Math.Abs(combinedWeight.LogValue) < 1e-12)
                {
                    combinedWeight = Weight.One;
                }

                var targetIndex = GetOrScheduleState(nextTransducerState, nextSourceState);
                resultState.AddTransition(projected, combinedWeight, targetIndex, transducerTransition.Group);
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Multiplies this weight function by <paramref name="weightFunction"/>, using the cheapest
/// representation-specific path that applies before falling back to an automaton product.
/// </summary>
/// <param name="weightFunction">The weight function to multiply by.</param>
/// <returns>
/// Zero if either operand is a canonic zero or no sequence keeps a non-zero weight on the
/// point-mass / dictionary paths; a point mass when a single sequence with log-weight 0 remains;
/// a dictionary when the result can be enumerated from a point mass or dictionary operand;
/// otherwise the product of the automaton forms of both operands.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> Product(MultiRepresentationWeightFunction<TDictionary> weightFunction)
{
    // A canonic zero on either side makes the whole product zero.
    if (IsCanonicZero() || weightFunction.IsCanonicZero())
    {
        return Zero();
    }

    var mine = this.weightFunction;
    var theirs = weightFunction.weightFunction;

    // Path 1: one operand is a point mass. The product is then determined by the value
    // of the other operand ("partner") at that single point. This operand is checked first.
    PointMassWeightFunction point = null;
    IWeightFunction partner = null;
    if (mine is PointMassWeightFunction minePoint)
    {
        point = minePoint;
        partner = theirs;
    }
    else if (theirs is PointMassWeightFunction theirPoint)
    {
        point = theirPoint;
        partner = mine;
    }

    if (point != null && !partner.UsesGroups)
    {
        var pointLogValue = partner.GetLogValue(point.Point);
        if (double.IsNegativeInfinity(pointLogValue))
        {
            return Zero();
        }

        if (pointLogValue == 0.0)
        {
            // The partner has log-value 0 at the point, so the point mass itself is the result.
            return FromPointMass(point);
        }

        var scaledPoint = new KeyValuePair<TSequence, Weight>(point.Point, Weight.FromLogValue(pointLogValue));
        return FromDictionary(
            DictionaryWeightFunction<TDictionary>.FromDistinctWeights(new[] { scaledPoint }));
    }

    // Path 2: one operand is a dictionary. Two dictionaries are multiplied directly;
    // otherwise the partner is evaluated on every key of the dictionary.
    // Note that the partner chosen here replaces the one picked on path 1.
    TDictionary table = null;
    if (mine is TDictionary mineTable)
    {
        if (theirs is TDictionary theirTable)
        {
            return FromDictionary(mineTable.Product(theirTable));
        }

        table = mineTable;
        partner = theirs;
    }
    else if (theirs is TDictionary onlyTheirTable)
    {
        table = onlyTheirTable;
        partner = mine;
    }

    if (table != null && !partner.UsesGroups)
    {
        var survivors = new List<KeyValuePair<TSequence, Weight>>(table.Dictionary.Count);
        foreach (var entry in table.Dictionary)
        {
            if (entry.Value.IsZero)
            {
                continue;
            }

            var partnerLogValue = partner.GetLogValue(entry.Key);
            if (double.IsNegativeInfinity(partnerLogValue))
            {
                continue;
            }

            survivors.Add(
                new KeyValuePair<TSequence, Weight>(entry.Key, entry.Value * Weight.FromLogValue(partnerLogValue)));
        }

        if (survivors.Count == 0)
        {
            return Zero();
        }

        if (survivors.Count == 1 && survivors[0].Value.LogValue == 0.0)
        {
            // A single sequence with log-weight 0 is represented as a point mass.
            return FromPoint(survivors[0].Key);
        }

        return FromDictionary(
            DictionaryWeightFunction<TDictionary>.FromDistinctWeights(survivors));
    }

    // Path 3: no shortcut applies (including operands that use groups) - multiply as automata.
    return FromAutomaton(AsAutomaton().Product(weightFunction.AsAutomaton()));
}
/// <summary>
/// Scales this weight function by a factor given in the log domain.
/// </summary>
/// <param name="logScale">The logarithm of the scale factor.</param>
/// <returns>
/// Zero when there is no underlying representation; a single-entry dictionary carrying
/// <paramref name="logScale"/> as the log-weight when the current representation is a point mass;
/// otherwise the scaled dictionary or automaton.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the underlying representation is none of the supported kinds.
/// </exception>
public MultiRepresentationWeightFunction<TDictionary> ScaleLog(double logScale)
{
    var current = weightFunction;

    if (current is null)
    {
        return Zero();
    }

    if (current is PointMassWeightFunction pointMass)
    {
        // The scaled point is stored as a one-element dictionary whose only weight is the scale itself.
        var scaledEntry = new KeyValuePair<TSequence, Weight>(pointMass.Point, Weight.FromLogValue(logScale));
        return FromDictionary(
            DictionaryWeightFunction<TDictionary>.FromDistinctWeights(new[] { scaledEntry }));
    }

    if (current is TDictionary dictionary)
    {
        return FromDictionary(dictionary.ScaleLog(logScale));
    }

    if (current is TAutomaton automaton)
    {
        return FromAutomaton(automaton.ScaleLog(logScale));
    }

    throw new InvalidOperationException("Current function has an invalid type");
}
/// <summary>
/// Re-expresses this weight function in a simpler representation where one is available.
/// </summary>
/// <returns>
/// For a dictionary: zero if it has no non-zero entries, a point mass if it has exactly one,
/// otherwise the structurally normalized dictionary. For an automaton that does not use groups:
/// zero, a point mass or a dictionary when its support can be enumerated and that form is
/// estimated to be smaller, or a point mass when one can be computed directly.
/// In every other case a clone of this weight function.
/// </returns>
public MultiRepresentationWeightFunction<TDictionary> NormalizeStructure()
{
    var current = weightFunction;

    if (current is TDictionary dictionary)
    {
        // Two non-zero entries are enough to tell "none", "exactly one" and "several" apart.
        var nonZeroEntries = dictionary.Dictionary.Where(kvp => !kvp.Value.IsZero).Take(2).ToList();
        switch (nonZeroEntries.Count)
        {
            case 0:
                return Zero();
            case 1:
                // NOTE(review): the weight of the single remaining entry is not carried over
                // into the point mass - confirm that this is intended.
                return FromPoint(nonZeroEntries[0].Key);
            default:
                return FromDictionary(dictionary.NormalizeStructure());
        }
    }

    if (current is TAutomaton automaton && !automaton.UsesGroups)
    {
        if (automaton.LogValueOverride == null
            && automaton.TryEnumerateSupport(MaxDictionarySize, out var support, false, 4 * MaxDictionarySize, true))
        {
            var sequences = support.ToList();
            if (sequences.Count == 0)
            {
                return Zero();
            }

            if (sequences.Count == 1)
            {
                return FromPoint(sequences[0]);
            }

            // Switch to a dictionary only if it is expected to be smaller than the automaton.
            // The estimate uses sizes that correspond to a string automaton, which is the most
            // commonly used one. The comparison does not have to be precise every time -
            // being right most of the time is good enough.
            var totalSequenceLength = sequences.Sum(el => SequenceManipulator.GetLength(el));
            var dictSizeApprox =
                totalSequenceLength * sizeof(char)
                + (24 + 8 + sizeof(double)) * sequences.Count;

            var fixedPartSize =
                24 // header
                + 16 + 2 * sizeof(double) // 2 double? fields
                // Data Container
                + 2 * sizeof(int) // Flags and StartStateIndex
                + 2 * 24; // Headers of the states and transitions arrays
            var statesSize = automaton.Data.States.Count * (2 * sizeof(int) + sizeof(double));

            // 24 is the size of one transition w/o storage for discrete char.
            var transitionsSize = automaton.Data.Transitions.Count * 24;

            // 40 is the size of a DiscreteChar filled with nulls;
            // another 40 is the size of an array with a single char range.
            // Any specific DiscreteChar can be larger or can be cached.
            // 40 seems an ok approximation for the average case.
            var elementDistributionsSize = automaton.Data.Transitions.Count(tr => !tr.IsEpsilon) * 80;

            var automatonSizeApprox = fixedPartSize + statesSize + transitionsSize + elementDistributionsSize;

            if (dictSizeApprox < automatonSizeApprox)
            {
                return FromDictionary(
                    DictionaryWeightFunction<TDictionary>.FromDistinctWeights(
                        sequences.Select(seq => new KeyValuePair<TSequence, Weight>(
                            seq,
                            Weight.FromLogValue(automaton.GetLogValue(seq))))));
            }
        }

        // TryEnumerateSupport(..., maxTraversedPaths, ...) is allowed to quit early
        // on complex automata, so we need to explicitly check for point mass
        var point = automaton.TryComputePoint();
        if (point != null)
        {
            return FromPoint(point);
        }
    }

    return Clone(); // TODO: replace with `this` after making automata immutable
}