/// <summary> /// Converts the current <see cref="StringDictionaryWeightFunction"/> to a deterministic automaton, /// compressing shared suffixes. When the current <see cref="StringDictionaryWeightFunction"/> is /// normalized, the resulting automaton is stochastic. /// </summary> /// <returns>A deterministic <see cref="StringAutomaton"/>.</returns> public override StringAutomaton AsAutomaton() { var dict = (SortedList <string, Weight>)Dictionary; if (dict.Count == 0) { return(Automaton <string, char, ImmutableDiscreteChar, StringManipulator, StringAutomaton> .Zero()); } if (dict.Count == 1) { return(Automaton <string, char, ImmutableDiscreteChar, StringManipulator, StringAutomaton> .ConstantOnLog(dict.Values[0].LogValue, dict.Keys[0])); } var result = new Automaton <string, char, ImmutableDiscreteChar, StringManipulator, StringAutomaton> .Builder(); var end = result .AddState() .SetEndWeight(Weight.One); var sharedPrefixWithPreviousLength = new int[dict.Count]; sharedPrefixWithPreviousLength[0] = 0; for (int i = 1; i < dict.Count; ++i) { sharedPrefixWithPreviousLength[i] = GetSharedPrefixLength(dict.Keys[i - 1], dict.Keys[i]); } var suffixInfo = new (int suffixLength, int prefixEndStateIndex, char suffixTransitionChar, Weight suffixTransitionWeight)[dict.Count];
/// <summary>
/// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project. Validated with <c>Argument.CheckIfNotNull</c>.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// <para>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </para>
/// <para>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </para>
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, nameof(srcSequence));

    var mappingAutomaton = this.sequencePairToWeight;
    if (mappingAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    var sourceSequenceManipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

    var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (mapping state index, position in the source sequence) to the index of the destination state built for it.
    var destStateCache = new Dictionary<(int, int), int>();

    // Destination states that have been created but whose outgoing transitions have not been generated yet.
    var stack = new Stack<(int mappingStateIndex, int srcSequenceIndex, int destStateIndex)>();

    // Creates destination state and schedules projection computation for it.
    // If computation is already scheduled or done the state index is simply taken from cache.
    int CreateDestState(PairListAutomaton.State mappingState, int srcSequenceIndex)
    {
        var destPair = (mappingState.Index, srcSequenceIndex);
        if (!destStateCache.TryGetValue(destPair, out var destStateIndex))
        {
            var destState = result.AddState();

            // The mapping state's end weight is used only when the whole source sequence has been consumed.
            destState.SetEndWeight(
                srcSequenceIndex == srcSequenceLength
                    ? mappingState.EndWeight
                    : Weight.Zero);
            stack.Push((mappingState.Index, srcSequenceIndex, destState.Index));
            destStateCache[destPair] = destState.Index;
            destStateIndex = destState.Index;
        }

        return destStateIndex;
    }

    // Populate the stack with start destination state
    result.StartStateIndex = CreateDestState(mappingAutomaton.Start, 0);

    while (stack.Count > 0)
    {
        var (mappingStateIndex, srcSequenceIndex, destStateIndex) = stack.Pop();

        var mappingState = mappingAutomaton.States[mappingStateIndex];
        var destState = result[destStateIndex];

        // Every non-epsilon transition leaving this state consumes the same source element,
        // so it is looked up once per state rather than once per transition.
        var hasSrcElement = srcSequenceIndex < srcSequenceLength;
        var srcSequenceElement = hasSrcElement
            ? sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex)
            : default(TSrcElement);

        // Enumerate transitions from the current mapping state
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            // Source-epsilon transition: nothing is consumed from the source sequence,
            // so the position in it stays the same.
            if (IsSrcEpsilon(mappingTransition))
            {
                var destElementWeights =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var childDestStateIndex = CreateDestState(destMappingState, srcSequenceIndex);
                destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                continue;
            }

            // Non-epsilon transition: it needs a source element to consume,
            // so it cannot be taken once the sequence is exhausted.
            if (!hasSrcElement)
            {
                continue;
            }

            var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                srcSequenceElement, out var destElementDistribution);

            // A scale of log(0) means the transition cannot be taken on this element.
            if (double.IsNegativeInfinity(projectionLogScale))
            {
                continue;
            }

            var weight = mappingTransition.Weight * Weight.FromLogValue(projectionLogScale);
            var childDestState = CreateDestState(destMappingState, srcSequenceIndex + 1);
            destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return result.GetAutomaton();
}
/// <summary>
/// Projects an automaton through the current transducer, i.e. computes <c>g(b) = sum_a f(a) T(a, b)</c>,
/// where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var mappingAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The algorithm below only follows element-consuming transitions of the projected automaton.
    // NOTE(review): this is called on the caller's instance and presumably rewrites it in place - confirm.
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // (mapping state index, source state index) -> index of the destination state representing that pair.
    var pairToDestIndex = new Dictionary<(int, int), int>();

    // Destination states whose outgoing transitions still have to be generated.
    var pending = new Stack<(int mappingStateIndex, int srcStateIndex, int destStateIndex)>();

    // Looks up the destination state for a pair of states, creating and scheduling it on first request.
    int CreateDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var key = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();
        created.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, created.Index));
        pairToDestIndex[key] = created.Index;
        return created.Index;
    }

    // Seed the work stack with the pair of start states
    builder.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

    while (pending.Count > 0)
    {
        var (mappingIndex, srcIndex, destIndex) = pending.Pop();

        var currentMapping = mappingAutomaton.States[mappingIndex];
        var currentSrc = srcAutomaton.States[srcIndex];
        var currentDest = builder[destIndex];

        foreach (var mappingTransition in currentMapping.Transitions)
        {
            var nextMapping = currentMapping.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Source-epsilon transition: the mapping automaton advances, the source state stays put
                var emitted =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var epsilonTarget = CreateDestState(nextMapping, currentSrc);
                currentDest.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
            }
            else
            {
                // Pair the mapping transition with every transition leaving the current source state
                foreach (var srcTransition in currentSrc.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrc = currentSrc.Owner.States[srcTransition.DestinationStateIndex];

                    var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var projected);

                    // A scale of log(0) means the two transitions are incompatible; nothing to add
                    if (!double.IsNegativeInfinity(logScale))
                    {
                        var combinedWeight = Weight.Product(
                            mappingTransition.Weight,
                            srcTransition.Weight,
                            Weight.FromLogValue(logScale));
                        var target = CreateDestState(nextMapping, nextSrc);
                        currentDest.AddTransition(projected, combinedWeight, target, mappingTransition.Group);
                    }
                }
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Projects an automaton through the current transducer, i.e. computes <c>g(b) = sum_a f(a) T(a, b)</c>,
/// where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var mappingAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The algorithm below only follows element-consuming transitions of the projected automaton.
    // NOTE(review): this is called on the caller's instance and presumably rewrites it in place - confirm.
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // (mapping state index, source state index) -> index of the destination state representing that pair.
    var pairToDestIndex = new Dictionary<(int, int), int>();

    // Destination states whose outgoing transitions still have to be generated.
    var pending = new Stack<(int mappingStateIndex, int srcStateIndex, int destStateIndex)>();

    // Looks up the destination state for a pair of states, creating and scheduling it on first request.
    int CreateDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var key = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();
        created.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, created.Index));
        pairToDestIndex[key] = created.Index;
        return created.Index;
    }

    // Seed the work stack with the pair of start states
    builder.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

    // Only a StringAutomaton can report element log value overrides; any other source is treated as having none.
    var sourceHasLogValueOverrides = (srcAutomaton as StringAutomaton)?.HasElementLogValueOverrides ?? false;

    while (pending.Count > 0)
    {
        var (mappingIndex, srcIndex, destIndex) = pending.Pop();

        var currentMapping = mappingAutomaton.States[mappingIndex];
        var currentSrc = srcAutomaton.States[srcIndex];
        var currentDest = builder[destIndex];

        foreach (var mappingTransition in currentMapping.Transitions)
        {
            var nextMapping = mappingAutomaton.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Source-epsilon transition: the mapping automaton advances, the source state stays put
                var emitted =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var epsilonTarget = CreateDestState(nextMapping, currentSrc);
                currentDest.AddTransition(emitted, mappingTransition.Weight, epsilonTarget, mappingTransition.Group);
            }
            else
            {
                // Pair the mapping transition with every transition leaving the current source state
                foreach (var srcTransition in currentSrc.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrc = srcAutomaton.States[srcTransition.DestinationStateIndex];

                    var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var projected);

                    // A scale of log(0) means the two transitions are incompatible; nothing to add
                    if (!double.IsNegativeInfinity(logScale))
                    {
                        // A DiscreteChar element distribution with a log probability override is not normalized,
                        // so the projection scale is divided by the distribution's total mass to compensate.
                        if (projected.HasValue && sourceHasLogValueOverrides)
                        {
                            var srcCharDistribution = (DiscreteChar)(IDistribution<char>)srcTransition.ElementDistribution.Value;
                            if (srcCharDistribution.HasLogProbabilityOverride)
                            {
                                var totalMass = srcCharDistribution.Ranges.EnumerableSum(
                                    range => range.Probability.Value * (range.EndExclusive - range.StartInclusive));
                                logScale -= System.Math.Log(totalMass);
                            }
                        }

                        Weight combinedWeight;
                        if (sourceHasLogValueOverrides && projected.HasNoValue)
                        {
                            combinedWeight = Weight.One;
                        }
                        else
                        {
                            combinedWeight = Weight.Product(
                                mappingTransition.Weight,
                                srcTransition.Weight,
                                Weight.FromLogValue(logScale));
                        }

                        // Snap weights that are within rounding error of one to exactly one, so that
                        // an unnormalizable distribution does not become normalizable by accident.
                        if (Math.Abs(combinedWeight.LogValue) < 1e-12)
                        {
                            combinedWeight = Weight.One;
                        }

                        var target = CreateDestState(nextMapping, nextSrc);
                        currentDest.AddTransition(projected, combinedWeight, target, mappingTransition.Group);
                    }
                }
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}