/// <summary>
/// Projects an automaton through this transducer, i.e. computes <c>g(b) = sum_a f(a) T(a, b)</c>,
/// with <c>T(a, b)</c> being the current transducer and <c>f(a)</c> the supplied automaton.
/// </summary>
/// <param name="srcAutomaton">
/// The automaton to project. Unless the result is trivially zero, <c>MakeEpsilonFree()</c> is invoked on it.
/// </param>
/// <returns>The projection.</returns>
/// <remarks>
/// This method has a lot in common with Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var mappingAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The traversal below only follows element-labeled source transitions,
    // so the source automaton has to be epsilon-free first.
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    var pairToDestIndex = new Dictionary<(int, int), int>();
    var pending = new Stack<(int mappingIndex, int srcIndex, int destIndex)>();

    // Returns the destination state for a (mapping state, source state) pair.
    // A pair seen for the first time gets a fresh state and is queued for expansion.
    int GetOrCreateDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var key = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();
        created.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, created.Index));
        pairToDestIndex.Add(key, created.Index);
        return created.Index;
    }

    // Seed the work list with the pair of start states.
    builder.StartStateIndex = GetOrCreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

    while (pending.Count > 0)
    {
        var (mappingIndex, srcIndex, destIndex) = pending.Pop();

        var currentMapping = mappingAutomaton.States[mappingIndex];
        var currentSrc = srcAutomaton.States[srcIndex];
        var currentDest = builder[destIndex];

        foreach (var mappingTransition in currentMapping.Transitions)
        {
            var nextMapping = currentMapping.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // The transducer consumes nothing from the source here: the source state stays put.
                var destElementDistribution =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var nextDestIndex = GetOrCreateDestState(nextMapping, currentSrc);
                currentDest.AddTransition(
                    destElementDistribution, mappingTransition.Weight, nextDestIndex, mappingTransition.Group);
            }
            else
            {
                // Pair this mapping transition with every transition leaving the source state.
                foreach (var srcTransition in currentSrc.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrc = currentSrc.Owner.States[srcTransition.DestinationStateIndex];

                    var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var destElementDistribution);

                    // A log scale of negative infinity means this pairing contributes nothing.
                    if (!double.IsNegativeInfinity(projectionLogScale))
                    {
                        var destWeight = Weight.Product(
                            mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));
                        var nextDestIndex = GetOrCreateDestState(nextMapping, nextSrc);
                        currentDest.AddTransition(
                            destElementDistribution, destWeight, nextDestIndex, mappingTransition.Group);
                    }
                }
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Adds a transition that starts and ends in the current state and is labeled with the given element.
/// </summary>
/// <param name="element">The element labeling the transition.</param>
/// <param name="weight">The weight of the transition.</param>
/// <param name="group">The group the new transition is assigned to.</param>
/// <returns>The current state.</returns>
public StateBuilder AddSelfTransition(TElement element, Weight weight, int group = 0) =>
    this.AddTransition(element, weight, this.Index, group);
/// <summary>
/// Adds a self-transition to the current state.
/// </summary>
/// <param name="elementDistribution">
/// The element distribution associated with the transition.
/// If this option holds no value, an epsilon transition will be created.
/// </param>
/// <param name="weight">The transition weight.</param>
/// <param name="group">
/// The group of the added transition. Declared as <see cref="int"/> so that the full range of
/// group identifiers accepted by the other transition-adding methods can be passed;
/// existing callers that pass a <see cref="byte"/> keep compiling through implicit widening.
/// </param>
/// <returns>The current state.</returns>
public StateBuilder AddSelfTransition(
    Option<TElementDistribution> elementDistribution, Weight weight, int group = 0)
{
    // The destination is the state itself, which is what makes this a self-transition.
    return this.AddTransition(elementDistribution, weight, this.Index, group);
}
/// <summary>
/// Creates an automaton <c>f'(s) = sum_{tu=s} f(t)g(u)</c>, where <c>f(t)</c> is the current
/// automaton (in builder) and <c>g(u)</c> is the given automaton.
/// The resulting automaton is also known as the Cauchy product of two automata.
/// </summary>
/// <param name="automaton">The automaton whose states and transitions are appended to the builder.</param>
/// <param name="group">
/// If non-zero, the group assigned to every transition copied from <paramref name="automaton"/>
/// (and to the connecting epsilon transitions, when those are created).
/// If zero, copied transitions keep their own groups.
/// </param>
/// <param name="avoidEpsilonTransitions">
/// If <see langword="true"/>, and either no pre-existing end state has outgoing transitions or the start state
/// of <paramref name="automaton"/> has no incoming transitions, the appended start state is merged into
/// the pre-existing end states instead of being connected to them by epsilon transitions.
/// </param>
public void Append(
    Automaton <TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> automaton,
    int group = 0,
    bool avoidEpsilonTransitions = true)
{
    // States with index below oldStateCount belong to the automaton that was in the builder before this call.
    var oldStateCount = this.states.Count;

    // Copy every state of the appended automaton, shifting transition targets by oldStateCount
    // so that they refer to the copies.
    foreach (var state in automaton.States)
    {
        var stateBuilder = this.AddState();
        stateBuilder.SetEndWeight(state.EndWeight);
        foreach (var transition in state.Transitions)
        {
            var updatedTransition = transition;
            updatedTransition.DestinationStateIndex += oldStateCount;
            if (group != 0)
            {
                updatedTransition.Group = group;
            }

            stateBuilder.AddTransition(updatedTransition);
        }
    }

    // The copy of the appended automaton's start state.
    var secondStartState = this[oldStateCount + automaton.Start.Index];

    if (avoidEpsilonTransitions && (AllEndStatesHaveNoTransitions() || !automaton.Start.HasIncomingTransitions))
    {
        // Remove start state of appended automaton and copy all its transitions to previous end states
        for (var i = 0; i < oldStateCount; ++i)
        {
            var endState = this[i];
            if (!endState.CanEnd)
            {
                continue;
            }

            for (var iterator = secondStartState.TransitionIterator; iterator.Ok; iterator.Next())
            {
                var transition = iterator.Value;
                if (group != 0)
                {
                    transition.Group = group;
                }

                if (transition.DestinationStateIndex == secondStartState.Index)
                {
                    // A self-loop on the appended start state becomes a self-loop on the end state;
                    // its weight is left as is.
                    transition.DestinationStateIndex = endState.Index;
                }
                else
                {
                    // Leaving the merged state: fold the end weight of the first automaton into the transition.
                    transition.Weight = Weight.Product(transition.Weight, endState.EndWeight);
                }

                endState.AddTransition(transition);
            }

            // The merged state ends a sequence only if both the old end state and the appended start state do.
            endState.SetEndWeight(Weight.Product(endState.EndWeight, secondStartState.EndWeight));
        }

        // NOTE(review): only self-loops of the appended start state are redirected above. When this branch
        // is taken because AllEndStatesHaveNoTransitions() holds while the appended start state does have
        // incoming transitions from other appended states, it is not visible here what RemoveState does
        // with those transitions - confirm.
        this.RemoveState(secondStartState.Index);
    }
    else
    {
        // Just connect all end states with start state of appended automaton
        for (var i = 0; i < oldStateCount; i++)
        {
            var state = this[i];
            if (state.CanEnd)
            {
                // The former end weight moves onto the epsilon transition; the state itself stops being an end state.
                state.AddEpsilonTransition(state.EndWeight, secondStartState.Index, group);
                state.SetEndWeight(Weight.Zero);
            }
        }
    }

    // Checks that none of the pre-existing end states has a first transition recorded (FirstTransition == -1).
    bool AllEndStatesHaveNoTransitions()
    {
        for (var i = 0; i < oldStateCount; ++i)
        {
            var state = this.states[i];
            if (state.CanEnd && state.FirstTransition != -1)
            {
                return(false);
            }
        }

        return(true);
    }
}
/// <summary>
/// Adds a transition without an element distribution (an epsilon transition) to the current state.
/// </summary>
/// <param name="weight">The weight of the transition.</param>
/// <param name="destinationStateIndex">
/// The index of the state the transition leads to.
/// If the value of this parameter is <see langword="null"/>, a new state will be created.
/// </param>
/// <param name="group">The group the new transition is assigned to.</param>
/// <returns>The destination state of the added transition.</returns>
public StateBuilder AddEpsilonTransition(Weight weight, int? destinationStateIndex = null, int group = 0) =>
    this.AddTransition(Option.None, weight, destinationStateIndex, group);
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">
/// The automaton to project. Unless the result is trivially zero, <c>MakeEpsilonFree()</c> is called on it.
/// </param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var mappingAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || mappingAutomaton.IsCanonicZero())
    {
        return(Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Zero());
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    var result = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Builder();

    // Maps a (mapping state index, source state index) pair to the index of its destination state.
    var destStateCache = new Dictionary <(int, int), int>();

    // Pairs whose outgoing transitions still have to be generated.
    var stack = new Stack <(int state1, int state2, int destStateIndex)>();

    // Creates destination state and schedules projection computation for it.
    // If computation is already scheduled or done the state index is simply taken from cache
    int CreateDestState(
        PairListAutomaton.State mappingState,
        Automaton <TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton> .State srcState)
    {
        var destPair = (mappingState.Index, srcState.Index);
        if (!destStateCache.TryGetValue(destPair, out var destStateIndex))
        {
            var destState = result.AddState();

            // The destination state's end weight is the product of the end weights of the paired states.
            destState.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
            stack.Push((mappingState.Index, srcState.Index, destState.Index));
            destStateCache[destPair] = destState.Index;
            destStateIndex = destState.Index;
        }

        return(destStateIndex);
    }

    // Populate the stack with start destination state
    result.StartStateIndex = CreateDestState(mappingAutomaton.Start, srcAutomaton.Start);

    // The override compensation below is only considered when the source is a StringAutomaton
    // that reports element log value overrides.
    var stringAutomaton = srcAutomaton as StringAutomaton;
    var sourceDistributionHasLogProbabilityOverrides = stringAutomaton?.HasElementLogValueOverrides ?? false;

    while (stack.Count > 0)
    {
        var(mappingStateIndex, srcStateIndex, destStateIndex) = stack.Pop();

        var mappingState = mappingAutomaton.States[mappingStateIndex];
        var srcState = srcAutomaton.States[srcStateIndex];
        var destState = result[destStateIndex];

        // Iterate over transitions from mappingState
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var childMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            // Epsilon transition case: the mapping transition consumes no source element,
            // so only the mapping state advances while the source state stays the same.
            if (IsSrcEpsilon(mappingTransition))
            {
                var destElementDistribution =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var childDestStateIndex = CreateDestState(childMappingState, srcState);
                destState.AddTransition(destElementDistribution, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                continue;
            }

            // Iterate over the transitions leaving srcState. The source automaton was made
            // epsilon-free above, so every one of them is expected to carry an element distribution.
            foreach (var srcTransition in srcState.Transitions)
            {
                Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var srcChildState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcTransition.ElementDistribution.Value, out var destElementDistribution);

                // A log scale of negative infinity means this pairing contributes nothing; skip it.
                if (double.IsNegativeInfinity(projectionLogScale))
                {
                    continue;
                }

                // In the special case of a log probability override in a DiscreteChar element distribution,
                // we need to compensate for the fact that the distribution is not normalized.
                if (destElementDistribution.HasValue && sourceDistributionHasLogProbabilityOverrides)
                {
                    var discreteChar = (DiscreteChar)(IDistribution <char>)srcTransition.ElementDistribution.Value;
                    if (discreteChar.HasLogProbabilityOverride)
                    {
                        // Total mass = sum over ranges of (per-character probability * number of characters in the range).
                        var totalMass = discreteChar.Ranges.EnumerableSum(rng =>
                            rng.Probability.Value * (rng.EndExclusive - rng.StartInclusive));
                        projectionLogScale -= System.Math.Log(totalMass);
                    }
                }

                // With overrides present and no destination element produced, the weight is fixed to one;
                // otherwise it is the product of both transition weights and the projection scale.
                var destWeight =
                    sourceDistributionHasLogProbabilityOverrides && destElementDistribution.HasNoValue
                        ? Weight.One
                        : Weight.Product(mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(projectionLogScale));

                // We don't want an unnormalizable distribution to become normalizable due to a rounding error.
                // Hence log weights within 1e-12 of zero are snapped to exactly one.
                if (Math.Abs(destWeight.LogValue) < 1e-12)
                {
                    destWeight = Weight.One;
                }

                var childDestStateIndex = CreateDestState(childMappingState, srcChildState);
                destState.AddTransition(destElementDistribution, destWeight, childDestStateIndex, mappingTransition.Group);
            }
        }
    }

    // Clean up the built automaton before materializing it.
    var simplification = new Automaton <TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton> .Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return(result.GetAutomaton());
}
/// <summary>
/// Projects a single sequence through this transducer, i.e. computes <c>g(b) = f(A) T(A, b)</c>,
/// with <c>T(a, b)</c> being the current transducer and <c>A</c> the supplied sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// <para>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </para>
/// <para>
/// This method has a lot in common with Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </para>
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, "srcSequence");

    var mappingAutomaton = this.sequencePairToWeight;
    if (mappingAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    var sourceSequenceManipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    var pairToDestIndex = new Dictionary<(int, int), int>();
    var pending = new Stack<(int mappingIndex, int position, int destIndex)>();

    // Returns the destination state for a (mapping state, position in the sequence) pair.
    // A pair seen for the first time gets a fresh state and is queued for expansion.
    int GetOrCreateDestState(PairListAutomaton.State mappingState, int position)
    {
        var key = (mappingState.Index, position);
        if (pairToDestIndex.TryGetValue(key, out var knownIndex))
        {
            return knownIndex;
        }

        var created = builder.AddState();

        // Ending is only possible once the whole source sequence has been consumed.
        var endWeight = position == srcSequenceLength ? mappingState.EndWeight : Weight.Zero;
        created.SetEndWeight(endWeight);

        pending.Push((mappingState.Index, position, created.Index));
        pairToDestIndex.Add(key, created.Index);
        return created.Index;
    }

    // Seed the work list: mapping start state paired with the beginning of the sequence.
    builder.StartStateIndex = GetOrCreateDestState(mappingAutomaton.Start, 0);

    while (pending.Count > 0)
    {
        var (mappingIndex, position, destIndex) = pending.Pop();

        var currentMapping = mappingAutomaton.States[mappingIndex];
        var currentDest = builder[destIndex];

        foreach (var mappingTransition in currentMapping.Transitions)
        {
            var nextMapping = currentMapping.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Nothing is consumed from the source sequence: the position does not move.
                var destElementWeights =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var nextDestIndex = GetOrCreateDestState(nextMapping, position);
                currentDest.AddTransition(
                    destElementWeights, mappingTransition.Weight, nextDestIndex, mappingTransition.Group);
            }
            else if (position < srcSequenceLength)
            {
                // The transition consumes the element at the current position.
                var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, position);

                var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcSequenceElement, out var destElementDistribution);

                // A log scale of negative infinity means the transition contributes nothing for this element.
                if (!double.IsNegativeInfinity(projectionLogScale))
                {
                    var weight = mappingTransition.Weight * Weight.FromLogValue(projectionLogScale);
                    var nextDestIndex = GetOrCreateDestState(nextMapping, position + 1);
                    currentDest.AddTransition(destElementDistribution, weight, nextDestIndex, mappingTransition.Group);
                }
            }
        }
    }

    var simplification =
        new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Attempts to determinize the automaton,
/// i.e. modify it such that for every state and every element there is at most one transition that allows for that element,
/// and there are no epsilon transitions.
/// </summary>
/// <returns>
/// <see langword="true"/> if the determinization attempt was successful and the automaton is now deterministic,
/// <see langword="false"/> otherwise.
/// </returns>
/// <remarks>
/// See <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf"/> for algorithm details.
/// If the determinization state recorded in <c>Data</c> is not <c>Unknown</c>, the recorded answer is returned
/// without doing any work. Otherwise <c>MakeEpsilonFree()</c> is called before anything else,
/// including on the paths that end up returning <see langword="false"/>.
/// </remarks>
public bool TryDeterminize()
{
    // Reuse the outcome of an earlier attempt if one has been recorded.
    if (this.Data.DeterminizationState != DeterminizationState.Unknown)
    {
        return(this.Data.DeterminizationState == DeterminizationState.IsDeterminized);
    }

    // The state budget is derived from the state count as it is before epsilon removal.
    int maxStatesBeforeStop = Math.Min(this.States.Count * 3, MaxStateCount);

    this.MakeEpsilonFree(); // Deterministic automata cannot have epsilon-transitions

    if (this.UsesGroups)
    {
        // Determinization will result in loss of group information, which we cannot allow
        this.Data = this.Data.WithDeterminizationState(DeterminizationState.IsNonDeterminizable);
        return(false);
    }

    // Weighted state set is a set of (stateId, weight) pairs, where state ids correspond to states of the original automaton.
    // Such pairs correspond to states of the resulting automaton.
    var weightedStateSetQueue = new Queue <Determinization.WeightedStateSet>();
    var weightedStateSetToNewState = new Dictionary <Determinization.WeightedStateSet, int>();
    var builder = new Builder();

    // The start state of the result corresponds to the original start state taken with weight one.
    var startWeightedStateSet = new Determinization.WeightedStateSet { { this.Start.Index, Weight.One } };
    weightedStateSetQueue.Enqueue(startWeightedStateSet);
    weightedStateSetToNewState.Add(startWeightedStateSet, builder.StartStateIndex);
    builder.Start.SetEndWeight(this.Start.EndWeight);

    while (weightedStateSetQueue.Count > 0)
    {
        // Take one unprocessed state of the resulting automaton
        Determinization.WeightedStateSet currentWeightedStateSet = weightedStateSetQueue.Dequeue();
        var currentStateIndex = weightedStateSetToNewState[currentWeightedStateSet];
        var currentState = builder[currentStateIndex];

        // Find out what transitions we should add for this state
        var outgoingTransitionInfos = this.GetOutgoingTransitionsForDeterminization(currentWeightedStateSet);

        // For each transition to add
        foreach ((TElementDistribution, Weight, Determinization.WeightedStateSet)outgoingTransitionInfo in outgoingTransitionInfos)
        {
            TElementDistribution elementDistribution = outgoingTransitionInfo.Item1;
            Weight weight = outgoingTransitionInfo.Item2;
            Determinization.WeightedStateSet destWeightedStateSet = outgoingTransitionInfo.Item3;

            int destinationStateIndex;
            if (!weightedStateSetToNewState.TryGetValue(destWeightedStateSet, out destinationStateIndex))
            {
                if (builder.StatesCount == maxStatesBeforeStop)
                {
                    // Too many states, determinization attempt failed
                    // NOTE(review): the determinization state stays Unknown on this path, so a later call
                    // repeats the whole attempt - confirm this is intended.
                    return(false);
                }

                // Add new state to the result
                var destinationState = builder.AddState();
                weightedStateSetToNewState.Add(destWeightedStateSet, destinationState.Index);
                weightedStateSetQueue.Enqueue(destWeightedStateSet);

                // Compute its ending weight: the sum over the set members of
                // (member weight * end weight of the corresponding original state).
                destinationState.SetEndWeight(Weight.Zero);
                foreach (KeyValuePair <int, Weight> stateIdWithWeight in destWeightedStateSet)
                {
                    var addedWeight = stateIdWithWeight.Value * this.States[stateIdWithWeight.Key].EndWeight;
                    destinationState.SetEndWeight(destinationState.EndWeight + addedWeight);
                }

                destinationStateIndex = destinationState.Index;
            }

            // Add transition to the destination state
            currentState.AddTransition(elementDistribution, weight, destinationStateIndex);
        }
    }

    var simplification = new Simplification(builder, this.PruneStatesWithLogEndWeightLessThan);
    simplification.MergeParallelTransitions(); // Determinization produces a separate transition for each segment

    // Replace the automaton's contents with the determinized version and record the outcome.
    this.Data = builder.GetData().WithDeterminizationState(DeterminizationState.IsDeterminized);

    // NOTE(review): the next two statements assign each property to itself. Unless the setters have
    // side effects that are not visible here, they do nothing - confirm whether they can be removed.
    this.PruneStatesWithLogEndWeightLessThan = this.PruneStatesWithLogEndWeightLessThan;
    this.LogValueOverride = this.LogValueOverride;

    return(true);
}
/// <summary>
/// Computes the multiplicative inverse of a weight by negating its log value.
/// </summary>
/// <param name="weight">The weight to invert.</param>
/// <returns>The inverse <c>I</c> such that <paramref name="weight"/>*I=1.</returns>
public static Weight Inverse(Weight weight) => new Weight(-weight.LogValue);
/// <summary>
/// Computes the sum of two weights.
/// </summary>
/// <param name="weight1">The first weight.</param>
/// <param name="weight2">The second weight.</param>
/// <returns>The weight whose log value is the log-sum-exp of the log values of the two arguments.</returns>
public static Weight Sum(Weight weight1, Weight weight2)
{
    // Weights are kept as log values, so the addition is carried out in the log domain.
    var logOfSum = MMath.LogSumExp(weight1.LogValue, weight2.LogValue);
    return new Weight(logOfSum);
}
/// <summary>
/// Initializes a transition element from its destination index, weight and element distribution.
/// </summary>
/// <param name="destIndex">The destination index stored in the element.</param>
/// <param name="weight">The weight stored in the element.</param>
/// <param name="distribution">The element distribution stored in the element.</param>
internal TransitionElement(int destIndex, Weight weight, TElementDistribution distribution)
{
    this.weight = weight;
    this.distribution = distribution;
    this.destIndex = destIndex;
}
/// <summary>
/// Fills the matrix of total weights between every pair of states in the component
/// using the <a href="http://www.cs.nyu.edu/~mohri/pub/hwa.pdf">generalized Floyd's algorithm</a>.
/// </summary>
private void ComputePairwiseWeightsMatrix()
{
    this.pairwiseWeights = Util.ArrayInit(this.Size, this.Size, (i, j) => Weight.Zero);
    var weights = this.pairwiseWeights;

    // Step 1: accumulate the weights of the direct transitions that pass the filter
    // and stay inside the component.
    for (int src = 0; src < this.Size; ++src)
    {
        State srcState = this.statesInComponent[src];
        for (int t = 0; t < srcState.TransitionCount; ++t)
        {
            Transition transition = srcState.GetTransition(t);
            State destState = srcState.Owner.States[transition.DestinationStateIndex];

            if (!this.transitionFilter(transition))
            {
                continue;
            }

            int dest = this.GetIndexByState(destState);
            if (dest == -1)
            {
                // The transition leaves the component.
                continue;
            }

            weights[src, dest] = Weight.Sum(weights[src, dest], transition.Weight);
        }
    }

    // Step 2: allow paths through intermediate state `via`, one intermediate state at a time.
    for (int via = 0; via < this.Size; ++via)
    {
        Weight loopWeight;
        if (this.useApproximateClosure)
        {
            loopWeight = Weight.ApproximateClosure(weights[via, via]);
        }
        else
        {
            loopWeight = Weight.Closure(weights[via, via]);
        }

        for (int from = 0; from < this.Size; ++from)
        {
            if (from != via && !weights[from, via].IsZero)
            {
                for (int to = 0; to < this.Size; ++to)
                {
                    if (to != via && !weights[via, to].IsZero)
                    {
                        // from -> via, any number of loops at via, via -> to
                        Weight throughVia = Weight.Product(weights[from, via], loopWeight, weights[via, to]);
                        weights[from, to] = Weight.Sum(weights[from, to], throughVia);
                    }
                }
            }
        }

        // Fold the loops at `via` into every entry of its row and column.
        for (int other = 0; other < this.Size; ++other)
        {
            weights[other, via] = Weight.Product(weights[other, via], loopWeight);
            weights[via, other] = Weight.Product(weights[via, other], loopWeight);
        }

        // The diagonal entry is the closure itself, overriding what the loop above wrote there.
        weights[via, via] = loopWeight;
    }
}