/// <summary>
/// Initializes instance of <see cref="StateCollection"/>.
/// </summary>
/// <param name="owner">The automaton whose states this collection wraps.</param>
/// <param name="states">
/// Not read by this constructor: the backing state list is taken from
/// <paramref name="owner"/> instead.
/// </param>
internal StateCollection(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> owner,
    List<StateData> states)
{
    // Both fields are derived from the owner; the 'states' argument is intentionally left untouched.
    (this.owner, this.statesData) = (owner, owner.statesData);
}
/// <summary>
/// Creates an automaton <c>f'(s) = sum_{tu=s} f(t)g(u)</c>, where <c>f(t)</c> is the current
/// automaton (in builder) and <c>g(u)</c> is the given automaton.
/// The resulting automaton is also known as the Cauchy product of two automata.
/// </summary>
/// <param name="automaton">The automaton to append to the one being built.</param>
/// <param name="group">If non-zero, every transition of the appended part is assigned to this group.</param>
/// <param name="avoidEpsilonTransitions">
/// When <see langword="true"/>, and either no end state of the current automaton has outgoing
/// transitions or the start state of <paramref name="automaton"/> has no incoming transitions,
/// the appended start state is merged into the current end states. Otherwise the end states are
/// connected to the appended start state with epsilon transitions.
/// </param>
public void Append(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> automaton,
    int group = 0,
    bool avoidEpsilonTransitions = true)
{
    // States with an index below this value belong to the automaton that was already in the builder.
    var firstAppendedIndex = this.states.Count;
    var overrideGroup = group != 0;

    // Copy all states of the appended automaton, shifting transition targets past the existing states.
    foreach (var sourceState in automaton.States)
    {
        var copiedState = this.AddState();
        copiedState.SetEndWeight(sourceState.EndWeight);

        foreach (var sourceTransition in sourceState.Transitions)
        {
            var shiftedTransition = sourceTransition;
            shiftedTransition.DestinationStateIndex += firstAppendedIndex;
            if (overrideGroup)
            {
                shiftedTransition.Group = group;
            }

            copiedState.AddTransition(shiftedTransition);
        }
    }

    var appendedStart = this[firstAppendedIndex + automaton.Start.Index];
    var mergeStartIntoEndStates =
        avoidEpsilonTransitions
        && (OldEndStatesHaveNoTransitions() || !automaton.Start.HasIncomingTransitions);

    if (!mergeStartIntoEndStates)
    {
        // Just connect all end states with start state of appended automaton
        for (var index = 0; index < firstAppendedIndex; index++)
        {
            var oldState = this[index];
            if (!oldState.CanEnd)
            {
                continue;
            }

            // The former end weight is carried by the epsilon transition instead.
            oldState.AddEpsilonTransition(oldState.EndWeight, appendedStart.Index, group);
            oldState.SetEndWeight(Weight.Zero);
        }
    }
    else
    {
        // Remove start state of appended automaton and copy all its transitions to previous end states
        for (var index = 0; index < firstAppendedIndex; ++index)
        {
            var oldEndState = this[index];
            if (oldEndState.CanEnd)
            {
                for (var cursor = appendedStart.TransitionIterator; cursor.Ok; cursor.Next())
                {
                    var copiedTransition = cursor.Value;
                    if (overrideGroup)
                    {
                        copiedTransition.Group = group;
                    }

                    if (copiedTransition.DestinationStateIndex != appendedStart.Index)
                    {
                        // Leaving the merged state: fold the old end weight into the transition.
                        copiedTransition.Weight *= oldEndState.EndWeight;
                    }
                    else
                    {
                        // Self-loop of the appended start state becomes a self-loop of the end state.
                        copiedTransition.DestinationStateIndex = oldEndState.Index;
                    }

                    oldEndState.AddTransition(copiedTransition);
                }

                oldEndState.SetEndWeight(oldEndState.EndWeight * appendedStart.EndWeight);
            }
        }

        this.RemoveState(appendedStart.Index);
    }

    // Returns true when no pre-existing state that can end has any outgoing transition.
    bool OldEndStatesHaveNoTransitions()
    {
        for (var index = 0; index < firstAppendedIndex; ++index)
        {
            var stateData = this.states[index];
            var canEnd = !stateData.EndWeight.IsZero;
            if (canEnd && stateData.FirstTransitionIndex != -1)
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Builds sub-automata for the given automaton: first computes a topological order together with
/// the group subgraphs, then constructs the sub-automata from the automaton's states.
/// </summary>
/// <param name="automaton">The automaton to extract groups from.</param>
/// <returns>
/// The dictionary produced by <c>BuildSubautomata</c>.
/// NOTE(review): keys are presumably group identifiers - confirm against <c>BuildSubautomata</c>.
/// </returns>
internal static Dictionary<int, TThis> ExtractGroups(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> automaton)
{
    var topologicalOrder = ComputeTopologicalOrderAndGroupSubgraphs(automaton, out var groupSubgraphs);
    var automatonStates = automaton.States;
    return BuildSubautomata(automatonStates, topologicalOrder, groupSubgraphs);
}
/// <summary>
/// Creates a transducer <c>T(a, b) = I[a = ""] I[b[0] in c, |b| = 1]</c>, where <c>c</c> is a given element distribution.
/// </summary>
/// <param name="destElementDist">The element distribution to constrain the second transducer argument to.</param>
/// <returns>The created transducer.</returns>
public static TThis ProduceElement(TDestElementDistribution destElementDist)
{
    // Automaton over single-element destination sequences, built with the constant 1.0.
    var singleElementAutomaton =
        Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>
            .ConstantOnElement(1.0, destElementDist);

    return Produce(singleElementAutomaton);
}
/// <summary>
/// Creates a transducer <c>T(a, b) = I[a[0] = c, |a| = 1] I[b = ""]</c>, where <c>c</c> is a given element.
/// </summary>
/// <param name="srcElement">The element to constrain the first transducer argument to.</param>
/// <returns>The created transducer.</returns>
public static TThis ConsumeElement(TSrcElement srcElement)
{
    // Automaton over single-element source sequences, built with the constant 1.0.
    var singleElementAutomaton =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>
            .ConstantOnElement(1.0, srcElement);

    return Consume(singleElementAutomaton);
}
/// <summary>
/// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, nameof(srcSequence));

    var result = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    if (this.sequencePairToWeight.IsCanonicZero())
    {
        // Nothing to project onto: return the automaton of the still-empty builder.
        return result.GetAutomaton();
    }

    // These values do not change during the traversal, so they are computed once here
    // rather than on every invocation of the recursive local function below.
    var sourceSequenceManipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var srcSequenceLength = sourceSequenceManipulator.GetLength(srcSequence);

    // Maps (mapping state index, position in the source sequence) to the created destination state index.
    var destStateCache = new Dictionary<(int, int), int>();
    result.StartStateIndex = BuildProjectionOfSequence(this.sequencePairToWeight.Start, 0);

    var simplification = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(result, null);
    simplification.RemoveDeadStates();
    simplification.SimplifyIfNeeded();

    return result.GetAutomaton();

    // Recursively builds the projection of a given sequence onto this transducer.
    // Returns the index of the destination state for the pair (mappingState, srcSequenceIndex).
    int BuildProjectionOfSequence(PairListAutomaton.State mappingState, int srcSequenceIndex)
    {
        //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
        //// Unfortunately, it's not clear how to avoid the duplication in the current design.

        var statePair = (mappingState.Index, srcSequenceIndex);
        if (destStateCache.TryGetValue(statePair, out var destStateIndex))
        {
            return destStateIndex;
        }

        // Cache the state before recursing so that revisiting the same pair reuses it.
        var destState = result.AddState();
        destStateCache.Add(statePair, destState.Index);

        // Enumerate transitions from the current mapping state
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var destMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            // Epsilon transition case: the position in the source sequence does not advance.
            if (IsSrcEpsilon(mappingTransition))
            {
                var destElementWeights =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var childDestStateIndex = BuildProjectionOfSequence(destMappingState, srcSequenceIndex);
                destState.AddTransition(destElementWeights, mappingTransition.Weight, childDestStateIndex, mappingTransition.Group);
                continue;
            }

            // Normal transition case: handled only while source elements remain; advances the position by one.
            if (srcSequenceIndex < srcSequenceLength)
            {
                var srcSequenceElement = sourceSequenceManipulator.GetElement(srcSequence, srcSequenceIndex);

                var projectionLogScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    srcSequenceElement, out var destElementDistribution);
                if (double.IsNegativeInfinity(projectionLogScale))
                {
                    // A log scale of negative infinity means no transition is added.
                    continue;
                }

                var weight = Weight.Product(mappingTransition.Weight, Weight.FromLogValue(projectionLogScale));
                var childDestState = BuildProjectionOfSequence(destMappingState, srcSequenceIndex + 1);
                destState.AddTransition(destElementDistribution, weight, childDestState, mappingTransition.Group);
            }
        }

        // The end weight is non-zero only once the whole source sequence has been consumed.
        destState.SetEndWeight(srcSequenceIndex == srcSequenceLength ? mappingState.EndWeight : Weight.Zero);
        return destState.Index;
    }
}
/// <summary>
/// Initializes a new instance of <see cref="State"/> class. Used internally by automaton implementation
/// to wrap StateData for use in public Automaton APIs.
/// </summary>
/// <param name="owner">The automaton stored in <c>Owner</c>.</param>
/// <param name="index">The value stored in <c>Index</c>.</param>
/// <param name="data">The state data stored in <c>Data</c>.</param>
internal State(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> owner,
    int index,
    StateData data)
{
    (this.Owner, this.Index, this.Data) = (owner, index, data);
}
/// <summary>
/// Recursively builds the projection of a given automaton onto this transducer.
/// The projected automaton must be epsilon-free.
/// </summary>
/// <param name="destAutomaton">The projection being built.</param>
/// <param name="mappingState">The currently traversed state of the transducer.</param>
/// <param name="srcState">The currently traversed state of the automaton being projected.</param>
/// <param name="destStateCache">The cache of the created projection states, keyed by (mapping state index, source state index).</param>
/// <returns>The state of the projection corresponding to the given pair of mapping state and source state.</returns>
private Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State BuildProjectionOfAutomaton(
    TDestAutomaton destAutomaton,
    PairListAutomaton.State mappingState,
    Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState,
    Dictionary<IntPair, Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State> destStateCache)
{
    Debug.Assert(mappingState != null && srcState != null, "Valid states must be provided.");
    Debug.Assert(!ReferenceEquals(srcState.Owner, destAutomaton), "Cannot build a projection in place.");

    //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
    //// Unfortunately, it's not clear how to avoid the duplication in the current design.

    // If this pair of states has been visited already, reuse the state created for it.
    var cacheKey = new IntPair(mappingState.Index, srcState.Index);
    Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.State projectedState;
    if (destStateCache.TryGetValue(cacheKey, out projectedState))
    {
        return projectedState;
    }

    // Cache the new state before recursing so that revisiting the same pair reuses it.
    projectedState = destAutomaton.AddState();
    destStateCache.Add(cacheKey, projectedState);

    // Walk over every transition leaving the mapping state.
    for (int mappingIndex = 0; mappingIndex < mappingState.TransitionCount; mappingIndex++)
    {
        var mappingTransition = mappingState.GetTransition(mappingIndex);
        var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

        if (IsSrcEpsilon(mappingTransition))
        {
            // Source-epsilon transition: the source state stays where it is.
            TDestElementDistribution emittedDistribution =
                mappingTransition.ElementDistribution == null ? null : mappingTransition.ElementDistribution.Second;
            var epsilonChild = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, srcState, destStateCache);
            projectedState.AddTransition(emittedDistribution, mappingTransition.Weight, epsilonChild, mappingTransition.Group);
        }
        else
        {
            // Pair the mapping transition with each transition leaving the source state.
            for (int srcIndex = 0; srcIndex < srcState.TransitionCount; srcIndex++)
            {
                var srcTransition = srcState.GetTransition(srcIndex);
                Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                TDestElementDistribution projectedDistribution;
                double logScale = mappingTransition.ElementDistribution.ProjectFirst(
                    srcTransition.ElementDistribution, out projectedDistribution);

                // A log scale of negative infinity means no transition is added for this pair.
                if (!double.IsNegativeInfinity(logScale))
                {
                    Weight combinedWeight = Weight.Product(
                        mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                    var pairedChild = this.BuildProjectionOfAutomaton(destAutomaton, nextMappingState, nextSrcState, destStateCache);
                    projectedState.AddTransition(projectedDistribution, combinedWeight, pairedChild, mappingTransition.Group);
                }
            }
        }
    }

    projectedState.EndWeight = Weight.Product(mappingState.EndWeight, srcState.EndWeight);
    return projectedState;
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project. <c>MakeEpsilonFree</c> is invoked on it before the projection is built.</param>
/// <returns>The projection.</returns>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();
    if (srcAutomaton.IsCanonicZero() || this.sequencePairToWeight.IsCanonicZero())
    {
        // Either operand is zero: return the automaton of the still-empty builder.
        return builder.GetAutomaton();
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    // Maps (mapping state index, source state index) to the index of the created destination state.
    var pairToDestIndex = new Dictionary<(int, int), int>();
    builder.StartStateIndex = ProjectStatePair(this.sequencePairToWeight.Start, srcAutomaton.Start);

    var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplifier.RemoveDeadStates();
    simplifier.SimplifyIfNeeded();

    return builder.GetAutomaton();

    // Recursively builds the projection of a given automaton onto this transducer.
    // The projected automaton must be epsilon-free.
    int ProjectStatePair(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        //// The code of this method has a lot in common with the code of Automaton<>.BuildProduct.
        //// Unfortunately, it's not clear how to avoid the duplication in the current design.

        // If this pair has already been visited, reuse the index of its destination state.
        var cacheKey = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(cacheKey, out var knownIndex))
        {
            return knownIndex;
        }

        // Cache the new state before recursing so that revisiting the same pair reuses it.
        var projectedState = builder.AddState();
        pairToDestIndex.Add(cacheKey, projectedState.Index);

        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Source-epsilon transition: the source state stays where it is.
                var emittedDistribution =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var epsilonChildIndex = ProjectStatePair(nextMappingState, srcState);
                projectedState.AddTransition(emittedDistribution, mappingTransition.Weight, epsilonChildIndex, mappingTransition.Group);
            }
            else
            {
                // Pair the mapping transition with each transition leaving the source state.
                foreach (var srcTransition in srcState.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                    var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var projectedDistribution);

                    // A log scale of negative infinity means no transition is added for this pair.
                    if (!double.IsNegativeInfinity(logScale))
                    {
                        var combinedWeight = Weight.Product(
                            mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                        var pairedChildIndex = ProjectStatePair(nextMappingState, nextSrcState);
                        projectedState.AddTransition(projectedDistribution, combinedWeight, pairedChildIndex, mappingTransition.Group);
                    }
                }
            }
        }

        projectedState.SetEndWeight(Weight.Product(mappingState.EndWeight, srcState.EndWeight));
        return projectedState.Index;
    }
}
/// <summary>
/// Computes <c>g(b) = f(A) T(A, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>A</c> is a given sequence.
/// </summary>
/// <param name="srcSequence">The sequence to project.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// <para>
/// Using this method is more efficient than applying <see cref="ProjectSource(TSrcAutomaton)"/>
/// to the automaton representation of a projected sequence.
/// </para>
/// <para>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </para>
/// </remarks>
public TDestAutomaton ProjectSource(TSrcSequence srcSequence)
{
    Argument.CheckIfNotNull(srcSequence, "srcSequence");

    var transducerAutomaton = this.sequencePairToWeight;
    if (transducerAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    var manipulator =
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.SequenceManipulator;
    var sequenceLength = manipulator.GetLength(srcSequence);

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (mapping state index, position in the source sequence) to the destination state index.
    var pairToDestIndex = new Dictionary<(int, int), int>();

    // Destination states whose outgoing transitions still have to be computed.
    var pending = new Stack<(int state1, int state2, int destStateIndex)>();

    // Returns the destination state index for the given pair. On first sight of a pair the state
    // is created, its end weight is set and it is scheduled for processing; afterwards the
    // cached index is returned.
    int GetOrScheduleDestState(PairListAutomaton.State mappingState, int position)
    {
        var cacheKey = (mappingState.Index, position);
        if (pairToDestIndex.TryGetValue(cacheKey, out var knownIndex))
        {
            return knownIndex;
        }

        var createdState = builder.AddState();

        // The end weight is non-zero only once the whole source sequence has been consumed.
        createdState.SetEndWeight(position == sequenceLength ? mappingState.EndWeight : Weight.Zero);
        pending.Push((mappingState.Index, position, createdState.Index));
        pairToDestIndex[cacheKey] = createdState.Index;
        return createdState.Index;
    }

    // Populate the stack with start destination state
    builder.StartStateIndex = GetOrScheduleDestState(transducerAutomaton.Start, 0);

    while (pending.Count > 0)
    {
        var (mappingStateIndex, srcSequenceIndex, destStateIndex) = pending.Pop();

        var mappingState = transducerAutomaton.States[mappingStateIndex];
        var destState = builder[destStateIndex];

        // Enumerate transitions from the current mapping state
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Source-epsilon transition: the position in the source sequence does not advance.
                var emittedDistribution =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var epsilonChildIndex = GetOrScheduleDestState(nextMappingState, srcSequenceIndex);
                destState.AddTransition(emittedDistribution, mappingTransition.Weight, epsilonChildIndex, mappingTransition.Group);
            }
            else if (srcSequenceIndex < sequenceLength)
            {
                // Non-epsilon transition: consumes the source element at the current position.
                var currentElement = manipulator.GetElement(srcSequence, srcSequenceIndex);

                var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                    currentElement, out var projectedDistribution);

                // A log scale of negative infinity means no transition is added.
                if (!double.IsNegativeInfinity(logScale))
                {
                    var scaledWeight = mappingTransition.Weight * Weight.FromLogValue(logScale);
                    var advancedChildIndex = GetOrScheduleDestState(nextMappingState, srcSequenceIndex + 1);
                    destState.AddTransition(projectedDistribution, scaledWeight, advancedChildIndex, mappingTransition.Group);
                }
            }
        }
    }

    var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplifier.RemoveDeadStates();
    simplifier.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project. <c>MakeEpsilonFree</c> is invoked on it before the projection is built.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var transducerAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || transducerAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (mapping state index, source state index) to the destination state index.
    var pairToDestIndex = new Dictionary<(int, int), int>();

    // Destination states whose outgoing transitions still have to be computed.
    var pending = new Stack<(int state1, int state2, int destStateIndex)>();

    // Returns the destination state index for the given pair. On first sight of a pair the state
    // is created, its end weight is set and it is scheduled for processing; afterwards the
    // cached index is returned.
    int GetOrScheduleDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var cacheKey = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(cacheKey, out var knownIndex))
        {
            return knownIndex;
        }

        var createdState = builder.AddState();
        createdState.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, createdState.Index));
        pairToDestIndex[cacheKey] = createdState.Index;
        return createdState.Index;
    }

    // Populate the stack with start destination state
    builder.StartStateIndex = GetOrScheduleDestState(transducerAutomaton.Start, srcAutomaton.Start);

    while (pending.Count > 0)
    {
        var (mappingStateIndex, srcStateIndex, destStateIndex) = pending.Pop();

        var mappingState = transducerAutomaton.States[mappingStateIndex];
        var srcState = srcAutomaton.States[srcStateIndex];
        var destState = builder[destStateIndex];

        // Iterate over transitions from mappingState
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Source-epsilon transition: the source state stays where it is.
                var emittedDistribution =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var epsilonChildIndex = GetOrScheduleDestState(nextMappingState, srcState);
                destState.AddTransition(emittedDistribution, mappingTransition.Weight, epsilonChildIndex, mappingTransition.Group);
            }
            else
            {
                // Pair the mapping transition with each transition leaving the source state.
                foreach (var srcTransition in srcState.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                    var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var projectedDistribution);

                    // A log scale of negative infinity means no transition is added for this pair.
                    if (!double.IsNegativeInfinity(logScale))
                    {
                        var combinedWeight = Weight.Product(
                            mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                        var pairedChildIndex = GetOrScheduleDestState(nextMappingState, nextSrcState);
                        destState.AddTransition(projectedDistribution, combinedWeight, pairedChildIndex, mappingTransition.Group);
                    }
                }
            }
        }
    }

    var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplifier.RemoveDeadStates();
    simplifier.SimplifyIfNeeded();

    return builder.GetAutomaton();
}
/// <summary>
/// Creates an automaton <c>f'(s) = sum_{tu=s} f(t)g(u)</c>, where <c>f(t)</c> is the current
/// automaton (in builder) and <c>g(u)</c> is the given automaton.
/// The resulting automaton is also known as the Cauchy product of two automata.
/// </summary>
/// <param name="automaton">Given automaton.</param>
/// <param name="group">If non-zero, all transitions in the appended part will be put
/// into the specified group.</param>
/// <param name="avoidEpsilonTransitions">When set to <see langword="true"/> (default), and
/// at least one of the following
/// <list type="bullet">
/// <item>None of the end states of the current automaton have any outgoing transitions</item>
/// <item>The start state of the given <paramref name="automaton"/> has no incoming transitions</item>
/// </list>
/// is true, no epsilon transitions will be used to concatenate the automata. Otherwise,
/// epsilon transitions will be used</param>
/// <returns>A pair of boolean values. The first indicates whether adding new epsilon transitions was avoided.
/// The second indicates, whether the determinization state of the concatenated automata was preserved, i.e.
/// whether both the current and the given automata being determinized implies that the result automaton is
/// determinized as well.</returns>
public (bool avoidedEpsilonTransitions, bool preservedDeterminizationState) Append(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> automaton,
    int group = 0,
    bool avoidEpsilonTransitions = true)
{
    // States with an index below this value belong to the automaton that was already in the builder.
    var firstAppendedIndex = this.states.Count;
    var overrideGroup = group != 0;

    // Copy all states of the appended automaton, shifting transition targets past the existing states.
    foreach (var sourceState in automaton.States)
    {
        var copiedState = this.AddState();
        copiedState.SetEndWeight(sourceState.EndWeight);

        foreach (var sourceTransition in sourceState.Transitions)
        {
            var shiftedTransition = sourceTransition.With(
                destinationStateIndex: sourceTransition.DestinationStateIndex + firstAppendedIndex);
            if (overrideGroup)
            {
                shiftedTransition = shiftedTransition.With(group: group);
            }

            copiedState.AddTransition(shiftedTransition);
        }
    }

    var appendedStart = this[firstAppendedIndex + automaton.Start.Index];

    // Both facts are always computed because the second return value depends on each of them.
    bool oldEndStatesAreLeaves = OldEndStatesHaveNoOutgoingTransitions();
    bool appendedStartHasIncoming = AppendedStartHasIncomingTransitions();

    bool mergeIsPossible = oldEndStatesAreLeaves || !appendedStartHasIncoming;
    bool mergeStartIntoEndStates = avoidEpsilonTransitions && mergeIsPossible;
    bool determinizationPreserved =
        avoidEpsilonTransitions && oldEndStatesAreLeaves && !appendedStartHasIncoming;

    if (!mergeStartIntoEndStates)
    {
        // Just connect all end states with start state of appended automaton
        for (var index = 0; index < firstAppendedIndex; index++)
        {
            var oldState = this[index];
            if (!oldState.CanEnd)
            {
                continue;
            }

            // The former end weight is carried by the epsilon transition instead.
            oldState.AddEpsilonTransition(oldState.EndWeight, appendedStart.Index, group);
            oldState.SetEndWeight(Weight.Zero);
        }
    }
    else
    {
        // Remove start state of appended automaton and copy all its transitions to previous end states
        for (var index = 0; index < firstAppendedIndex; ++index)
        {
            var oldEndState = this[index];
            if (oldEndState.CanEnd)
            {
                for (var cursor = appendedStart.TransitionIterator; cursor.Ok; cursor.Next())
                {
                    var original = cursor.Value;
                    var isSelfLoop = original.DestinationStateIndex == appendedStart.Index;

                    // A null argument leaves the corresponding property of the transition unchanged.
                    Weight? replacementWeight = isSelfLoop ? (Weight?)null : original.Weight * oldEndState.EndWeight;
                    int? replacementDestination = isSelfLoop ? (int?)oldEndState.Index : null;
                    int? replacementGroup = overrideGroup ? (int?)group : null;

                    oldEndState.AddTransition(
                        original.With(
                            weight: replacementWeight,
                            destinationStateIndex: replacementDestination,
                            group: replacementGroup));
                }

                oldEndState.SetEndWeight(oldEndState.EndWeight * appendedStart.EndWeight);
            }
        }

        this.RemoveState(appendedStart.Index);
    }

    return (mergeStartIntoEndStates, determinizationPreserved);

    // Returns true when no pre-existing state that can end has any outgoing transition.
    bool OldEndStatesHaveNoOutgoingTransitions()
    {
        for (var index = 0; index < firstAppendedIndex; ++index)
        {
            var stateData = this.states[index];
            var canEnd = !stateData.EndWeight.IsZero;
            if (canEnd && stateData.FirstTransitionIndex != -1)
            {
                return false;
            }
        }

        return true;
    }

    // Returns true when any transition of the appended automaton targets its start state.
    bool AppendedStartHasIncomingTransitions()
    {
        foreach (var candidate in automaton.Data.Transitions)
        {
            if (candidate.DestinationStateIndex == automaton.Data.StartStateIndex)
            {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Initializes instance of <see cref="StateCollection"/>.
/// </summary>
/// <param name="owner">The automaton whose <c>Data.States</c> and <c>Data.Transitions</c> this collection reads.</param>
internal StateCollection(
    Automaton<TSequence, TElement, TElementDistribution, TSequenceManipulator, TThis> owner)
{
    // Read the owner's data once and take both collections from it.
    var ownerData = owner.Data;
    this.states = ownerData.States;
    this.transitions = ownerData.Transitions;
}
/// <summary>
/// Computes <c>g(b) = sum_a f(a) T(a, b)</c>, where <c>T(a, b)</c> is the current transducer and <c>f(a)</c> is a given automaton.
/// </summary>
/// <param name="srcAutomaton">The automaton to project. <c>MakeEpsilonFree</c> is invoked on it before the projection is built.</param>
/// <returns>The projection.</returns>
/// <remarks>
/// The code of this method has a lot in common with the code of Automaton.SetToProduct.
/// Unfortunately, it's not clear how to avoid the duplication in the current design.
/// </remarks>
public TDestAutomaton ProjectSource(TSrcAutomaton srcAutomaton)
{
    Argument.CheckIfNotNull(srcAutomaton, "srcAutomaton");

    var transducerAutomaton = this.sequencePairToWeight;
    if (srcAutomaton.IsCanonicZero() || transducerAutomaton.IsCanonicZero())
    {
        return Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Zero();
    }

    // The projected automaton must be epsilon-free
    srcAutomaton.MakeEpsilonFree();

    var builder = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Builder();

    // Maps (mapping state index, source state index) to the destination state index.
    var pairToDestIndex = new Dictionary<(int, int), int>();

    // Destination states whose outgoing transitions still have to be computed.
    var pending = new Stack<(int state1, int state2, int destStateIndex)>();

    // Returns the destination state index for the given pair. On first sight of a pair the state
    // is created, its end weight is set and it is scheduled for processing; afterwards the
    // cached index is returned.
    int GetOrScheduleDestState(
        PairListAutomaton.State mappingState,
        Automaton<TSrcSequence, TSrcElement, TSrcElementDistribution, TSrcSequenceManipulator, TSrcAutomaton>.State srcState)
    {
        var cacheKey = (mappingState.Index, srcState.Index);
        if (pairToDestIndex.TryGetValue(cacheKey, out var knownIndex))
        {
            return knownIndex;
        }

        var createdState = builder.AddState();
        createdState.SetEndWeight(mappingState.EndWeight * srcState.EndWeight);
        pending.Push((mappingState.Index, srcState.Index, createdState.Index));
        pairToDestIndex[cacheKey] = createdState.Index;
        return createdState.Index;
    }

    // Populate the stack with start destination state
    builder.StartStateIndex = GetOrScheduleDestState(transducerAutomaton.Start, srcAutomaton.Start);

    // Log-probability overrides are only looked for when the source is a StringAutomaton.
    var sourceAsStringAutomaton = srcAutomaton as StringAutomaton;
    var hasLogValueOverrides = sourceAsStringAutomaton?.HasElementLogValueOverrides ?? false;

    while (pending.Count > 0)
    {
        var (mappingStateIndex, srcStateIndex, destStateIndex) = pending.Pop();

        var mappingState = transducerAutomaton.States[mappingStateIndex];
        var srcState = srcAutomaton.States[srcStateIndex];
        var destState = builder[destStateIndex];

        // Iterate over transitions from mappingState
        foreach (var mappingTransition in mappingState.Transitions)
        {
            var nextMappingState = mappingState.Owner.States[mappingTransition.DestinationStateIndex];

            if (IsSrcEpsilon(mappingTransition))
            {
                // Source-epsilon transition: the source state stays where it is.
                var emittedDistribution =
                    mappingTransition.ElementDistribution.HasValue
                        ? mappingTransition.ElementDistribution.Value.Second
                        : Option.None;
                var epsilonChildIndex = GetOrScheduleDestState(nextMappingState, srcState);
                destState.AddTransition(emittedDistribution, mappingTransition.Weight, epsilonChildIndex, mappingTransition.Group);
            }
            else
            {
                // Pair the mapping transition with each transition leaving the source state.
                foreach (var srcTransition in srcState.Transitions)
                {
                    Debug.Assert(!srcTransition.IsEpsilon, "The automaton being projected must be epsilon-free.");

                    var nextSrcState = srcState.Owner.States[srcTransition.DestinationStateIndex];

                    var logScale = mappingTransition.ElementDistribution.Value.ProjectFirst(
                        srcTransition.ElementDistribution.Value, out var projectedDistribution);

                    // A log scale of negative infinity means no transition is added for this pair.
                    if (!double.IsNegativeInfinity(logScale))
                    {
                        // In the special case of a log probability override in a DiscreteChar element distribution,
                        // we need to compensate for the fact that the distribution is not normalized.
                        if (projectedDistribution.HasValue && hasLogValueOverrides)
                        {
                            var sourceChars = (DiscreteChar)(IDistribution<char>)srcTransition.ElementDistribution.Value;
                            if (sourceChars.HasLogProbabilityOverride)
                            {
                                // Total mass: per-range probability times the number of characters in the range.
                                var totalMass = sourceChars.Ranges.EnumerableSum(
                                    rng => rng.Probability.Value * (rng.EndExclusive - rng.StartInclusive));
                                logScale -= System.Math.Log(totalMass);
                            }
                        }

                        Weight combinedWeight;
                        if (hasLogValueOverrides && projectedDistribution.HasNoValue)
                        {
                            combinedWeight = Weight.One;
                        }
                        else
                        {
                            combinedWeight = Weight.Product(
                                mappingTransition.Weight, srcTransition.Weight, Weight.FromLogValue(logScale));
                        }

                        // We don't want an unnormalizable distribution to become normalizable due to a rounding error.
                        if (Math.Abs(combinedWeight.LogValue) < 1e-12)
                        {
                            combinedWeight = Weight.One;
                        }

                        var pairedChildIndex = GetOrScheduleDestState(nextMappingState, nextSrcState);
                        destState.AddTransition(projectedDistribution, combinedWeight, pairedChildIndex, mappingTransition.Group);
                    }
                }
            }
        }
    }

    var simplifier = new Automaton<TDestSequence, TDestElement, TDestElementDistribution, TDestSequenceManipulator, TDestAutomaton>.Simplification(builder, null);
    simplifier.RemoveDeadStates();
    simplifier.SimplifyIfNeeded();

    return builder.GetAutomaton();
}