/// <summary>
/// Checks that <c>TryComputePoint</c> returns null for an automaton built from an 'a' transition
/// followed by a 'b' transition that targets the start state index.
/// </summary>
public void NoPoint1()
{
    var builder = new StringAutomaton.Builder();
    Weight half = Weight.FromValue(0.5);

    // 'a' leaves the start state; 'b' is directed at the start state index.
    var afterA = builder.Start.AddTransition('a', half);
    var afterB = afterA.AddTransition('b', half, builder.Start.Index);
    afterB.SetEndWeight(Weight.One);

    var automaton = builder.GetAutomaton();
    Assert.Null(automaton.TryComputePoint());
}
/// <summary>
/// Samples strings from a one-state automaton (end weight 0.7, self-loop on 'a' with weight 0.3)
/// and compares the empirical mean and variance of the sample lengths with the expected values.
/// </summary>
public void SampleGeometric()
{
    Rand.Restart(96);

    const double StoppingProbability = 0.7;
    const int SampleCount = 30000;

    // The length of sequences sampled from this distribution must follow a geometric distribution
    const double ExpectedMean = (1.0 - StoppingProbability) / StoppingProbability;
    const double ExpectedVariance = (1.0 - StoppingProbability) / (StoppingProbability * StoppingProbability);

    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.StartStateIndex = automatonBuilder.AddState().Index;
    automatonBuilder.Start.SetEndWeight(Weight.FromValue(StoppingProbability));
    automatonBuilder.Start.AddTransition(
        'a',
        Weight.FromValue(1 - StoppingProbability),
        automatonBuilder.Start.Index);

    StringDistribution distribution = StringDistribution.FromWeightFunction(automatonBuilder.GetAutomaton());

    // Accumulate the length of every sampled string.
    var lengthStatistics = new MeanVarianceAccumulator();
    int remaining = SampleCount;
    while (remaining > 0)
    {
        string sample = distribution.Sample();
        lengthStatistics.Add(sample.Length);
        --remaining;
    }

    Assert.Equal(ExpectedMean, lengthStatistics.Mean, 1e-2);
    Assert.Equal(ExpectedVariance, lengthStatistics.Variance, 1e-2);
}
/// <summary>
/// Normalizes a clone of an automaton containing a self-loop and two transitions back to the start state,
/// expects a log-normalizer of log(50), and checks that the log-values of the original and normalized
/// automata differ by exactly that normalizer on a few sample strings.
/// </summary>
public void NormalizeValuesWithNonTrivialLoop()
{
    var automatonBuilder = new StringAutomaton.Builder();
    var acceptingState = automatonBuilder.Start.AddTransition('a', Weight.FromValue(2.0));
    acceptingState.SetEndWeight(Weight.FromValue(5.0));
    acceptingState.AddTransition('b', Weight.FromValue(0.1), automatonBuilder.Start.Index);
    acceptingState.AddTransition('c', Weight.FromValue(0.05), automatonBuilder.Start.Index);
    acceptingState.AddSelfTransition('!', Weight.FromValue(0.5));

    var automaton = automatonBuilder.GetAutomaton();
    var normalizedAutomaton = automaton.Clone();
    double logNormalizer = normalizedAutomaton.NormalizeValues();

    double expectedLogNormalizer = Math.Log(50.0);
    Assert.Equal(expectedLogNormalizer, logNormalizer, 1e-6);
    Assert.Equal(expectedLogNormalizer, GetLogNormalizerByGetValue(automaton), 1e-6);
    Assert.Equal(expectedLogNormalizer, GetLogNormalizerByGetValueWithTransducers(automaton), 1e-6);
    AssertStochastic(normalizedAutomaton);

    string[] probes = { "a!!", "abaca", "a!ba!!ca!!!!" };
    for (int probeIndex = 0; probeIndex < probes.Length; ++probeIndex)
    {
        string probe = probes[probeIndex];

        // Each probe must have a finite log-value under the original automaton.
        Assert.False(double.IsNegativeInfinity(automaton.GetLogValue(probe)));
        Assert.Equal(
            automaton.GetLogValue(probe),
            normalizedAutomaton.GetLogValue(probe) + logNormalizer,
            1e-6);
    }
}
/// <summary>
/// Builds an automaton with 1,200,000 states inside an unlimited-states automaton scope and checks that
/// <c>StringTransducer.FromAutomaton</c> throws <c>AutomatonTooLargeException</c> for it, but does not
/// throw once an unlimited-states transducer scope is also opened.
/// </summary>
public void LargeTransducer()
{
    const int LargeStatesCount = 1200000; // bigger than default MaxStatesCount in automata

    using (var automatonLimitScope = new StringAutomaton.UnlimitedStatesComputation())
    {
        var automatonBuilder = new StringAutomaton.Builder();
        int statesToAdd = LargeStatesCount - automatonBuilder.StatesCount;
        automatonBuilder.AddStates(statesToAdd);

        // Maps every automaton transition to a transducer transition using the same element
        // distribution for both components of the pair.
        Func<Option<ImmutableDiscreteChar>, Weight, ValueTuple<Option<ImmutablePairDistribution<char, ImmutableDiscreteChar>>, Weight>> transitionConverter =
            (elementDistribution, transitionWeight) =>
            {
                var pair = ImmutablePairDistribution<char, ImmutableDiscreteChar>.FromFirstSecond(
                    elementDistribution,
                    elementDistribution);
                return ValueTuple.Create(Option.Some(pair), transitionWeight);
            };

        var hugeAutomaton = automatonBuilder.GetAutomaton();

        Assert.Throws<AutomatonTooLargeException>(
            () => StringTransducer.FromAutomaton(hugeAutomaton, transitionConverter));

        // Shouldn't throw if the maximum number of states is increased
        using (var transducerLimitScope = new StringTransducer.UnlimitedStatesComputation())
        {
            StringTransducer.FromAutomaton(hugeAutomaton, transitionConverter);
        }
    }
}
/// <summary>
/// Projects a 100,000-state chain automaton over 'a', and the matching point string, through the copy
/// transducer and checks that both projections give value 1.0 on that string.
/// </summary>
public void ProjectSourceLargeAutomaton()
{
    using (var limitScope = new StringAutomaton.UnlimitedStatesComputation())
    {
        const int StateCount = 100_000;
        const int TransitionCount = StateCount - 1;

        // Chain of 'a' transitions starting at the start state; the last state reached is accepting.
        var chainBuilder = new StringAutomaton.Builder();
        var lastState = chainBuilder.Start;
        for (int added = 0; added < TransitionCount; ++added)
        {
            lastState = lastState.AddTransition('a', Weight.One);
        }

        lastState.SetEndWeight(Weight.One);

        var chainAutomaton = chainBuilder.GetAutomaton();
        var expectedString = new string('a', TransitionCount);

        var copier = StringTransducer.Copy();
        var automatonProjection = copier.ProjectSource(chainAutomaton);
        var pointProjection = copier.ProjectSource(expectedString);

        StringInferenceTestUtilities.TestValue(automatonProjection, 1.0, expectedString);
        StringInferenceTestUtilities.TestValue(pointProjection, 1.0, expectedString);
    }
}
/// <summary>
/// Builds an automaton with as many states as the (raised) automaton state limit and checks that
/// <c>StringTransducer.FromAutomaton</c> throws <c>AutomatonTooLargeException</c> under the current
/// transducer limit, but succeeds once the transducer limit is raised to match.
/// </summary>
/// <remarks>
/// Both <c>StringAutomaton.MaxStateCount</c> and <c>StringTransducer.MaxStateCount</c> are static settings.
/// They are saved up front and restored in <c>finally</c> so that this test does not leak a raised limit
/// into other tests, even when an assertion fails.
/// </remarks>
public void LargeTransducer()
{
    var prevAutomatonMaxStateCount = StringAutomaton.MaxStateCount;
    int prevTransducerMaxStateCount = StringTransducer.MaxStateCount;

    try
    {
        StringAutomaton.MaxStateCount = 1200000; // Something big

        var bigAutomatonBuilder = new StringAutomaton.Builder();
        bigAutomatonBuilder.AddStates(StringAutomaton.MaxStateCount - bigAutomatonBuilder.StatesCount);

        // Maps every automaton transition to a transducer transition using the same element
        // distribution for both components of the pair.
        Func<Option<DiscreteChar>, Weight, ValueTuple<Option<PairDistribution<char, DiscreteChar>>, Weight>> transitionConverter =
            (dist, weight) => ValueTuple.Create(
                Option.Some(PairDistribution<char, DiscreteChar>.FromFirstSecond(dist, dist)),
                weight);

        var bigAutomaton = bigAutomatonBuilder.GetAutomaton();

        Assert.Throws<AutomatonTooLargeException>(
            () => StringTransducer.FromAutomaton(bigAutomaton, transitionConverter));

        // Shouldn't throw if the maximum number of states is increased
        StringTransducer.MaxStateCount = StringAutomaton.MaxStateCount;
        StringTransducer.FromAutomaton(bigAutomaton, transitionConverter);
    }
    finally
    {
        // Restore both static limits regardless of the test outcome.
        StringTransducer.MaxStateCount = prevTransducerMaxStateCount;
        StringAutomaton.MaxStateCount = prevAutomatonMaxStateCount;
    }
}
/// <summary>
/// Builds an automaton made only of epsilon transitions that loop back to the start state and checks the
/// size, end weight and per-state weights of the start state's epsilon closure.
/// </summary>
public void LoopyEpsilonClosure1()
{
    var automatonBuilder = new StringAutomaton.Builder();

    // Epsilon self-loop on the start state (0.5) plus a longer epsilon path (0.4, 1, 1) back to it.
    automatonBuilder.Start.AddEpsilonTransition(Weight.FromValue(0.5), automatonBuilder.Start.Index);
    var firstHop = automatonBuilder.Start.AddEpsilonTransition(Weight.FromValue(0.4));
    var secondHop = firstHop.AddEpsilonTransition(Weight.One);
    secondHop.AddEpsilonTransition(Weight.One, automatonBuilder.Start.Index);
    automatonBuilder.Start.SetEndWeight(Weight.FromValue(0.1));

    var automaton = automatonBuilder.GetAutomaton();
    AssertStochastic(automaton);

    StringAutomaton.EpsilonClosure closure = automaton.Start.GetEpsilonClosure();
    Assert.Equal(3, closure.Size);
    Assert.Equal(0.0, closure.EndWeight.LogValue, 1e-8);

    int stateCount = closure.Size;
    for (int position = 0; position < stateCount; ++position)
    {
        Weight actualWeight = closure.GetStateWeightByIndex(position);

        // The start state is expected to carry weight 10, every other closure state weight 4.
        double expectedWeight;
        if (closure.GetStateByIndex(position) == automaton.Start)
        {
            expectedWeight = 10;
        }
        else
        {
            expectedWeight = 4;
        }

        Assert.Equal(expectedWeight, actualWeight.Value, 1e-8);
    }
}
/// <summary>
/// Checks <c>GetOutgoingTransitionsForDeterminization</c> for a start state with two overlapping
/// transitions: one uniform over 'a'-'z' and one uniform over 'a'-'z' and 'A'-'Z'.
/// </summary>
public void GetOutgoingTransitionsForDeterminization2()
{
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRange('a', 'z'), Weight.FromValue(2));
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRanges('a', 'z', 'A', 'Z'), Weight.FromValue(3));
    var wrapper = new StringAutomatonWrapper(automatonBuilder);

    // Weighted source state set: state 0 with weight 5.
    var sourceStates = new Dictionary<int, Weight> { { 0, Weight.FromValue(5) } };
    var actualTransitions = wrapper.GetOutgoingTransitionsForDeterminization(sourceStates);

    // Expected segment over 'A'-'Z': reached only through the second transition (state 2).
    var upperCaseSegment = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange('A', 'Z'),
        Weight.FromValue(7.5),
        new Dictionary<int, Weight> { { 2, Weight.FromValue(1) } });

    // Expected segment over 'a'-'z': shared between both destination states.
    var lowerCaseSegment = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange('a', 'z'),
        Weight.FromValue(17.5),
        new Dictionary<int, Weight>
        {
            { 1, Weight.FromValue(10 / 17.5) },
            { 2, Weight.FromValue(7.5 / 17.5) }
        });

    var expectedTransitions = new[] { upperCaseSegment, lowerCaseSegment };

    AssertCollectionsEqual(expectedTransitions, actualTransitions, TransitionInfoEqualityComparer.Instance);
}
/// <summary>
/// Builds a seven-state automaton of epsilon transitions containing several loops, asserts it is stochastic,
/// and checks that all three ways of computing the log-normalizer give 0.
/// </summary>
public void ComputeNormalizerWithManyNonTrivialLoops2()
{
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.AddStates(6);

    var state0 = automatonBuilder[0];
    state0.AddEpsilonTransition(Weight.FromValue(0.2), 1);
    state0.AddEpsilonTransition(Weight.FromValue(0.5), 3);
    state0.SetEndWeight(Weight.FromValue(0.3));

    var state1 = automatonBuilder[1];
    state1.AddEpsilonTransition(Weight.FromValue(0.8), 0);
    state1.AddEpsilonTransition(Weight.FromValue(0.1), 2);
    state1.SetEndWeight(Weight.FromValue(0.1));

    var state2 = automatonBuilder[2];
    state2.SetEndWeight(Weight.FromValue(1.0));

    var state3 = automatonBuilder[3];
    state3.AddEpsilonTransition(Weight.FromValue(0.2), 4);
    state3.AddEpsilonTransition(Weight.FromValue(0.1), 5);
    state3.SetEndWeight(Weight.FromValue(0.7));

    var state4 = automatonBuilder[4];
    state4.AddEpsilonTransition(Weight.FromValue(0.5), 2);
    state4.AddEpsilonTransition(Weight.FromValue(0.5), 6);
    state4.SetEndWeight(Weight.FromValue(0.0));

    var state5 = automatonBuilder[5];
    state5.AddEpsilonTransition(Weight.FromValue(0.1), 3);
    state5.AddEpsilonTransition(Weight.FromValue(0.9), 6);
    state5.SetEndWeight(Weight.Zero);

    var state6 = automatonBuilder[6];
    state6.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();
    AssertStochastic(automaton);

    Assert.Equal(0.0, automaton.GetLogNormalizer(), 1e-6);
    Assert.Equal(0.0, GetLogNormalizerByGetValue(automaton), 1e-6);
    Assert.Equal(0.0, GetLogNormalizerByGetValueWithTransducers(automaton), 1e-6);
}
/// <summary>
/// Checks that <c>TryComputePoint</c> returns "abc" when the automaton also contains an 'a', 'b' path
/// targeting the start state index whose 'b' transition has zero weight.
/// </summary>
public void PointMassDetectionWithDeadLoop()
{
    var builder = new StringAutomaton.Builder();

    // 'a' with weight 0.5, then a zero-weight 'b' directed at the start state index.
    var afterA = builder.Start.AddTransition('a', Weight.FromValue(0.5));
    afterA.AddTransition('b', Weight.Zero, builder.Start.Index);

    // The "abc" branch ends with end weight one.
    var afterAbc = builder.Start.AddTransitionsForSequence("abc");
    afterAbc.SetEndWeight(Weight.One);

    var automaton = builder.GetAutomaton();
    Assert.Equal("abc", automaton.TryComputePoint());
}
/// <summary>
/// Checks that <c>TryComputePoint</c> returns "abc" after <c>AddEpsilonLoop</c> has been applied to the
/// start state with arguments (5, 0.5).
/// </summary>
public void PointMassDetectionWithEpsilonLoop()
{
    var builder = new StringAutomaton.Builder();
    AddEpsilonLoop(builder.Start, 5, 0.5);

    var afterAbc = builder.Start.AddTransitionsForSequence("abc");
    afterAbc.SetEndWeight(Weight.One);

    var automaton = builder.GetAutomaton();
    Assert.Equal("abc", automaton.TryComputePoint());
}
/// <summary>
/// Checks that <c>TryComputePoint</c> returns "b" when the automaton also has an 'a' branch with a
/// self-loop and a further 'b' transition on which no end weight is set.
/// </summary>
public void PointMassDetectionLoopInDeadEnd()
{
    var builder = new StringAutomaton.Builder();
    Weight half = Weight.FromValue(0.5);

    // Branch on 'a': self-loop on 'a', then 'b'; no end weight is assigned along this branch.
    var afterA = builder.Start.AddTransition('a', half);
    var afterLoop = afterA.AddSelfTransition('a', half);
    afterLoop.AddTransition('b', Weight.One);

    // Branch on 'b': the state returned gets end weight one.
    var afterB = builder.Start.AddTransition('b', half);
    afterB.SetEndWeight(Weight.One);

    var automaton = builder.GetAutomaton();
    Assert.Equal("b", automaton.TryComputePoint());
}
/// <summary>
/// Checks that <c>IsZero</c> is true when the only transitions (a self-loop on 'x' and a transition
/// on 'y') both carry zero weight.
/// </summary>
public void ZeroDetectionWithDeadSelfLoop()
{
    var builder = new StringAutomaton.Builder();
    builder.Start.AddSelfTransition('x', Weight.Zero);

    var afterY = builder.Start.AddTransition('y', Weight.Zero);
    afterY.SetEndWeight(Weight.One);

    var automaton = builder.GetAutomaton();
    Assert.True(automaton.IsZero());
}
/// <summary>
/// Applies <c>AddEpsilonLoop</c> with arguments (5, 0) to the start state and checks that the resulting
/// automaton is reported as zero by <c>IsZero</c> but not by <c>IsCanonicZero</c>.
/// </summary>
public void ZeroDetectionWithEpsilonLoop1()
{
    var automatonBuilder = new StringAutomaton.Builder();
    AddEpsilonLoop(automatonBuilder.Start, 5, 0);

    var automaton = automatonBuilder.GetAutomaton();

    bool isCanonicZero = automaton.IsCanonicZero();
    Assert.False(isCanonicZero);

    bool isZero = automaton.IsZero();
    Assert.True(isZero);
}
/// <summary>
/// Checks the friendly and regexp string forms of an automaton with an "xyz" sequence targeting the
/// start state index and a '!' transition whose returned state gets end weight one.
/// </summary>
public void ConvertToStringWithLoops4()
{
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.Start.AddTransitionsForSequence("xyz", automatonBuilder.Start.Index);

    var afterBang = automatonBuilder.Start.AddTransition('!', Weight.One);
    afterBang.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    string friendly = automaton.ToString(AutomatonFormats.Friendly);
    Assert.Equal("(xyz)*!", friendly);

    string regexp = automaton.ToString(AutomatonFormats.Regexp);
    Assert.Equal("(xyz)*?!", regexp);
}
public void GetOutgoingTransitionsForDeterminization1() { var builder = new StringAutomaton.Builder(); builder.Start.AddTransition(DiscreteChar.Uniform(), Weight.FromValue(2)); var wrapper = new StringAutomatonWrapper(builder); var outgoingTransitions = wrapper.GetOutgoingTransitionsForDeterminization(0, Weight.FromValue(3)); var expectedOutgoingTransitions = new[]
/// <summary>
/// Applies <c>AddEpsilonLoop</c> with arguments (5, 2.0) to the start state, adds an 'a' transition on
/// which no end weight is set, and checks that the automaton is reported as zero by <c>IsZero</c> but
/// not by <c>IsCanonicZero</c>.
/// </summary>
public void ZeroDetectionWithEpsilonLoop2()
{
    var automatonBuilder = new StringAutomaton.Builder();
    AddEpsilonLoop(automatonBuilder.Start, 5, 2.0);
    automatonBuilder.Start.AddTransition('a', Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    bool isCanonicZero = automaton.IsCanonicZero();
    Assert.False(isCanonicZero);

    bool isZero = automaton.IsZero();
    Assert.True(isZero);
}
/// <summary>
/// Checks that an automaton whose only transitions carry zero weight is printed as "Ø" in both the
/// friendly and the regexp format.
/// </summary>
public void ConvertToStringWithDeadTransitions2()
{
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.Start.AddSelfTransition('x', Weight.Zero);

    var afterY = automatonBuilder.Start.AddTransition('y', Weight.Zero);
    afterY.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    string friendly = automaton.ToString(AutomatonFormats.Friendly);
    Assert.Equal("Ø", friendly);

    string regexp = automaton.ToString(AutomatonFormats.Regexp);
    Assert.Equal("Ø", regexp);
}
/// <summary>
/// Checks that <c>TryNormalizeValues</c> succeeds on an automaton whose 'a' branch has an epsilon
/// self-loop of weight 3, and that afterwards "a" has value 1 and "b" has value 0.
/// </summary>
public void NormalizeWithInfiniteEpsilon1()
{
    var automatonBuilder = new StringAutomaton.Builder();

    var afterA = automatonBuilder.Start.AddTransition('a', Weight.One);
    var afterLoop = afterA.AddSelfTransition(Option.None, Weight.FromValue(3));
    afterLoop.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    // The automaton takes an infinite value on "a", and yet the normalization must work
    bool normalized = automaton.TryNormalizeValues();
    Assert.True(normalized);

    StringInferenceTestUtilities.TestValue(automaton, 1, "a");
    StringInferenceTestUtilities.TestValue(automaton, 0, "b");
}
/// <summary>
/// Checks the friendly and regexp string forms of a single accepting state with self-loops on 'a' and 'b'.
/// </summary>
public void ConvertToStringWithLoops2()
{
    var automatonBuilder = new StringAutomaton.Builder();
    var start = automatonBuilder.Start;
    start.AddSelfTransition('a', Weight.One);
    start.AddSelfTransition('b', Weight.One);
    start.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    string friendly = automaton.ToString(AutomatonFormats.Friendly);
    Assert.Equal("(a|b)*", friendly);

    string regexp = automaton.ToString(AutomatonFormats.Regexp);
    Assert.Equal("(a|b)*?", regexp);
}
/// <summary>
/// Builds a string distribution from a two-state automaton: the start state has end weight one and a
/// transition on <paramref name="startsWith"/> to a second state, which has end weight one and a
/// self-loop on <paramref name="charsInMainString"/>.
/// </summary>
/// <param name="charsInMainString">Character distribution used for the self-loop of the second state.</param>
/// <param name="startsWith">Character distribution used for the transition out of the start state.</param>
/// <returns>The distribution created by <c>StringDistribution.FromWeightFunction</c> from that automaton.</returns>
public static StringDistribution EmptyOrStartsWith(ImmutableDiscreteChar charsInMainString, ImmutableDiscreteChar startsWith)
{
    // TODO: fix equality and then use factory methods to create this
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.Start.SetEndWeight(Weight.One);

    // Each transition weight is the negated log-average of the distribution with itself.
    var firstCharWeight = Weight.FromLogValue(-startsWith.GetLogAverageOf(startsWith));
    var tailState = automatonBuilder.Start.AddTransition(startsWith, firstCharWeight);

    var tailCharWeight = Weight.FromLogValue(-charsInMainString.GetLogAverageOf(charsInMainString));
    tailState.AddSelfTransition(charsInMainString, tailCharWeight);
    tailState.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();
    return StringDistribution.FromWeightFunction(automaton);
}
/// <summary>
/// Builds a one-state automaton (self-loop on 'a' with weight 0.7, end weight 0.3), asserts it is
/// stochastic, and checks that all three ways of computing the log-normalizer give 0.
/// </summary>
public void ComputeNormalizerSimple1()
{
    var automatonBuilder = new StringAutomaton.Builder();
    var start = automatonBuilder.Start;
    start.AddSelfTransition('a', Weight.FromValue(0.7));
    start.SetEndWeight(Weight.FromValue(0.3));

    var automaton = automatonBuilder.GetAutomaton();
    AssertStochastic(automaton);

    Assert.Equal(0.0, automaton.GetLogNormalizer(), 1e-6);
    Assert.Equal(0.0, GetLogNormalizerByGetValue(automaton), 1e-6);
    Assert.Equal(0.0, GetLogNormalizerByGetValueWithTransducers(automaton), 1e-6);
}
/// <summary>
/// Checks <c>GetOutgoingTransitionsForDeterminization</c> for a start state with four transitions over
/// partially overlapping character ranges, expecting five disjoint segments in the result.
/// </summary>
public void GetOutgoingTransitionsForDeterminization3()
{
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRange('a', 'b'), Weight.FromValue(2));
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRanges('b', 'd'), Weight.FromValue(3));
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRanges('e', 'g'), Weight.FromValue(4));
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRanges(char.MinValue, 'a'), Weight.FromValue(5));
    var wrapper = new StringAutomatonWrapper(automatonBuilder);

    // Weighted source state set: state 0 with weight 6.
    var sourceStates = new Dictionary<int, Weight> { { 0, Weight.FromValue(6) } };
    var actualTransitions = wrapper.GetOutgoingTransitionsForDeterminization(sourceStates);

    // Expected weights of the segments touched by the [char.MinValue, 'a'] transition.
    const double BelowAWeight = 30.0 * 97.0 / 98.0;
    const double WideRangeShareOfA = 30.0 / 98.0;
    const double PointAWeight = WideRangeShareOfA + 6.0;

    var belowA = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange(char.MinValue, (char)('a' - 1)),
        Weight.FromValue(BelowAWeight),
        new Dictionary<int, Weight> { { 4, Weight.FromValue(1) } });

    var pointA = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.PointMass('a'),
        Weight.FromValue(PointAWeight),
        new Dictionary<int, Weight>
        {
            { 1, Weight.FromValue(6.0 / PointAWeight) },
            { 4, Weight.FromValue(WideRangeShareOfA / PointAWeight) }
        });

    var pointB = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.PointMass('b'),
        Weight.FromValue(12.0),
        new Dictionary<int, Weight>
        {
            { 1, Weight.FromValue(0.5) },
            { 2, Weight.FromValue(0.5) }
        });

    var rangeCToD = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange('c', 'd'),
        Weight.FromValue(12.0),
        new Dictionary<int, Weight> { { 2, Weight.FromValue(1.0) } });

    var rangeEToG = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange('e', 'g'),
        Weight.FromValue(24.0),
        new Dictionary<int, Weight> { { 3, Weight.FromValue(1.0) } });

    var expectedTransitions = new[] { belowA, pointA, pointB, rangeCToD, rangeEToG };

    AssertCollectionsEqual(expectedTransitions, actualTransitions, TransitionInfoEqualityComparer.Instance);
}
/// <summary>
/// Checks <c>GetOutgoingTransitionsForDeterminization</c> for three nested ranges that all end at
/// <c>char.MaxValue</c>, expecting three segments whose weights combine the contributions of every
/// transition covering them.
/// </summary>
public void GetOutgoingTransitionsForDeterminization4()
{
    var automatonBuilder = new StringAutomaton.Builder();
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRange(char.MinValue, char.MaxValue), Weight.FromValue(2));
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRange('a', char.MaxValue), Weight.FromValue(3));
    automatonBuilder.Start.AddTransition(DiscreteChar.UniformInRanges('z', char.MaxValue), Weight.FromValue(4));
    var wrapper = new StringAutomatonWrapper(automatonBuilder);

    // Weighted source state set: state 0 with weight 5.
    var sourceStates = new Dictionary<int, Weight> { { 0, Weight.FromValue(5) } };
    var actualTransitions = wrapper.GetOutgoingTransitionsForDeterminization(sourceStates);

    // Contribution of each transition to each segment it overlaps.
    double firstBelowA = 10.0 * 'a' / (char.MaxValue + 1.0);
    double firstAToZ = 10.0 * ('z' - 'a') / (char.MaxValue + 1.0);
    double firstFromZ = 10.0 * (char.MaxValue - 'z' + 1.0) / (char.MaxValue + 1.0);
    double secondAToZ = 15.0 * ('z' - 'a') / (char.MaxValue - 'a' + 1.0);
    double secondFromZ = 15.0 * (char.MaxValue - 'z' + 1.0) / (char.MaxValue - 'a' + 1.0);
    double thirdFromZ = 20.0;

    // Totals per segment, summed in the same left-to-right order throughout.
    double aToZTotal = firstAToZ + secondAToZ;
    double fromZTotal = firstFromZ + secondFromZ + thirdFromZ;

    var belowA = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange(char.MinValue, (char)('a' - 1)),
        Weight.FromValue(firstBelowA),
        new Dictionary<int, Weight> { { 1, Weight.FromValue(1) } });

    var aToBeforeZ = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange('a', (char)('z' - 1)),
        Weight.FromValue(aToZTotal),
        new Dictionary<int, Weight>
        {
            { 1, Weight.FromValue(firstAToZ / aToZTotal) },
            { 2, Weight.FromValue(secondAToZ / aToZTotal) }
        });

    var fromZ = new ValueTuple<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
        DiscreteChar.UniformInRange('z', char.MaxValue),
        Weight.FromValue(fromZTotal),
        new Dictionary<int, Weight>
        {
            { 1, Weight.FromValue(firstFromZ / fromZTotal) },
            { 2, Weight.FromValue(secondFromZ / fromZTotal) },
            { 3, Weight.FromValue(thirdFromZ / fromZTotal) }
        });

    var expectedTransitions = new[] { belowA, aToBeforeZ, fromZ };

    AssertCollectionsEqual(expectedTransitions, actualTransitions, TransitionInfoEqualityComparer.Instance);
}
/// <summary>
/// Checks the friendly and regexp string forms of an automaton where 'a' leads to a middle state from
/// which both "bbb" and 'c' target the start state index, and the start state has end weight one.
/// </summary>
public void ConvertToStringWithLoops1()
{
    var automatonBuilder = new StringAutomaton.Builder();
    int startIndex = automatonBuilder.Start.Index;

    var afterA = automatonBuilder.Start.AddTransition('a', Weight.One);
    afterA.AddTransitionsForSequence("bbb", startIndex);
    afterA.AddTransition('c', Weight.One, startIndex);
    automatonBuilder.Start.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    string friendly = automaton.ToString(AutomatonFormats.Friendly);
    Assert.Equal("(a(c|bbb))*", friendly);

    string regexp = automaton.ToString(AutomatonFormats.Regexp);
    Assert.Equal("(a(c|bbb))*?", regexp);
}
/// <summary>
/// Checks that <c>TryNormalizeValues</c> succeeds when the 'a' branch has an epsilon self-loop of
/// weight 2 and the 'b' branch one of weight 1, and that afterwards "a" has value 1 while the value
/// of "b" is below 1e-50.
/// </summary>
public void NormalizeWithInfiniteEpsilon2()
{
    var automatonBuilder = new StringAutomaton.Builder();

    var afterA = automatonBuilder.Start.AddTransition('a', Weight.One);
    var afterALoop = afterA.AddSelfTransition(Option.None, Weight.FromValue(2));
    afterALoop.SetEndWeight(Weight.One);

    var afterB = automatonBuilder.Start.AddTransition('b', Weight.One);
    var afterBLoop = afterB.AddSelfTransition(Option.None, Weight.FromValue(1));
    afterBLoop.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    // "a" branch infinitely dominates over the "b" branch
    bool normalized = automaton.TryNormalizeValues();
    Assert.True(normalized);

    StringInferenceTestUtilities.TestValue(automaton, 1, "a");
    Assert.True(automaton.GetValue("b") < 1e-50);
}
/// <summary>
/// Checks that an automaton with an 'a' transition of weight 2 targeting the start state index cannot be
/// normalized: <c>NormalizeValues</c> throws <c>InvalidOperationException</c> and
/// <c>TryNormalizeValues</c> returns false on a clone.
/// </summary>
public void NonNormalizableLoop3()
{
    var automatonBuilder = new StringAutomaton.Builder();
    var start = automatonBuilder.Start;
    start.AddTransition('a', Weight.FromValue(2.0), automatonBuilder.Start.Index);
    automatonBuilder.Start.SetEndWeight(Weight.FromValue(5.0));

    var automaton = automatonBuilder.GetAutomaton();
    StringAutomaton automatonCopy = automaton.Clone();

    Assert.Throws<InvalidOperationException>(() => automaton.NormalizeValues());

    bool normalized = automatonCopy.TryNormalizeValues();
    Assert.False(normalized);

    // TODO: fix equality first, then compare the automaton with its copy here.
}
/// <summary>
/// Runs <c>GetLogNormalizer</c> through <c>ProfileAction</c> (argument 100000) inside
/// <c>AssertTimeout</c> (argument 10000) on a small automaton with one self-loop and two branches.
/// </summary>
public void AutomatonNormalizationPerformance1()
{
    AssertTimeout(
        () =>
        {
            var automatonBuilder = new StringAutomaton.Builder();

            // Common "abc" prefix, then a self-loop on 'd' and two branches that each set end weight one.
            var afterPrefix = automatonBuilder.Start.AddTransitionsForSequence("abc");
            afterPrefix.AddSelfTransition('d', Weight.FromValue(0.1));

            var shortBranchEnd = afterPrefix.AddTransitionsForSequence("efg");
            shortBranchEnd.SetEndWeight(Weight.One);

            var longBranchEnd = afterPrefix.AddTransitionsForSequence("hejfhoenmf");
            longBranchEnd.SetEndWeight(Weight.One);

            var automaton = automatonBuilder.GetAutomaton();
            ProfileAction(() => automaton.GetLogNormalizer(), 100000);
        },
        10000);
}
/// <summary>
/// Checks the friendly and regexp string forms of an automaton where 'x' and 'y' lead to the same state,
/// which has self-loops on 'a' and 'b', end weight one, and a further "zzz" sequence whose final state
/// also gets end weight one.
/// </summary>
public void ConvertToStringWithLoops3()
{
    var automatonBuilder = new StringAutomaton.Builder();

    // 'x' creates the hub state; 'y' is directed at the same state index.
    var hub = automatonBuilder.Start.AddTransition('x', Weight.One);
    automatonBuilder.Start.AddTransition('y', Weight.One, hub.Index);

    hub.AddSelfTransition('a', Weight.One);
    hub.AddSelfTransition('b', Weight.One);
    hub.SetEndWeight(Weight.One);

    var afterZzz = hub.AddTransitionsForSequence("zzz");
    afterZzz.SetEndWeight(Weight.One);

    var automaton = automatonBuilder.GetAutomaton();

    string friendly = automaton.ToString(AutomatonFormats.Friendly);
    Assert.Equal("(x|y)(a|b)*[zzz]", friendly);

    string regexp = automaton.ToString(AutomatonFormats.Regexp);
    Assert.Equal("(x|y)(a|b)*(|zzz)", regexp);
}
/// <summary>
/// Builds an automaton where 'a' (weight 2) leads to a state with end weight 5 from which 'b' (0.25)
/// and 'c' (0.2) target the start state index, and checks that all three ways of computing the
/// log-normalizer give log(100).
/// </summary>
public void ComputeNormalizerWithNonTrivialLoop2()
{
    var automatonBuilder = new StringAutomaton.Builder();

    var acceptingState = automatonBuilder.Start.AddTransition('a', Weight.FromValue(2.0));
    acceptingState.SetEndWeight(Weight.FromValue(5.0));
    acceptingState.AddTransition('b', Weight.FromValue(0.25), automatonBuilder.Start.Index);
    acceptingState.AddTransition('c', Weight.FromValue(0.2), automatonBuilder.Start.Index);

    var automaton = automatonBuilder.GetAutomaton();

    double expectedLogNormalizer = Math.Log(100.0);
    Assert.Equal(expectedLogNormalizer, automaton.GetLogNormalizer(), 1e-6);
    Assert.Equal(expectedLogNormalizer, GetLogNormalizerByGetValue(automaton), 1e-6);
    Assert.Equal(expectedLogNormalizer, GetLogNormalizerByGetValueWithTransducers(automaton), 1e-6);
}