/// <summary>
/// Verifies properness and per-string probabilities of string distributions
/// built with minimum and/or maximum length constraints.
/// </summary>
public void LengthBounds()
{
    // Any characters, length between 1 and 3.
    var anyOneToThree = StringDistribution.Any(minLength: 1, maxLength: 3);
    Assert.True(anyOneToThree.IsProper());
    var anyOneToThreeProbability = StringInferenceTestUtilities.StringUniformProbability(1, 3, 65536);
    StringInferenceTestUtilities.TestProbability(anyOneToThree, anyOneToThreeProbability, "a", "aa", "aaa");
    StringInferenceTestUtilities.TestProbability(anyOneToThree, 0.0, string.Empty, "aaaa");

    // Characters 'a' or 'b', repeated 1 to 3 times.
    var abOneToThree = StringDistribution.Repeat(DiscreteChar.OneOf('a', 'b'), minTimes: 1, maxTimes: 3);
    Assert.True(abOneToThree.IsProper());
    var abOneToThreeProbability = StringInferenceTestUtilities.StringUniformProbability(1, 3, 2);
    StringInferenceTestUtilities.TestProbability(abOneToThree, abOneToThreeProbability, "a", "ab", "aba");
    StringInferenceTestUtilities.TestProbability(abOneToThree, 0.0, string.Empty, "aaaa", "abab", "cc");

    // Characters 'a' or 'b', repeated exactly twice.
    var abExactlyTwo = StringDistribution.Repeat(DiscreteChar.OneOf('a', 'b'), minTimes: 2, maxTimes: 2);
    Assert.True(abExactlyTwo.IsProper());
    var abExactlyTwoProbability = StringInferenceTestUtilities.StringUniformProbability(2, 2, 2);
    StringInferenceTestUtilities.TestProbability(abExactlyTwo, abExactlyTwoProbability, "aa", "ab", "ba", "bb");
    StringInferenceTestUtilities.TestProbability(abExactlyTwo, 0.0, string.Empty, "a", "abab", "cc");

    // Only a lower bound: the test expects an improper distribution.
    var atLeastTwo = StringDistribution.Any(minLength: 2);
    Assert.False(atLeastTwo.IsProper());
    StringInferenceTestUtilities.TestProbability(atLeastTwo, 1.0, "aa", "123", "@*(@*&(@)");
    StringInferenceTestUtilities.TestProbability(atLeastTwo, 0.0, string.Empty, "a", "!");

    // Digits, at most 3 of them.
    var digitsUpToThree = StringDistribution.ZeroOrMore(DiscreteChar.Digit(), maxTimes: 3);
    Assert.True(digitsUpToThree.IsProper());
    var digitsUpToThreeProbability = StringInferenceTestUtilities.StringUniformProbability(0, 3, 10);
    StringInferenceTestUtilities.TestProbability(digitsUpToThree, digitsUpToThreeProbability, string.Empty, "1", "32", "432");
    StringInferenceTestUtilities.TestProbability(digitsUpToThree, 0.0, "abc", "1234");
}
/// <summary>
/// Infers the text produced by formatting an unknown template with an uncertain name
/// and a date in one of two observed formats, then checks that three sample sentences
/// do not have a log-probability of negative infinity under the inferred distribution.
/// </summary>
public void SemanticWebTest2b()
{
    // First format argument: one of two spellings of the name.
    var nameDist = StringDistribution.OneOf("Anthony Blair", "Tony Blair");
    var name = Variable.Random(nameDist);

    // Second format argument: one of the observed date strings, selected by a uniform index.
    var observedDates = Variable.Observed(new[] { "6 May 1953", "May 6, 1953" });
    var dateIndex = Variable.DiscreteUniform(observedDates.Range);
    var date = ArrayIndex(observedDates, dateIndex);

    var formatTemplate = Variable.Random(StringDistribution.Any());
    var formattedText = Variable.StringFormat(formatTemplate, name, date);

    var inferenceEngine = new InferenceEngine();
    inferenceEngine.Compiler.RecommendedQuality = QualityBand.Experimental;
    inferenceEngine.NumberOfIterations = 1;

    var textDist = inferenceEngine.Infer<StringDistribution>(formattedText);
    Console.WriteLine("textDist={0}", textDist);

    var sentencesToCheck = new[]
    {
        "6 May 1953 is the date of birth of Tony Blair.",
        "6 May 1953 is the date of birth of Anthony Blair.",
        "Mr. Tony Blair was born on May 6, 1953.",
    };
    foreach (var sentence in sentencesToCheck)
    {
        Assert.False(double.IsNegativeInfinity(textDist.GetLogProb(sentence)));
    }
}
/// <summary>
/// Checks that a distribution created from the string "1337" is not uniform,
/// and that it reports itself as uniform after <c>SetToUniform</c> is called.
/// </summary>
public void PointMassToUniform()
{
    var distribution = StringDistribution.String("1337");
    bool uniformBefore = distribution.IsUniform();
    Assert.False(uniformBefore);

    distribution.SetToUniform();

    bool uniformAfter = distribution.IsUniform();
    Assert.True(uniformAfter);
}
/// <summary>
/// Checks that a zero automaton with an epsilon loop attached to its start state
/// is no longer the canonical zero but is still detected as zero.
/// </summary>
public void ZeroDetectionWithEpsilonLoop1()
{
    StringAutomaton automaton = StringAutomaton.Zero();

    // AddEpsilonLoop is a helper defined outside this block; the meaning of the
    // arguments 5 and 0 is not visible here (presumably loop size and weight).
    AddEpsilonLoop(automaton.Start, 5, 0);

    bool isCanonicZero = automaton.IsCanonicZero();
    Assert.False(isCanonicZero);

    bool isZero = automaton.IsZero();
    Assert.True(isZero);
}
/// <summary>
/// Checks the result of appending the string "h" to <c>StringDistribution.Any()</c>:
/// the result is expected to be improper, to give probability 1 to the sampled strings
/// ending in "h", and probability 0 to the empty string and to "jam".
/// </summary>
public void AppendPointMassUniform()
{
    var anyString = StringDistribution.Any();
    var letterH = StringDistribution.String("h");
    var anyFollowedByH = anyString + letterH;

    Assert.False(anyFollowedByH.IsProper());

    StringInferenceTestUtilities.TestProbability(
        anyFollowedByH,
        1.0,
        "h",
        "hh",
        "advahbdkjshbfjlhh");
    StringInferenceTestUtilities.TestProbability(
        anyFollowedByH,
        0.0,
        string.Empty,
        "jam");
}
/// <summary>
/// Checks an equally weighted sum of <c>StringDistribution.Any()</c> and the string "hello":
/// "hello" is expected to get probability 1.0, other strings 0.5.
/// </summary>
public void Mixture3()
{
    var mixture = StringDistribution.Zero();
    Assert.False(mixture.IsProper());

    // Overwrite the zero distribution with the weighted sum of the two components.
    var anyComponent = StringDistribution.Any();
    var helloComponent = StringDistribution.String("hello");
    mixture.SetToSum(0.5, anyComponent, 0.5, helloComponent);

    StringInferenceTestUtilities.TestProbability(mixture, 1.0, "hello");
    StringInferenceTestUtilities.TestProbability(mixture, 0.5, string.Empty, "something else");
}
/// <summary>
/// Checks that a mixture of <c>StringDistribution.Any()</c> and a distribution over
/// "c", "d", "e" is improper and includes each of the strings "a" through "e".
/// </summary>
public void Mixture2()
{
    var anyString = StringDistribution.Any();
    var threeStrings = StringDistribution.OneOf("c", "d", "e");

    var combined = StringDistribution.OneOf(anyString, threeStrings);

    Assert.False(combined.IsProper());
    StringInferenceTestUtilities.TestIfIncludes(
        combined,
        "a",
        "b",
        "c",
        "d",
        "e");
}
/// <summary>
/// Checks that the zero distribution is neither uniform, a point mass, nor proper,
/// and that it assigns probability 0 to the sampled strings.
/// </summary>
public void Zero()
{
    var zeroDistribution = StringDistribution.Zero();

    bool isUniform = zeroDistribution.IsUniform();
    Assert.False(isUniform);

    bool isPointMass = zeroDistribution.IsPointMass;
    Assert.False(isPointMass);

    bool isProper = zeroDistribution.IsProper();
    Assert.False(isProper);

    StringInferenceTestUtilities.TestProbability(zeroDistribution, 0.0, "hello", "!", string.Empty);
}
/// <summary>
/// Checks that a zero automaton extended with an epsilon loop and an 'a' transition
/// from its start state is no longer the canonical zero but is still detected as zero.
/// </summary>
public void ZeroDetectionWithEpsilonLoop2()
{
    StringAutomaton automaton = StringAutomaton.Zero();

    // AddEpsilonLoop is a helper defined outside this block; the meaning of the
    // arguments 5 and 2.0 is not visible here (presumably loop size and weight).
    AddEpsilonLoop(automaton.Start, 5, 2.0);

    // The state returned by AddTransition is not used and no end weight is set here.
    automaton.Start.AddTransition('a', Weight.One);

    bool isCanonicZero = automaton.IsCanonicZero();
    Assert.False(isCanonicZero);

    bool isZero = automaton.IsZero();
    Assert.True(isZero);
}
/// <summary>
/// Checks <c>MMath.WeightedAverage</c> on hand-picked extreme inputs, then sweeps a grid of
/// weights and values asserting that the result lies between the two values and equals
/// <c>MMath.Average</c> when both weights are equal.
/// </summary>
public void WeightedAverageTest()
{
    // Hand-picked cases. The expected value is always passed first so that a failure
    // message labels "Expected" and "Actual" correctly.
    Assert.Equal(Environment.Is64BitProcess ? 3.86361619394904E-311 : 3.86361619394162E-311, MMath.WeightedAverage(0.82912896852490248, 2.5484859206000203E-311, 3.50752234977395E-313, 31.087830618727477));
    Assert.Equal(double.MinValue, MMath.WeightedAverage(0.1, double.MinValue, 0.01, double.MinValue));
    Assert.Equal(-double.Epsilon, MMath.WeightedAverage(0.1, -double.Epsilon, double.MaxValue, -double.Epsilon));
    Assert.Equal(MMath.Average(2e-250, 4e-250), MMath.WeightedAverage(1e-100, 2e-250, 1e-100, 4e-250));
    Assert.Equal(MMath.Average(2e250, 4e250), MMath.WeightedAverage(1e100, 2e250, 1e100, 4e250));
    Assert.Equal(-double.Epsilon, MMath.WeightedAverage(0, 0, 0.1, -double.Epsilon));
    Assert.Equal(-double.Epsilon, MMath.WeightedAverage(0.1, -double.Epsilon, 0, double.NegativeInfinity));
    Assert.False(double.IsNaN(MMath.WeightedAverage(1.7976931348623157E+308, double.NegativeInfinity, 4.94065645841247E-324, double.NegativeInfinity)));
    Assert.False(double.IsNaN(MMath.WeightedAverage(0.01, double.NegativeInfinity, double.MaxValue, double.MaxValue)));
    Assert.False(double.IsNaN(MMath.WeightedAverage(0.01, double.NegativeInfinity, double.Epsilon, double.NegativeInfinity)));
    Assert.Equal(double.MaxValue, MMath.WeightedAverage(double.MaxValue, double.MaxValue, double.MaxValue, double.MaxValue));

    // Grid sweep, capped at roughly `limit` checked combinations. The counter is shared
    // between parallel iterations, so it is updated atomically.
    const int limit = 2_000_000;
    int count = 0;
    Parallel.ForEach(OperatorTests.DoublesAtLeastZero(), wa =>
    {
        Parallel.ForEach(OperatorTests.DoublesAtLeastZero(), wb =>
        {
            if (count > limit)
            {
                return;
            }

            Trace.WriteLine($"wa = {wa}, wb = {wb}");
            foreach (var a in OperatorTests.Doubles())
            {
                if (count > limit)
                {
                    break;
                }

                foreach (var b in OperatorTests.Doubles())
                {
                    if (count > limit)
                    {
                        break;
                    }

                    // Skip pairs whose sum is NaN.
                    if (double.IsNaN(a + b))
                    {
                        continue;
                    }

                    double midpoint = MMath.WeightedAverage(wa, a, wb, b);
                    Assert.True(midpoint >= System.Math.Min(a, b), $"Failed assertion: MMath.WeightedAverage({wa:r}, {a:r}, {wb:r}, {b:r}) {midpoint} >= {System.Math.Min(a, b)}");
                    Assert.True(midpoint <= System.Math.Max(a, b), $"Failed assertion: MMath.WeightedAverage({wa:r}, {a:r}, {wb:r}, {b:r}) {midpoint} <= {System.Math.Max(a, b)}");
                    if (wa == wb)
                    {
                        Assert.Equal(MMath.Average(a, b), midpoint);
                    }

                    Interlocked.Increment(ref count);
                }
            }
        });
    });
}
/// <summary>
/// Tests whether the product of the given distributions is equal to another distribution on the specified strings.
/// </summary>
/// <param name="argument1">The first argument of the product.</param>
/// <param name="argument2">The second argument of the product.</param>
/// <param name="trueProduct">The true product.</param>
/// <param name="stringsToCheckOn">The strings to test.</param>
public static void TestProduct(
    StringDistribution argument1,
    StringDistribution argument2,
    StringDistribution trueProduct,
    params string[] stringsToCheckOn)
{
    var product = new StringDistribution();
    double productLogNormalizer = product.SetToProductAndReturnLogNormalizer(argument1, argument2);

    // The log-normalizer of the product must agree with the log-average computed
    // directly, both on the original first argument and on a clone of it.
    double logAverageOf = argument1.GetLogAverageOf(argument2);
    Assert.Equal(productLogNormalizer, logAverageOf);
    Assert.Equal(logAverageOf, Clone(argument1).GetLogAverageOf(argument2));

    if (trueProduct.IsZero())
    {
        Assert.True(product.IsZero());
        Assert.True(double.IsNegativeInfinity(productLogNormalizer));
    }
    else if (trueProduct.IsPointMass)
    {
        Assert.True(product.IsPointMass);

        // Expected value first, so a failure message labels the two points correctly.
        Assert.Equal(trueProduct.Point, product.Point);
    }
    else if (trueProduct.IsUniform())
    {
        Assert.True(product.IsUniform());
    }
    else
    {
        Assert.False(product.IsZero());
        Assert.False(product.IsPointMass);
        Assert.False(product.IsUniform());

        Assert.Equal(trueProduct.IsProper(), product.IsProper());

        foreach (var str in stringsToCheckOn)
        {
            double logProb1 = argument1.GetLogProb(str);
            double logProb2 = argument2.GetLogProb(str);

            // NOTE(review): the per-string check reads probabilities from trueProduct,
            // not from the computed product - confirm this is intended.
            double logProbProduct = trueProduct.GetLogProb(str);

            if (double.IsNegativeInfinity(logProb1) || double.IsNegativeInfinity(logProb2))
            {
                // A string impossible under either argument must be impossible under the product.
                Assert.True(double.IsNegativeInfinity(logProbProduct));
            }
            else if (double.IsNegativeInfinity(logProbProduct))
            {
                Assert.True(double.IsNegativeInfinity(logProb1) || double.IsNegativeInfinity(logProb2));
            }
            else
            {
                Assert.Equal(logProb1 + logProb2, logProbProduct + productLogNormalizer, LogValueEps);
            }
        }
    }
}
/// <summary>
/// Checks <c>MMath.WeightedAverage</c> on hand-picked extreme inputs, then sweeps a grid of
/// weights and values asserting that the result lies between the two values and equals
/// <c>MMath.Average</c> when both weights are equal.
/// </summary>
public void WeightedAverageTest()
{
    // Hand-picked cases. The expected value is always passed first so that a failure
    // message labels "Expected" and "Actual" correctly.
    Assert.Equal(double.MinValue, MMath.WeightedAverage(0.1, double.MinValue, 0.01, double.MinValue));
    Assert.Equal(-double.Epsilon, MMath.WeightedAverage(0.1, -double.Epsilon, double.MaxValue, -double.Epsilon));
    Assert.Equal(MMath.Average(2e-250, 4e-250), MMath.WeightedAverage(1e-100, 2e-250, 1e-100, 4e-250));
    Assert.Equal(MMath.Average(2e250, 4e250), MMath.WeightedAverage(1e100, 2e250, 1e100, 4e250));
    Assert.Equal(-double.Epsilon, MMath.WeightedAverage(0, 0, 0.1, -double.Epsilon));
    Assert.Equal(-double.Epsilon, MMath.WeightedAverage(0.1, -double.Epsilon, 0, double.NegativeInfinity));
    Assert.False(double.IsNaN(MMath.WeightedAverage(1.7976931348623157E+308, double.NegativeInfinity, 4.94065645841247E-324, double.NegativeInfinity)));
    Assert.False(double.IsNaN(MMath.WeightedAverage(0.01, double.NegativeInfinity, double.MaxValue, double.MaxValue)));
    Assert.False(double.IsNaN(MMath.WeightedAverage(0.01, double.NegativeInfinity, double.Epsilon, double.NegativeInfinity)));
    Assert.Equal(double.MaxValue, MMath.WeightedAverage(double.MaxValue, double.MaxValue, double.MaxValue, double.MaxValue));

    // Grid sweep, capped at roughly `limit` checked combinations. The counter is shared
    // between parallel iterations, so it is updated atomically.
    const int limit = 2_000_000;
    int count = 0;
    Parallel.ForEach(OperatorTests.DoublesAtLeastZero(), wa =>
    {
        Parallel.ForEach(OperatorTests.DoublesAtLeastZero(), wb =>
        {
            if (count > limit)
            {
                return;
            }

            Trace.WriteLine($"wa = {wa}, wb = {wb}");
            foreach (var a in OperatorTests.Doubles())
            {
                if (count > limit)
                {
                    break;
                }

                foreach (var b in OperatorTests.Doubles())
                {
                    if (count > limit)
                    {
                        break;
                    }

                    // Skip pairs whose sum is NaN.
                    if (double.IsNaN(a + b))
                    {
                        continue;
                    }

                    double midpoint = MMath.WeightedAverage(wa, a, wb, b);
                    Assert.True(midpoint >= System.Math.Min(a, b), $"Failed assertion: {midpoint} >= {System.Math.Min(a, b)}, wa={wa:r}, a={a:r}, wb={wb:r}, b={b:r}");
                    Assert.True(midpoint <= System.Math.Max(a, b), $"Failed assertion: {midpoint} <= {System.Math.Max(a, b)}, wa={wa:r}, a={a:r}, wb={wb:r}, b={b:r}");
                    if (wa == wb)
                    {
                        Assert.Equal(MMath.Average(a, b), midpoint);
                    }

                    Interlocked.Increment(ref count);
                }
            }
        });
    });
}
/// <summary>
/// Checks that <c>StringDistribution.Any()</c> and <c>StringDistribution.Uniform()</c>
/// both report themselves as uniform and improper, and give probability 1 to the sampled strings.
/// </summary>
public void Uniform()
{
    var fromAny = StringDistribution.Any();
    var fromUniform = StringDistribution.Uniform();

    Assert.True(fromAny.IsUniform());
    Assert.True(fromUniform.IsUniform());

    Assert.False(fromAny.IsProper());
    Assert.False(fromUniform.IsProper());

    StringInferenceTestUtilities.TestProbability(fromAny, 1.0, "hello", string.Empty);
    StringInferenceTestUtilities.TestProbability(fromUniform, 1.0, "hello", string.Empty);
}
/// <summary>
/// Builds an automaton whose start state has a self-loop on 'a' with weight 2.0 and an
/// end weight of 5.0, then checks that <c>NormalizeValues</c> throws and
/// <c>TryNormalizeValues</c> returns false.
/// </summary>
public void NonNormalizableLoop3()
{
    StringAutomaton loopAutomaton = StringAutomaton.Zero();
    loopAutomaton.Start.AddTransition('a', Weight.FromValue(2.0), loopAutomaton.Start);
    loopAutomaton.Start.EndWeight = Weight.FromValue(5.0);

    // The throwing call runs on the original; the non-throwing call runs on a clone.
    StringAutomaton loopAutomatonClone = loopAutomaton.Clone();
    Assert.Throws<InvalidOperationException>(() => loopAutomaton.NormalizeValues());
    Assert.False(loopAutomatonClone.TryNormalizeValues());

    // TODO: fix equality first, then re-enable:
    ////Assert.Equal(f, copyOfF);
}
/// <summary>
/// Checks properness and per-string probabilities of <c>StringDistribution.Upper</c>
/// with both bounds set and with only a minimum length.
/// </summary>
public void Upper()
{
    // Number of characters with non-zero probability under DiscreteChar.Upper().
    int upperCount = DiscreteChar.Upper().GetProbs().Count(p => p > 0);

    // Length between 1 and 2.
    var boundedUpper = StringDistribution.Upper(minLength: 1, maxLength: 2);
    Assert.True(boundedUpper.IsProper());
    var boundedUpperProbability = StringInferenceTestUtilities.StringUniformProbability(1, 2, upperCount);
    StringInferenceTestUtilities.TestProbability(boundedUpper, boundedUpperProbability, "A", "BC");
    StringInferenceTestUtilities.TestProbability(boundedUpper, 0.0, "ABC", "bc", "a", string.Empty);

    // Only a minimum length of 2: the test expects an improper distribution.
    var unboundedUpper = StringDistribution.Upper(minLength: 2);
    Assert.False(unboundedUpper.IsProper());
    StringInferenceTestUtilities.TestProbability(unboundedUpper, 1.0, "BC", "HFJLHFLJN");
    StringInferenceTestUtilities.TestProbability(unboundedUpper, 0.0, "A", "bc", "JDFJjjlkJ", string.Empty);
}
/// <summary>
/// Checks properness and per-string probabilities of <c>StringDistribution.Lower</c>
/// with both bounds set and with only a minimum length.
/// </summary>
public void Lower()
{
    // Number of characters with non-zero probability under DiscreteChar.Lower().
    int lowerCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);

    // Length between 1 and 2.
    var boundedLower = StringDistribution.Lower(minLength: 1, maxLength: 2);
    Assert.True(boundedLower.IsProper());
    var boundedLowerProbability = StringInferenceTestUtilities.StringUniformProbability(1, 2, lowerCount);
    StringInferenceTestUtilities.TestProbability(boundedLower, boundedLowerProbability, "a", "bc");
    StringInferenceTestUtilities.TestProbability(boundedLower, 0.0, "abc", "BC", "A", string.Empty);

    // Only a minimum length of 2: the test expects an improper distribution.
    var unboundedLower = StringDistribution.Lower(minLength: 2);
    Assert.False(unboundedLower.IsProper());
    StringInferenceTestUtilities.TestProbability(unboundedLower, 1.0, "bc", "abvhrbfijbr");
    StringInferenceTestUtilities.TestProbability(unboundedLower, 0.0, "a", "BC", "adasdADNdej", string.Empty);
}
/// <summary>
/// Builds an automaton with a loop between the start state and an end state plus a
/// self-loop of weight 0.75 on the end state, then checks that <c>NormalizeValues</c>
/// throws and <c>TryNormalizeValues</c> returns false.
/// </summary>
public void NonNormalizableLoop2()
{
    StringAutomaton automaton = StringAutomaton.Zero();
    var endState = automaton.Start.AddTransition('a', Weight.FromValue(2.0));
    endState.EndWeight = Weight.FromValue(5.0);
    endState.AddTransition('b', Weight.FromValue(0.1), automaton.Start);
    endState.AddTransition('c', Weight.FromValue(0.05), automaton.Start);
    endState.AddSelfTransition('!', Weight.FromValue(0.75));

    // The throwing call runs on the original automaton so that the clone is untouched
    // when TryNormalizeValues is checked, as in NonNormalizableLoop3 and NonNormalizableLoop4.
    // Previously both calls ran on the clone and the original was never normalized.
    StringAutomaton copyOfAutomaton = automaton.Clone();
    Assert.Throws<InvalidOperationException>(() => automaton.NormalizeValues());
    Assert.False(copyOfAutomaton.TryNormalizeValues());

    ////Assert.Equal(f, copyOfF); // TODO: fix equality first
}
/// <summary>
/// Infers the text produced by formatting an unknown template with a fixed name and date,
/// then checks that a sample sentence does not have a log-probability of negative infinity.
/// </summary>
public void SemanticWebTest1()
{
    // Fixed format arguments.
    var personName = "Tony Blair";
    var birthDate = "6 May 1953";

    var formatTemplate = Variable.Random(StringDistribution.Any());
    var formattedText = Variable.StringFormat(formatTemplate, personName, birthDate);

    var inferenceEngine = new InferenceEngine();
    inferenceEngine.Compiler.RecommendedQuality = QualityBand.Experimental;
    inferenceEngine.NumberOfIterations = 1;

    var textDist = inferenceEngine.Infer<StringDistribution>(formattedText);
    Console.WriteLine("textDist={0}", textDist);

    double sentenceLogProb = textDist.GetLogProb("6 May 1953 is the date of birth of Tony Blair.");
    Assert.False(double.IsNegativeInfinity(sentenceLogProb));
}
/// <summary>
/// Checks that group 1, set on a transition of the left-hand operand, is still present
/// in the product with an operand that uses no groups.
/// </summary>
public void ProductWithGroups()
{
    StringDistribution ungrouped = StringDistribution.String("ab");

    // Tag the first transition of the start state with group 1. The transition is
    // copied out, modified, and written back through SetTransition.
    var automaton = ungrouped.GetWorkspaceOrPoint();
    var firstTransition = automaton.Start.GetTransitions()[0];
    firstTransition.Group = 1;
    automaton.Start.SetTransition(0, firstTransition);

    StringDistribution grouped = StringDistribution.FromWeightFunction(automaton);
    StringDistribution plain = StringDistribution.OneOf("ab", "ac");
    Assert.True(grouped.GetWorkspaceOrPoint().HasGroup(1));
    Assert.False(plain.GetWorkspaceOrPoint().UsesGroups());

    var product = StringDistribution.Zero();
    product.SetToProduct(grouped, plain);
    Assert.True(product.GetWorkspaceOrPoint().HasGroup(1));
}
/// <summary>
/// Builds an automaton with a self-loop on the start state and two 'a' branches
/// (one of which has its own self-loop of weight 2.0), then checks that
/// <c>NormalizeValues</c> throws and <c>TryNormalizeValues</c> returns false.
/// </summary>
public void NonNormalizableLoop4()
{
    StringAutomaton loopAutomaton = StringAutomaton.Zero();
    loopAutomaton.Start.AddSelfTransition('a', Weight.FromValue(0.1));

    // First branch: an accepting state with a self-loop.
    var loopingBranch = loopAutomaton.Start.AddTransition('a', Weight.FromValue(2.0));
    loopingBranch.AddSelfTransition('a', Weight.FromValue(2.0));
    loopingBranch.SetEndWeight(Weight.One);

    // Second branch: an accepting state without further transitions.
    var plainBranch = loopAutomaton.Start.AddTransition('a', Weight.FromValue(2.0));
    plainBranch.SetEndWeight(Weight.One);

    // The throwing call runs on the original; the non-throwing call runs on a clone.
    StringAutomaton loopAutomatonClone = loopAutomaton.Clone();
    Assert.Throws<InvalidOperationException>(() => loopAutomaton.NormalizeValues());
    Assert.False(loopAutomatonClone.TryNormalizeValues());

    // TODO: fix equality first, then re-enable:
    ////Assert.Equal(f, copyOfF);
}
/// <summary>
/// Checks <c>StringDistribution.ZeroOrMore</c> over lowercase characters and over a
/// re-weighted digit distribution: strings made of supported characters are expected to
/// get probability 1, all others 0.
/// </summary>
public void UniformOf()
{
    var lowercaseStrings = StringDistribution.ZeroOrMore(DiscreteChar.Lower());
    Assert.False(lowercaseStrings.IsUniform());
    Assert.False(lowercaseStrings.IsProper());
    StringInferenceTestUtilities.TestProbability(lowercaseStrings, 1.0, "hello", "a", string.Empty);
    StringInferenceTestUtilities.TestProbability(lowercaseStrings, 0.0, "123", "!", "Abc");

    // Test if non-uniform element distribution does not affect the outcome
    Vector digitProbs = DiscreteChar.Digit().GetProbs();
    digitProbs['1'] = 0;
    digitProbs['2'] = 0.3;
    digitProbs['3'] = 0.0001;
    var reweightedDigits = DiscreteChar.FromVector(digitProbs);

    var digitStrings = StringDistribution.ZeroOrMore(reweightedDigits);
    StringInferenceTestUtilities.TestProbability(digitStrings, 1.0, "0", "234", string.Empty);
    StringInferenceTestUtilities.TestProbability(digitStrings, 0.0, "1", "231", "!", "Abc");
}
/// <summary>
/// Checks that group 1, set on a transition of the left-hand operand, is still present
/// in the product with an operand that uses no groups.
/// </summary>
public void ProductWithGroups()
{
    StringDistribution ungrouped = StringDistribution.String("ab");

    // add a group to first transition of the start state
    var builder = StringAutomaton.Builder.FromAutomaton(ungrouped.GetWorkspaceOrPoint());
    var iterator = builder.Start.TransitionIterator;

    // The transition is copied out, modified, and written back through the iterator.
    var groupedTransition = iterator.Value;
    groupedTransition.Group = 1;
    iterator.Value = groupedTransition;

    StringDistribution grouped = StringDistribution.FromWeightFunction(builder.GetAutomaton());
    StringDistribution plain = StringDistribution.OneOf("ab", "ac");
    Assert.True(grouped.GetWorkspaceOrPoint().HasGroup(1));
    Assert.False(plain.GetWorkspaceOrPoint().UsesGroups);

    var product = StringDistribution.Zero();
    product.SetToProduct(grouped, plain);
    Assert.True(product.GetWorkspaceOrPoint().HasGroup(1));
}
/// <summary>
/// Checks properness and per-string probabilities of <c>StringDistribution.Capitalized</c>
/// with both bounds set and with only a minimum length.
/// </summary>
public void Capitalized()
{
    // Numbers of characters with non-zero probability in each character class.
    int lowerCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);
    int upperCount = DiscreteChar.Upper().GetProbs().Count(p => p > 0);

    // Length between 3 and 5.
    var boundedCapitalized = StringDistribution.Capitalized(minLength: 3, maxLength: 5);
    Assert.True(boundedCapitalized.IsProper());

    // Expected probability: the uniform probability for lengths 2..4 over the lowercase
    // characters, divided by the number of uppercase characters.
    var boundedProbability =
        StringInferenceTestUtilities.StringUniformProbability(2, 4, lowerCount) / upperCount;
    StringInferenceTestUtilities.TestProbability(boundedCapitalized, boundedProbability, "Abc", "Bcde", "Abcde");
    StringInferenceTestUtilities.TestProbability(boundedCapitalized, 0.0, "A", "abc", "Ab", "Abcdef", string.Empty);

    // Only a minimum length of 3: the test expects an improper distribution.
    var unboundedCapitalized = StringDistribution.Capitalized(minLength: 3);
    Assert.False(unboundedCapitalized.IsProper());
    StringInferenceTestUtilities.TestProbability(unboundedCapitalized, 1.0, "Abc", "Bcde", "Abcde", "Abfjrhfjlrl");
    StringInferenceTestUtilities.TestProbability(unboundedCapitalized, 0.0, "A", "abc", "Ab", string.Empty);
}
/// <summary>
/// Builds an automaton with a loop between the start state and an end state plus a
/// self-loop of weight 0.5 on the end state, normalizes a clone of it, and checks that
/// the log-normalizer equals log(50), that the clone is stochastic, and that the values
/// of sample strings agree between the original and the normalized automaton.
/// </summary>
public void NormalizeValuesWithNonTrivialLoop()
{
    StringAutomaton original = StringAutomaton.Zero();
    var accepting = original.Start.AddTransition('a', Weight.FromValue(2.0));
    accepting.EndWeight = Weight.FromValue(5.0);
    accepting.AddTransition('b', Weight.FromValue(0.1), original.Start);
    accepting.AddTransition('c', Weight.FromValue(0.05), original.Start);
    accepting.AddSelfTransition('!', Weight.FromValue(0.5));

    // Normalize a clone so the original stays available for comparison.
    var normalized = original.Clone();
    double logNormalizer = normalized.NormalizeValues();

    // The same normalizer is expected from NormalizeValues and from both helper computations.
    Assert.Equal(Math.Log(50.0), logNormalizer, 1e-6);
    Assert.Equal(Math.Log(50.0), GetLogNormalizerByGetValue(original), 1e-6);
    Assert.Equal(Math.Log(50.0), GetLogNormalizerByGetValueWithTransducers(original), 1e-6);
    AssertStochastic(normalized);

    var sampleStrings = new[] { "a!!", "abaca", "a!ba!!ca!!!!" };
    foreach (var sample in sampleStrings)
    {
        Assert.False(double.IsNegativeInfinity(original.GetLogValue(sample)));

        // Original value == normalized value + log-normalizer, within tolerance.
        Assert.Equal(
            original.GetLogValue(sample),
            normalized.GetLogValue(sample) + logNormalizer,
            1e-6);
    }
}