/// <summary>
/// Exercises string distributions built with a lower and/or upper bound on length,
/// checking properness and the probability of strings inside and outside the bounds.
/// </summary>
public void LengthBounds()
{
    // Any characters, between one and three of them.
    var anyOneToThree = StringDistribution.Any(minLength: 1, maxLength: 3);
    Assert.True(anyOneToThree.IsProper());
    var anyOneToThreeProb = StringInferenceTestUtilities.StringUniformProbability(1, 3, 65536);
    StringInferenceTestUtilities.TestProbability(anyOneToThree, anyOneToThreeProb, "a", "aa", "aaa");
    StringInferenceTestUtilities.TestProbability(anyOneToThree, 0.0, string.Empty, "aaaa");

    // One to three repetitions of 'a' or 'b'.
    var abOneToThree = StringDistribution.Repeat(DiscreteChar.OneOf('a', 'b'), minTimes: 1, maxTimes: 3);
    Assert.True(abOneToThree.IsProper());
    var abOneToThreeProb = StringInferenceTestUtilities.StringUniformProbability(1, 3, 2);
    StringInferenceTestUtilities.TestProbability(abOneToThree, abOneToThreeProb, "a", "ab", "aba");
    StringInferenceTestUtilities.TestProbability(abOneToThree, 0.0, string.Empty, "aaaa", "abab", "cc");

    // Exactly two repetitions of 'a' or 'b'.
    var abExactlyTwo = StringDistribution.Repeat(DiscreteChar.OneOf('a', 'b'), minTimes: 2, maxTimes: 2);
    Assert.True(abExactlyTwo.IsProper());
    var abExactlyTwoProb = StringInferenceTestUtilities.StringUniformProbability(2, 2, 2);
    StringInferenceTestUtilities.TestProbability(abExactlyTwo, abExactlyTwoProb, "aa", "ab", "ba", "bb");
    StringInferenceTestUtilities.TestProbability(abExactlyTwo, 0.0, string.Empty, "a", "abab", "cc");

    // Only a lower bound: the test expects this distribution to be improper.
    var atLeastTwo = StringDistribution.Any(minLength: 2);
    Assert.False(atLeastTwo.IsProper());
    StringInferenceTestUtilities.TestProbability(atLeastTwo, 1.0, "aa", "123", "@*(@*&(@)");
    StringInferenceTestUtilities.TestProbability(atLeastTwo, 0.0, string.Empty, "a", "!");

    // Only an upper bound: zero to three digits.
    var upToThreeDigits = StringDistribution.ZeroOrMore(DiscreteChar.Digit(), maxTimes: 3);
    Assert.True(upToThreeDigits.IsProper());
    var upToThreeDigitsProb = StringInferenceTestUtilities.StringUniformProbability(0, 3, 10);
    StringInferenceTestUtilities.TestProbability(upToThreeDigits, upToThreeDigitsProb, string.Empty, "1", "32", "432");
    StringInferenceTestUtilities.TestProbability(upToThreeDigits, 0.0, "abc", "1234");
}
/// <summary>
/// Checks the values of the <c>CopyElement</c> transducer over {'a', 'b'} and verifies that
/// projecting an automaton through it shrinks the automaton's support.
/// </summary>
public void CopyElement()
{
    StringTransducer copy = StringTransducer.CopyElement(DiscreteChar.OneOf('a', 'b'));

    // A single allowed character copied onto itself has value one.
    StringInferenceTestUtilities.TestTransducerValue(copy, "a", "a", 1.0);
    StringInferenceTestUtilities.TestTransducerValue(copy, "b", "b", 1.0);

    // Mismatched characters have value zero.
    StringInferenceTestUtilities.TestTransducerValue(copy, "a", "b", 0.0);
    StringInferenceTestUtilities.TestTransducerValue(copy, "b", "a", 0.0);

    // Anything that is not exactly one element has value zero.
    StringInferenceTestUtilities.TestTransducerValue(copy, string.Empty, string.Empty, 0.0);
    StringInferenceTestUtilities.TestTransducerValue(copy, "bb", "bb", 0.0);
    StringInferenceTestUtilities.TestTransducerValue(copy, "bab", "bab", 0.0);
    StringInferenceTestUtilities.TestTransducerValue(copy, "bab", "ba", 0.0);

    //// Tests that projection on CopyElement(elements) shrinks the support
    StringAutomaton weights = StringAutomaton.ConstantOn(2.0, "a", "ab", "ac")
        .Sum(StringAutomaton.ConstantOn(1.0, "a"))
        .Sum(StringAutomaton.Constant(2.0))
        .Product(StringAutomaton.Constant(3.0));

    // Pass 0 inspects the automaton as built; pass 1 inspects it after one projection.
    for (int pass = 0; pass < 2; ++pass)
    {
        bool alreadyProjected = pass != 0;
        double expectedForEmpty = alreadyProjected ? 0.0 : 6.0;
        double expectedForTwoChars = alreadyProjected ? 0.0 : 12.0;

        StringInferenceTestUtilities.TestValue(weights, 15.0, "a");
        StringInferenceTestUtilities.TestValue(weights, 6.0, "b");
        StringInferenceTestUtilities.TestValue(weights, expectedForEmpty, string.Empty);
        StringInferenceTestUtilities.TestValue(weights, expectedForTwoChars, "ac", "ab");

        weights = copy.ProjectSource(weights);
    }
}
/// <summary>
/// Checks the messages produced by <c>StringOfLengthOp</c>: the message to the string
/// for fixed and uncertain lengths, and the message to the length.
/// </summary>
public void MessageOpsTest()
{
    const double Tolerance = 1e-6;

    // Message to the string, length known exactly.
    StringDistribution tenLetters = StringOfLengthOp.StrAverageConditional(DiscreteChar.Letter(), 10);
    Assert.Equal(StringDistribution.Repeat(DiscreteChar.Letter(), 10, 10), tenLetters);

    // Message to the string, single allowed character and uncertain length.
    StringDistribution repeatedA = StringOfLengthOp.StrAverageConditional(
        DiscreteChar.PointMass('a'),
        Discrete.UniformInRange(5, 2, 4));
    Assert.Equal(StringDistribution.OneOf("aa", "aaa", "aaaa"), repeatedA);

    // Message to the string, two allowed characters and a non-uniform length distribution.
    StringDistribution weightedAb = StringOfLengthOp.StrAverageConditional(
        DiscreteChar.OneOf('a', 'b'),
        new Discrete(0.1, 0.0, 0.6, 0.3));
    StringInferenceTestUtilities.TestProbability(weightedAb, 0.1, string.Empty);
    StringInferenceTestUtilities.TestProbability(weightedAb, 0.6 / 4, "aa", "ab", "ba", "bb");
    StringInferenceTestUtilities.TestProbability(weightedAb, 0.3 / 8, "aaa", "bbb", "abb", "bab");

    // Message to the length, where only one candidate string is expected to be compatible.
    Discrete lengthOfAOnly = StringOfLengthOp.LengthAverageConditional(
        StringDistribution.OneOf("aa", "bbb"),
        DiscreteChar.PointMass('a'),
        Discrete.Uniform(10));
    Assert.Equal(Discrete.PointMass(2, 10), lengthOfAOnly);

    // Message to the length, several candidate strings of different lengths.
    Discrete lengthOfAb = StringOfLengthOp.LengthAverageConditional(
        StringDistribution.OneOf("aab", "ab", "b", "bc"),
        DiscreteChar.OneOf('a', 'b'),
        Discrete.Uniform(10));
    Assert.Equal(4.0 / 7.0, lengthOfAb[1], Tolerance);
    Assert.Equal(2.0 / 7.0, lengthOfAb[2], Tolerance);
    Assert.Equal(1.0 / 7.0, lengthOfAb[3], Tolerance);
}
/// <summary>
/// Multiplies a character distribution carrying a log-probability override by a narrow
/// distribution, in both argument orders, and checks the product has no override.
/// </summary>
public void ProductWithLogOverrideNarrow()
{
    double overrideLogProb = Math.Log(0.9);

    // Second pass swaps the operands so that both argument orders are covered.
    foreach (bool swapOperands in new[] { false, true })
    {
        var dist1 = DiscreteChar.LetterOrDigit();
        var dist2 = DiscreteChar.OneOf('1', '3', '5', '6');

        dist1.SetToPartialUniformOf(dist1, overrideLogProb);
        Xunit.Assert.True(dist1.HasLogProbabilityOverride);
        Xunit.Assert.False(dist2.IsBroad);

        if (swapOperands)
        {
            Util.Swap(ref dist1, ref dist2);
        }

        var product = DiscreteChar.Uniform();
        product.SetToProduct(dist1, dist2);

        Xunit.Assert.False(product.HasLogProbabilityOverride);

        // '5' is one of the four characters of the narrow operand.
        Assert.Equal(Math.Log(0.25), product.GetLogProb('5'), Eps);

        // 'a' is not among the narrow operand's characters.
        Xunit.Assert.True(double.IsNegativeInfinity(product.GetLogProb('a')));
    }
}
/// <summary>
/// Checks that repeating a distribution over the strings "a" and "b" equals repeating
/// the character distribution over 'a' and 'b' with the same repetition bounds.
/// </summary>
public void Repeat2()
{
    var repeatedChars = StringDistribution.Repeat(
        DiscreteChar.OneOf('a', 'b'),
        minTimes: 1,
        maxTimes: 3);

    var singleCharStrings = StringDistribution.OneOf("a", "b");
    var repeatedStrings = StringDistribution.Repeat(singleCharStrings, minTimes: 1, maxTimes: 3);

    Assert.Equal(repeatedChars, repeatedStrings);
}
/// <summary>
/// Checks the product of "zero or more of 'a'/'b'" with "zero or more of 'a'":
/// strings made only of 'a' are expected to keep probability 1.0, anything containing 'b' gets 0.0.
/// </summary>
public void Product2()
{
    var onlyA = StringDistribution.ZeroOrMore('a');
    var aOrB = StringDistribution.ZeroOrMore(DiscreteChar.OneOf('a', 'b'));

    var product = aOrB.Product(onlyA);

    StringInferenceTestUtilities.TestProbability(product, 1.0, string.Empty, "a", "aa", "aaa");
    StringInferenceTestUtilities.TestProbability(product, 0.0, "b", "bb", "ab", "ba");
}
/// <summary>
/// Checks the <c>IsBroad</c> flag: the digit distribution is expected to be broad,
/// while a distribution over four explicit characters is not.
/// </summary>
public void BroadAndNarrow()
{
    var digits = DiscreteChar.Digit();
    Xunit.Assert.True(digits.IsBroad);

    var fewChars = DiscreteChar.OneOf('1', '3', '5', '6');
    Xunit.Assert.False(fewChars.IsBroad);
}
/// <summary>
/// Builds a prior over names: a name part, a single space or hyphen, then another name part.
/// </summary>
/// <returns>The name prior.</returns>
private static StringDistribution NamePrior()
{
    // TODO: make this closer to:
    // NP([\s\-]NP)*(\s[""\(]NP[""\)])?([\s\-]NP)+
    var prior = NamePart();

    var separator = DiscreteChar.OneOf(' ', '-');
    prior.AppendInPlace(separator);
    prior.AppendInPlace(NamePart());

    return prior;
}
/// <summary>
/// Initializes static members of the <see cref="StringFormatOpBase{TThis}"/> class.
/// </summary>
static StringFormatOpBase()
{
    // Start from the more general behavior. This must be assigned before TThis is
    // constructed below, since TThis is expected to do its own setup at that point.
    RequirePlaceholderForEveryArgument = false;

    // Every character except the two braces.
    var anythingButBraces = DiscreteChar.OneOf('{', '}').Complement();
    DisallowBracesAutomaton = StringAutomaton.Constant(1.0, anythingButBraces);
    DisallowBracesTransducer = StringTransducer.Copy(anythingButBraces);

    // Creating an instance makes sure the static constructor of TThis has run,
    // so that TThis sets everything up.
    new TThis();
}
/// <summary>
/// Profiles inference of entity names and values in a model where each observed text is
/// produced by formatting a randomly chosen template with a randomly chosen entity.
/// </summary>
[Trait("Category", "OpenBug")] // Test failing with AutomatonTooLarge due to determinization added to SetToProduct in change 47614. Increasing max states to 1M does not fix the issue
public void PropertyInferencePerformanceTest()
{
    Rand.Restart(777);

    string[] names = { "Alice", "Bob", "Charlie", "Eve", "Boris", "John" };
    string[] roles = { "sender", "receiver", "attacker", "eavesdropper", "developer", "researcher" };
    string[] formats = { "{0} is {1}", "{0} is known as {1}", "{1} is a role of {0}", "{0} -- {1}", "{0} aka {1}" };

    // Generate the observed texts. The entity is drawn before the template on every
    // iteration, so the sequence of random draws stays the same.
    var observedTexts = new string[10];
    for (int t = 0; t < observedTexts.Length; ++t)
    {
        int pickedEntity = Rand.Int(names.Length);
        int pickedFormat = Rand.Int(formats.Length);
        observedTexts[t] = string.Format(formats[pickedFormat], names[pickedEntity], roles[pickedEntity]);
    }

    // Ranges of the model.
    var entityRange = new Range(names.Length).Named("entity");
    var templateRange = new Range(formats.Length).Named("template");
    var textRange = new Range(observedTexts.Length).Named("text");

    // Latent entity names and values.
    var entityNames = Variable.Array<string>(entityRange).Named("entityNames");
    entityNames[entityRange] = Variable.Random(StringDistribution.Capitalized()).ForEach(entityRange);
    var entityValues = Variable.Array<string>(entityRange).Named("entityValues");
    entityValues[entityRange] = Variable.Random(StringDistribution.Lower()).ForEach(entityRange);

    // Template prior: both placeholders at the ends, in either order, with brace-free text between.
    StringDistribution braceFreeMiddle = StringDistribution.ZeroOrMore(DiscreteChar.OneOf('{', '}').Complement());
    var nameFirst = StringDistribution.String("{0} ") + braceFreeMiddle + StringDistribution.String(" {1}");
    var valueFirst = StringDistribution.String("{1} ") + braceFreeMiddle + StringDistribution.String(" {0}");
    StringDistribution templatePrior = StringDistribution.OneOf(nameFirst, valueFirst);

    var templates = Variable.Array<string>(templateRange).Named("templates");
    templates[templateRange] = Variable.Random(templatePrior).ForEach(templateRange);

    // Each text formats a uniformly chosen template with a uniformly chosen entity.
    var texts = Variable.Array<string>(textRange).Named("texts");
    using (Variable.ForEach(textRange))
    {
        var entityIndex = Variable.DiscreteUniform(entityRange).Named("entityIndex");
        var templateIndex = Variable.DiscreteUniform(templateRange).Named("templateIndex");
        using (Variable.Switch(entityIndex))
        {
            using (Variable.Switch(templateIndex))
            {
                texts[textRange] = Variable.StringFormat(
                    templates[templateIndex],
                    entityNames[entityIndex],
                    entityValues[entityIndex]);
            }
        }
    }

    texts.ObservedValue = observedTexts;

    var engine = new InferenceEngine
    {
        ShowProgress = false,
        OptimiseForVariables = new[] { entityNames, entityValues },
        Compiler =
        {
            RecommendedQuality = QualityBand.Experimental,

            // TODO: get this test to work with parallel for loops.
            UseParallelForLoops = false
        },
        NumberOfIterations = 1
    };

    ProfileAction(
        () =>
        {
            Console.WriteLine(engine.Infer<StringDistribution[]>(entityNames)[0]);
            Console.WriteLine(engine.Infer<StringDistribution[]>(entityValues)[0]);
        },
        1);
}
/// <summary>
/// Builds a word model from character distributions scaled by log-probability overrides and
/// checks word probabilities per length: directly, under products with broad and narrow
/// distributions, after a copy transducer, and after re-weighting the first character.
/// </summary>
public void WordModel()
{
    // Goal: a reasonably simple StringDistribution usable as a word model, such that
    //   (1) a word of moderate length is not significantly less probable than a shorter word;
    //   (2) the probability of a specific word, conditioned on its length, matches that of
    //       words in the target language.
    // This is done by placing non-normalized character distributions on the edges. The
    // StringDistribution is unaware that they are non-normalized, and the StringDistribution
    // itself is non-normalizable.
    const double ProbLength1 = 0.05;
    const double SecondCharRatio = 0.4;
    const double ProbLength2 = ProbLength1 * SecondCharRatio;
    const double EarlyRatio = 0.2;
    const double ProbLength3 = ProbLength2 * EarlyRatio;
    const double ProbLength4 = ProbLength3 * EarlyRatio;
    const double ProbLength5 = ProbLength4 * EarlyRatio;
    const double MiddleRatio = 0.999;
    const double ProbLength6 = ProbLength5 * MiddleRatio;
    const double ProbLength7 = ProbLength6 * MiddleRatio;
    const double ProbLength8 = ProbLength7 * MiddleRatio;
    const double TailRatio = 0.9;
    const double ProbLength9 = ProbLength8 * TailRatio;
    const double ProbLength10 = ProbLength9 * TailRatio;

    double[] expectedByLength =
    {
        ProbLength1, ProbLength2, ProbLength3, ProbLength4, ProbLength5,
        ProbLength6, ProbLength7, ProbLength8, ProbLength9, ProbLength10
    };

    var upper = DiscreteChar.Upper();
    var lower = DiscreteChar.Lower();
    var upperNarrow = DiscreteChar.OneOf('A', 'B');
    var lowerNarrow = DiscreteChar.OneOf('a', 'b');

    // Creates a character distribution and sets it to the partial uniform of 'support'
    // with the given weight (passed on as a log value).
    Func<DiscreteChar, double, DiscreteChar> scaled = (support, weight) =>
    {
        var dist = DiscreteChar.Uniform();
        dist.SetToPartialUniformOf(support, Math.Log(weight));
        return dist;
    };

    var firstChar = scaled(upper, ProbLength1);
    var secondChar = scaled(lower, SecondCharRatio);
    var earlyChar = scaled(lower, EarlyRatio);
    var middleChar = scaled(lower, MiddleRatio);
    var tailChar = scaled(lower, TailRatio);

    var wordModel = StringDistribution.Concatenate(
        new List<DiscreteChar>
        {
            firstChar,
            secondChar,
            earlyChar, earlyChar, earlyChar,
            middleChar, middleChar, middleChar,
            tailChar
        },
        true,
        true);

    const string FullWord = "Abcdefghij";
    const double Tolerance = 1e-5;

    // Both distributions grow by one lower-case character per iteration,
    // so their length tracks the length of the word under test.
    var broad = StringDistribution.Char(upper);
    var narrow = StringDistribution.Char(upperNarrow);
    var narrowWord = "A";
    var narrowExpected = 0.5;

    for (var length = 1; length <= expectedByLength.Length; length++)
    {
        var expected = expectedByLength[length - 1];
        var prefix = FullWord.Substring(0, length);

        // Direct evaluation of the model.
        Assert.Equal(expected, Math.Exp(wordModel.GetLogProb(prefix)), Tolerance);

        // Average of the model under the broad distribution of the same length.
        Assert.Equal(expected, Math.Exp(wordModel.GetLogAverageOf(broad)), Tolerance);

        // Product with the broad distribution keeps the element log-value overrides.
        var product = StringDistribution.Zero();
        product.SetToProduct(broad, wordModel);
        Xunit.Assert.True(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
        Assert.Equal(expected, Math.Exp(product.GetLogProb(prefix)), Tolerance);

        // Product with the narrow distribution has no overrides.
        product.SetToProduct(narrow, wordModel);
        Xunit.Assert.False(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
        Assert.Equal(narrowExpected, Math.Exp(product.GetLogProb(narrowWord)), Tolerance);

        broad = broad.Append(lower);
        narrow = narrow.Append(lowerNarrow);
        narrowWord += "a";
        narrowExpected *= 0.5;
    }

    // Copied model: projecting through a copy transducer must leave the probabilities unchanged.
    var copiedModel = StringDistribution.FromWorkspace(
        StringTransducer.Copy().ProjectSource(wordModel.GetWorkspaceOrPoint()));
    for (var length = 1; length <= expectedByLength.Length; length++)
    {
        var prefix = FullWord.Substring(0, length);
        var actual = Math.Exp(copiedModel.GetLogProb(prefix));
        Assert.Equal(expectedByLength[length - 1], actual, Tolerance);
    }

    // Rescaled model: replace the first character by one with a scaled weight, copy the rest.
    var scale = 0.5;
    var rescaledProbLength1 = ProbLength1 * scale;
    var rescaledFirstChar = scaled(upper, rescaledProbLength1);

    var upperAutomaton = StringDistribution.Char(upper).GetWorkspaceOrPoint();
    var rescaledUpperAutomaton = StringDistribution.Char(rescaledFirstChar).GetWorkspaceOrPoint();
    var reweighting = StringTransducer.Replace(upperAutomaton, rescaledUpperAutomaton)
        .Append(StringTransducer.Copy());

    var reweightedModel = StringDistribution.FromWorkspace(
        reweighting.ProjectSource(wordModel.GetWorkspaceOrPoint()));
    for (var length = 1; length <= expectedByLength.Length; length++)
    {
        var prefix = FullWord.Substring(0, length);
        var actual = Math.Exp(reweightedModel.GetLogProb(prefix));
        Assert.Equal(scale * expectedByLength[length - 1], actual, Tolerance);
    }
}
/// <summary>
/// Checks the messages produced by <c>StringFromArrayOp</c>: the message to the string
/// built from an array of character distributions, and the messages back to the characters.
/// </summary>
public void MessageOperatorsTest()
{
    // Message to the string.
    var fromThreeChars = StringFromArrayOp.StrAverageConditional(
        new[]
        {
            DiscreteChar.PointMass('a'),
            DiscreteChar.OneOf('b', 'c'),
            DiscreteChar.OneOf('d', 'e')
        });
    Assert.Equal(StringDistribution.OneOf("abd", "abe", "acd", "ace"), fromThreeChars, Eps);

    var fromNoChars = StringFromArrayOp.StrAverageConditional(new DiscreteChar[0]);
    Assert.Equal(StringDistribution.Empty(), fromNoChars, Eps);

    // Messages to the characters: first character is a point mass, second is uniform.
    var pointAndUniform = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "cd"),
        new[] { DiscreteChar.PointMass('a'), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.OneOf('a', 'c'), pointAndUniform[0], Eps);
    Assert.Equal(DiscreteChar.PointMass('b'), pointAndUniform[1], Eps);

    // Both characters uniform, strings share the first character.
    var sharedPrefix = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "ac"),
        new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.PointMass('a'), sharedPrefix[0], Eps);
    Assert.Equal(DiscreteChar.OneOf('b', 'c'), sharedPrefix[1], Eps);

    // Both characters uniform, three strings give non-uniform marginals.
    var threeStrings = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "ac", "bc"),
        new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(2.0 / 3.0, threeStrings[0]['a'], Eps);
    Assert.Equal(1.0 / 3.0, threeStrings[0]['b'], Eps);
    Assert.Equal(1.0 / 3.0, threeStrings[1]['b'], Eps);
    Assert.Equal(2.0 / 3.0, threeStrings[1]['c'], Eps);

    // One candidate string has three characters while the array has only two.
    var lengthMismatch = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "cde"),
        new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.PointMass('a'), lengthMismatch[0], Eps);
    Assert.Equal(DiscreteChar.PointMass('b'), lengthMismatch[1], Eps);

    // Both characters are point masses.
    var bothPointMass = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("ab", "cb", "ae", "ax"),
        new[] { DiscreteChar.PointMass('a'), DiscreteChar.PointMass('b') },
        new DiscreteChar[2]);
    Assert.Equal(DiscreteChar.OneOf('a', 'c'), bothPointMass[0], Eps);
    Assert.Equal(DiscreteChar.OneOf('b', 'e', 'x'), bothPointMass[1], Eps);

    // Four characters, only the last one is a point mass.
    var fourChars = StringFromArrayOp.CharactersAverageConditional(
        StringDistribution.OneOf("abcd", "accd", "acce"),
        new[]
        {
            DiscreteChar.Uniform(),
            DiscreteChar.Uniform(),
            DiscreteChar.Uniform(),
            DiscreteChar.PointMass('d')
        },
        new DiscreteChar[4]);
    Assert.Equal(DiscreteChar.PointMass('a'), fourChars[0], Eps);
    Assert.Equal(DiscreteChar.OneOf('b', 'c'), fourChars[1], Eps);
    Assert.Equal(DiscreteChar.PointMass('c'), fourChars[2], Eps);
    Assert.Equal(2.0 / 3.0, fourChars[3]['d'], Eps);
    Assert.Equal(1.0 / 3.0, fourChars[3]['e'], Eps);
}