예제 #1
0
        // Checks length limits on string distributions: strings inside the limits are tested
        // against the expected uniform probability, strings outside the limits (or built from
        // other characters) are tested against 0.0.
        public void LengthBounds()
        {
            // Arbitrary characters, one to three of them.
            var anyOneToThree = StringDistribution.Any(minLength: 1, maxLength: 3);
            Assert.True(anyOneToThree.IsProper());
            var anyOneToThreeProb = StringInferenceTestUtilities.StringUniformProbability(1, 3, 65536);
            StringInferenceTestUtilities.TestProbability(anyOneToThree, anyOneToThreeProb, "a", "aa", "aaa");
            StringInferenceTestUtilities.TestProbability(anyOneToThree, 0.0, string.Empty, "aaaa");

            // Characters 'a' or 'b', one to three of them.
            var abOneToThree = StringDistribution.Repeat(DiscreteChar.OneOf('a', 'b'), minTimes: 1, maxTimes: 3);
            Assert.True(abOneToThree.IsProper());
            var abOneToThreeProb = StringInferenceTestUtilities.StringUniformProbability(1, 3, 2);
            StringInferenceTestUtilities.TestProbability(abOneToThree, abOneToThreeProb, "a", "ab", "aba");
            StringInferenceTestUtilities.TestProbability(abOneToThree, 0.0, string.Empty, "aaaa", "abab", "cc");

            // Characters 'a' or 'b', exactly two of them.
            var abExactlyTwo = StringDistribution.Repeat(DiscreteChar.OneOf('a', 'b'), minTimes: 2, maxTimes: 2);
            Assert.True(abExactlyTwo.IsProper());
            var abExactlyTwoProb = StringInferenceTestUtilities.StringUniformProbability(2, 2, 2);
            StringInferenceTestUtilities.TestProbability(abExactlyTwo, abExactlyTwoProb, "aa", "ab", "ba", "bb");
            StringInferenceTestUtilities.TestProbability(abExactlyTwo, 0.0, string.Empty, "a", "abab", "cc");

            // Only a lower limit: the distribution is expected to be improper, with value 1.0
            // on every string that is long enough.
            var atLeastTwo = StringDistribution.Any(minLength: 2);
            Assert.False(atLeastTwo.IsProper());
            StringInferenceTestUtilities.TestProbability(atLeastTwo, 1.0, "aa", "123", "@*(@*&(@)");
            StringInferenceTestUtilities.TestProbability(atLeastTwo, 0.0, string.Empty, "a", "!");

            // Only an upper limit: zero to three digits.
            var upToThreeDigits = StringDistribution.ZeroOrMore(DiscreteChar.Digit(), maxTimes: 3);
            Assert.True(upToThreeDigits.IsProper());
            var upToThreeDigitsProb = StringInferenceTestUtilities.StringUniformProbability(0, 3, 10);
            StringInferenceTestUtilities.TestProbability(upToThreeDigits, upToThreeDigitsProb, string.Empty, "1", "32", "432");
            StringInferenceTestUtilities.TestProbability(upToThreeDigits, 0.0, "abc", "1234");
        }
예제 #2
0
        // Exercises StringTransducer.CopyElement on the character set {'a', 'b'}: first the
        // transducer values on individual string pairs, then its effect when an automaton is
        // projected through it.
        public void CopyElement()
        {
            StringTransducer copyOne = StringTransducer.CopyElement(DiscreteChar.OneOf('a', 'b'));

            // A single allowed character paired with itself has value 1.
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "a", "a", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "b", "b", 1.0);

            // Mismatched pairs, the empty string and longer strings all have value 0.
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "a", "b", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "b", "a", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOne, string.Empty, string.Empty, 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "bb", "bb", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "bab", "bab", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOne, "bab", "ba", 0.0);

            // Tests that projection on CopyElement(elements) shrinks the support
            StringAutomaton weights = StringAutomaton.ConstantOn(2.0, "a", "ab", "ac")
                .Sum(StringAutomaton.ConstantOn(1.0, "a"))
                .Sum(StringAutomaton.Constant(2.0))
                .Product(StringAutomaton.Constant(3.0));

            // Pass 0 checks the automaton as built; pass 1 checks it after one projection,
            // where the empty string and the two-character strings are expected to drop to 0.
            for (int pass = 0; pass < 2; ++pass)
            {
                bool projected = pass != 0;

                StringInferenceTestUtilities.TestValue(weights, 15, "a");
                StringInferenceTestUtilities.TestValue(weights, 6.0, "b");
                StringInferenceTestUtilities.TestValue(weights, projected ? 0.0 : 6.0, string.Empty);
                StringInferenceTestUtilities.TestValue(weights, projected ? 0.0 : 12.0, "ac", "ab");

                weights = copyOne.ProjectSource(weights);
            }
        }
예제 #3
0
        // Checks the messages computed by StringOfLengthOp in both directions: towards the
        // string (StrAverageConditional) and towards the length (LengthAverageConditional).
        public void MessageOpsTest()
        {
            const double Eps = 1e-6;

            // Message to the string, length given as a fixed value.
            StringDistribution tenLetters = StringOfLengthOp.StrAverageConditional(DiscreteChar.Letter(), 10);
            Assert.Equal(StringDistribution.Repeat(DiscreteChar.Letter(), 10, 10), tenLetters);

            // Message to the string, single allowed character and a length distribution.
            StringDistribution repeatedA = StringOfLengthOp.StrAverageConditional(
                DiscreteChar.PointMass('a'),
                Discrete.UniformInRange(5, 2, 4));
            Assert.Equal(StringDistribution.OneOf("aa", "aaa", "aaaa"), repeatedA);

            // Message to the string, two allowed characters and a non-uniform length distribution.
            StringDistribution weightedAb = StringOfLengthOp.StrAverageConditional(
                DiscreteChar.OneOf('a', 'b'),
                new Discrete(0.1, 0.0, 0.6, 0.3));
            StringInferenceTestUtilities.TestProbability(weightedAb, 0.1, string.Empty);
            StringInferenceTestUtilities.TestProbability(weightedAb, 0.6 / 4, "aa", "ab", "ba", "bb");
            StringInferenceTestUtilities.TestProbability(weightedAb, 0.3 / 8, "aaa", "bbb", "abb", "bab");

            // Message to the length: with only 'a' allowed, "aa" is the one compatible string.
            Discrete lengthFromA = StringOfLengthOp.LengthAverageConditional(
                StringDistribution.OneOf("aa", "bbb"),
                DiscreteChar.PointMass('a'),
                Discrete.Uniform(10));
            Assert.Equal(Discrete.PointMass(2, 10), lengthFromA);

            // Message to the length with 'a' or 'b' allowed.
            Discrete lengthFromAb = StringOfLengthOp.LengthAverageConditional(
                StringDistribution.OneOf("aab", "ab", "b", "bc"),
                DiscreteChar.OneOf('a', 'b'),
                Discrete.Uniform(10));
            Assert.Equal(4.0 / 7.0, lengthFromAb[1], Eps);
            Assert.Equal(2.0 / 7.0, lengthFromAb[2], Eps);
            Assert.Equal(1.0 / 7.0, lengthFromAb[3], Eps);
        }
예제 #4
0
        // Multiplies a character distribution that carries a log-probability override with a
        // narrow one, in both operand orders, and checks that the product has no override and
        // gives the four listed digits probability 0.25 each.
        public void ProductWithLogOverrideNarrow()
        {
            foreach (var swapOperands in new[] { false, true })
            {
                var left = DiscreteChar.LetterOrDigit();
                var right = DiscreteChar.OneOf('1', '3', '5', '6');

                left.SetToPartialUniformOf(left, Math.Log(0.9));
                Xunit.Assert.True(left.HasLogProbabilityOverride);
                Xunit.Assert.False(right.IsBroad);

                // The second pass repeats the check with the operands exchanged.
                if (swapOperands)
                {
                    Util.Swap(ref left, ref right);
                }

                var product = DiscreteChar.Uniform();
                product.SetToProduct(left, right);

                Xunit.Assert.False(product.HasLogProbabilityOverride);
                Assert.Equal(Math.Log(0.25), product.GetLogProb('5'), Eps);
                Xunit.Assert.True(double.IsNegativeInfinity(product.GetLogProb('a')));
            }
        }
예제 #5
0
        // Repeating the string distribution over {"a", "b"} one to three times must equal
        // repeating the character distribution over {'a', 'b'} one to three times.
        public void Repeat2()
        {
            var repeatedStrings = StringDistribution.Repeat(
                StringDistribution.OneOf("a", "b"),
                minTimes: 1,
                maxTimes: 3);
            var repeatedChars = StringDistribution.Repeat(
                DiscreteChar.OneOf('a', 'b'),
                minTimes: 1,
                maxTimes: 3);

            Assert.Equal(repeatedChars, repeatedStrings);
        }
예제 #6
0
        // The product of "zero or more of 'a'/'b'" with "zero or more 'a'" is tested to have
        // value 1.0 on strings made only of 'a' and 0.0 on any string containing 'b'.
        public void Product2()
        {
            var anyAb = StringDistribution.ZeroOrMore(DiscreteChar.OneOf('a', 'b'));
            var onlyA = StringDistribution.ZeroOrMore('a');

            var product = anyAb.Product(onlyA);

            StringInferenceTestUtilities.TestProbability(product, 1.0, string.Empty, "a", "aa", "aaa");
            StringInferenceTestUtilities.TestProbability(product, 0.0, "b", "bb", "ab", "ba");
        }
예제 #7
0
        // The digit distribution must report itself as broad; a distribution over four
        // hand-picked digits must not.
        public void BroadAndNarrow()
        {
            Xunit.Assert.True(DiscreteChar.Digit().IsBroad);
            Xunit.Assert.False(DiscreteChar.OneOf('1', '3', '5', '6').IsBroad);
        }
예제 #8
0
        // Builds the prior over names: one name part, then a single space or hyphen,
        // then a second name part.
        private static StringDistribution NamePrior()
        {
            // TODO: make this closer to:
            // NP([\s\-]NP)*(\s[""\(]NP[""\)])?([\s\-]NP)+
            var prior = NamePart();

            var separator = DiscreteChar.OneOf(' ', '-');
            prior.AppendInPlace(separator);
            prior.AppendInPlace(NamePart());

            return prior;
        }
예제 #9
0
        /// <summary>
        /// Initializes static members of the <see cref="StringFormatOpBase{TThis}"/> class:
        /// the placeholder requirement flag and the brace-excluding automaton and transducer.
        /// </summary>
        static StringFormatOpBase()
        {
            // Default to the more general behavior: placeholders are not mandatory
            RequirePlaceholderForEveryArgument = false;

            // Every character except the two curly braces
            DiscreteChar anythingButBraces = DiscreteChar.OneOf('{', '}').Complement();
            DisallowBracesAutomaton = StringAutomaton.Constant(1.0, anythingButBraces);
            DisallowBracesTransducer = StringTransducer.Copy(anythingButBraces);

            // Instantiate TThis last so that its static constructor is guaranteed to have run
            // and TThis has had the chance to set everything up
            new TThis();
        }
예제 #10
0
        [Trait("Category", "OpenBug")] // Test failing with AutomatonTooLarge due to determinization added to SetToProduct in change 47614.  Increasing max states to 1M does not fix the issue
        public void PropertyInferencePerformanceTest()
        {
            // Restart the random generator with a fixed seed before drawing the test data.
            Rand.Restart(777);

            var names = new[] { "Alice", "Bob", "Charlie", "Eve", "Boris", "John" };
            var roles = new[] { "sender", "receiver", "attacker", "eavesdropper", "developer", "researcher" };
            var formats = new[] { "{0} is {1}", "{0} is known as {1}", "{1} is a role of {0}", "{0} -- {1}", "{0} aka {1}" };

            // Every observed text formats a random name, and the role at the same index,
            // with a random template.
            var observedTexts = new string[10];
            for (int textIndex = 0; textIndex < observedTexts.Length; ++textIndex)
            {
                int pickedEntity = Rand.Int(names.Length);
                int pickedFormat = Rand.Int(formats.Length);
                observedTexts[textIndex] = string.Format(formats[pickedFormat], names[pickedEntity], roles[pickedEntity]);
            }

            // Ranges of the model.
            var entityRange = new Range(names.Length).Named("entity");
            var templateRange = new Range(formats.Length).Named("template");
            var textRange = new Range(observedTexts.Length).Named("text");

            // Per-entity name and value, each drawn from its own string prior.
            var entityNames = Variable.Array<string>(entityRange).Named("entityNames");
            entityNames[entityRange] = Variable.Random(StringDistribution.Capitalized()).ForEach(entityRange);

            var entityValues = Variable.Array<string>(entityRange).Named("entityValues");
            entityValues[entityRange] = Variable.Random(StringDistribution.Lower()).ForEach(entityRange);

            // Template prior: the two placeholders at the ends, in either order, with
            // brace-free text between them.
            StringDistribution braceFreeMiddle = StringDistribution.ZeroOrMore(DiscreteChar.OneOf('{', '}').Complement());
            StringDistribution templatePrior = StringDistribution.OneOf(
                StringDistribution.String("{0} ") + braceFreeMiddle + StringDistribution.String(" {1}"),
                StringDistribution.String("{1} ") + braceFreeMiddle + StringDistribution.String(" {0}"));

            var templates = Variable.Array<string>(templateRange).Named("templates");
            templates[templateRange] = Variable.Random(templatePrior).ForEach(templateRange);

            // Each text is produced by formatting a uniformly chosen entity with a uniformly
            // chosen template.
            var texts = Variable.Array<string>(textRange).Named("texts");
            using (Variable.ForEach(textRange))
            {
                var entityChoice = Variable.DiscreteUniform(entityRange).Named("entityIndex");
                var templateChoice = Variable.DiscreteUniform(templateRange).Named("templateIndex");
                using (Variable.Switch(entityChoice))
                {
                    using (Variable.Switch(templateChoice))
                    {
                        texts[textRange] = Variable.StringFormat(templates[templateChoice], entityNames[entityChoice], entityValues[entityChoice]);
                    }
                }
            }

            texts.ObservedValue = observedTexts;

            var engine = new InferenceEngine
            {
                ShowProgress = false,
                OptimiseForVariables = new[] { entityNames, entityValues },
                Compiler =
                {
                    RecommendedQuality = QualityBand.Experimental,
                    // TODO: get this test to work with parallel for loops.
                    UseParallelForLoops = false
                },
                NumberOfIterations = 1
            };

            // Profile a single run that infers both arrays and prints the first element of each.
            ProfileAction(
                () =>
                {
                    Console.WriteLine(engine.Infer<StringDistribution[]>(entityNames)[0]);
                    Console.WriteLine(engine.Infer<StringDistribution[]>(entityValues)[0]);
                },
                1);
        }
예제 #11
0
        public void WordModel()
        {
            // Goal: a word model expressed as a fairly simple StringDistribution such that
            // (1) a word of moderate length is not significantly less probable than a
            //     shorter word, and
            // (2) the probability of a specific word given its length matches that of
            //     words in the target language.
            // This is done by placing non-normalized character distributions on the edges.
            // The StringDistribution does not know they are non-normalized, and the
            // StringDistribution itself is non-normalizable.
            const double TargetProb1  = 0.05;
            const double Ratio1       = 0.4;
            const double TargetProb2  = TargetProb1 * Ratio1;
            const double Ratio2       = 0.2;
            const double TargetProb3  = TargetProb2 * Ratio2;
            const double TargetProb4  = TargetProb3 * Ratio2;
            const double TargetProb5  = TargetProb4 * Ratio2;
            const double Ratio3       = 0.999;
            const double TargetProb6  = TargetProb5 * Ratio3;
            const double TargetProb7  = TargetProb6 * Ratio3;
            const double TargetProb8  = TargetProb7 * Ratio3;
            const double Ratio4       = 0.9;
            const double TargetProb9  = TargetProb8 * Ratio4;
            const double TargetProb10 = TargetProb9 * Ratio4;

            // Entry k holds the target probability of a word of length k + 1.
            var targetByLength = new double[]
            {
                TargetProb1,
                TargetProb2,
                TargetProb3,
                TargetProb4,
                TargetProb5,
                TargetProb6,
                TargetProb7,
                TargetProb8,
                TargetProb9,
                TargetProb10
            };

            var upper = DiscreteChar.Upper();
            var lower = DiscreteChar.Lower();
            var upperNarrow = DiscreteChar.OneOf('A', 'B');
            var lowerNarrow = DiscreteChar.OneOf('a', 'b');
            var upperScaled = DiscreteChar.Uniform();
            var lowerScaled1 = DiscreteChar.Uniform();
            var lowerScaled2 = DiscreteChar.Uniform();
            var lowerScaled3 = DiscreteChar.Uniform();
            var lowerScaledEnd = DiscreteChar.Uniform();

            // Attach the per-position scaling to each character distribution.
            upperScaled.SetToPartialUniformOf(upper, Math.Log(TargetProb1));
            lowerScaled1.SetToPartialUniformOf(lower, Math.Log(Ratio1));
            lowerScaled2.SetToPartialUniformOf(lower, Math.Log(Ratio2));
            lowerScaled3.SetToPartialUniformOf(lower, Math.Log(Ratio3));
            lowerScaledEnd.SetToPartialUniformOf(lower, Math.Log(Ratio4));

            var perPositionChars = new List<DiscreteChar>
            {
                upperScaled,
                lowerScaled1,
                lowerScaled2,
                lowerScaled2,
                lowerScaled2,
                lowerScaled3,
                lowerScaled3,
                lowerScaled3,
                lowerScaledEnd
            };
            var wordModel = StringDistribution.Concatenate(perPositionChars, true, true);

            const string Word = "Abcdefghij";
            const double Tolerance = 1e-5;

            var broadDist = StringDistribution.Char(upper);
            var narrowDist = StringDistribution.Char(upperNarrow);
            var narrowWord = "A";
            var expectedProbForNarrow = 0.5;

            // Walk through word lengths 1..10, growing the broad and narrow distributions by
            // one lowercase character after every step.
            for (var length = 1; length <= targetByLength.Length; length++)
            {
                var target = targetByLength[length - 1];
                var prefix = Word.Substring(0, length);

                // Probability of the prefix under the model itself.
                var prefixProb = Math.Exp(wordModel.GetLogProb(prefix));
                Assert.Equal(target, prefixProb, Tolerance);

                // Average of the model against the broad distribution of the same length.
                var averageWithBroad = Math.Exp(wordModel.GetLogAverageOf(broadDist));
                Assert.Equal(target, averageWithBroad, Tolerance);

                // The product with the broad distribution is expected to keep the overrides.
                var product = StringDistribution.Zero();
                product.SetToProduct(broadDist, wordModel);
                Xunit.Assert.True(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
                prefixProb = Math.Exp(product.GetLogProb(prefix));
                Assert.Equal(target, prefixProb, Tolerance);

                // The product with the narrow distribution is expected to drop them.
                product.SetToProduct(narrowDist, wordModel);
                Xunit.Assert.False(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
                var narrowWordProb = Math.Exp(product.GetLogProb(narrowWord));
                Assert.Equal(expectedProbForNarrow, narrowWordProb, Tolerance);

                broadDist = broadDist.Append(lower);
                narrowDist = narrowDist.Append(lowerNarrow);
                narrowWord += "a";
                expectedProbForNarrow *= 0.5;
            }

            // Copied model: project the word model through a copying transducer.
            var copiedModel = StringDistribution.FromWorkspace(
                StringTransducer.Copy().ProjectSource(wordModel.GetWorkspaceOrPoint()));

            // The copy must reproduce the same target probabilities.
            for (var length = 1; length <= targetByLength.Length; length++)
            {
                var prefix = Word.Substring(0, length);
                var prefixProb = Math.Exp(copiedModel.GetLogProb(prefix));
                Assert.Equal(targetByLength[length - 1], prefixProb, Tolerance);
            }

            // Rescaled model: replace the first (uppercase) character with a rescaled version
            // and copy the remainder.
            var scale = 0.5;
            var rescaledTargetProb1 = TargetProb1 * scale;
            var upperRescaled = DiscreteChar.Uniform();
            upperRescaled.SetToPartialUniformOf(upper, Math.Log(rescaledTargetProb1));

            var reWeightingTransducer = StringTransducer
                .Replace(
                    StringDistribution.Char(upper).GetWorkspaceOrPoint(),
                    StringDistribution.Char(upperRescaled).GetWorkspaceOrPoint())
                .Append(StringTransducer.Copy());
            var reWeightedWordModel = StringDistribution.FromWorkspace(
                reWeightingTransducer.ProjectSource(wordModel.GetWorkspaceOrPoint()));

            // Every target probability is expected to be multiplied by the scale.
            for (var length = 1; length <= targetByLength.Length; length++)
            {
                var prefix = Word.Substring(0, length);
                var prefixProb = Math.Exp(reWeightedWordModel.GetLogProb(prefix));
                Assert.Equal(scale * targetByLength[length - 1], prefixProb, Tolerance);
            }
        }
예제 #12
0
        // Checks the messages computed by StringFromArrayOp: from an array of character
        // distributions to the string, and from a string distribution back to the characters.
        public void MessageOperatorsTest()
        {
            const double OneThird = 1.0 / 3.0;
            const double TwoThirds = 2.0 / 3.0;

            // Message to the string from three character distributions.
            var strFromThreeChars = StringFromArrayOp.StrAverageConditional(
                new[]
                {
                    DiscreteChar.PointMass('a'),
                    DiscreteChar.OneOf('b', 'c'),
                    DiscreteChar.OneOf('d', 'e')
                });
            Assert.Equal(StringDistribution.OneOf("abd", "abe", "acd", "ace"), strFromThreeChars, Eps);

            // Message to the string from an empty array.
            var strFromNoChars = StringFromArrayOp.StrAverageConditional(new DiscreteChar[0]);
            Assert.Equal(StringDistribution.Empty(), strFromNoChars, Eps);

            // Messages to the characters. First character is a point mass, second is uniform.
            var toCharsPointFirst = StringFromArrayOp.CharactersAverageConditional(
                StringDistribution.OneOf("ab", "cd"),
                new[] { DiscreteChar.PointMass('a'), DiscreteChar.Uniform() },
                new DiscreteChar[2]);
            Assert.Equal(DiscreteChar.OneOf('a', 'c'), toCharsPointFirst[0], Eps);
            Assert.Equal(DiscreteChar.PointMass('b'), toCharsPointFirst[1], Eps);

            // Both characters uniform, strings sharing their first character.
            var toCharsSharedFirst = StringFromArrayOp.CharactersAverageConditional(
                StringDistribution.OneOf("ab", "ac"),
                new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
                new DiscreteChar[2]);
            Assert.Equal(DiscreteChar.PointMass('a'), toCharsSharedFirst[0], Eps);
            Assert.Equal(DiscreteChar.OneOf('b', 'c'), toCharsSharedFirst[1], Eps);

            // Both characters uniform, three strings: the results are non-uniform.
            var toCharsThreeStrings = StringFromArrayOp.CharactersAverageConditional(
                StringDistribution.OneOf("ab", "ac", "bc"),
                new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
                new DiscreteChar[2]);
            Assert.Equal(TwoThirds, toCharsThreeStrings[0]['a'], Eps);
            Assert.Equal(OneThird, toCharsThreeStrings[0]['b'], Eps);
            Assert.Equal(OneThird, toCharsThreeStrings[1]['b'], Eps);
            Assert.Equal(TwoThirds, toCharsThreeStrings[1]['c'], Eps);

            // One string is longer than the two-element character array.
            var toCharsLengthMismatch = StringFromArrayOp.CharactersAverageConditional(
                StringDistribution.OneOf("ab", "cde"),
                new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() },
                new DiscreteChar[2]);
            Assert.Equal(DiscreteChar.PointMass('a'), toCharsLengthMismatch[0], Eps);
            Assert.Equal(DiscreteChar.PointMass('b'), toCharsLengthMismatch[1], Eps);

            // Both characters are point masses.
            var toCharsBothPoint = StringFromArrayOp.CharactersAverageConditional(
                StringDistribution.OneOf("ab", "cb", "ae", "ax"),
                new[] { DiscreteChar.PointMass('a'), DiscreteChar.PointMass('b') },
                new DiscreteChar[2]);
            Assert.Equal(DiscreteChar.OneOf('a', 'c'), toCharsBothPoint[0], Eps);
            Assert.Equal(DiscreteChar.OneOf('b', 'e', 'x'), toCharsBothPoint[1], Eps);

            // Four characters, the last one a point mass on 'd'.
            var toCharsFour = StringFromArrayOp.CharactersAverageConditional(
                StringDistribution.OneOf("abcd", "accd", "acce"),
                new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform(), DiscreteChar.Uniform(), DiscreteChar.PointMass('d') },
                new DiscreteChar[4]);
            Assert.Equal(DiscreteChar.PointMass('a'), toCharsFour[0], Eps);
            Assert.Equal(DiscreteChar.OneOf('b', 'c'), toCharsFour[1], Eps);
            Assert.Equal(DiscreteChar.PointMass('c'), toCharsFour[2], Eps);
            Assert.Equal(TwoThirds, toCharsFour[3]['d'], Eps);
            Assert.Equal(OneThird, toCharsFour[3]['e'], Eps);
        }