Exemplo n.º 1
0
        /// <summary>
        /// Multiplies a distribution that carries a log-probability override by a narrow distribution,
        /// once in each operand order, and checks that the product has no override, assigns
        /// probability 0.25 to '5' and zero probability to 'a'.
        /// </summary>
        public void ProductWithLogOverrideNarrow()
        {
            // First pass keeps the operands in declaration order, second pass swaps them.
            foreach (var swapOperands in new[] { false, true })
            {
                var overridden = DiscreteChar.LetterOrDigit();
                var narrow = DiscreteChar.OneOf('1', '3', '5', '6');

                // Replace the letter-or-digit distribution by its partial-uniform version with an override of log(0.9).
                var overrideLogProb = Math.Log(0.9);
                overridden.SetToPartialUniformOf(overridden, overrideLogProb);

                // Preconditions of the scenario, checked before any swap happens.
                Xunit.Assert.True(overridden.HasLogProbabilityOverride);
                Xunit.Assert.False(narrow.IsBroad);

                if (swapOperands)
                {
                    Util.Swap(ref overridden, ref narrow);
                }

                var product = DiscreteChar.Uniform();
                product.SetToProduct(overridden, narrow);

                Xunit.Assert.False(product.HasLogProbabilityOverride);

                // '5' is one of the four characters of the narrow operand; 'a' is not among them.
                var logProbOfFive = product.GetLogProb('5');
                Assert.Equal(Math.Log(0.25), logProbOfFive, Eps);

                var logProbOfLetter = product.GetLogProb('a');
                Xunit.Assert.True(double.IsNegativeInfinity(logProbOfLetter));
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Checks that <c>SetToPartialUniformOf</c> with a log-probability override makes
        /// <c>GetLogProb</c> return the override for both a letter and a digit, leaves the source
        /// distribution's probabilities unchanged, and throws <see cref="ArgumentException"/> when
        /// the override is the logarithm of a value greater than one.
        /// </summary>
        public void PartialUniformWithLogProbabilityOverride()
        {
            var dist       = DiscreteChar.LetterOrDigit();
            var probLetter = Math.Exp(dist.GetLogProb('j'));
            var probNumber = Math.Exp(dist.GetLogProb('5'));

            var logProbabilityOverride = Math.Log(0.7);
            var scaledDist             = DiscreteChar.Uniform();

            scaledDist.SetToPartialUniformOf(dist, logProbabilityOverride);
            var scaledLogProbLetter = scaledDist.GetLogProb('j');
            var scaledLogProbNumber = scaledDist.GetLogProb('5');

            // Expected value goes first, matching the other tolerance assertions in these tests,
            // so that a failure reports the override as "expected" and the queried value as "observed".
            Assert.Equal(logProbabilityOverride, scaledLogProbLetter, Eps);
            Assert.Equal(logProbabilityOverride, scaledLogProbNumber, Eps);

            // Check that cache has not been compromised: the source distribution must still
            // report the probabilities captured before the scaled copy was built.
            Assert.Equal(probLetter, Math.Exp(dist.GetLogProb('j')), Eps);
            Assert.Equal(probNumber, Math.Exp(dist.GetLogProb('5')), Eps);

            // Check that an exception is thrown if a bad log-probability override is passed down
            // (log(1.2) is positive, i.e. it corresponds to a probability above one).
            Xunit.Assert.Throws <ArgumentException>(() =>
            {
                var badDist = DiscreteChar.Uniform();
                badDist.SetToPartialUniformOf(dist, Math.Log(1.2));
            });
        }
Exemplo n.º 3
0
        public void GetOutgoingTransitionsForDeterminization1()
        {
            var builder = new StringAutomaton.Builder();

            builder.Start.AddTransition(DiscreteChar.Uniform(), Weight.FromValue(2));

            var wrapper = new StringAutomatonWrapper(builder);

            var outgoingTransitions =
                wrapper.GetOutgoingTransitionsForDeterminization(0, Weight.FromValue(3));
            var expectedOutgoingTransitions = new[]
Exemplo n.º 4
0
        /// <summary>
        /// Builds the string distribution <c>P(s) = \prod_i P_i(s_i)^I[i != j]</c> by appending, for every
        /// position <c>i</c>, either the given character distribution <c>P_i</c> or, at the excluded
        /// position <c>j</c>, a uniform character distribution.
        /// </summary>
        /// <param name="characters">The per-position character distributions <c>P_i</c>.</param>
        /// <param name="excludedPos">The index <c>j</c> of the position whose distribution is replaced by a uniform one.</param>
        /// <returns>The concatenation of the per-position distributions, starting from the empty-string distribution.</returns>
        private static StringDistribution GetCharWeighter(IList<DiscreteChar> characters, int excludedPos)
        {
            var weighter = StringDistribution.Empty();

            for (var position = 0; position < characters.Count; position++)
            {
                if (position == excludedPos)
                {
                    // The excluded position contributes a uniform factor instead of its own distribution.
                    weighter.AppendInPlace(DiscreteChar.Uniform());
                }
                else
                {
                    weighter.AppendInPlace(characters[position]);
                }
            }

            return weighter;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Runs the shared distribution checks on a ranged-uniform character distribution, a uniform
        /// one, and a 0.8 / 0.2 weighted sum of the two.
        /// </summary>
        public void CharDistribution()
        {
            var rangedUniform = DiscreteChar.UniformInRanges("bdgi");
            var fullUniform = DiscreteChar.Uniform();

            // Weighted sum: 0.8 of the ranged distribution plus 0.2 of the uniform one.
            var weightedSum = new DiscreteChar();
            weightedSum.SetToSum(0.8, rangedUniform, 0.2, fullUniform);

            DistributionTests.DistributionTest(fullUniform, weightedSum, false);
            DistributionTests.PointMassTest(weightedSum, 'b');
            DistributionTests.UniformTest(rangedUniform, 'b');
        }
        /// <summary>
        /// Adds a single uniform-character transition with weight 2 to the start state, asks for the
        /// determinization transitions of state 0 carrying weight 3, and expects one uniform transition
        /// with weight 6 leading to state 1 with weight 1.
        /// </summary>
        public void GetOutgoingTransitionsForDeterminization1()
        {
            var automaton = new StringAutomatonWrapper();
            automaton.Start.AddTransition(DiscreteChar.Uniform(), Weight.FromValue(2));

            // Weighted source state set: state 0 with weight 3.
            var sourceStates = new Dictionary<int, Weight>
            {
                { 0, Weight.FromValue(3) }
            };
            var actualTransitions = automaton.GetOutgoingTransitionsForDeterminization(sourceStates);

            // Expected: a single transition whose weight is 6 and whose destination set is { 1 -> 1 }.
            var expectedLabel = DiscreteChar.Uniform();
            var expectedWeight = Weight.FromValue(6);
            var expectedDestinations = new Dictionary<int, Weight>
            {
                { 1, Weight.FromValue(1) }
            };
            var expectedTransitions = new[]
            {
                Tuple.Create<DiscreteChar, Weight, IEnumerable<KeyValuePair<int, Weight>>>(
                    expectedLabel, expectedWeight, expectedDestinations)
            };

            AssertCollectionsEqual(expectedTransitions, actualTransitions, TransitionInfoEqualityComparer.Instance);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Builds a word model out of character distributions with log-probability overrides and checks
        /// the probability it assigns to prefixes of a fixed word, directly, through products with
        /// broad and narrow distributions, through a copying transducer and through a re-weighting
        /// transducer.
        /// </summary>
        public void WordModel()
        {
            // Goal: a reasonably simple StringDistribution acting as a word model such that
            // (1) a word of moderate length is not significantly less probable than a shorter word, and
            // (2) the probability of a specific word conditioned on its length matches that of
            //     words in the target language.
            // This is achieved by placing non-normalized character distributions on the edges. The
            // StringDistribution is unaware that these are non-normalized, and the
            // StringDistribution itself is non-normalizable.
            const double ProbLength1 = 0.05;
            const double StepAfterFirst = 0.4;
            const double ProbLength2 = ProbLength1 * StepAfterFirst;
            const double StepShort = 0.2;
            const double ProbLength3 = ProbLength2 * StepShort;
            const double ProbLength4 = ProbLength3 * StepShort;
            const double ProbLength5 = ProbLength4 * StepShort;
            const double StepModerate = 0.999;
            const double ProbLength6 = ProbLength5 * StepModerate;
            const double ProbLength7 = ProbLength6 * StepModerate;
            const double ProbLength8 = ProbLength7 * StepModerate;
            const double StepTail = 0.9;
            const double ProbLength9 = ProbLength8 * StepTail;
            const double ProbLength10 = ProbLength9 * StepTail;

            // Entry k holds the target probability of the word prefix of length k + 1.
            var expectedByLength = new double[]
            {
                ProbLength1,
                ProbLength2,
                ProbLength3,
                ProbLength4,
                ProbLength5,
                ProbLength6,
                ProbLength7,
                ProbLength8,
                ProbLength9,
                ProbLength10
            };

            var upper = DiscreteChar.Upper();
            var lower = DiscreteChar.Lower();
            var upperNarrow = DiscreteChar.OneOf('A', 'B');
            var lowerNarrow = DiscreteChar.OneOf('a', 'b');
            var firstCharWeight = DiscreteChar.Uniform();
            var lowerWeightAfterFirst = DiscreteChar.Uniform();
            var lowerWeightShort = DiscreteChar.Uniform();
            var lowerWeightModerate = DiscreteChar.Uniform();
            var lowerWeightTail = DiscreteChar.Uniform();

            // Each edge distribution is a partial uniform whose override is the log of the per-character factor.
            firstCharWeight.SetToPartialUniformOf(upper, Math.Log(ProbLength1));
            lowerWeightAfterFirst.SetToPartialUniformOf(lower, Math.Log(StepAfterFirst));
            lowerWeightShort.SetToPartialUniformOf(lower, Math.Log(StepShort));
            lowerWeightModerate.SetToPartialUniformOf(lower, Math.Log(StepModerate));
            lowerWeightTail.SetToPartialUniformOf(lower, Math.Log(StepTail));

            var edgeDistributions = new List<DiscreteChar>
            {
                firstCharWeight,
                lowerWeightAfterFirst,
                lowerWeightShort,
                lowerWeightShort,
                lowerWeightShort,
                lowerWeightModerate,
                lowerWeightModerate,
                lowerWeightModerate,
                lowerWeightTail
            };

            // NOTE(review): the two boolean flags presumably allow the word to end early and the last
            // element to repeat (nine edges serve ten word lengths below) - confirm against Concatenate.
            var model = StringDistribution.Concatenate(edgeDistributions, true, true);

            const string FullWord = "Abcdefghij";
            const double Tolerance = 1e-5;

            // These four evolve together: each iteration extends them by one lower-case position.
            var broad = StringDistribution.Char(upper);
            var narrow = StringDistribution.Char(upperNarrow);
            var narrowSample = "A";
            var narrowSampleProb = 0.5;

            for (var index = 0; index < expectedByLength.Length; index++)
            {
                var prefix = FullWord.Substring(0, index + 1);

                // Probability of the prefix under the model itself.
                var prefixProb = Math.Exp(model.GetLogProb(prefix));
                Assert.Equal(expectedByLength[index], prefixProb, Tolerance);

                // Average of the model under the broad distribution of the same length.
                var averageUnderBroad = Math.Exp(model.GetLogAverageOf(broad));
                Assert.Equal(expectedByLength[index], averageUnderBroad, Tolerance);

                // Product with the broad distribution is expected to keep element log-value overrides.
                var product = StringDistribution.Zero();
                product.SetToProduct(broad, model);
                Xunit.Assert.True(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
                var prefixProbInProduct = Math.Exp(product.GetLogProb(prefix));
                Assert.Equal(expectedByLength[index], prefixProbInProduct, Tolerance);

                // Product with the narrow distribution is expected to drop them.
                product.SetToProduct(narrow, model);
                Xunit.Assert.False(product.GetWorkspaceOrPoint().HasElementLogValueOverrides);
                var narrowProbInProduct = Math.Exp(product.GetLogProb(narrowSample));
                Assert.Equal(narrowSampleProb, narrowProbInProduct, Tolerance);

                // Grow everything by one lower-case character for the next length.
                broad = broad.Append(lower);
                narrow = narrow.Append(lowerNarrow);
                narrowSample = narrowSample + "a";
                narrowSampleProb = narrowSampleProb * 0.5;
            }

            // Copied model: project the word model through a copying transducer.
            var modelWorkspace = model.GetWorkspaceOrPoint();
            var copied = StringDistribution.FromWorkspace(StringTransducer.Copy().ProjectSource(modelWorkspace));

            // Under the transducer the prefix probabilities must be unchanged.
            for (var index = 0; index < expectedByLength.Length; index++)
            {
                var prefix = FullWord.Substring(0, index + 1);
                var prefixProb = Math.Exp(copied.GetLogProb(prefix));
                Assert.Equal(expectedByLength[index], prefixProb, Tolerance);
            }

            // Rescaled model: replace the first-character weight by one scaled by 0.5, copy the rest.
            var factor = 0.5;
            var rescaledFirstProb = ProbLength1 * factor;
            var rescaledFirstCharWeight = DiscreteChar.Uniform();

            rescaledFirstCharWeight.SetToPartialUniformOf(upper, Math.Log(rescaledFirstProb));

            var replacedPart = StringDistribution.Char(upper).GetWorkspaceOrPoint();
            var replacementPart = StringDistribution.Char(rescaledFirstCharWeight).GetWorkspaceOrPoint();
            var reweighter = StringTransducer.Replace(replacedPart, replacementPart).Append(StringTransducer.Copy());
            var reweighted = StringDistribution.FromWorkspace(reweighter.ProjectSource(model.GetWorkspaceOrPoint()));

            for (var index = 0; index < expectedByLength.Length; index++)
            {
                var prefix = FullWord.Substring(0, index + 1);
                var prefixProb = Math.Exp(reweighted.GetLogProb(prefix));
                Assert.Equal(factor * expectedByLength[index], prefixProb, Tolerance);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Exercises <c>StringFromArrayOp.StrAverageConditional</c> and
        /// <c>StringFromArrayOp.CharactersAverageConditional</c> on small hand-computed cases.
        /// </summary>
        public void MessageOperatorsTest()
        {
            // Message to the string: three characters with 1, 2 and 2 options give four strings.
            var threeChars = new[] { DiscreteChar.PointMass('a'), DiscreteChar.OneOf('b', 'c'), DiscreteChar.OneOf('d', 'e') };
            var strFromThreeChars = StringFromArrayOp.StrAverageConditional(threeChars);
            Assert.Equal(StringDistribution.OneOf("abd", "abe", "acd", "ace"), strFromThreeChars, Eps);

            // Message to the string from an empty character array.
            var strFromNoChars = StringFromArrayOp.StrAverageConditional(new DiscreteChar[0]);
            Assert.Equal(StringDistribution.Empty(), strFromNoChars, Eps);

            // Case 1: string in { "ab", "cd" }, incoming characters: point mass 'a' and uniform.
            var str1 = StringDistribution.OneOf("ab", "cd");
            var incoming1 = new[] { DiscreteChar.PointMass('a'), DiscreteChar.Uniform() };
            var result1 = StringFromArrayOp.CharactersAverageConditional(str1, incoming1, new DiscreteChar[2]);

            Assert.Equal(DiscreteChar.OneOf('a', 'c'), result1[0], Eps);
            Assert.Equal(DiscreteChar.PointMass('b'), result1[1], Eps);

            // Case 2: string in { "ab", "ac" }, both incoming characters uniform.
            var str2 = StringDistribution.OneOf("ab", "ac");
            var incoming2 = new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() };
            var result2 = StringFromArrayOp.CharactersAverageConditional(str2, incoming2, new DiscreteChar[2]);

            Assert.Equal(DiscreteChar.PointMass('a'), result2[0], Eps);
            Assert.Equal(DiscreteChar.OneOf('b', 'c'), result2[1], Eps);

            // Case 3: string in { "ab", "ac", "bc" }, both incoming characters uniform;
            // the expected per-character probabilities are thirds.
            var str3 = StringDistribution.OneOf("ab", "ac", "bc");
            var incoming3 = new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() };
            var result3 = StringFromArrayOp.CharactersAverageConditional(str3, incoming3, new DiscreteChar[2]);

            Assert.Equal(2.0 / 3.0, result3[0]['a'], Eps);
            Assert.Equal(1.0 / 3.0, result3[0]['b'], Eps);
            Assert.Equal(1.0 / 3.0, result3[1]['b'], Eps);
            Assert.Equal(2.0 / 3.0, result3[1]['c'], Eps);

            // Case 4: string in { "ab", "cde" } with a two-element character array;
            // only 'a' and 'b' are expected in the result.
            var str4 = StringDistribution.OneOf("ab", "cde");
            var incoming4 = new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform() };
            var result4 = StringFromArrayOp.CharactersAverageConditional(str4, incoming4, new DiscreteChar[2]);

            Assert.Equal(DiscreteChar.PointMass('a'), result4[0], Eps);
            Assert.Equal(DiscreteChar.PointMass('b'), result4[1], Eps);

            // Case 5: string in { "ab", "cb", "ae", "ax" }, incoming characters: point masses 'a' and 'b'.
            var str5 = StringDistribution.OneOf("ab", "cb", "ae", "ax");
            var incoming5 = new[] { DiscreteChar.PointMass('a'), DiscreteChar.PointMass('b') };
            var result5 = StringFromArrayOp.CharactersAverageConditional(str5, incoming5, new DiscreteChar[2]);

            Assert.Equal(DiscreteChar.OneOf('a', 'c'), result5[0], Eps);
            Assert.Equal(DiscreteChar.OneOf('b', 'e', 'x'), result5[1], Eps);

            // Case 6: four-character strings, the last incoming character is a point mass on 'd'.
            var str6 = StringDistribution.OneOf("abcd", "accd", "acce");
            var incoming6 = new[] { DiscreteChar.Uniform(), DiscreteChar.Uniform(), DiscreteChar.Uniform(), DiscreteChar.PointMass('d') };
            var result6 = StringFromArrayOp.CharactersAverageConditional(str6, incoming6, new DiscreteChar[4]);

            Assert.Equal(DiscreteChar.PointMass('a'), result6[0], Eps);
            Assert.Equal(DiscreteChar.OneOf('b', 'c'), result6[1], Eps);
            Assert.Equal(DiscreteChar.PointMass('c'), result6[2], Eps);
            Assert.Equal(2.0 / 3.0, result6[3]['d'], Eps);
            Assert.Equal(1.0 / 3.0, result6[3]['e'], Eps);
        }