Beispiel #1
0
 public void CommonChars()
 {
     TestSupport("digit", DiscreteChar.Digit(), "0123456789", "Ab !Ј");
     TestSupport("lower", DiscreteChar.Lower(), "abcdefghixyz", "ABC0123, ");
     TestSupport("upper", DiscreteChar.Upper(), "ABCDEFGHUXYZ", "abc0123, ");
     TestSupport("letter", DiscreteChar.Letter(), "aBcDeFgGhxyzXYZ", "0123! ,");
     TestSupport("letterOrDigit", DiscreteChar.LetterOrDigit(), "abcABC0123xyzXYZ789", " !Ј$,");
     TestSupport("wordChar", DiscreteChar.WordChar(), "abc_ABC_0123s", " !:.,");
     TestSupport("whitespace", DiscreteChar.Whitespace(), " \t", "abcABC0123,:!");
 }
Beispiel #2
0
        public void ReplaceElements()
        {
            StringTransducer replace = StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.Digit());

            StringInferenceTestUtilities.TestTransducerValue(replace, "hello", "123", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "w", "1337", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "w", string.Empty, 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, string.Empty, "17", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, string.Empty, string.Empty, 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "123", "worlds", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "123", "123", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "1", string.Empty, 0.0);
        }
        public void Repeat2()
        {
            StringAutomaton automaton = StringAutomaton.Constant(1.0, DiscreteChar.Lower());

            StringInferenceTestUtilities.TestValue(automaton, 1.0, string.Empty, "ab", "abcab");

            automaton = StringAutomaton.Repeat(automaton);

            // Can't use StringInferenceTestUtilities.TestValue here since the value is not infinite in log-domain
            // due to approximate closure computations for epsilon-loops
            Assert.True(automaton.GetValue(string.Empty) > 1000);
            Assert.True(automaton.GetValue("ab") > 1000);
            Assert.True(automaton.GetValue("abcab") > 1000);
        }
Beispiel #4
0
        public void Optional()
        {
            StringAutomaton  automaton    = StringAutomaton.Constant(1.0, DiscreteChar.Lower());
            StringTransducer copy         = StringTransducer.Copy(automaton);
            StringTransducer copyOptional = StringTransducer.Optional(copy);

            StringInferenceTestUtilities.TestTransducerValue(copy, "abc", "abc", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOptional, "abc", "abc", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, string.Empty, string.Empty, 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOptional, string.Empty, string.Empty, 2.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, "abc", "ab", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOptional, "abc", "ab", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, "abc", string.Empty, 0.0);
            StringInferenceTestUtilities.TestTransducerValue(copyOptional, "abc", string.Empty, 0.0);
        }
Beispiel #5
0
        public void Lower()
        {
            int lowercaseCharacterCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);

            var lowercaseAutomaton1 = StringDistribution.Lower(minLength: 1, maxLength: 2);

            Assert.True(lowercaseAutomaton1.IsProper());
            StringInferenceTestUtilities.TestProbability(lowercaseAutomaton1, StringInferenceTestUtilities.StringUniformProbability(1, 2, lowercaseCharacterCount), "a", "bc");
            StringInferenceTestUtilities.TestProbability(lowercaseAutomaton1, 0.0, "abc", "BC", "A", string.Empty);

            var lowercaseAutomaton2 = StringDistribution.Lower(minLength: 2);

            Assert.False(lowercaseAutomaton2.IsProper());
            StringInferenceTestUtilities.TestProbability(lowercaseAutomaton2, 1.0, "bc", "abvhrbfijbr");
            StringInferenceTestUtilities.TestProbability(lowercaseAutomaton2, 0.0, "a", "BC", "adasdADNdej", string.Empty);
        }
Beispiel #6
0
        public void ConsumeAutomaton()
        {
            StringAutomaton automaton = StringAutomaton.Constant(2.0, DiscreteChar.Lower());

            automaton = automaton.Sum(StringAutomaton.ConstantOnElement(3.0, 'a'));
            StringTransducer consume = StringTransducer.Consume(automaton);

            StringInferenceTestUtilities.TestTransducerValue(consume, "aaa", string.Empty, 2.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, "bb", string.Empty, 2.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, "a", string.Empty, 5.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, string.Empty, string.Empty, 2.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, "bb", "aaa", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, "bb", "bb", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, string.Empty, "bb", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(consume, string.Empty, "a", 0.0);
        }
Beispiel #7
0
        public void Sum()
        {
            StringTransducer replace = StringTransducer.Sum(
                StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.Digit()),
                StringTransducer.Replace(DiscreteChar.Lower(), DiscreteChar.LetterOrDigit()));

            StringInferenceTestUtilities.TestTransducerValue(replace, "hello", "123", 2.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "w", "1337", 2.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "w", string.Empty, 2.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, string.Empty, "17", 2.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, string.Empty, string.Empty, 2.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "hello", "worlds", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "hello", "WORLDS111", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "123", "worlds", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "123", "123", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "1", string.Empty, 0.0);
        }
Beispiel #8
0
        public void CopyAutomaton()
        {
            StringAutomaton automaton = StringAutomaton.ConstantOn(1.0, "prefix1", "prefix2");

            automaton.AppendInPlace(StringAutomaton.Constant(1.0, DiscreteChar.Lower()));
            automaton.AppendInPlace(StringAutomaton.Constant(1.0, DiscreteChar.Upper()));
            automaton.AppendInPlace("!");

            StringTransducer copy = StringTransducer.Copy(automaton);

            StringInferenceTestUtilities.TestTransducerValue(copy, "prefix1!", "prefix1!", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, "prefix2!", "prefix2!", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, "prefix1lower!", "prefix1lower!", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, "prefix2UPPER!", "prefix2UPPER!", 1.0);
            StringInferenceTestUtilities.TestTransducerValue(copy, "prefix1lowerUPPER!", "prefix1lowerUPPER!", 1.0);
            StringInferenceTestUtilities.TestIfTransducerRejects(copy, "prefix1lower", "prefix2UPPER", "!", "prefix1lowerUPPER");

            StringInferenceTestUtilities.TestTransducerProjection(copy, automaton, "prefix1!", 1.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, automaton, "prefix2!", 1.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, automaton, "prefix1lower!", 1.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, automaton, "prefix2UPPER!", 1.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, automaton, "prefix1lowerUPPER!", 1.0);

            StringAutomaton subsetAutomaton = StringAutomaton.ConstantOn(2.0, "prefix1");

            subsetAutomaton.AppendInPlace(StringAutomaton.ConstantOn(3.0, "lll", "mmm"));
            subsetAutomaton.AppendInPlace(StringAutomaton.ConstantOn(1.5, "!", "U!"));
            StringInferenceTestUtilities.TestTransducerProjection(copy, subsetAutomaton, "prefix1lll!", 9.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, subsetAutomaton, "prefix1mmmU!", 9.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, subsetAutomaton, "prefix1!", 0.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, subsetAutomaton, "prefix2lower!", 0.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, subsetAutomaton, "prefix2U!", 0.0);

            StringAutomaton supersetAutomaton = StringAutomaton.ConstantOn(1.5, "pr");

            supersetAutomaton.AppendInPlace(StringAutomaton.Constant(2.0));
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix1!", 3.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix2!", 3.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix1lower!", 3.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix2UPPER!", 3.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix1lowerUPPER!", 3.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix11!", 0.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prefix1lowerUPPERlower!", 0.0);
            StringInferenceTestUtilities.TestTransducerProjection(copy, supersetAutomaton, "prrrrr!", 0.0);
        }
Beispiel #9
0
        public void ReplaceAutomaton()
        {
            StringAutomaton automaton1 = StringAutomaton.Constant(2.0, DiscreteChar.Lower());

            automaton1 = automaton1.Sum(StringAutomaton.ConstantOnElement(3.0, 'a'));
            StringAutomaton automaton2 = StringAutomaton.Constant(0.5, DiscreteChar.Digit());

            automaton2 = automaton2.Sum(StringAutomaton.Constant(2.5, DiscreteChar.LetterOrDigit()));
            StringTransducer replace = StringTransducer.Replace(automaton1, automaton2);

            StringInferenceTestUtilities.TestTransducerValue(replace, string.Empty, "123", 6.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "a", "123", 15.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "ax", "AbC", 5.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "a", "a", 12.5);
            StringInferenceTestUtilities.TestTransducerValue(replace, string.Empty, string.Empty, 6.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "123", string.Empty, 0.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "AbC", "ax", 0.0);
            StringInferenceTestUtilities.TestTransducerValue(replace, "1", "1", 0.0);
        }
Beispiel #10
0
        public void UniformOf()
        {
            var unif1 = StringDistribution.ZeroOrMore(DiscreteChar.Lower());

            Assert.False(unif1.IsUniform());
            Assert.False(unif1.IsProper());
            StringInferenceTestUtilities.TestProbability(unif1, 1.0, "hello", "a", string.Empty);
            StringInferenceTestUtilities.TestProbability(unif1, 0.0, "123", "!", "Abc");

            // Test if non-uniform element distribution does not affect the outcome
            Vector probs = DiscreteChar.Digit().GetProbs();

            probs['1'] = 0;
            probs['2'] = 0.3;
            probs['3'] = 0.0001;
            var unif2 = StringDistribution.ZeroOrMore(DiscreteChar.FromVector(probs));

            StringInferenceTestUtilities.TestProbability(unif2, 1.0, "0", "234", string.Empty);
            StringInferenceTestUtilities.TestProbability(unif2, 0.0, "1", "231", "!", "Abc");
        }
Beispiel #11
0
        public void Capitalized()
        {
            int lowercaseCharacterCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);
            int uppercaseCharacterCount = DiscreteChar.Upper().GetProbs().Count(p => p > 0);

            var capitalizedAutomaton1 = StringDistribution.Capitalized(minLength: 3, maxLength: 5);

            Assert.True(capitalizedAutomaton1.IsProper());
            StringInferenceTestUtilities.TestProbability(
                capitalizedAutomaton1,
                StringInferenceTestUtilities.StringUniformProbability(2, 4, lowercaseCharacterCount) / uppercaseCharacterCount,
                "Abc",
                "Bcde",
                "Abcde");
            StringInferenceTestUtilities.TestProbability(capitalizedAutomaton1, 0.0, "A", "abc", "Ab", "Abcdef", string.Empty);

            var capitalizedAutomaton2 = StringDistribution.Capitalized(minLength: 3);

            Assert.False(capitalizedAutomaton2.IsProper());
            StringInferenceTestUtilities.TestProbability(capitalizedAutomaton2, 1.0, "Abc", "Bcde", "Abcde", "Abfjrhfjlrl");
            StringInferenceTestUtilities.TestProbability(capitalizedAutomaton2, 0.0, "A", "abc", "Ab", string.Empty);
        }
Beispiel #12
0
        public void MessageOperatorsTest()
        {
            int lowercaseCharacterCount = DiscreteChar.Lower().GetProbs().Count(p => p > 0);
            int uppercaseCharacterCount = DiscreteChar.Upper().GetProbs().Count(p => p > 0);

            StringDistribution strDist1  = StringDistribution.OneOf("a", "b", "abc", "ab", "bcd", "d", string.Empty);
            DiscreteChar       charDist1 = SingleOp.CharacterAverageConditional(strDist1);

            Assert.Equal(1.0 / 3.0, charDist1['a'], ProbEps);
            Assert.Equal(1.0 / 3.0, charDist1['b'], ProbEps);
            Assert.Equal(1.0 / 3.0, charDist1['d'], ProbEps);

            StringDistribution strDist2  = StringDistribution.OneOf(strDist1, StringDistribution.OneOf("b", "d"));
            DiscreteChar       charDist2 = SingleOp.CharacterAverageConditional(strDist2);

            Assert.Equal(1.0 / 10.0, charDist2['a'], ProbEps);
            Assert.Equal(4.5 / 10.0, charDist2['b'], ProbEps);
            Assert.Equal(4.5 / 10.0, charDist2['d'], ProbEps);

            StringDistribution strDist3  = StringDistribution.Letters(minLength: 0);
            DiscreteChar       charDist3 = SingleOp.CharacterAverageConditional(strDist3);

            Assert.Equal(1.0 / (lowercaseCharacterCount + uppercaseCharacterCount), charDist3['a'], ProbEps);
            Assert.Equal(1.0 / (lowercaseCharacterCount + uppercaseCharacterCount), charDist3['B'], ProbEps);
            Assert.Equal(1.0 / (lowercaseCharacterCount + uppercaseCharacterCount), charDist3['d'], ProbEps);

            StringDistribution strDist4  = StringDistribution.OneOf(strDist3, StringDistribution.Lower(minLength: 0));
            DiscreteChar       charDist4 = SingleOp.CharacterAverageConditional(strDist4);

            Assert.Equal(2.0 / (2 * lowercaseCharacterCount + uppercaseCharacterCount), charDist4['a'], ProbEps);
            Assert.Equal(1.0 / (2 * lowercaseCharacterCount + uppercaseCharacterCount), charDist4['B'], ProbEps);
            Assert.Equal(2.0 / (2 * lowercaseCharacterCount + uppercaseCharacterCount), charDist4['d'], ProbEps);

            StringDistribution strDist5 = StringDistribution.String("XX").Append(strDist4);

            Assert.Throws <AllZeroException>(() => SingleOp.CharacterAverageConditional(strDist5));
        }
Beispiel #13
0
        public void WordModel()
        {
            // We want to build a word model as a reasonably simple StringDistribution. It
            // should satisfy the following:
            // (1) The probability of a word of moderate length should not be
            //     significantly less than the probability of a shorter word.
            // (2) The probability of a specific word conditioned on its length matches that of
            //     words in the target language.
            // We achieve this by putting non-normalized character distributions on the edges. The
            // StringDistribution is unaware that these are non-normalized.
            // The StringDistribution itself is non-normalizable.
            const double TargetProb1  = 0.05;
            const double Ratio1       = 0.4;
            const double TargetProb2  = TargetProb1 * Ratio1;
            const double Ratio2       = 0.2;
            const double TargetProb3  = TargetProb2 * Ratio2;
            const double TargetProb4  = TargetProb3 * Ratio2;
            const double TargetProb5  = TargetProb4 * Ratio2;
            const double Ratio3       = 0.999;
            const double TargetProb6  = TargetProb5 * Ratio3;
            const double TargetProb7  = TargetProb6 * Ratio3;
            const double TargetProb8  = TargetProb7 * Ratio3;
            const double Ratio4       = 0.9;
            const double TargetProb9  = TargetProb8 * Ratio4;
            const double TargetProb10 = TargetProb9 * Ratio4;

            var targetProbabilitiesPerLength = new double[]
            {
                TargetProb1, TargetProb2, TargetProb3, TargetProb4, TargetProb5, TargetProb6, TargetProb7, TargetProb8, TargetProb9, TargetProb10
            };

            var charDistUpper          = DiscreteChar.Upper();
            var charDistLower          = DiscreteChar.Lower();
            var charDistUpperNarrow    = DiscreteChar.OneOf('A', 'B');
            var charDistLowerNarrow    = DiscreteChar.OneOf('a', 'b');
            var charDistUpperScaled    = DiscreteChar.Uniform();
            var charDistLowerScaled1   = DiscreteChar.Uniform();
            var charDistLowerScaled2   = DiscreteChar.Uniform();
            var charDistLowerScaled3   = DiscreteChar.Uniform();
            var charDistLowerScaledEnd = DiscreteChar.Uniform();

            charDistUpperScaled.SetToPartialUniformOf(charDistUpper, Math.Log(TargetProb1));
            charDistLowerScaled1.SetToPartialUniformOf(charDistLower, Math.Log(Ratio1));
            charDistLowerScaled2.SetToPartialUniformOf(charDistLower, Math.Log(Ratio2));
            charDistLowerScaled3.SetToPartialUniformOf(charDistLower, Math.Log(Ratio3));
            charDistLowerScaledEnd.SetToPartialUniformOf(charDistLower, Math.Log(Ratio4));

            var wordModel = StringDistribution.Concatenate(
                new List <DiscreteChar>
            {
                charDistUpperScaled,
                charDistLowerScaled1,
                charDistLowerScaled2,
                charDistLowerScaled2,
                charDistLowerScaled2,
                charDistLowerScaled3,
                charDistLowerScaled3,
                charDistLowerScaled3,
                charDistLowerScaledEnd
            },
                true,
                true);

            const string Word = "Abcdefghij";

            const double Eps                   = 1e-5;
            var          broadDist             = StringDistribution.Char(charDistUpper);
            var          narrowDist            = StringDistribution.Char(charDistUpperNarrow);
            var          narrowWord            = "A";
            var          expectedProbForNarrow = 0.5;

            for (var i = 0; i < targetProbabilitiesPerLength.Length; i++)
            {
                var currentWord     = Word.Substring(0, i + 1);
                var probCurrentWord = Math.Exp(wordModel.GetLogProb(currentWord));
                Assert.Equal(targetProbabilitiesPerLength[i], probCurrentWord, Eps);

                var logAvg = Math.Exp(wordModel.GetLogAverageOf(broadDist));
                Assert.Equal(targetProbabilitiesPerLength[i], logAvg, Eps);

                var prod = StringDistribution.Zero();
                prod.SetToProduct(broadDist, wordModel);
                Xunit.Assert.True(prod.GetWorkspaceOrPoint().HasElementLogValueOverrides);
                probCurrentWord = Math.Exp(prod.GetLogProb(currentWord));
                Assert.Equal(targetProbabilitiesPerLength[i], probCurrentWord, Eps);

                prod.SetToProduct(narrowDist, wordModel);
                Xunit.Assert.False(prod.GetWorkspaceOrPoint().HasElementLogValueOverrides);
                var probNarrowWord = Math.Exp(prod.GetLogProb(narrowWord));
                Assert.Equal(expectedProbForNarrow, probNarrowWord, Eps);

                broadDist              = broadDist.Append(charDistLower);
                narrowDist             = narrowDist.Append(charDistLowerNarrow);
                narrowWord            += "a";
                expectedProbForNarrow *= 0.5;
            }

            // Copied model
            var copiedModel = StringDistribution.FromWorkspace(StringTransducer.Copy().ProjectSource(wordModel.GetWorkspaceOrPoint()));

            // Under transducer.
            for (var i = 0; i < targetProbabilitiesPerLength.Length; i++)
            {
                var currentWord     = Word.Substring(0, i + 1);
                var probCurrentWord = Math.Exp(copiedModel.GetLogProb(currentWord));
                Assert.Equal(targetProbabilitiesPerLength[i], probCurrentWord, Eps);
            }

            // Rescaled model
            var scale                = 0.5;
            var newTargetProb1       = TargetProb1 * scale;
            var charDistUpperScaled1 = DiscreteChar.Uniform();

            charDistUpperScaled1.SetToPartialUniformOf(charDistUpper, Math.Log(newTargetProb1));
            var reWeightingTransducer =
                StringTransducer.Replace(StringDistribution.Char(charDistUpper).GetWorkspaceOrPoint(), StringDistribution.Char(charDistUpperScaled1).GetWorkspaceOrPoint())
                .Append(StringTransducer.Copy());
            var reWeightedWordModel = StringDistribution.FromWorkspace(reWeightingTransducer.ProjectSource(wordModel.GetWorkspaceOrPoint()));

            for (var i = 0; i < targetProbabilitiesPerLength.Length; i++)
            {
                var currentWord     = Word.Substring(0, i + 1);
                var probCurrentWord = Math.Exp(reWeightedWordModel.GetLogProb(currentWord));
                Assert.Equal(scale * targetProbabilitiesPerLength[i], probCurrentWord, Eps);
            }
        }