예제 #1
0
        // Checks that a compiled unigram scores a sequence as the product of the
        // per-token probabilities, recompiling after each change to the data.
        public void TestSequenceProbability()
        {
            UniGram model = new UniGram();

            // Nothing has been added yet: the test expects a score of zero.
            var emptyScore = model.Compile().SequenceProbability(new[] { "a", "b" });
            Assert.AreEqual(0, emptyScore);

            // Only "a" has been seen, so a run of "a" is expected to score 1.
            model.AddData(null, "a");
            var onlyAScore = model.Compile().SequenceProbability(new[] { "a", "a" });
            Assert.AreEqual(1, onlyAScore);

            // "a" and "b" seen once each: 0.5 per token.
            model.AddData(null, "b");
            var singleAScore = model.Compile().SequenceProbability(new[] { "a" });
            Assert.AreEqual(0.5, singleAScore);

            var doubleBScore = model.Compile().SequenceProbability(new[] { "b", "b" });
            Assert.AreEqual(0.25, doubleBScore);

            // Two more "b" samples bring the counts to a = 1, b = 3.
            for (int extra = 0; extra < 2; ++extra)
            {
                model.AddData(null, "b");
            }

            var singleBScore = model.Compile().SequenceProbability(new[] { "b" });
            Assert.AreEqual(0.75, singleBScore);

            var mixedScore = model.Compile().SequenceProbability(new[] { "b", "a", "b" });
            Assert.AreEqual(0.75 * 0.25 * 0.75, mixedScore);
        }
예제 #2
0
        // Checks the perplexity reported by a compiled unigram. The test expects
        // the value to be the reciprocal of the sequence probability, and
        // positive infinity when no data has been added.
        public void TestPerplexity()
        {
            // Quotients such as 1 / 0.75 have no exact binary representation, so
            // finite results are compared within a small delta instead of
            // bit-for-bit.
            const double Tolerance = 1e-9;

            UniGram unigram = new UniGram();

            // Infinity is compared exactly; a delta is meaningless for it.
            Assert.AreEqual(
                double.PositiveInfinity,
                unigram.Compile().Perplexity(new string[] { "a", "b" }));

            unigram.AddData(null, "a");
            Assert.AreEqual(
                1,
                unigram.Compile().Perplexity(new string[] { "a", "a" }),
                Tolerance);

            unigram.AddData(null, "b");
            Assert.AreEqual(
                1 / 0.5,
                unigram.Compile().Perplexity(new string[] { "a" }),
                Tolerance);

            Assert.AreEqual(
                1 / 0.25,
                unigram.Compile().Perplexity(new string[] { "b", "b" }),
                Tolerance);

            // Counts become a = 1, b = 3.
            unigram.AddData(null, "b");
            unigram.AddData(null, "b");

            Assert.AreEqual(
                1 / 0.75,
                unigram.Compile().Perplexity(new string[] { "b" }),
                Tolerance);

            Assert.AreEqual(
                1 / (0.75 * 0.25 * 0.75),
                unigram.Compile().Perplexity(new string[] { "b", "a", "b" }),
                Tolerance);
        }
예제 #3
0
        // Checks that GetValues on a compiled unigram returns each token mapped
        // to its relative frequency in the data added so far.
        public void TestGetValues()
        {
            UniGram model = new UniGram();

            // No data: the test expects an empty mapping.
            var expectedEmpty = new Dictionary<string, float>();
            Assert.AreEqual(expectedEmpty, model.Compile().GetValues(null));

            // A single token takes the whole probability mass.
            model.AddData(null, "a");
            var expectedSingle = new Dictionary<string, float>
            {
                ["a"] = 1f
            };
            Assert.AreEqual(expectedSingle, model.Compile().GetValues(null));

            // Counts are now a = 3, b = 1.
            model.AddData(null, "a");
            model.AddData(null, "a");
            model.AddData(null, "b");
            var expectedSplit = new Dictionary<string, float>
            {
                ["a"] = 0.75f,
                ["b"] = 0.25f
            };
            Assert.AreEqual(expectedSplit, model.Compile().GetValues(null));
        }
예제 #4
0
        // Checks that UpdateMemory scales every stored weight by the given
        // factor, so older entries shrink further with each call.
        public void TestUpdateMemory()
        {
            const float decay = 0.9f;
            UniGram gram = new UniGram();

            // One call: the single entry is expected to drop from 1 to 0.9.
            gram.AddData(null, "a");
            gram.UpdateMemory(decay);
            Assert.AreEqual(decay, gram.Grammar["a"]);

            // Second call: "a" has been scaled twice, the new "b" only once.
            gram.AddData(null, "b");
            gram.UpdateMemory(decay);

            bool aScaledTwice = Mathf.Approximately(0.81f, gram.Grammar["a"]);
            Assert.IsTrue(aScaledTwice);
            Assert.AreEqual(decay, gram.Grammar["b"]);
        }
예제 #5
0
        // Checks that Clone on a compiled unigram yields a gram that reports the
        // same N and the same values as the original.
        public void TestClone()
        {
            UniGram source = new UniGram();
            foreach (string token in new[] { "a", "a", "b" })
            {
                source.AddData(null, token);
            }

            CompiledUniGram original = new CompiledUniGram(source.Grammar);
            ICompiledGram copy = original.Clone();

            var expectedN = original.GetN();
            var actualN = copy.GetN();
            Assert.AreEqual(expectedN, actualN);

            var expectedValues = original.GetValues(null);
            var actualValues = copy.GetValues(null);
            Assert.AreEqual(expectedValues, actualValues);
        }
예제 #6
0
        // Checks that a compiled unigram reports a next step only once at least
        // one token has been added.
        public void TestHasNextStep()
        {
            UniGram model = new UniGram();

            bool emptyHasNext = model.Compile().HasNextStep(null);
            Assert.IsFalse(emptyHasNext);

            // After each addition (repeated or new token) a next step must exist.
            foreach (string token in new[] { "a", "a", "b" })
            {
                model.AddData(null, token);
                Assert.IsTrue(model.Compile().HasNextStep(null));
            }
        }
예제 #7
0
        // Checks that AddData(context, token) counts occurrences per token and
        // that the supplied context array does not affect a unigram's counts.
        public void TestAddDataString()
        {
            UniGram gram = new UniGram();

            // First sighting of "a" with no context.
            gram.AddData(null, "a");
            int keysAfterFirst = gram.Grammar.Keys.Count;
            Assert.AreEqual(1, keysAfterFirst);
            Assert.AreEqual(1f, gram.Grammar["a"]);

            // Same token with an arbitrary context: the count goes up, no new key.
            gram.AddData(new[] { "asdf" }, "a");
            int keysAfterSecond = gram.Grammar.Keys.Count;
            Assert.AreEqual(1, keysAfterSecond);
            Assert.AreEqual(2f, gram.Grammar["a"]);

            // A different token creates a second key and leaves "a" untouched.
            gram.AddData(new[] { "31234" }, "b");
            int keysAfterThird = gram.Grammar.Keys.Count;
            Assert.AreEqual(2, keysAfterThird);
            Assert.AreEqual(1f, gram.Grammar["b"]);
            Assert.AreEqual(2f, gram.Grammar["a"]);
        }
예제 #8
0
        // Checks that a compiled HierarchicalNGram(3, 0.9f) scores a sequence as
        // a weighted blend of the unigram, bigram and trigram scores built from
        // the same training sample.
        public void TestSequenceProbability()
        {
            HierarchicalNGram hierarchy = new HierarchicalNGram(3, 0.9f);

            // Untrained hierarchy: the test expects a score of zero.
            var untrainedScore = hierarchy.Compile().SequenceProbability(new[] { "a", "b", "c" });
            Assert.AreEqual(0, untrainedScore);

            // Expected blend weights: 0.9, 0.9^2 and 0.9^3, normalised to sum to 1,
            // with the largest share given to the trigram.
            double weightTotal = 0.9 + 0.81 + 0.729;
            double trigramWeight = 0.9 / weightTotal;
            double bigramWeight = 0.81 / weightTotal;
            double unigramWeight = 0.729 / weightTotal;

            // Reference models holding what a single ("a", "a") -> "b" sample is
            // expected to contribute at each order.
            UniGram unigram = new UniGram();
            unigram.AddData(null, "a");
            unigram.AddData(null, "a");
            unigram.AddData(null, "b");

            NGram bigram = new NGram(2);
            bigram.AddData(new[] { "a" }, "a");
            bigram.AddData(new[] { "a" }, "b");

            NGram trigram = new NGram(3);
            trigram.AddData(new[] { "a", "a" }, "b");

            hierarchy.AddData(new[] { "a", "a" }, "b");

            ICompiledGram compiledUnigram = unigram.Compile();
            ICompiledGram compiledBigram = bigram.Compile();
            ICompiledGram compiledTrigram = trigram.Compile();

            string[] sequence = { "a", "a", "b" };
            double expected =
                unigramWeight * compiledUnigram.SequenceProbability(sequence) +
                bigramWeight * compiledBigram.SequenceProbability(sequence) +
                trigramWeight * compiledTrigram.SequenceProbability(sequence);
            double actual = hierarchy.Compile().SequenceProbability(new[] { "a", "a", "b" });

            // Compared at float precision to absorb rounding differences.
            bool closeEnough = Mathf.Approximately((float)expected, (float)actual);
            Assert.IsTrue(closeEnough, $"Expected {expected} but received {actual}.");
        }
예제 #9
0
        // Checks that AddData(token, weight) adds the given weight to the
        // token's stored total and creates a new key for an unseen token.
        public void TestAddDataWeight()
        {
            // The expected totals are float sums (2.2f + 0.8f), which are not
            // guaranteed to round to exactly 3f; compare weights within a small
            // delta rather than bit-for-bit.
            const float Tolerance = 1e-5f;

            UniGram a = new UniGram();

            a.AddData("a", 2.2f);
            Assert.AreEqual(1, a.Grammar.Keys.Count);
            Assert.AreEqual(2.2f, a.Grammar["a"], Tolerance);

            // Same token again: the weight accumulates and no new key appears.
            a.AddData("a", 0.8f);
            Assert.AreEqual(1, a.Grammar.Keys.Count);
            Assert.AreEqual(3f, a.Grammar["a"], Tolerance);

            // A new token gets its own entry and leaves "a" unchanged.
            a.AddData("b", 2.823f);
            Assert.AreEqual(3f, a.Grammar["a"], Tolerance);
            Assert.AreEqual(2.823f, a.Grammar["b"], Tolerance);
            Assert.AreEqual(2, a.Grammar.Keys.Count);
        }
예제 #10
0
        // Draws repeatedly from a compiled unigram and checks that only the
        // trained tokens come back and that each of them shows up at least once.
        public void TestGet()
        {
            UniGram source = new UniGram();
            foreach (string token in new[] { "a", "a", "b", "c" })
            {
                source.AddData(null, token);
            }

            CompiledUniGram sampler = new CompiledUniGram(source.Grammar);

            bool sawA = false;
            bool sawB = false;
            bool sawC = false;

            for (int draw = 0; draw < 1000; ++draw)
            {
                string sampled = sampler.Get(null);

                if (sampled == "a")
                {
                    sawA = true;
                }
                else if (sampled == "b")
                {
                    sawB = true;
                }
                else if (sampled == "c")
                {
                    sawC = true;
                }
                else
                {
                    // Anything outside the training tokens is a failure.
                    Assert.Fail($"{sampled} should not be possible.");
                }
            }

            // Missing one of the tokens across 1000 draws is theoretically
            // possible but very unlikely.
            Assert.IsTrue(sawA);
            Assert.IsTrue(sawB);
            Assert.IsTrue(sawC);
        }
예제 #11
0
        // Checks that AddGrammar merges another unigram's counts into the
        // receiver, accumulating across repeated merges.
        public void TestAddGrammar()
        {
            UniGram target = new UniGram();
            UniGram source = new UniGram();

            // Merging an empty unigram adds nothing.
            target.AddGrammar(source);
            int keysAfterEmptyMerge = target.Grammar.Keys.Count;
            Assert.AreEqual(0, keysAfterEmptyMerge);

            // Source holds a = 1; the target picks it up.
            source.AddData(null, "a");
            target.AddGrammar(source);
            int keysAfterFirstMerge = target.Grammar.Keys.Count;
            Assert.AreEqual(1, keysAfterFirstMerge);
            Assert.AreEqual(1f, target.Grammar["a"]);

            // Source now holds a = 2, b = 1; merging again yields a = 1 + 2, b = 1.
            source.AddData(null, "b");
            source.AddData(null, "a");
            target.AddGrammar(source);
            int keysAfterSecondMerge = target.Grammar.Keys.Count;
            Assert.AreEqual(2, keysAfterSecondMerge);
            Assert.AreEqual(3f, target.Grammar["a"]);
            Assert.AreEqual(1f, target.Grammar["b"]);
        }
예제 #12
0
        // Checks that GetGuesses on a compiled unigram returns one entry per
        // distinct token that has been added.
        public void TestGetGuesses()
        {
            UniGram model = new UniGram();

            int guessCount = model.Compile().GetGuesses(null).Length;
            Assert.AreEqual(0, guessCount);

            model.AddData(null, "a");
            guessCount = model.Compile().GetGuesses(null).Length;
            Assert.AreEqual(1, guessCount);

            // A repeated token must not produce a second guess.
            model.AddData(null, "a");
            guessCount = model.Compile().GetGuesses(null).Length;
            Assert.AreEqual(1, guessCount);

            model.AddData(null, "b");
            guessCount = model.Compile().GetGuesses(null).Length;
            Assert.AreEqual(2, guessCount);

            model.AddData(null, "c");
            string[] guesses = model.Compile().GetGuesses(null);
            Assert.AreEqual(3, guesses.Length);

            // Every distinct token must be present.
            foreach (string expectedToken in new[] { "a", "b", "c" })
            {
                Assert.IsTrue(guesses.Contains(expectedToken));
            }
        }
예제 #13
0
        // Checks that adding a UniGram to a HierarchicalNGram updates only the
        // hierarchy's unigram level and leaves the higher-order level empty.
        public void TestAddUnigram()
        {
            HierarchicalNGram a  = new HierarchicalNGram(2, 0.6f);
            UniGram           u1 = a.Grammars[0] as UniGram;
            NGram             n2 = a.Grammars[1] as NGram;

            // "as" yields null on a type mismatch; fail with a clear message here
            // instead of a NullReferenceException further down.
            Assert.IsNotNull(u1, "Grammars[0] should be a UniGram.");
            Assert.IsNotNull(n2, "Grammars[1] should be an NGram.");

            UniGram unigram = new UniGram();

            unigram.AddData(null, "a");
            a.AddGrammar(unigram);

            Assert.AreEqual(1, u1.Grammar.Keys.Count);
            Assert.AreEqual(0, n2.Grammar.Keys.Count);
            // Float literal, matching how the other tests compare stored weights.
            Assert.AreEqual(1f, u1.Grammar["a"]);
        }
예제 #14
0
        // Checks that Compile never throws, whether the unigram is empty, holds
        // a single token, or holds several tokens with different counts.
        public void TestConstructor()
        {
            UniGram model = new UniGram();

            // Empty model.
            Assert.DoesNotThrow(() => model.Compile());

            model.AddData(null, "a");
            Assert.DoesNotThrow(() => model.Compile());

            model.AddData(null, "b");
            Assert.DoesNotThrow(() => model.Compile());

            // Skew the counts before compiling one last time.
            foreach (string token in new[] { "b", "b", "b", "a" })
            {
                model.AddData(null, token);
            }

            Assert.DoesNotThrow(() => model.Compile());
        }