Exemple #1
0
        /// <summary>
        /// Checks the value a compiled <c>UniGram</c> reports from
        /// <c>SequenceProbability</c> as tokens are added one at a time.
        /// The grammar is recompiled before every query.
        /// </summary>
        public void TestSequenceProbability()
        {
            var grammar = new UniGram();

            // No data has been added: the expected probability is 0.
            var emptyResult = grammar.Compile().SequenceProbability(new[] { "a", "b" });
            Assert.AreEqual(0, emptyResult);

            // "a" is the only token seen so far.
            grammar.AddData(null, "a");
            var onlyAResult = grammar.Compile().SequenceProbability(new[] { "a", "a" });
            Assert.AreEqual(1, onlyAResult);

            // "a" and "b" have each been added once.
            grammar.AddData(null, "b");
            var singleAResult = grammar.Compile().SequenceProbability(new[] { "a" });
            Assert.AreEqual(0.5, singleAResult);

            var doubleBResult = grammar.Compile().SequenceProbability(new[] { "b", "b" });
            Assert.AreEqual(0.25, doubleBResult);

            // Two more "b" entries: one "a" against three "b".
            grammar.AddData(null, "b");
            grammar.AddData(null, "b");

            var singleBResult = grammar.Compile().SequenceProbability(new[] { "b" });
            Assert.AreEqual(0.75, singleBResult);

            var mixedResult = grammar.Compile().SequenceProbability(new[] { "b", "a", "b" });
            Assert.AreEqual(0.75 * 0.25 * 0.75, mixedResult);
        }
        /// <summary>
        /// Adds a standalone bigram and then a standalone trigram to a 3-level
        /// <c>HierarchicalNGram</c> via <c>AddGrammar</c> and checks which of the
        /// hierarchy's grammars (index 0, 1, 2) received entries.
        /// </summary>
        public void TestAddNGram()
        {
            var hierarchy    = new HierarchicalNGram(3, 0.6f);
            var unigramLevel = hierarchy.Grammars[0] as UniGram;
            var bigramLevel  = hierarchy.Grammars[1] as NGram;
            var trigramLevel = hierarchy.Grammars[2] as NGram;

            // A bigram is expected to show up only in the second grammar.
            var bigram = new NGram(2);
            bigram.AddData(new[] { "a" }, "b");
            hierarchy.AddGrammar(bigram);

            Assert.AreEqual(0, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(0, trigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1f, bigramLevel.Grammar["a"].Grammar["b"]);

            // A trigram is expected to show up only in the third grammar;
            // the earlier bigram entry stays as it was.
            var trigram = new NGram(3);
            trigram.AddData(new[] { "a", "b" }, "c");
            trigram.AddData(new[] { "a", "b" }, "c");
            trigram.AddData(new[] { "a", "b" }, "d");
            trigram.AddData(new[] { "a", "c" }, "d");
            hierarchy.AddGrammar(trigram);

            Assert.AreEqual(0, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(2, trigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1f, bigramLevel.Grammar["a"].Grammar["b"]);
            Assert.AreEqual(2f, trigramLevel.Grammar["a,b"].Grammar["c"]);
            Assert.AreEqual(1f, trigramLevel.Grammar["a,b"].Grammar["d"]);
            Assert.AreEqual(1f, trigramLevel.Grammar["a,c"].Grammar["d"]);
        }
Exemple #3
0
        /// <summary>
        /// Checks the dictionary returned by <c>GetValues(null)</c> on a compiled
        /// <c>UniGram</c>: empty for no data, then per-token values as data is added.
        /// </summary>
        public void TestGetValues()
        {
            var grammar = new UniGram();

            // No data: an empty dictionary is expected.
            var expectedEmpty = new Dictionary<string, float>();
            Assert.AreEqual(expectedEmpty, grammar.Compile().GetValues(null));

            // A single token carries the whole value.
            grammar.AddData(null, "a");
            var expectedSingle = new Dictionary<string, float> { { "a", 1f } };
            Assert.AreEqual(expectedSingle, grammar.Compile().GetValues(null));

            // Three "a" against one "b".
            grammar.AddData(null, "a");
            grammar.AddData(null, "a");
            grammar.AddData(null, "b");
            var expectedSkewed = new Dictionary<string, float>
            {
                { "a", 0.75f },
                { "b", 0.25f }
            };
            Assert.AreEqual(expectedSkewed, grammar.Compile().GetValues(null));
        }
Exemple #4
0
        /// <summary>
        /// Checks the value a compiled <c>UniGram</c> reports from <c>Perplexity</c>
        /// as tokens are added. The expected values are the reciprocals of the
        /// corresponding sequence probabilities used elsewhere in these tests.
        /// </summary>
        public void TestPerplexity()
        {
            // Reciprocals such as 1 / 0.75 have no exact binary representation, so the
            // finite cases are compared with a small absolute tolerance instead of
            // bit-for-bit equality.
            const double tolerance = 1e-9;

            UniGram unigram = new UniGram();

            // No data: the test expects an infinite perplexity (compared exactly).
            Assert.AreEqual(
                double.PositiveInfinity,
                unigram.Compile().Perplexity(new string[] { "a", "b" }));

            // "a" is the only token seen so far.
            unigram.AddData(null, "a");
            Assert.AreEqual(
                1.0,
                unigram.Compile().Perplexity(new string[] { "a", "a" }),
                tolerance);

            // "a" and "b" have each been added once.
            unigram.AddData(null, "b");
            Assert.AreEqual(
                1 / 0.5,
                unigram.Compile().Perplexity(new string[] { "a" }),
                tolerance);

            Assert.AreEqual(
                1 / 0.25,
                unigram.Compile().Perplexity(new string[] { "b", "b" }),
                tolerance);

            // Two more "b" entries: one "a" against three "b".
            unigram.AddData(null, "b");
            unigram.AddData(null, "b");

            Assert.AreEqual(
                1 / 0.75,
                unigram.Compile().Perplexity(new string[] { "b" }),
                tolerance);

            Assert.AreEqual(
                1 / (0.75 * 0.25 * 0.75),
                unigram.Compile().Perplexity(new string[] { "b", "a", "b" }),
                tolerance);
        }
        /// <summary>
        /// Feeds (context, token) samples into a 3-level <c>HierarchicalNGram</c>
        /// through <c>AddData</c> and checks the key counts and weights recorded in
        /// each of its three grammars after every step.
        /// </summary>
        public void TestAddData()
        {
            var hierarchy    = new HierarchicalNGram(3, 0.6f);
            var unigramLevel = hierarchy.Grammars[0] as UniGram;
            var bigramLevel  = hierarchy.Grammars[1] as NGram;
            var trigramLevel = hierarchy.Grammars[2] as NGram;

            // Step 1: a single sample (a, b) -> c.
            hierarchy.AddData(new[] { "a", "b" }, "c");

            Assert.AreEqual(3, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(2, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1, trigramLevel.Grammar.Keys.Count);

            Assert.AreEqual(1f, unigramLevel.Grammar["a"]);
            Assert.AreEqual(1f, unigramLevel.Grammar["b"]);
            Assert.AreEqual(1f, unigramLevel.Grammar["c"]);

            Assert.AreEqual(1f, bigramLevel.Grammar["a"].Grammar["b"]);
            Assert.AreEqual(1f, bigramLevel.Grammar["b"].Grammar["c"]);

            Assert.AreEqual(1f, trigramLevel.Grammar["a,b"].Grammar["c"]);

            // Step 2: repeat (a, b) -> c and add (c, b) -> c.
            hierarchy.AddData(new[] { "a", "b" }, "c");
            hierarchy.AddData(new[] { "c", "b" }, "c");

            Assert.AreEqual(3, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(3, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(2, trigramLevel.Grammar.Keys.Count);

            Assert.AreEqual(2f, unigramLevel.Grammar["a"]);
            Assert.AreEqual(3f, unigramLevel.Grammar["b"]);
            Assert.AreEqual(4f, unigramLevel.Grammar["c"]);

            Assert.AreEqual(2f, bigramLevel.Grammar["a"].Grammar["b"]);
            Assert.AreEqual(3f, bigramLevel.Grammar["b"].Grammar["c"]);
            Assert.AreEqual(1f, bigramLevel.Grammar["c"].Grammar["b"]);

            Assert.AreEqual(2f, trigramLevel.Grammar["a,b"].Grammar["c"]);
            Assert.AreEqual(1f, trigramLevel.Grammar["c,b"].Grammar["c"]);

            // Step 3: add (a, b) -> d. Sequences seen so far:
            //   unigrams: a, b, c, d
            //   bigrams:  ab, bc, cb, bd
            //   trigrams: abc, cbc, abd
            hierarchy.AddData(new[] { "a", "b" }, "d");

            Assert.AreEqual(4, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(3, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(2, trigramLevel.Grammar.Keys.Count);

            Assert.AreEqual(3f, unigramLevel.Grammar["a"]);
            Assert.AreEqual(4f, unigramLevel.Grammar["b"]);
            Assert.AreEqual(4f, unigramLevel.Grammar["c"]);
            Assert.AreEqual(1f, unigramLevel.Grammar["d"]);

            Assert.AreEqual(3f, bigramLevel.Grammar["a"].Grammar["b"]);
            Assert.AreEqual(3f, bigramLevel.Grammar["b"].Grammar["c"]);
            Assert.AreEqual(1f, bigramLevel.Grammar["b"].Grammar["d"]);
            Assert.AreEqual(1f, bigramLevel.Grammar["c"].Grammar["b"]);

            Assert.AreEqual(2f, trigramLevel.Grammar["a,b"].Grammar["c"]);
            Assert.AreEqual(1f, trigramLevel.Grammar["a,b"].Grammar["d"]);
            Assert.AreEqual(1f, trigramLevel.Grammar["c,b"].Grammar["c"]);
        }
Exemple #6
0
        /// <summary>
        /// A <c>UniGram</c> constructed from a tab-separated "word, count" line is
        /// expected to render through <c>ToString</c> as just the word.
        /// </summary>
        public void ToString_OneWordLine_ReturnsSimpleWord()
        {
            string line             = "a\t33198366";
            string expectedToString = "a";
            var    ngram            = new UniGram(line);

            var result = ngram.ToString();

            // Assert.AreEqual takes (expected, actual); passing them in that order
            // keeps the "Expected ... But was ..." failure message truthful.
            Assert.AreEqual(expectedToString, result);
        }
Exemple #7
0
        /// <summary>
        /// A <c>UniGram</c> constructed from a tab-separated "word, count" line is
        /// expected to expose the count through its <c>Frequency</c> property.
        /// </summary>
        public void Frequency_OneWordLine_ReturnsIntFrequency()
        {
            string line      = "a\t33198366";
            int    frequency = 33198366;
            var    uniGram   = new UniGram(line);

            var result = uniGram.Frequency;

            // Assert.AreEqual takes (expected, actual); passing them in that order
            // keeps the "Expected ... But was ..." failure message truthful.
            Assert.AreEqual(frequency, result);
        }
Exemple #8
0
        /// <summary>
        /// A <c>UniGram</c> constructed from a tab-separated "word, count" line is
        /// expected to expose the word as a one-element array through <c>Words</c>.
        /// </summary>
        public void Words_OneWordLine_ReturnsArrayWithOneString()
        {
            string line = "a\t33198366";

            string[] words   = { "a" };
            var      uniGram = new UniGram(line);

            var result = uniGram.Words;

            // Assert.AreEqual takes (expected, actual); passing them in that order
            // keeps the "Expected ... But was ..." failure message truthful.
            Assert.AreEqual(words, result);
        }
Exemple #9
0
        /// <summary>
        /// Checks that <c>UpdateMemory(0.9f)</c> on a <c>UniGram</c> leaves a freshly
        /// added token at 0.9 and an already-updated token at approximately 0.81.
        /// </summary>
        public void TestUpdateMemory()
        {
            var grammar = new UniGram();

            // First update: "a" was added once.
            grammar.AddData(null, "a");
            grammar.UpdateMemory(0.9f);
            Assert.AreEqual(0.9f, grammar.Grammar["a"]);

            // Second update: "a" has now been through two updates, "b" through one.
            grammar.AddData(null, "b");
            grammar.UpdateMemory(0.9f);

            float weightOfA = grammar.Grammar["a"];
            Assert.IsTrue(Mathf.Approximately(0.81f, weightOfA));
            Assert.AreEqual(0.9f, grammar.Grammar["b"]);
        }
Exemple #10
0
        /// <summary>
        /// Checks that <c>Clone</c> on a <c>CompiledUniGram</c> yields an object that
        /// reports the same <c>GetN()</c> and the same <c>GetValues(null)</c>.
        /// </summary>
        public void TestClone()
        {
            var source = new UniGram();
            source.AddData(null, "a");
            source.AddData(null, "a");
            source.AddData(null, "b");

            var           original = new CompiledUniGram(source.Grammar);
            ICompiledGram copy     = original.Clone();

            var originalN = original.GetN();
            var copyN     = copy.GetN();
            Assert.AreEqual(originalN, copyN);

            var originalValues = original.GetValues(null);
            var copyValues     = copy.GetValues(null);
            Assert.AreEqual(originalValues, copyValues);
        }
Exemple #11
0
        /// <summary>
        /// Checks that <c>UniGram.Compile()</c> does not throw for an empty grammar
        /// or after successive additions of data.
        /// </summary>
        public void TestConstructor()
        {
            var grammar = new UniGram();

            // Empty grammar.
            Assert.DoesNotThrow(() => { grammar.Compile(); });

            // One token.
            grammar.AddData(null, "a");
            Assert.DoesNotThrow(() => { grammar.Compile(); });

            // Two distinct tokens.
            grammar.AddData(null, "b");
            Assert.DoesNotThrow(() => { grammar.Compile(); });

            // Several repeated tokens.
            grammar.AddData(null, "b");
            grammar.AddData(null, "b");
            grammar.AddData(null, "b");
            grammar.AddData(null, "a");
            Assert.DoesNotThrow(() => { grammar.Compile(); });
        }
Exemple #12
0
        /// <summary>
        /// Checks <c>HasNextStep(null)</c> on a compiled <c>UniGram</c>: false while
        /// the grammar is empty, true once any data has been added.
        /// </summary>
        public void TestHasNextStep()
        {
            var grammar = new UniGram();

            bool hasStepWhenEmpty = grammar.Compile().HasNextStep(null);
            Assert.IsFalse(hasStepWhenEmpty);

            grammar.AddData(null, "a");
            bool hasStepAfterFirst = grammar.Compile().HasNextStep(null);
            Assert.IsTrue(hasStepAfterFirst);

            grammar.AddData(null, "a");
            bool hasStepAfterRepeat = grammar.Compile().HasNextStep(null);
            Assert.IsTrue(hasStepAfterRepeat);

            grammar.AddData(null, "b");
            bool hasStepAfterSecondToken = grammar.Compile().HasNextStep(null);
            Assert.IsTrue(hasStepAfterSecondToken);
        }
Exemple #13
0
        /// <summary>
        /// Adds a standalone <c>UniGram</c> to a 2-level <c>HierarchicalNGram</c> via
        /// <c>AddGrammar</c> and checks that only the hierarchy's first grammar
        /// received the entry.
        /// </summary>
        public void TestAddUnigram()
        {
            HierarchicalNGram a  = new HierarchicalNGram(2, 0.6f);
            UniGram           u1 = a.Grammars[0] as UniGram;
            NGram             n2 = a.Grammars[1] as NGram;

            UniGram unigram = new UniGram();

            unigram.AddData(null, "a");
            a.AddGrammar(unigram);

            Assert.AreEqual(1, u1.Grammar.Keys.Count);
            Assert.AreEqual(0, n2.Grammar.Keys.Count);

            // Weights are compared with float literals in the sibling tests; use 1f
            // here as well so the expected value has the same type as the weight.
            Assert.AreEqual(1f, u1.Grammar["a"]);
        }
Exemple #14
0
        /// <summary>
        /// Checks <c>UniGram.AddData(context, token)</c>: the weight of the token is
        /// incremented whether the context argument is null or a non-empty array.
        /// </summary>
        public void TestAddDataString()
        {
            var grammar = new UniGram();

            // Null context.
            grammar.AddData(null, "a");
            Assert.AreEqual(1, grammar.Grammar.Keys.Count);
            Assert.AreEqual(1f, grammar.Grammar["a"]);

            // Non-null context, same token: the expected weight goes to 2.
            grammar.AddData(new[] { "asdf" }, "a");
            Assert.AreEqual(1, grammar.Grammar.Keys.Count);
            Assert.AreEqual(2f, grammar.Grammar["a"]);

            // Non-null context, new token: a second key appears.
            grammar.AddData(new[] { "31234" }, "b");
            Assert.AreEqual(2, grammar.Grammar.Keys.Count);
            Assert.AreEqual(1f, grammar.Grammar["b"]);
            Assert.AreEqual(2f, grammar.Grammar["a"]);
        }
Exemple #15
0
        /// <summary>
        /// Compares the sequence probability of a compiled 3-level
        /// <c>HierarchicalNGram</c> (weight 0.9) with a weighted sum of the
        /// probabilities from separately built uni-, bi- and tri-gram models.
        /// The weights used are 0.9, 0.81 and 0.729, normalised by their sum.
        /// </summary>
        public void TestSequenceProbability()
        {
            var hierarchy = new HierarchicalNGram(3, 0.9f);

            // No data yet: the expected probability is 0.
            var emptyResult = hierarchy.Compile().SequenceProbability(new[] { "a", "b", "c" });
            Assert.AreEqual(0, emptyResult);

            // Normalised weights for the tri-, bi- and uni-gram contributions.
            double weightSum     = 0.9 + 0.81 + 0.729;
            double trigramWeight = 0.9 / weightSum;
            double bigramWeight  = 0.81 / weightSum;
            double unigramWeight = 0.729 / weightSum;

            // Reference models built by hand.
            var unigramModel = new UniGram();
            unigramModel.AddData(null, "a");
            unigramModel.AddData(null, "a");
            unigramModel.AddData(null, "b");

            var bigramModel = new NGram(2);
            bigramModel.AddData(new[] { "a" }, "a");
            bigramModel.AddData(new[] { "a" }, "b");

            var trigramModel = new NGram(3);
            trigramModel.AddData(new[] { "a", "a" }, "b");

            // The hierarchy under test receives the single sample (a, a) -> b.
            hierarchy.AddData(new[] { "a", "a" }, "b");

            ICompiledGram compiledUnigram = unigramModel.Compile();
            ICompiledGram compiledBigram  = bigramModel.Compile();
            ICompiledGram compiledTrigram = trigramModel.Compile();

            string[] sequence = { "a", "a", "b" };

            double expected = unigramWeight * compiledUnigram.SequenceProbability(sequence);
            expected += bigramWeight * compiledBigram.SequenceProbability(sequence);
            expected += trigramWeight * compiledTrigram.SequenceProbability(sequence);

            double actual = hierarchy.Compile().SequenceProbability(new[] { "a", "a", "b" });

            // Compared at float precision with Unity's approximate equality.
            bool closeEnough = Mathf.Approximately((float)expected, (float)actual);
            Assert.IsTrue(closeEnough, $"Expected {expected} but received {actual}.");
        }
Exemple #16
0
        /// <summary>
        /// Checks <c>UniGram.AddData(token, weight)</c>: weights for the same token
        /// are expected to add up, and a new token gets its own entry.
        /// </summary>
        public void TestAddDataWeight()
        {
            var grammar = new UniGram();

            // First weighted entry for "a".
            grammar.AddData("a", 2.2f);
            Assert.AreEqual(1, grammar.Grammar.Keys.Count);
            Assert.AreEqual(2.2f, grammar.Grammar["a"]);

            // Second weighted entry for "a": expected total is 3.
            grammar.AddData("a", 0.8f);
            Assert.AreEqual(1, grammar.Grammar.Keys.Count);
            Assert.AreEqual(3f, grammar.Grammar["a"]);

            // A different token leaves "a" untouched.
            grammar.AddData("b", 2.823f);
            Assert.AreEqual(3f, grammar.Grammar["a"]);
            Assert.AreEqual(2.823f, grammar.Grammar["b"]);
            Assert.AreEqual(2, grammar.Grammar.Keys.Count);
        }
Exemple #17
0
        /// <summary>
        /// Calls <c>CompiledUniGram.Get(null)</c> 1000 times on a grammar holding
        /// "a", "b" and "c" and checks that every result is one of those tokens and
        /// that each token is seen at least once.
        /// </summary>
        public void TestGet()
        {
            var grammar = new UniGram();
            grammar.AddData(null, "a");
            grammar.AddData(null, "a");
            grammar.AddData(null, "b");
            grammar.AddData(null, "c");

            var compiled = new CompiledUniGram(grammar.Grammar);

            bool sawA = false;
            bool sawB = false;
            bool sawC = false;

            int remaining = 1000;
            while (remaining > 0)
            {
                remaining--;

                string token = compiled.Get(null);
                if (token == "a")
                {
                    sawA = true;
                }
                else if (token == "b")
                {
                    sawB = true;
                }
                else if (token == "c")
                {
                    sawC = true;
                }
                else
                {
                    // Anything outside the three added tokens is a failure.
                    Assert.Fail($"{token} should not be possible.");
                }
            }

            // In theory one of these could be missed across 1000 draws, but it is
            // very unlikely.
            Assert.IsTrue(sawA);
            Assert.IsTrue(sawB);
            Assert.IsTrue(sawC);
        }
Exemple #18
0
        /// <summary>
        /// Checks <c>UniGram.AddGrammar</c>: merging an empty grammar adds nothing,
        /// and each later merge adds the source's current weights to the target.
        /// </summary>
        public void TestAddGrammar()
        {
            var target = new UniGram();
            var source = new UniGram();

            // Merging an empty source.
            target.AddGrammar(source);
            Assert.AreEqual(0, target.Grammar.Keys.Count);

            // Source holds a:1.
            source.AddData(null, "a");
            target.AddGrammar(source);
            Assert.AreEqual(1, target.Grammar.Keys.Count);
            Assert.AreEqual(1f, target.Grammar["a"]);

            // Source now holds a:2, b:1; the target is expected to end at a:3, b:1.
            source.AddData(null, "b");
            source.AddData(null, "a");
            target.AddGrammar(source);
            Assert.AreEqual(2, target.Grammar.Keys.Count);
            Assert.AreEqual(3f, target.Grammar["a"]);
            Assert.AreEqual(1f, target.Grammar["b"]);
        }
Exemple #19
0
        /// <summary>
        /// Checks <c>GetGuesses(null)</c> on a compiled <c>UniGram</c>: the number of
        /// guesses is expected to match the number of distinct tokens added.
        /// </summary>
        public void TestGetGuesses()
        {
            var grammar = new UniGram();

            int guessCount = grammar.Compile().GetGuesses(null).Length;
            Assert.AreEqual(0, guessCount);

            grammar.AddData(null, "a");
            guessCount = grammar.Compile().GetGuesses(null).Length;
            Assert.AreEqual(1, guessCount);

            // Repeating a token does not add a new guess.
            grammar.AddData(null, "a");
            guessCount = grammar.Compile().GetGuesses(null).Length;
            Assert.AreEqual(1, guessCount);

            grammar.AddData(null, "b");
            guessCount = grammar.Compile().GetGuesses(null).Length;
            Assert.AreEqual(2, guessCount);

            // With three distinct tokens, each must be present among the guesses.
            grammar.AddData(null, "c");
            string[] allGuesses = grammar.Compile().GetGuesses(null);
            Assert.AreEqual(3, allGuesses.Length);
            Assert.IsTrue(allGuesses.Contains("a"));
            Assert.IsTrue(allGuesses.Contains("b"));
            Assert.IsTrue(allGuesses.Contains("c"));
        }
Exemple #20
0
        /// <summary>
        /// Checks which <c>HierarchicalNGram</c> constructor arguments are expected
        /// to raise a Unity <c>AssertionException</c>, and that a 3-level instance
        /// exposes a <c>UniGram</c> followed by two <c>NGram</c> grammars.
        /// </summary>
        public void TestConstruction()
        {
            // Sizes 0 and 1 are expected to be rejected.
            Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
            {
                new HierarchicalNGram(0, 0.6f);
            });

            Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
            {
                new HierarchicalNGram(1, 0.6f);
            });

            // NOTE(review): both weight cases below also pass n = 1, which the case
            // above already expects to throw, so they may not isolate the weight
            // check. Confirm whether n = 2 was intended here.
            Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
            {
                new HierarchicalNGram(1, 0f);
            });

            Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
            {
                new HierarchicalNGram(1, 1f);
            });

            // Sizes 2 through 14 are expected to construct without throwing.
            int size = 2;
            while (size < 15)
            {
                Assert.DoesNotThrow(() => { new HierarchicalNGram(size, 0.6f); });
                ++size;
            }

            var hierarchy = new HierarchicalNGram(3, 0.6f);

            Assert.AreEqual(3, hierarchy.Grammars.Length);

            // "as" yields null on a type mismatch, which the NotNull checks catch.
            var first  = hierarchy.Grammars[0] as UniGram;
            var second = hierarchy.Grammars[1] as NGram;
            var third  = hierarchy.Grammars[2] as NGram;

            Assert.NotNull(first);
            Assert.NotNull(second);
            Assert.NotNull(third);
        }
Exemple #21
0
        /// <summary>
        /// Merges one 3-level <c>HierarchicalNGram</c> into another via
        /// <c>AddGrammar</c> and checks the combined weights in each of the target's
        /// three grammars.
        /// </summary>
        public void TestAddHierarchicalNGram()
        {
            HierarchicalNGram a = new HierarchicalNGram(3, 0.6f);

            a.AddData(new string[] { "a", "b" }, "c");
            UniGram u1 = a.Grammars[0] as UniGram;
            NGram   n2 = a.Grammars[1] as NGram;
            NGram   n3 = a.Grammars[2] as NGram;

            HierarchicalNGram b = new HierarchicalNGram(3, 0.6f);

            b.AddData(new string[] { "a", "b" }, "c");
            b.AddData(new string[] { "a", "b" }, "c");
            b.AddData(new string[] { "c", "b" }, "c");
            b.AddData(new string[] { "b", "b" }, "d");

            a.AddGrammar(b);

            // unigrams: a, b, c, d
            Assert.AreEqual(4, u1.Grammar.Keys.Count);
            Assert.AreEqual(3f, u1.Grammar["a"]);
            Assert.AreEqual(6f, u1.Grammar["b"]);
            Assert.AreEqual(5f, u1.Grammar["c"]);
            Assert.AreEqual(1f, u1.Grammar["d"]);

            // bigrams: ab, bc, cb, bb, bd (three distinct contexts: a, b, c)
            Assert.AreEqual(3, n2.Grammar.Count);
            Assert.AreEqual(3f, n2.Grammar["a"].Grammar["b"]);
            Assert.AreEqual(4f, n2.Grammar["b"].Grammar["c"]);
            Assert.AreEqual(1f, n2.Grammar["c"].Grammar["b"]);
            Assert.AreEqual(1f, n2.Grammar["b"].Grammar["b"]);
            Assert.AreEqual(1f, n2.Grammar["b"].Grammar["d"]);

            // trigrams: abc, cbc, bbd
            // Count is an integer, so it is compared with an integer literal.
            Assert.AreEqual(3, n3.Grammar.Count);
            Assert.AreEqual(3f, n3.Grammar["a,b"].Grammar["c"]);
            Assert.AreEqual(1f, n3.Grammar["c,b"].Grammar["c"]);
            Assert.AreEqual(1f, n3.Grammar["b,b"].Grammar["d"]);
        }
Exemple #22
0
        /// <summary>
        /// Checks that <c>UpdateMemory(0.9f)</c> on a 3-level
        /// <c>HierarchicalNGram</c> leaves the key counts unchanged and takes every
        /// weight to 0.9 after one call and approximately 0.81 after two.
        /// </summary>
        public void TestUpdateMemory()
        {
            var hierarchy    = new HierarchicalNGram(3, 0.6f);
            var unigramLevel = hierarchy.Grammars[0] as UniGram;
            var bigramLevel  = hierarchy.Grammars[1] as NGram;
            var trigramLevel = hierarchy.Grammars[2] as NGram;

            // One sample, one update.
            hierarchy.AddData(new[] { "a", "b" }, "c");
            hierarchy.UpdateMemory(0.9f);

            Assert.AreEqual(3, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(2, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1, trigramLevel.Grammar.Keys.Count);

            Assert.AreEqual(0.9f, unigramLevel.Grammar["a"]);
            Assert.AreEqual(0.9f, unigramLevel.Grammar["b"]);
            Assert.AreEqual(0.9f, unigramLevel.Grammar["c"]);

            Assert.AreEqual(0.9f, bigramLevel.Grammar["a"].Grammar["b"]);
            Assert.AreEqual(0.9f, bigramLevel.Grammar["b"].Grammar["c"]);

            Assert.AreEqual(0.9f, trigramLevel.Grammar["a,b"].Grammar["c"]);

            // Second update with no new data: compared with approximate equality.
            hierarchy.UpdateMemory(0.9f);

            Assert.AreEqual(3, unigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(2, bigramLevel.Grammar.Keys.Count);
            Assert.AreEqual(1, trigramLevel.Grammar.Keys.Count);

            const float twiceUpdated = 0.81f;

            Assert.IsTrue(Mathf.Approximately(twiceUpdated, unigramLevel.Grammar["a"]));
            Assert.IsTrue(Mathf.Approximately(twiceUpdated, unigramLevel.Grammar["b"]));
            Assert.IsTrue(Mathf.Approximately(twiceUpdated, unigramLevel.Grammar["c"]));

            Assert.IsTrue(Mathf.Approximately(twiceUpdated, bigramLevel.Grammar["a"].Grammar["b"]));
            Assert.IsTrue(Mathf.Approximately(twiceUpdated, bigramLevel.Grammar["b"].Grammar["c"]));

            Assert.IsTrue(Mathf.Approximately(twiceUpdated, trigramLevel.Grammar["a,b"].Grammar["c"]));
        }
Exemple #23
0
 /// <summary>
 /// Checks that the parameterless <c>UniGram</c> constructor does not throw.
 /// </summary>
 public void TestConstruction()
 {
     Assert.DoesNotThrow(() =>
     {
         new UniGram();
     });
 }