/// <summary>
/// Checks the sequence probability reported by a compiled unigram as data is
/// added. The expected values are the products of each token's relative
/// frequency in the data added so far.
/// </summary>
public void TestSequenceProbability()
{
    var model = new UniGram();

    // No data yet: the sequence is expected to score zero.
    Assert.AreEqual(
        0,
        model.Compile().SequenceProbability(new[] { "a", "b" }));

    // Data: a. Expected P(a) = 1, so "a a" scores 1.
    model.AddData(null, "a");
    Assert.AreEqual(
        1,
        model.Compile().SequenceProbability(new[] { "a", "a" }));

    // Data: a, b. Expected P(a) = P(b) = 0.5.
    model.AddData(null, "b");
    Assert.AreEqual(
        0.5,
        model.Compile().SequenceProbability(new[] { "a" }));
    Assert.AreEqual(
        0.25,
        model.Compile().SequenceProbability(new[] { "b", "b" }));

    // Data: a, b, b, b. Expected P(a) = 0.25 and P(b) = 0.75.
    model.AddData(null, "b");
    model.AddData(null, "b");
    Assert.AreEqual(
        0.75,
        model.Compile().SequenceProbability(new[] { "b" }));
    Assert.AreEqual(
        0.75 * 0.25 * 0.75,
        model.Compile().SequenceProbability(new[] { "b", "a", "b" }));
}
/// <summary>
/// Checks the perplexity reported by a compiled unigram as data is added.
/// Each expected value is written as the reciprocal of the probability the
/// same sequence is expected to have; the empty model expects +infinity.
/// </summary>
public void TestPerplexity()
{
    var model = new UniGram();

    // No data yet: perplexity is expected to be unbounded.
    Assert.AreEqual(
        double.PositiveInfinity,
        model.Compile().Perplexity(new[] { "a", "b" }));

    // Data: a.
    model.AddData(null, "a");
    Assert.AreEqual(
        1,
        model.Compile().Perplexity(new[] { "a", "a" }));

    // Data: a, b.
    model.AddData(null, "b");
    Assert.AreEqual(
        1 / 0.5,
        model.Compile().Perplexity(new[] { "a" }));
    Assert.AreEqual(
        1 / 0.25,
        model.Compile().Perplexity(new[] { "b", "b" }));

    // Data: a, b, b, b.
    model.AddData(null, "b");
    model.AddData(null, "b");
    Assert.AreEqual(
        1 / 0.75,
        model.Compile().Perplexity(new[] { "b" }));
    Assert.AreEqual(
        1 / (0.75 * 0.25 * 0.75),
        model.Compile().Perplexity(new[] { "b", "a", "b" }));
}
/// <summary>
/// Checks that GetValues on a compiled unigram returns the expected
/// token-to-value dictionary: empty with no data, then each token's share
/// of the data added so far.
/// </summary>
public void TestGetValues()
{
    var model = new UniGram();

    // No data: an empty dictionary is expected.
    var expectedEmpty = new Dictionary<string, float>();
    Assert.AreEqual(expectedEmpty, model.Compile().GetValues(null));

    // Data: a.
    model.AddData(null, "a");
    var expectedSingle = new Dictionary<string, float> { { "a", 1f } };
    Assert.AreEqual(expectedSingle, model.Compile().GetValues(null));

    // Data: a, a, a, b.
    model.AddData(null, "a");
    model.AddData(null, "a");
    model.AddData(null, "b");
    var expectedMixed = new Dictionary<string, float>
    {
        { "a", 0.75f },
        { "b", 0.25f }
    };
    Assert.AreEqual(expectedMixed, model.Compile().GetValues(null));
}
/// <summary>
/// Checks that UpdateMemory(0.9f) scales every stored weight: a weight of 1
/// becomes 0.9 after one call and approximately 0.81 after two.
/// </summary>
public void TestUpdateMemory()
{
    var gram = new UniGram();

    // First update: "a" goes from 1 to 0.9.
    gram.AddData(null, "a");
    gram.UpdateMemory(0.9f);
    Assert.AreEqual(0.9f, gram.Grammar["a"]);

    // Second update: "a" is scaled again, the freshly added "b" only once.
    gram.AddData(null, "b");
    gram.UpdateMemory(0.9f);

    bool aScaledTwice = Mathf.Approximately(0.81f, gram.Grammar["a"]);
    Assert.IsTrue(aScaledTwice);
    Assert.AreEqual(0.9f, gram.Grammar["b"]);
}
/// <summary>
/// Checks that cloning a CompiledUniGram yields a gram that reports the same
/// N and the same values as the original.
/// </summary>
public void TestClone()
{
    var source = new UniGram();
    source.AddData(null, "a");
    source.AddData(null, "a");
    source.AddData(null, "b");

    var original = new CompiledUniGram(source.Grammar);
    ICompiledGram copy = original.Clone();

    Assert.AreEqual(original.GetN(), copy.GetN());
    Assert.AreEqual(original.GetValues(null), copy.GetValues(null));
}
/// <summary>
/// Checks that a compiled unigram reports no next step while it has no data
/// and reports one after each subsequent addition.
/// </summary>
public void TestHasNextStep()
{
    var model = new UniGram();

    // No data yet.
    Assert.IsFalse(model.Compile().HasNextStep(null));

    // First token.
    model.AddData(null, "a");
    Assert.IsTrue(model.Compile().HasNextStep(null));

    // Same token again.
    model.AddData(null, "a");
    Assert.IsTrue(model.Compile().HasNextStep(null));

    // A second, distinct token.
    model.AddData(null, "b");
    Assert.IsTrue(model.Compile().HasNextStep(null));
}
/// <summary>
/// Checks that AddData(context, token) raises the stored count of the token
/// by one. The expected counts are the same whether the context argument is
/// null or an arbitrary array.
/// </summary>
public void TestAddDataString()
{
    var gram = new UniGram();

    // Null context, first "a".
    gram.AddData(null, "a");
    Assert.AreEqual(1, gram.Grammar.Keys.Count);
    Assert.AreEqual(1f, gram.Grammar["a"]);

    // Non-null context, second "a": still a single key, count goes to 2.
    gram.AddData(new[] { "asdf" }, "a");
    Assert.AreEqual(1, gram.Grammar.Keys.Count);
    Assert.AreEqual(2f, gram.Grammar["a"]);

    // A new token gets its own key and leaves "a" untouched.
    gram.AddData(new[] { "31234" }, "b");
    Assert.AreEqual(2, gram.Grammar.Keys.Count);
    Assert.AreEqual(1f, gram.Grammar["b"]);
    Assert.AreEqual(2f, gram.Grammar["a"]);
}
/// <summary>
/// Checks that a compiled HierarchicalNGram(3, 0.9f) scores a sequence as a
/// weighted sum of the scores of a unigram, a bigram and a trigram built
/// from the same observation.
/// </summary>
public void TestSequenceProbability()
{
    HierarchicalNGram gram = new HierarchicalNGram(3, 0.9f);

    // No data yet: the sequence is expected to score zero.
    Assert.AreEqual(
        0,
        gram.Compile().SequenceProbability(new string[] { "a", "b", "c" }));

    // Expected mixing weights: 0.9, 0.9^2 and 0.9^3, normalised to sum to one.
    // NOTE(review): presumably derived from the 0.9f constructor argument,
    // with the highest-order gram weighted most - confirm against
    // HierarchicalNGram.
    double denominator = 0.9 + 0.81 + 0.729;
    double triWeight = 0.9 / denominator;
    double biWeight = 0.81 / denominator;
    double uniweight = 0.729 / denominator;

    // Reference grams, each fed by hand with the data the single
    // hierarchical AddData call below is expected to spread across levels.
    UniGram gram1 = new UniGram();
    gram1.AddData(null, "a");
    gram1.AddData(null, "a");
    gram1.AddData(null, "b");

    NGram gram2 = new NGram(2);
    gram2.AddData(new string[] { "a" }, "a");
    gram2.AddData(new string[] { "a" }, "b");

    NGram gram3 = new NGram(3);
    gram3.AddData(new string[] { "a", "a" }, "b");

    gram.AddData(new string[] { "a", "a" }, "b");

    ICompiledGram c1 = gram1.Compile();
    ICompiledGram c2 = gram2.Compile();
    ICompiledGram c3 = gram3.Compile();

    // One shared sequence for both sides of the comparison, so the expected
    // and actual values cannot drift apart when the test is edited.
    string[] input = new string[] { "a", "a", "b" };

    double expected =
        uniweight * c1.SequenceProbability(input) +
        biWeight * c2.SequenceProbability(input) +
        triWeight * c3.SequenceProbability(input);

    double actual = gram.Compile().SequenceProbability(input);

    Assert.IsTrue(
        Mathf.Approximately((float)expected, (float)actual),
        $"Expected {expected} but received {actual}.");
}
/// <summary>
/// Checks that AddData(token, weight) accumulates the supplied weight under
/// the token's key, creating the key on first use.
/// </summary>
public void TestAddDataWeight()
{
    var gram = new UniGram();

    // First weight for "a".
    gram.AddData("a", 2.2f);
    Assert.AreEqual(1, gram.Grammar.Keys.Count);
    Assert.AreEqual(2.2f, gram.Grammar["a"]);

    // Second weight for "a" is added to the first.
    gram.AddData("a", 0.8f);
    Assert.AreEqual(1, gram.Grammar.Keys.Count);
    Assert.AreEqual(3f, gram.Grammar["a"]);

    // A new token gets its own entry; "a" keeps its total.
    gram.AddData("b", 2.823f);
    Assert.AreEqual(3f, gram.Grammar["a"]);
    Assert.AreEqual(2.823f, gram.Grammar["b"]);
    Assert.AreEqual(2, gram.Grammar.Keys.Count);
}
/// <summary>
/// Draws 1000 values from a CompiledUniGram built over "a", "a", "b", "c"
/// and checks that only those tokens come back and that each one is seen at
/// least once.
/// </summary>
public void TestGet()
{
    var source = new UniGram();
    source.AddData(null, "a");
    source.AddData(null, "a");
    source.AddData(null, "b");
    source.AddData(null, "c");

    var compiled = new CompiledUniGram(source.Grammar);

    bool sawA = false;
    bool sawB = false;
    bool sawC = false;

    for (int draw = 0; draw < 1000; draw++)
    {
        string token = compiled.Get(null);

        if (token == "a")
        {
            sawA = true;
        }
        else if (token == "b")
        {
            sawB = true;
        }
        else if (token == "c")
        {
            sawC = true;
        }
        else
        {
            // Anything outside the training tokens fails the test.
            Assert.Fail($"{token} should not be possible.");
        }
    }

    // This can fail by chance if a token never comes up in 1000 draws,
    // but that is very unlikely.
    Assert.IsTrue(sawA);
    Assert.IsTrue(sawB);
    Assert.IsTrue(sawC);
}
/// <summary>
/// Checks that AddGrammar adds the other unigram's counts onto the target:
/// an empty source changes nothing, and repeated merges accumulate.
/// </summary>
public void TestAddGrammar()
{
    var target = new UniGram();
    var other = new UniGram();

    // Merging an empty grammar leaves the target empty.
    target.AddGrammar(other);
    Assert.AreEqual(0, target.Grammar.Keys.Count);

    // other = { a: 1 }  ->  target = { a: 1 }
    other.AddData(null, "a");
    target.AddGrammar(other);
    Assert.AreEqual(1, target.Grammar.Keys.Count);
    Assert.AreEqual(1f, target.Grammar["a"]);

    // other = { a: 2, b: 1 } merged on top  ->  target = { a: 3, b: 1 }
    other.AddData(null, "b");
    other.AddData(null, "a");
    target.AddGrammar(other);
    Assert.AreEqual(2, target.Grammar.Keys.Count);
    Assert.AreEqual(3f, target.Grammar["a"]);
    Assert.AreEqual(1f, target.Grammar["b"]);
}
/// <summary>
/// Checks that GetGuesses on a compiled unigram returns one entry per
/// distinct token added so far.
/// </summary>
public void TestGetGuesses()
{
    var model = new UniGram();

    // No data: no guesses.
    Assert.AreEqual(0, model.Compile().GetGuesses(null).Length);

    // One token.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Compile().GetGuesses(null).Length);

    // The same token again does not add a guess.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Compile().GetGuesses(null).Length);

    // A second distinct token.
    model.AddData(null, "b");
    Assert.AreEqual(2, model.Compile().GetGuesses(null).Length);

    // A third distinct token: all three must be present.
    model.AddData(null, "c");
    string[] guesses = model.Compile().GetGuesses(null);
    Assert.AreEqual(3, guesses.Length);

    foreach (string token in new[] { "a", "b", "c" })
    {
        Assert.IsTrue(guesses.Contains(token));
    }
}
/// <summary>
/// Checks that adding a UniGram to a HierarchicalNGram(2, 0.6f) updates only
/// the unigram held in Grammars[0] and leaves the NGram in Grammars[1] empty.
/// </summary>
public void TestAddUnigram()
{
    HierarchicalNGram a = new HierarchicalNGram(2, 0.6f);

    UniGram u1 = a.Grammars[0] as UniGram;
    NGram n2 = a.Grammars[1] as NGram;

    // `as` yields null on a type mismatch. Fail here with a clear message
    // rather than with a NullReferenceException in the assertions below.
    Assert.IsNotNull(u1, "Grammars[0] should be a UniGram.");
    Assert.IsNotNull(n2, "Grammars[1] should be an NGram.");

    UniGram unigram = new UniGram();
    unigram.AddData(null, "a");

    a.AddGrammar(unigram);

    Assert.AreEqual(1, u1.Grammar.Keys.Count);
    Assert.AreEqual(0, n2.Grammar.Keys.Count);
    Assert.AreEqual(1f, u1.Grammar["a"]);
}
/// <summary>
/// Checks that Compile does not throw for an empty unigram or after
/// successive additions of data.
/// </summary>
public void TestConstructor()
{
    var model = new UniGram();

    // Empty model.
    Assert.DoesNotThrow(() => model.Compile());

    // Single token.
    model.AddData(null, "a");
    Assert.DoesNotThrow(() => model.Compile());

    // Two distinct tokens.
    model.AddData(null, "b");
    Assert.DoesNotThrow(() => model.Compile());

    // Uneven counts: three more "b" followed by one more "a".
    for (int i = 0; i < 3; i++)
    {
        model.AddData(null, "b");
    }

    model.AddData(null, "a");
    Assert.DoesNotThrow(() => model.Compile());
}