/// <summary>
/// Checks the sequence probability reported by a compiled unigram as data is
/// added. The expected values are the product of each token's relative
/// frequency, and 0 when the model has no data.
/// </summary>
public void TestSequenceProbability()
{
    var model = new UniGram();

    // Empty model.
    Assert.AreEqual(0, model.Compile().SequenceProbability(new[] { "a", "b" }));

    // Counts: a = 1.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Compile().SequenceProbability(new[] { "a", "a" }));

    // Counts: a = 1, b = 1.
    model.AddData(null, "b");
    Assert.AreEqual(0.5, model.Compile().SequenceProbability(new[] { "a" }));
    Assert.AreEqual(0.25, model.Compile().SequenceProbability(new[] { "b", "b" }));

    // Counts: a = 1, b = 3.
    model.AddData(null, "b");
    model.AddData(null, "b");
    Assert.AreEqual(0.75, model.Compile().SequenceProbability(new[] { "b" }));
    Assert.AreEqual(
        0.75 * 0.25 * 0.75,
        model.Compile().SequenceProbability(new[] { "b", "a", "b" }));
}
/// <summary>
/// Merges standalone n-grams of order 2 and 3 into a three-level
/// <c>HierarchicalNGram</c> via <c>AddGrammar</c>. The assertions expect only
/// the level of the matching order to receive the counts.
/// </summary>
public void TestAddNGram()
{
    var hierarchy = new HierarchicalNGram(3, 0.6f);
    var uni = hierarchy.Grammars[0] as UniGram;
    var bi = hierarchy.Grammars[1] as NGram;
    var tri = hierarchy.Grammars[2] as NGram;

    // Merge a bigram: only the second level should change.
    var bigramSource = new NGram(2);
    bigramSource.AddData(new[] { "a" }, "b");
    hierarchy.AddGrammar(bigramSource);

    Assert.AreEqual(0, uni.Grammar.Keys.Count);
    Assert.AreEqual(1, bi.Grammar.Keys.Count);
    Assert.AreEqual(0, tri.Grammar.Keys.Count);
    Assert.AreEqual(1f, bi.Grammar["a"].Grammar["b"]);

    // Merge a trigram: only the third level should change.
    var trigramSource = new NGram(3);
    trigramSource.AddData(new[] { "a", "b" }, "c");
    trigramSource.AddData(new[] { "a", "b" }, "c");
    trigramSource.AddData(new[] { "a", "b" }, "d");
    trigramSource.AddData(new[] { "a", "c" }, "d");
    hierarchy.AddGrammar(trigramSource);

    Assert.AreEqual(0, uni.Grammar.Keys.Count);
    Assert.AreEqual(1, bi.Grammar.Keys.Count);
    Assert.AreEqual(2, tri.Grammar.Keys.Count);
    Assert.AreEqual(1f, bi.Grammar["a"].Grammar["b"]);
    Assert.AreEqual(2f, tri.Grammar["a,b"].Grammar["c"]);
    Assert.AreEqual(1f, tri.Grammar["a,b"].Grammar["d"]);
    Assert.AreEqual(1f, tri.Grammar["a,c"].Grammar["d"]);
}
/// <summary>
/// Checks that <c>GetValues</c> on a compiled unigram returns an empty map
/// for an empty model and otherwise maps each token to its share of the
/// total count (the expected values below).
/// </summary>
public void TestGetValues()
{
    var model = new UniGram();

    var expectedEmpty = new Dictionary<string, float>();
    Assert.AreEqual(expectedEmpty, model.Compile().GetValues(null));

    // Counts: a = 1.
    model.AddData(null, "a");
    var expectedSingle = new Dictionary<string, float> { { "a", 1f } };
    Assert.AreEqual(expectedSingle, model.Compile().GetValues(null));

    // Counts: a = 3, b = 1.
    model.AddData(null, "a");
    model.AddData(null, "a");
    model.AddData(null, "b");
    var expectedMixed = new Dictionary<string, float>
    {
        { "a", 0.75f },
        { "b", 0.25f }
    };
    Assert.AreEqual(expectedMixed, model.Compile().GetValues(null));
}
/// <summary>
/// Checks the perplexity reported by a compiled unigram. The expected values
/// are the reciprocal of the sequence probability, and positive infinity
/// when the model has no data.
/// </summary>
public void TestPerplexity()
{
    var model = new UniGram();

    // Empty model.
    Assert.AreEqual(
        double.PositiveInfinity,
        model.Compile().Perplexity(new[] { "a", "b" }));

    // Counts: a = 1.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Compile().Perplexity(new[] { "a", "a" }));

    // Counts: a = 1, b = 1.
    model.AddData(null, "b");
    Assert.AreEqual(1 / 0.5, model.Compile().Perplexity(new[] { "a" }));
    Assert.AreEqual(1 / 0.25, model.Compile().Perplexity(new[] { "b", "b" }));

    // Counts: a = 1, b = 3.
    model.AddData(null, "b");
    model.AddData(null, "b");
    Assert.AreEqual(1 / 0.75, model.Compile().Perplexity(new[] { "b" }));
    Assert.AreEqual(
        1 / (0.75 * 0.25 * 0.75),
        model.Compile().Perplexity(new[] { "b", "a", "b" }));
}
/// <summary>
/// Feeds (context, token) samples into a three-level
/// <c>HierarchicalNGram</c> and checks the counts each level is expected to
/// hold afterwards: single tokens, adjacent pairs and the full triple.
/// </summary>
public void TestAddData()
{
    var hierarchy = new HierarchicalNGram(3, 0.6f);
    var uni = hierarchy.Grammars[0] as UniGram;
    var bi = hierarchy.Grammars[1] as NGram;
    var tri = hierarchy.Grammars[2] as NGram;

    // First sample: a b -> c
    hierarchy.AddData(new[] { "a", "b" }, "c");

    Assert.AreEqual(3, uni.Grammar.Keys.Count);
    Assert.AreEqual(2, bi.Grammar.Keys.Count);
    Assert.AreEqual(1, tri.Grammar.Keys.Count);

    Assert.AreEqual(1f, uni.Grammar["a"]);
    Assert.AreEqual(1f, uni.Grammar["b"]);
    Assert.AreEqual(1f, uni.Grammar["c"]);
    Assert.AreEqual(1f, bi.Grammar["a"].Grammar["b"]);
    Assert.AreEqual(1f, bi.Grammar["b"].Grammar["c"]);
    Assert.AreEqual(1f, tri.Grammar["a,b"].Grammar["c"]);

    // Repeat the first sample, then add c b -> c
    hierarchy.AddData(new[] { "a", "b" }, "c");
    hierarchy.AddData(new[] { "c", "b" }, "c");

    Assert.AreEqual(3, uni.Grammar.Keys.Count);
    Assert.AreEqual(3, bi.Grammar.Keys.Count);
    Assert.AreEqual(2, tri.Grammar.Keys.Count);

    Assert.AreEqual(2f, uni.Grammar["a"]);
    Assert.AreEqual(3f, uni.Grammar["b"]);
    Assert.AreEqual(4f, uni.Grammar["c"]);
    Assert.AreEqual(2f, bi.Grammar["a"].Grammar["b"]);
    Assert.AreEqual(3f, bi.Grammar["b"].Grammar["c"]);
    Assert.AreEqual(1f, bi.Grammar["c"].Grammar["b"]);
    Assert.AreEqual(2f, tri.Grammar["a,b"].Grammar["c"]);
    Assert.AreEqual(1f, tri.Grammar["c,b"].Grammar["c"]);

    // After a b -> d the levels hold:
    //   a, b, c, d
    //   ab, bc, cb, bd
    //   abc, cbc, abd
    hierarchy.AddData(new[] { "a", "b" }, "d");

    Assert.AreEqual(4, uni.Grammar.Keys.Count);
    Assert.AreEqual(3, bi.Grammar.Keys.Count);
    Assert.AreEqual(2, tri.Grammar.Keys.Count);

    Assert.AreEqual(3f, uni.Grammar["a"]);
    Assert.AreEqual(4f, uni.Grammar["b"]);
    Assert.AreEqual(4f, uni.Grammar["c"]);
    Assert.AreEqual(1f, uni.Grammar["d"]);
    Assert.AreEqual(3f, bi.Grammar["a"].Grammar["b"]);
    Assert.AreEqual(3f, bi.Grammar["b"].Grammar["c"]);
    Assert.AreEqual(1f, bi.Grammar["b"].Grammar["d"]);
    Assert.AreEqual(1f, bi.Grammar["c"].Grammar["b"]);
    Assert.AreEqual(2f, tri.Grammar["a,b"].Grammar["c"]);
    Assert.AreEqual(1f, tri.Grammar["a,b"].Grammar["d"]);
    Assert.AreEqual(1f, tri.Grammar["c,b"].Grammar["c"]);
}
/// <summary>
/// Builds a <c>UniGram</c> from a tab-separated "word, count" line and checks
/// that <c>ToString</c> yields only the word.
/// </summary>
public void ToString_OneWordLine_ReturnsSimpleWord()
{
    // Arrange
    string line = "a\t33198366";
    string expectedToString = "a";

    // Act
    var ngram = new UniGram(line);
    var result = ngram.ToString();

    // Assert: expected value goes first so a failure message labels the
    // two values correctly.
    Assert.AreEqual(expectedToString, result);
}
/// <summary>
/// Builds a <c>UniGram</c> from a tab-separated "word, count" line and checks
/// that <c>Frequency</c> exposes the count as a number.
/// </summary>
public void Frequency_OneWordLine_ReturnsIntFrequency()
{
    // Arrange
    string line = "a\t33198366";
    int frequency = 33198366;

    // Act
    var uniGram = new UniGram(line);
    var result = uniGram.Frequency;

    // Assert: expected value goes first so a failure message labels the
    // two values correctly.
    Assert.AreEqual(frequency, result);
}
/// <summary>
/// Builds a <c>UniGram</c> from a tab-separated "word, count" line and checks
/// that <c>Words</c> contains exactly the single word.
/// </summary>
public void Words_OneWordLine_ReturnsArrayWithOneString()
{
    // Arrange
    string line = "a\t33198366";
    string[] words = { "a" };

    // Act
    var uniGram = new UniGram(line);
    var result = uniGram.Words;

    // Assert: expected value goes first so a failure message labels the
    // two values correctly.
    // NOTE(review): this relies on the test framework comparing arrays
    // element-wise inside Assert.AreEqual (NUnit does) - confirm.
    Assert.AreEqual(words, result);
}
/// <summary>
/// Checks that each <c>UpdateMemory(0.9f)</c> call leaves every stored weight
/// at 0.9 times its previous value: "a" goes 1 -> 0.9 -> 0.81, while the
/// later-added "b" goes 1 -> 0.9.
/// </summary>
public void TestUpdateMemory()
{
    const float decay = 0.9f;
    var model = new UniGram();

    model.AddData(null, "a");
    model.UpdateMemory(decay);
    Assert.AreEqual(0.9f, model.Grammar["a"]);

    model.AddData(null, "b");
    model.UpdateMemory(decay);

    // "a" has been decayed twice, so compare with a float tolerance.
    bool aDecayedTwice = Mathf.Approximately(0.81f, model.Grammar["a"]);
    Assert.IsTrue(aDecayedTwice);
    Assert.AreEqual(0.9f, model.Grammar["b"]);
}
/// <summary>
/// Checks that cloning a <c>CompiledUniGram</c> yields a gram reporting the
/// same N and the same value table as the original.
/// </summary>
public void TestClone()
{
    var source = new UniGram();
    source.AddData(null, "a");
    source.AddData(null, "a");
    source.AddData(null, "b");

    var original = new CompiledUniGram(source.Grammar);
    ICompiledGram copy = original.Clone();

    Assert.AreEqual(original.GetN(), copy.GetN());
    Assert.AreEqual(original.GetValues(null), copy.GetValues(null));
}
/// <summary>
/// Checks that compiling a unigram never throws, whether it is empty or
/// holds one, two or several samples.
/// </summary>
public void TestConstructor()
{
    var model = new UniGram();

    // Empty model.
    Assert.DoesNotThrow(() => { model.Compile(); });

    // One token.
    model.AddData(null, "a");
    Assert.DoesNotThrow(() => { model.Compile(); });

    // Two distinct tokens.
    model.AddData(null, "b");
    Assert.DoesNotThrow(() => { model.Compile(); });

    // Several repeated tokens.
    foreach (string token in new[] { "b", "b", "b", "a" })
    {
        model.AddData(null, token);
    }

    Assert.DoesNotThrow(() => { model.Compile(); });
}
/// <summary>
/// Checks that a compiled unigram reports no next step while it is empty and
/// reports one after every subsequent sample.
/// </summary>
public void TestHasNextStep()
{
    var model = new UniGram();
    Assert.IsFalse(model.Compile().HasNextStep(null));

    // Each added token must leave the compiled model with a next step.
    foreach (string token in new[] { "a", "a", "b" })
    {
        model.AddData(null, token);
        Assert.IsTrue(model.Compile().HasNextStep(null));
    }
}
/// <summary>
/// Merges a standalone <c>UniGram</c> into a two-level
/// <c>HierarchicalNGram</c>. The assertions expect only the unigram level to
/// receive the count.
/// </summary>
public void TestAddUnigram()
{
    var hierarchy = new HierarchicalNGram(2, 0.6f);
    var uni = hierarchy.Grammars[0] as UniGram;
    var bi = hierarchy.Grammars[1] as NGram;

    var source = new UniGram();
    source.AddData(null, "a");
    hierarchy.AddGrammar(source);

    Assert.AreEqual(1, uni.Grammar.Keys.Count);
    Assert.AreEqual(0, bi.Grammar.Keys.Count);
    Assert.AreEqual(1, uni.Grammar["a"]);
}
/// <summary>
/// Checks <c>UniGram.AddData(context, token)</c>: the assertions expect only
/// the token to be counted, whatever context array (or null) is passed.
/// </summary>
public void TestAddDataString()
{
    var model = new UniGram();

    // Null context.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Grammar.Keys.Count);
    Assert.AreEqual(1f, model.Grammar["a"]);

    // A non-null context must not introduce a new key.
    model.AddData(new[] { "asdf" }, "a");
    Assert.AreEqual(1, model.Grammar.Keys.Count);
    Assert.AreEqual(2f, model.Grammar["a"]);

    // A new token gets its own entry and leaves "a" untouched.
    model.AddData(new[] { "31234" }, "b");
    Assert.AreEqual(2, model.Grammar.Keys.Count);
    Assert.AreEqual(1f, model.Grammar["b"]);
    Assert.AreEqual(2f, model.Grammar["a"]);
}
/// <summary>
/// Compares the sequence probability of a compiled three-level
/// <c>HierarchicalNGram</c> against a hand-built expectation: a weighted sum
/// of the unigram, bigram and trigram probabilities, with weights 0.729,
/// 0.81 and 0.9 normalised by their total.
/// </summary>
public void TestSequenceProbability()
{
    var hierarchy = new HierarchicalNGram(3, 0.9f);

    // No data yet.
    Assert.AreEqual(
        0,
        hierarchy.Compile().SequenceProbability(new[] { "a", "b", "c" }));

    // Normalised weights used by the expectation below.
    double weightTotal = 0.9 + 0.81 + 0.729;
    double trigramWeight = 0.9 / weightTotal;
    double bigramWeight = 0.81 / weightTotal;
    double unigramWeight = 0.729 / weightTotal;

    // Stand-alone grams built from the pieces of the sample "a a -> b".
    var unigram = new UniGram();
    unigram.AddData(null, "a");
    unigram.AddData(null, "a");
    unigram.AddData(null, "b");

    var bigram = new NGram(2);
    bigram.AddData(new[] { "a" }, "a");
    bigram.AddData(new[] { "a" }, "b");

    var trigram = new NGram(3);
    trigram.AddData(new[] { "a", "a" }, "b");

    hierarchy.AddData(new[] { "a", "a" }, "b");

    ICompiledGram compiledUni = unigram.Compile();
    ICompiledGram compiledBi = bigram.Compile();
    ICompiledGram compiledTri = trigram.Compile();

    string[] input = { "a", "a", "b" };
    double expected =
        unigramWeight * compiledUni.SequenceProbability(input)
        + bigramWeight * compiledBi.SequenceProbability(input)
        + trigramWeight * compiledTri.SequenceProbability(input);

    double actual = hierarchy.Compile().SequenceProbability(new[] { "a", "a", "b" });

    bool closeEnough = Mathf.Approximately((float)expected, (float)actual);
    Assert.IsTrue(closeEnough, $"Expected {expected} but received {actual}.");
}
/// <summary>
/// Checks <c>UniGram.AddData(token, weight)</c>: a new token stores the given
/// weight, and a repeated token accumulates weights under the same key.
/// </summary>
public void TestAddDataWeight()
{
    // The accumulated weight is a float sum (2.2f + 0.8f), so it is compared
    // with a tolerance instead of exact equality.
    const float tolerance = 1e-6f;

    UniGram a = new UniGram();

    a.AddData("a", 2.2f);
    Assert.AreEqual(1, a.Grammar.Keys.Count);
    Assert.AreEqual(2.2f, a.Grammar["a"]);

    a.AddData("a", 0.8f);
    Assert.AreEqual(1, a.Grammar.Keys.Count);
    Assert.AreEqual(3f, a.Grammar["a"], tolerance);

    a.AddData("b", 2.823f);
    Assert.AreEqual(3f, a.Grammar["a"], tolerance);
    Assert.AreEqual(2.823f, a.Grammar["b"]);
    Assert.AreEqual(2, a.Grammar.Keys.Count);
}
/// <summary>
/// Samples a <c>CompiledUniGram</c> 1000 times and checks that only the
/// tokens that were added ("a", "b", "c") come back, and that each of them
/// is produced at least once.
/// </summary>
public void TestGet()
{
    var source = new UniGram();
    source.AddData(null, "a");
    source.AddData(null, "a");
    source.AddData(null, "b");
    source.AddData(null, "c");

    var compiled = new CompiledUniGram(source.Grammar);

    bool sawA = false;
    bool sawB = false;
    bool sawC = false;

    for (int attempt = 0; attempt < 1000; ++attempt)
    {
        string sample = compiled.Get(null);

        if (sample == "a")
        {
            sawA = true;
        }
        else if (sample == "b")
        {
            sawB = true;
        }
        else if (sample == "c")
        {
            sawC = true;
        }
        else
        {
            Assert.Fail($"{sample} should not be possible.");
        }
    }

    // In theory one of these could never be drawn, but over 1000 samples
    // that is very unlikely.
    Assert.IsTrue(sawA);
    Assert.IsTrue(sawB);
    Assert.IsTrue(sawC);
}
/// <summary>
/// Checks that <c>UniGram.AddGrammar</c> adds the other gram's counts onto
/// the target. The same source is merged repeatedly, so the target's "a"
/// count is expected to reach 1 + 2 = 3.
/// </summary>
public void TestAddGrammar()
{
    var target = new UniGram();
    var source = new UniGram();

    // Merging an empty gram changes nothing.
    target.AddGrammar(source);
    Assert.AreEqual(0, target.Grammar.Keys.Count);

    // Source: a = 1.
    source.AddData(null, "a");
    target.AddGrammar(source);
    Assert.AreEqual(1, target.Grammar.Keys.Count);
    Assert.AreEqual(1f, target.Grammar["a"]);

    // Source: a = 2, b = 1; merged on top of the earlier a = 1.
    source.AddData(null, "b");
    source.AddData(null, "a");
    target.AddGrammar(source);
    Assert.AreEqual(2, target.Grammar.Keys.Count);
    Assert.AreEqual(3f, target.Grammar["a"]);
    Assert.AreEqual(1f, target.Grammar["b"]);
}
/// <summary>
/// Checks that <c>GetGuesses</c> on a compiled unigram returns one entry per
/// distinct token seen so far, and that the final result holds exactly the
/// added tokens.
/// </summary>
public void TestGetGuesses()
{
    var model = new UniGram();
    Assert.AreEqual(0, model.Compile().GetGuesses(null).Length);

    // First "a" creates one guess.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Compile().GetGuesses(null).Length);

    // A repeated "a" does not add a guess.
    model.AddData(null, "a");
    Assert.AreEqual(1, model.Compile().GetGuesses(null).Length);

    model.AddData(null, "b");
    Assert.AreEqual(2, model.Compile().GetGuesses(null).Length);

    model.AddData(null, "c");
    string[] guesses = model.Compile().GetGuesses(null);
    Assert.AreEqual(3, guesses.Length);

    foreach (string token in new[] { "a", "b", "c" })
    {
        Assert.IsTrue(guesses.Contains(token));
    }
}
/// <summary>
/// Checks <c>HierarchicalNGram</c> construction: the listed invalid argument
/// combinations must raise a Unity <c>AssertionException</c>, sizes 2 to 14
/// must construct cleanly, and a size-3 instance must expose a unigram
/// followed by two n-grams.
/// </summary>
public void TestConstruction()
{
    // Sizes below 2 are expected to be rejected.
    Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
    {
        IGram rejected = new HierarchicalNGram(0, 0.6f);
    });
    Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
    {
        IGram rejected = new HierarchicalNGram(1, 0.6f);
    });

    // NOTE(review): both calls below pass n = 1, which the assertion above
    // already expects to throw, so they may not exercise validation of the
    // 0f / 1f weight on its own - confirm whether n >= 2 was intended.
    Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
    {
        new HierarchicalNGram(1, 0f);
    });
    Assert.Throws<UnityEngine.Assertions.AssertionException>(() =>
    {
        new HierarchicalNGram(1, 1f);
    });

    for (int size = 2; size < 15; ++size)
    {
        Assert.DoesNotThrow(() =>
        {
            IGram accepted = new HierarchicalNGram(size, 0.6f);
        });
    }

    var hierarchy = new HierarchicalNGram(3, 0.6f);
    Assert.AreEqual(3, hierarchy.Grammars.Length);

    // "as" yields null on a type mismatch, so NotNull doubles as a type check.
    var first = hierarchy.Grammars[0] as UniGram;
    var second = hierarchy.Grammars[1] as NGram;
    var third = hierarchy.Grammars[2] as NGram;

    Assert.NotNull(first);
    Assert.NotNull(second);
    Assert.NotNull(third);
}
/// <summary>
/// Merges one three-level <c>HierarchicalNGram</c> into another with
/// <c>AddGrammar</c> and checks that every level of the target ends up with
/// the summed counts of both.
/// </summary>
public void TestAddHierarchicalNGram()
{
    // Target already holds one sample: a b -> c
    HierarchicalNGram a = new HierarchicalNGram(3, 0.6f);
    a.AddData(new string[] { "a", "b" }, "c");

    UniGram u1 = a.Grammars[0] as UniGram;
    NGram n2 = a.Grammars[1] as NGram;
    NGram n3 = a.Grammars[2] as NGram;

    // Source holds four samples.
    HierarchicalNGram b = new HierarchicalNGram(3, 0.6f);
    b.AddData(new string[] { "a", "b" }, "c");
    b.AddData(new string[] { "a", "b" }, "c");
    b.AddData(new string[] { "c", "b" }, "c");
    b.AddData(new string[] { "b", "b" }, "d");

    a.AddGrammar(b);

    // Unigram tokens: a, b, c, d
    Assert.AreEqual(4, u1.Grammar.Keys.Count);
    Assert.AreEqual(3f, u1.Grammar["a"]);
    Assert.AreEqual(6f, u1.Grammar["b"]);
    Assert.AreEqual(5f, u1.Grammar["c"]);
    Assert.AreEqual(1f, u1.Grammar["d"]);

    // Bigrams: ab, bc, cb, bb, bd (three distinct prefixes: a, b, c)
    Assert.AreEqual(3, n2.Grammar.Count);
    Assert.AreEqual(3f, n2.Grammar["a"].Grammar["b"]);
    Assert.AreEqual(4f, n2.Grammar["b"].Grammar["c"]);
    Assert.AreEqual(1f, n2.Grammar["c"].Grammar["b"]);
    Assert.AreEqual(1f, n2.Grammar["b"].Grammar["b"]);
    Assert.AreEqual(1f, n2.Grammar["b"].Grammar["d"]);

    // Trigrams: abc, cbc, bbd
    // Count is an integer, so compare against an integer literal.
    Assert.AreEqual(3, n3.Grammar.Count);
    Assert.AreEqual(3f, n3.Grammar["a,b"].Grammar["c"]);
    Assert.AreEqual(1f, n3.Grammar["c,b"].Grammar["c"]);
    Assert.AreEqual(1f, n3.Grammar["b,b"].Grammar["d"]);
}
/// <summary>
/// Checks that <c>HierarchicalNGram.UpdateMemory(0.9f)</c> leaves the key
/// sets of every level unchanged while each stored weight is expected to go
/// 1 -> 0.9 after the first call and to roughly 0.81 after the second.
/// </summary>
public void TestUpdateMemory()
{
    var hierarchy = new HierarchicalNGram(3, 0.6f);
    var uni = hierarchy.Grammars[0] as UniGram;
    var bi = hierarchy.Grammars[1] as NGram;
    var tri = hierarchy.Grammars[2] as NGram;

    hierarchy.AddData(new[] { "a", "b" }, "c");

    // First decay.
    hierarchy.UpdateMemory(0.9f);

    Assert.AreEqual(3, uni.Grammar.Keys.Count);
    Assert.AreEqual(2, bi.Grammar.Keys.Count);
    Assert.AreEqual(1, tri.Grammar.Keys.Count);

    foreach (string token in new[] { "a", "b", "c" })
    {
        Assert.AreEqual(0.9f, uni.Grammar[token]);
    }

    Assert.AreEqual(0.9f, bi.Grammar["a"].Grammar["b"]);
    Assert.AreEqual(0.9f, bi.Grammar["b"].Grammar["c"]);
    Assert.AreEqual(0.9f, tri.Grammar["a,b"].Grammar["c"]);

    // Second decay: compare with a float tolerance.
    hierarchy.UpdateMemory(0.9f);

    Assert.AreEqual(3, uni.Grammar.Keys.Count);
    Assert.AreEqual(2, bi.Grammar.Keys.Count);
    Assert.AreEqual(1, tri.Grammar.Keys.Count);

    foreach (string token in new[] { "a", "b", "c" })
    {
        Assert.IsTrue(Mathf.Approximately(0.81f, uni.Grammar[token]));
    }

    Assert.IsTrue(Mathf.Approximately(0.81f, bi.Grammar["a"].Grammar["b"]));
    Assert.IsTrue(Mathf.Approximately(0.81f, bi.Grammar["b"].Grammar["c"]));
    Assert.IsTrue(Mathf.Approximately(0.81f, tri.Grammar["a,b"].Grammar["c"]));
}
/// <summary>
/// Checks that the parameterless <c>UniGram</c> constructor does not throw.
/// </summary>
public void TestConstruction()
{
    Assert.DoesNotThrow(() =>
    {
        UniGram created = new UniGram();
    });
}