// Smoke test: feeds every entry of a generated random vocabulary into a
// default-constructed NGramLanguageModel, then asks the model for the
// probability of a random sentence and checks the value against
// Is.InRange(0d, 1d).
public void TestRandomVocabularyAndSentence()
{
    var languageModel = new NGramLanguageModel();
    var vocabulary = LanguageModelTestUtils.GenerateRandomVocabulary(10);

    // NOTE(review): the (2, 3) arguments are presumably the min/max n-gram
    // sizes -- confirm against NGramLanguageModel.Add.
    foreach (var entry in vocabulary)
    {
        languageModel.Add(entry, 2, 3);
    }

    // The sentence is generated only after the model has been populated,
    // matching the original evaluation order.
    var randomSentence = LanguageModelTestUtils.GenerateRandomSentence();
    var sentenceProbability = languageModel.CalculateProbability(randomSentence);

    Assert.That(
        sentenceProbability,
        Is.InRange(0d, 1d),
        "a probability measure should be between 0 and 1 [was {0} ]",
        sentenceProbability);
}
// Builds three NGramLanguageModel instances (constructed with 1, 2 and 3)
// from the same generated training vocabulary, measures each one with
// LanguageModelTestUtils.GetPerplexity on a separate generated test
// vocabulary, and asserts that the measured perplexity does not increase
// from the first model to the second, nor from the second to the third.
public void TestPerplexityComparison()
{
    // A run with GenerateRandomVocabulary(1100000) was previously tried here;
    // presumably it was reduced to 11000 to keep the test fast -- confirm.
    // NOTE(review): trainingData is enumerated once per model. If
    // GenerateRandomVocabulary is lazily evaluated, each model could be fed
    // different random data -- confirm it returns a materialized collection.
    var trainingData = LanguageModelTestUtils.GenerateRandomVocabulary(11000);
    var heldOutData = LanguageModelTestUtils.GenerateRandomVocabulary(100);

    // Model constructed with 1, fed with Add(entry, 1, 1).
    var firstOrderModel = new NGramLanguageModel(1);
    foreach (var entry in trainingData)
    {
        firstOrderModel.Add(entry, 1, 1);
    }

    var firstOrderPerplexity = LanguageModelTestUtils.GetPerplexity(firstOrderModel, heldOutData, 1);

    // Model constructed with 2, fed with Add(entry, 1, 2).
    var secondOrderModel = new NGramLanguageModel(2);
    foreach (var entry in trainingData)
    {
        secondOrderModel.Add(entry, 1, 2);
    }

    var secondOrderPerplexity = LanguageModelTestUtils.GetPerplexity(secondOrderModel, heldOutData, 2);

    Assert.That(firstOrderPerplexity, Is.GreaterThanOrEqualTo(secondOrderPerplexity));

    // Model constructed with 3.
    // NOTE(review): unlike the two models above, this one is fed with
    // Add(entry, 2, 3) rather than (1, 3); left unchanged -- confirm intended.
    var thirdOrderModel = new NGramLanguageModel(3);
    foreach (var entry in trainingData)
    {
        thirdOrderModel.Add(entry, 2, 3);
    }

    var thirdOrderPerplexity = LanguageModelTestUtils.GetPerplexity(thirdOrderModel, heldOutData, 3);

    Assert.That(secondOrderPerplexity, Is.GreaterThanOrEqualTo(thirdOrderPerplexity));
}