public void TestRandomVocabularyAndSentence()
        {
            // Train a default-constructed model on a randomly generated vocabulary
            // (requested size: 10). Every entry is added with the arguments (2, 3)
            // -- presumably the minimum and maximum n-gram length; confirm against
            // NGramLanguageModel.Add.
            var languageModel = new NGramLanguageModel();
            var vocabulary = LanguageModelTestUtils.GenerateRandomVocabulary(10);

            foreach (var entry in vocabulary)
            {
                languageModel.Add(entry, 2, 3);
            }

            // Score one freshly generated random sentence against the trained model.
            var randomSentence = LanguageModelTestUtils.GenerateRandomSentence();
            var score = languageModel.CalculateProbability(randomSentence);

            // Whatever the random input was, the result must lie in [0, 1].
            Assert.That(
                score,
                Is.InRange(0d, 1d),
                "a probability measure should be between 0 and 1 [was {0} ]",
                score);
        }
        /// <summary>
        /// Trains three models, constructed with 1, 2 and 3 respectively, on the same
        /// random training vocabulary and asserts that the perplexity measured on a
        /// separate random test vocabulary does not increase from one model to the next
        /// (unigram &gt;= bigram &gt;= trigram).
        /// </summary>
        public void TestPerplexityComparison()
        {
            // NOTE(review): a training size of 1100000 was previously present here as a
            // commented-out alternative -- presumably reduced to keep the run time
            // acceptable; confirm before changing the size.
            var trainingSet = LanguageModelTestUtils.GenerateRandomVocabulary(11000);
            var heldOutSet = LanguageModelTestUtils.GenerateRandomVocabulary(100);

            // Model built with 1; entries added with (1, 1).
            var unigramModel = new NGramLanguageModel(1);
            foreach (var entry in trainingSet)
            {
                unigramModel.Add(entry, 1, 1);
            }

            var perplexityOfUnigram = LanguageModelTestUtils.GetPerplexity(unigramModel, heldOutSet, 1);

            // Model built with 2; entries added with (1, 2).
            var bigramModel = new NGramLanguageModel(2);
            foreach (var entry in trainingSet)
            {
                bigramModel.Add(entry, 1, 2);
            }

            var perplexityOfBigram = LanguageModelTestUtils.GetPerplexity(bigramModel, heldOutSet, 2);

            // The bigram model must not be worse than the unigram model.
            Assert.That(perplexityOfUnigram, Is.GreaterThanOrEqualTo(perplexityOfBigram));

            // Model built with 3; entries added with (2, 3).
            var trigramModel = new NGramLanguageModel(3);
            foreach (var entry in trainingSet)
            {
                trigramModel.Add(entry, 2, 3);
            }

            var perplexityOfTrigram = LanguageModelTestUtils.GetPerplexity(trigramModel, heldOutSet, 3);

            // The trigram model must not be worse than the bigram model.
            Assert.That(perplexityOfBigram, Is.GreaterThanOrEqualTo(perplexityOfTrigram));
        }