/// <summary>
/// Trains a model constructed with <c>Trigram</c> on the three fixture texts and
/// checks the probability it reports for several word sequences, each rounded
/// to three decimal places before comparison.
/// </summary>
public void TestGetSequenceProbabilityForTrigramModel()
{
    // Number of decimal places every probability is rounded to before asserting.
    const int precision = 3;

    var trigramModel = new NGramModel(Trigram);

    // Each fixture text is split on whitespace and fed to the model as one sentence.
    foreach (var text in new[] { Text1, Text2, Text3 })
    {
        trigramModel.AddSentence(text.Split(null).ToList());
    }

    Assert.AreEqual(
        0.167,
        Math.Round(trigramModel.GetSentenceProbability("I", "am", "Sam"), precision));

    Assert.AreEqual(
        0.167,
        Math.Round(trigramModel.GetSentenceProbability("Sam", "I", "am"), precision));

    Assert.AreEqual(
        0.333,
        Math.Round(
            trigramModel.GetSentenceProbability("I", "do", "not", "like", "green", "eggs", "and", "ham"),
            precision));

    // This five-word sequence is expected to round to zero.
    Assert.AreEqual(
        0.000,
        Math.Round(trigramModel.GetSentenceProbability("I", "am", "Sam", "I", "am"), precision));

    // A sequence shorter than three words is still expected to get a non-zero score.
    Assert.AreEqual(
        0.167,
        Math.Round(trigramModel.GetSentenceProbability("I", "am"), precision));
}
/// <summary>
/// Returns the stem surface for <paramref name="word"/>, using the model's
/// sentence probability to choose among competing analyses.
/// </summary>
/// <param name="word">The word handed to the analyzer.</param>
/// <returns>
/// <paramref name="word"/> itself when the analyzer yields no solutions;
/// otherwise the surface of the stem of the only solution, or of the solution
/// whose morpheme-id sequence receives the highest probability from the model.
/// </returns>
public string GetStem(string word)
{
    IList<Word> solutions = analyzer.Analyze(word);

    // Nothing to choose from: fall back to the input unchanged.
    if (solutions.Count == 0)
    {
        return word;
    }

    // A single analysis needs no scoring.
    if (solutions.Count == 1)
    {
        return solutions[0].GetStem().GetSurface();
    }

    double max = double.NegativeInfinity;
    int maxIndex = 0;
    for (int i = 0; i < solutions.Count; i++)
    {
        double p = model.GetSentenceProbability(solutions[i].GetMorphemeIds());

        // Strict '>' keeps the earliest solution on ties; if no score ever
        // exceeds negative infinity, index 0 remains selected.
        if (p > max)
        {
            max = p;
            maxIndex = i;
        }
    }

    return solutions[maxIndex].GetStem().GetSurface();
}
/// <summary>
/// Trains a model constructed with <c>Unigram</c> on the three fixture texts and
/// checks the probability it reports for single words and for short word
/// sequences. Each probability is rounded before comparison.
/// </summary>
public void TestGetSequenceProbabilityForUnigramModel()
{
    var model = new NGramModel(Unigram);
    model.AddSentence(Text1.Split(null).ToList());
    model.AddSentence(Text2.Split(null).ToList());
    model.AddSentence(Text3.Split(null).ToList());

    // Single-word probabilities, compared at two decimal places.
    double actual = model.GetSentenceProbability("I");
    double expected = 0.21;
    Assert.AreEqual(expected, Math.Round(actual, 2));

    actual = model.GetSentenceProbability("Sam");
    expected = 0.14;
    Assert.AreEqual(expected, Math.Round(actual, 2));

    actual = model.GetSentenceProbability("am");
    expected = 0.14;
    Assert.AreEqual(expected, Math.Round(actual, 2));

    // Expected to score zero (presumably absent from the fixture texts; confirm against Text1-Text3).
    actual = model.GetSentenceProbability("the");
    expected = 0.00;
    Assert.AreEqual(expected, Math.Round(actual, 2));

    // p(I) * p(am)
    actual = model.GetSentenceProbability("I", "am");
    expected = 0.0306;
    Assert.AreEqual(expected, Math.Round(actual, 4));

    // p(I) * p(do)
    actual = model.GetSentenceProbability("I", "do");
    expected = 0.015;
    Assert.AreEqual(expected, Math.Round(actual, 3));

    // p(Sam) * p(I)
    actual = model.GetSentenceProbability("Sam", "I");
    expected = 0.0306;
    Assert.AreEqual(expected, Math.Round(actual, 4));

    // p(I) * p(am) * p(Sam)
    actual = model.GetSentenceProbability("I", "am", "Sam");
    expected = 0.0044;
    Assert.AreEqual(expected, Math.Round(actual, 4));

    // p(Sam) * p(I) * p(am)
    actual = model.GetSentenceProbability("Sam", "I", "am");
    expected = 0.0044;
    Assert.AreEqual(expected, Math.Round(actual, 4));
}