// Trains a model constructed with Trigram on the three sample texts and checks
// GetSentenceProbability for several word sequences, compared at three decimals.
public void TestGetSequenceProbabilityForTrigramModel()
{
    var trigramModel = new NGramModel(Trigram);
    foreach (var text in new[] { Text1, Text2, Text3 })
    {
        trigramModel.AddSentence(text.Split(null).ToList());
    }

    double iAmSam = trigramModel.GetSentenceProbability("I", "am", "Sam");
    Assert.AreEqual(.167, Math.Round(iAmSam, 3));

    double samIAm = trigramModel.GetSentenceProbability("Sam", "I", "am");
    Assert.AreEqual(.167, Math.Round(samIAm, 3));

    double greenEggs = trigramModel.GetSentenceProbability(
        "I", "do", "not", "like", "green", "eggs", "and", "ham");
    Assert.AreEqual(.333, Math.Round(greenEggs, 3));

    // This five-word sequence is expected to round to zero.
    double iAmSamIAm = trigramModel.GetSentenceProbability("I", "am", "Sam", "I", "am");
    Assert.AreEqual(.000, Math.Round(iAmSamIAm, 3));

    // A sequence of only two words is still expected to get a non-zero probability.
    double iAm = trigramModel.GetSentenceProbability("I", "am");
    Assert.AreEqual(.167, Math.Round(iAm, 3));
}
// Trains a model constructed with Unigram on the three sample texts and checks
// GetSentenceProbability for single words and for multi-word sequences, where the
// expected value is the product of the individual word probabilities.
public void TestGetSequenceProbabilityForUnigramModel()
{
    var unigramModel = new NGramModel(Unigram);
    foreach (var text in new[] { Text1, Text2, Text3 })
    {
        unigramModel.AddSentence(text.Split(null).ToList());
    }

    // Single-word probabilities, compared at two decimals.
    double pI = unigramModel.GetSentenceProbability("I");
    Assert.AreEqual(0.21, Math.Round(pI, 2));

    double pSam = unigramModel.GetSentenceProbability("Sam");
    Assert.AreEqual(0.14, Math.Round(pSam, 2));

    double pAm = unigramModel.GetSentenceProbability("am");
    Assert.AreEqual(0.14, Math.Round(pAm, 2));

    // "the" is expected to round to zero.
    double pThe = unigramModel.GetSentenceProbability("the");
    Assert.AreEqual(0.00, Math.Round(pThe, 2));

    // p(I) * p(am)
    double pIAm = unigramModel.GetSentenceProbability("I", "am");
    Assert.AreEqual(0.0306, Math.Round(pIAm, 4));

    // p(I) * p(do)
    double pIDo = unigramModel.GetSentenceProbability("I", "do");
    Assert.AreEqual(0.015, Math.Round(pIDo, 3));

    // p(Sam) * p(I)
    double pSamI = unigramModel.GetSentenceProbability("Sam", "I");
    Assert.AreEqual(0.0306, Math.Round(pSamI, 4));

    // p(I) * p(am) * p(Sam)
    double pIAmSam = unigramModel.GetSentenceProbability("I", "am", "Sam");
    Assert.AreEqual(0.0044, Math.Round(pIAmSam, 4));

    // p(Sam) * p(I) * p(am)
    double pSamIAm = unigramModel.GetSentenceProbability("Sam", "I", "am");
    Assert.AreEqual(0.0044, Math.Round(pSamIAm, 4));
}
/// <summary>
/// Builds an <c>NGramModel</c> (constructed with argument 2) from a whitespace-separated
/// "word count" file. Every analysis returned by <c>Analyzer.Analyze</c> for a word has its
/// morpheme ids added to the model a number of times derived from the word's count.
/// Progress is written to the console every 100 input lines, and
/// <c>Deserialize</c> is called on the model before it is returned.
/// </summary>
/// <returns>The populated model.</returns>
/// <exception cref="FormatException">
/// A non-blank line has fewer than two fields, or its second field is not an integer.
/// </exception>
private static NGramModel CreateModel()
{
    // Machine-specific absolute paths, kept exactly as they were.
    const string unigramsPath =
        @"C:\Users\hrzafer\Desktop\workspace\Damla\code\suggestion\unigrams.txt";
    const string serializedModelPath =
        @"C:\Users\hrzafer\Desktop\workspace\Prizma\code\prizma\src\main\resources\stemDict\model_uni_bi.json";

    var nGramModel = new NGramModel(2);
    var counter = 0;

    foreach (var rawLine in File.ReadAllLines(unigramsPath))
    {
        // Blank lines still count towards the progress counter, as before.
        counter++;

        // Skip blank lines instead of failing on the missing count field.
        if (rawLine.Trim().Length != 0)
        {
            // A null separator splits on whitespace; dropping empty entries keeps
            // doubled separators or leading whitespace from shifting the fields.
            var fields = rawLine.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            if (fields.Length < 2)
            {
                throw new FormatException(string.Format(
                    "Line {0} of '{1}' must contain a word and a count: '{2}'",
                    counter, unigramsPath, rawLine));
            }

            // Parsed once per line (it does not depend on the analysis) and with the
            // invariant culture because the file is machine-readable data.
            var count = int.Parse(fields[1], System.Globalization.CultureInfo.InvariantCulture);

            // Same formula as before: round((count + 99) / 100). Math.Round uses
            // round-half-to-even by default, so e.g. a count of 151 (2.5) yields 2.
            var times = (int)Math.Round((count + 99) / (double)100);

            foreach (var solution in Analyzer.Analyze(fields[0]))
            {
                var morphemeIds = solution.GetMorphemeIds();
                for (var i = 0; i < times; i++)
                {
                    nGramModel.AddSentence(morphemeIds);
                }
            }
        }

        if (counter % 100 == 0)
        {
            Console.WriteLine(counter);
        }
    }

    // NOTE(review): Deserialize runs after the model has been trained above. Whether it
    // merges with or replaces the counts added in the loop is not visible from this
    // method - confirm this call (rather than a serialize step) is what is intended.
    nGramModel.Deserialize(serializedModelPath);
    return nGramModel;
}