Пример #1
0
        // Checks sentence probabilities reported by a trigram model that was
        // fed the three fixture sentences Text1, Text2 and Text3 (in that order).
        // Each probability is rounded to three decimals before being compared.
        public void TestGetSequenceProbabilityForTrigramModel()
        {
            var trigramModel = new NGramModel(Trigram);

            foreach (var text in new[] { Text1, Text2, Text3 })
            {
                // Split(null) tokenizes on white space.
                trigramModel.AddSentence(text.Split(null).ToList());
            }

            double probability = trigramModel.GetSentenceProbability("I", "am", "Sam");
            Assert.AreEqual(.167, Math.Round(probability, 3));

            probability = trigramModel.GetSentenceProbability("Sam", "I", "am");
            Assert.AreEqual(.167, Math.Round(probability, 3));

            probability = trigramModel.GetSentenceProbability("I", "do", "not", "like", "green", "eggs", "and", "ham");
            Assert.AreEqual(.333, Math.Round(probability, 3));

            // This word sequence is expected to score zero.
            probability = trigramModel.GetSentenceProbability("I", "am", "Sam", "I", "am");
            Assert.AreEqual(.000, Math.Round(probability, 3));

            // A sequence shorter than the model order is queried as well.
            probability = trigramModel.GetSentenceProbability("I", "am");
            Assert.AreEqual(.167, Math.Round(probability, 3));
        }
Пример #2
0
        // Checks sentence probabilities reported by a unigram model that was
        // fed the three fixture sentences Text1, Text2 and Text3 (in that order).
        // Each probability is rounded before comparison; the number of decimals
        // varies per case and is given as the second argument of Math.Round.
        public void TestGetSequenceProbabilityForUnigramModel()
        {
            var unigramModel = new NGramModel(Unigram);

            foreach (var text in new[] { Text1, Text2, Text3 })
            {
                // Split(null) tokenizes on white space.
                unigramModel.AddSentence(text.Split(null).ToList());
            }

            // Single-word queries.
            double probability = unigramModel.GetSentenceProbability("I");
            Assert.AreEqual(0.21, Math.Round(probability, 2));

            probability = unigramModel.GetSentenceProbability("Sam");
            Assert.AreEqual(0.14, Math.Round(probability, 2));

            probability = unigramModel.GetSentenceProbability("am");
            Assert.AreEqual(0.14, Math.Round(probability, 2));

            // "the" is expected to score zero.
            probability = unigramModel.GetSentenceProbability("the");
            Assert.AreEqual(0.00, Math.Round(probability, 2));

            // Multi-word queries: expected value is the product of the word probabilities.

            // p(I) * p(am)
            probability = unigramModel.GetSentenceProbability("I", "am");
            Assert.AreEqual(0.0306, Math.Round(probability, 4));

            // p(I) * p(do)
            probability = unigramModel.GetSentenceProbability("I", "do");
            Assert.AreEqual(0.015, Math.Round(probability, 3));

            // p(Sam) * p(I)
            probability = unigramModel.GetSentenceProbability("Sam", "I");
            Assert.AreEqual(0.0306, Math.Round(probability, 4));

            // p(I) * p(am) * p(Sam)
            probability = unigramModel.GetSentenceProbability("I", "am", "Sam");
            Assert.AreEqual(0.0044, Math.Round(probability, 4));

            // p(Sam) * p(I) * p(am)
            probability = unigramModel.GetSentenceProbability("Sam", "I", "am");
            Assert.AreEqual(0.0044, Math.Round(probability, 4));
        }
Пример #3
0
        /// <summary>
        /// Builds an <see cref="NGramModel"/> of order 2 from a white-space separated
        /// word list file: the first token of each line is passed to
        /// <c>Analyzer.Analyze</c>, and the morpheme ids of every returned analysis
        /// are added to the model as a sentence, repeated according to the integer
        /// in the second token. Finally <c>Deserialize</c> is invoked on the model
        /// with a JSON file path.
        /// </summary>
        /// <returns>The populated model.</returns>
        /// <exception cref="FormatException">
        /// The second token of a line is not a valid integer.
        /// </exception>
        private static NGramModel CreateModel()
        {
            // Lazily tokenized lines; enumerated exactly once by the loop below.
            var lines = File.ReadAllLines(
                @"C:\Users\hrzafer\Desktop\workspace\Damla\code\suggestion\unigrams.txt")
                        .Select(x => x.Split(null));
            var nGramModel = new NGramModel(2);

            var counter = 0;

            foreach (var line in lines)
            {
                counter++;

                // Blank or single-token lines carry no count; skip them instead of
                // failing with IndexOutOfRangeException on line[1].
                if (line.Length >= 2)
                {
                    // The repetition count depends only on the line, so compute it once
                    // rather than once per analysis. The file is machine-readable data,
                    // hence the invariant culture.
                    var frequency = int.Parse(line[1], System.Globalization.CultureInfo.InvariantCulture);

                    // (frequency + 99) / 100 rounded to the nearest integer; Math.Round
                    // resolves exact midpoints to the nearest even value.
                    // NOTE(review): if a ceiling of frequency / 100 was intended, this
                    // expression does not compute it - confirm before changing.
                    var times = (int)Math.Round((frequency + 99) / (double)100);

                    var solutions = Analyzer.Analyze(line[0]);
                    foreach (var solution in solutions)
                    {
                        var morphemeIds = solution.GetMorphemeIds();
                        for (var i = 0; i < times; i++)
                        {
                            nGramModel.AddSentence(morphemeIds);
                        }
                    }
                }

                // Progress indicator: print the running line count every 100 lines.
                if (counter % 100 == 0)
                {
                    Console.WriteLine(counter);
                }
            }

            // NOTE(review): Deserialize is called after the model has been populated above;
            // whether it merges with or replaces the accumulated counts cannot be told
            // from this method - confirm against NGramModel.
            nGramModel.Deserialize(
                @"C:\Users\hrzafer\Desktop\workspace\Prizma\code\prizma\src\main\resources\stemDict\model_uni_bi.json");

            return nGramModel;
        }