AddSentence() public method

public void AddSentence(IEnumerable tokens)
Parameter: tokens (IEnumerable)
Returns: void
Esempio n. 1
0
        /// <summary>
        /// Feeds Text1, Text2 and Text3 into a model constructed with <c>Bigram</c> and
        /// checks the values returned by <c>GetSentenceProbability</c> for several
        /// token sequences.
        /// </summary>
        public void TestGetSequenceProbabilityForBigramModel()
        {
            var bigramModel = new NGramModel(Bigram);

            // Each text is split on whitespace and added as one sentence.
            foreach (var text in new[] { Text1, Text2, Text3 })
            {
                bigramModel.AddSentence(text.Split(null).ToList());
            }

            // Results are rounded to three decimals before the comparison.
            double iAmSam = bigramModel.GetSentenceProbability("I", "am", "Sam");
            Assert.AreEqual(.111, Math.Round(iAmSam, 3));

            double samIAm = bigramModel.GetSentenceProbability("Sam", "I", "am");
            Assert.AreEqual(.056, Math.Round(samIAm, 3));

            double greenEggs = bigramModel.GetSentenceProbability(
                "I", "do", "not", "like", "green", "eggs", "and", "ham");
            Assert.AreEqual(.222, Math.Round(greenEggs, 3));

            double iAmSamIAm = bigramModel.GetSentenceProbability("I", "am", "Sam", "I", "am");
            Assert.AreEqual(.037, Math.Round(iAmSamIAm, 3));

            double iAm = bigramModel.GetSentenceProbability("I", "am");
            Assert.AreEqual(.222, Math.Round(iAm, 3));

            // This last case is compared without rounding: the result must be exactly zero.
            double iAmThe = bigramModel.GetSentenceProbability("I", "am", "the");
            Assert.AreEqual(.0, iAmThe);
        }
Esempio n. 2
0
        /// <summary>
        /// Creates an <c>NGramModel</c> of order 2 and populates it from the lines of
        /// a local <c>unigrams.txt</c> file, then calls <c>Deserialize</c> on it with a
        /// local JSON file path.
        /// </summary>
        /// <remarks>
        /// Each line is split on whitespace. The first field is passed to
        /// <c>Analyzer.Analyze</c>; for every returned solution, its morpheme ids are
        /// added as a sentence a number of times derived from the second field.
        /// NOTE(review): the second field is presumably a frequency count - confirm
        /// against the data file. What <c>Deserialize</c> does to the sentences already
        /// added is not visible from this method.
        /// </remarks>
        /// <returns>The populated model.</returns>
        private static NGramModel CreateModel()
        {
            const string unigramsPath =
                @"C:\Users\hrzafer\Desktop\workspace\Damla\code\suggestion\unigrams.txt";
            const string serializedModelPath =
                @"C:\Users\hrzafer\Desktop\workspace\Prizma\code\prizma\src\main\resources\stemDict\model_uni_bi.json";

            var rawLines = File.ReadAllLines(unigramsPath);
            var model = new NGramModel(2);

            for (var index = 0; index < rawLines.Length; index++)
            {
                var fields = rawLines[index].Split(null);

                foreach (var analysis in Analyzer.Analyze(fields[0]))
                {
                    var morphemeIds = analysis.GetMorphemeIds();

                    // Repetition count: (second field + 99) / 100, rounded by Math.Round.
                    var repetitions = Math.Round((int.Parse(fields[1]) + 99) / 100.0);
                    for (var pass = 0; pass < repetitions; pass++)
                    {
                        model.AddSentence(morphemeIds);
                    }
                }

                // Progress output after every 100th processed line.
                var processed = index + 1;
                if (processed % 100 == 0)
                {
                    Console.WriteLine(processed);
                }
            }

            model.Deserialize(serializedModelPath);

            return model;
        }
Esempio n. 3
0
        /// <summary>
        /// Feeds Text1, Text2 and Text3 into a model constructed with <c>Unigram</c> and
        /// checks the rounded values returned by <c>GetSentenceProbability</c> for
        /// single tokens and for short token sequences.
        /// </summary>
        public void TestGetSequenceProbabilityForUnigramModel()
        {
            var unigramModel = new NGramModel(Unigram);

            // Each text is split on whitespace and added as one sentence.
            foreach (var text in new[] { Text1, Text2, Text3 })
            {
                unigramModel.AddSentence(text.Split(null).ToList());
            }

            // Single tokens, compared at two decimals.
            double pI = unigramModel.GetSentenceProbability("I");
            Assert.AreEqual(0.21, Math.Round(pI, 2));

            double pSam = unigramModel.GetSentenceProbability("Sam");
            Assert.AreEqual(0.14, Math.Round(pSam, 2));

            double pAm = unigramModel.GetSentenceProbability("am");
            Assert.AreEqual(0.14, Math.Round(pAm, 2));

            double pThe = unigramModel.GetSentenceProbability("the");
            Assert.AreEqual(0.00, Math.Round(pThe, 2));

            // Two tokens: p(I) * p(am)
            double pIAm = unigramModel.GetSentenceProbability("I", "am");
            Assert.AreEqual(0.0306, Math.Round(pIAm, 4));

            // Two tokens: p(I) * p(do)
            double pIDo = unigramModel.GetSentenceProbability("I", "do");
            Assert.AreEqual(0.015, Math.Round(pIDo, 3));

            // Two tokens: p(Sam) * p(I)
            double pSamI = unigramModel.GetSentenceProbability("Sam", "I");
            Assert.AreEqual(0.0306, Math.Round(pSamI, 4));

            // Three tokens: p(I) * p(am) * p(Sam)
            double pIAmSam = unigramModel.GetSentenceProbability("I", "am", "Sam");
            Assert.AreEqual(0.0044, Math.Round(pIAmSam, 4));

            // Three tokens: p(Sam) * p(I) * p(am)
            double pSamIAm = unigramModel.GetSentenceProbability("Sam", "I", "am");
            Assert.AreEqual(0.0044, Math.Round(pSamIAm, 4));
        }