コード例 #1
0
ファイル: NGramModelTest.cs プロジェクト: KeliBox/nuve
        public void TestGetSequenceProbabilityForTrigramModel()
        {
            // Build a trigram model from the three fixture sentences, each
            // tokenised with Split(null) (i.e. split on white-space characters).
            var model = new NGramModel(Trigram);
            foreach (var sentence in new[] { Text1, Text2, Text3 })
            {
                model.AddSentence(sentence.Split(null).ToList());
            }

            // Asserts that the model's probability for the given token sequence,
            // rounded to three decimal places, equals the expected value.
            Action<double, string[]> assertProbability = (expected, tokens) =>
                Assert.AreEqual(expected, Math.Round(model.GetSentenceProbability(tokens), 3));

            assertProbability(.167, new[] { "I", "am", "Sam" });
            assertProbability(.167, new[] { "Sam", "I", "am" });
            assertProbability(.333, new[] { "I", "do", "not", "like", "green", "eggs", "and", "ham" });
            assertProbability(.000, new[] { "I", "am", "Sam", "I", "am" });
            assertProbability(.167, new[] { "I", "am" });
        }
コード例 #2
0
ファイル: StatisticalStemmer.cs プロジェクト: KeliBox/nuve
        /// <summary>
        /// Picks a stem for <paramref name="word"/> from the analyses produced by
        /// <c>analyzer</c>, using <c>model</c> to choose between competing analyses.
        /// </summary>
        /// <param name="word">The word to stem.</param>
        /// <returns>
        /// <paramref name="word"/> itself when the analyzer yields no analysis;
        /// otherwise the surface form of the stem of the only analysis, or of the
        /// analysis whose morpheme-id sequence receives the highest score from
        /// <c>model.GetSentenceProbability</c> (the earliest analysis wins ties).
        /// </returns>
        public string GetStem(string word)
        {
            IList<Word> analyses = analyzer.Analyze(word);

            switch (analyses.Count)
            {
                case 0:
                    // Nothing to choose from: hand the input back untouched.
                    return word;

                case 1:
                    // A single analysis needs no scoring.
                    return analyses[0].GetStem().GetSurface();
            }

            int bestIndex = 0;
            double bestScore = double.NegativeInfinity;

            for (int candidate = 0; candidate < analyses.Count; candidate++)
            {
                double score = model.GetSentenceProbability(analyses[candidate].GetMorphemeIds());

                // Strict comparison: a later analysis must beat, not merely match,
                // the current best in order to replace it.
                if (score > bestScore)
                {
                    bestScore = score;
                    bestIndex = candidate;
                }
            }

            return analyses[bestIndex].GetStem().GetSurface();
        }
コード例 #3
0
ファイル: NGramModelTest.cs プロジェクト: KeliBox/nuve
        public void TestGetSequenceProbabilityForUnigramModel()
        {
            // Build a unigram model from the three fixture sentences, each
            // tokenised with Split(null) (i.e. split on white-space characters).
            var model = new NGramModel(Unigram);
            foreach (var sentence in new[] { Text1, Text2, Text3 })
            {
                model.AddSentence(sentence.Split(null).ToList());
            }

            // Asserts that the model's probability for the given token sequence,
            // rounded to the requested number of decimal places, equals the
            // expected value.
            Action<double, int, string[]> assertProbability = (expected, digits, tokens) =>
                Assert.AreEqual(expected, Math.Round(model.GetSentenceProbability(tokens), digits));

            // Single tokens; "the" is expected to score zero.
            assertProbability(0.21, 2, new[] { "I" });
            assertProbability(0.14, 2, new[] { "Sam" });
            assertProbability(0.14, 2, new[] { "am" });
            assertProbability(0.00, 2, new[] { "the" });

            // p(I) * p(am)
            assertProbability(0.0306, 4, new[] { "I", "am" });

            // p(I) * p(do)
            assertProbability(0.015, 3, new[] { "I", "do" });

            // p(Sam) * p(I)
            assertProbability(0.0306, 4, new[] { "Sam", "I" });

            // p(I) * p(am) * p(Sam)
            assertProbability(0.0044, 4, new[] { "I", "am", "Sam" });

            // p(Sam) * p(I) * p(am)
            assertProbability(0.0044, 4, new[] { "Sam", "I", "am" });
        }