Пример #1
0
        /// <summary>
        /// Estimates the probability of a token sequence by summing the logarithms of the
        /// smoothed probabilities of its n-grams and exponentiating the total.
        /// </summary>
        /// <param name="sample">The token sequence to score.</param>
        /// <returns>
        /// <c>0</c> when the model is empty, when <paramref name="sample"/> yields no n-grams,
        /// or when the accumulated log-probability is NaN; otherwise the exponential of the
        /// accumulated log-probability.
        /// </returns>
        public double CalculateProbability(StringList sample)
        {
            if (Count <= 0)
            {
                return 0d;
            }

            var logProbability = 0d;
            var sawNGram = false;

            foreach (var ngram in NGramUtils.GetNGrams(sample, n))
            {
                sawNGram = true;

                var nMinusOneToken = NGramUtils.GetNMinusOneTokenFirst(ngram);
                if (Count > 1000000)
                {
                    // more than a million entries: use stupid backoff
                    logProbability += Math.Log(GetStupidBackoffProbability(ngram, nMinusOneToken));
                }
                else
                {
                    // otherwise: use laplace smoothing
                    logProbability += Math.Log(GetLaplaceSmoothingProbability(ngram, nMinusOneToken));
                }
            }

            // Nothing was scored, or the sum is undefined: report zero probability.
            if (!sawNGram || double.IsNaN(logProbability))
            {
                return 0d;
            }

            // Always leave log space. A sum close to zero means a probability close to one;
            // skipping the exponentiation there (as a magnitude threshold would) hands the
            // caller a raw, possibly negative, log value instead of a probability.
            return Math.Exp(logProbability);
        }
Пример #2
0
        /// <summary>
        /// Checks how many n-grams <c>NGramUtils.GetNGrams</c> produces for a four-token
        /// sentence: three for size 2 and two for size 3.
        /// </summary>
        public void TestGetNGrams()
        {
            // size 2 over four tokens
            var bigrams = NGramUtils.GetNGrams(new StringList("I", "saw", "brown", "fox"), 2);
            Assert.That(bigrams.Count, Is.EqualTo(3));

            // size 3 over the same four tokens
            var trigrams = NGramUtils.GetNGrams(new StringList("I", "saw", "brown", "fox"), 3);
            Assert.That(trigrams.Count, Is.EqualTo(2));
        }
Пример #3
0
        /// <summary>
        /// Computes the perplexity of a language model over a test set. The inverse
        /// probabilities of every n-gram of every sentence are multiplied together, the
        /// natural logarithm of that product is divided by the number of sentences, and
        /// <c>e</c> is raised to the result.
        /// </summary>
        /// <param name="lm">The language model that scores each n-gram.</param>
        /// <param name="testSet">The sentences to evaluate.</param>
        /// <param name="ngramSize">The n-gram size passed to <c>NGramUtils.GetNGrams</c>.</param>
        /// <returns>
        /// The perplexity, or <see cref="double.PositiveInfinity"/> when the logarithm of the
        /// accumulated product is infinite or NaN.
        /// </returns>
        public static double GetPerplexity(ILanguageModel lm, IList <StringList> testSet, int ngramSize)
        {
            var product = new BigDecimal(1d);

            foreach (var sentence in testSet)
            {
                foreach (var ngram in NGramUtils.GetNGrams(sentence, ngramSize))
                {
                    var probability = lm.CalculateProbability(ngram);

                    // 1 / P(ngram), evaluated with 128-bit decimal precision
                    var inverse = new BigDecimal(1d).divide(new BigDecimal(probability), MathContext.DECIMAL128);
                    product = product.multiply(inverse);
                }
            }

            var logProduct = Math.Log(product.doubleValue());

            var outOfRange = double.IsInfinity(logProduct) || double.IsNaN(logProduct);
            if (outOfRange)
            {
                // over/underflow -> too high perplexity
                return double.PositiveInfinity;
            }

            // NOTE(review): an empty testSet makes the divisor zero; what divide does then
            // depends on the BigDecimal implementation - confirm.
            var sentenceCount = new BigDecimal(testSet.Count);
            var exponent = new BigDecimal(logProduct).divide(sentenceCount, MathContext.DECIMAL128).doubleValue();

            return Math.Pow(Math.E, exponent);
        }