Пример #1
0
        /// <summary>
        /// Scores <paramref name="sample"/> against this model: the log-probabilities of the
        /// n-grams produced by <c>NGramUtils.GetNGrams(sample, n)</c> are summed and the total
        /// is exponentiated.
        /// </summary>
        /// <param name="sample">The token list to score.</param>
        /// <returns>
        /// <c>0</c> when the model is empty (<c>Count &lt;= 0</c>), when the sample yields no
        /// n-grams, or when the accumulated log-probability is NaN; otherwise
        /// <c>Math.Exp</c> of the accumulated log-probability.
        /// </returns>
        public double CalculateProbability(StringList sample)
        {
            // Above this model size stupid backoff is used instead of Laplace smoothing.
            const int stupidBackoffThreshold = 1000000;

            if (Count <= 0)
            {
                return 0d;
            }

            // The smoothing strategy depends only on the model size, so decide once.
            var useStupidBackoff = Count > stupidBackoffThreshold;

            var logProbability = 0d;
            var scoredAny = false;

            foreach (var ngram in NGramUtils.GetNGrams(sample, n))
            {
                var nMinusOneToken = NGramUtils.GetNMinusOneTokenFirst(ngram);

                var ngramProbability = useStupidBackoff
                    ? GetStupidBackoffProbability(ngram, nMinusOneToken)
                    : GetLaplaceSmoothingProbability(ngram, nMinusOneToken);

                // Accumulate in log space so that long samples do not underflow.
                logProbability += Math.Log(ngramProbability);
                scoredAny = true;
            }

            // "Nothing was scored" is tracked explicitly rather than inferred from the sum being
            // close to zero: a log-probability near 0 means a probability near 1, and it must
            // still be exponentiated instead of being returned as a raw (negative) log value.
            if (!scoredAny || double.IsNaN(logProbability))
            {
                return 0d;
            }

            return Math.Exp(logProbability);
        }
Пример #2
0
        /// <summary>
        /// Computes the "stupid backoff" score of <paramref name="ngram"/>.
        /// </summary>
        /// <remarks>
        /// Three cases are handled, in this order:
        /// the context is null or empty - the n-gram count divided by <c>Count</c>;
        /// the n-gram has a positive count - its count divided by the count of the context;
        /// otherwise - 0.4 times the score of the n-gram returned by
        /// <c>NGramUtils.GetNMinusOneTokenLast</c> (presumably the n-gram without its first
        /// token - confirm against <c>NGramUtils</c>).
        /// </remarks>
        /// <param name="ngram">The n-gram to score.</param>
        /// <param name="nMinusOneToken">The context of <paramref name="ngram"/>; may be null or empty.</param>
        /// <returns>The backoff score of <paramref name="ngram"/>.</returns>
        private double GetStupidBackoffProbability(StringList ngram, StringList nMinusOneToken)
        {
            var ngramCount = GetCount(ngram);
            var lacksContext = nMinusOneToken == null || nMinusOneToken.Count == 0;

            if (lacksContext)
            {
                // No context left to condition on: relate the count to the model size.
                return (double)ngramCount / Count;
            }

            if (ngramCount <= 0)
            {
                // Unseen n-gram: back off to the shorter n-gram and discount the result.
                var shorterNgram = NGramUtils.GetNMinusOneTokenLast(ngram);
                var shorterContext = NGramUtils.GetNMinusOneTokenFirst(shorterNgram);

                return 0.4d * GetStupidBackoffProbability(shorterNgram, shorterContext);
            }

            // Seen n-gram: maximum likelihood probability.
            var contextCount = (double)GetCount(nMinusOneToken);

            return ngramCount / contextCount;
        }