/// <summary>
/// Calculates the probability of the given sample under this n-gram model.
/// </summary>
/// <remarks>
/// The sample is split into n-grams via <c>NGramUtils.GetNGrams(sample, n)</c>. Each n-gram is
/// scored with stupid backoff when <c>Count</c> exceeds 1,000,000 and with Laplace smoothing
/// otherwise. The per-n-gram scores are combined by summing their natural logarithms and
/// exponentiating the sum once at the end.
/// </remarks>
/// <param name="sample">The token sequence to score.</param>
/// <returns>
/// <c>0</c> when <c>Count</c> is not positive, when the sample yields no n-grams, or when the
/// accumulated log-probability is not a number; otherwise the exponential of the summed
/// log-probabilities (i.e. the product of the per-n-gram probabilities).
/// </returns>
public double CalculateProbability(StringList sample) {
    if (Count <= 0) {
        return 0d;
    }

    var logProbability = 0d;

    // Tracks "nothing was scored" explicitly. Inferring it from the magnitude of the log sum
    // is wrong: a sum of (nearly) zero also means a probability of (nearly) one, and would
    // otherwise be reported as zero or as a raw, possibly negative, logarithm.
    var scoredAny = false;

    foreach (var ngram in NGramUtils.GetNGrams(sample, n)) {
        var nMinusOneToken = NGramUtils.GetNMinusOneTokenFirst(ngram);

        // Only the selected estimator is evaluated for each n-gram.
        var ngramProbability = Count > 1000000
            ? GetStupidBackoffProbability(ngram, nMinusOneToken)    // use stupid backoff
            : GetLaplaceSmoothingProbability(ngram, nMinusOneToken); // use laplace smoothing

        logProbability += Math.Log(ngramProbability);
        scoredAny = true;
    }

    if (!scoredAny || double.IsNaN(logProbability)) {
        return 0d;
    }

    // Math.Exp maps negative infinity (some n-gram scored 0) to 0, so no special case is needed.
    return Math.Exp(logProbability);
}
/// <summary>
/// Scores an n-gram by relative frequency, backing off to the n-gram returned by
/// <c>NGramUtils.GetNMinusOneTokenLast</c> whenever the current one has a zero count.
/// </summary>
/// <remarks>
/// At each step:
/// <list type="bullet">
/// <item>if the prefix is <c>null</c> or empty, the score is <c>GetCount(ngram) / Count</c>;</item>
/// <item>otherwise, if the n-gram has a positive count, the score is its count divided by the
/// count of the prefix (maximum likelihood probability);</item>
/// <item>otherwise the n-gram is replaced by <c>GetNMinusOneTokenLast(ngram)</c>, the prefix by
/// <c>GetNMinusOneTokenFirst</c> of that replacement, and the eventual score is scaled by 0.4.</item>
/// </list>
/// NOTE(review): termination relies on the NGramUtils helpers eventually producing an n-gram whose
/// prefix is null or empty (presumably by dropping one token per call) - confirm against NGramUtils.
/// </remarks>
/// <param name="ngram">The n-gram to score.</param>
/// <param name="nMinusOneToken">The prefix of <paramref name="ngram"/> as supplied by the caller.</param>
/// <returns>The relative-frequency score, multiplied by 0.4 once per back-off step taken.</returns>
private double GetStupidBackoffProbability(StringList ngram, StringList nMinusOneToken) {
    const double backoffFactor = 0.4d;

    var current = ngram;
    var prefix = nMinusOneToken;
    var backoffSteps = 0;
    double baseProbability;

    while (true) {
        var occurrences = GetCount(current);

        if (prefix == null || prefix.Count == 0) {
            baseProbability = (double)occurrences / Count;
            break;
        }

        if (occurrences > 0) {
            // maximum likelihood probability
            baseProbability = occurrences / (double)GetCount(prefix);
            break;
        }

        // Unseen n-gram: retry with the shorter n-gram and remember that a penalty is owed.
        current = NGramUtils.GetNMinusOneTokenLast(current);
        prefix = NGramUtils.GetNMinusOneTokenFirst(current);
        backoffSteps++;
    }

    // Apply the penalty one step at a time, innermost first, so the floating-point result is
    // the same as nesting the multiplications.
    var score = baseProbability;
    for (var step = 0; step < backoffSteps; step++) {
        score = backoffFactor * score;
    }

    return score;
}