/// <summary>
/// Trains a bigram model on an empty word list and checks that every queried
/// item has probability zero in the context "l".
/// </summary>
public void NoSamples()
{
    string[] noWords = new string[0];
    NgramModel<string, char> emptyModel =
        NgramModel<string, char>.Train(2, noWords, w => w, new MaxLikelihoodSmoother<string, char>());

    // With no training data, no item should receive any probability mass.
    foreach (char item in new[] { 'a', 'l', 'e', 't' })
    {
        Assert.That(emptyModel.GetProbability(item, new Ngram<char>("l")), Is.EqualTo(0));
    }
}
/// <summary>
/// Trains bigram and trigram models with the maximum-likelihood smoother on a
/// small word list and checks the resulting conditional probabilities.
/// </summary>
/// <remarks>
/// The counts quoted in the comments below assume the context precedes the
/// predicted character (left-to-right reading of each word) - confirm against
/// the default direction used by Train.
/// </remarks>
public void GetProbability()
{
    var trainingWords = new[]
    {
        "#call#", "#stall#", "#hello#", "#the#", "#a#", "#test#", "#income#", "#unproduce#"
    };

    NgramModel<string, char> bigramModel =
        NgramModel<string, char>.Train(2, trainingWords, w => w, new MaxLikelihoodSmoother<string, char>());

    // 'a' is followed by 'l' twice (call, stall) and by '#' once (a): 2/3 and 1/3.
    Assert.That(bigramModel.GetProbability('l', new Ngram<char>("a")), Is.EqualTo(0.666).Within(0.001));
    Assert.That(bigramModel.GetProbability('#', new Ngram<char>("a")), Is.EqualTo(0.333).Within(0.001));
    Assert.That(bigramModel.GetProbability('a', new Ngram<char>("a")), Is.EqualTo(0.0));

    // 'l' is followed by 'l' three times, '#' twice and 'o' once: 3/6, 2/6 and 1/6.
    Assert.That(bigramModel.GetProbability('l', new Ngram<char>("l")), Is.EqualTo(0.5));
    Assert.That(bigramModel.GetProbability('o', new Ngram<char>("l")), Is.EqualTo(0.166).Within(0.001));
    Assert.That(bigramModel.GetProbability('#', new Ngram<char>("l")), Is.EqualTo(0.333).Within(0.001));
    Assert.That(bigramModel.GetProbability('a', new Ngram<char>("l")), Is.EqualTo(0.0));

    NgramModel<string, char> trigramModel =
        NgramModel<string, char>.Train(3, trainingWords, w => w, new MaxLikelihoodSmoother<string, char>());

    // "at" never occurs in the word list; "al" is always followed by 'l' (call, stall).
    Assert.That(trigramModel.GetProbability('l', new Ngram<char>("at")), Is.EqualTo(0.0));
    Assert.That(trigramModel.GetProbability('l', new Ngram<char>("al")), Is.EqualTo(1.0));
    Assert.That(trigramModel.GetProbability('t', new Ngram<char>("al")), Is.EqualTo(0.0));
}
/// <summary>
/// Returns the smoothed probability of <paramref name="item"/> given
/// <paramref name="context"/>, interpolating the observed count with the
/// lower-order model's estimate.
/// </summary>
/// <param name="item">The item whose probability is requested.</param>
/// <param name="context">The n-gram context used to look up the frequency distribution.</param>
/// <returns>
/// Zero when the context has no observed samples; otherwise
/// (count + T * lower) / (N + T), where T is the number of distinct observed
/// samples, N is the sample outcome count, and lower is the lower-order
/// probability (or 1 / T when there is no lower-order model).
/// </returns>
/// <remarks>
/// NOTE(review): the formula has the shape of Witten-Bell interpolation -
/// confirm against the enclosing class.
/// </remarks>
public double GetProbability(TItem item, Ngram<TItem> context)
{
    FrequencyDistribution<TItem> dist = _cfd[context];
    int distinctSamples = dist.ObservedSamples.Count;
    if (distinctSamples == 0)
    {
        return 0;
    }

    int itemCount = dist[item];

    // Estimate from the next-shorter context, or a uniform share over the
    // observed samples when no lower-order model is available.
    double lowerOrderProb;
    if (_lowerOrderModel == null)
    {
        lowerOrderProb = 1.0 / distinctSamples;
    }
    else
    {
        lowerOrderProb = _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir));
    }

    double numerator = itemCount + (distinctSamples * lowerOrderProb);
    double denominator = dist.SampleOutcomeCount + distinctSamples;
    return numerator / denominator;
}
/// <summary>
/// Returns the discounted probability of <paramref name="item"/> given
/// <paramref name="context"/>, adding back the discounted mass through the
/// lower-order model.
/// </summary>
/// <param name="item">The item whose probability is requested.</param>
/// <param name="context">The n-gram context used to look up the frequency distribution.</param>
/// <returns>
/// Zero when the context has no observed samples; the plain relative frequency
/// when the context is empty; otherwise the discounted relative frequency plus
/// gamma times the lower-order probability.
/// </returns>
/// <remarks>
/// NOTE(review): three count-dependent discounts plus an interpolation weight
/// look like modified Kneser-Ney smoothing, and the three tuple items are
/// presumably the numbers of items seen once, twice and three or more times
/// after the context - confirm against where the discounts and tuples are built.
/// </remarks>
public double GetProbability(TItem item, Ngram<TItem> context)
{
    FrequencyDistribution<TItem> dist = _cfd[context];
    if (dist.ObservedSamples.Count == 0)
    {
        return 0;
    }

    // Empty context: no discounting, just the relative frequency.
    if (context.Length == 0)
    {
        return (double)dist[item] / dist.SampleOutcomeCount;
    }

    int itemCount = dist[item];
    Tuple<int, int, int> contextCounts = _bigNs[context];

    // Weight given to the lower-order estimate for this context.
    double backoffWeight =
        ((_discount1 * contextCounts.Item1)
         + (_discount2 * contextCounts.Item2)
         + (_discount3 * contextCounts.Item3)) / dist.SampleOutcomeCount;

    // Pick the discount by how often the item was seen; unseen items get none.
    double discount = 0;
    switch (itemCount)
    {
        case 1:
            discount = _discount1;
            break;
        case 2:
            discount = _discount2;
            break;
        default:
            if (itemCount > 2)
            {
                discount = _discount3;
            }
            break;
    }

    double discountedProb = (itemCount - discount) / dist.SampleOutcomeCount;
    double lowerOrderProb = _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir));
    return discountedProb + (backoffWeight * lowerOrderProb);
}