예제 #1
0
        /// <summary>
        /// Trains a bigram model on an empty word list and checks that every
        /// queried probability is zero.
        /// </summary>
        public void NoSamples()
        {
            var emptyCorpus = new string[0];
            var smoother = new MaxLikelihoodSmoother<string, char>();

            NgramModel<string, char> bigramModel =
                NgramModel<string, char>.Train(2, emptyCorpus, w => w, smoother);

            // Same characters, same order as the individual assertions they replace;
            // a fresh context is built for every query.
            foreach (char candidate in new[] { 'a', 'l', 'e', 't' })
            {
                Assert.That(bigramModel.GetProbability(candidate, new Ngram<char>("l")), Is.EqualTo(0));
            }
        }
예제 #2
0
        /// <summary>
        /// Trains bigram and trigram models with maximum-likelihood smoothing on a
        /// small word list and checks selected conditional probabilities.
        /// </summary>
        public void GetProbability()
        {
            string[] corpus =
            {
                "#call#", "#stall#", "#hello#", "#the#", "#a#", "#test#", "#income#", "#unproduce#"
            };

            NgramModel<string, char> bigramModel = NgramModel<string, char>.Train(
                2, corpus, w => w, new MaxLikelihoodSmoother<string, char>());

            // In the corpus, 'a' is followed by 'l' twice ("call", "stall") and by '#' once ("#a#").
            var afterA = new Ngram<char>("a");
            Assert.That(bigramModel.GetProbability('l', afterA), Is.EqualTo(0.666).Within(0.001));
            Assert.That(bigramModel.GetProbability('#', new Ngram<char>("a")), Is.EqualTo(0.333).Within(0.001));
            Assert.That(bigramModel.GetProbability('a', new Ngram<char>("a")), Is.EqualTo(0.0));

            // In the corpus, 'l' is followed by 'l' three times, '#' twice and 'o' once.
            var afterL = new Ngram<char>("l");
            Assert.That(bigramModel.GetProbability('l', afterL), Is.EqualTo(0.5));
            Assert.That(bigramModel.GetProbability('o', new Ngram<char>("l")), Is.EqualTo(0.166).Within(0.001));
            Assert.That(bigramModel.GetProbability('#', new Ngram<char>("l")), Is.EqualTo(0.333).Within(0.001));
            Assert.That(bigramModel.GetProbability('a', new Ngram<char>("l")), Is.EqualTo(0.0));

            NgramModel<string, char> trigramModel = NgramModel<string, char>.Train(
                3, corpus, w => w, new MaxLikelihoodSmoother<string, char>());

            // The sequence "at" never occurs in the corpus.
            Assert.That(trigramModel.GetProbability('l', new Ngram<char>("at")), Is.EqualTo(0.0));

            // "al" occurs twice and is followed by 'l' both times.
            Assert.That(trigramModel.GetProbability('l', new Ngram<char>("al")), Is.EqualTo(1.0));
            Assert.That(trigramModel.GetProbability('t', new Ngram<char>("al")), Is.EqualTo(0.0));
        }
예제 #3
0
        /// <summary>
        /// Computes the smoothed probability of <paramref name="item"/> given
        /// <paramref name="context"/>.
        /// </summary>
        /// <param name="item">The item whose probability is requested.</param>
        /// <param name="context">The n-gram context used to look up the frequency distribution.</param>
        /// <returns>
        /// 0 when the context has no observed samples; otherwise
        /// (count(item) + T * fallback) / (N + T), where T is the number of observed
        /// samples, N is the sample outcome count, and fallback is either the
        /// lower-order model's estimate or 1/T when there is no lower-order model.
        /// </returns>
        public double GetProbability(TItem item, Ngram<TItem> context)
        {
            FrequencyDistribution<TItem> dist = _cfd[context];
            var observedTypes = dist.ObservedSamples.Count;

            if (observedTypes == 0)
            {
                // Nothing recorded for this context.
                return 0;
            }

            double fallback;
            if (_lowerOrderModel == null)
            {
                // No lower-order model: spread the mass evenly over the observed samples.
                fallback = 1.0 / observedTypes;
            }
            else
            {
                fallback = _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir));
            }

            double numerator = dist[item] + (observedTypes * fallback);
            double denominator = dist.SampleOutcomeCount + observedTypes;

            return numerator / denominator;
        }
예제 #4
0
        /// <summary>
        /// Computes the discounted probability of <paramref name="item"/> given
        /// <paramref name="context"/>, interpolated with the lower-order model.
        /// </summary>
        /// <param name="item">The item whose probability is requested.</param>
        /// <param name="context">The n-gram context used to look up the frequency distribution.</param>
        /// <returns>
        /// 0 when the context has no observed samples; the plain relative frequency
        /// when the context is empty; otherwise the discounted relative frequency
        /// plus the interpolation weight times the lower-order estimate.
        /// </returns>
        public double GetProbability(TItem item, Ngram<TItem> context)
        {
            FrequencyDistribution<TItem> dist = _cfd[context];

            if (dist.ObservedSamples.Count == 0)
            {
                return 0;
            }

            if (context.Length == 0)
            {
                // Empty context: undiscounted relative frequency.
                return (double)dist[item] / dist.SampleOutcomeCount;
            }

            int occurrences = dist[item];
            Tuple<int, int, int> typeCounts = _bigNs[context];

            // Interpolation weight: the three discounts weighted by the per-context
            // counts stored in _bigNs, divided by the sample outcome count.
            double discountMass = (_discount1 * typeCounts.Item1)
                                  + (_discount2 * typeCounts.Item2)
                                  + (_discount3 * typeCounts.Item3);
            double interpolationWeight = discountMass / dist.SampleOutcomeCount;

            // The discount depends on how often the item was seen: once, twice, or
            // more than twice. Items with no occurrences are not discounted.
            double discount;
            switch (occurrences)
            {
                case 1:
                    discount = _discount1;
                    break;
                case 2:
                    discount = _discount2;
                    break;
                default:
                    discount = occurrences > 2 ? _discount3 : 0;
                    break;
            }

            double discounted = (occurrences - discount) / dist.SampleOutcomeCount;
            double lowerOrder = _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir));

            return discounted + (interpolationWeight * lowerOrder);
        }