Пример #1
0
 /// <summary>
 /// Records the conditional frequency distribution and direction on this smoother and,
 /// when <paramref name="ngramSize"/> is greater than one, builds a model of size
 /// <c>ngramSize - 1</c> that is given its own new <c>WittenBellSmoother</c>.
 /// </summary>
 /// <param name="ngramSize">Size of the n-grams being smoothed.</param>
 /// <param name="sequences">Sequences passed through to the lower-order model.</param>
 /// <param name="itemsSelector">Selector passed through to the lower-order model.</param>
 /// <param name="dir">Direction stored on this smoother and passed to the lower-order model.</param>
 /// <param name="cfd">Conditional frequency distribution stored on this smoother.</param>
 public void Smooth(int ngramSize, TSeq[] sequences, Func<TSeq, IEnumerable<TItem>> itemsSelector, Direction dir, ConditionalFrequencyDistribution<Ngram<TItem>, TItem> cfd)
 {
     _cfd = cfd;
     _dir = dir;

     // Nothing is built beneath a model of size one or less.
     if (ngramSize <= 1)
     {
         return;
     }

     var lowerOrderSmoother = new WittenBellSmoother<TSeq, TItem>();
     _lowerOrderModel = new NgramModel<TSeq, TItem>(ngramSize - 1, sequences, itemsSelector, dir, lowerOrderSmoother);
 }
Пример #2
0
        // Trains on an empty word list and checks that every queried item has probability 0
        // after the context "l".
        public void NoSamples()
        {
            string[] noWords = { };

            NgramModel<string, char> emptyModel = NgramModel<string, char>.Train(2, noWords, word => word, new MaxLikelihoodSmoother<string, char>());

            // Same items, in the same order, as the individual assertions they replace.
            foreach (char item in new[] { 'a', 'l', 'e', 't' })
            {
                Assert.That(emptyModel.GetProbability(item, new Ngram<char>("l")), Is.EqualTo(0));
            }
        }
Пример #3
0
        // Trains all models up to size 10 on a small word list and checks the number of
        // n-grams reported by the models at selected positions of the returned array.
        public void Ngrams()
        {
            string[] corpus = { "#call#", "#stall#", "#hello#", "#the#", "#a#", "#test#", "#income#", "#unproduce#" };

            // Parallel arrays: position in the trained-model array and the n-gram count expected there.
            int[] modelIndexes = { 0, 1, 7, 8, 9 };
            int[] expectedCounts = { 16, 36, 5, 3, 2 };

            NgramModel<string, char>[] trained = NgramModel<string, char>.TrainAll(10, corpus, word => word, () => new MaxLikelihoodSmoother<string, char>()).ToArray();

            for (int k = 0; k < modelIndexes.Length; k++)
            {
                Assert.That(trained[modelIndexes[k]].Ngrams.Count, Is.EqualTo(expectedCounts[k]));
            }
        }
Пример #4
0
        // Trains a size-2 model with Direction.RightToLeft and a maximum-likelihood smoother,
        // then checks the probabilities of several items given the contexts "l" and "a".
        public void GetProbabilityRightToLeft()
        {
            string[] corpus = { "#call#", "#stall#", "#hello#", "#the#", "#a#", "#test#", "#income#", "#unproduce#" };

            NgramModel<string, char> rtlModel = NgramModel<string, char>.Train(2, corpus, word => word, Direction.RightToLeft, new MaxLikelihoodSmoother<string, char>());

            // Context "l"
            var contextL = new Ngram<char>("l");
            Assert.That(rtlModel.GetProbability('a', contextL), Is.EqualTo(0.333).Within(0.001));
            Assert.That(rtlModel.GetProbability('l', contextL), Is.EqualTo(0.5));
            Assert.That(rtlModel.GetProbability('e', contextL), Is.EqualTo(0.166).Within(0.001));
            Assert.That(rtlModel.GetProbability('t', contextL), Is.EqualTo(0.0));

            // Context "a"
            var contextA = new Ngram<char>("a");
            Assert.That(rtlModel.GetProbability('c', contextA), Is.EqualTo(0.333).Within(0.001));
            Assert.That(rtlModel.GetProbability('t', contextA), Is.EqualTo(0.333).Within(0.001));
            Assert.That(rtlModel.GetProbability('#', contextA), Is.EqualTo(0.333).Within(0.001));
            Assert.That(rtlModel.GetProbability('l', contextA), Is.EqualTo(0.0));
        }
Пример #5
0
        /// <summary>
        /// Builds one model for each size from <paramref name="maxNgramSize"/> down to 1 and
        /// returns them in an array in which the model built with size <c>k</c> sits at index <c>k - 1</c>.
        /// </summary>
        /// <remarks>
        /// The largest model is constructed first. Each smaller model is taken from the previous
        /// model's <c>Smoother.LowerOrderModel</c> when that is non-null; otherwise a new model is
        /// constructed with a smoother obtained from <paramref name="smootherFactory"/>.
        /// </remarks>
        /// <param name="maxNgramSize">Largest n-gram size to train; also the length of the result.</param>
        /// <param name="sequences">Training sequences; enumerated once into an array.</param>
        /// <param name="itemsSelector">Selector passed to every constructed model.</param>
        /// <param name="dir">Direction passed to every constructed model.</param>
        /// <param name="smootherFactory">Called once for every model constructed here.</param>
        /// <returns>The array of models.</returns>
        public static IEnumerable<NgramModel<TSeq, TItem>> TrainAll(int maxNgramSize, IEnumerable<TSeq> sequences, Func<TSeq, IEnumerable<TItem>> itemsSelector, Direction dir,
                                                                    Func<INgramModelSmoother<TSeq, TItem>> smootherFactory)
        {
            TSeq[] materialized = sequences.ToArray();
            var current = new NgramModel<TSeq, TItem>(maxNgramSize, materialized, itemsSelector, dir, smootherFactory());
            var result = new NgramModel<TSeq, TItem>[maxNgramSize];

            // Walk the sizes downwards; "order" is the size the current model was built with.
            for (int order = maxNgramSize; order >= 1; order--)
            {
                result[order - 1] = current;
                if (order == 1)
                {
                    continue;
                }

                NgramModel<TSeq, TItem> lower = current.Smoother.LowerOrderModel;
                if (lower == null)
                {
                    lower = new NgramModel<TSeq, TItem>(order - 1, materialized, itemsSelector, dir, smootherFactory());
                }
                current = lower;
            }

            return result;
        }
Пример #6
0
        /// <summary>
        /// Stores the distribution and direction, tallies for every condition how many observed
        /// items have a frequency of one, of two, and of more than two, computes the three
        /// discount values from the totals across all conditions, and builds a model of size
        /// <c>ngramSize - 1</c> when <paramref name="ngramSize"/> is greater than one.
        /// </summary>
        /// <param name="ngramSize">Size of the n-grams being smoothed.</param>
        /// <param name="sequences">Sequences passed through to the lower-order model.</param>
        /// <param name="itemsSelector">Selector passed through to the lower-order model.</param>
        /// <param name="dir">Direction stored on this smoother and passed to the lower-order model.</param>
        /// <param name="cfd">Conditional frequency distribution whose conditions are tallied.</param>
        public void Smooth(int ngramSize, TSeq[] sequences, Func<TSeq, IEnumerable<TItem>> itemsSelector, Direction dir, ConditionalFrequencyDistribution<Ngram<TItem>, TItem> cfd)
        {
            _cfd = cfd;
            _dir = dir;

            // Totals over all conditions of items seen exactly 1, 2, 3 and 4 times.
            int sumOnce = 0, sumTwice = 0, sumThrice = 0, sumFourTimes = 0;

            _bigNs.Clear();
            foreach (Ngram<TItem> context in cfd.Conditions)
            {
                FrequencyDistribution<TItem> dist = cfd[context];
                int once = 0, twice = 0, moreThanTwice = 0;

                foreach (TItem sample in dist.ObservedSamples)
                {
                    var freq = dist[sample];
                    if (freq > 2)
                    {
                        moreThanTwice++;

                        // Exact counts of 3 and 4 only feed the totals used for the discounts.
                        if (freq == 3)
                        {
                            sumThrice++;
                        }
                        else if (freq == 4)
                        {
                            sumFourTimes++;
                        }
                    }
                    else if (freq == 2)
                    {
                        twice++;
                    }
                    else if (freq == 1)
                    {
                        once++;
                    }
                }

                sumOnce += once;
                sumTwice += twice;

                // Per-condition tallies: (seen once, seen twice, seen more than twice).
                _bigNs[context] = Tuple.Create(once, twice, moreThanTwice);
            }

            // y stays 0 when nothing was seen exactly once; each discount stays 0 when its
            // denominator total is 0.
            bool anySeenOnce = sumOnce > 0;
            double y = anySeenOnce ? (double)sumOnce / (sumOnce + (2 * sumTwice)) : 0;

            _discount1 = anySeenOnce ? 1 - (2 * y * ((double)sumTwice / sumOnce)) : 0;
            _discount2 = sumTwice > 0 ? 2 - (3 * y * ((double)sumThrice / sumTwice)) : 0;
            _discount3 = sumThrice > 0 ? 3 - (4 * y * ((double)sumFourTimes / sumThrice)) : 0;

            if (ngramSize <= 1)
            {
                return;
            }

            var lowerOrderSmoother = new ModifiedKneserNeySmoother<TSeq, TItem>();
            _lowerOrderModel = new NgramModel<TSeq, TItem>(ngramSize - 1, sequences, itemsSelector, dir, lowerOrderSmoother);
        }