Exemple #1
0
        /// <summary>
        /// Checks whether this n-gram ends with <paramref name="items"/>, comparing elements
        /// pairwise with <paramref name="comparer"/>.
        /// </summary>
        /// <param name="items">The candidate ending sequence.</param>
        /// <param name="dir">
        /// When <c>Direction.LeftToRight</c>, both sequences are reversed before being paired;
        /// for any other value they are paired in their stored order.
        /// </param>
        /// <param name="comparer">Equality comparer applied to each pair of elements.</param>
        /// <returns>
        /// <c>false</c> if <paramref name="items"/> is longer than this n-gram or any compared
        /// pair is unequal; otherwise <c>true</c>.
        /// </returns>
        public bool EndsWith(Ngram <TItem> items, Direction dir, IEqualityComparer <TItem> comparer)
        {
            if (items.Length > Length)
            {
                return(false);
            }

            IEnumerable <TItem> candidate = items;
            IEnumerable <TItem> own       = _items;
            bool compareFromTail          = dir == Direction.LeftToRight;

            IEnumerable <TItem> left  = compareFromTail ? candidate.Reverse() : candidate;
            IEnumerable <TItem> right = compareFromTail ? own.Reverse() : own;

            // NOTE(review): Zip(right) presumably pairs elements positionally and stops at the
            // shorter sequence - confirm against the Zip overload in scope.
            return(left.Zip(right).All(pair => comparer.Equals(pair.Item1, pair.Item2)));
        }
Exemple #2
0
 /// <summary>
 /// Returns the probability of <paramref name="item"/> given <paramref name="context"/>,
 /// as reported by the configured smoother.
 /// </summary>
 /// <param name="item">The item whose probability is requested.</param>
 /// <param name="context">The conditioning context; its length must be the n-gram size minus one.</param>
 /// <returns>The value returned by the smoother for the item and context.</returns>
 /// <exception cref="ArgumentException">
 /// Thrown when the length of <paramref name="context"/> is not the n-gram size minus one.
 /// </exception>
 public double GetProbability(TItem item, Ngram <TItem> context)
 {
     var expectedLength = _ngramSize - 1;

     if (context.Length == expectedLength)
     {
         return(_smoother.GetProbability(item, context));
     }

     throw new ArgumentException("The context size is not valid.", "context");
 }
        /// <summary>
        /// Returns the relative frequency of <paramref name="item"/> within the frequency
        /// distribution stored for <paramref name="context"/>.
        /// </summary>
        /// <param name="item">The item whose probability is requested.</param>
        /// <param name="context">The conditioning context used to look up the distribution.</param>
        /// <returns>
        /// The count of <paramref name="item"/> divided by the distribution's sample outcome
        /// count, or 0 when that count is zero.
        /// </returns>
        public double GetProbability(TItem item, Ngram <TItem> context)
        {
            FrequencyDistribution <TItem> dist = _cfd[context];
            var total = dist.SampleOutcomeCount;

            // Guard against dividing by zero for a context with no recorded outcomes.
            return(total == 0 ? 0 : (double)dist[item] / total);
        }
Exemple #4
0
        /// <summary>
        /// Returns a smoothed probability of <paramref name="item"/> given
        /// <paramref name="context"/>, mixing the observed count with a lower-order estimate
        /// weighted by the number of distinct observed samples.
        /// </summary>
        /// <param name="item">The item whose probability is requested.</param>
        /// <param name="context">The conditioning context used to look up the distribution.</param>
        /// <returns>
        /// 0 when the context has no observed samples; otherwise
        /// (count(item) + T * p) / (N + T), where T is the number of observed samples, N is the
        /// sample outcome count, and p is the lower-order model's probability for the shortened
        /// context (or 1/T when there is no lower-order model).
        /// </returns>
        public double GetProbability(TItem item, Ngram <TItem> context)
        {
            FrequencyDistribution <TItem> dist = _cfd[context];
            var distinctSamples = dist.ObservedSamples.Count;

            if (distinctSamples == 0)
            {
                return(0);
            }

            var itemCount = dist[item];

            double lowerOrderProb;
            if (_lowerOrderModel == null)
            {
                // No lower-order model available: fall back to 1 / T.
                lowerOrderProb = 1.0 / distinctSamples;
            }
            else
            {
                lowerOrderProb = _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir));
            }

            double numerator   = itemCount + (distinctSamples * lowerOrderProb);
            double denominator = dist.SampleOutcomeCount + distinctSamples;

            return(numerator / denominator);
        }
Exemple #5
0
        /// <summary>
        /// Builds an n-gram model by sliding a window of <paramref name="ngramSize"/> items over
        /// every sequence, recording each n-gram and counting its last item against the rest of
        /// the n-gram as context, then handing the counts to the smoother.
        /// </summary>
        /// <param name="ngramSize">Number of items in each n-gram.</param>
        /// <param name="sequences">The training sequences.</param>
        /// <param name="itemsSelector">Extracts the items of a sequence.</param>
        /// <param name="dir">Direction passed to the n-gram when splitting it into context and last item.</param>
        /// <param name="smoother">Smoother that receives the collected conditional counts.</param>
        public NgramModel(int ngramSize, TSeq[] sequences, Func <TSeq, IEnumerable <TItem> > itemsSelector, Direction dir, INgramModelSmoother <TSeq, TItem> smoother)
        {
            _ngramSize = ngramSize;
            _dir       = dir;
            _smoother  = smoother;
            _ngrams    = new HashSet <Ngram <TItem> >();

            var counts = new ConditionalFrequencyDistribution <Ngram <TItem>, TItem>();

            foreach (TSeq sequence in sequences)
            {
                TItem[] seqItems = itemsSelector(sequence).ToArray();

                // Last start index at which a full window still fits; negative when the
                // sequence is shorter than the n-gram size, so the loop body never runs.
                int lastStart = seqItems.Length - ngramSize;

                for (int start = 0; start <= lastStart; start++)
                {
                    IEnumerable <TItem> window = Enumerable.Range(start, _ngramSize).Select(index => seqItems[index]);
                    var current = new Ngram <TItem>(window);
                    _ngrams.Add(current);

                    Ngram <TItem> history = current.TakeAllExceptLast(dir);
                    TItem         outcome = current.GetLast(dir);
                    counts[history].Increment(outcome);
                }
            }

            _smoother.Smooth(ngramSize, sequences, itemsSelector, dir, counts);
        }
Exemple #6
0
        /// <summary>
        /// Returns a discounted, interpolated probability of <paramref name="item"/> given
        /// <paramref name="context"/>, using one of three discounts selected by the item's count
        /// and backing off to the lower-order model.
        /// </summary>
        /// <param name="item">The item whose probability is requested.</param>
        /// <param name="context">The conditioning context used to look up the distribution.</param>
        /// <returns>
        /// 0 when the context has no observed samples; the plain relative frequency when the
        /// context is empty; otherwise (count - d) / N + gamma * p, where d is the discount for
        /// the item's count, N is the sample outcome count, gamma is the discount-weighted sum
        /// of the three stored values for the context divided by N, and p is the lower-order
        /// model's probability for the shortened context.
        /// </returns>
        public double GetProbability(TItem item, Ngram <TItem> context)
        {
            FrequencyDistribution <TItem> dist = _cfd[context];

            if (dist.ObservedSamples.Count == 0)
            {
                return(0);
            }

            if (context.Length == 0)
            {
                // Empty context: no lower-order model is consulted.
                return((double)dist[item] / dist.SampleOutcomeCount);
            }

            int itemCount = dist[item];
            Tuple <int, int, int> contextCounts = _bigNs[context];
            double gamma = ((_discount1 * contextCounts.Item1) + (_discount2 * contextCounts.Item2) + (_discount3 * contextCounts.Item3)) / dist.SampleOutcomeCount;

            // Pick the discount by count: 1 -> first, 2 -> second, 3+ -> third, otherwise none.
            double discount;
            switch (itemCount)
            {
                case 1:
                    discount = _discount1;
                    break;

                case 2:
                    discount = _discount2;
                    break;

                default:
                    discount = itemCount > 2 ? _discount3 : 0;
                    break;
            }

            double discounted = (itemCount - discount) / dist.SampleOutcomeCount;
            double lowerOrder = _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir));

            return(discounted + (gamma * lowerOrder));
        }
Exemple #7
0
 /// <summary>
 /// Determines whether this n-gram equals <paramref name="other"/>.
 /// </summary>
 /// <param name="other">The n-gram to compare with; may be null.</param>
 /// <returns>
 /// <c>true</c> when <paramref name="other"/> is not null, both stored hash codes match and
 /// the item sequences are equal element by element; otherwise <c>false</c>.
 /// </returns>
 public bool Equals(Ngram <TItem> other)
 {
     // Cheap rejection first: a null argument or differing hash codes cannot be equal.
     bool mayBeEqual = other != null && _hashCode == other._hashCode;

     return(mayBeEqual && _items.SequenceEqual(other._items));
 }
Exemple #8
0
 /// <summary>
 /// Checks whether this n-gram ends with <paramref name="items"/> in the given direction,
 /// using the default equality comparer for the item type.
 /// </summary>
 /// <param name="items">The candidate ending sequence.</param>
 /// <param name="dir">Direction forwarded to the comparer-taking overload.</param>
 /// <returns>The result of the comparer-taking overload.</returns>
 public bool EndsWith(Ngram <TItem> items, Direction dir)
 {
     EqualityComparer <TItem> defaultComparer = EqualityComparer <TItem> .Default;

     return(EndsWith(items, dir, defaultComparer));
 }
Exemple #9
0
 /// <summary>
 /// Checks whether this n-gram ends with <paramref name="items"/>, using
 /// <c>Direction.LeftToRight</c>.
 /// </summary>
 /// <param name="items">The candidate ending sequence.</param>
 /// <returns>The result of the direction-taking overload.</returns>
 public bool EndsWith(Ngram <TItem> items)
 {
     return(EndsWith(items, dir: Direction.LeftToRight));
 }
Exemple #10
0
 /// <summary>
 /// Creates a new n-gram by joining this n-gram's items with those of
 /// <paramref name="ngram"/>.
 /// </summary>
 /// <param name="ngram">The n-gram to join with this one.</param>
 /// <param name="dir">
 /// When <c>Direction.LeftToRight</c>, this n-gram's items come first; for any other value
 /// the items of <paramref name="ngram"/> come first.
 /// </param>
 /// <returns>A new n-gram built from the joined sequence.</returns>
 public Ngram <TItem> Concat(Ngram <TItem> ngram, Direction dir)
 {
     IEnumerable <TItem> joined;

     if (dir == Direction.LeftToRight)
     {
         joined = _items.Concat(ngram);
     }
     else
     {
         joined = ngram.Concat(_items);
     }

     return(new Ngram <TItem>(joined));
 }
Exemple #11
0
 /// <summary>
 /// Creates a new n-gram by joining this n-gram with <paramref name="ngram"/>, using
 /// <c>Direction.LeftToRight</c>.
 /// </summary>
 /// <param name="ngram">The n-gram to join with this one.</param>
 /// <returns>The result of the direction-taking overload.</returns>
 public Ngram <TItem> Concat(Ngram <TItem> ngram)
 {
     return(Concat(ngram, dir: Direction.LeftToRight));
 }