public bool EndsWith(Ngram<TItem> items, Direction dir, IEqualityComparer<TItem> comparer)
{
    if (items.Length > Length)
        return false;

    IEnumerable<TItem> x = items;
    IEnumerable<TItem> y = _items;
    if (dir == Direction.LeftToRight)
    {
        // For a left-to-right n-gram, the "end" is the right edge, so compare from the back.
        x = x.Reverse();
        y = y.Reverse();
    }

    // Pair the candidate suffix with this n-gram item by item and compare each pair.
    foreach (Tuple<TItem, TItem> item in x.Zip(y))
    {
        if (!comparer.Equals(item.Item1, item.Item2))
            return false;
    }
    return true;
}
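A rough usage sketch, assuming the IEnumerable<TItem> constructor of Ngram<TItem> used elsewhere in this class; the string values are purely illustrative:

// Hypothetical: check whether "b c" is a suffix of "a b C", ignoring case.
var ngram = new Ngram<string>(new[] { "a", "b", "C" });
var suffix = new Ngram<string>(new[] { "B", "c" });
bool result = ngram.EndsWith(suffix, Direction.LeftToRight, StringComparer.OrdinalIgnoreCase);
// result == true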
public double GetProbability(TItem item, Ngram<TItem> context)
{
    if (context.Length != _ngramSize - 1)
        throw new ArgumentException("The context size is not valid.", "context");
    return _smoother.GetProbability(item, context);
}
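The guard means that for an n-gram model of size n the context must contain exactly n - 1 items. A minimal illustration for a trigram model (the model variable and string items are hypothetical):

// Valid for _ngramSize == 3: two items of context, one predicted item.
double p = model.GetProbability("c", new Ngram<string>(new[] { "a", "b" }));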
public double GetProbability(TItem item, Ngram<TItem> context)
{
    FrequencyDistribution<TItem> fd = _cfd[context];
    if (fd.SampleOutcomeCount == 0)
        return 0;

    // Maximum-likelihood estimate: the item's count divided by the total count for this context.
    return (double) fd[item] / fd.SampleOutcomeCount;
}
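In formula terms this is the unsmoothed maximum-likelihood estimate, with C(h, w) denoting the counts stored in the conditional frequency distribution for context h:

P_{\mathrm{MLE}}(w \mid h) = \frac{C(h, w)}{\sum_{w'} C(h, w')}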
public double GetProbability(TItem item, Ngram<TItem> context)
{
    FrequencyDistribution<TItem> freqDist = _cfd[context];
    if (freqDist.ObservedSamples.Count == 0)
        return 0;

    // Witten-Bell interpolation: the number of distinct items seen after this context
    // (ObservedSamples.Count) weights the lower-order estimate; with no lower-order model,
    // a uniform distribution over the observed items is used instead.
    double numer = freqDist[item] + (freqDist.ObservedSamples.Count
        * (_lowerOrderModel == null
            ? 1.0 / freqDist.ObservedSamples.Count
            : _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir))));
    double denom = freqDist.SampleOutcomeCount + freqDist.ObservedSamples.Count;
    return numer / denom;
}
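Reading N(h) as the total count for context h (SampleOutcomeCount), T(h) as the number of distinct items observed after h (ObservedSamples.Count), and P_lower as the lower-order model (or the uniform 1/T(h) fallback), the code computes the Witten-Bell interpolation:

P_{\mathrm{WB}}(w \mid h) = \frac{C(h, w) + T(h)\,P_{\mathrm{lower}}(w \mid h')}{N(h) + T(h)}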
public NgramModel(int ngramSize, TSeq[] sequences, Func<TSeq, IEnumerable<TItem>> itemsSelector,
    Direction dir, INgramModelSmoother<TSeq, TItem> smoother)
{
    _ngramSize = ngramSize;
    _dir = dir;
    _smoother = smoother;
    _ngrams = new HashSet<Ngram<TItem>>();

    // Count every n-gram of the requested size in every sequence.
    var cfd = new ConditionalFrequencyDistribution<Ngram<TItem>, TItem>();
    foreach (TSeq seq in sequences)
    {
        TItem[] items = itemsSelector(seq).ToArray();
        for (int i = 0; i <= items.Length - ngramSize; i++)
        {
            var ngram = new Ngram<TItem>(Enumerable.Range(i, _ngramSize).Select(j => items[j]));
            _ngrams.Add(ngram);

            // The context is the n-gram without its final item (relative to the direction);
            // increment the count of that final item given the context.
            Ngram<TItem> context = ngram.TakeAllExceptLast(dir);
            TItem item = ngram.GetLast(dir);
            cfd[context].Increment(item);
        }
    }

    // Let the smoother precompute whatever it needs from the raw counts.
    _smoother.Smooth(ngramSize, sequences, itemsSelector, dir, cfd);
}
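A sketch of how the constructor might be called, treating each word as a sequence of characters. The MaxLikelihoodSmoother name is an assumption and stands in for any INgramModelSmoother<TSeq, TItem> implementation:

// Hypothetical: a bigram model over the characters of a small word list.
string[] words = { "cat", "cab", "car" };
var model = new NgramModel<string, char>(
    2,                                         // ngramSize
    words,                                     // sequences
    w => w,                                    // itemsSelector: a string is already IEnumerable<char>
    Direction.LeftToRight,
    new MaxLikelihoodSmoother<string, char>()  // any INgramModelSmoother<string, char> implementation
);
double p = model.GetProbability('a', new Ngram<char>(new[] { 'c' })); // every 'c' is followed by 'a', so 3/3 = 1.0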
public double GetProbability(TItem item, Ngram<TItem> context)
{
    FrequencyDistribution<TItem> freqDist = _cfd[context];
    if (freqDist.ObservedSamples.Count == 0)
        return 0;

    // Empty context (unigram case): fall back to the maximum-likelihood estimate.
    if (context.Length == 0)
        return (double) freqDist[item] / freqDist.SampleOutcomeCount;

    int count = freqDist[item];

    // gamma is the probability mass reserved for the lower-order model, built from the
    // precomputed counts in _bigNs and the three discounts.
    Tuple<int, int, int> bigN = _bigNs[context];
    double gamma = ((_discount1 * bigN.Item1) + (_discount2 * bigN.Item2) + (_discount3 * bigN.Item3))
        / freqDist.SampleOutcomeCount;

    // Modified Kneser-Ney applies a different discount depending on how often the n-gram occurred.
    double d = 0;
    if (count == 1)
        d = _discount1;
    else if (count == 2)
        d = _discount2;
    else if (count > 2)
        d = _discount3;

    double prob = (count - d) / freqDist.SampleOutcomeCount;
    return prob + (gamma * _lowerOrderModel.GetProbability(item, context.SkipFirst(_dir)));
}
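Written out, this is the modified Kneser-Ney estimate. D(c) is the count-dependent discount (D1, D2, or D3+), N(h) is the total count for the context, and h' is the context with its first item dropped. Reading bigN as the numbers of distinct items seen exactly once, exactly twice, and three or more times after h follows the standard formulation and is an assumption about how _bigNs is computed:

P_{\mathrm{KN}}(w \mid h) = \frac{C(h, w) - D\big(C(h, w)\big)}{N(h)} + \gamma(h)\,P_{\mathrm{lower}}(w \mid h'),
\qquad
\gamma(h) = \frac{D_1 N_1(h) + D_2 N_2(h) + D_{3+} N_{3+}(h)}{N(h)}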
public bool Equals(Ngram<TItem> other)
{
    return other != null && _hashCode == other._hashCode && _items.SequenceEqual(other._items);
}
public bool EndsWith(Ngram<TItem> items, Direction dir)
{
    return EndsWith(items, dir, EqualityComparer<TItem>.Default);
}
public bool EndsWith(Ngram<TItem> items)
{
    return EndsWith(items, Direction.LeftToRight);
}
public Ngram<TItem> Concat(Ngram<TItem> ngram, Direction dir)
{
    return new Ngram<TItem>(dir == Direction.LeftToRight ? _items.Concat(ngram) : ngram.Concat(_items));
}
public Ngram<TItem> Concat(Ngram<TItem> ngram)
{
    return Concat(ngram, Direction.LeftToRight);
}
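The single-argument overloads above simply default to left-to-right order and the default equality comparer, so typical calls stay short. A small illustrative sketch, again assuming the IEnumerable<TItem> constructor:

// Hypothetical: concatenation and suffix checks with the default direction and comparer.
var left = new Ngram<string>(new[] { "a", "b" });
var right = new Ngram<string>(new[] { "c" });
Ngram<string> combined = left.Concat(right);                           // "a b c"
bool ends = combined.EndsWith(new Ngram<string>(new[] { "b", "c" }));  // true
bool same = combined.Equals(left.Concat(right));                       // true: same items, so Equals succeeds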