/// <summary>
/// Builds an n-gram model of the given order from the supplied training sequences.
/// </summary>
/// <remarks>
/// Every window of <paramref name="ngramSize"/> consecutive items in each sequence is added to the model's
/// n-gram set and counted in a conditional frequency distribution keyed by
/// <c>ngram.TakeAllExceptLast(dir)</c>, with <c>ngram.GetLast(dir)</c> as the counted item. The resulting
/// distribution is then passed to <paramref name="smoother"/> together with the original training arguments.
/// Sequences that yield fewer than <paramref name="ngramSize"/> items contribute no n-grams.
/// </remarks>
/// <param name="ngramSize">Number of items in each n-gram; must be at least 1.</param>
/// <param name="sequences">The training sequences.</param>
/// <param name="itemsSelector">Extracts the items of a sequence; called once per sequence.</param>
/// <param name="dir">Direction used to split each n-gram into its context and its last item.</param>
/// <param name="smoother">Smoother that receives the collected counts.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="ngramSize"/> is less than 1.</exception>
/// <exception cref="ArgumentNullException">
/// <paramref name="sequences"/>, <paramref name="itemsSelector"/> or <paramref name="smoother"/> is null.
/// </exception>
public NgramModel(int ngramSize, TSeq[] sequences, Func<TSeq, IEnumerable<TItem>> itemsSelector, Direction dir,
    INgramModelSmoother<TSeq, TItem> smoother)
{
    // Fail fast with precise exceptions instead of a late NullReferenceException (or an exception raised
    // from deep inside the counting loop) once part of the work has already been done.
    if (ngramSize < 1)
    {
        throw new ArgumentOutOfRangeException(nameof(ngramSize), ngramSize, "The n-gram size must be at least 1.");
    }
    if (sequences == null)
    {
        throw new ArgumentNullException(nameof(sequences));
    }
    if (itemsSelector == null)
    {
        throw new ArgumentNullException(nameof(itemsSelector));
    }
    if (smoother == null)
    {
        throw new ArgumentNullException(nameof(smoother));
    }

    _ngramSize = ngramSize;
    _dir = dir;
    _smoother = smoother;
    _ngrams = new HashSet<Ngram<TItem>>();

    var cfd = new ConditionalFrequencyDistribution<Ngram<TItem>, TItem>();
    foreach (TSeq seq in sequences)
    {
        // Materialize once so the sliding window below can index into the items.
        TItem[] items = itemsSelector(seq).ToArray();

        // The last valid window starts at items.Length - ngramSize; shorter sequences skip the loop entirely.
        int lastStart = items.Length - ngramSize;
        for (int start = 0; start <= lastStart; start++)
        {
            var ngram = new Ngram<TItem>(Enumerable.Range(start, ngramSize).Select(j => items[j]));
            _ngrams.Add(ngram);

            Ngram<TItem> context = ngram.TakeAllExceptLast(dir);
            TItem item = ngram.GetLast(dir);
            cfd[context].Increment(item);
        }
    }

    smoother.Smooth(ngramSize, sequences, itemsSelector, dir, cfd);
}
/// <summary>
/// Trains an n-gram model of the given order over <paramref name="sequences"/> using the specified direction.
/// </summary>
/// <remarks>
/// The sequences are copied into an array before the model is constructed, so the input is enumerated
/// exactly once by this method.
/// </remarks>
/// <param name="ngramSize">Number of items in each n-gram.</param>
/// <param name="sequences">The training sequences.</param>
/// <param name="itemsSelector">Extracts the items of a sequence.</param>
/// <param name="dir">Direction passed through to the model.</param>
/// <param name="smoother">Smoother passed through to the model.</param>
/// <returns>A newly constructed model.</returns>
public static NgramModel<TSeq, TItem> Train(int ngramSize, IEnumerable<TSeq> sequences,
    Func<TSeq, IEnumerable<TItem>> itemsSelector, Direction dir, INgramModelSmoother<TSeq, TItem> smoother)
{
    TSeq[] trainingData = sequences.ToArray();
    var model = new NgramModel<TSeq, TItem>(ngramSize, trainingData, itemsSelector, dir, smoother);
    return model;
}
/// <summary>
/// Trains an n-gram model of the given order over <paramref name="sequences"/>, using
/// <see cref="Direction.LeftToRight"/> as the direction.
/// </summary>
/// <param name="ngramSize">Number of items in each n-gram.</param>
/// <param name="sequences">The training sequences.</param>
/// <param name="itemsSelector">Extracts the items of a sequence.</param>
/// <param name="smoother">Smoother passed through to the model.</param>
/// <returns>The model produced by the overload that takes an explicit direction.</returns>
public static NgramModel<TSeq, TItem> Train(int ngramSize, IEnumerable<TSeq> sequences,
    Func<TSeq, IEnumerable<TItem>> itemsSelector, INgramModelSmoother<TSeq, TItem> smoother)
{
    // This overload only fixes the direction; everything else is forwarded unchanged.
    Direction defaultDirection = Direction.LeftToRight;
    return Train(ngramSize, sequences, itemsSelector, defaultDirection, smoother);
}