/// <summary>
/// Builds every contiguous n-gram of the requested length from a list of words.
/// </summary>
/// <param name="words">Words to slide the window over; when null, the collection is returned with nothing added.</param>
/// <param name="length">Number of words in each n-gram; also passed to the collection's constructor.</param>
/// <returns>A new collection containing one n-gram per window position, in window order.</returns>
public static NGramCollection GetNGrams(IList<string> words, int length)
{
    var result = new NGramCollection(length);
    if (words == null)
    {
        return result;
    }

    for (int start = 0; start < words.Count; start++)
    {
        // Index of the last word the window starting here would need.
        int lastIndex = start + length - 1;
        if (lastIndex >= words.Count)
        {
            // The window no longer fits inside the list, so no later start can fit either.
            break;
        }

        var window = new string[length];
        for (int offset = 0; offset < window.Length; offset++)
        {
            window[offset] = words[start + offset];
        }

        result.Add(new NGram(window));
    }

    return result;
}
/// <summary>
/// Creates a collection holding one <c>NGramTree</c> for each gram length from 1 to 6 and,
/// when filtered loading is enabled, populates those trees from the filtered n-gram resource.
/// </summary>
/// <param name="useFiltered">
/// Value assigned to <c>UseFiltered</c> before loading. When that member then reads false,
/// the trees are returned without any n-grams added.
/// </param>
/// <returns>The collection of trees, keyed by gram length.</returns>
public NGramCollection Load(bool useFiltered = true)
{
    const int MaxGramLength = 6;

    var result = new NGramCollection();
    for (var size = 1; size <= MaxGramLength; size++)
    {
        result.Add(size, new NGramTree(size));
    }

    UseFiltered = useFiltered;
    if (!UseFiltered)
    {
        // Unfiltered loading is not implemented: the earlier sketch (concatenating the lines of
        // EnglishResources.ngrams1 through ngrams5) was never wired in, so nothing is added here.
        return result;
    }

    var parsed = EnglishResources.ngrams_filtered.ReadLines().ForEachParallel(GetNGrams);
    foreach (var entry in parsed)
    {
        // Route each parsed n-gram to the tree that matches its word count.
        var tree = result[entry.Words.Length];
        tree.Add(entry.Text, new NGram(entry.Text, entry.Words, entry.Frequency));
    }

    return result;
}
/// <summary>
/// Builds one n-gram collection for each length in an inclusive range.
/// </summary>
/// <param name="words">Words forwarded unchanged to the single-length overload.</param>
/// <param name="minLength">Smallest n-gram length to generate.</param>
/// <param name="maxLength">Largest n-gram length to generate (inclusive).</param>
/// <returns>
/// The collections in ascending length order; the sequence is empty when
/// <paramref name="minLength"/> is greater than <paramref name="maxLength"/>.
/// </returns>
public static IEnumerable<NGramCollection> GetNGrams(IList<string> words, int minLength, int maxLength)
{
    var collectionsByLength = new List<NGramCollection>();

    for (int gramLength = minLength; gramLength <= maxLength; gramLength++)
    {
        collectionsByLength.Add(GetNGrams(words, gramLength));
    }

    return collectionsByLength;
}
/// <summary>
/// Creates a <c>UniqueNGramCollection</c> from an existing n-gram collection by adding
/// each of its grams in enumeration order.
/// </summary>
/// <param name="ngrams">Source collection; its <c>GramLength</c> is passed to the new collection's constructor.</param>
/// <returns>The newly created collection after every source gram has been added to it.</returns>
/// <exception cref="ArgumentNullException"><paramref name="ngrams"/> is null.</exception>
public static UniqueNGramCollection Create(NGramCollection ngrams)
{
    if (ngrams == null)
    {
        throw new ArgumentNullException(nameof(ngrams));
    }

    var distinct = new UniqueNGramCollection(ngrams.GramLength);

    foreach (NGram item in ngrams.Grams)
    {
        distinct.Add(item);
    }

    return distinct;
}