/// <summary>
/// Declares the word-count index for <c>Article</c> documents.
/// </summary>
/// <remarks>
/// Map: the article content is tokenized, passed through the stop-word filter,
/// and every remaining word yields an <c>ArticleByWord</c> entry with a count of 1.
/// Group/Reduce: entries are grouped by word and their counts are summed.
/// Delete: the counts of the removed entries are subtracted from the stored entry.
/// </remarks>
/// <param name="context">The context on which the index is declared.</param>
public override void Describe(DescribeContext<Article> context)
{
    var wordTokenizer = new WhiteSpaceTokenizer();
    var stopWordFilter = new StopWordFilter();

    context.For<ArticleByWord, string>()
        .Map(article =>
            stopWordFilter
                .Filter(wordTokenizer.Tokenize(article.Content))
                .Select(word => new ArticleByWord { Word = word, Count = 1 }))
        .Group(entry => entry.Word)
        .Reduce(words => new ArticleByWord
        {
            Word = words.Key,
            Count = words.Sum(entry => entry.Count)
        })
        .Delete((index, map) =>
        {
            index.Count -= map.Sum(removed => removed.Count);

            // Keep the entry only while its count is still positive;
            // returning null asks for the index entry to be deleted.
            if (index.Count > 0)
            {
                return index;
            }

            return null;
        });
}
// Checks that a StopWordFilter built from an explicit stop-word list drops
// exactly those words from an array and keeps the remaining words in order.
public void RemovesStopWordsFromArray()
{
    // Arrange
    var input = new[] { "foo", "bar", "fum", "fee" };
    var expected = new[] { "foo", "bar" };
    var stopWordFilter = new StopWordFilter(new[] { "fum", "fee" });

    // Act
    string[] actual = stopWordFilter.Filter(input);

    // Assert
    Assert.That(actual, Is.EqualTo(expected));
}