/// <summary>
/// Collects the distinct terms found in a document: every token and the stem of
/// every token, excluding anything the stopword list reports as present.
/// </summary>
/// <param name="reader">Line-oriented source that is read until it reports end of file.</param>
/// <returns>
/// The set of lower-cased tokens and their stems that were not rejected by the stopword list.
/// </returns>
public HashSet<string> Process(IDocumentReader reader)
{
    var terms = new HashSet<string>();

    while (!reader.EndOfFile())
    {
        // Lower-case the whole line up front so every derived token is already normalized.
        var text = reader.ReadLine().ToLower();
        if (text == string.Empty)
        {
            continue;
        }

        // Line -> sentences -> tokens.
        foreach (var sentence in textProcessor.GetSentences(text))
        {
            foreach (var token in textProcessor.Tokenize(sentence))
            {
                // Surface form: the membership test runs first so the stopword
                // lookup is skipped for terms that are already stored.
                if (!(terms.Contains(token) || stopwords.Exists(token)))
                {
                    terms.Add(token);
                }

                // Stemmed form: filtered independently of the surface form.
                var stem = textProcessor.Stem(token);
                if (!(terms.Contains(stem) || stopwords.Exists(stem)))
                {
                    terms.Add(stem);
                }
            }
        }
    }

    return terms;
}
/// <summary>
/// Reads every line from <paramref name="reader"/> and returns the distinct
/// lower-cased lines as a set.
/// </summary>
/// <param name="reader">Line-oriented source that is read until it reports end of file.</param>
/// <returns>The set of lower-cased lines; lines differing only by case collapse into one entry.</returns>
public HashSet<string> Process(IDocumentReader reader)
{
    var res = new HashSet<string>();

    while (!reader.EndOfFile())
    {
        // Normalize before inserting. The set stores lower-cased entries, so a
        // pre-check against the raw line would test the wrong key; HashSet.Add
        // already ignores values that are present, so no explicit check is needed.
        res.Add(reader.ReadLine().ToLower());
    }

    return res;
}