/// <summary>
/// Passes every token through <c>toStem.Stem</c> and collects the <c>Value</c>
/// of each result, in input order.
/// </summary>
/// <param name="filteredTokens">Tokens to stem.</param>
/// <returns>A newly created list holding one stemmed value per input token.</returns>
/// <remarks>
/// <c>outList</c> and <c>outWord</c> are not declared in this method (presumably
/// class members - confirm); both are overwritten on every call.
/// </remarks>
public List <string> goStemmer(string[] filteredTokens)
{
    outList = new List <string>();

    for (int index = 0; index < filteredTokens.Length; index++)
    {
        // Keep the intermediate result in outWord, as other members may read it.
        outWord = toStem.Stem(filteredTokens[index]);
        outList.Add(outWord.Value);
    }

    return outList;
}
/// <summary>
/// Computes the inverse document frequency of a word:
/// log10(number of documents / number of documents containing the word).
/// </summary>
/// <param name="word">Word whose <c>Unstemmed</c> form is looked up in each document.</param>
/// <param name="allDocuments">The corpus; each inner list holds the terms of one document.</param>
/// <returns>
/// The base-10 IDF value, or 0.0 when no document contains the word
/// (which includes the case of an empty corpus).
/// </returns>
private double GetIDFValue(StemmedWord word, List <List <string> > allDocuments)
{
    int documentsContainingWord = 0;
    foreach (List <string> document in allDocuments)
    {
        if (document.Contains(word.Unstemmed))
        {
            documentsContainingWord++;
        }
    }

    // Without this guard the division below yields +Infinity (or NaN for an
    // empty corpus), and multiplying that by a term frequency of 0 produces NaN.
    if (documentsContainingWord == 0)
    {
        return 0.0;
    }

    return Math.Log10((double)allDocuments.Count / documentsContainingWord);
}
/// <summary>
/// Computes the term frequency of a word in one document:
/// (occurrences of the word) / (total number of terms in the document).
/// </summary>
/// <param name="word">Word whose <c>Unstemmed</c> form is compared against each term.</param>
/// <param name="document">The terms of the document.</param>
/// <returns>
/// The fraction of terms equal to the word, or 0.0 for an empty document.
/// </returns>
private double GetTFValue(StemmedWord word, List <string> document)
{
    // An empty document would otherwise evaluate 0/0, which is NaN.
    if (document.Count == 0)
    {
        return 0.0;
    }

    // Count matches directly rather than building a temporary list of them.
    int occurrences = 0;
    foreach (string term in document)
    {
        if (term == word.Unstemmed)
        {
            occurrences++;
        }
    }

    return (double)occurrences / document.Count;
}
/// <summary>
/// Computes the TF-IDF weight of a word: its term frequency in the current
/// document multiplied by its inverse document frequency across all documents.
/// </summary>
/// <param name="word">Word to weight.</param>
/// <param name="currentDocument">Terms of the document passed to <c>GetTFValue</c>.</param>
/// <param name="allDocuments">Corpus passed to <c>GetIDFValue</c>.</param>
/// <returns>The product of the TF and IDF values.</returns>
private double GetTFIDFValue(StemmedWord word, List <string> currentDocument, List <List <string> > allDocuments)
{
    double termFrequency = GetTFValue(word, currentDocument);
    double inverseDocumentFrequency = GetIDFValue(word, allDocuments);

    return termFrequency * inverseDocumentFrequency;
}