Пример #1
0
 /// <summary>
 /// Runs every token through <c>toStem.Stem</c> and collects the <c>Value</c>
 /// of each result, in input order.
 /// </summary>
 /// <param name="filteredTokens">Tokens to stem.</param>
 /// <returns>
 /// The list stored in <c>outList</c>, holding one entry per input token.
 /// </returns>
 /// <remarks>
 /// <c>outList</c>, <c>outWord</c> and <c>toStem</c> are declared outside this
 /// method. Each call replaces <c>outList</c> with a new list and leaves
 /// <c>outWord</c> holding the result for the last token processed.
 /// </remarks>
 public List<string> goStemmer(string[] filteredTokens)
 {
     // Start from a fresh list so results of an earlier call are not carried over.
     outList = new List<string>();

     for (int index = 0; index < filteredTokens.Length; index++)
     {
         outWord = toStem.Stem(filteredTokens[index]);
         outList.Add(outWord.Value);
     }

     return outList;
 }
        /// <summary>
        /// Computes the inverse document frequency of a word:
        /// <c>log10(N / df)</c>, where <c>N</c> is the number of documents and
        /// <c>df</c> is the number of documents that contain
        /// <c>word.Unstemmed</c>.
        /// </summary>
        /// <param name="word">Word whose <c>Unstemmed</c> form is looked up in each document.</param>
        /// <param name="allDocuments">The corpus; each document is a list of tokens.</param>
        /// <returns>
        /// The base-10 IDF value, or <c>0</c> when no document contains the word
        /// (which includes an empty corpus).
        /// </returns>
        private double GetIDFValue(StemmedWord word, List<List<string>> allDocuments)
        {
            int countOfDocOccurs = 0;

            foreach (List<string> document in allDocuments)
            {
                // A document counts once, however many times the word appears in it.
                if (document.Contains(word.Unstemmed))
                {
                    countOfDocOccurs++;
                }
            }

            // Without this guard the division below is N / 0, which evaluates to
            // +Infinity (or NaN for 0 / 0) and turns any product with it into NaN.
            // A word absent from every document carries no weight, so report 0.
            if (countOfDocOccurs == 0)
            {
                return 0.0;
            }

            return Math.Log10((double)allDocuments.Count / countOfDocOccurs);
        }
        /// <summary>
        /// Computes the term frequency of a word in one document: the number of
        /// tokens equal to <c>word.Unstemmed</c> divided by the total number of
        /// tokens in the document.
        /// </summary>
        /// <param name="word">Word whose <c>Unstemmed</c> form is counted.</param>
        /// <param name="document">The document as a list of tokens.</param>
        /// <returns>
        /// A ratio in the range 0..1, or <c>0</c> for an empty document.
        /// </returns>
        private double GetTFValue(StemmedWord word, List<string> document)
        {
            // An empty document would otherwise produce 0 / 0, i.e. NaN.
            if (document.Count == 0)
            {
                return 0.0;
            }

            // Count in place rather than building a temporary list of matches.
            int countOfOccurs = 0;
            foreach (string token in document)
            {
                if (token == word.Unstemmed)
                {
                    countOfOccurs++;
                }
            }

            return (double)countOfOccurs / document.Count;
        }
 /// <summary>
 /// Computes the TF-IDF weight of a word for one document as the product of
 /// <c>GetTFValue</c> (over <paramref name="currentDocument"/>) and
 /// <c>GetIDFValue</c> (over <paramref name="allDocuments"/>).
 /// </summary>
 /// <param name="word">Word to score.</param>
 /// <param name="currentDocument">Document used for the term-frequency part.</param>
 /// <param name="allDocuments">Corpus used for the inverse-document-frequency part.</param>
 /// <returns>Term frequency multiplied by inverse document frequency.</returns>
 private double GetTFIDFValue(StemmedWord word, List<string> currentDocument, List<List<string>> allDocuments)
 {
     double termFrequency = GetTFValue(word, currentDocument);
     double inverseDocumentFrequency = GetIDFValue(word, allDocuments);

     return termFrequency * inverseDocumentFrequency;
 }