コード例 #1
0
ファイル: Program.cs プロジェクト: Rooniey/KSR
        /// <summary>
        /// Counts how often each token word occurs across the articles that carry the given
        /// label and returns the most frequent ones.
        /// </summary>
        /// <param name="articles">Labeled articles to scan; only those whose <c>Label</c> equals <paramref name="label"/> contribute.</param>
        /// <param name="label">Label an article must have to be included in the counts.</param>
        /// <param name="termCount">Maximum number of terms to return (defaults to 20).</param>
        /// <param name="stopList">Forwarded unchanged to <c>StopWordsFilterProcessor.Process</c>; defaults to <c>null</c>.</param>
        /// <returns>
        /// A dictionary of at most <paramref name="termCount"/> entries mapping a token word to its
        /// occurrence count, selected in descending order of count.
        /// </returns>
        private static Dictionary<string, int> GetMostFrequentTermsForLabel(List<LabeledArticle> articles, string label, int termCount = 20, string[] stopList = null)
        {
            var countDictionary = new Dictionary<string, int>();

            foreach (var article in articles)
            {
                if (article.Label == label)
                {
                    // Preprocessing pipeline, in order: special-character replacement on the body,
                    // word tokenization, stop-word filtering, then lemmatization.
                    var body = TextUtility.ReplaceSpecialCharacters(article.Article.Body);
                    var processedWords = StopWordsFilterProcessor.Process(Tokenizer.TokenizeWords(body), stopList);
                    processedWords = Lemmatizer.Process(processedWords);
                    var tokenized = new TokenizedArticle(article, processedWords);

                    // Count straight away instead of collecting every tokenized article first;
                    // the tokenized article is not needed once its tokens are tallied.
                    foreach (var token in tokenized.Tokens)
                    {
                        var word = token.Word;

                        // TryGetValue leaves occurrences at 0 when the word is new, so one
                        // lookup plus one write covers both the "seen" and "unseen" cases.
                        int occurrences;
                        countDictionary.TryGetValue(word, out occurrences);
                        countDictionary[word] = occurrences + 1;
                    }
                }
            }

            return countDictionary
                   .OrderByDescending(pair => pair.Value)
                   .Take(termCount)
                   .ToDictionary(pair => pair.Key, pair => pair.Value);
        }