/// <summary>
/// For the first <paramref name="count"/> questions of <c>QuestionList</c>, builds a
/// tab-separated string of the "frequent" words that occur in the question text and in
/// the texts of its answers.
/// </summary>
/// <remarks>
/// A word is "frequent" when its value in the word frequency distribution (computed with
/// <c>EmptyStemmer</c>) is at least 10; the 70 words with the highest values among those
/// are then excluded. If 70 or fewer words pass the threshold, no word is retained and
/// the result is empty.
/// </remarks>
/// <param name="count">Maximum number of questions to take from <c>QuestionList</c>.</param>
/// <returns>
/// A dictionary from question <c>Id</c> to the tab-joined retained words, in the order
/// they were produced by <c>SplitInWordsAndStripHTML</c>. Questions for which no word is
/// retained are omitted.
/// </returns>
private Dictionary<long, string> GetPureQuestionAnswersTexts(int count)
{
    // Minimum distribution value a word needs in order to be considered at all.
    const int MinWordFrequency = 10;
    // Number of highest-valued words that are dropped from the vocabulary.
    const int MostFrequentWordsToDrop = 70;

    var statistics = new Statistics.Statistics(QuestionList);

    // Materialize the filtered, ascending-sorted distribution exactly once. Leaving it as
    // a deferred query would recompute the filter and the sort for both the count and
    // the Take below.
    var orderedFrequentWords = statistics.WordFrequencyDistribution(new EmptyStemmer())
        .Where(item => item.Value >= MinWordFrequency)
        .OrderBy(item => item.Value)
        .ToList();

    // The list is ascending by value, so keeping all but the last N entries removes the
    // N highest-valued words. Take yields nothing when the argument is zero or negative.
    var frequentWords = orderedFrequentWords
        .Take(orderedFrequentWords.Count - MostFrequentWordsToDrop)
        .ToDictionary(item => item.Key, item => item.Value);

    return QuestionList.GetAllQuestions()
        .Take(count)
        .Select(question =>
        {
            // Question text followed by all answer texts, separated by single spaces.
            var fullText = question.WholeText + " "
                + String.Join(" ", question.GetAnswers().Select(answer => answer.Text));

            var retainedWords = fullText.SplitInWordsAndStripHTML()
                .Where(frequentWords.ContainsKey);

            return new { Question = question, Words = String.Join("\t", retainedWords) };
        })
        .Where(entry => entry.Words.Length > 0)
        .ToDictionary(entry => entry.Question.Id, entry => entry.Words);
}
/// <summary>
/// Returns the words whose value in the word frequency distribution of
/// <paramref name="questionList"/> (computed with <c>EmptyStemmer</c>) is strictly
/// greater than 10.
/// </summary>
/// <remarks>
/// The computation is wrapped in a <c>Lazy</c> and handed to
/// <c>DataActualityChecker.Check</c> together with identity conversions and a set of
/// file dependencies; presumably the checker serves a cached copy when the dependency
/// files are up to date - confirm against <c>DataActualityChecker</c>.
/// </remarks>
/// <param name="questionList">Question list the word statistics are computed from.</param>
/// <returns>An array of the selected words.</returns>
public static string[] GetFrequentWords(QuestionList questionList)
{
    // Only evaluated if the checker actually asks the Lazy for its value.
    Func<string[]> computeFrequentWords = () =>
    {
        var distribution = new Statistics.Statistics(questionList)
            .WordFrequencyDistribution(new EmptyStemmer());

        return distribution
            .Where(entry => entry.Value > 10)
            .Select(entry => entry.Key)
            .ToArray();
    };
    var lazyWords = new Lazy<string[]>(computeFrequentWords);

    // NOTE(review): the file name embeds questionList.GetHashCode(); unless QuestionList
    // overrides GetHashCode with a content-based hash, this value is not guaranteed to be
    // stable between runs - confirm that is intended.
    var dependencies = new FileDependencies(
        String.Format("FrequentWords_{0}.txt", questionList.GetHashCode()),
        Program.QuestionsFileName,
        Program.AnswersFileName);

    var checkedWords = DataActualityChecker.Check(lazyWords, t => t, s => s, dependencies);
    return checkedWords.ToArray();
}