/// <summary>
/// Builds, for at most the first <paramref name="count"/> questions, a tab-separated string
/// of the "frequent" words occurring in the question text and in the texts of its answers.
/// </summary>
/// <param name="count">Maximum number of questions taken from <c>QuestionList</c>.</param>
/// <returns>
/// A dictionary keyed by question <c>Id</c>; the value is the question's frequent words
/// joined with tab characters. Questions that contain no frequent word are omitted.
/// </returns>
private Dictionary<long, string> GetPureQuestionAnswersTexts(int count)
        {
            // A word is kept when its value in the frequency distribution is at least this large...
            const int MinimumFrequency = 10;
            // ...and it is not among this many highest-valued words of the remaining ones.
            const int MostFrequentWordsToSkip = 70;

            var statistics = new Statistics.Statistics(QuestionList);

            // Materialize once: the filtered + ordered query is deferred, so calling Count() and
            // then Take() on it directly would run the filter and the sort twice.
            var orderedFrequentWords = statistics.WordFrequencyDistribution(new EmptyStemmer())
                .Where(item => item.Value >= MinimumFrequency)
                .OrderBy(item => item.Value)
                .ToList();

            // Ascending order means the tail holds the largest values; dropping it removes the
            // most frequent words. A non-positive Take argument simply yields an empty filter.
            var frequentWords = orderedFrequentWords
                .Take(orderedFrequentWords.Count - MostFrequentWordsToSkip)
                .ToDictionary(item => item.Key, item => item.Value);

            return QuestionList.GetAllQuestions()
                    .Take(count)
                    .Select(q => new
                    {
                        Question = q,
                        // Question text followed by all answer texts, split into words
                        // (SplitInWordsAndStripHTML), filtered to frequent words, tab-joined.
                        Text = String.Join(
                            "\t",
                            (q.WholeText + " " + String.Join(" ", q.GetAnswers().Select(a => a.Text)))
                                .SplitInWordsAndStripHTML()
                                .Where(frequentWords.ContainsKey))
                    })
                    .Where(item => item.Text.Length > 0)
                    .ToDictionary(item => item.Question.Id, item => item.Text);
        }
Example #2
0
        /// <summary>
        /// Returns the words whose value in the word-frequency distribution of
        /// <paramref name="questionList"/> is strictly greater than 10.
        /// </summary>
        /// <remarks>
        /// The computation is wrapped in a <see cref="Lazy{T}"/> and handed to
        /// <c>DataActualityChecker.Check</c> together with a file name derived from
        /// <c>questionList.GetHashCode()</c> and the questions/answers file names.
        /// NOTE(review): presumably <c>Check</c> reuses a stored result while those files are
        /// unchanged and only evaluates the lazy value otherwise — confirm against
        /// <c>DataActualityChecker</c>. Also confirm that <c>QuestionList.GetHashCode()</c> is
        /// stable across runs, since it is part of the file name.
        /// </remarks>
        /// <param name="questionList">Question list the statistics are computed from.</param>
        /// <returns>An array of the frequent words.</returns>
        public static string[] GetFrequentWords(QuestionList questionList)
        {
            // Deferred computation of the frequent words; runs only if the lazy value is requested.
            Func<string[]> computeFrequentWords = () =>
            {
                var distribution = new Statistics.Statistics(questionList)
                    .WordFrequencyDistribution(new EmptyStemmer());

                var words = from entry in distribution
                            where entry.Value > 10
                            select entry.Key;

                return words.ToArray();
            };

            var lazyWords = new Lazy<string[]>(computeFrequentWords);

            var dependencies = new FileDependencies(
                String.Format("FrequentWords_{0}.txt", questionList.GetHashCode()),
                Program.QuestionsFileName,
                Program.AnswersFileName);

            // Both conversion callbacks are identity functions.
            return DataActualityChecker
                .Check(lazyWords, t => t, s => s, dependencies)
                .ToArray();
        }