示例#1
0
 /// <summary>
 /// Builds a distribution keyed by author e-mail, fed first with the authors of
 /// all questions and then with the authors of all answers.
 /// </summary>
 /// <returns>
 /// The data produced by <c>DistributionCreator&lt;string&gt;.GetData()</c>
 /// (presumably the number of messages per author e-mail; confirm against
 /// <c>DistributionCreator</c>).
 /// </returns>
 public SortedDictionary<string, int> UserActivityInMessagesDistibution()
 {
     // Seed the distribution with question authors...
     var questionAuthorEmails = Questions.Select(question => question.AuthorEmail);
     var emailDistribution = new DistributionCreator<string>(questionAuthorEmails);

     // ...then fold the answer authors into the same distribution.
     var answerAuthorEmails = Answers.Select(answer => answer.AuthorEmail);
     emailDistribution.AddData(answerAuthorEmails);

     return emailDistribution.GetData();
 }
示例#2
0
 /// <summary>
 /// Builds a distribution over stemmed words taken from the whole text of every
 /// question and from the text of every answer.
 /// </summary>
 /// <param name="stemmer">Stemmer applied to each word before it is added.</param>
 /// <returns>
 /// The data produced by <c>DistributionCreator&lt;string&gt;.GetData()</c>
 /// (presumably occurrence counts per stemmed word; confirm against
 /// <c>DistributionCreator</c>).
 /// </returns>
 public SortedDictionary<string, int> WordFrequencyDistribution(IStemmer stemmer)
 {
     // Words from questions: split, strip HTML, then stem.
     var questionStems = Questions
         .SelectMany(question => question.WholeText.SplitInWordsAndStripHTML())
         .Select(stemmer.Stem);
     var stemDistribution = new DistributionCreator<string>(questionStems);

     // Words from answers go through the same pipeline and into the same distribution.
     var answerStems = Answers
         .SelectMany(answer => answer.Text.SplitInWordsAndStripHTML())
         .Select(stemmer.Stem);
     stemDistribution.AddData(answerStems);

     return stemDistribution.GetData();
 }
示例#3
0
        /// <summary>
        /// Converts up to <paramref name="count"/> questions (each together with its
        /// answers) into per-document word counts and writes two files:
        /// the vocabulary to <c>VocabularyStorePath</c> and the counts to
        /// <c>DocumentsStorePath</c>.
        /// </summary>
        /// <remarks>
        /// Vocabulary file: one word per line, ordered by word id (ids are assigned
        /// from 0 in order of first appearance).
        /// Documents file: one line per document in the form
        /// <c>N id:count id:count ...</c>, where <c>N</c> is the number of distinct
        /// word ids in that document.
        /// </remarks>
        /// <param name="count">Maximum number of questions to process.</param>
        public override void GenerateDocuments(int count)
        {
            var allWordCountsByDocuments = new List<Dictionary<int, int>>();
            var wordToId = new Dictionary<string, int>();
            foreach (var question in QuestionList.GetAllQuestions().Take(count))
            {
                // One document = the question text plus the text of all its answers.
                var statisticGenerator = new DistributionCreator<string>(question.WholeText.SplitInWordsAndStripHTML());
                statisticGenerator.AddData(question.GetAnswers().SelectMany(t => t.Text.SplitInWordsAndStripHTML()));

                var wordIdToCountInDocument = new Dictionary<int, int>();

                // Enumerate key/value pairs directly instead of walking Keys and
                // indexing back into the distribution for every word.
                foreach (var wordAndCount in statisticGenerator.GetData())
                {
                    int wordId;
                    if (!wordToId.TryGetValue(wordAndCount.Key, out wordId))
                    {
                        // Ids are dense and zero-based, so the next free id is
                        // always the current vocabulary size.
                        wordId = wordToId.Count;
                        wordToId.Add(wordAndCount.Key, wordId);
                    }
                    wordIdToCountInDocument[wordId] = wordAndCount.Value;
                }
                allWordCountsByDocuments.Add(wordIdToCountInDocument);
            }

            // Vocabulary storing
            File.WriteAllText(VocabularyStorePath,
                              String.Join(Environment.NewLine, wordToId.OrderBy(item => item.Value).Select(item => item.Key)));

            // Word counts per document storing
            File.WriteAllText(DocumentsStorePath,
                              String.Join(Environment.NewLine,
                                          allWordCountsByDocuments.Select(item => item.Count + " " + String.Join(" ", item.Select(idToCount => idToCount.Key + ":" + idToCount.Value))
                                          )));
        }