/// <summary>
/// Collects the author e-mail of every question and then of every answer
/// into a single <c>DistributionCreator&lt;string&gt;</c> and returns its data.
/// </summary>
/// <returns>
/// The creator's accumulated data keyed by author e-mail.
/// NOTE(review): the integer is presumably the number of messages written by
/// that author - confirm against <c>DistributionCreator</c>.
/// </returns>
public SortedDictionary<string, int> UserActivityInMessagesDistibution()
{
    // Questions seed the distribution through the constructor.
    var questionAuthors = Questions.Select(question => question.AuthorEmail);
    var distribution = new DistributionCreator<string>(questionAuthors);

    // Answers are merged into the same distribution afterwards.
    var answerAuthors = Answers.Select(answer => answer.AuthorEmail);
    distribution.AddData(answerAuthors);

    return distribution.GetData();
}
/// <summary>
/// Splits the whole text of every question and the text of every answer into
/// words (HTML stripped by <c>SplitInWordsAndStripHTML</c>), passes each word
/// through <paramref name="stemmer"/>, and accumulates the stemmed words in a
/// single <c>DistributionCreator&lt;string&gt;</c>.
/// </summary>
/// <param name="stemmer">Stemmer whose <c>Stem</c> method is applied to every word.</param>
/// <returns>
/// The creator's accumulated data keyed by stemmed word.
/// NOTE(review): the integer is presumably the occurrence count of that stem -
/// confirm against <c>DistributionCreator</c>.
/// </returns>
public SortedDictionary<string, int> WordFrequencyDistribution(IStemmer stemmer)
{
    // Question words seed the distribution through the constructor.
    var stemmedQuestionWords = Questions
        .SelectMany(question => question.WholeText.SplitInWordsAndStripHTML())
        .Select(stemmer.Stem);
    var distribution = new DistributionCreator<string>(stemmedQuestionWords);

    // Answer words are merged into the same distribution afterwards.
    var stemmedAnswerWords = Answers
        .SelectMany(answer => answer.Text.SplitInWordsAndStripHTML())
        .Select(stemmer.Stem);
    distribution.AddData(stemmedAnswerWords);

    return distribution.GetData();
}
/// <summary>
/// Builds a word-count representation of at most <paramref name="count"/>
/// questions and writes it to disk. A "document" is one question: its whole
/// text together with the text of all of its answers.
/// </summary>
/// <remarks>
/// Two files are written:
/// <list type="bullet">
/// <item><description>
/// <c>VocabularyStorePath</c> - one word per line, ordered by word id. Ids are
/// assigned consecutively from 0 in the order words are first encountered, so
/// the zero-based line index of a word equals its id.
/// </description></item>
/// <item><description>
/// <c>DocumentsStorePath</c> - one line per document: the number of distinct
/// words in the document, a space, then space-separated <c>id:count</c> pairs.
/// </description></item>
/// </list>
/// </remarks>
/// <param name="count">
/// Maximum number of questions taken from <c>QuestionList.GetAllQuestions()</c>.
/// </param>
public override void GenerateDocuments(int count)
{
    var allWordCountsByDocuments = new List<Dictionary<int, int>>();
    var wordToId = new Dictionary<string, int>();

    foreach (var question in QuestionList.GetAllQuestions().Take(count))
    {
        // Count the words of the question and of all its answers together.
        var statisticGenerator = new DistributionCreator<string>(question.WholeText.SplitInWordsAndStripHTML());
        statisticGenerator.AddData(question.GetAnswers().SelectMany(t => t.Text.SplitInWordsAndStripHTML()));

        var wordIdToCountInDocument = new Dictionary<int, int>();

        // Enumerate key/value pairs directly so that no word has to be looked
        // up a second time just to read its count.
        foreach (var wordAndCount in statisticGenerator.GetData())
        {
            int wordId;
            if (!wordToId.TryGetValue(wordAndCount.Key, out wordId))
            {
                // Ids are dense and zero-based, so the next free id is always
                // the current vocabulary size.
                wordId = wordToId.Count;
                wordToId.Add(wordAndCount.Key, wordId);
            }

            wordIdToCountInDocument[wordId] = wordAndCount.Value;
        }

        allWordCountsByDocuments.Add(wordIdToCountInDocument);
    }

    // Store the vocabulary. Explicitly order by id: Dictionary enumeration
    // order is not guaranteed.
    var vocabularyLines = wordToId
        .OrderBy(item => item.Value)
        .Select(item => item.Key);
    File.WriteAllText(VocabularyStorePath, String.Join(Environment.NewLine, vocabularyLines));

    // Store the per-document word counts: "<distinct words> <id>:<count> ...".
    var documentLines = allWordCountsByDocuments.Select(item =>
        item.Count + " " + String.Join(" ", item.Select(idToCount => idToCount.Key + ":" + idToCount.Value)));
    File.WriteAllText(DocumentsStorePath, String.Join(Environment.NewLine, documentLines));
}