/// <summary>
/// Groups the questions of the selected categories by author, keeps only the
/// authors with at least two such questions, and counts how many of the kept
/// questions were resolved to each topic.
/// </summary>
/// <param name="questionList">Source of the questions to analyse.</param>
/// <param name="categories">Only questions whose <c>Category</c> is contained here are considered.</param>
/// <param name="topicStatistics">Resolves a question id to its topic; also stored in the instance field.</param>
public UserTopicsWalking(QuestionList questionList, ICollection<string> categories, TopicsStatistics topicStatistics)
{
    this.topicStatistics = topicStatistics;

    // Author e-mail -> that author's questions, restricted to authors with 2+ questions.
    UserQuestions = questionList.GetAllQuestions()
        .Where(q => categories.Contains(q.Category))
        .GroupBy(q => q.AuthorEmail, (email, asked) => new { email, asked })
        .Where(entry => entry.asked.Count() >= 2)
        .ToDictionary(entry => entry.email, entry => entry.asked);

    foreach (var question in UserQuestions.Values.SelectMany(asked => asked))
    {
        var topic = topicStatistics.GetTopicByQuestionId(question.Id, Threshold);
        if (topic == null)
        {
            // No topic was returned for this question; it does not contribute to the distribution.
            continue;
        }

        var topicKey = topic.Item1;
        if (!topicDistribution.ContainsKey(topicKey))
        {
            topicDistribution[topicKey] = 1;
        }
        else
        {
            topicDistribution[topicKey]++;
        }
    }

    // Total number of questions that were assigned a topic.
    userQuestionsCount = topicDistribution.Sum(entry => entry.Value);
    topicMoves = GetTopicSequenceFrequences(2);
}
/// <summary>
/// Runs every question title, question text and answer text of <paramref name="ql"/>
/// through a <c>SpellChecker</c> built from the same corpus, overwriting the texts in place,
/// and then writes the modified questions and answers to
/// <c>Program.QuestionsNoTyposFileName</c> and <c>Program.AnswersNoTyposFileName</c>.
/// Progress and elapsed seconds are reported on the console.
/// </summary>
/// <param name="ql">Corpus whose questions and answers are modified and exported.</param>
public static void ModifyTyposCorpus(QuestionList ql)
{
    var detector = new SpellChecker(TrigramIndex.CreateFrom(ql));

    // Splits a text into words (HTML stripped), fixes each word and re-joins with single spaces.
    Func<string, string> fixText =
        text => String.Join(" ", text.SplitInWordsAndStripHTML().Select(detector.Fix));

    Console.WriteLine("I am Modifying");

    // Stopwatch is used instead of DateTime.Now so the measurement is not
    // distorted by system clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    foreach (var question in ql.GetAllQuestions())
    {
        question.Text = fixText(question.Text);
        question.Title = fixText(question.Title);
    }
    Console.WriteLine("Questions modified in {0}", stopwatch.Elapsed.TotalSeconds);

    stopwatch.Restart();
    foreach (var answer in ql.GetAllAnswers())
    {
        answer.Text = fixText(answer.Text);
    }
    Console.WriteLine("Answers modified in {0}", stopwatch.Elapsed.TotalSeconds);

    File.WriteAllLines(Program.QuestionsNoTyposFileName, ql.GetAllQuestions().Select(Question.FormatStringWrite));
    File.WriteAllLines(Program.AnswersNoTyposFileName, ql.GetAllAnswers().Select(Answer.FormatStringWrite));
}
/// <summary>
/// Checks that no answer in the corpus has the same Id as a question.
/// Every colliding Id is printed to the console, and the test fails if at
/// least one collision was found.
/// </summary>
public void TestId()
{
    var ql = new QuestionList(QuestionsFileName, AnswersFileName);
    var hasIdenticId = false;

    foreach (var question in ql.GetAllQuestions())
    {
        foreach (var answer in ql.GetAllAnswers())
        {
            if (answer.Id == question.Id)
            {
                // Record the collision only when the ids really match; the flag
                // was previously set for every pair, so the test could never fail.
                hasIdenticId = true;
                Console.WriteLine("BAD ID!!!!!!!!! " + answer.Id);
            }
        }
    }

    Assert.AreEqual(false, hasIdenticId, "At least one answer shares its Id with a question.");
}
/// <summary>
/// Loads the parsed user pages from <c>Program.MailUsersDirectory</c>, keeps only the
/// users whose e-mail appears as the author of a question or an answer in
/// <paramref name="questionList"/>, and creates the topic statistics for that list.
/// </summary>
/// <param name="questionList">Corpus that defines which users are relevant; also passed to the base constructor.</param>
public UserStatistics(QuestionList questionList)
    : base(questionList)
{
    var pageParser = new MailUserPageParser(Program.MailUsersDirectory);

    // E-mails of everyone who authored a question or an answer.
    var authorEmails = new HashSet<string>(questionList.GetAllQuestions().Select(q => q.AuthorEmail));
    authorEmails.UnionWith(questionList.GetAllAnswers().Select(a => a.AuthorEmail));

    users = (from user in pageParser.GetObjects()
             where authorEmails.Contains(user.Email)
             select user).ToList();

    TopicStatistics = new TopicsStatistics(questionList);
}
/// <summary>
/// Returns the distinct author e-mails of all questions and all answers in
/// <paramref name="ql"/>; question authors are enumerated before answer authors.
/// </summary>
/// <param name="ql">Corpus to collect the author e-mails from.</param>
/// <returns>A lazily evaluated sequence of unique author e-mails.</returns>
private static IEnumerable<string> GetAllVertices(QuestionList ql)
{
    var askers = ql.GetAllQuestions().Select(question => question.AuthorEmail);
    var responders = ql.GetAllAnswers().Select(answer => answer.AuthorEmail);

    // Union yields the first sequence then the second, dropping duplicates.
    return askers.Union(responders);
}
/// <summary>
/// Builds weighted directed edges from answer author to question author:
/// every answer to a question contributes 1 to the weight of the
/// (answer author e-mail, question author e-mail) pair.
/// </summary>
/// <param name="ql">Corpus whose questions and their answers are scanned.</param>
/// <returns>Lazily projected tuples of (from e-mail, to e-mail, accumulated weight).</returns>
private static IEnumerable<Tuple<string, string, double>> GetAllEdges(QuestionList ql)
{
    var weights = new Dictionary<Tuple<string, string>, double>();

    // One (answerer, asker) pair per answer, in question order and then answer order.
    var answerPairs = ql.GetAllQuestions().SelectMany(
        question => question.GetAnswers(),
        (question, answer) => Tuple.Create(answer.AuthorEmail, question.AuthorEmail));

    foreach (var pair in answerPairs)
    {
        weights.UpdateOrAdd(pair, weight => weight + 1d, 1d);
    }

    return from edge in weights
           select Tuple.Create(edge.Key.Item1, edge.Key.Item2, edge.Value);
}