Пример #1
0
        /// <summary>
        /// Accumulates topic confidences per user e-mail: once for the author of every question
        /// listed in <paramref name="dict"/>, and once per answer for the author of each answer.
        /// </summary>
        /// <param name="ql">Question list used to resolve each question id from <paramref name="dict"/>.</param>
        /// <param name="dict">Question id mapped to its (topic id, confidence) tuples.</param>
        /// <param name="questioners">Receives question author e-mail mapped to summed confidence per topic id.</param>
        /// <param name="answerers">Receives answer author e-mail mapped to summed confidence per topic id.</param>
        public static void GetUsersTopicDistribution(QuestionList ql, Dictionary<long, TopicIdAndConfidenceInItTuple[]> dict, out Dictionary<string, TopicIdAndConfidenceInItDictionary> questioners, out Dictionary<string, TopicIdAndConfidenceInItDictionary> answerers)
        {
            questioners = new Dictionary<string, TopicIdAndConfidenceInItDictionary>();
            answerers = new Dictionary<string, TopicIdAndConfidenceInItDictionary>();

            foreach (var entry in dict)
            {
                var question = ql.GetQuestion(entry.Key);
                foreach (TopicIdAndConfidenceInItTuple topic in entry.Value)
                {
                    // add the user (or extend the distribution of an already known one)
                    AddTopicConfidence(questioners, question.AuthorEmail, topic);

                    // Every answer contributes the question's topic to its author.
                    foreach (var answer in question.GetAnswers())
                    {
                        AddTopicConfidence(answerers, answer.AuthorEmail, topic);
                    }
                }
            }
        }

        /// <summary>
        /// Adds the confidence of <paramref name="topic"/> to the distribution stored for
        /// <paramref name="email"/>, creating that distribution when the user is seen for the first time.
        /// </summary>
        private static void AddTopicConfidence(Dictionary<string, TopicIdAndConfidenceInItDictionary> users, string email, TopicIdAndConfidenceInItTuple topic)
        {
            TopicIdAndConfidenceInItDictionary distribution;
            if (users.TryGetValue(email, out distribution))
            {
                distribution.UpdateOrAdd(topic.Item1, v => v + topic.Item2, topic.Item2);
            }
            else
            {
                users.Add(email, new[] { topic }.ToDictionary(w => w.Item1, w => w.Item2));
            }
        }
Пример #2
0
 /// <summary>
 /// Builds the index using the frequent words returned by <c>GetFrequentWords</c> as the vocabulary.
 /// Words receive consecutive ids starting at 0, in the order they are returned.
 /// </summary>
 private TrigramIndex(QuestionList questionList)
 {
     // Frequencies dictionary based on Mail.Ru corpus
     var frequentWords = GetFrequentWords(questionList);

     // The position of a word in the array becomes its id.
     var numberedWords = frequentWords
         .Select((word, id) => new { id, word })
         .ToDictionary(entry => entry.id, entry => entry.word);
     IdToWord = new SortedDictionary<int, string>(numberedWords);

     Trigrams = GetDefaultTrigramIndex(questionList);
     vocabulary = new HashSet<string>(IdToWord.Values);
 }
Пример #3
0
 /// <summary>
 /// Stores the question list, creates the <c>UserQuality</c> helper for it and indexes all
 /// answers by id; when several answers share an id, only the first one encountered is kept.
 /// </summary>
 public AnswerQuality(QuestionList questionList)
 {
     this.questionList = questionList;
     userQuality = new UserQuality(questionList);

     foreach (var answer in questionList.GetAllAnswers())
     {
         // Skip ids that are already indexed instead of letting Add throw.
         if (answers.ContainsKey(answer.Id))
         {
             continue;
         }

         answers.Add(answer.Id, answer);
     }
 }
Пример #4
0
 /// <summary>
 /// Builds the index using a vocabulary loaded from <paramref name="wordsDictionaryFileName"/>
 /// via <c>LoadFromFile</c>, reporting progress on the console.
 /// </summary>
 private TrigramIndex(QuestionList questionList, string wordsDictionaryFileName)
 {
     // External frequencies dictionary (Google: "ruscorpora"):
     Console.WriteLine("TrigramIndex: Geting Words from " + wordsDictionaryFileName);
     var loadedWords = LoadFromFile(wordsDictionaryFileName);
     IdToWord = new SortedDictionary<int, string>(loadedWords);

     Console.WriteLine("TrigramIndex: Geting Trigram Index now");
     Trigrams = GetDefaultTrigramIndex(questionList);

     // The vocabulary is the set of all words that received an id.
     vocabulary = new HashSet<string>(IdToWord.Values);
 }
Пример #5
0
        /// <summary>
        /// Returns the trigram index as a dictionary from the key produced by
        /// <c>CalculateTrigramIndex</c> to its set of integer ids, obtained through
        /// <c>DataActualityChecker.Check</c>.
        /// </summary>
        /// <remarks>
        /// Each entry is converted to and from a single string: the key followed by the ids,
        /// all separated by the control character U+0002.
        /// NOTE(review): presumably <c>DataActualityChecker</c> caches the data in the file named in
        /// <c>FileDependencies</c> and recomputes it when the question/answer files change — confirm.
        /// NOTE(review): the file name embeds <c>questionList.GetHashCode()</c>; unless
        /// <c>QuestionList</c> overrides it, that value is not stable between runs — confirm.
        /// </remarks>
        /// <param name="questionList">Question list whose hash code becomes part of the data file name.</param>
        private Dictionary<string, HashSet<int>> GetDefaultTrigramIndex(QuestionList questionList)
        {
            const char fieldSeparator = '\x2';
            var separatorText = fieldSeparator.ToString();

            var getDataFunction = new Func<Tuple<string, HashSet<int>>[]>(
                () => CalculateTrigramIndex().Select(pair => Tuple.Create(pair.Key, pair.Value)).ToArray());

            return DataActualityChecker.Check
                (
                    new Lazy<Tuple<string, HashSet<int>>[]>(getDataFunction),
                    entry => entry.Item1 + separatorText + String.Join(separatorText, entry.Item2),
                    line =>
                        {
                            var fields = line.Split(fieldSeparator);
                            // An empty id set serializes as "key<sep>", which splits into a trailing
                            // empty field; skip it so int.Parse does not throw FormatException.
                            var ids = fields.Skip(1).Where(field => field.Length > 0).Select(int.Parse);
                            return Tuple.Create(fields[0], new HashSet<int>(ids));
                        },
                    new FileDependencies(
                        String.Format("TrigramIndex_{0}.txt", questionList.GetHashCode()),
                        Program.QuestionsFileName,
                        Program.AnswersFileName)
                )
                .ToDictionary(item => item.Item1, item => item.Item2);
        }
Пример #6
0
        /// <summary>
        /// Returns the words whose value in the word-frequency distribution of
        /// <paramref name="questionList"/> is greater than 10, obtained through
        /// <c>DataActualityChecker.Check</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): presumably <c>DataActualityChecker</c> caches the words in the file named in
        /// <c>FileDependencies</c> and recomputes them when the question/answer files change — confirm.
        /// </remarks>
        /// <param name="questionList">Source of the statistics; its hash code is part of the data file name.</param>
        /// <returns>An array of the selected words.</returns>
        public static string[] GetFrequentWords(QuestionList questionList)
        {
            // Computation is wrapped in Lazy below, so it only runs if the checker asks for the value.
            Func<string[]> computeFrequentWords = () =>
                new Statistics.Statistics(questionList)
                    .WordFrequencyDistribution(new EmptyStemmer())
                    .Where(entry => entry.Value > 10)
                    .Select(entry => entry.Key)
                    .ToArray();

            var dependencies = new FileDependencies(
                String.Format("FrequentWords_{0}.txt", questionList.GetHashCode()),
                Program.QuestionsFileName,
                Program.AnswersFileName);

            // Words are already strings, so both conversion delegates are identities.
            var words = DataActualityChecker.Check(
                new Lazy<string[]>(computeFrequentWords),
                word => word,
                line => line,
                dependencies);

            return words.ToArray();
        }
Пример #7
0
 /// <summary>
 /// Creates a <see cref="TrigramIndex"/> for <paramref name="questionList"/> using the word
 /// dictionary file "1grams-3.txt" located under <c>Program.FilesDirectory</c>.
 /// </summary>
 public static TrigramIndex CreateFromDefaultDictionaryAnd(QuestionList questionList)
 {
     // The path is built by plain concatenation — presumably FilesDirectory ends with a
     // directory separator; verify.
     return new TrigramIndex(questionList, Program.FilesDirectory + "1grams-3.txt");
 }
Пример #8
0
 /// <summary>
 /// Builds weighted directed edges from answer authors to question authors: the weight of
 /// (answerer, asker) is the number of answers the first gave to questions of the second.
 /// </summary>
 /// <returns>A lazily evaluated sequence of (from, to, weight) tuples.</returns>
 private static IEnumerable<Tuple<string, string, double>> GetAllEdges(QuestionList ql)
 {
     var weights = new Dictionary<Tuple<string, string>, double>();

     foreach (var question in ql.GetAllQuestions())
     {
         foreach (var answer in question.GetAnswers())
         {
             // Edge direction: answer author -> question author.
             var edge = Tuple.Create(answer.AuthorEmail, question.AuthorEmail);
             weights.UpdateOrAdd(edge, count => count + 1d, 1d);
         }
     }

     return from pair in weights
            select Tuple.Create(pair.Key.Item1, pair.Key.Item2, pair.Value);
 }
Пример #9
0
 /// <summary>
 /// Creates the statistics object; the question list is passed straight to the base-class constructor.
 /// </summary>
 public Statistics(QuestionList questionList)
     : base(questionList)
 {
 }
Пример #10
0
 /// <summary>
 /// Stores the question list and the two store paths in the corresponding members.
 /// </summary>
 protected LDADataGenerator(QuestionList questionList, string vocabularyStorePath, string documentsStorePath)
 {
     QuestionList = questionList;
     VocabularyStorePath = vocabularyStorePath;
     DocumentsStorePath = documentsStorePath;
 }
Пример #11
0
 /// <summary>
 /// Creates the generator; all three arguments are forwarded unchanged to the base-class constructor.
 /// </summary>
 public InferFormatLDAGenerator(QuestionList questionList, string vocabularyStorePath, string documentsStorePath)
     : base(questionList, vocabularyStorePath, documentsStorePath)
 {
 }
Пример #12
0
 /// <summary>
 /// Creates a <c>Statistics</c> instance for <paramref name="ql"/> and keeps a reference to the list itself.
 /// </summary>
 public ExpertUsers(QuestionList ql)
 {
     statistics = new Statistics(ql);
     this.ql = ql;
 }
Пример #13
0
			// Verifies that no answer shares its Id with any question of the loaded question list.
			// Every colliding id is printed to the console, and the test fails if at least one exists.
			public void TestId()
			{
				var ql = new QuestionList(QuestionsFileName, AnswersFileName);
				var hasIdenticId = false;
				foreach (var question in ql.GetAllQuestions())
				{
					foreach (var answer in ql.GetAllAnswers())
					{
						if (answer.Id == question.Id)
						{
							// Raise the flag only on a real collision; it used to be set for every
							// (question, answer) pair, so the assertion never depended on the ids.
							hasIdenticId = true;
							Console.WriteLine("BAD ID!!!!!!!!! " + answer.Id);
						}
					}
				}
				// A shared id is the failure condition, so the flag must still be false here.
				Assert.AreEqual(false, hasIdenticId);
			}
Пример #14
0
 /// <summary>
 /// Reads the question ids from <paramref name="docIdsFile"/> (one integer per line), maps each
 /// id to its zero-based line index, and loads the topics via <c>ReadTopicsFrom</c>.
 /// </summary>
 /// <param name="questionList">Passed to the base-class constructor.</param>
 /// <param name="docIdsFile">Text file with one question id per line.</param>
 /// <param name="topicsFile">File handed to <c>ReadTopicsFrom</c>.</param>
 /// <param name="allTopicsNumber">Topic count handed to <c>ReadTopicsFrom</c>.</param>
 public TopicsStatistics(QuestionList questionList, string docIdsFile = Program.DocIdsFileName, string topicsFile = Program.TopicsFileName, int allTopicsNumber = Program.TopicsCount)
     : base(questionList)
 {
     var ids = File.ReadAllLines(docIdsFile).Select(Int64.Parse).ToArray();
     questionIds = ids;

     // The document id of a question is the index of its line in the ids file.
     var docIdByQuestionId = new Dictionary<long, int>();
     for (var docId = 0; docId < ids.Length; docId++)
     {
         // Add (not the indexer) so that a duplicated question id still throws.
         docIdByQuestionId.Add(ids[docId], docId);
     }
     questionsToDocs = docIdByQuestionId;

     topics = ReadTopicsFrom(topicsFile, allTopicsNumber);
 }
Пример #15
0
        /// <summary>
        /// Loads the users produced by <c>MailUserPageParser</c> for <c>Program.MailUsersDirectory</c>,
        /// keeps those whose e-mail appears as a question or answer author in
        /// <paramref name="questionList"/>, and creates the topic statistics for the same list.
        /// </summary>
        public UserStatistics(QuestionList questionList)
            : base(questionList)
        {
            var parser = new MailUserPageParser(Program.MailUsersDirectory);

            var askerEmails = questionList.GetAllQuestions().Select(question => question.AuthorEmail);
            var answererEmails = questionList.GetAllAnswers().Select(answer => answer.AuthorEmail);

            // Every e-mail that authored at least one question or answer.
            var knownEmails = new HashSet<string>(askerEmails);
            knownEmails.UnionWith(answererEmails);

            users = parser.GetObjects()
                .Where(user => knownEmails.Contains(user.Email))
                .ToList();

            TopicStatistics = new TopicsStatistics(questionList);
        }
Пример #16
0
 // Loads the question list from the question/answer files configured in Program and
 // creates the Statistics instance for it.
 public void Init()
 {
     var questionList = new QuestionList(Program.QuestionsFileName, Program.AnswersFileName);
     statistics = new Statistics(questionList);
 }
Пример #17
0
 /// <summary>
 /// Stores the question list and fills <c>userInfos</c> with the result of <c>CalculateUserInfos</c>.
 /// </summary>
 public UserQuality(QuestionList questionList)
 {
     // Assigned first — presumably CalculateUserInfos reads this.questionList; verify.
     this.questionList = questionList;
     userInfos = CalculateUserInfos();
 }
Пример #18
0
 /// <summary>
 /// Returns the distinct author e-mails of all questions and all answers in <paramref name="ql"/>,
 /// question authors first.
 /// </summary>
 private static IEnumerable<string> GetAllVertices(QuestionList ql)
 {
     var askers = ql.GetAllQuestions().Select(question => question.AuthorEmail);
     var answerers = ql.GetAllAnswers().Select(answer => answer.AuthorEmail);

     // Set union keeps first-occurrence order and removes duplicates.
     return askers.Union(answerers);
 }
Пример #19
0
        /// <summary>
        /// Collects, per author, the questions belonging to <paramref name="categories"/> (only authors
        /// with at least two such questions are kept), counts how many of those questions fall into
        /// each topic, and computes the topic move frequencies via <c>GetTopicSequenceFrequences(2)</c>.
        /// </summary>
        /// <param name="questionList">Source of the questions.</param>
        /// <param name="categories">Categories a question must belong to in order to be considered.</param>
        /// <param name="topicStatistics">Used to resolve the topic of each question.</param>
        public UserTopicsWalking(QuestionList questionList, ICollection<string> categories, TopicsStatistics topicStatistics)
        {
            this.topicStatistics = topicStatistics;

            UserQuestions = questionList.GetAllQuestions()
                .Where(question => categories.Contains(question.Category))
                .GroupBy(question => question.AuthorEmail, (email, asked) => new { email, asked })
                .Where(author => author.asked.Count() >= 2)
                .ToDictionary(author => author.email, author => author.asked);

            foreach (var question in UserQuestions.Values.SelectMany(asked => asked))
            {
                var topic = topicStatistics.GetTopicByQuestionId(question.Id, Threshold);
                if (topic == null)
                {
                    // No topic was returned for this question; it does not contribute.
                    continue;
                }

                var topicId = topic.Item1;
                if (!topicDistribution.ContainsKey(topicId))
                {
                    topicDistribution[topicId] = 0;
                }
                topicDistribution[topicId]++;
            }

            // Total number of questions that received a topic.
            userQuestionsCount = topicDistribution.Sum(it => it.Value);
            topicMoves = GetTopicSequenceFrequences(2);
        }
 /// <summary>
 /// Creates the generator, remembering <paramref name="documentIdsFilePath"/> and forwarding the
 /// question list and <paramref name="documentsFilePath"/> to the base-class constructor.
 /// </summary>
 public GibbsFormatLDAGenerator(QuestionList questionList, string documentIdsFilePath, string documentsFilePath)
     // An empty string is passed as the second base argument — presumably the vocabulary
     // store path, which this format does not use; verify against the base class.
     : base(questionList, "", documentsFilePath)
 {
     this.documentIdsFilePath = documentIdsFilePath;
 }
Пример #21
0
 /// <summary>
 /// Creates the generator, remembering <paramref name="documentIdsFilePath"/> and
 /// <paramref name="categories"/> and forwarding the question list and
 /// <paramref name="documentsFilePath"/> to the base-class constructor.
 /// </summary>
 public GibbsFormatLDAGenerator(QuestionList questionList, string documentIdsFilePath, string documentsFilePath, params string[] categories)
     // An empty string is passed as the second base argument — presumably the vocabulary
     // store path, which this format does not use; verify against the base class.
     : base(questionList, "", documentsFilePath)
 {
     this.documentIdsFilePath = documentIdsFilePath;
     this.categories = categories;
 }
Пример #22
0
        /// <summary>
        /// Rewrites the text and title of every question and the text of every answer in
        /// <paramref name="ql"/> by passing each item produced by <c>SplitInWordsAndStripHTML</c>
        /// through <c>SpellChecker.Fix</c> and joining the results with single spaces, then writes
        /// the modified questions and answers to <c>Program.QuestionsNoTyposFileName</c> and
        /// <c>Program.AnswersNoTyposFileName</c>.
        /// </summary>
        /// <param name="ql">Question list to modify in place; it is also the source of the trigram index.</param>
        public static void ModifyTyposCorpus(QuestionList ql)
        {
            var detector = new SpellChecker(TrigramIndex.CreateFrom(ql));
            Console.WriteLine("I am Modifying");

            // One shared pipeline for every text field instead of three copies of the same expression.
            Func<string, string> fixTypos =
                text => String.Join(" ", text.SplitInWordsAndStripHTML().Select(detector.Fix));

            // Stopwatch is monotonic; DateTime.Now differences are distorted by system clock adjustments.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            foreach (var question in ql.GetAllQuestions())
            {
                question.Text = fixTypos(question.Text);
                question.Title = fixTypos(question.Title);
            }
            Console.WriteLine("Questions modified in {0}", timer.Elapsed.TotalSeconds);

            timer = System.Diagnostics.Stopwatch.StartNew();
            foreach (var answer in ql.GetAllAnswers())
            {
                answer.Text = fixTypos(answer.Text);
            }
            Console.WriteLine("Answers modified in {0}", timer.Elapsed.TotalSeconds);

            File.WriteAllLines(Program.QuestionsNoTyposFileName, ql.GetAllQuestions().Select(Question.FormatStringWrite));
            File.WriteAllLines(Program.AnswersNoTyposFileName, ql.GetAllAnswers().Select(Answer.FormatStringWrite));
        }
Пример #23
0
 /// <summary>
 /// Stores the question list and the body part in the corresponding fields.
 /// </summary>
 public BodyCalculator(QuestionList questions, BodyPart body)
 {
     this.questions = questions;
     this.body = body;
 }
Пример #24
0
 /// <summary>
 /// Creates a <see cref="TrigramIndex"/> through the single-argument constructor, passing
 /// <paramref name="questionList"/> unchanged.
 /// </summary>
 public static TrigramIndex CreateFrom(QuestionList questionList)
 {
     return new TrigramIndex(questionList);
 }
Пример #25
0
 // Takes the question list from Program.DefaultQuestionList, creates the Statistics
 // instance for it and reports completion on the console.
 public void DistributionInit()
 {
     ql = Program.DefaultQuestionList;
     statistics = new Statistics(ql);
     Console.WriteLine("Preparations have been done");
 }
Пример #26
0
 /// <summary>
 /// Builds the user graph from the question list: the results of <c>GetAllVertices</c> and
 /// <c>GetAllEdges</c> for <paramref name="ql"/> are handed to the base-class constructor.
 /// </summary>
 public UserGraph(QuestionList ql)
     : base(GetAllVertices(ql), GetAllEdges(ql))
 {
 }