/// <summary>
/// Builds per-user topic distributions for question authors and answer authors.
/// For every question id in <paramref name="dict"/>, each (topic id, confidence) tuple of that
/// question is accumulated into the entry of the question's author and into the entry of the
/// author of every answer to that question.
/// </summary>
/// <param name="ql">Question list used to resolve question ids and their answers.</param>
/// <param name="dict">Question id mapped to that question's (topic id, confidence) tuples.</param>
/// <param name="questioners">Receives author e-mail mapped to summed confidence per topic id, for question authors.</param>
/// <param name="answerers">Receives author e-mail mapped to summed confidence per topic id, for answer authors.</param>
public static void GetUsersTopicDistribution(QuestionList ql, Dictionary<long, TopicIdAndConfidenceInItTuple[]> dict, out Dictionary<string, TopicIdAndConfidenceInItDictionary> questioners, out Dictionary<string, TopicIdAndConfidenceInItDictionary> answerers)
{
    questioners = new Dictionary<string, TopicIdAndConfidenceInItDictionary>();
    answerers = new Dictionary<string, TopicIdAndConfidenceInItDictionary>();

    foreach (var questionTopics in dict)
    {
        var question = ql.GetQuestion(questionTopics.Key);
        foreach (TopicIdAndConfidenceInItTuple topic in questionTopics.Value)
        {
            // Add the user (or accumulate into the existing entry).
            AccumulateTopicForUser(questioners, question.AuthorEmail, topic);

            foreach (var answer in question.GetAnswers())
            {
                AccumulateTopicForUser(answerers, answer.AuthorEmail, topic);
            }
        }
    }
}

/// <summary>
/// Adds the topic's confidence to the distribution stored for <paramref name="email"/>,
/// creating the distribution on the user's first occurrence.
/// </summary>
private static void AccumulateTopicForUser(Dictionary<string, TopicIdAndConfidenceInItDictionary> users, string email, TopicIdAndConfidenceInItTuple topic)
{
    TopicIdAndConfidenceInItDictionary distribution;
    if (users.TryGetValue(email, out distribution))
    {
        distribution.UpdateOrAdd(topic.Item1, v => v + topic.Item2, topic.Item2);
    }
    else
    {
        users.Add(email, new[] { topic }.ToDictionary(w => w.Item1, w => w.Item2));
    }
}
/// <summary>
/// Creates an index whose vocabulary is the frequent-word list computed from
/// <paramref name="questionList"/>; words are numbered from 0 in the order returned
/// by <c>GetFrequentWords</c>.
/// </summary>
/// <param name="questionList">Corpus the vocabulary and trigram index are derived from.</param>
private TrigramIndex(QuestionList questionList)
{
    // Frequencies dictionary based on Mail.Ru corpus
    var frequentWords = GetFrequentWords(questionList);
    var wordsById = frequentWords
        .Select((word, id) => new { id, word })
        .ToDictionary(entry => entry.id, entry => entry.word);

    IdToWord = new SortedDictionary<int, string>(wordsById);
    Trigrams = GetDefaultTrigramIndex(questionList);
    vocabulary = new HashSet<string>(IdToWord.Values);
}
/// <summary>
/// Stores the question list, creates the <c>UserQuality</c> helper and registers every answer
/// by id, keeping the first answer seen for any id that occurs more than once.
/// </summary>
/// <param name="questionList">Source of the answers to register.</param>
public AnswerQuality(QuestionList questionList)
{
    this.questionList = questionList;
    userQuality = new UserQuality(questionList);

    foreach (var answer in questionList.GetAllAnswers())
    {
        // Skip ids that are already registered.
        if (answers.ContainsKey(answer.Id))
        {
            continue;
        }

        answers.Add(answer.Id, answer);
    }
}
/// <summary>
/// Creates an index whose vocabulary is loaded from an external words file instead of
/// being computed from the corpus.
/// </summary>
/// <param name="questionList">Corpus the trigram index is derived from.</param>
/// <param name="wordsDictionaryFileName">Path of the file passed to <c>LoadFromFile</c> to obtain the id-to-word mapping.</param>
private TrigramIndex(QuestionList questionList, string wordsDictionaryFileName)
{
    // External frequencies dictionary (Google: "ruscorpora"):
    Console.WriteLine("TrigramIndex: Geting Words from " + wordsDictionaryFileName);
    var loadedWords = LoadFromFile(wordsDictionaryFileName);
    IdToWord = new SortedDictionary<int, string>(loadedWords);

    Console.WriteLine("TrigramIndex: Geting Trigram Index now");
    Trigrams = GetDefaultTrigramIndex(questionList);

    vocabulary = new HashSet<string>(IdToWord.Values);
}
/// <summary>
/// Obtains the trigram index through <c>DataActualityChecker.Check</c>, supplying a lazy
/// computation (<c>CalculateTrigramIndex</c>), a line serializer/parser pair and the file
/// dependencies, and returns the result as a dictionary.
/// </summary>
/// <remarks>
/// Each entry is written as the key followed by its integer values, all separated by the
/// U+0002 character. NOTE(review): the store file name embeds
/// <c>questionList.GetHashCode()</c>; whether that value is stable across runs depends on
/// <c>QuestionList</c> - confirm.
/// </remarks>
/// <param name="questionList">Question list whose hash code is embedded in the store file name.</param>
/// <returns>Trigram key mapped to its set of integer ids.</returns>
private Dictionary<string, HashSet<int>> GetDefaultTrigramIndex(QuestionList questionList)
{
    Func<Tuple<string, HashSet<int>>[]> computeIndex = () =>
        CalculateTrigramIndex()
            .Select(entry => Tuple.Create(entry.Key, entry.Value))
            .ToArray();

    var storeFileName = String.Format("TrigramIndex_{0}.txt", questionList.GetHashCode());
    var dependencies = new FileDependencies(storeFileName, Program.QuestionsFileName, Program.AnswersFileName);

    var entries = DataActualityChecker.Check(
        new Lazy<Tuple<string, HashSet<int>>[]>(computeIndex),
        t => t.Item1 + "\x2" + String.Join("\x2", t.Item2),
        s =>
        {
            // First field is the key, the remaining fields are the integer ids.
            var fields = s.Split('\x2');
            var ids = new HashSet<int>(fields.Skip(1).Select(int.Parse));
            return Tuple.Create(fields[0], ids);
        },
        dependencies);

    return entries.ToDictionary(item => item.Item1, item => item.Item2);
}
/// <summary>
/// Obtains the frequent-word list through <c>DataActualityChecker.Check</c>. The lazy
/// computation keeps the words whose value in the word-frequency distribution
/// (built with an <c>EmptyStemmer</c>) is greater than 10.
/// </summary>
/// <remarks>
/// NOTE(review): the store file name embeds <c>questionList.GetHashCode()</c>; whether that
/// value is stable across runs depends on <c>QuestionList</c> - confirm.
/// </remarks>
/// <param name="questionList">Corpus the word frequencies are computed from.</param>
/// <returns>Array of frequent words.</returns>
public static string[] GetFrequentWords(QuestionList questionList)
{
    Func<string[]> computeWords = () =>
    {
        var corpusStatistics = new Statistics.Statistics(questionList);
        var distribution = corpusStatistics.WordFrequencyDistribution(new EmptyStemmer());
        return distribution
            .Where(item => item.Value > 10)
            .Select(item => item.Key)
            .ToArray();
    };

    var storeFileName = String.Format("FrequentWords_{0}.txt", questionList.GetHashCode());
    var dependencies = new FileDependencies(storeFileName, Program.QuestionsFileName, Program.AnswersFileName);

    // Words are stored one per line, so serializer and parser are both the identity.
    var words = DataActualityChecker.Check(
        new Lazy<string[]>(computeWords),
        t => t,
        s => s,
        dependencies);

    return words.ToArray();
}
/// <summary>
/// Creates a trigram index that takes its vocabulary from the "1grams-3.txt" file located in
/// <c>Program.FilesDirectory</c>.
/// </summary>
/// <param name="questionList">Corpus the trigram index is derived from.</param>
/// <returns>The newly constructed index.</returns>
public static TrigramIndex CreateFromDefaultDictionaryAnd(QuestionList questionList)
{
    var wordsDictionaryFileName = Program.FilesDirectory + "1grams-3.txt";
    return new TrigramIndex(questionList, wordsDictionaryFileName);
}
/// <summary>
/// Builds weighted edges from answer authors to question authors: the weight of
/// (answerer, questioner) is the number of answers that answerer gave to questions
/// of that questioner.
/// </summary>
/// <param name="ql">Question list to scan.</param>
/// <returns>Sequence of (from, to, weight) tuples.</returns>
private static IEnumerable<Tuple<string, string, double>> GetAllEdges(QuestionList ql)
{
    var weights = new Dictionary<Tuple<string, string>, double>();

    // One (answerer, questioner) pair per answer, in question-then-answer order.
    var answerPairs = ql.GetAllQuestions().SelectMany(
        question => question.GetAnswers(),
        (question, answer) => Tuple.Create(answer.AuthorEmail, question.AuthorEmail));

    foreach (var pair in answerPairs)
    {
        weights.UpdateOrAdd(pair, count => count + 1d, 1d);
    }

    return weights.Select(edge => Tuple.Create(edge.Key.Item1, edge.Key.Item2, edge.Value));
}
/// <summary>
/// Creates the statistics object; the question list is forwarded unchanged to the base constructor.
/// </summary>
/// <param name="questionList">Question list passed to the base class.</param>
public Statistics(QuestionList questionList)
    : base(questionList)
{
}
/// <summary>
/// Stores the question list and the two store paths for use by derived generators.
/// </summary>
/// <param name="questionList">Value assigned to <c>QuestionList</c>.</param>
/// <param name="vocabularyStorePath">Value assigned to <c>VocabularyStorePath</c>.</param>
/// <param name="documentsStorePath">Value assigned to <c>DocumentsStorePath</c>.</param>
protected LDADataGenerator(QuestionList questionList, string vocabularyStorePath, string documentsStorePath)
{
    VocabularyStorePath = vocabularyStorePath;
    DocumentsStorePath = documentsStorePath;
    QuestionList = questionList;
}
/// <summary>
/// Creates the generator; all three arguments are forwarded unchanged to the base constructor.
/// </summary>
/// <param name="questionList">Question list passed to the base class.</param>
/// <param name="vocabularyStorePath">Vocabulary store path passed to the base class.</param>
/// <param name="documentsStorePath">Documents store path passed to the base class.</param>
public InferFormatLDAGenerator(
    QuestionList questionList,
    string vocabularyStorePath,
    string documentsStorePath)
    : base(questionList, vocabularyStorePath, documentsStorePath)
{
}
/// <summary>
/// Keeps the question list and creates a <c>Statistics</c> instance over it.
/// </summary>
/// <param name="ql">Question list to analyse.</param>
public ExpertUsers(QuestionList ql)
{
    this.ql = ql;
    statistics = new Statistics(ql);
}
/// <summary>
/// Verifies that no answer shares its id with any question. Every colliding id is
/// printed to the console and the test fails if at least one collision was found.
/// </summary>
public void TestId()
{
    var ql = new QuestionList(QuestionsFileName, AnswersFileName);
    var hasIdenticId = false;

    foreach (var question in ql.GetAllQuestions())
    {
        foreach (var answer in ql.GetAllAnswers())
        {
            // Raise the flag only on a real collision; previously it was set on every
            // iteration, which made the final assertion pass regardless of the data.
            if (answer.Id == question.Id)
            {
                hasIdenticId = true;
                Console.WriteLine("BAD ID!!!!!!!!! " + answer.Id);
            }
        }
    }

    Assert.AreEqual(false, hasIdenticId);
}
/// <summary>
/// Loads the document-id file and the topics file.
/// Each line of <paramref name="docIdsFile"/> is parsed as a 64-bit question id; the zero-based
/// line index becomes that question's document id.
/// </summary>
/// <param name="questionList">Question list passed to the base class.</param>
/// <param name="docIdsFile">File with one question id per line.</param>
/// <param name="topicsFile">File passed to <c>ReadTopicsFrom</c>.</param>
/// <param name="allTopicsNumber">Topic count passed to <c>ReadTopicsFrom</c>.</param>
public TopicsStatistics(QuestionList questionList, string docIdsFile = Program.DocIdsFileName, string topicsFile = Program.TopicsFileName, int allTopicsNumber = Program.TopicsCount)
    : base(questionList)
{
    var idLines = File.ReadAllLines(docIdsFile);
    questionIds = idLines.Select(line => Int64.Parse(line)).ToArray();

    // Question id -> position of that id in the document-id file.
    questionsToDocs = Enumerable.Range(0, questionIds.Length)
        .ToDictionary(docId => questionIds[docId], docId => docId);

    topics = ReadTopicsFrom(topicsFile, allTopicsNumber);
}
/// <summary>
/// Loads the parsed user pages from <c>Program.MailUsersDirectory</c>, keeps only the users whose
/// e-mail appears as the author of a question or an answer in <paramref name="questionList"/>,
/// and creates the accompanying <c>TopicsStatistics</c>.
/// </summary>
/// <param name="questionList">Question list whose authors define the users of interest.</param>
public UserStatistics(QuestionList questionList)
    : base(questionList)
{
    var parser = new MailUserPageParser(Program.MailUsersDirectory);

    // E-mails of everybody who asked or answered something.
    var authorEmails = new HashSet<string>(questionList.GetAllQuestions().Select(q => q.AuthorEmail));
    authorEmails.UnionWith(questionList.GetAllAnswers().Select(a => a.AuthorEmail));

    users = parser.GetObjects()
        .Where(user => authorEmails.Contains(user.Email))
        .ToList();

    TopicStatistics = new TopicsStatistics(questionList);
}
/// <summary>
/// Creates the <c>Statistics</c> instance over a question list loaded from
/// <c>Program.QuestionsFileName</c> and <c>Program.AnswersFileName</c>.
/// </summary>
public void Init()
{
    statistics = new Statistics(
        new QuestionList(Program.QuestionsFileName, Program.AnswersFileName));
}
/// <summary>
/// Stores the question list and then computes the user infos via <c>CalculateUserInfos</c>.
/// </summary>
/// <param name="questionList">Question list kept in the instance field.</param>
public UserQuality(QuestionList questionList)
{
    // The question list is assigned first, before CalculateUserInfos runs.
    this.questionList = questionList;
    this.userInfos = CalculateUserInfos();
}
/// <summary>
/// Returns the distinct author e-mails of all questions followed by those of all answers.
/// </summary>
/// <param name="ql">Question list to scan.</param>
/// <returns>Lazily evaluated sequence of unique e-mails.</returns>
private static IEnumerable<string> GetAllVertices(QuestionList ql)
{
    var questionAuthors = ql.GetAllQuestions().Select(question => question.AuthorEmail);
    var answerAuthors = ql.GetAllAnswers().Select(answer => answer.AuthorEmail);

    // Union yields each e-mail once, question authors first.
    return questionAuthors.Union(answerAuthors);
}
/// <summary>
/// Collects, per author, the questions that fall into one of <paramref name="categories"/>
/// (only authors with at least two such questions are kept), counts how many of those
/// questions resolve to each topic, and computes the topic moves.
/// </summary>
/// <param name="questionList">Source of the questions.</param>
/// <param name="categories">Categories a question must belong to in order to be considered.</param>
/// <param name="topicStatistics">Used to resolve the topic of a question id with <c>Threshold</c>.</param>
public UserTopicsWalking(QuestionList questionList, ICollection<string> categories, TopicsStatistics topicStatistics)
{
    this.topicStatistics = topicStatistics;

    var questionsInCategories = questionList.GetAllQuestions()
        .Where(q => categories.Contains(q.Category));
    UserQuestions = questionsInCategories
        .GroupBy(q => q.AuthorEmail, (userEmail, hisQuestions) => new { userEmail, hisQuestions })
        .Where(u => u.hisQuestions.Count() >= 2)
        .ToDictionary(u => u.userEmail, u => u.hisQuestions);

    foreach (var question in UserQuestions.Values.SelectMany(questions => questions))
    {
        var topic = topicStatistics.GetTopicByQuestionId(question.Id, Threshold);
        if (topic != null)
        {
            // Count one more question for this topic.
            if (!topicDistribution.ContainsKey(topic.Item1))
            {
                topicDistribution[topic.Item1] = 1;
            }
            else
            {
                topicDistribution[topic.Item1]++;
            }
        }
    }

    userQuestionsCount = topicDistribution.Sum(it => it.Value);
    topicMoves = GetTopicSequenceFrequences(2);
}
/// <summary>
/// Creates the generator. The base class receives an empty vocabulary store path and
/// <paramref name="documentsFilePath"/> as the documents store path.
/// </summary>
/// <param name="questionList">Question list passed to the base class.</param>
/// <param name="documentIdsFilePath">Path kept in the <c>documentIdsFilePath</c> field.</param>
/// <param name="documentsFilePath">Documents store path passed to the base class.</param>
public GibbsFormatLDAGenerator(
    QuestionList questionList,
    string documentIdsFilePath,
    string documentsFilePath)
    : base(questionList, "", documentsFilePath)
{
    this.documentIdsFilePath = documentIdsFilePath;
}
/// <summary>
/// Creates the generator with a set of categories. The base class receives an empty vocabulary
/// store path and <paramref name="documentsFilePath"/> as the documents store path.
/// </summary>
/// <param name="questionList">Question list passed to the base class.</param>
/// <param name="documentIdsFilePath">Path kept in the <c>documentIdsFilePath</c> field.</param>
/// <param name="documentsFilePath">Documents store path passed to the base class.</param>
/// <param name="categories">Categories kept in the <c>categories</c> field.</param>
public GibbsFormatLDAGenerator(
    QuestionList questionList,
    string documentIdsFilePath,
    string documentsFilePath,
    params string[] categories)
    : base(questionList, "", documentsFilePath)
{
    this.categories = categories;
    this.documentIdsFilePath = documentIdsFilePath;
}
/// <summary>
/// Runs every word of each question's text and title and of each answer's text through a
/// <c>SpellChecker</c> built on a trigram index of <paramref name="ql"/>, replaces the
/// original strings in place, and writes the modified questions and answers to
/// <c>Program.QuestionsNoTyposFileName</c> and <c>Program.AnswersNoTyposFileName</c>.
/// </summary>
/// <remarks>
/// Elapsed time is measured with a <c>Stopwatch</c> rather than by subtracting
/// <c>DateTime.Now</c> values, so the reported durations are not affected by system clock
/// adjustments.
/// </remarks>
/// <param name="ql">Question list that is modified in place.</param>
public static void ModifyTyposCorpus(QuestionList ql)
{
    var detector = new SpellChecker(TrigramIndex.CreateFrom(ql));
    Console.WriteLine("I am Modifying");

    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    foreach (var question in ql.GetAllQuestions())
    {
        question.Text = String.Join(" ", question.Text.SplitInWordsAndStripHTML().Select(detector.Fix));
        question.Title = String.Join(" ", question.Title.SplitInWordsAndStripHTML().Select(detector.Fix));
    }
    Console.WriteLine("Questions modified in {0}", stopwatch.Elapsed.TotalSeconds);

    stopwatch.Restart();
    foreach (var answer in ql.GetAllAnswers())
    {
        answer.Text = String.Join(" ", answer.Text.SplitInWordsAndStripHTML().Select(detector.Fix));
    }
    Console.WriteLine("Answers modified in {0}", stopwatch.Elapsed.TotalSeconds);

    File.WriteAllLines(Program.QuestionsNoTyposFileName, ql.GetAllQuestions().Select(Question.FormatStringWrite));
    File.WriteAllLines(Program.AnswersNoTyposFileName, ql.GetAllAnswers().Select(Answer.FormatStringWrite));
}
/// <summary>
/// Stores the question list and the body part in the instance fields.
/// </summary>
/// <param name="questions">Value assigned to the <c>questions</c> field.</param>
/// <param name="body">Value assigned to the <c>body</c> field.</param>
public BodyCalculator(QuestionList questions, BodyPart body)
{
    this.body = body;
    this.questions = questions;
}
/// <summary>
/// Creates a trigram index via the single-argument constructor, passing <paramref name="questionList"/>.
/// </summary>
/// <param name="questionList">Question list passed to the constructor.</param>
/// <returns>The newly constructed index.</returns>
public static TrigramIndex CreateFrom(QuestionList questionList)
{
    var index = new TrigramIndex(questionList);
    return index;
}
/// <summary>
/// Takes the default question list from <c>Program</c>, builds <c>Statistics</c> over it and
/// reports on the console that the preparation is finished.
/// </summary>
public void DistributionInit()
{
    ql = Program.DefaultQuestionList;
    statistics = new Statistics(ql);

    Console.WriteLine("Preparations have been done");
}
/// <summary>
/// Creates the graph by passing the results of <c>GetAllVertices</c> and <c>GetAllEdges</c>
/// for <paramref name="ql"/> to the base constructor.
/// </summary>
/// <param name="ql">Question list the vertices and edges are derived from.</param>
public UserGraph(QuestionList ql)
    : base(GetAllVertices(ql), GetAllEdges(ql))
{
}