/// <summary>
/// Loads the given questions through the site's API client in batches of up to
/// 100 ids and converts every returned item with QuestionMarkdown.FromJsonData.
/// </summary>
/// <param name="site">Site name passed to the API client and to QuestionMarkdown.FromJsonData.</param>
/// <param name="question_ids">Ids of the questions to load. Duplicate ids are requested only once.</param>
/// <returns>
/// Map from question id to the loaded question. Ids that the API response does not
/// contain are simply absent from the map; an empty input yields an empty map
/// without any API request.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="question_ids"/> is null.</exception>
public static Dictionary<int, QuestionMarkdown> LoadQuestionsSequence(string site, IEnumerable<int> question_ids)
{
    const int BatchSize = 100;

    if (question_ids == null)
    {
        throw new ArgumentNullException("question_ids");
    }

    Dictionary<int, QuestionMarkdown> ret = new Dictionary<int, QuestionMarkdown>();

    // Several answers may share one parent question; request each id once.
    int[] q_arr = question_ids.Distinct().ToArray();
    if (q_arr.Length == 0)
    {
        return ret;
    }

    SeApiClient client = new SeApiClient(Archive.APIURL, site);

    for (int first = 0; first < q_arr.Length; first += BatchSize)
    {
        int count = Math.Min(BatchSize, q_arr.Length - first);
        int last = first + count - 1;

        int[] sequence = new int[count];
        Array.Copy(q_arr, first, sequence, 0, count);

        Console.WriteLine("Loading questions from #{0} to #{1}...", first, last);
        Dictionary<int, object> questions = client.LoadQuestionsSequence(sequence);
        Console.WriteLine("{0} questions loaded", questions.Count);

        foreach (int id in sequence)
        {
            // The response can lack an id (callers already handle missing keys),
            // so skip it instead of failing the whole load with KeyNotFoundException.
            object data;
            if (!questions.TryGetValue(id, out data))
            {
                continue;
            }

            ret[id] = QuestionMarkdown.FromJsonData(site, data);
        }
    }

    return ret;
}
/// <summary>
/// Rewrites the titles of the answers in <c>posts.MarkdownAnswers</c> (the log
/// message describes them as answers without a parent question) using the titles
/// of their parent questions loaded via <c>LoadQuestionsSequence</c>, and saves
/// each updated answer to "A{id}.md" in the posts directory.
/// </summary>
/// <param name="site">Site name; also the name of the data sub-directory.</param>
/// <param name="subdir">Sub-directory of the site's data directory that holds the saved posts.</param>
/// <remarks>
/// Errors for an individual answer are written to the console and do not stop
/// the processing of the remaining answers.
/// </remarks>
public static void UpdateTitles(string site, string subdir)
{
    string datadir = "..\\..\\..\\..\\data\\" + site + "\\";
    string postsdir = Path.Combine(datadir, subdir + "\\");

    Console.WriteLine("Updating titles for saved answers ({0}, {1})...", site, subdir);
    PostSet posts = PostSet.LoadFromDir(postsdir, site);
    Console.WriteLine("Answers without parent question: {0}", posts.MarkdownAnswers.Count);

    // Collect the parent question ids, each one only once: several answers can
    // belong to the same question and there is no point in requesting it twice.
    List<int> question_ids = new List<int>(posts.MarkdownAnswers.Count);
    HashSet<int> seen_ids = new HashSet<int>();

    foreach (int a in posts.MarkdownAnswers.Keys)
    {
        try
        {
            int question_id = posts.MarkdownAnswers[a].QuestionId;
            if (seen_ids.Add(question_id))
            {
                question_ids.Add(question_id);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.GetType() + ": " + ex.Message);
        }
    }

    Dictionary<int, QuestionMarkdown> loaded = LoadQuestionsSequence(site, question_ids);

    foreach (int a in posts.MarkdownAnswers.Keys)
    {
        try
        {
            var answer = posts.MarkdownAnswers[a];
            int key = answer.QuestionId;

            QuestionMarkdown qmd;
            if (!loaded.TryGetValue(key, out qmd))
            {
                Console.WriteLine("Not found Q" + key.ToString());
                continue;
            }

            // Keep the current title when the parent question has no title.
            if (!String.IsNullOrEmpty(qmd.Title))
            {
                answer.Title = "Ответ на \"" + qmd.Title + "\"";
            }

            string filepath = Path.Combine(postsdir, "A" + a.ToString() + ".md");
            using (TextWriter wr = new StreamWriter(filepath, false, Encoding.UTF8))
            {
                answer.ToMarkdown(wr);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.GetType() + ": " + ex.Message);
        }
    }
}
/// <summary>
/// Updates the local Markdown archive of ru.meta.stackoverflow.com posts.
/// </summary>
/// <remarks>
/// Works in three passes:
/// 1. Requests posts by id range, 100 ids at a time starting from id 11000, until
///    a range comes back empty. Every returned post is written to
///    "posts-raw\{id}.md". When an id is missing from the response but its raw
///    file exists, the matching "posts\Q{id}.md" / "posts\A{id}.md" files are
///    moved to "deleted\" and the raw file is removed.
/// 2. Reads every "posts-raw\*.md" file and sorts the ids into questions and
///    answers by the post's PostType. A file that cannot be read is logged and
///    the exception is rethrown.
/// 3. Reloads questions and then answers in batches of up to 100 ids and writes
///    "posts\Q{id}.md" / "posts\A{id}.md"; files of posts that are no longer
///    returned are moved to "deleted\".
/// </remarks>
static void LoadDataMarkdown()
{
    const int StartingPoint = 11000;
    const int BatchSize = 100;

    string site = "ru.meta.stackoverflow.com";
    string datadir = "..\\..\\..\\..\\data\\" + site + "\\";
    string postsdir = Path.Combine(datadir, "posts-raw\\");
    string postsdir2 = Path.Combine(datadir, "posts\\");
    string deleted_dir = Path.Combine(datadir, "deleted\\");

    // CreateDirectory does nothing when the directory already exists,
    // so no Directory.Exists guard is needed.
    Directory.CreateDirectory(postsdir);
    Directory.CreateDirectory(deleted_dir);
    Directory.CreateDirectory(postsdir2);

    SeApiClient client = new SeApiClient(Archive.APIURL, site);
    Console.WriteLine(" Updating archive data: {0}", DateTime.Now);

    // Pass 1: refresh raw posts range by range; the first empty range ends the scan.
    for (int first = StartingPoint; ; first += BatchSize)
    {
        int last = first + BatchSize - 1;

        Console.WriteLine("Loading posts {0} to {1}...", first, last);
        Dictionary<int, object> posts = client.LoadPostsRange(first, last);
        if (posts.Count == 0)
        {
            break;
        }
        Console.WriteLine("{0} posts loaded", posts.Count);

        for (int id = first; id <= last; id++)
        {
            string rawPath = Path.Combine(postsdir, id.ToString() + ".md");

            object data;
            if (posts.TryGetValue(id, out data))
            {
                using (TextWriter wr = new StreamWriter(rawPath, false))
                {
                    PostMarkdown post = PostMarkdown.FromJsonData(site, data);
                    post.ToMarkdown(wr);
                }
            }
            else if (File.Exists(rawPath))
            {
                // Previously archived post is gone from the API response.
                Console.WriteLine("Found deleted post: {0}", id);
                MoveToDeletedIfPresent(postsdir2, deleted_dir, "Q" + id.ToString() + ".md");
                MoveToDeletedIfPresent(postsdir2, deleted_dir, "A" + id.ToString() + ".md");
                File.Delete(rawPath);
            }
        }
    }

    // Pass 2: scan raw posts and split them into questions and answers.
    List<int> question_ids = new List<int>();
    List<int> answer_ids = new List<int>();

    foreach (string file in Directory.GetFiles(postsdir, "*.md"))
    {
        string idstr = Path.GetFileNameWithoutExtension(file);
        int id;
        if (!Int32.TryParse(idstr, out id))
        {
            Console.WriteLine("Bad post id = {0} in file {1}", idstr, file);
            continue;
        }

        try
        {
            PostMarkdown post;
            using (TextReader read = new StreamReader(file, Encoding.UTF8))
            {
                post = PostMarkdown.FromMarkdown(site, read);
            }

            if (post.PostType == "question")
            {
                question_ids.Add(id);
            }
            else if (post.PostType == "answer")
            {
                answer_ids.Add(id);
            }
            else
            {
                Console.WriteLine("Unknown post type: {0} in {1}", post.PostType, file);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error reading file " + file);
            Console.WriteLine(ex.ToString());
            throw;
        }
    }

    // Pass 3: reload full questions and answers and refresh their files.
    SyncMarkdownFiles(
        question_ids.ToArray(), BatchSize, "question", "Q", postsdir2, deleted_dir,
        ids => client.LoadQuestionsSequence(ids),
        (data, wr) => QuestionMarkdown.FromJsonData(site, data).ToMarkdown(wr));

    SyncMarkdownFiles(
        answer_ids.ToArray(), BatchSize, "answer", "A", postsdir2, deleted_dir,
        ids => client.LoadAnswersSequence(ids),
        (data, wr) => AnswerMarkdown.FromJsonData(site, data).ToMarkdown(wr));
}

/// <summary>
/// Moves <paramref name="fileName"/> from <paramref name="sourceDir"/> to
/// <paramref name="deletedDir"/> when the source file exists.
/// </summary>
private static void MoveToDeletedIfPresent(string sourceDir, string deletedDir, string fileName)
{
    string source = Path.Combine(sourceDir, fileName);
    if (File.Exists(source))
    {
        MoveFile(source, Path.Combine(deletedDir, fileName), true);
    }
}

/// <summary>
/// Loads posts of one kind in batches and refreshes their Markdown files: a
/// returned post is (re)written to "{filePrefix}{id}.md" in
/// <paramref name="postsDir"/>; a post that is not returned but has a file there
/// gets that file moved to <paramref name="deletedDir"/>.
/// </summary>
/// <param name="ids">Ids to load; nothing is requested when the array is empty.</param>
/// <param name="batchSize">Maximum number of ids per request.</param>
/// <param name="kind">Singular post kind used in console messages ("question" or "answer").</param>
/// <param name="filePrefix">File name prefix for this kind ("Q" or "A").</param>
/// <param name="postsDir">Directory holding the current post files.</param>
/// <param name="deletedDir">Directory that receives files of posts no longer returned.</param>
/// <param name="loadBatch">Loads one batch of ids and returns the raw items keyed by id.</param>
/// <param name="writePost">Converts one raw item and writes it as Markdown to the given writer.</param>
private static void SyncMarkdownFiles(
    int[] ids,
    int batchSize,
    string kind,
    string filePrefix,
    string postsDir,
    string deletedDir,
    Func<int[], Dictionary<int, object>> loadBatch,
    Action<object, TextWriter> writePost)
{
    for (int first = 0; first < ids.Length; first += batchSize)
    {
        int count = Math.Min(batchSize, ids.Length - first);
        int[] sequence = new int[count];
        Array.Copy(ids, first, sequence, 0, count);

        Console.WriteLine("Loading {0}s from #{1} to #{2}...", kind, first, first + count - 1);
        Dictionary<int, object> loaded = loadBatch(sequence);
        Console.WriteLine("{0} {1}s loaded", loaded.Count, kind);

        foreach (int id in sequence)
        {
            string fileName = filePrefix + id.ToString() + ".md";
            string path = Path.Combine(postsDir, fileName);

            object data;
            if (loaded.TryGetValue(id, out data))
            {
                using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
                {
                    writePost(data, wr);
                }
            }
            else if (File.Exists(path))
            {
                Console.WriteLine("Found deleted {0}: {1}", kind, id);
                // Same MoveFile(..., true) call the raw-post pass uses; File.Move
                // would throw if a copy already sits in the deleted directory.
                // NOTE(review): the third argument presumably means "overwrite" - confirm.
                MoveFile(path, Path.Combine(deletedDir, fileName), true);
            }
        }
    }
}