/// <summary>
/// Loads all answers of the given user through the API client and saves each one as
/// a Markdown file named "A{id}.md" in the "posts" subdirectory of the site's data directory.
/// If a file for an answer already exists, the title stored in it is carried over to the new file.
/// </summary>
/// <param name="site">Site name; used as the data subdirectory name and passed to the API client.</param>
/// <param name="userid">Id of the user whose answers are saved.</param>
public static void SaveUserAnswers(string site, int userid)
{
    string datadir = "..\\..\\..\\..\\data\\" + site + "\\";
    string postsdir = Path.Combine(datadir, "posts\\");

    // CreateDirectory does nothing when the directory already exists
    Directory.CreateDirectory(postsdir);

    SeApiClient client = new SeApiClient(Archive.APIURL, site);
    Dictionary<int, object> answers = client.LoadUserAnswers(userid);
    Console.WriteLine("Saving {0} answers...", answers.Count);

    foreach (KeyValuePair<int, object> entry in answers)
    {
        string path = Path.Combine(postsdir, "A" + entry.Key.ToString() + ".md");
        string title = "Answer " + entry.Key.ToString();

        //if answer is already saved, use title from existing file
        try
        {
            if (File.Exists(path))
            {
                using (TextReader rd = new StreamReader(path, Encoding.UTF8))
                {
                    AnswerMarkdown prev = AnswerMarkdown.FromMarkdown(site, rd);

                    if (!String.IsNullOrEmpty(prev.Title))
                    {
                        title = prev.Title;
                    }
                }
            }
        }
        catch (IOException ex)
        {
            // best effort: fall back to the default title when the old file cannot be read
            Console.WriteLine("Error when trying to read existing answer data");
            Console.WriteLine(ex.ToString());
        }

        // Build the post BEFORE opening the writer: opening the file with append=false
        // truncates it, so a failure during conversion must not wipe the existing copy.
        AnswerMarkdown post = AnswerMarkdown.FromJsonData(site, entry.Value);
        post.Title = title;

        //insert newlines before code blocks - fix for CommonMark
        post.Body = FixPostBody(post.Body);

        using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
        {
            post.ToMarkdown(wr);
        }
    }
}
/// <summary>
/// Loads the questions with the given ids through the API client, requesting them in
/// batches of up to 100 ids, and converts each returned item to a <c>QuestionMarkdown</c>.
/// </summary>
/// <param name="site">Site name passed to the API client and to the Markdown converter.</param>
/// <param name="question_ids">Ids of the questions to load.</param>
/// <returns>
/// Dictionary mapping question id to the loaded question. Ids that the API response does not
/// contain are skipped (a message is printed), so the result may have fewer entries than
/// <paramref name="question_ids"/>. Empty input yields an empty dictionary without any API request.
/// </returns>
public static Dictionary<int, QuestionMarkdown> LoadQuestionsSequence(string site, IEnumerable<int> question_ids)
{
    // NOTE(review): 100 is presumably the maximum number of ids the API accepts per request - confirm
    const int BatchSize = 100;

    int[] q_arr = question_ids.ToArray();
    Dictionary<int, QuestionMarkdown> ret = new Dictionary<int, QuestionMarkdown>();

    if (q_arr.Length == 0)
    {
        // nothing to load; avoid sending a request with an empty id list
        return ret;
    }

    SeApiClient client = new SeApiClient(Archive.APIURL, site);

    for (int i1 = 0; i1 < q_arr.Length; i1 += BatchSize)
    {
        // i1 and i2 are inclusive indexes into q_arr, not question ids
        int i2 = Math.Min(i1 + BatchSize, q_arr.Length) - 1;
        int[] sequence = new int[i2 - i1 + 1];

        Console.WriteLine("Loading questions from #{0} to #{1}...", i1, i2);
        Array.Copy(q_arr, i1, sequence, 0, sequence.Length);

        Dictionary<int, object> questions = client.LoadQuestionsSequence(sequence);
        Console.WriteLine("{0} questions loaded", questions.Count);

        foreach (int id in sequence)
        {
            object data;

            if (!questions.TryGetValue(id, out data))
            {
                // the response may omit ids; skip them instead of failing the whole load
                // with KeyNotFoundException
                Console.WriteLine("Question {0} was not returned, skipping", id);
                continue;
            }

            ret[id] = QuestionMarkdown.FromJsonData(site, data);
        }
    }

    return ret;
}
/// <summary>
/// Loads a single answer through the API client and saves it as a Markdown file named
/// "A{id}.md" in the "posts" subdirectory of the site's data directory.
/// If the file already exists, the title stored in it is carried over to the new file.
/// </summary>
/// <param name="site">Site name; used as the data subdirectory name and passed to the API client.</param>
/// <param name="id">Id of the answer to save.</param>
/// <exception cref="Exception">The API client returned null for the requested answer.</exception>
public static void SaveSingleAnswer(string site, int id)
{
    string datadir = "..\\..\\..\\..\\data\\" + site + "\\";
    string postsdir = Path.Combine(datadir, "posts\\");

    // CreateDirectory does nothing when the directory already exists
    Directory.CreateDirectory(postsdir);

    Console.WriteLine("Saving single answer {0} from {1}...", id, site);

    SeApiClient client = new SeApiClient(Archive.APIURL, site);
    string a = client.LoadSingleAnswer(id);

    if (a == null)
    {
        // exception type kept as-is so that existing callers are not affected
        throw new Exception("Failed to load answer " + id.ToString() + " from " + site);
    }

    string path = Path.Combine(postsdir, "A" + id.ToString() + ".md");
    string title = "Answer " + id.ToString();

    //if answer is already saved, use title from existing file
    try
    {
        if (File.Exists(path))
        {
            using (TextReader rd = new StreamReader(path, Encoding.UTF8))
            {
                AnswerMarkdown prev = AnswerMarkdown.FromMarkdown(site, rd);

                if (!String.IsNullOrEmpty(prev.Title))
                {
                    title = prev.Title;
                }
            }
        }
    }
    catch (IOException ex)
    {
        // best effort: fall back to the default title when the old file cannot be read
        Console.WriteLine("Error when trying to read existing answer data");
        Console.WriteLine(ex.ToString());
    }

    // Parse and convert BEFORE opening the writer: opening the file with append=false
    // truncates it, so a parse failure must not wipe the previously saved answer.
    dynamic data = JSON.Parse(a);
    AnswerMarkdown post = AnswerMarkdown.FromJsonData(site, data);
    post.Title = title;

    //insert newlines before code blocks - fix for CommonMark
    post.Body = FixPostBody(post.Body);

    using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
    {
        post.ToMarkdown(wr);
    }

    Console.WriteLine("Success");
}
/// <summary>
/// Updates the local Markdown archive of ru.meta.stackoverflow.com in three phases:
/// 1) downloads raw posts by id range (starting at id 11000) into "posts-raw";
/// 2) scans the raw files and splits their ids into questions and answers;
/// 3) reloads the questions and answers and writes them as "Q{id}.md" / "A{id}.md" into "posts".
/// Files of posts that are no longer returned are moved into "deleted".
/// </summary>
static void LoadDataMarkdown()
{
    const int StartingPoint = 11000;
    string site = "ru.meta.stackoverflow.com";
    string datadir = "..\\..\\..\\..\\data\\" + site + "\\";
    string postsdir = Path.Combine(datadir, "posts-raw\\");
    string postsdir2 = Path.Combine(datadir, "posts\\");
    string deleted_dir = Path.Combine(datadir, "deleted\\");

    // CreateDirectory does nothing when the directory already exists
    Directory.CreateDirectory(postsdir);
    Directory.CreateDirectory(deleted_dir);
    Directory.CreateDirectory(postsdir2);

    SeApiClient client = new SeApiClient(Archive.APIURL, site);
    Console.WriteLine(" Updating archive data: {0}", DateTime.Now);

    DownloadRawPosts(client, site, StartingPoint, postsdir, postsdir2, deleted_dir);

    //Scan posts and split to questions and answers
    List<int> question_ids = new List<int>();
    List<int> answer_ids = new List<int>();
    ClassifyRawPosts(site, postsdir, question_ids, answer_ids);

    RefreshQuestionFiles(client, site, question_ids.ToArray(), postsdir2, deleted_dir);
    RefreshAnswerFiles(client, site, answer_ids.ToArray(), postsdir2, deleted_dir);
}

// Downloads posts in consecutive ranges of 100 ids starting at startId, writing each returned
// post to "{id}.md" in rawDir; stops at the first range for which no posts are returned.
// For an id missing from the response that still has a raw file, the matching Q/A files
// are moved from postsDir to deletedDir and the raw file is removed.
private static void DownloadRawPosts(SeApiClient client, string site, int startId,
    string rawDir, string postsDir, string deletedDir)
{
    const int BatchSize = 100;
    string[] prefixes = { "Q", "A" };
    int i1 = startId;

    while (true)
    {
        int i2 = i1 + BatchSize - 1;

        Console.WriteLine("Loading posts {0} to {1}...", i1, i2);
        Dictionary<int, object> posts = client.LoadPostsRange(i1, i2);

        if (posts.Count == 0)
        {
            // an empty range is treated as the end of the available posts
            break;
        }

        Console.WriteLine("{0} posts loaded", posts.Count);

        for (int i = i1; i <= i2; i++)
        {
            string path = Path.Combine(rawDir, i.ToString() + ".md");
            object data;

            if (posts.TryGetValue(i, out data))
            {
                // convert first, so that a conversion failure does not truncate the existing file
                PostMarkdown post = PostMarkdown.FromJsonData(site, data);

                using (TextWriter wr = new StreamWriter(path, false))
                {
                    post.ToMarkdown(wr);
                }
            }
            else if (File.Exists(path))
            {
                Console.WriteLine("Found deleted post: {0}", i);

                // the raw file does not tell us cheaply whether it was a question or an answer,
                // so both possible file names are checked
                foreach (string prefix in prefixes)
                {
                    string name = prefix + i.ToString() + ".md";
                    string current = Path.Combine(postsDir, name);

                    if (File.Exists(current))
                    {
                        MoveFile(current, Path.Combine(deletedDir, name), true);
                    }
                }

                File.Delete(path);
            }
        }

        i1 = i2 + 1;
    }
}

// Reads every "*.md" file in rawDir and adds its id (the file name) to questionIds or answerIds
// according to the post type stored in the file. Files with a non-numeric name or an unknown
// post type are reported and skipped; a read/parse failure is logged and rethrown.
private static void ClassifyRawPosts(string site, string rawDir,
    List<int> questionIds, List<int> answerIds)
{
    string[] files = Directory.GetFiles(rawDir, "*.md");

    foreach (string file in files)
    {
        string idstr = Path.GetFileNameWithoutExtension(file);
        int id;

        if (!Int32.TryParse(idstr, out id))
        {
            Console.WriteLine("Bad post id = {0} in file {1}", idstr, file);
            continue;
        }

        try
        {
            PostMarkdown post = null;

            using (TextReader read = new StreamReader(file, Encoding.UTF8))
            {
                post = PostMarkdown.FromMarkdown(site, read);
            }

            if (post.PostType == "question")
            {
                questionIds.Add(id);
            }
            else if (post.PostType == "answer")
            {
                answerIds.Add(id);
            }
            else
            {
                Console.WriteLine("Unknown post type: {0} in {1}", post.PostType, file);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("Error reading file " + file);
            Console.WriteLine(ex.ToString());
            throw;
        }
    }
}

// Reloads the given questions in batches of up to 100 ids and rewrites "Q{id}.md" in postsDir;
// a question missing from the response whose file exists is moved to deletedDir.
// Does nothing (no API request) when ids is empty.
private static void RefreshQuestionFiles(SeApiClient client, string site, int[] ids,
    string postsDir, string deletedDir)
{
    const int BatchSize = 100;

    for (int i1 = 0; i1 < ids.Length; i1 += BatchSize)
    {
        // i1 and i2 are inclusive indexes into ids, not post ids
        int i2 = Math.Min(i1 + BatchSize, ids.Length) - 1;
        int[] sequence = new int[i2 - i1 + 1];

        Console.WriteLine("Loading questions from #{0} to #{1}...", i1, i2);
        Array.Copy(ids, i1, sequence, 0, sequence.Length);

        Dictionary<int, object> questions = client.LoadQuestionsSequence(sequence);
        Console.WriteLine("{0} questions loaded", questions.Count);

        foreach (int id in sequence)
        {
            string name = "Q" + id.ToString() + ".md";
            string path = Path.Combine(postsDir, name);
            object data;

            if (questions.TryGetValue(id, out data))
            {
                // convert first, so that a conversion failure does not truncate the existing file
                QuestionMarkdown q = QuestionMarkdown.FromJsonData(site, data);

                using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
                {
                    q.ToMarkdown(wr);
                }
            }
            else if (File.Exists(path))
            {
                Console.WriteLine("Found deleted question: {0}", id);

                // same helper and flag as the raw-post phase uses; plain File.Move would throw
                // if a file with this name is already present in the deleted directory
                MoveFile(path, Path.Combine(deletedDir, name), true);
            }
        }
    }
}

// Reloads the given answers in batches of up to 100 ids and rewrites "A{id}.md" in postsDir;
// an answer missing from the response whose file exists is moved to deletedDir.
// Does nothing (no API request) when ids is empty.
private static void RefreshAnswerFiles(SeApiClient client, string site, int[] ids,
    string postsDir, string deletedDir)
{
    const int BatchSize = 100;

    for (int i1 = 0; i1 < ids.Length; i1 += BatchSize)
    {
        // i1 and i2 are inclusive indexes into ids, not post ids
        int i2 = Math.Min(i1 + BatchSize, ids.Length) - 1;
        int[] sequence = new int[i2 - i1 + 1];

        Console.WriteLine("Loading answers from #{0} to #{1}...", i1, i2);
        Array.Copy(ids, i1, sequence, 0, sequence.Length);

        Dictionary<int, object> answers = client.LoadAnswersSequence(sequence);
        Console.WriteLine("{0} answers loaded", answers.Count);

        foreach (int id in sequence)
        {
            string name = "A" + id.ToString() + ".md";
            string path = Path.Combine(postsDir, name);
            object data;

            if (answers.TryGetValue(id, out data))
            {
                // convert first, so that a conversion failure does not truncate the existing file
                AnswerMarkdown a = AnswerMarkdown.FromJsonData(site, data);

                using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
                {
                    a.ToMarkdown(wr);
                }
            }
            else if (File.Exists(path))
            {
                Console.WriteLine("Found deleted answer: {0}", id);

                // same helper and flag as the raw-post phase uses; plain File.Move would throw
                // if a file with this name is already present in the deleted directory
                MoveFile(path, Path.Combine(deletedDir, name), true);
            }
        }
    }
}