Beispiel #1
0
        /// <summary>
        /// Loads all answers of a user through the API client and writes each one
        /// as a Markdown file named <c>A{id}.md</c> into the site's <c>posts</c>
        /// directory.
        /// </summary>
        /// <remarks>
        /// If a file for an answer already exists, the non-empty title stored in it
        /// is reused; otherwise the title defaults to "Answer {id}".
        /// </remarks>
        /// <param name="site">Site name; used as the data sub-directory name and passed to the API client.</param>
        /// <param name="userid">Id of the user whose answers are loaded.</param>
        public static void SaveUserAnswers(string site, int userid)
        {
            string datadir  = "..\\..\\..\\..\\data\\" + site + "\\";
            string postsdir = Path.Combine(datadir, "posts\\");

            //CreateDirectory does nothing when the directory already exists
            Directory.CreateDirectory(postsdir);

            SeApiClient client = new SeApiClient(Archive.APIURL, site);
            Dictionary <int, object> answers = client.LoadUserAnswers(userid);

            Console.WriteLine("Saving {0} answers...", answers.Count);

            foreach (KeyValuePair <int, object> entry in answers)
            {
                string path  = Path.Combine(postsdir, "A" + entry.Key.ToString() + ".md");
                string title = "Answer " + entry.Key.ToString();

                //if answer is already saved, use title from existing file

                try
                {
                    if (File.Exists(path))
                    {
                        using (TextReader rd = new StreamReader(path, Encoding.UTF8))
                        {
                            AnswerMarkdown prev = AnswerMarkdown.FromMarkdown(site, rd);

                            if (!String.IsNullOrEmpty(prev.Title))
                            {
                                title = prev.Title;
                            }
                        }
                    }
                }
                catch (IOException ex)
                {
                    //best effort: fall back to the default title
                    Console.WriteLine("Error when trying to read existing answer data");
                    Console.WriteLine(ex.ToString());
                }

                //Build the post before opening the writer: opening with append=false
                //truncates the file, so a failure while converting the JSON data
                //must not wipe a previously saved answer.
                AnswerMarkdown post = AnswerMarkdown.FromJsonData(site, entry.Value);
                post.Title = title;

                //insert newlines before code blocks - fix for CommonMark
                post.Body = FixPostBody(post.Body);

                using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
                {
                    post.ToMarkdown(wr);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Loads the questions with the given ids through the API client, in
        /// batches of at most 100 ids per request, and converts them to
        /// <see cref="QuestionMarkdown"/> objects.
        /// </summary>
        /// <param name="site">Site name passed to the API client and to the converter.</param>
        /// <param name="question_ids">Ids of the questions to load.</param>
        /// <returns>
        /// Dictionary mapping question id to the loaded question. Ids that the API
        /// response does not contain are reported on the console and left out
        /// instead of aborting the whole load. Empty input yields an empty
        /// dictionary without issuing any request.
        /// </returns>
        public static Dictionary <int, QuestionMarkdown> LoadQuestionsSequence(string site, IEnumerable <int> question_ids)
        {
            const int BatchSize = 100;

            int[] q_arr = question_ids.ToArray();

            SeApiClient client = new SeApiClient(Archive.APIURL, site);
            Dictionary <int, QuestionMarkdown> ret = new Dictionary <int, QuestionMarkdown>();

            //i1..i2 is the inclusive index range of the current batch; the loop
            //body is never entered for an empty id list
            for (int i1 = 0; i1 < q_arr.Length; i1 += BatchSize)
            {
                int   i2       = Math.Min(i1 + BatchSize, q_arr.Length) - 1;
                int[] sequence = new int[i2 - i1 + 1];

                Console.WriteLine("Loading questions from #{0} to #{1}...", i1, i2);
                Array.Copy(q_arr, i1, sequence, 0, sequence.Length);
                Dictionary <int, object> questions = client.LoadQuestionsSequence(sequence);

                Console.WriteLine("{0} questions loaded", questions.Count);

                foreach (int id in sequence)
                {
                    object data;

                    //a requested id can be missing from the response; indexing
                    //the dictionary directly would throw KeyNotFoundException
                    if (!questions.TryGetValue(id, out data))
                    {
                        Console.WriteLine("Question {0} was not returned, skipping", id);
                        continue;
                    }

                    ret[id] = QuestionMarkdown.FromJsonData(site, data);
                }
            }

            return(ret);
        }
Beispiel #3
0
        /// <summary>
        /// Loads a single answer through the API client and writes it as a Markdown
        /// file named <c>A{id}.md</c> into the site's <c>posts</c> directory.
        /// </summary>
        /// <remarks>
        /// If a file for the answer already exists, the non-empty title stored in
        /// it is reused; otherwise the title defaults to "Answer {id}".
        /// </remarks>
        /// <param name="site">Site name; used as the data sub-directory name and passed to the API client.</param>
        /// <param name="id">Id of the answer to save.</param>
        /// <exception cref="Exception">The API client returned null for the answer.</exception>
        public static void SaveSingleAnswer(string site, int id)
        {
            string datadir  = "..\\..\\..\\..\\data\\" + site + "\\";
            string postsdir = Path.Combine(datadir, "posts\\");

            //CreateDirectory does nothing when the directory already exists
            Directory.CreateDirectory(postsdir);

            Console.WriteLine("Saving single answer {0} from {1}...", id, site);

            SeApiClient client = new SeApiClient(Archive.APIURL, site);
            string      a      = client.LoadSingleAnswer(id);

            if (a == null)
            {
                throw new Exception("Failed to load answer " + id.ToString() + " from " + site);
            }

            string path  = Path.Combine(postsdir, "A" + id.ToString() + ".md");
            string title = "Answer " + id.ToString();

            //if answer is already saved, use title from existing file

            try
            {
                if (File.Exists(path))
                {
                    using (TextReader rd = new StreamReader(path, Encoding.UTF8))
                    {
                        AnswerMarkdown prev = AnswerMarkdown.FromMarkdown(site, rd);

                        if (!String.IsNullOrEmpty(prev.Title))
                        {
                            title = prev.Title;
                        }
                    }
                }
            }
            catch (IOException ex)
            {
                //best effort: fall back to the default title
                Console.WriteLine("Error when trying to read existing answer data");
                Console.WriteLine(ex.ToString());
            }

            //Parse and convert before opening the writer: opening with append=false
            //truncates the file, so a parse failure must not wipe a previously
            //saved answer.
            dynamic        data = JSON.Parse(a);
            AnswerMarkdown post = AnswerMarkdown.FromJsonData(site, data);
            post.Title = title;

            //insert newlines before code blocks - fix for CommonMark
            post.Body = FixPostBody(post.Body);

            using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
            {
                post.ToMarkdown(wr);
            }

            Console.WriteLine("Success");
        }
        /// <summary>
        /// Updates the local Markdown archive for ru.meta.stackoverflow.com.
        /// </summary>
        /// <remarks>
        /// Works in four passes:
        /// 1. Loads posts by id range (100 ids per request, starting at 11000) into
        ///    <c>posts-raw</c> until a range comes back empty. Ids missing from a
        ///    response whose raw file exists are treated as deleted: their
        ///    <c>Q{id}.md</c> / <c>A{id}.md</c> files are moved from <c>posts</c>
        ///    to <c>deleted</c> and the raw file is removed.
        /// 2. Scans <c>posts-raw</c> and splits the post ids into questions and
        ///    answers by their post type.
        /// 3. Reloads all questions in batches of 100 and writes <c>Q{id}.md</c>.
        /// 4. Reloads all answers in batches of 100 and writes <c>A{id}.md</c>.
        /// In passes 3 and 4, ids missing from the response are moved to
        /// <c>deleted</c> if a file for them exists.
        /// </remarks>
        static void LoadDataMarkdown()
        {
            const int StartingPoint = 11000;
            const int BatchSize     = 100;
            string    site          = "ru.meta.stackoverflow.com";
            string    datadir       = "..\\..\\..\\..\\data\\" + site + "\\";
            string    postsdir      = Path.Combine(datadir, "posts-raw\\");
            string    postsdir2     = Path.Combine(datadir, "posts\\");
            string    deleted_dir   = Path.Combine(datadir, "deleted\\");
            int       i1            = StartingPoint;
            int       i2            = StartingPoint + BatchSize - 1;
            Dictionary <int, object> posts;
            string path;

            //CreateDirectory does nothing when the directory already exists
            Directory.CreateDirectory(postsdir);
            Directory.CreateDirectory(deleted_dir);
            Directory.CreateDirectory(postsdir2);

            SeApiClient client = new SeApiClient(Archive.APIURL, site);

            Console.WriteLine(" Updating archive data: {0}", DateTime.Now);

            //Pass 1: raw posts by id range; stops at the first empty range
            while (true)
            {
                Console.WriteLine("Loading posts {0} to {1}...", i1, i2);
                posts = client.LoadPostsRange(i1, i2);

                if (posts.Count == 0)
                {
                    break;
                }

                Console.WriteLine("{0} posts loaded", posts.Count);

                for (int i = i1; i <= i2; i++)
                {
                    object data;
                    path = Path.Combine(postsdir, i.ToString() + ".md");

                    if (posts.TryGetValue(i, out data))
                    {
                        //convert first so a failure does not leave a truncated file
                        PostMarkdown post = PostMarkdown.FromJsonData(site, data);

                        using (TextWriter wr = new StreamWriter(path, false))
                        {
                            post.ToMarkdown(wr);
                        }
                        continue;
                    }

                    if (!File.Exists(path))
                    {
                        continue;
                    }

                    //the post was archived earlier but is no longer returned
                    Console.WriteLine("Found deleted post: {0}", i);
                    string path2 = Path.Combine(postsdir2, "Q" + i.ToString() + ".md");
                    string newpath;

                    if (File.Exists(path2))
                    {
                        newpath = Path.Combine(deleted_dir, "Q" + i.ToString() + ".md");
                        MoveFile(path2, newpath, true);
                    }

                    path2 = Path.Combine(postsdir2, "A" + i.ToString() + ".md");
                    if (File.Exists(path2))
                    {
                        newpath = Path.Combine(deleted_dir, "A" + i.ToString() + ".md");
                        MoveFile(path2, newpath, true);
                    }
                    File.Delete(path);
                }

                i1 = i2 + 1;
                i2 = i1 + BatchSize - 1;
            }

            //Pass 2: scan posts and split to questions and answers
            List <int> question_ids = new List <int>();
            List <int> answer_ids   = new List <int>();

            string[] files = Directory.GetFiles(postsdir, "*.md");

            for (int i = 0; i < files.Length; i++)
            {
                string idstr = Path.GetFileNameWithoutExtension(files[i]);
                int    id;

                if (!Int32.TryParse(idstr, out id))
                {
                    Console.WriteLine("Bad post id = {0} in file {1}", idstr, files[i]);
                    continue;
                }

                try
                {
                    PostMarkdown post = null;
                    using (TextReader read = new StreamReader(files[i], Encoding.UTF8))
                    {
                        post = PostMarkdown.FromMarkdown(site, read);
                    }

                    if (post.PostType == "question")
                    {
                        question_ids.Add(id);
                    }
                    else if (post.PostType == "answer")
                    {
                        answer_ids.Add(id);
                    }
                    else
                    {
                        Console.WriteLine("Unknown post type: {0} in {1}", post.PostType, files[i]);
                    }
                }
                catch (Exception ex)
                {
                    //log which file failed, then let the error propagate
                    Console.WriteLine("Error reading file " + files[i]);
                    Console.WriteLine(ex.ToString());
                    throw;
                }
            }

            //Pass 3: load questions. The loop body is never entered for an empty
            //id list, so no request with an empty sequence is sent.
            int[] q_arr = question_ids.ToArray();

            for (int first = 0; first < q_arr.Length; first += BatchSize)
            {
                int   last     = Math.Min(first + BatchSize, q_arr.Length) - 1;
                int[] sequence = new int[last - first + 1];

                Console.WriteLine("Loading questions from #{0} to #{1}...", first, last);
                Array.Copy(q_arr, first, sequence, 0, sequence.Length);
                Dictionary <int, object> questions = client.LoadQuestionsSequence(sequence);

                Console.WriteLine("{0} questions loaded", questions.Count);

                foreach (int id in sequence)
                {
                    object data;
                    path = Path.Combine(postsdir2, "Q" + id.ToString() + ".md");

                    if (!questions.TryGetValue(id, out data))
                    {
                        if (File.Exists(path))
                        {
                            Console.WriteLine("Found deleted question: {0}", id);

                            //Same helper call as for deleted raw posts in pass 1.
                            //File.Move throws when the target already exists.
                            //NOTE(review): presumably the last argument lets
                            //MoveFile replace an existing target - confirm.
                            MoveFile(path, Path.Combine(deleted_dir, "Q" + id.ToString() + ".md"), true);
                        }
                        continue;
                    }

                    //convert first so a failure does not leave a truncated file
                    QuestionMarkdown q = QuestionMarkdown.FromJsonData(site, data);

                    using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
                    {
                        q.ToMarkdown(wr);
                    }
                }
            }

            //Pass 4: load answers, same scheme as for questions
            int[] a_arr = answer_ids.ToArray();

            for (int first = 0; first < a_arr.Length; first += BatchSize)
            {
                int   last     = Math.Min(first + BatchSize, a_arr.Length) - 1;
                int[] sequence = new int[last - first + 1];

                Console.WriteLine("Loading answers from #{0} to #{1}...", first, last);
                Array.Copy(a_arr, first, sequence, 0, sequence.Length);
                Dictionary <int, object> answers = client.LoadAnswersSequence(sequence);

                Console.WriteLine("{0} answers loaded", answers.Count);

                foreach (int id in sequence)
                {
                    object data;
                    path = Path.Combine(postsdir2, "A" + id.ToString() + ".md");

                    if (!answers.TryGetValue(id, out data))
                    {
                        if (File.Exists(path))
                        {
                            Console.WriteLine("Found deleted answer: {0}", id);

                            //see the note on deleted questions: File.Move would
                            //throw when the target already exists
                            MoveFile(path, Path.Combine(deleted_dir, "A" + id.ToString() + ".md"), true);
                        }
                        continue;
                    }

                    //convert first so a failure does not leave a truncated file
                    AnswerMarkdown a = AnswerMarkdown.FromJsonData(site, data);

                    using (TextWriter wr = new StreamWriter(path, false, Encoding.UTF8))
                    {
                        a.ToMarkdown(wr);
                    }
                }
            }
        }