Esempio n. 1
0
        /// <summary>
        /// Loads the questions with the given ids through the API client, in batches of up to
        /// 100 ids per request, and converts every returned item to a <see cref="QuestionMarkdown"/>.
        /// </summary>
        /// <param name="site">Site name passed to the API client and to the markdown converter.</param>
        /// <param name="question_ids">Ids of the questions to load; repeated ids are requested only once.</param>
        /// <returns>
        /// Map from question id to the converted question. Ids that are absent from the API
        /// response are left out of the map, so callers should check for the key before reading it.
        /// </returns>
        public static Dictionary <int, QuestionMarkdown> LoadQuestionsSequence(string site, IEnumerable <int> question_ids)
        {
            const int BatchSize = 100;

            // Callers may pass the same question id several times (for example one id per answer
            // of the same question); requesting each id once gives the same resulting map.
            int[] ids = question_ids.Distinct().ToArray();
            Dictionary <int, QuestionMarkdown> ret = new Dictionary <int, QuestionMarkdown>(ids.Length);

            // Nothing to load: do not send a request with an empty id list
            if (ids.Length == 0)
            {
                return(ret);
            }

            SeApiClient client = new SeApiClient(Archive.APIURL, site);

            for (int start = 0; start < ids.Length; start += BatchSize)
            {
                // The last batch may be shorter than BatchSize
                int   count    = Math.Min(BatchSize, ids.Length - start);
                int[] sequence = new int[count];
                Array.Copy(ids, start, sequence, 0, count);

                Console.WriteLine("Loading questions from #{0} to #{1}...", start, start + count - 1);
                Dictionary <int, object> questions = client.LoadQuestionsSequence(sequence);
                Console.WriteLine("{0} questions loaded", questions.Count);

                foreach (int id in sequence)
                {
                    object data;

                    // The response may lack some of the requested ids; skip them instead of
                    // failing with KeyNotFoundException and losing everything loaded so far
                    if (!questions.TryGetValue(id, out data))
                    {
                        continue;
                    }

                    ret[id] = QuestionMarkdown.FromJsonData(site, data);
                }
            }

            return(ret);
        }
Esempio n. 2
0
        /// <summary>
        /// Re-titles the saved markdown answers of a site after loading their parent questions
        /// through <c>LoadQuestionsSequence</c>, and writes each processed answer back to its
        /// "A{id}.md" file.
        /// </summary>
        /// <param name="site">Site name; also the name of the data sub-directory.</param>
        /// <param name="subdir">Directory below the site data directory that holds the saved posts.</param>
        public static void UpdateTitles(string site, string subdir)
        {
            string siteDataDir = "..\\..\\..\\..\\data\\" + site + "\\";
            string answersDir  = Path.Combine(siteDataDir, subdir + "\\");

            Console.WriteLine("Updating titles for saved answers ({0}, {1})...", site, subdir);

            PostSet posts = PostSet.LoadFromDir(answersDir, site);
            // Not used below; the property is still read exactly as before
            Dictionary <int, Question> questions = posts.Questions;

            Console.WriteLine("Answers without parent question: {0}", posts.MarkdownAnswers.Count);

            // Step 1: collect the parent question id of every saved answer
            List <int> question_ids = new List <int>(posts.MarkdownAnswers.Count);

            foreach (int answerId in posts.MarkdownAnswers.Keys)
            {
                try
                {
                    int parentId = posts.MarkdownAnswers[answerId].QuestionId;
                    question_ids.Add(parentId);
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.GetType() + ": " + ex.Message);
                }
            }

            // Step 2: fetch all parent questions
            Dictionary <int, QuestionMarkdown> loaded = LoadQuestionsSequence(site, question_ids);

            // Step 3: apply the new titles and save the answers; a failure on one answer is
            // reported and does not stop the remaining ones
            foreach (int answerId in posts.MarkdownAnswers.Keys)
            {
                try
                {
                    var answer   = posts.MarkdownAnswers[answerId];
                    int parentId = answer.QuestionId;

                    QuestionMarkdown parent;
                    if (!loaded.TryGetValue(parentId, out parent))
                    {
                        Console.WriteLine("Not found Q" + parentId);
                        continue;
                    }

                    // Keep the current title when the parent question has no title
                    string title = answer.Title;
                    if (!String.IsNullOrEmpty(parent.Title))
                    {
                        title = "Ответ на \"" + parent.Title + "\"";
                    }
                    answer.Title = title;

                    string target = Path.Combine(answersDir, "A" + answerId + ".md");

                    using (TextWriter output = new StreamWriter(target, false, Encoding.UTF8))
                    {
                        answer.ToMarkdown(output);
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.GetType() + ": " + ex.Message);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Refreshes the on-disk markdown archive of ru.meta.stackoverflow.com.
        /// </summary>
        /// <remarks>
        /// The method runs four passes:
        /// 1. Posts are requested by id range, 100 ids per request starting at id 11000, and written
        ///    to "posts-raw" until a request returns no posts. An id that is missing from a response
        ///    while its raw file exists is treated as deleted: its "Q"/"A" files are moved from
        ///    "posts" to "deleted" and the raw file is removed.
        /// 2. Every "*.md" file in "posts-raw" is read and its id is classified as question or answer.
        /// 3. Questions are requested in batches of up to 100 ids and written to "posts" as "Q{id}.md".
        /// 4. Answers are requested in batches of up to 100 ids and written to "posts" as "A{id}.md".
        /// In passes 3 and 4 an id missing from the response whose file exists is moved to "deleted".
        /// An error while reading a raw file in pass 2 is logged and rethrown.
        /// </remarks>
        static void LoadDataMarkdown()
        {
            const int StartingPoint = 11000;
            const int BatchSize     = 100;
            string    site          = "ru.meta.stackoverflow.com";
            string    datadir       = "..\\..\\..\\..\\data\\" + site + "\\";
            string    postsdir      = Path.Combine(datadir, "posts-raw\\");
            string    postsdir2     = Path.Combine(datadir, "posts\\");
            string    deleted_dir   = Path.Combine(datadir, "deleted\\");

            // CreateDirectory leaves an already existing directory untouched,
            // so no separate existence check is needed
            Directory.CreateDirectory(postsdir);
            Directory.CreateDirectory(deleted_dir);
            Directory.CreateDirectory(postsdir2);

            SeApiClient client = new SeApiClient(Archive.APIURL, site);

            Console.WriteLine(" Updating archive data: {0}", DateTime.Now);

            // Pass 1: download raw posts range by range until an empty range is returned
            int rangeStart = StartingPoint;

            while (true)
            {
                int rangeEnd = rangeStart + BatchSize - 1;

                Console.WriteLine("Loading posts {0} to {1}...", rangeStart, rangeEnd);
                Dictionary <int, object> posts = client.LoadPostsRange(rangeStart, rangeEnd);

                if (posts.Count == 0)
                {
                    break;
                }

                Console.WriteLine("{0} posts loaded", posts.Count);

                for (int postId = rangeStart; postId <= rangeEnd; postId++)
                {
                    string rawPath = Path.Combine(postsdir, postId.ToString() + ".md");
                    object postData;

                    if (posts.TryGetValue(postId, out postData))
                    {
                        // Convert before opening the file: opening truncates it, so a conversion
                        // failure would otherwise leave an empty file behind
                        PostMarkdown post = PostMarkdown.FromJsonData(site, postData);

                        using (TextWriter wr = new StreamWriter(rawPath, false))
                        {
                            post.ToMarkdown(wr);
                        }
                        continue;
                    }

                    if (!File.Exists(rawPath))
                    {
                        // Never archived and not returned now: nothing to do
                        continue;
                    }

                    // Archived earlier but no longer returned: treat the post as deleted
                    Console.WriteLine("Found deleted post: {0}", postId);

                    string questionFile = Path.Combine(postsdir2, "Q" + postId.ToString() + ".md");
                    if (File.Exists(questionFile))
                    {
                        MoveFile(questionFile, Path.Combine(deleted_dir, "Q" + postId.ToString() + ".md"), true);
                    }

                    string answerFile = Path.Combine(postsdir2, "A" + postId.ToString() + ".md");
                    if (File.Exists(answerFile))
                    {
                        MoveFile(answerFile, Path.Combine(deleted_dir, "A" + postId.ToString() + ".md"), true);
                    }

                    File.Delete(rawPath);
                }

                rangeStart = rangeEnd + 1;
            }

            // Pass 2: scan the raw posts and split their ids into questions and answers
            List <int> question_ids = new List <int>();
            List <int> answer_ids   = new List <int>();

            string[] files = Directory.GetFiles(postsdir, "*.md");

            foreach (string file in files)
            {
                // The raw file name without extension is the post id
                string idstr = Path.GetFileNameWithoutExtension(file);
                int    fileId;

                if (!Int32.TryParse(idstr, out fileId))
                {
                    Console.WriteLine("Bad post id = {0} in file {1}", idstr, file);
                    continue;
                }

                try
                {
                    PostMarkdown post = null;
                    using (TextReader read = new StreamReader(file, Encoding.UTF8))
                    {
                        post = PostMarkdown.FromMarkdown(site, read);
                    }

                    if (post.PostType == "question")
                    {
                        question_ids.Add(fileId);
                    }
                    else if (post.PostType == "answer")
                    {
                        answer_ids.Add(fileId);
                    }
                    else
                    {
                        Console.WriteLine("Unknown post type: {0} in {1}", post.PostType, file);
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Error reading file " + file);
                    Console.WriteLine(ex.ToString());
                    throw;
                }
            }

            // Pass 3: load questions. With no ids the loop body never runs, so no request
            // with an empty id list is sent.
            int[] q_arr = question_ids.ToArray();

            for (int start = 0; start < q_arr.Length; start += BatchSize)
            {
                // The last batch may be shorter than BatchSize
                int   count    = Math.Min(BatchSize, q_arr.Length - start);
                int[] sequence = new int[count];
                Array.Copy(q_arr, start, sequence, 0, count);

                Console.WriteLine("Loading questions from #{0} to #{1}...", start, start + count - 1);
                Dictionary <int, object> questions = client.LoadQuestionsSequence(sequence);
                Console.WriteLine("{0} questions loaded", questions.Count);

                foreach (int id in sequence)
                {
                    string questionPath = Path.Combine(postsdir2, "Q" + id.ToString() + ".md");
                    object data;

                    if (questions.TryGetValue(id, out data))
                    {
                        // Convert first so that a failure does not truncate the existing file
                        QuestionMarkdown q = QuestionMarkdown.FromJsonData(site, data);

                        using (TextWriter wr = new StreamWriter(questionPath, false, Encoding.UTF8))
                        {
                            q.ToMarkdown(wr);
                        }
                    }
                    else if (File.Exists(questionPath))
                    {
                        Console.WriteLine("Found deleted question: {0}", id);
                        // Same helper call as in pass 1; File.Move would throw if the target exists.
                        // NOTE(review): the third argument is presumably "overwrite" - confirm.
                        MoveFile(questionPath, Path.Combine(deleted_dir, "Q" + id.ToString() + ".md"), true);
                    }
                }
            }

            // Pass 4: load answers, batched the same way as the questions
            int[] a_arr = answer_ids.ToArray();

            for (int start = 0; start < a_arr.Length; start += BatchSize)
            {
                int   count    = Math.Min(BatchSize, a_arr.Length - start);
                int[] sequence = new int[count];
                Array.Copy(a_arr, start, sequence, 0, count);

                Console.WriteLine("Loading answers from #{0} to #{1}...", start, start + count - 1);
                Dictionary <int, object> answers = client.LoadAnswersSequence(sequence);
                Console.WriteLine("{0} answers loaded", answers.Count);

                foreach (int id in sequence)
                {
                    string answerPath = Path.Combine(postsdir2, "A" + id.ToString() + ".md");
                    object data;

                    if (answers.TryGetValue(id, out data))
                    {
                        // Convert first so that a failure does not truncate the existing file
                        AnswerMarkdown a = AnswerMarkdown.FromJsonData(site, data);

                        using (TextWriter wr = new StreamWriter(answerPath, false, Encoding.UTF8))
                        {
                            a.ToMarkdown(wr);
                        }
                    }
                    else if (File.Exists(answerPath))
                    {
                        Console.WriteLine("Found deleted answer: {0}", id);
                        // Same helper call as in pass 1; File.Move would throw if the target exists.
                        // NOTE(review): the third argument is presumably "overwrite" - confirm.
                        MoveFile(answerPath, Path.Combine(deleted_dir, "A" + id.ToString() + ".md"), true);
                    }
                }
            }
        }