Beispiel #1
0
        /// <summary>
        /// Crawls the feed named in <c>txtUrl</c> page by page and writes the message of every
        /// post that has one to the file named in <c>textBox2</c>. Crawling stops at the first
        /// post with a message whose <c>created_time</c> is earlier than 1 July 2014, when a page
        /// comes back empty, or when the result carries no <c>paging.next</c> link.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void button1_Click(object sender, EventArgs e)
        {
            progressBar1.MarqueeAnimationSpeed = 60;

            try
            {
                // NOTE(review): the credential is hard-coded in source; it should be loaded from
                // configuration instead of being compiled into the binary.
                Crawler crawler = new Crawler("913819931996488|2e3ef18f88e42c9068d8a6dba3b14021");

                Crawler.CrawlerQueryResult queryResult = await crawler.ExecuteQueryAsync(txtUrl.Text);

                List<JToken> posts = queryResult.GetFieldToken("data[*]").ToList();

                DateTime earliestDate = new DateTime(2014, 7, 1);
                bool dateOk = true;

                using (StreamWriter writer = new StreamWriter(textBox2.Text))
                {
                    while (posts.Count > 0 && dateOk)
                    {
                        foreach (JToken token in posts)
                        {
                            // Posts without a message are skipped entirely (they are not date-checked either).
                            if (token["message"] != null)
                            {
                                DateTime date = DateTime.Parse(token["created_time"].ToString());
                                if (date < earliestDate)
                                {
                                    dateOk = false;
                                    break;
                                }

                                writer.WriteLine(token["message"].ToString() + Environment.NewLine + "---" + Environment.NewLine);
                            }
                        }

                        // The cut-off was reached: do not spend another request on a page we will never read.
                        if (!dateOk)
                        {
                            break;
                        }

                        // Without a next link there is nothing left to fetch; passing an empty link
                        // to the crawler would fail instead of ending the crawl cleanly.
                        string nextPageUri = queryResult.GetSingleField("paging.next");
                        if (string.IsNullOrEmpty(nextPageUri))
                        {
                            break;
                        }

                        queryResult = await crawler.ExecuteLinkAsync(nextPageUri);

                        posts = queryResult.GetFieldToken("data[*]").ToList();
                    }
                }
            }
            finally
            {
                // Always stop the marquee, even when the crawl or the file write throws.
                progressBar1.MarqueeAnimationSpeed = 0;
                progressBar1.Value = 0;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Crawls the feed named in <c>txtUrl</c> page by page and writes the message of every
        /// comment (following each post's comment paging links) to the file named in
        /// <c>textBox2</c>. Crawling stops at the first post whose <c>created_time</c> is earlier
        /// than 1 July 2014, when a page of posts comes back empty, or when the result carries no
        /// <c>paging.next</c> link.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void btnProcessComments_Click(object sender, EventArgs e)
        {
            progressBarComments.MarqueeAnimationSpeed = 60;

            try
            {
                // NOTE(review): the credential is hard-coded in source; it should be loaded from
                // configuration instead of being compiled into the binary.
                Crawler crawler = new Crawler("913819931996488|2e3ef18f88e42c9068d8a6dba3b14021");

                Crawler.CrawlerQueryResult queryResult = await crawler.ExecuteQueryAsync(txtUrl.Text);

                List<JToken> posts = queryResult.GetFieldToken("data[*]").ToList();

                DateTime earliestDate = new DateTime(2014, 7, 1);
                bool dateOk = true;

                using (StreamWriter writer = new StreamWriter(textBox2.Text))
                {
                    while (posts.Count > 0 && dateOk)
                    {
                        foreach (JToken post in posts)
                        {
                            if (DateTime.Parse(post["created_time"].ToString()) < earliestDate)
                            {
                                dateOk = false;
                                break;
                            }

                            JToken commentsTokenObject = post.SelectToken("comments");
                            if (commentsTokenObject == null)
                            {
                                continue;
                            }

                            // The first page of comments is embedded in the post itself; wrap it in a
                            // query result so it can be read the same way as the follow-up pages.
                            Crawler.CrawlerQueryResult commentsPageObject = new Crawler.CrawlerQueryResult();
                            commentsPageObject.RawResult = commentsTokenObject.ToString();

                            List<JToken> comments = commentsPageObject.GetFieldToken("data[*]").ToList();

                            while (comments.Count > 0)
                            {
                                foreach (JToken comment in comments)
                                {
                                    if (comment["message"] != null)
                                    {
                                        writer.WriteLine(comment["message"].ToString() + Environment.NewLine + "---" + Environment.NewLine);
                                    }
                                }

                                string nextPageUri = commentsPageObject.GetSingleField("paging.next");

                                if (string.IsNullOrEmpty(nextPageUri))
                                {
                                    break;
                                }

                                commentsPageObject = await crawler.ExecuteLinkAsync(nextPageUri);

                                comments = commentsPageObject.GetFieldToken("data[*]").ToList();
                            }
                        }

                        // The cut-off was reached: do not spend another request on a page we will never read.
                        if (!dateOk)
                        {
                            break;
                        }

                        // Same guard as for the comment pages: no next link means the feed is exhausted.
                        string nextPostPageUri = queryResult.GetSingleField("paging.next");
                        if (string.IsNullOrEmpty(nextPostPageUri))
                        {
                            break;
                        }

                        queryResult = await crawler.ExecuteLinkAsync(nextPostPageUri);

                        posts = queryResult.GetFieldToken("data[*]").ToList();
                    }
                }
            }
            finally
            {
                // Always stop the marquee, even when the crawl or the file write throws.
                progressBarComments.MarqueeAnimationSpeed = 0;
                progressBarComments.Value = 0;
            }
        }
Beispiel #3
0
        /// <summary>
        /// Crawls the feed named in <c>txtUrl</c> and, depending on the selected radio button,
        /// saves each post or each sufficiently long comment as its own text file under a
        /// time-stamped sub-folder of <c>textboxFolderPath</c>. Limits on how much is fetched are
        /// delegated to <c>FetchPosts</c> and <c>FetchComments</c>. When grouping by author is
        /// enabled, an author's comments are only written once that author's index reaches the
        /// flush threshold, and the author folders are finally renamed to
        /// "&lt;file count&gt; - &lt;author&gt;".
        /// Any exception is caught and written to the trace output; it is not rethrown.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private async void btnProcess_Click(object sender, EventArgs e)
        {
            // Author index from which comments are written to disk when grouping by author.
            // NOTE(review): presumably the zero-based index of an author's 20th comment; confirm
            // against UpdateDictionaries.
            const int FlushThresholdIndex = 20 - 1;

            try
            {
                InitializeProgressBar();

                Dictionary<string, int> authorsIndexes = new Dictionary<string, int>();
                Dictionary<string, List<string>> authorsCommentsInMemory = new Dictionary<string, List<string>>();

                string virtualFolderName = DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss");

                string postsFolderPath = Path.Combine(textboxFolderPath.Text, txtUrl.Text, "posts", virtualFolderName);
                string commentsFolderPath = Path.Combine(textboxFolderPath.Text, txtUrl.Text, "comments", virtualFolderName);

                // CreateDirectory does nothing when the directory already exists, so no Exists check is needed.
                System.IO.Directory.CreateDirectory(postsFolderPath);
                System.IO.Directory.CreateDirectory(commentsFolderPath);

                int postsFetched = 0;
                CommentsFetched = 0;

                // NOTE(review): the credential is hard-coded in source; it should be loaded from
                // configuration instead of being compiled into the binary.
                Crawler crawler = new Crawler("913819931996488|2e3ef18f88e42c9068d8a6dba3b14021");

                Crawler.CrawlerQueryResult queryResult = await crawler.ExecuteQueryAsync(txtUrl.Text);

                List<JToken> posts = queryResult.GetFieldToken("data[*]").ToList();

                while (posts.Count > 0 && FetchPosts(postsFetched, CommentsFetched))
                {
                    bool continueFetchingPosts = true;

                    foreach (JToken post in posts)
                    {
                        if (rdoGetPosts.Checked)
                        {
                            // Save the post in a file named after its running number.
                            using (StreamWriter writer = new StreamWriter(Path.Combine(postsFolderPath, postsFetched.ToString() + ".txt")))
                            {
                                writer.WriteLine(post["message"]);
                            }

                            progressBar.PerformStep();

                            if (!FetchPosts(++postsFetched, CommentsFetched))
                            {
                                continueFetchingPosts = false;
                                break;
                            }
                        }

                        if (rdoGetComments.Checked)
                        {
                            int commentsPerPostFetched = 0;

                            JToken commentsTokenObject = post.SelectToken("comments");

                            if (commentsTokenObject == null)
                            {
                                continue;
                            }

                            // The first page of comments is embedded in the post itself; wrap it in a
                            // query result so it can be read the same way as the follow-up pages.
                            Crawler.CrawlerQueryResult commentsPageObject = new Crawler.CrawlerQueryResult();
                            commentsPageObject.RawResult = commentsTokenObject.ToString();

                            List<JToken> comments = commentsPageObject.GetFieldToken("data[*]").ToList();

                            while (comments.Count > 0 && FetchComments(CommentsFetched, commentsPerPostFetched))
                            {
                                bool continueFetchingComments = true;

                                foreach (JToken comment in comments)
                                {
                                    string authorFolderName = GetAuthorFolderName(comment);

                                    // A comment may carry no message at all; skip it instead of
                                    // dereferencing null, which would abort the whole crawl via the
                                    // outer catch.
                                    JToken messageToken = comment["message"];
                                    if (messageToken == null)
                                    {
                                        continue;
                                    }

                                    string commentBody = messageToken.ToString();

                                    if (commentBody.Length < numMinimumCommenLength.Value)
                                    {
                                        continue;
                                    }

                                    UpdateDictionaries(authorsCommentsInMemory, authorsIndexes, authorFolderName, commentBody);

                                    int authorIndex = authorsIndexes[authorFolderName];

                                    if (cbxGroupByAuthor.Checked)
                                    {
                                        // On reaching the threshold, flush everything buffered for this author.
                                        if (authorIndex == FlushThresholdIndex)
                                        {
                                            List<string> authorComments = authorsCommentsInMemory[authorFolderName];
                                            for (int i = 0; i < authorComments.Count; ++i)
                                            {
                                                using (StreamWriter writer = new StreamWriter(BuildCommentPath(commentsFolderPath, CommentsFetched, authorFolderName, i)))
                                                {
                                                    writer.WriteLine(authorComments[i]);
                                                }
                                            }
                                        }

                                        // From the threshold onwards every comment is written directly.
                                        // NOTE(review): at exactly the threshold this may rewrite a file the
                                        // flush above just produced; depends on UpdateDictionaries and
                                        // BuildCommentPath, left unchanged.
                                        if (authorIndex >= FlushThresholdIndex)
                                        {
                                            using (StreamWriter writer = new StreamWriter(BuildCommentPath(commentsFolderPath, CommentsFetched, authorFolderName, authorIndex)))
                                            {
                                                writer.WriteLine(commentBody);
                                            }
                                        }
                                    }
                                    else
                                    {
                                        using (StreamWriter writer = new StreamWriter(BuildCommentPath(commentsFolderPath, CommentsFetched, authorFolderName, authorIndex)))
                                        {
                                            writer.WriteLine(commentBody);
                                        }
                                    }

                                    progressBar.PerformStep();

                                    if (!FetchComments(++CommentsFetched, ++commentsPerPostFetched))
                                    {
                                        continueFetchingComments = false;
                                        break;
                                    }
                                }

                                if (!continueFetchingComments)
                                {
                                    break;
                                }

                                string nextCommentPageUri = commentsPageObject.GetSingleField("paging.next");

                                if (string.IsNullOrEmpty(nextCommentPageUri))
                                {
                                    break;
                                }

                                commentsPageObject = await crawler.ExecuteLinkAsync(nextCommentPageUri);

                                comments = commentsPageObject.GetFieldToken("data[*]").ToList();
                            }
                        }
                    }

                    if (!continueFetchingPosts)
                    {
                        break;
                    }

                    string nextPostPageUri = queryResult.GetSingleField("paging.next");

                    if (string.IsNullOrEmpty(nextPostPageUri))
                    {
                        break;
                    }

                    queryResult = await crawler.ExecuteLinkAsync(nextPostPageUri);

                    posts = queryResult.GetFieldToken("data[*]").ToList();
                }

                // Re-purpose the progress bar for the folder-renaming pass: one step per flushed author.
                progressBar.Maximum = authorsIndexes.Count(t => t.Value >= FlushThresholdIndex);
                progressBar.Value = 0;
                if (cbxGroupByAuthor.Checked)
                {
                    foreach (var author in authorsIndexes)
                    {
                        // Authors below the threshold never had anything written to disk.
                        if (author.Value < FlushThresholdIndex)
                        {
                            continue;
                        }

                        string oldPath = Path.Combine(commentsFolderPath, author.Key);

                        string[] filesInPath = System.IO.Directory.GetFiles(oldPath);

                        // Prefix the folder name with the number of files it contains.
                        string newPath = Path.Combine(commentsFolderPath, filesInPath.Length + " - " + author.Key);

                        System.IO.Directory.Move(oldPath, newPath);

                        progressBar.PerformStep();
                    }
                }
            }
            catch (Exception ex)
            {
                // Log the full exception (type, message and stack trace), not just the message,
                // so a failed crawl can actually be diagnosed.
                System.Diagnostics.Trace.TraceError(ex.ToString());
            }
        }