// Click handler: runs the query typed into txtUrl through the crawler and
// writes the "message" of every returned post to the file named in textBox2,
// each followed by a "---" separator. Paging stops when a page comes back
// empty, when there is no "paging.next" link, or as soon as a post that has a
// message is older than 2014-07-01.
//
// This is an async void event handler: exceptions are not caught here, but the
// progress bar is always put back to its idle state.
private async void button1_Click(object sender, EventArgs e)
{
    progressBar1.MarqueeAnimationSpeed = 60;
    try
    {
        // NOTE(review): this literal looks like an API credential committed to
        // source; consider loading it from configuration instead.
        Crawler crawler = new Crawler("913819931996488|2e3ef18f88e42c9068d8a6dba3b14021");
        Crawler.CrawlerQueryResult queryResult = await crawler.ExecuteQueryAsync(txtUrl.Text);
        List<JToken> posts = queryResult.GetFieldToken("data[*]").ToList();

        DateTime earliestDate = new DateTime(2014, 7, 1);
        bool dateOk = true;

        using (StreamWriter writer = new StreamWriter(textBox2.Text))
        {
            while (posts.Count > 0 && dateOk)
            {
                foreach (JToken token in posts)
                {
                    // Posts without a message are skipped entirely (their
                    // date is not inspected either).
                    if (token["message"] == null)
                    {
                        continue;
                    }

                    DateTime date = DateTime.Parse(token["created_time"].ToString());
                    if (date < earliestDate)
                    {
                        dateOk = false;
                        break;
                    }

                    writer.WriteLine(token["message"].ToString() + Environment.NewLine + "---" + Environment.NewLine);
                }

                // The cutoff was reached: do not request a page whose content
                // would be thrown away.
                if (!dateOk)
                {
                    break;
                }

                // The last page carries no "next" link; stop instead of
                // handing an empty link to the crawler.
                string nextPageUri = queryResult.GetSingleField("paging.next");
                if (string.IsNullOrEmpty(nextPageUri))
                {
                    break;
                }

                queryResult = await crawler.ExecuteLinkAsync(nextPageUri);
                posts = queryResult.GetFieldToken("data[*]").ToList();
            }
        }
    }
    finally
    {
        // Stop the marquee even when the crawl or the file write failed.
        progressBar1.MarqueeAnimationSpeed = 0;
        progressBar1.Value = 0;
    }
}
// Click handler: runs the query typed into txtUrl through the crawler and, for
// every returned post, writes the "message" of each of its comments to the
// file named in textBox2, each followed by a "---" separator. Comment pages
// are followed through their own "paging.next" links. Post paging stops when a
// page comes back empty, when there is no "paging.next" link, or as soon as a
// post older than 2014-07-01 is seen.
//
// This is an async void event handler: exceptions are not caught here, but the
// progress bar is always put back to its idle state.
private async void btnProcessComments_Click(object sender, EventArgs e)
{
    progressBarComments.MarqueeAnimationSpeed = 60;
    try
    {
        // NOTE(review): this literal looks like an API credential committed to
        // source; consider loading it from configuration instead.
        Crawler crawler = new Crawler("913819931996488|2e3ef18f88e42c9068d8a6dba3b14021");
        Crawler.CrawlerQueryResult queryResult = await crawler.ExecuteQueryAsync(txtUrl.Text);
        List<JToken> posts = queryResult.GetFieldToken("data[*]").ToList();

        DateTime earliestDate = new DateTime(2014, 7, 1);
        bool dateOk = true;

        using (StreamWriter writer = new StreamWriter(textBox2.Text))
        {
            while (posts.Count > 0 && dateOk)
            {
                foreach (JToken post in posts)
                {
                    if (DateTime.Parse(post["created_time"].ToString()) < earliestDate)
                    {
                        dateOk = false;
                        break;
                    }

                    JToken commentsTokenObject = post.SelectToken("comments");
                    if (commentsTokenObject == null)
                    {
                        continue;
                    }

                    // The first comment page is embedded in the post itself;
                    // wrap it in a query result so it can be read and paged
                    // the same way as pages fetched by the crawler.
                    Crawler.CrawlerQueryResult commentsPageObject = new Crawler.CrawlerQueryResult();
                    commentsPageObject.RawResult = commentsTokenObject.ToString();
                    List<JToken> comments = commentsPageObject.GetFieldToken("data[*]").ToList();

                    while (comments.Count > 0)
                    {
                        foreach (JToken comment in comments)
                        {
                            if (comment["message"] != null)
                            {
                                writer.WriteLine(comment["message"].ToString() + Environment.NewLine + "---" + Environment.NewLine);
                            }
                        }

                        string nextPageUri = commentsPageObject.GetSingleField("paging.next");
                        if (string.IsNullOrEmpty(nextPageUri))
                        {
                            break;
                        }

                        commentsPageObject = await crawler.ExecuteLinkAsync(nextPageUri);
                        comments = commentsPageObject.GetFieldToken("data[*]").ToList();
                    }
                }

                // The cutoff was reached: do not request a page whose content
                // would be thrown away.
                if (!dateOk)
                {
                    break;
                }

                // Same guard as the comment paging above: the last page of
                // posts carries no "next" link.
                string nextPostPageUri = queryResult.GetSingleField("paging.next");
                if (string.IsNullOrEmpty(nextPostPageUri))
                {
                    break;
                }

                queryResult = await crawler.ExecuteLinkAsync(nextPostPageUri);
                posts = queryResult.GetFieldToken("data[*]").ToList();
            }
        }
    }
    finally
    {
        // Stop the marquee even when the crawl or the file write failed.
        progressBarComments.MarqueeAnimationSpeed = 0;
        progressBarComments.Value = 0;
    }
}
// Click handler: crawls the query typed into txtUrl and saves posts and/or
// comments as individual text files under
//   <textboxFolderPath>/<txtUrl>/posts/<timestamp>/    and
//   <textboxFolderPath>/<txtUrl>/comments/<timestamp>/
// depending on which of rdoGetPosts / rdoGetComments is checked.
//
// FetchPosts and FetchComments are used as "keep going?" predicates: fetching
// stops as soon as either returns false. When cbxGroupByAuthor is checked,
// an author's comments are only written once the author's index reaches the
// threshold below, and the author's folder is finally renamed to
// "<file count> - <author>".
//
// Any exception is caught and written to the trace output; nothing is
// rethrown.
private async void btnProcess_Click(object sender, EventArgs e)
{
    // Index an author must reach (in authorsIndexes) before anything is
    // written for that author in group-by-author mode.
    // NOTE(review): presumably the index is the zero-based position of the
    // author's latest comment, i.e. "at least 20 comments" - confirm against
    // UpdateDictionaries.
    const int MinAuthorIndexToPersist = 20 - 1;

    try
    {
        InitializeProgressBar();

        Dictionary<string, int> authorsIndexes = new Dictionary<string, int>();
        Dictionary<string, List<string>> authorsCommentsInMemory = new Dictionary<string, List<string>>();

        string virtualFolderName = DateTime.Now.ToString("yyyy-MM-dd-HH-mm-ss");
        string postsFolderPath = Path.Combine(textboxFolderPath.Text, txtUrl.Text, "posts", virtualFolderName);
        string commentsFolderPath = Path.Combine(textboxFolderPath.Text, txtUrl.Text, "comments", virtualFolderName);

        // CreateDirectory is a no-op for directories that already exist.
        System.IO.Directory.CreateDirectory(postsFolderPath);
        System.IO.Directory.CreateDirectory(commentsFolderPath);

        int postsFetched = 0;
        CommentsFetched = 0;

        // NOTE(review): this literal looks like an API credential committed to
        // source; consider loading it from configuration instead.
        Crawler crawler = new Crawler("913819931996488|2e3ef18f88e42c9068d8a6dba3b14021");
        Crawler.CrawlerQueryResult queryResult = await crawler.ExecuteQueryAsync(txtUrl.Text);
        List<JToken> posts = queryResult.GetFieldToken("data[*]").ToList();

        while (posts.Count > 0 && FetchPosts(postsFetched, CommentsFetched))
        {
            bool continueFetchingPosts = true;

            foreach (JToken post in posts)
            {
                if (rdoGetPosts.Checked)
                {
                    // save post
                    using (StreamWriter writer = new StreamWriter(Path.Combine(postsFolderPath, postsFetched.ToString() + ".txt")))
                    {
                        writer.WriteLine(post["message"]);
                    }

                    progressBar.PerformStep();
                    if (!FetchPosts(++postsFetched, CommentsFetched))
                    {
                        continueFetchingPosts = false;
                        break;
                    }
                }

                if (rdoGetComments.Checked)
                {
                    int commentsPerPostFetched = 0;

                    JToken commentsTokenObject = post.SelectToken("comments");
                    if (commentsTokenObject == null)
                    {
                        continue;
                    }

                    // The first comment page is embedded in the post itself;
                    // wrap it in a query result so it can be read and paged
                    // the same way as pages fetched by the crawler.
                    Crawler.CrawlerQueryResult commentsPageObject = new Crawler.CrawlerQueryResult();
                    commentsPageObject.RawResult = commentsTokenObject.ToString();
                    List<JToken> comments = commentsPageObject.GetFieldToken("data[*]").ToList();

                    while (comments.Count > 0 && FetchComments(CommentsFetched, commentsPerPostFetched))
                    {
                        bool continueFetchingComments = true;

                        foreach (JToken comment in comments)
                        {
                            // save comment
                            string authorFolderName = GetAuthorFolderName(comment);

                            // A comment without a "message" field has nothing
                            // to save; skip it instead of failing the whole
                            // run with a NullReferenceException.
                            JToken messageToken = comment["message"];
                            if (messageToken == null)
                            {
                                continue;
                            }

                            string commentBody = messageToken.ToString();
                            if (commentBody.Length < numMinimumCommenLength.Value)
                            {
                                continue;
                            }

                            UpdateDictionaries(authorsCommentsInMemory, authorsIndexes, authorFolderName, commentBody);
                            int authorIndex = authorsIndexes[authorFolderName];

                            if (cbxGroupByAuthor.Checked)
                            {
                                if (authorIndex == MinAuthorIndexToPersist)
                                {
                                    // Threshold just reached: write out every
                                    // comment held in memory for this author.
                                    List<string> authorComments = authorsCommentsInMemory[authorFolderName];
                                    for (int i = 0; i < authorComments.Count; ++i)
                                    {
                                        using (StreamWriter writer = new StreamWriter(BuildCommentPath(commentsFolderPath, CommentsFetched, authorFolderName, i)))
                                        {
                                            writer.WriteLine(authorComments[i]);
                                        }
                                    }
                                }

                                if (authorIndex >= MinAuthorIndexToPersist)
                                {
                                    // At or past the threshold the current
                                    // comment is written directly.
                                    using (StreamWriter writer = new StreamWriter(BuildCommentPath(commentsFolderPath, CommentsFetched, authorFolderName, authorIndex)))
                                    {
                                        writer.WriteLine(commentBody);
                                    }
                                }
                            }
                            else
                            {
                                using (StreamWriter writer = new StreamWriter(BuildCommentPath(commentsFolderPath, CommentsFetched, authorFolderName, authorIndex)))
                                {
                                    writer.WriteLine(commentBody);
                                }
                            }

                            progressBar.PerformStep();
                            if (!FetchComments(++CommentsFetched, ++commentsPerPostFetched))
                            {
                                continueFetchingComments = false;
                                break;
                            }
                        }

                        if (!continueFetchingComments)
                        {
                            break;
                        }

                        string nextCommentPageUri = commentsPageObject.GetSingleField("paging.next");
                        if (string.IsNullOrEmpty(nextCommentPageUri))
                        {
                            break;
                        }

                        commentsPageObject = await crawler.ExecuteLinkAsync(nextCommentPageUri);
                        comments = commentsPageObject.GetFieldToken("data[*]").ToList();
                    }
                }
            }

            if (!continueFetchingPosts)
            {
                break;
            }

            string nextPostPageUri = queryResult.GetSingleField("paging.next");
            if (string.IsNullOrEmpty(nextPostPageUri))
            {
                break;
            }

            queryResult = await crawler.ExecuteLinkAsync(nextPostPageUri);
            posts = queryResult.GetFieldToken("data[*]").ToList();
        }

        // Re-purpose the progress bar for the folder-renaming pass: one step
        // per author that reached the threshold.
        progressBar.Maximum = authorsIndexes.Count(t => t.Value >= MinAuthorIndexToPersist);
        progressBar.Value = 0;

        if (cbxGroupByAuthor.Checked)
        {
            foreach (var author in authorsIndexes)
            {
                if (author.Value < MinAuthorIndexToPersist)
                {
                    continue;
                }

                // Prefix the author's folder name with the number of files
                // it contains.
                string oldPath = Path.Combine(commentsFolderPath, author.Key);
                string[] filesInPath = System.IO.Directory.GetFiles(oldPath);
                string newPath = Path.Combine(commentsFolderPath, filesInPath.Length + " - " + author.Key);
                System.IO.Directory.Move(oldPath, newPath);
                progressBar.PerformStep();
            }
        }
    }
    catch (Exception ex)
    {
        // Log the full exception (type, message and stack trace), not just
        // the message, so failures can actually be diagnosed.
        System.Diagnostics.Trace.TraceError(ex.ToString());
    }
}