/// <summary>
/// Iterates every scraped post (newest first) and scrapes the comments for each post
/// created on or after <paramref name="since"/>, skipping the first
/// <paramref name="lastScrapeAmount"/> posts as already processed by a previous run.
/// Progress is written to the console.
/// </summary>
/// <param name="lastScrapeAmount">Number of leading posts to treat as already scraped
/// and skip. Defaults to 0 (scrape everything), matching the previous hard-coded value.</param>
/// <param name="since">Only posts created at or after this instant have their comments
/// scraped. Defaults to 2017-04-01, matching the previous hard-coded cutoff.</param>
public void GoThroughEachPostAndGetTheCommentsOhMyGodThisWillDestroyMyLaptop(int lastScrapeAmount = 0, DateTime? since = null)
{
    DateTime cutoff = since ?? new DateTime(2017, 04, 01);
    int i = 0;
    AllResponse<ScrapedPost> posts = PostScraper.All(new SortField[]
    {
        new SortField { Field = "created_time", Order = SortOrder.Descending }
    });

    foreach (ScrapedPost post in posts.Data)
    {
        i++;

        // Posts older than the cutoff are skipped entirely (no console output).
        if (post.CreatedTime < cutoff)
        {
            continue;
        }

        if (i > lastScrapeAmount)
        {
            List<ScrapedComment> comments = CommentScraper.Scrape(post).ToList();
            Console.WriteLine($"{i}/{posts.TotalCount}: {post.Id}; {comments.Count}");
        }
        else
        {
            Console.WriteLine($"{i}/{posts.TotalCount}: {post.Id}; Already scraped.");
        }
    }
}
/// <summary>
/// Scrapes posts (and each post's comments) for the requested pages and time range,
/// then persists and returns a history record describing the scrape.
/// </summary>
/// <param name="request">The scrape request. When <c>request.Pages</c> is null,
/// every known page is scraped.</param>
/// <returns>The saved <see cref="PostScrapeHistory"/> for this run.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="request"/> is null.</exception>
public PostScrapeHistory ScrapePosts([FromBody] PostScrapeRequest request)
{
    // Debug.Assert is compiled out of release builds, which would let a null
    // request fall through to a NullReferenceException at request.Pages below.
    // A hard guard fails fast with a clear exception instead.
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    Console.WriteLine("Started Scraping");

    // If no specific pages were specified, scrape them all.
    PageMetadata[] pages;
    if (request.Pages == null)
    {
        pages = PageMetadataRepository.All().Data.ToArray();
    }
    else
    {
        pages = request.Pages.Select(p => PageMetadataRepository.Get(p)).ToArray();
    }

    int numberOfComments = 0;
    ScrapedPost[] posts = PostScraper.Scrape(pages, request.Since, request.Until).ToArray();

    Console.WriteLine($"Started scraping comments for {posts.Length} posts");

    foreach (ScrapedPost post in posts)
    {
        ScrapedComment[] comments = CommentScraper.Scrape(post).ToArray();
        numberOfComments += comments.Length;
        Console.WriteLine(numberOfComments);
    }

    Console.WriteLine($"Done scraping {pages.Length} pages. Scraped {posts.Length} posts with {numberOfComments} comments");

    // Capture a single timestamp so the ImportStart fallback and ImportEnd agree
    // instead of reading the clock twice.
    DateTime now = DateTime.Now;
    var postScrape = new PostScrapeHistory
    {
        Id = Guid.NewGuid().ToString(),
        Since = request.Since,
        Until = request.Until,
        ImportStart = posts.FirstOrDefault()?.Scraped ?? now,
        ImportEnd = now,
        NumberOfPosts = posts.Length,
        NumberOfComments = numberOfComments,
        Pages = pages
    };

    return PostScrapeHistoryRepository.Save(postScrape);
}
/// <summary>
/// Scrapes and returns the comments of the post identified by the request.
/// A null request is tolerated: the resulting null post id is passed through
/// to <c>PostScraper.Get</c> unchanged.
/// </summary>
/// <param name="request">The request naming the post whose comments to scrape.</param>
/// <returns>The scraped comments for the requested post.</returns>
public IEnumerable<ScrapedComment> ScrapeComments([FromBody] CommentScrapeRequest request) =>
    CommentScraper.Scrape(PostScraper.Get(request?.PostId));