/// <summary>
/// Iterates over every post returned by <c>PostScraper.All</c> (sorted by
/// <c>created_time</c>, descending) and scrapes the comments of each post created
/// on or after 1 April 2017, writing one progress line per processed post.
/// </summary>
public void GoThroughEachPostAndGetTheCommentsOhMyGodThisWillDestroyMyLaptop()
        {
            // Posts whose 1-based position is at or below this value are only reported,
            // not scraped again. With the value 0 every post is scraped.
            const int alreadyScraped = 0;

            // Posts created before this date are skipped without any output.
            DateTime cutoff = new DateTime(2017, 4, 1);

            SortField[] ordering = new SortField[]
            {
                new SortField { Field = "created_time", Order = SortOrder.Descending }
            };
            AllResponse <ScrapedPost> allPosts = PostScraper.All(ordering);

            int position = 0;
            foreach (ScrapedPost current in allPosts.Data)
            {
                // The position counts every post, including the ones skipped below.
                position++;

                if (current.CreatedTime < cutoff)
                {
                    continue;
                }

                if (position <= alreadyScraped)
                {
                    Console.WriteLine($"{position}/{allPosts.TotalCount}: {current.Id}; Already scraped.");
                    continue;
                }

                // Materialise the scrape result so the comment count can be reported.
                List <ScrapedComment> scraped = CommentScraper.Scrape(current).ToList();
                Console.WriteLine($"{position}/{allPosts.TotalCount}: {current.Id}; {scraped.Count}");
            }
        }
        /// <summary>
        /// Scrapes posts for the requested pages between <c>request.Since</c> and
        /// <c>request.Until</c>, scrapes the comments of every post found, and saves a
        /// <see cref="PostScrapeHistory"/> record summarising the run.
        /// </summary>
        /// <param name="request">
        /// The scrape request. When <c>request.Pages</c> is null, every page returned by
        /// <c>PageMetadataRepository.All()</c> is scraped; otherwise each entry is looked up
        /// through <c>PageMetadataRepository.Get</c>.
        /// </param>
        /// <returns>The value returned by <c>PostScrapeHistoryRepository.Save</c> for the new record.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        public PostScrapeHistory ScrapePosts([FromBody] PostScrapeRequest request)
        {
            // Debug.Assert is compiled out of release builds, so validate explicitly:
            // a missing request body must not surface as a NullReferenceException below.
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            // Captured before any work so it can serve as the import start when no posts are found.
            DateTime started = DateTime.Now;
            Console.WriteLine("Started Scraping");

            // If no specific pages were specified, scrape them all.
            PageMetadata[] pages;
            if (request.Pages == null)
            {
                pages = PageMetadataRepository.All().Data.ToArray();
            }
            else
            {
                pages = request.Pages.Select(p => PageMetadataRepository.Get(p)).ToArray();
            }

            int numberOfComments = 0;

            ScrapedPost[] posts = PostScraper.Scrape(pages, request.Since, request.Until).ToArray();

            Console.WriteLine($"Started scraping comments for {posts.Length} posts");

            foreach (ScrapedPost post in posts)
            {
                ScrapedComment[] comments = CommentScraper.Scrape(post).ToArray();
                numberOfComments += comments.Length;

                // Running total of comments scraped so far, printed as progress output.
                Console.WriteLine(numberOfComments);
            }

            Console.WriteLine($"Done scraping {pages.Length} pages. Scraped {posts.Length} posts with {numberOfComments} comments");

            var postScrape = new PostScrapeHistory
            {
                Id               = Guid.NewGuid().ToString(),
                Since            = request.Since,
                Until            = request.Until,
                // Prefer the first post's own Scraped timestamp; with no posts, fall back to
                // when this call began rather than to the moment it finished.
                ImportStart      = posts.FirstOrDefault()?.Scraped ?? started,
                ImportEnd        = DateTime.Now,
                NumberOfPosts    = posts.Length,
                NumberOfComments = numberOfComments,
                Pages            = pages
            };

            return(PostScrapeHistoryRepository.Save(postScrape));
        }
// Example #3
// 0
 /// <summary>
 /// Fetches the post identified by the request through <c>PostScraper.Get</c> and
 /// returns the result of <c>CommentScraper.Scrape</c> for that post.
 /// </summary>
 /// <param name="request">
 /// Request carrying the <c>PostId</c>. When the request itself is null, a null id is
 /// passed on to <c>PostScraper.Get</c>.
 /// </param>
 /// <returns>The comment sequence produced by <c>CommentScraper.Scrape</c>.</returns>
 public IEnumerable <ScrapedComment> ScrapeComments([FromBody] CommentScrapeRequest request)
 {
     var postId = request?.PostId;
     var post   = PostScraper.Get(postId);

     return CommentScraper.Scrape(post);
 }