/// <summary>
/// Scrapes posts (and the comments of every scraped post) for the requested pages
/// and stores a <see cref="PostScrapeHistory"/> record summarising the run.
/// </summary>
/// <param name="request">
/// The scrape request. <c>Pages</c> lists the page ids to scrape (when it is null,
/// every page in <c>PageMetadataRepository</c> is scraped); <c>Since</c> and
/// <c>Until</c> are forwarded to the post scraper and copied onto the history record.
/// </param>
/// <returns>The value returned by <c>PostScrapeHistoryRepository.Save</c> for the new record.</returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
public PostScrapeHistory ScrapePosts([FromBody] PostScrapeRequest request)
{
    // Debug.Assert is compiled out of builds without DEBUG, so a missing request
    // body used to surface as a NullReferenceException further down. Validate
    // explicitly so the failure is the same in every build configuration.
    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    Console.WriteLine("Started Scraping");

    // If no specific pages were specified, scrape them all.
    PageMetadata[] pages;
    if (request.Pages == null)
    {
        pages = PageMetadataRepository.All().Data.ToArray();
    }
    else
    {
        pages = request.Pages.Select(p => PageMetadataRepository.Get(p)).ToArray();
    }

    int numberOfComments = 0;
    ScrapedPost[] posts = PostScraper.Scrape(pages, request.Since, request.Until).ToArray();

    Console.WriteLine($"Started scraping comments for {posts.Length} posts");
    foreach (ScrapedPost post in posts)
    {
        // Materialise the comment sequence so the scrape for this post has fully
        // run before it is counted; only the count is used in this method.
        ScrapedComment[] comments = CommentScraper.Scrape(post).ToArray();
        numberOfComments += comments.Length;

        // Running total of comments scraped so far, printed as progress output.
        Console.WriteLine(numberOfComments);
    }

    Console.WriteLine($"Done scraping {pages.Length} pages. Scraped {posts.Length} posts with {numberOfComments} comments");

    var postScrape = new PostScrapeHistory
    {
        Id = Guid.NewGuid().ToString(),
        Since = request.Since,
        Until = request.Until,
        // The first post's scrape timestamp marks the start of the import;
        // when nothing was scraped, fall back to the current time.
        ImportStart = posts.FirstOrDefault()?.Scraped ?? DateTime.Now,
        ImportEnd = DateTime.Now,
        NumberOfPosts = posts.Length,
        NumberOfComments = numberOfComments,
        Pages = pages
    };

    return PostScrapeHistoryRepository.Save(postScrape);
}
/// <summary>
/// Scrapes the requested pages, records the new fan count on each page's metadata,
/// and stores a <see cref="PageScrapeHistory"/> record describing the run.
/// </summary>
/// <param name="request">
/// Ids of the pages to scrape. When null, every page in
/// <c>PageMetadataRepository</c> is scraped.
/// </param>
/// <returns>The value returned by <c>PageScrapeHistoryRepository.Save</c> for the new record.</returns>
public PageScrapeHistory ScrapePages([FromBody] IEnumerable <string> request)
{
    // A null request means "scrape everything we know about".
    PageMetadata[] targets = request == null
        ? PageMetadataRepository.All().Data.ToArray()
        : request.Select(id => PageMetadataRepository.Get(id)).ToArray();

    DateTime startedAt = DateTime.Now;
    ScrapedPage[] scraped = PageScraper.Scrape(targets, startedAt).ToArray();

    // Fold each scrape result back into the metadata of the page it belongs to.
    for (int i = 0; i < targets.Length; i++)
    {
        PageMetadata target = targets[i];
        ScrapedPage match = scraped.First(p => p.FacebookId == target.FacebookId);

        var latestFanCount = new DatedFanCount
        {
            Date = match.Date,
            FanCount = match.FanCount,
        };

        // Newest entry goes to the front of the per-page history.
        target.FanCountHistory.Insert(0, latestFanCount);
        target.LatestScrape = startedAt;
        PageMetadataRepository.Save(target, Refresh.False);

        // Trim only after the full history has been saved: these same objects are
        // attached to the scrape history below, which should carry just the fan
        // count from this scrape.
        target.FanCountHistory = target.FanCountHistory.Take(1).ToList();
    }

    // Record the run as a whole.
    var history = new PageScrapeHistory
    {
        Id = Guid.NewGuid().ToString(),
        ImportStart = startedAt,
        ImportEnd = DateTime.Now,
        Pages = targets
    };

    return PageScrapeHistoryRepository.Save(history);
}