コード例 #1
0
        /// <summary>
        /// Scrapes the posts of the requested pages, scrapes the comments of every
        /// scraped post, and saves a history entry summarising the run.
        /// </summary>
        /// <param name="request">
        /// The scrape request. When <c>Pages</c> is <c>null</c>, every page returned by
        /// <c>PageMetadataRepository.All()</c> is scraped; <c>Since</c> and <c>Until</c>
        /// are forwarded to the post scraper.
        /// </param>
        /// <returns>The result of <c>PostScrapeHistoryRepository.Save</c> for the new history entry.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
        public PostScrapeHistory ScrapePosts([FromBody] PostScrapeRequest request)
        {
            // Debug.Assert is removed from release builds, so validate explicitly rather
            // than failing later with a NullReferenceException on request.Pages.
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            Console.WriteLine("Started Scraping");

            // If no specific pages were specified, scrape them all.
            PageMetadata[] pages;
            if (request.Pages == null)
            {
                pages = PageMetadataRepository.All().Data.ToArray();
            }
            else
            {
                pages = request.Pages.Select(p => PageMetadataRepository.Get(p)).ToArray();
            }

            // Captured before scraping so that a run which finds no posts still reports
            // a start time that precedes its end time.
            DateTime scrapeStart = DateTime.Now;

            ScrapedPost[] posts = PostScraper.Scrape(pages, request.Since, request.Until).ToArray();

            Console.WriteLine($"Started scraping comments for {posts.Length} posts");

            int numberOfComments = 0;
            foreach (ScrapedPost post in posts)
            {
                // Only the number of comments is needed here, so count them instead of
                // copying them into a throwaway array.
                numberOfComments += CommentScraper.Scrape(post).Count();

                // Running total, printed after each post as a progress indicator.
                Console.WriteLine(numberOfComments);
            }

            Console.WriteLine($"Done scraping {pages.Length} pages. Scraped {posts.Length} posts with {numberOfComments} comments");

            var postScrape = new PostScrapeHistory
            {
                Id               = Guid.NewGuid().ToString(),
                Since            = request.Since,
                Until            = request.Until,
                // Prefer the timestamp on the first scraped post; fall back to the
                // start time captured above when nothing was scraped.
                ImportStart      = posts.FirstOrDefault()?.Scraped ?? scrapeStart,
                ImportEnd        = DateTime.Now,
                NumberOfPosts    = posts.Length,
                NumberOfComments = numberOfComments,
                Pages            = pages
            };

            return PostScrapeHistoryRepository.Save(postScrape);
        }
コード例 #2
0
        /// <summary>
        /// Scrapes the given pages, records each page's newly scraped fan count, and
        /// saves a history entry describing the run.
        /// </summary>
        /// <param name="request">
        /// Ids of the pages to scrape. When <c>null</c>, every page returned by
        /// <c>PageMetadataRepository.All()</c> is scraped.
        /// </param>
        /// <returns>The result of <c>PageScrapeHistoryRepository.Save</c> for the new history entry.</returns>
        public PageScrapeHistory ScrapePages([FromBody] IEnumerable <string> request)
        {
            // A missing id list means "scrape every stored page".
            PageMetadata[] targets = request == null
                ? PageMetadataRepository.All().Data.ToArray()
                : request.Select(pageId => PageMetadataRepository.Get(pageId)).ToArray();

            DateTime startedAt = DateTime.Now;
            ScrapedPage[] results = PageScraper.Scrape(targets, startedAt).ToArray();

            // Fold each scrape result back into the metadata of the page it belongs to.
            for (int i = 0; i < targets.Length; i++)
            {
                PageMetadata target = targets[i];

                // Throws if the scraper produced no result for this page.
                ScrapedPage match = results.First(r => r.FacebookId == target.FacebookId);

                var latestFanCount = new DatedFanCount
                {
                    Date     = match.Date,
                    FanCount = match.FanCount,
                };

                // Newest entry goes to the front of the history.
                target.FanCountHistory.Insert(0, latestFanCount);
                target.LatestScrape = startedAt;

                // Save the page while it still carries its complete fan-count history...
                PageMetadataRepository.Save(target, Refresh.False);

                // ...then trim the in-memory object to the newest entry only; these same
                // objects are attached to the scrape history created below.
                target.FanCountHistory = target.FanCountHistory.Take(1).ToList();
            }

            // Summary record for the whole run.
            var history = new PageScrapeHistory
            {
                Id          = Guid.NewGuid().ToString(),
                ImportStart = startedAt,
                ImportEnd   = DateTime.Now,
                Pages       = targets
            };

            return PageScrapeHistoryRepository.Save(history);
        }