Exemple #1
0
        /// <summary>
        /// Serializes the comments returned by <c>CommentScraper.All</c> for the request's
        /// query and sort order to JSON and returns them as a file named <c>export.json</c>.
        /// </summary>
        /// <param name="request">Query and sort settings, bound from the request body.</param>
        /// <returns>A file result holding the UTF-8 encoded JSON document.</returns>
        public IActionResult ExportAsJson([FromBody] ElasticSearchRequest request)
        {
            IEnumerable<ScrapedComment> comments = CommentScraper.All(request.Query, request.Sort).Data;
            string json = JsonConvert.SerializeObject(comments);

            // The content type is passed through exactly as the endpoint has always sent it.
            return File(Encoding.UTF8.GetBytes(json), "application/json-download", "export.json");
        }
        /// <summary>
        /// Walks every post returned by <c>PostScraper.All</c> (sorted by <c>created_time</c>,
        /// descending) and scrapes the comments of each post created on or after 2017-04-01,
        /// writing one progress line per handled post to the console.
        /// </summary>
        public void GoThroughEachPostAndGetTheCommentsOhMyGodThisWillDestroyMyLaptop()
        {
            // Posts whose 1-based position is at or below this value are reported as
            // already scraped instead of being scraped again.
            const int AlreadyScrapedCount = 0;

            // Posts created before this date are skipped silently.
            DateTime cutoff = new DateTime(2017, 04, 01);

            SortField[] newestFirst =
            {
                new SortField { Field = "created_time", Order = SortOrder.Descending }
            };
            AllResponse<ScrapedPost> allPosts = PostScraper.All(newestFirst);

            int position = 0;
            foreach (ScrapedPost current in allPosts.Data)
            {
                // The position counts every post, including the ones skipped below.
                position++;

                if (current.CreatedTime < cutoff)
                {
                    continue;
                }

                if (position <= AlreadyScrapedCount)
                {
                    Console.WriteLine($"{position}/{allPosts.TotalCount}: {current.Id}; Already scraped.");
                    continue;
                }

                // ToList forces the scrape to run to completion before the count is reported.
                List<ScrapedComment> scraped = CommentScraper.Scrape(current).ToList();
                Console.WriteLine($"{position}/{allPosts.TotalCount}: {current.Id}; {scraped.Count}");
            }
        }
Exemple #3
0
        /// <summary>
        /// Serializes the comments returned by <c>CommentScraper.All</c> for the request's
        /// query and sort order to CSV and returns them as a file named <c>export.csv</c>.
        /// </summary>
        /// <param name="request">Query and sort settings, bound from the request body.</param>
        /// <returns>A <c>text/csv</c> file result holding the serialized comments.</returns>
        public IActionResult ExportAsCSV([FromBody] ElasticSearchRequest request)
        {
            IEnumerable<ScrapedComment> comments = CommentScraper.All(request.Query, request.Sort).Data;

            // CsvSerialization.MapComment supplies the column mapping for a comment.
            byte[] csv = CsvSerialization.Serialize(comments, CsvSerialization.MapComment);

            return File(csv, "text/csv", "export.csv");
        }
 /// <summary>
 /// Creates the controller and stores the supplied scrapers and repositories
 /// in the corresponding members.
 /// </summary>
 /// <param name="postScraper">Scraper used for posts.</param>
 /// <param name="commentScraper">Scraper used for comments.</param>
 /// <param name="pageScraper">Scraper used for pages.</param>
 /// <param name="pageMetadataRepository">Repository of page metadata.</param>
 /// <param name="postScrapeHistoryRepository">Repository of post scrape history records.</param>
 public PostScrapeController(
     PostScraper postScraper,
     CommentScraper commentScraper,
     PageScraper pageScraper,
     ElasticSearchRepository<PageMetadata> pageMetadataRepository,
     ElasticSearchRepository<PostScrapeHistory> postScrapeHistoryRepository)
 {
     this.PostScraper = postScraper;
     this.CommentScraper = commentScraper;
     this.PageScraper = pageScraper;
     this.PageMetadataRepository = pageMetadataRepository;
     this.PostScrapeHistoryRepository = postScrapeHistoryRepository;
 }
        /// <summary>
        /// Scrapes the posts of the requested pages (or of every known page when none are
        /// specified), scrapes the comments of each of those posts, and saves a
        /// <see cref="PostScrapeHistory"/> record summarizing the run.
        /// </summary>
        /// <param name="request">
        /// Scrape settings bound from the request body: the optional page ids and the
        /// <c>Since</c>/<c>Until</c> values handed to the post scraper.
        /// </param>
        /// <returns>The history record as returned by <c>PostScrapeHistoryRepository.Save</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is <c>null</c>.</exception>
        public PostScrapeHistory ScrapePosts([FromBody] PostScrapeRequest request)
        {
            // Debug.Assert is only active in DEBUG builds, so validate explicitly to get a
            // clear error instead of a NullReferenceException further down.
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            // Captured before any work is done so that a run which finds no posts still
            // records when it actually started.
            DateTime scrapeStarted = DateTime.Now;

            Console.WriteLine("Started Scraping");

            // If no specific pages were specified, scrape them all.
            PageMetadata[] pages;
            if (request.Pages == null)
            {
                pages = PageMetadataRepository.All().Data.ToArray();
            }
            else
            {
                pages = request.Pages.Select(p => PageMetadataRepository.Get(p)).ToArray();
            }

            int numberOfComments = 0;

            ScrapedPost[] posts = PostScraper.Scrape(pages, request.Since, request.Until).ToArray();

            Console.WriteLine($"Started scraping comments for {posts.Length} posts");

            foreach (ScrapedPost post in posts)
            {
                // ToArray forces the comment scrape to run to completion for this post.
                ScrapedComment[] comments = CommentScraper.Scrape(post).ToArray();
                numberOfComments += comments.Length;

                // Running total, printed after every post as a progress indicator.
                Console.WriteLine(numberOfComments);
            }

            Console.WriteLine($"Done scraping {pages.Length} pages. Scraped {posts.Length} posts with {numberOfComments} comments");

            var postScrape = new PostScrapeHistory
            {
                Id               = Guid.NewGuid().ToString(),
                Since            = request.Since,
                Until            = request.Until,
                // Prefer the scrape timestamp of the first post; fall back to the time
                // this method started when no posts were scraped.
                ImportStart      = posts.FirstOrDefault()?.Scraped ?? scrapeStarted,
                ImportEnd        = DateTime.Now,
                NumberOfPosts    = posts.Length,
                NumberOfComments = numberOfComments,
                Pages            = pages
            };

            return PostScrapeHistoryRepository.Save(postScrape);
        }
Exemple #6
0
        /// <summary>
        /// Registers CORS, MVC, the configuration object, the Facebook Graph client and the
        /// Elasticsearch-backed repositories and scrapers as singletons in the service collection.
        /// </summary>
        /// <param name="services">The service collection to add the registrations to.</param>
        public void ConfigureServices(IServiceCollection services)
        {
            // NOTE(review): this policy allows any origin, method and header together with
            // credentials, which is a very permissive combination - confirm it is intended.
            services.AddCors(cors =>
            {
                cors.AddPolicy(
                    "CorsPolicy",
                    policy => policy.AllowAnyOrigin().AllowAnyMethod().AllowAnyHeader().AllowCredentials());
            });

            services.AddMvc();
            services.AddSingleton(Configuration);

            // Everything below is created once here and registered as a singleton instance,
            // so every controller receives the same objects.

            // Facebook Graph API client.
            GraphClient graphClient = new GraphClient(
                new Version(Configuration["facebook:graphAPIVersion"]),
                Configuration["facebook:appId"],
                Configuration["facebook:appSecret"]);
            services.AddSingleton(graphClient);

            // Elasticsearch connection details.
            string esUrl = Configuration["elasticsearch:url"];
            string indexPrefix = Configuration["elasticsearch:defaultIndex"];
            string esUser = Configuration["elasticsearch:user"];
            string esPassword = Configuration["elasticsearch:password"];
            Uri esNode = new Uri(esUrl);

            // Builds a fresh ConnectionSettings per consumer; basic authentication is only
            // applied when a user name is configured.
            Func<ConnectionSettings> createSettings = () =>
            {
                ConnectionSettings fresh = new ConnectionSettings(esNode);
                if (!string.IsNullOrEmpty(esUser))
                {
                    return fresh.BasicAuthentication(esUser, esPassword);
                }

                return fresh;
            };

            // Metadata repositories.
            services.AddSingleton(
                new ElasticSearchRepository<PageMetadata>(createSettings(), indexPrefix + "-metadata-page"));
            services.AddSingleton(
                new ElasticSearchRepository<PageScrapeHistory>(createSettings(), indexPrefix + "-metadata-pagescrape"));
            services.AddSingleton(
                new ElasticSearchRepository<PostScrapeHistory>(createSettings(), indexPrefix + "-metadata-postscrape"));

            // Scrapers. The post scraper is given the page scraper, so the page scraper
            // has to be created first.
            PageScraper pageScraper = new PageScraper(createSettings(), indexPrefix + "-page", graphClient);
            services.AddSingleton(pageScraper);

            PostScraper postScraper = new PostScraper(createSettings(), indexPrefix + "-post", pageScraper, graphClient);
            services.AddSingleton(postScraper);

            CommentScraper commentScraper = new CommentScraper(createSettings(), indexPrefix + "-comment", graphClient);
            services.AddSingleton(commentScraper);
        }
Exemple #7
0
 /// <summary>
 /// Looks up the post identified by the request's <c>PostId</c> and scrapes its comments.
 /// </summary>
 /// <param name="request">
 /// Request bound from the body; when it is <c>null</c>, <c>null</c> is passed to
 /// <c>PostScraper.Get</c> as the post id.
 /// </param>
 /// <returns>The comments returned by <c>CommentScraper.Scrape</c> for that post.</returns>
 public IEnumerable<ScrapedComment> ScrapeComments([FromBody] CommentScrapeRequest request)
 {
     var post = PostScraper.Get(request?.PostId);
     return CommentScraper.Scrape(post);
 }
Exemple #8
0
 /// <summary>
 /// Returns one page of comments for the request's query and sort order.
 /// </summary>
 /// <param name="request">Page number, page size, query and sort settings, bound from the request body.</param>
 /// <returns>The paged response produced by <c>CommentScraper.Paged</c>.</returns>
 public PagedResponse<ScrapedComment> AllComments([FromBody] ElasticSearchRequest request) =>
     CommentScraper.Paged(request.PageNumber, request.PageSize, request.Query, request.Sort);
Exemple #9
0
 /// <summary>
 /// Fetches a single comment by its id.
 /// </summary>
 /// <param name="id">Id passed to <c>CommentScraper.Get</c>.</param>
 /// <returns>Whatever <c>CommentScraper.Get</c> returns for <paramref name="id"/>.</returns>
 public ScrapedComment GetComment(string id)
 {
     return CommentScraper.Get(id);
 }
Exemple #10
0
 /// <summary>
 /// Creates the controller and stores the supplied scrapers in the corresponding members.
 /// </summary>
 /// <param name="commentScraper">Scraper used for comments.</param>
 /// <param name="postScraper">Scraper used to look up posts.</param>
 public CommentScrapeController(CommentScraper commentScraper, PostScraper postScraper)
 {
     this.PostScraper = postScraper;
     this.CommentScraper = commentScraper;
 }