//blah
// Scraper Test
// Tester: Jeremy Buentello Time:
/// <summary>
/// Scrapes the Fox News politics page and uploads every headline found on it.
/// </summary>
/// <remarks>
/// Each element matching the CSS selector <c>.info .title</c> is treated as one headline.
/// Its inner text and the absolute URL of its first anchor are sent through
/// <paramref name="client"/>. An <see cref="HttpRequestException"/> whose status is
/// 409 (Conflict) is ignored; any other status is rethrown. The elapsed time is logged
/// once all headlines have been processed.
/// </remarks>
/// <param name="client">Client used to upload each scraped headline.</param>
/// <param name="website">HTML loader used to download the page.</param>
/// <param name="logger">Logger that receives the start and finish messages.</param>
/// <param name="token">Cancels the page download and the headline loop.</param>
protected override async Task Scrape(ScraperTextClient client, HtmlWeb website, ILogger logger, CancellationToken token)
{
    var watch = Stopwatch.StartNew();
    logger.LogInformation("About to Fox Politics");

    string sourceURL = "https://www.foxnews.com/politics";
    var sourceUri = new System.Uri(sourceURL);

    var doc = await website.LoadFromWebAsync(sourceURL, token);
    var docNode = doc.DocumentNode;

    foreach (var node in docNode.QuerySelectorAll(".info .title"))
    {
        token.ThrowIfCancellationRequested();

        // A title without an anchor or href cannot be linked; skip it instead of
        // letting a NullReferenceException abort the rest of the scrape.
        var href = node.QuerySelector("a")?.Attributes["href"]?.Value;
        if (string.IsNullOrWhiteSpace(href))
        {
            continue;
        }

        // Resolve the href against the page URL. Plain concatenation would append the
        // path to ".../politics" and yield links such as ".../politics/politics/...".
        // Resolution also leaves already-absolute hrefs untouched.
        if (!System.Uri.TryCreate(sourceUri, href, out var nodeUri))
        {
            continue;
        }

        var headlinerText = node.InnerText;

        try
        {
            // Try to upload the new scraped text data
            await client.SetEntity(new ScraperText
            {
                Text = headlinerText,
                Website = nodeUri.AbsoluteUri
            });
        }
        catch (HttpRequestException ex)
        {
            // If it is a conflict (already in the database) continue, otherwise throw the error.
            if (ex.StatusCode != System.Net.HttpStatusCode.Conflict)
            {
                throw;
            }
        }
    }

    watch.Stop();
    var elapsedTime = watch.ElapsedMilliseconds;
    logger.LogInformation($"Scrapped Fox Politics in {elapsedTime} ms");
}
/// <summary>
/// Runs the scraper with a newly created <see cref="HtmlWeb"/> and logs any failure
/// instead of propagating it.
/// </summary>
/// <param name="client">Client handed through to the scraper implementation.</param>
/// <param name="logger">Logger handed through to the scraper and used to report failures.</param>
/// <param name="token">Cancellation token handed through to the scraper implementation.</param>
public async Task Scrape(ScraperTextClient client, ILogger logger, CancellationToken token)
{
    var website = new HtmlWeb();

    try
    {
        await Scrape(client, website, logger, token);
    }
    catch (Exception ex)
    {
        // The exception is logged and not rethrown. The runtime type name identifies
        // which scraper failed, since several scrapers share this entry point.
        logger.LogError(ex, "Web scraper {Scraper} failed", GetType().Name);
    }
}
/// <summary>
/// Initializes the timed scraper from its schedule configuration.
/// </summary>
/// <remarks>
/// The base constructor receives <c>config.CronExpression</c> together with
/// <see cref="TimeZoneInfo.Local"/>. The scraper list from <c>config.WebScrapers</c>,
/// the logger and the client are stored on the instance.
/// </remarks>
/// <param name="client">Client stored in <c>scraperClient</c>.</param>
/// <param name="config">Supplies the cron expression and the web scrapers.</param>
/// <param name="logger">Logger stored in <c>_logger</c>.</param>
public BaseTimedWebScaper(
    ScraperTextClient client,
    IScheduleConfig<T> config,
    ILogger<BaseTimedWebScaper<T>> logger)
    : base(config.CronExpression, TimeZoneInfo.Local)
{
    scraperClient = client;
    _logger = logger;
    _webScrapers = config.WebScrapers;
}
/// <summary>
/// Creates the view model and stores the two clients it was given.
/// </summary>
/// <param name="scraperClient">Client stored in the <c>scraperClient</c> field.</param>
/// <param name="categoryClient">Client stored in the <c>categoryClient</c> field.</param>
public TestViewModel(ScraperTextClient scraperClient, TextCategoryClient categoryClient)
{
    // Both fields are assigned in one statement, in the same order as the parameters.
    (this.scraperClient, this.categoryClient) = (scraperClient, categoryClient);
}
/// <summary>
/// No-op scraper implementation: completes immediately without using any argument.
/// </summary>
/// <param name="client">Unused.</param>
/// <param name="website">Unused.</param>
/// <param name="logger">Unused.</param>
/// <param name="token">Unused.</param>
protected override async Task Scrape(ScraperTextClient client, HtmlWeb website, ILogger logger, CancellationToken token)
    => await Task.CompletedTask;
/// <summary>
/// Creates a <c>TestWebScraper</c>; all three arguments are passed unchanged to the
/// base constructor and no further initialization is done here.
/// </summary>
/// <param name="client">Passed to the base constructor.</param>
/// <param name="config">Schedule configuration, passed to the base constructor.</param>
/// <param name="logger">Logger, passed to the base constructor.</param>
public TestWebScraper(
    ScraperTextClient client,
    IScheduleConfig<TestWebScraper> config,
    ILogger<BaseTimedWebScaper<TestWebScraper>> logger)
    : base(client, config, logger)
{
}
/// <summary>
/// Creates a <c>WebScaper1Minute</c>; all three arguments are passed unchanged to the
/// base constructor and no further initialization is done here.
/// </summary>
/// <param name="client">Passed to the base constructor.</param>
/// <param name="config">Schedule configuration, passed to the base constructor.</param>
/// <param name="logger">Logger, passed to the base constructor.</param>
public WebScaper1Minute(
    ScraperTextClient client,
    IScheduleConfig<WebScaper1Minute> config,
    ILogger<BaseTimedWebScaper<WebScaper1Minute>> logger)
    : base(client, config, logger)
{
}
/// <summary>
/// Creates a <c>WebScaper30Seconds</c>; all three arguments are passed unchanged to the
/// base constructor and no further initialization is done here.
/// </summary>
/// <param name="client">Passed to the base constructor.</param>
/// <param name="config">Schedule configuration, passed to the base constructor.</param>
/// <param name="logger">Logger, passed to the base constructor.</param>
public WebScaper30Seconds(
    ScraperTextClient client,
    IScheduleConfig<WebScaper30Seconds> config,
    ILogger<BaseTimedWebScaper<WebScaper30Seconds>> logger)
    : base(client, config, logger)
{
}
/// <summary>
/// Performs one scraping run. Derived classes supply the implementation.
/// </summary>
/// <param name="client">Scraper text client made available to the implementation.</param>
/// <param name="website">HTML loader made available to the implementation.</param>
/// <param name="logger">Logger made available to the implementation.</param>
/// <param name="token">Cancellation token made available to the implementation.</param>
/// <returns>A task representing the asynchronous scraping run.</returns>
protected abstract Task Scrape(ScraperTextClient client, HtmlWeb website, ILogger logger, CancellationToken token);
/// <summary>
/// Creates the view model, passing both clients to the base constructor.
/// </summary>
/// <remarks>
/// Sets <c>Title</c> to "Test Scraper Categorizer" and assigns <c>CardDragEndCommand</c>
/// an async relay command that invokes <c>OnCardDragEnd</c>.
/// </remarks>
/// <param name="scraperClient">Passed to the base constructor.</param>
/// <param name="categoryClient">Passed to the base constructor.</param>
public TestViewModel(
    ScraperTextClient scraperClient,
    TextCategoryClient categoryClient)
    : base(scraperClient, categoryClient)
{
    this.Title = "Test Scraper Categorizer";

    // Drag-end events from the board are routed to OnCardDragEnd.
    this.CardDragEndCommand = new AsyncRelayCommand<KanbanDragEndEventArgs>(OnCardDragEnd);
}