/// <summary>
/// HTTP-triggered function: deserializes a <see cref="ScrapeConfig"/> from the POST body
/// and inserts it into the scrape-config table.
/// </summary>
/// <param name="req">Incoming HTTP request whose body is the JSON config.</param>
/// <param name="scrapeConfigTable">Bound Azure Table for scrape configs.</param>
/// <param name="log">Function logger.</param>
/// <returns>200 with the insert result on success; 400 otherwise.</returns>
public static async Task<IActionResult> Run(
    [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
    [Table(TableName.ScrapeConfig)] CloudTable scrapeConfigTable,
    ILogger log)
{
    log.LogInformation("AddScrapeConfig trigger function processed a request.");

    // Fix: dispose the reader deterministically (the original leaked the StreamReader).
    string requestBody;
    using (StreamReader reader = new StreamReader(req.Body))
    {
        requestBody = await reader.ReadToEndAsync();
    }

    ScrapeConfig scrapeConfig = JsonConvert.DeserializeObject<ScrapeConfig>(requestBody);
    if (scrapeConfig == null)
    {
        return new BadRequestResult();
    }

    TableOperation insertOperation = TableOperation.Insert(scrapeConfig);
    TableResult result = await scrapeConfigTable.ExecuteAsync(insertOperation);

    // Any 2xx status from table storage counts as success.
    if (result.HttpStatusCode >= 200 && result.HttpStatusCode < 300)
    {
        return new OkObjectResult(result.Result);
    }
    return new BadRequestObjectResult(result.Result);
}
/// <summary>
/// HTTP-triggered function: deletes the <see cref="ScrapeConfig"/> identified by the
/// partition/row keys in the route.
/// </summary>
/// <param name="req">Incoming HTTP request (body unused).</param>
/// <param name="scrapeConfigTable">Bound Azure Table for scrape configs.</param>
/// <param name="partitionKey">Partition key of the entity to delete.</param>
/// <param name="rowKey">Row key of the entity to delete.</param>
/// <param name="log">Function logger.</param>
/// <returns>200 on success, 404 when the entity does not exist, 400 otherwise.</returns>
public static async Task<IActionResult> Run(
    [HttpTrigger(AuthorizationLevel.Function, "post", Route = FunctionName.DeleteScrapeConfig + "/{partitionKey}/{rowKey}")] HttpRequest req,
    [Table(TableName.ScrapeConfig)] CloudTable scrapeConfigTable,
    string partitionKey,
    string rowKey,
    ILogger log)
{
    log.LogInformation("DeleteScrapeConfig trigger function processed a request.");

    TableOperation getOperation = TableOperation.Retrieve<ScrapeConfig>(partitionKey, rowKey);
    TableResult item = await scrapeConfigTable.ExecuteAsync(getOperation);

    // Fix: the original cast item.Result unconditionally and threw a
    // NullReferenceException from TableOperation.Delete when the entity was missing.
    if (!(item.Result is ScrapeConfig itemToDelete))
    {
        return new NotFoundResult();
    }

    TableOperation deleteOperation = TableOperation.Delete(itemToDelete);
    TableResult result = await scrapeConfigTable.ExecuteAsync(deleteOperation);

    // Any 2xx status from table storage counts as success.
    if (result.HttpStatusCode >= 200 && result.HttpStatusCode < 300)
    {
        return new OkObjectResult(result.Result);
    }
    return new BadRequestObjectResult(result.Result);
}
/// <summary>
/// Builds the generic host: EF Core context, repositories, Coravel scheduler with
/// its invocable jobs, and the shared scrape configuration/cache singletons.
/// </summary>
public static IHostBuilder CreateHostBuilder(string[] args) =>
    Host.CreateDefaultBuilder(args)
        .ConfigureServices((context, services) =>
        {
            // EF Core context wired to the configured SQL Server connection string.
            services.AddDbContext<ntpContext>(options =>
            {
                options.UseSqlServer(context.Configuration.GetConnectionString("ntpConnectionString"));
            });
            services.AddScoped<IIndicatorDataRepository, IndicatorDataRepository<ntpContext>>();

            // Scheduler plus the invocable jobs it will run.
            services.AddScheduler();
            services.AddTransient<ScraperInvocable>();
            services.AddTransient<PriceDownloaderInvocable>();
            services.AddTransient<CVScraperInvocable>();
            services.AddTransient<cvApiTrackerInvocable>();

            services.AddSingleton<IConfiguration>(Program.Configuration);

            // Bind the "ScrapeConfiguration" section once and register it as a singleton.
            scrapeConfig = new ScrapeConfig();
            Configuration.GetSection("ScrapeConfiguration").Bind(scrapeConfig);
            services.AddSingleton(scrapeConfig);

            scrapeCache = new ScrapeCache();
            services.AddSingleton(scrapeCache);

            //services.AddHostedService<Worker>();
        });
/// <summary>
/// Stores the shared cache and config, then opens a Rabbit context/sender using
/// the CoronaVirusScrape config file.
/// </summary>
/// <param name="scrapeCache">Shared scrape cache.</param>
/// <param name="scrapeConfig">Scrape configuration (provides the RMQ config file).</param>
public CVScraperInvocable(ScrapeCache scrapeCache, ScrapeConfig scrapeConfig)
{
    _scrapeCache = scrapeCache;
    _scrapeConfig = scrapeConfig;

    var configFile = scrapeConfig.CoronaVirusScrape.GetConfigFile();
    _ctx = new RabbitContext().Create(configFile);
    _sender = new RmqSender(_ctx);
}
/// <summary>
/// Configures ServicePointManager for high-volume scraping (no Expect:100-continue,
/// 300 connections per endpoint) and captures the optional scrape config.
/// </summary>
/// <param name="config">Optional scrape configuration; may be null.</param>
public WebClientRequestHandler(ScrapeConfig config = null)
{
    // Global HTTP tuning for the whole process.
    ServicePointManager.Expect100Continue = false;
    ServicePointManager.DefaultConnectionLimit = 300;

    Log = LogManager.GetCurrentClassLogger();
    _scrapeConfig = config;
}
/// <summary>
/// Assigns table-storage keys to the config (tick-based Id as RowKey, core URL as
/// PartitionKey) and posts it to the AddScrapeConfig Azure Function.
/// </summary>
/// <param name="scrapeConfig">Config to persist; its Id/RowKey/PartitionKey are set here.</param>
public async Task AddAsync(ScrapeConfig scrapeConfig)
{
    // Fix: use UTC ticks — local-time ticks can repeat during a DST fallback,
    // producing duplicate ids.
    scrapeConfig.Id = DateTime.UtcNow.Ticks;
    scrapeConfig.RowKey = scrapeConfig.Id.ToString();
    scrapeConfig.PartitionKey = scrapeConfig.URL.ToCoreUrl();

    string url = $"{_settings.Value.AzureFunctionURL}{FunctionName.AddScrapeConfig}?code={_settings.Value.AzureFunctionCode}";
    await _httpHandlerService.HandlePostRequest(url, scrapeConfig);
}
/// <summary>
/// Test fixture setup: mock logger, a ScrapeConfig for the Galerija.mk site,
/// and the Utils instance under test.
/// </summary>
public void Setup()
{
    logger = new Mock<ILogger>();

    galerijaConfig = new ScrapeConfig();
    galerijaConfig.Name = "Galerija.mk";
    galerijaConfig.URL = "https://www.galerija.com.mk";
    galerijaConfig.ProductNamePath = "//*[contains(@id,'product-')]/div[1]/div[2]/h1";
    galerijaConfig.ProductPricePath = "//*[contains(@id,'product-')]/div[1]/div[2]/p/span/span/text()";

    utils = new Utils();
}
/// <summary>
/// GET edit page: loads the ScrapeConfig identified by the table keys.
/// </summary>
/// <param name="partitionKey">Table partition key.</param>
/// <param name="rowKey">Table row key.</param>
/// <returns>The edit view, or 404 when a key is missing or the entity is absent.</returns>
public async Task<IActionResult> Edit(string partitionKey, string rowKey)
{
    bool keysMissing = string.IsNullOrWhiteSpace(partitionKey)
                       || string.IsNullOrWhiteSpace(rowKey);
    if (keysMissing)
    {
        return NotFound();
    }

    ScrapeConfig scrapeConfig = await _scrapeConfigService.GetDetailsAsync(partitionKey, rowKey);
    if (scrapeConfig == null)
    {
        return NotFound();
    }
    return View(scrapeConfig);
}
/// <summary>
/// POST create: normalizes the XPath inputs and persists a new ScrapeConfig
/// via the config service.
/// </summary>
/// <param name="scrapeConfig">Model-bound config from the form.</param>
/// <returns>Redirect to Index on success; the form view with errors otherwise.</returns>
public async Task<IActionResult> Create([Bind("Name,URL,ProductNamePath,ProductPricePath,Currency,ProductAvailabilityPath,ProductAvailabilityValue,ProductAvailabilityIsAtributeValue")] ScrapeConfig scrapeConfig)
{
    if (!ModelState.IsValid)
    {
        return View(scrapeConfig);
    }

    // Strip special characters from the XPath fields. '?.': the availability path is
    // optional and the original threw NullReferenceException when it was left empty.
    // (Also removed the original's no-op self-assignment of Currency.)
    scrapeConfig.ProductNamePath = scrapeConfig.ProductNamePath?.RemoveSpecialCharacters();
    scrapeConfig.ProductPricePath = scrapeConfig.ProductPricePath?.RemoveSpecialCharacters();
    scrapeConfig.ProductAvailabilityPath = scrapeConfig.ProductAvailabilityPath?.RemoveSpecialCharacters();

    await _scrapeConfigService.AddAsync(scrapeConfig);
    return RedirectToAction(nameof(Index));
}
/// <summary>
/// POST edit: validates the id, normalizes the XPath inputs, and updates the
/// ScrapeConfig via the config service.
/// </summary>
/// <param name="id">Route id; must match the bound model's Id.</param>
/// <param name="scrapeConfig">Model-bound config from the form.</param>
/// <returns>Redirect to Index on success; 404 on id mismatch; the form view otherwise.</returns>
public async Task<IActionResult> Edit(long id, [Bind("PartitionKey,RowKey,Id,Name,URL,ProductNamePath,ProductPricePath,Currency,ProductAvailabilityPath,ProductAvailabilityValue,ProductAvailabilityIsAtributeValue")] ScrapeConfig scrapeConfig)
{
    if (id != scrapeConfig.Id)
    {
        return NotFound();
    }

    if (!ModelState.IsValid)
    {
        return View(scrapeConfig);
    }

    // Strip special characters from the XPath fields. '?.': the availability path is
    // optional and the original threw NullReferenceException when it was left empty.
    // (Also removed the original's no-op self-assignment of Currency.)
    scrapeConfig.ProductNamePath = scrapeConfig.ProductNamePath?.RemoveSpecialCharacters();
    scrapeConfig.ProductPricePath = scrapeConfig.ProductPricePath?.RemoveSpecialCharacters();
    scrapeConfig.ProductAvailabilityPath = scrapeConfig.ProductAvailabilityPath?.RemoveSpecialCharacters();

    await _scrapeConfigService.UpdateAsync(scrapeConfig);
    return RedirectToAction(nameof(Index));
}
/// <summary>
/// Captures the injected repository and scrape configuration.
/// </summary>
/// <param name="repository">Indicator-data repository.</param>
/// <param name="scrapeConfig">Shared scrape configuration.</param>
public PriceDownloaderInvocable(IIndicatorDataRepository repository, ScrapeConfig scrapeConfig)
{
    _repository = repository;
    _scrapeConfig = scrapeConfig;
}
/// <summary>
/// Captures the scrape config and opens a Rabbit context/sender using the
/// CoronaVirusApiTracker config file.
/// </summary>
/// <param name="scrapeConfig">Scrape configuration (provides the RMQ config file).</param>
public cvApiTrackerInvocable(ScrapeConfig scrapeConfig)
{
    _scrapeConfig = scrapeConfig;

    var configFile = scrapeConfig.CoronaVirusApiTracker.GetConfigFile();
    _ctx = new RabbitContext().Create(configFile);
    _sender = new RmqSender(_ctx);
}
/// <summary>
/// Queue-triggered function: scrapes every product of the dequeued user, records
/// changes to the history queue, persists the products, and enqueues an update
/// (or "no updates") email for the user.
/// </summary>
/// <param name="userProfile">User whose products are processed.</param>
/// <param name="addProductHistoryMessageQueue">Output queue for changed products.</param>
/// <param name="emailMessageQueue">Output queue for user notification emails.</param>
/// <param name="sendGridMessageQueue">Output queue for scrape-error alert emails.</param>
/// <param name="binder">Runtime binder for the per-user product table.</param>
/// <param name="log">Function logger.</param>
// Fix: 'async Task' instead of 'async void' — with 'async void' the Functions host
// cannot observe exceptions or completion, so failures bypass retry/poison handling.
public static async Task Run(
    [QueueTrigger(QueueName.UsersReadyForNotifications, Connection = CommonName.Connection)] UserProfile userProfile,
    [Queue(QueueName.AddProductHistory)] IAsyncCollector<ProductInfo> addProductHistoryMessageQueue,
    [Queue(QueueName.ProductUpdateEmailNotifications)] IAsyncCollector<EmailMessage> emailMessageQueue,
    [Queue(QueueName.EmailsToSend)] IAsyncCollector<SendGridMessage> sendGridMessageQueue,
    IBinder binder,
    ILogger log)
{
    log.LogInformation($"C# Queue trigger function processed: {userProfile.FirstName}");

    // Bind the per-user product table and the shared scrape-config table at runtime.
    CloudTable productInfoTable = await binder.BindAsync<CloudTable>(
        new TableAttribute(TableName.ProductInfo, userProfile.UserId) { Connection = CommonName.Connection });
    CloudTable scrapeConfigTable = await binder.BindAsync<CloudTable>(
        new TableAttribute(TableName.ScrapeConfig) { Connection = CommonName.Connection });

    TableQuery<ProductInfo> productQuery = new TableQuery<ProductInfo>();
    TableQuerySegment<ProductInfo> userProducts = await productInfoTable.ExecuteQuerySegmentedAsync(productQuery, null);

    // Load all configs up front. NOTE(review): only the first query segment is read —
    // confirm the config table stays small enough that continuation tokens never apply.
    TableQuery<ScrapeConfig> configsQuery = new TableQuery<ScrapeConfig>();
    TableQuerySegment<ScrapeConfig> allConfigs = await scrapeConfigTable.ExecuteQuerySegmentedAsync(configsQuery, null);

    EmailMessage emailMessage;
    StringBuilder emailBodyBuilder = new StringBuilder();
    log.LogInformation($"userProducts: {userProducts.Results.Count}");

    foreach (ProductInfo product in userProducts)
    {
        // Match the product to its scrape config by core URL (the config's partition key).
        ScrapeConfig config = allConfigs.FirstOrDefault(t => t.PartitionKey.Equals(product.URL.ToCoreUrl()));
        if (config == null)
        {
            // Fix: the original logged "Multiple scrape config matches..." on this
            // no-match path, which was misleading.
            log.LogInformation($"No scrape config matches the criteria URL={product.URL}");
            continue;
        }

        log.LogInformation($"ScrapeConfig : {config.Name}");
        Utils utils = new Utils();
        try
        {
            await utils.Scrape(config, product, log);
        }
        catch (Exception ex)
        {
            // Flatten the full inner-exception chain into the alert.
            // Fix: the original built errorMsg but then used only ex.Message
            // (of the innermost exception) in the alert text.
            string errorMsg = ex.Message;
            while (ex.InnerException != null)
            {
                errorMsg += Environment.NewLine + ex.InnerException.Message;
                ex = ex.InnerException;
            }

            var error = $"Error while trying to scrape product = {product.Name}, URL={product.URL}. Error: {errorMsg}";
            log.LogError(error);

            SendGridMessage message = new SendGridMessage();
            message.AddTo("*****@*****.**");
            message.AddContent("text/html", error);
            message.SetFrom(new EmailAddress("*****@*****.**"));
            message.SetSubject("Product scrape exception");
            await sendGridMessageQueue.AddAsync(message);
        }

        if (product.HasChangesSinceLastTime)
        {
            var productUpdateLine = utils.CreateProductEmailLine(product);
            emailBodyBuilder.AppendLine(productUpdateLine);
            emailBodyBuilder.AppendLine("<br>");

            // Record the change in the product-history queue.
            await addProductHistoryMessageQueue.AddAsync(product);
        }

        // Persist the (possibly updated) product.
        TableOperation operation = TableOperation.InsertOrReplace(product);
        await productInfoTable.ExecuteAsync(operation);
    }

    if (emailBodyBuilder.Length > 0)
    {
        emailBodyBuilder.AppendLine();
        emailBodyBuilder.AppendLine("<br>");
        emailBodyBuilder.AppendLine("<br>");
        emailBodyBuilder.AppendLine("<a href='https://product-scrape.azurewebsites.net/Products'>Here you can see the list of your products</a>");

        emailMessage = new EmailMessage
        {
            UserId = userProfile.UserId,
            Subject = "Products updates",
            Content = emailBodyBuilder.ToString()
        };
        log.LogInformation($"EmailMessage Product updates");
        await emailMessageQueue.AddAsync(emailMessage);
    }
    else if (userProfile.SendEmailWhenNoProductHasBeenChanged)
    {
        log.LogInformation($"EmailMessage No Product update");
        emailMessage = new EmailMessage
        {
            UserId = userProfile.UserId,
            Subject = "Products updates",
            Content = "None of your products has been updated/changed since last check."
        };
        await emailMessageQueue.AddAsync(emailMessage);
    }
}
/// <summary>
/// Posts the given config to the UpdateScrapeConfig Azure Function endpoint.
/// </summary>
/// <param name="scrapeConfig">Config to send as the POST payload.</param>
public async Task UpdateAsync(ScrapeConfig scrapeConfig)
{
    string url = $"{_settings.Value.AzureFunctionURL}{FunctionName.UpdateScrapeConfig}?code={_settings.Value.AzureFunctionCode}";
    await _httpHandlerService.HandlePostRequest(url, scrapeConfig);
}
/// <summary>
/// Downloads the product page and updates name, price, and availability on
/// <paramref name="product"/> from the config's XPath selectors, flagging
/// <c>HasChangesSinceLastTime</c> when any value changed. Each section is
/// best-effort: selector failures are logged and the rest still runs.
/// </summary>
/// <param name="scrapeConfig">Selectors and currency for the product's site.</param>
/// <param name="product">Product to refresh in place.</param>
/// <param name="log">Logger for selector errors.</param>
public async Task Scrape(ScrapeConfig scrapeConfig, ProductInfo product, ILogger log)
{
    if (string.IsNullOrWhiteSpace(product.URL))
    {
        log.LogInformation("URL can not be empty!");
        return;
    }

    string html = await _webClient.DownloadStringTaskAsync(product.URL);
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    product.HasChangesSinceLastTime = false;
    product.Currency = scrapeConfig.Currency;

    // --- Product name ---
    try
    {
        HtmlNode titleNode = doc.DocumentNode.SelectSingleNode(scrapeConfig.ProductNamePath);
        if (titleNode != null && product.Name != titleNode.InnerText)
        {
            product.HasChangesSinceLastTime = true;
            product.PreviousName = product.Name;
            product.Name = titleNode.InnerText;
        }
    }
    catch (Exception ex)
    {
        log.LogInformation(ex.Message);
    }

    // --- Product price (spaces stripped before comparison) ---
    try
    {
        HtmlNode priceNode = doc.DocumentNode.SelectSingleNode(scrapeConfig.ProductPricePath);
        if (priceNode != null)
        {
            var newPrice = priceNode.InnerText.Replace(" ", "");
            if (product.Price != newPrice)
            {
                product.HasChangesSinceLastTime = true;
                product.PreviousPrice = product.Price;
                product.Price = newPrice;
            }
        }
    }
    catch (Exception ex)
    {
        log.LogInformation(ex.Message);
    }

    // --- Availability ---
    try
    {
        HtmlNode availabilityNode = doc.DocumentNode.SelectSingleNode(scrapeConfig.ProductAvailabilityPath);
        if (availabilityNode != null)
        {
            bool isAvailable;
            if (scrapeConfig.ProductAvailabilityIsAtributeValue)
            {
                // Attribute mode: available iff some attribute carries the configured value.
                isAvailable = availabilityNode.Attributes
                    .FirstOrDefault(t => t.Value == scrapeConfig.ProductAvailabilityValue) != null;
            }
            else
            {
                // Fix: the original's else-branch assigned 'availabilityNode != null', which
                // is always true inside this guard, so text mode always reported available
                // once the node was found. Simplified to the provably-identical constant.
                // NOTE(review): this makes the InnerText comparison against
                // ProductAvailabilityValue dead — confirm whether a non-matching text
                // was actually meant to report unavailable.
                isAvailable = true;
            }

            if (product.Availability != isAvailable)
            {
                product.HasChangesSinceLastTime = true;
                product.PreviousAvailability = product.Availability;
                product.Availability = isAvailable;
            }
        }
        else
        {
            // No availability node found: availability is unknown.
            product.Availability = null;
        }
    }
    catch (Exception ex)
    {
        log.LogInformation(ex.Message);
    }

    product.LastCheckedOn = DateTime.UtcNow;
}