Exemple #1
0
        /// <summary>
        /// HTTP-triggered function that inserts a new <see cref="ScrapeConfig"/> entity into the scrape-config table.
        /// </summary>
        /// <param name="req">Incoming POST request; its body is deserialized as JSON into a <see cref="ScrapeConfig"/>.</param>
        /// <param name="scrapeConfigTable">Table bound through <c>TableName.ScrapeConfig</c>.</param>
        /// <param name="log">Function logger.</param>
        /// <returns>
        /// 200 with the table result payload when the insert reports a 2xx status;
        /// 400 when the body is empty, is not valid JSON, or the insert reports a non-2xx status.
        /// </returns>
        public static async Task <IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "post", Route = null)] HttpRequest req,
            [Table(TableName.ScrapeConfig)] CloudTable scrapeConfigTable,
            ILogger log)
        {
            log.LogInformation("AddScrapeConfig trigger function processed a request.");

            // The body is read to the end here, so releasing the reader afterwards is safe.
            string requestBody;
            using (StreamReader reader = new StreamReader(req.Body))
            {
                requestBody = await reader.ReadToEndAsync();
            }

            ScrapeConfig scrapeConfig;
            try
            {
                scrapeConfig = JsonConvert.DeserializeObject <ScrapeConfig>(requestBody);
            }
            catch (Newtonsoft.Json.JsonException ex)
            {
                // A malformed payload is a client error, so answer 400 exactly like an empty payload.
                log.LogWarning(ex, "AddScrapeConfig received a request body that could not be deserialized.");
                return new BadRequestResult();
            }

            // DeserializeObject yields null for an empty body or the JSON literal null.
            if (scrapeConfig == null)
            {
                return new BadRequestResult();
            }

            TableOperation insertOperation = TableOperation.Insert(scrapeConfig);
            TableResult    result          = await scrapeConfigTable.ExecuteAsync(insertOperation);

            bool succeeded = result.HttpStatusCode >= 200 && result.HttpStatusCode < 300;
            if (succeeded)
            {
                return new OkObjectResult(result.Result);
            }

            return new BadRequestObjectResult(result.Result);
        }
Exemple #2
0
        /// <summary>
        /// HTTP-triggered function that deletes the <see cref="ScrapeConfig"/> entity identified by the route keys.
        /// </summary>
        /// <param name="req">Incoming request (not read by this function).</param>
        /// <param name="scrapeConfigTable">Table bound through <c>TableName.ScrapeConfig</c>.</param>
        /// <param name="partitionKey">Partition key taken from the route.</param>
        /// <param name="rowKey">Row key taken from the route.</param>
        /// <param name="log">Function logger.</param>
        /// <returns>
        /// 404 when no entity exists for the given keys; 200 with the table result payload when the
        /// delete reports a 2xx status; 400 otherwise.
        /// </returns>
        public static async Task <IActionResult> Run(
            [HttpTrigger(AuthorizationLevel.Function, "post", Route = FunctionName.DeleteScrapeConfig + "/{partitionKey}/{rowKey}")] HttpRequest req,
            [Table(TableName.ScrapeConfig)] CloudTable scrapeConfigTable,
            string partitionKey,
            string rowKey,
            ILogger log)
        {
            log.LogInformation("DeleteScrapeConfig trigger function processed a request.");

            // The entity is fetched first because the delete operation is built from the entity itself.
            TableOperation getOperation = TableOperation.Retrieve <ScrapeConfig>(partitionKey, rowKey);
            TableResult    lookup       = await scrapeConfigTable.ExecuteAsync(getOperation);

            ScrapeConfig itemToDelete = lookup.Result as ScrapeConfig;
            if (itemToDelete == null)
            {
                // Without this check a missing entity would reach TableOperation.Delete(null) and throw.
                log.LogWarning("DeleteScrapeConfig found no entity for {PartitionKey}/{RowKey}.", partitionKey, rowKey);
                return new NotFoundResult();
            }

            TableOperation deleteOperation = TableOperation.Delete(itemToDelete);
            TableResult    result          = await scrapeConfigTable.ExecuteAsync(deleteOperation);

            bool succeeded = result.HttpStatusCode >= 200 && result.HttpStatusCode < 300;
            if (succeeded)
            {
                return new OkObjectResult(result.Result);
            }

            return new BadRequestObjectResult(result.Result);
        }
        /// <summary>
        /// Builds the host: registers the SQL Server backed <c>ntpContext</c>, the indicator repository,
        /// the scheduler, the invocable jobs, and the configuration, scrape-config and scrape-cache singletons.
        /// </summary>
        /// <param name="args">Command-line arguments forwarded to <c>Host.CreateDefaultBuilder</c>.</param>
        /// <returns>The configured host builder.</returns>
        public static IHostBuilder CreateHostBuilder(string[] args)
        {
            return Host.CreateDefaultBuilder(args).ConfigureServices((context, registry) =>
            {
                // Database access.
                registry.AddDbContext <ntpContext>(options =>
                    options.UseSqlServer(context.Configuration.GetConnectionString("ntpConnectionString")));

                registry.AddScoped <IIndicatorDataRepository, IndicatorDataRepository <ntpContext> >();
                registry.AddScheduler();

                // Jobs resolved by the scheduler.
                registry.AddTransient <ScraperInvocable>();
                registry.AddTransient <PriceDownloaderInvocable>();
                registry.AddTransient <CVScraperInvocable>();
                registry.AddTransient <cvApiTrackerInvocable>();

                registry.AddSingleton <IConfiguration>(Program.Configuration);

                // scrapeConfig and scrapeCache are members declared outside this method; they are
                // populated here and the very same instances are exposed through the container.
                scrapeConfig = new ScrapeConfig();
                Configuration.GetSection("ScrapeConfiguration").Bind(scrapeConfig);
                registry.AddSingleton(scrapeConfig);

                scrapeCache = new ScrapeCache();
                registry.AddSingleton(scrapeCache);

                //services.AddHostedService<Worker>();
            });
        }
 /// <summary>
 /// Stores the injected cache and configuration, then creates the Rabbit context from the
 /// <c>CoronaVirusScrape</c> config file and a sender on top of that context.
 /// </summary>
 /// <param name="scrapeCache">Cache instance kept in <c>_scrapeCache</c>.</param>
 /// <param name="scrapeConfig">Configuration kept in <c>_scrapeConfig</c>; its <c>CoronaVirusScrape</c> section supplies the config file.</param>
 public CVScraperInvocable(ScrapeCache scrapeCache, ScrapeConfig scrapeConfig)
 {
     _scrapeCache  = scrapeCache;
     _scrapeConfig = scrapeConfig;

     var rabbit = new RabbitContext();
     _ctx    = rabbit.Create(scrapeConfig.CoronaVirusScrape.GetConfigFile());
     _sender = new RmqSender(_ctx);
 }
        /// <summary>
        /// Creates the handler. Note that it also changes process-wide <see cref="ServicePointManager"/>
        /// settings: Expect100Continue is switched off and the default connection limit becomes 300.
        /// </summary>
        /// <param name="config">Optional scrape configuration; stored as given (may be null).</param>
        public WebClientRequestHandler(ScrapeConfig config = null)
        {
            // Static settings: these affect every ServicePoint created afterwards, not just this handler.
            ServicePointManager.DefaultConnectionLimit = 300;
            ServicePointManager.Expect100Continue      = false;

            Log           = LogManager.GetCurrentClassLogger();
            _scrapeConfig = config;
        }
Exemple #6
0
        /// <summary>
        /// Assigns the identifier and table keys to <paramref name="scrapeConfig"/> and posts it to the
        /// AddScrapeConfig function endpoint.
        /// </summary>
        /// <param name="scrapeConfig">Configuration to add; its Id, RowKey and PartitionKey are overwritten here.</param>
        /// <returns>A task that completes when the POST request has been handled.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="scrapeConfig"/> is null.</exception>
        public async Task AddAsync(ScrapeConfig scrapeConfig)
        {
            if (scrapeConfig == null)
            {
                throw new ArgumentNullException(nameof(scrapeConfig));
            }

            // UTC ticks do not jump with daylight-saving or time-zone changes, unlike local-time ticks,
            // so the generated identifiers cannot repeat or run backwards because of clock-zone shifts.
            scrapeConfig.Id           = DateTime.UtcNow.Ticks;
            scrapeConfig.RowKey       = scrapeConfig.Id.ToString();
            scrapeConfig.PartitionKey = scrapeConfig.URL.ToCoreUrl();

            string url = _settings.Value.AzureFunctionURL + FunctionName.AddScrapeConfig + "?code=" + _settings.Value.AzureFunctionCode;
            await _httpHandlerService.HandlePostRequest(url, scrapeConfig);
        }
Exemple #7
0
 /// <summary>
 /// Prepares the test fixtures: a mocked logger, the scrape configuration for Galerija.mk
 /// (name and price XPath expressions only) and a <c>Utils</c> instance.
 /// </summary>
 public void Setup()
 {
     logger = new Mock <ILogger>();

     var config = new ScrapeConfig();
     config.Name             = "Galerija.mk";
     config.URL              = "https://www.galerija.com.mk";
     config.ProductNamePath  = "//*[contains(@id,'product-')]/div[1]/div[2]/h1";
     config.ProductPricePath = "//*[contains(@id,'product-')]/div[1]/div[2]/p/span/span/text()";
     galerijaConfig = config;

     utils = new Utils();
 }
Exemple #8
0
        /// <summary>
        /// Shows the edit view for the scrape configuration identified by the given table keys.
        /// </summary>
        /// <param name="partitionKey">Partition key of the configuration.</param>
        /// <param name="rowKey">Row key of the configuration.</param>
        /// <returns>
        /// The view bound to the configuration, or NotFound when a key is blank or the service returns null.
        /// </returns>
        public async Task <IActionResult> Edit(string partitionKey, string rowKey)
        {
            bool keysProvided = !string.IsNullOrWhiteSpace(partitionKey) && !string.IsNullOrWhiteSpace(rowKey);
            if (keysProvided)
            {
                ScrapeConfig details = await _scrapeConfigService.GetDetailsAsync(partitionKey, rowKey);
                if (details != null)
                {
                    return View(details);
                }
            }

            // Either a key was missing or nothing is stored under the given keys.
            return NotFound();
        }
Exemple #9
0
        /// <summary>
        /// Handles the create form: cleans the path expressions, stores the new configuration through
        /// the service and redirects to the index.
        /// </summary>
        /// <param name="scrapeConfig">Model bound from the listed form fields.</param>
        /// <returns>
        /// A redirect to <c>Index</c> after a successful add; otherwise the form view re-rendered with
        /// the submitted model.
        /// </returns>
        public async Task <IActionResult> Create([Bind("Name,URL,ProductNamePath,ProductPricePath,Currency,ProductAvailabilityPath,ProductAvailabilityValue,ProductAvailabilityIsAtributeValue")] ScrapeConfig scrapeConfig)
        {
            if (!ModelState.IsValid)
            {
                return View(scrapeConfig);
            }

            // Only the path expressions are cleaned; Currency is kept exactly as bound
            // (the former self-assignment of Currency had no effect and was removed).
            scrapeConfig.ProductNamePath         = scrapeConfig.ProductNamePath.RemoveSpecialCharacters();
            scrapeConfig.ProductPricePath        = scrapeConfig.ProductPricePath.RemoveSpecialCharacters();
            scrapeConfig.ProductAvailabilityPath = scrapeConfig.ProductAvailabilityPath.RemoveSpecialCharacters();

            await _scrapeConfigService.AddAsync(scrapeConfig);

            return RedirectToAction(nameof(Index));
        }
Exemple #10
0
        /// <summary>
        /// Handles the edit form: cleans the path expressions, updates the configuration through the
        /// service and redirects to the index.
        /// </summary>
        /// <param name="id">Identifier from the request; must equal <c>scrapeConfig.Id</c>.</param>
        /// <param name="scrapeConfig">Model bound from the listed form fields.</param>
        /// <returns>
        /// NotFound when <paramref name="id"/> does not match the model; a redirect to <c>Index</c>
        /// after a successful update; otherwise the form view re-rendered with the submitted model.
        /// </returns>
        public async Task <IActionResult> Edit(long id, [Bind("PartitionKey,RowKey,Id,Name,URL,ProductNamePath,ProductPricePath,Currency,ProductAvailabilityPath,ProductAvailabilityValue,ProductAvailabilityIsAtributeValue")] ScrapeConfig scrapeConfig)
        {
            if (id != scrapeConfig.Id)
            {
                return NotFound();
            }

            if (!ModelState.IsValid)
            {
                return View(scrapeConfig);
            }

            // Only the path expressions are cleaned; Currency is kept exactly as bound
            // (the former self-assignment of Currency had no effect and was removed).
            scrapeConfig.ProductNamePath         = scrapeConfig.ProductNamePath.RemoveSpecialCharacters();
            scrapeConfig.ProductPricePath        = scrapeConfig.ProductPricePath.RemoveSpecialCharacters();
            scrapeConfig.ProductAvailabilityPath = scrapeConfig.ProductAvailabilityPath.RemoveSpecialCharacters();

            await _scrapeConfigService.UpdateAsync(scrapeConfig);

            return RedirectToAction(nameof(Index));
        }
Exemple #11
0
 /// <summary>
 /// Keeps the injected repository and scrape configuration for later use by this invocable.
 /// </summary>
 /// <param name="repository">Indicator data repository stored in <c>_repository</c>.</param>
 /// <param name="scrapeConfig">Scrape configuration stored in <c>_scrapeConfig</c>.</param>
 public PriceDownloaderInvocable(IIndicatorDataRepository repository, ScrapeConfig scrapeConfig)
 {
     _scrapeConfig = scrapeConfig;
     _repository   = repository;
 }
Exemple #12
0
 /// <summary>
 /// Stores the injected configuration, then creates the Rabbit context from the
 /// <c>CoronaVirusApiTracker</c> config file and a sender on top of that context.
 /// </summary>
 /// <param name="scrapeConfig">Configuration kept in <c>_scrapeConfig</c>; its <c>CoronaVirusApiTracker</c> section supplies the config file.</param>
 public cvApiTrackerInvocable(ScrapeConfig scrapeConfig)
 {
     _scrapeConfig = scrapeConfig;

     var rabbit = new RabbitContext();
     _ctx    = rabbit.Create(scrapeConfig.CoronaVirusApiTracker.GetConfigFile());
     _sender = new RmqSender(_ctx);
 }
Exemple #13
0
        /// <summary>
        /// Queue-triggered function that re-scrapes the products read from the product table, writes
        /// each refreshed product back, queues a history entry for every changed product and queues
        /// one summary e-mail for the user.
        /// </summary>
        /// <param name="userProfile">User whose notification run was dequeued.</param>
        /// <param name="addProductHistoryMessageQueue">Receives every product that changed since the last check.</param>
        /// <param name="emailMessageQueue">Receives the summary e-mail for the user.</param>
        /// <param name="sendGridMessageQueue">Receives an error e-mail for every product whose scrape threw.</param>
        /// <param name="binder">Used to bind the product and scrape-config tables at run time.</param>
        /// <param name="log">Function logger.</param>
        /// <returns>
        /// A task covering the whole run. The method returns <see cref="Task"/> rather than <c>void</c>
        /// so the caller can await completion and observe failures.
        /// </returns>
        public static async Task Run(
            [QueueTrigger(QueueName.UsersReadyForNotifications, Connection = CommonName.Connection)] UserProfile userProfile,
            [Queue(QueueName.AddProductHistory)] IAsyncCollector <ProductInfo> addProductHistoryMessageQueue,
            [Queue(QueueName.ProductUpdateEmailNotifications)] IAsyncCollector <EmailMessage> emailMessageQueue,
            [Queue(QueueName.EmailsToSend)] IAsyncCollector <SendGridMessage> sendGridMessageQueue,
            IBinder binder,
            ILogger log)
        {
            log.LogInformation($"C# Queue trigger function processed: {userProfile.FirstName}");

            CloudTable productInfoTable = await binder.BindAsync <CloudTable>(new TableAttribute(TableName.ProductInfo, userProfile.UserId)
            {
                Connection = CommonName.Connection
            });

            CloudTable scrapeConfigTable = await binder.BindAsync <CloudTable>(new TableAttribute(TableName.ScrapeConfig)
            {
                Connection = CommonName.Connection
            });

            // NOTE(review): the product query carries no filter, so it returns whatever rows the bound
            // table exposes - confirm that this is limited to the current user's products.
            System.Collections.Generic.List <ProductInfo> userProducts =
                await LoadAllEntitiesAsync(productInfoTable, new TableQuery <ProductInfo>());

            // All configs are loaded up front and matched in memory per product.
            System.Collections.Generic.List <ScrapeConfig> allConfigs =
                await LoadAllEntitiesAsync(scrapeConfigTable, new TableQuery <ScrapeConfig>());

            StringBuilder emailBodyBuilder = new StringBuilder();

            log.LogInformation($"userProducts: {userProducts.Count}");
            foreach (ProductInfo product in userProducts)
            {
                // A config belongs to a product when its partition key equals the product's core URL.
                ScrapeConfig config = allConfigs.FirstOrDefault(t => t.PartitionKey.Equals(product.URL.ToCoreUrl()));
                if (config == null)
                {
                    log.LogInformation($"No scrape config matches the criteria URL={product.URL}");
                    continue;
                }

                log.LogInformation($"ScrapeConfig : {config.Name}");
                Utils utils = new Utils();
                try
                {
                    await utils.Scrape(config, product, log);
                }
                catch (Exception ex)
                {
                    // Collect the messages of the whole exception chain without losing the outer exception.
                    StringBuilder errorMsg = new StringBuilder(ex.Message);
                    for (Exception inner = ex.InnerException; inner != null; inner = inner.InnerException)
                    {
                        errorMsg.Append(Environment.NewLine).Append(inner.Message);
                    }

                    string error = $"Error while trying to scrape product = {product.Name}, URL={product.URL}. Error: {errorMsg}";
                    log.LogError(ex, error);

                    SendGridMessage message = new SendGridMessage();
                    message.AddTo("*****@*****.**");
                    message.AddContent("text/html", error);
                    message.SetFrom(new EmailAddress("*****@*****.**"));
                    message.SetSubject("Product scrape exception");
                    await sendGridMessageQueue.AddAsync(message);
                }

                // A failed scrape does not stop processing: the product is still checked and written back.
                if (product.HasChangesSinceLastTime)
                {
                    string productUpdateLine = utils.CreateProductEmailLine(product);
                    emailBodyBuilder.AppendLine(productUpdateLine);
                    emailBodyBuilder.AppendLine("<br>");

                    // Add to history queue
                    await addProductHistoryMessageQueue.AddAsync(product);
                }

                // Update product in db
                TableOperation operation = TableOperation.InsertOrReplace(product);
                await productInfoTable.ExecuteAsync(operation);
            }

            if (emailBodyBuilder.Length > 0)
            {
                emailBodyBuilder.AppendLine();
                emailBodyBuilder.AppendLine("<br>");
                emailBodyBuilder.AppendLine("<br>");
                emailBodyBuilder.AppendLine("<a href='https://product-scrape.azurewebsites.net/Products'>Here you can see the list of your products</a>");

                EmailMessage updatesMessage = new EmailMessage
                {
                    UserId  = userProfile.UserId,
                    Subject = "Products updates",
                    Content = emailBodyBuilder.ToString()
                };

                log.LogInformation($"EmailMessage Product updates");
                await emailMessageQueue.AddAsync(updatesMessage);
            }
            else if (userProfile.SendEmailWhenNoProductHasBeenChanged)
            {
                log.LogInformation($"EmailMessage No Product update");
                EmailMessage noUpdatesMessage = new EmailMessage
                {
                    UserId  = userProfile.UserId,
                    Subject = "Products updates",
                    Content = "None of your products has been updated/changed since last check."
                };
                await emailMessageQueue.AddAsync(noUpdatesMessage);
            }
        }

        /// <summary>
        /// Executes <paramref name="query"/> against <paramref name="table"/> and keeps requesting
        /// segments until no continuation token is returned, so results beyond the first segment are
        /// not dropped.
        /// </summary>
        private static async Task <System.Collections.Generic.List <T> > LoadAllEntitiesAsync <T>(CloudTable table, TableQuery <T> query)
            where T : ITableEntity, new()
        {
            var entities = new System.Collections.Generic.List <T>();
            TableContinuationToken token = null;
            do
            {
                TableQuerySegment <T> segment = await table.ExecuteQuerySegmentedAsync(query, token);
                entities.AddRange(segment.Results);
                token = segment.ContinuationToken;
            }
            while (token != null);

            return entities;
        }
Exemple #14
0
 /// <summary>
 /// Posts <paramref name="scrapeConfig"/> to the UpdateScrapeConfig function endpoint, whose address is
 /// built from the configured function URL, the function name and the function code.
 /// </summary>
 /// <param name="scrapeConfig">Configuration sent as the request payload.</param>
 /// <returns>A task that completes when the POST request has been handled.</returns>
 public async Task UpdateAsync(ScrapeConfig scrapeConfig)
 {
     var settings = _settings.Value;
     string endpoint = string.Concat(
         settings.AzureFunctionURL,
         FunctionName.UpdateScrapeConfig,
         "?code=",
         settings.AzureFunctionCode);

     await _httpHandlerService.HandlePostRequest(endpoint, scrapeConfig);
 }
Exemple #15
0
        /// <summary>
        /// Downloads the product page and refreshes <paramref name="product"/> in place: name, price and
        /// availability are read with the XPath expressions from <paramref name="scrapeConfig"/>, the
        /// previous values are kept in the <c>Previous*</c> properties and
        /// <c>HasChangesSinceLastTime</c> is set when any of them differs.
        /// </summary>
        /// <param name="scrapeConfig">XPath expressions, currency and availability rules for the product's site.</param>
        /// <param name="product">Product to refresh; nothing is done when its URL is blank.</param>
        /// <param name="log">Logger receiving diagnostics.</param>
        /// <returns>A task that completes when the product has been refreshed.</returns>
        /// <remarks>
        /// Download failures propagate to the caller. Failures while evaluating a single XPath are
        /// logged and do not prevent the remaining fields from being processed.
        /// </remarks>
        public async Task Scrape(ScrapeConfig scrapeConfig, ProductInfo product, ILogger log)
        {
            if (string.IsNullOrWhiteSpace(product.URL))
            {
                log.LogInformation("URL can not be empty!");
                return;
            }

            string html = await _webClient.DownloadStringTaskAsync(product.URL);

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(html);

            product.HasChangesSinceLastTime = false;
            product.Currency = scrapeConfig.Currency;

            // --- Name ---
            try
            {
                HtmlNode titleNode = doc.DocumentNode.SelectSingleNode(scrapeConfig.ProductNamePath);
                if (titleNode != null && product.Name != titleNode.InnerText)
                {
                    product.HasChangesSinceLastTime = true;
                    product.PreviousName            = product.Name;
                    product.Name = titleNode.InnerText;
                }
            }
            catch (Exception ex)
            {
                log.LogWarning(ex, "Could not read the product name from {Url}", product.URL);
            }

            // --- Price ---
            try
            {
                HtmlNode priceNode = doc.DocumentNode.SelectSingleNode(scrapeConfig.ProductPricePath);
                if (priceNode != null)
                {
                    // Non-breaking-space entities are removed before the text is compared and stored.
                    string newPrice = priceNode.InnerText.Replace("&nbsp;", "");
                    if (product.Price != newPrice)
                    {
                        product.HasChangesSinceLastTime = true;
                        product.PreviousPrice           = product.Price;
                        product.Price = newPrice;
                    }
                }
            }
            catch (Exception ex)
            {
                log.LogWarning(ex, "Could not read the product price from {Url}", product.URL);
            }

            // --- Availability ---
            try
            {
                HtmlNode availabilityNode = doc.DocumentNode.SelectSingleNode(scrapeConfig.ProductAvailabilityPath);
                if (availabilityNode != null)
                {
                    bool isAvailable;

                    if (scrapeConfig.ProductAvailabilityIsAtributeValue)
                    {
                        // Available when any attribute of the node carries the configured value.
                        isAvailable = availabilityNode.Attributes.Any(t => t.Value == scrapeConfig.ProductAvailabilityValue);
                    }
                    else if (string.IsNullOrEmpty(scrapeConfig.ProductAvailabilityValue))
                    {
                        // No expected text configured: the presence of the node alone means available.
                        isAvailable = true;
                    }
                    else
                    {
                        // An expected text is configured, so a mismatch means not available.
                        // (Previously a mismatch fell back to "node exists" and was always true.)
                        isAvailable = availabilityNode.InnerText == scrapeConfig.ProductAvailabilityValue;
                    }

                    if (product.Availability != isAvailable)
                    {
                        product.HasChangesSinceLastTime = true;
                        product.PreviousAvailability    = product.Availability;
                        product.Availability            = isAvailable;
                    }
                }
                else
                {
                    // Node not found: availability is unknown. This is not flagged as a change.
                    product.Availability = null;
                }
            }
            catch (Exception ex)
            {
                log.LogWarning(ex, "Could not read the product availability from {Url}", product.URL);
            }

            product.LastCheckedOn = DateTime.UtcNow;
        }