Ejemplo n.º 1
0
        /// <summary>
        /// Loads <paramref name="hyperlink"/> in the shared browser and reads the level,
        /// level-status points, points and user name shown on the page.
        /// </summary>
        /// <param name="hyperlink">Address of the page to load; also stored as <c>ProductUrl</c>.</param>
        /// <returns>
        /// A <see cref="WebScrappedData"/> holding every value whose element was found.
        /// <c>IsSuccess</c> is <c>true</c> and <c>ErrorMessage</c> is <c>null</c> once all lookups have run;
        /// exceptions are not caught here.
        /// </returns>
        public WebScrappedData Webscrape(string hyperlink)
        {
            WebScrappedData ret = new WebScrappedData
            {
                ProductUrl      = hyperlink,
                ScrappedTextual = new WebScrappedTextual()
            };

            // NOTE(review): browser is presumably a Selenium IWebDriver, where assigning Url loads
            // the page; the former unused Navigate() handle has therefore been dropped.
            browser.Url = hyperlink;

            // Level
            string level = "//span[@id='level-status-text']";

            if (WebscraperUtils.IsElementPresent(browser, By.XPath(level)))
            {
                var levelElement = browser.FindElements(By.XPath(level)).FirstOrDefault();

                // Store the value on the result; previously it was read and then discarded.
                ret.Level = WebscraperUtils.GetTextFromElement(levelElement);
            }

            // Level status points
            string levelStatusPoints = "//span[@id='level-status-points']";

            if (WebscraperUtils.IsElementPresent(browser, By.XPath(levelStatusPoints)))
            {
                var levelStatusPointsElement = browser.FindElements(By.XPath(levelStatusPoints)).FirstOrDefault();
                ret.LevelStatusPoints = WebscraperUtils.GetTextFromElement(levelStatusPointsElement);
            }

            // Points (the emphasised span nested inside the level-status-points element)
            string points = "//span[@id='level-status-points']/span[@class='has-text-weight-semibold']";

            if (WebscraperUtils.IsElementPresent(browser, By.XPath(points)))
            {
                var pointsElement = browser.FindElements(By.XPath(points)).FirstOrDefault();
                ret.Points = WebscraperUtils.GetTextFromElement(pointsElement);
            }

            // Name
            string name = "//h1[@class='title has-margin-top-small has-margin-bottom-extra-small']";

            if (WebscraperUtils.IsElementPresent(browser, By.XPath(name)))
            {
                var nameElement = browser.FindElements(By.XPath(name)).FirstOrDefault();
                ret.Username = WebscraperUtils.GetTextFromElement(nameElement);
            }

            ret.IsSuccess    = true;
            ret.ErrorMessage = null;

            return ret;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the first registered scrapper whose <c>CanWebscrape</c> check accepts the given
        /// GLN/path pair and returns its result.
        /// </summary>
        /// <param name="gln">Value passed, together with <paramref name="path"/>, to each scrapper's <c>CanWebscrape</c>.</param>
        /// <param name="path">
        /// Page to scrape. When <c>null</c>, the scrapper's <c>FindAndWebscrape</c> is called with the
        /// remaining arguments instead of <c>Webscrape</c>.
        /// </param>
        /// <param name="gtin">Forwarded to <c>FindAndWebscrape</c> when <paramref name="path"/> is <c>null</c>.</param>
        /// <param name="internalCode">Forwarded to <c>FindAndWebscrape</c> when <paramref name="path"/> is <c>null</c>.</param>
        /// <param name="description">Forwarded to <c>FindAndWebscrape</c> when <paramref name="path"/> is <c>null</c>.</param>
        /// <returns>The selected scrapper's result, or <c>null</c> when no scrapper accepts the request.</returns>
        public static WebScrappedData WebScrappe(string gln, string path, string gtin, string internalCode, string description)
        {
            try
            {
                List <IWebScrapper> scrappers = new List <IWebScrapper>()
                {
                    new WebScrapperDia(browser),
                    //new WebScrapperLearn(browser),
                };

                foreach (var scrapper in scrappers)
                {
                    if (!scrapper.CanWebscrape(gln, path))
                    {
                        continue;
                    }

                    // A known page is scraped directly; otherwise the scrapper has to locate it first.
                    if (path != null)
                    {
                        return scrapper.Webscrape(path);
                    }

                    return scrapper.FindAndWebscrape(gtin, internalCode, description);
                }

                return null;
            }
            finally
            {
                // Printed on every exit path, including when an exception propagates.
                // The shared browser itself is not closed by this method.
                Console.ForegroundColor = ConsoleColor.DarkMagenta;
                Console.WriteLine("Exit browser");
                Console.ResetColor();
            }
        }
        /// <summary>
        /// Scrapes the page at <paramref name="path"/> through <c>WebScrapperService.WebScrappe</c>
        /// and wraps the outcome in an action result.
        /// </summary>
        /// <param name="path">Page address handed to the scraping service.</param>
        /// <returns>
        /// <c>Ok(result)</c> when the service returns data; <c>NotFound()</c> when it returns <c>null</c>.
        /// </returns>
        public ActionResult <WebScrappedData> Webscrape(string path)
        {
            // Exceptions are deliberately left uncaught so they reach the caller unchanged;
            // the former catch block only rethrew them.
            WebScrappedData result = WebScrapperService.WebScrappe(path);

            if (result == null)
            {
                return NotFound();
            }

            return Ok(result);
        }
 /// <summary>
 /// Scrapes <paramref name="path"/> with a <c>WebScrapperLearn</c> bound to the shared browser.
 /// </summary>
 /// <param name="path">Page address passed to the scrapper's <c>Webscrape</c>.</param>
 /// <returns>Whatever the scrapper returns for that page.</returns>
 public static WebScrappedData WebScrappe(string path)
 {
     try
     {
         IWebScrapper scrapper = new WebScrapperLearn(browser);
         return scrapper.Webscrape(path);
     }
     finally
     {
         // Printed on every exit path, including when an exception propagates.
         // The shared browser itself is not closed by this method.
         Console.ForegroundColor = ConsoleColor.DarkMagenta;
         Console.WriteLine("Exit browser");
         Console.ResetColor();
     }
 }
        /// <summary>
        /// Forwards the request to <c>WebScrapperService.WebScrappe</c> and wraps the outcome in an
        /// action result.
        /// </summary>
        /// <param name="gln">Passed through to the scraping service unchanged.</param>
        /// <param name="path">Passed through to the scraping service unchanged.</param>
        /// <param name="gtin">Passed through to the scraping service unchanged.</param>
        /// <param name="internalCode">Passed through to the scraping service unchanged.</param>
        /// <param name="description">Passed through to the scraping service unchanged.</param>
        /// <returns>
        /// <c>Ok(result)</c> when the service returns data; <c>NotFound()</c> when it returns <c>null</c>.
        /// </returns>
        public ActionResult <WebScrappedData> Webscrape(string gln, string path, string gtin, string internalCode, string description)
        {
            // Exceptions are deliberately left uncaught so they reach the caller unchanged;
            // the former catch block only rethrew them and never produced a status code itself.
            WebScrappedData result = WebScrapperService.WebScrappe(gln, path, gtin, internalCode, description);

            if (result == null)
            {
                return NotFound();
            }

            return Ok(result);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Reads the text of the first element matched by a nutrient-specific XPath.
        /// </summary>
        /// <param name="ret">Not used by this method; kept so existing call sites compile unchanged.</param>
        /// <param name="infoBasePath">
        /// Composite format string for the XPath; its <c>{0}</c> placeholder receives
        /// <paramref name="nutrientName"/>.
        /// </param>
        /// <param name="nutrientName">Text inserted into <paramref name="infoBasePath"/>.</param>
        /// <returns>
        /// The element text, or <see cref="string.Empty"/> when no matching element is present.
        /// </returns>
        private string GetNutrientInfo(WebScrappedData ret, string infoBasePath, string nutrientName)
        {
            By locator = By.XPath(string.Format(infoBasePath, nutrientName));

            if (!WebscraperUtils.IsElementPresent(browser, locator))
            {
                return string.Empty;
            }

            // The element may have disappeared between the presence check and this lookup.
            var nutrientElem = browser.FindElements(locator).FirstOrDefault();
            if (nutrientElem == null)
            {
                return string.Empty;
            }

            return WebscraperUtils.GetTextFromElement(nutrientElem);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Loads a product page and copies its ingredient list, nutrient values, images,
        /// manufacturer details, net-content figures and additional-information fields into a
        /// <see cref="WebScrappedData"/>.
        /// </summary>
        /// <param name="hyperlink">Address of the product page to load; also stored as <c>ProductUrl</c>.</param>
        /// <returns>
        /// The collected data with <c>IsSuccess</c> set to <c>true</c>. If any step throws, the
        /// exception is written to the console and the partially filled object is returned with
        /// <c>IsSuccess</c> set to <c>false</c> and <c>ErrorMessage</c> holding the exception message.
        /// </returns>
        public WebScrappedData Webscrape(string hyperlink)
        {
            // Hands the text of the first element matching xpath to assign; does nothing when
            // IsElementPresent reports no match.
            void CopyFirstText(string xpath, Action<string> assign)
            {
                By locator = By.XPath(xpath);
                if (WebscraperUtils.IsElementPresent(browser, locator))
                {
                    assign(WebscraperUtils.GetTextFromElement(browser.FindElements(locator).FirstOrDefault()));
                }
            }

            // XPath of the quantity cell belonging to the net-content row whose label contains the given text.
            string NetContentXpath(string label)
            {
                return "//div[@class='tabs-nutritionalinfo-table-div' and ./div[@class='tabs-nutritionalinfo-manufact-value' and contains(text(),'" + label + "')]]/div[@class='tabs-nutritionalinfo-manufact-quantity']";
            }

            // XPath of the field labels that follow the given heading of the nutritional-information panel.
            string AdditionalInfoXpath(string heading)
            {
                return "//div[@id='nutritionalinformation']/h4[ text() = '" + heading + "']/following::div[@class='form_field-label']";
            }

            WebScrappedData ret = new WebScrappedData
            {
                IsSuccess       = false,
                ProductUrl      = hyperlink,
                ScrappedTextual = new WebScrappedTextual()
            };

            try
            {
                browser.Url = hyperlink;

                // Ingredients
                CopyFirstText(
                    "//*[@id=\"nutritionalinformation\"]/*[text()=\"Ingredientes\"]/following-sibling::div[1]",
                    text => ret.ScrappedTextual.IngredientStatement = text);

                // Nutrients: locate the header cell that mentions 100 and use its colspan to pick the value column.
                string columnaRacion100 = "//*[@id=\"nutritionalinformation\"]/div[@class=\"tabs-nutritionalinfo-table-nutrients\"]/table/tbody/tr[1]/td/*[contains(text(), 100)]";
                if (WebscraperUtils.IsElementPresent(browser, By.XPath(columnaRacion100)))
                {
                    string col = browser.FindElement(By.XPath(columnaRacion100 + "/ancestor::td")).GetAttribute("colspan");

                    // TryParse also rejects null and blank input. A colspan that is not a number used to
                    // fall through with column == 0, producing XPaths that can never match; the nutrient
                    // lookups are now skipped in that case.
                    if (int.TryParse(col, out int column))
                    {
                        string infoBasePath = "//*[@id=\"nutritionalinformation\"]/div[@class=\"tabs-nutritionalinfo-table-nutrients\"]/table/tbody/tr/td/*[text()=\"{0}\"]/ancestor::td/following-sibling::td[" + (column - 1) + "]/div";

                        ret.ScrappedTextual.Nutrients.CarboHydrates = GetNutrientInfo(ret, infoBasePath, "hidratos de carbono");
                        ret.ScrappedTextual.Nutrients.Fibre         = GetNutrientInfo(ret, infoBasePath, "fibra alimentaria");
                        ret.ScrappedTextual.Nutrients.Fat           = GetNutrientInfo(ret, infoBasePath, "grasas");
                        ret.ScrappedTextual.Nutrients.Protein       = GetNutrientInfo(ret, infoBasePath, "proteínas");

                        // Disabled mappings, kept for reference (row label -> property):
                        // ret.ScrappedTextual.Nutrients.FatSaturated = GetNutrientInfo(ret, infoBasePath, "ácidos grasos saturados");
                        // ret.ScrappedTextual.Nutrients.FatMonoUnsaturated = GetNutrientInfo(ret, infoBasePath, "ácidos grasos monoinsaturados");
                        // ret.ScrappedTextual.Nutrients.FatPoliSaturated = GetNutrientInfo(ret, infoBasePath, "ácidos grasos poliinsaturados");
                        // ret.ScrappedTextual.Nutrients.Sugar = GetNutrientInfo(ret, infoBasePath, "azúcares");
                        // ret.ScrappedTextual.Nutrients.Salt = GetNutrientInfo(ret, infoBasePath, "sal");
                        // ret.ScrappedTextual.Nutrients.Niacin = GetNutrientInfo(ret, infoBasePath, "niacina");
                        // ret.ScrappedTextual.Nutrients.Riboflavin = GetNutrientInfo(ret, infoBasePath, "riboflavina");
                        // ret.ScrappedTextual.Nutrients.FolicAcid = GetNutrientInfo(ret, infoBasePath, "ácido fólico");
                        // ret.ScrappedTextual.Nutrients.Thiamine = GetNutrientInfo(ret, infoBasePath, "tiamina");
                        // ret.ScrappedTextual.Nutrients.Iron = GetNutrientInfo(ret, infoBasePath, "hierro");
                        // ret.ScrappedTextual.Nutrients.VitaminA = GetNutrientInfo(ret, infoBasePath, "vitamina A");
                        // ret.ScrappedTextual.Nutrients.VitaminB12 = GetNutrientInfo(ret, infoBasePath, "vitamina B12");
                        // ret.ScrappedTextual.Nutrients.VitaminB6 = GetNutrientInfo(ret, infoBasePath, "vitamina B6");
                        // ret.ScrappedTextual.Nutrients.VitaminC = GetNutrientInfo(ret, infoBasePath, "vitamina C");
                        // ret.ScrappedTextual.Nutrients.VitaminD = GetNutrientInfo(ret, infoBasePath, "vitamina D");
                        // ret.ScrappedTextual.Nutrients.VitaminE = GetNutrientInfo(ret, infoBasePath, "vitamina E");
                        // ret.ScrappedTextual.Nutrients.VitaminK = GetNutrientInfo(ret, infoBasePath, "vitamina K");
                        // ret.ScrappedTextual.Nutrients.Zinc = GetNutrientInfo(ret, infoBasePath, "zinc");
                        // ret.ScrappedTextual.Nutrients.Phosphorus = GetNutrientInfo(ret, infoBasePath, "fósforo");
                        // ret.ScrappedTextual.Nutrients.Biot = GetNutrientInfo(ret, infoBasePath, "biotina");
                        // ret.ScrappedTextual.Nutrients.Calcium = GetNutrientInfo(ret, infoBasePath, "calcio");
                        // ret.ScrappedTextual.Nutrients.Chloride = GetNutrientInfo(ret, infoBasePath, "cloruro");
                        // ret.ScrappedTextual.Nutrients.Chromium = GetNutrientInfo(ret, infoBasePath, "cromo");
                        // ret.ScrappedTextual.Nutrients.Copper = GetNutrientInfo(ret, infoBasePath, "cobre");
                        // ret.ScrappedTextual.Nutrients.Fluoride = GetNutrientInfo(ret, infoBasePath, "fluoruro");
                        // ret.ScrappedTextual.Nutrients.DietaryFiber = GetNutrientInfo(ret, infoBasePath, "fibra alimentaria");
                        // ret.ScrappedTextual.Nutrients.Iodo = GetNutrientInfo(ret, infoBasePath, "yodo");
                        // ret.ScrappedTextual.Nutrients.Potassium = GetNutrientInfo(ret, infoBasePath, "potasio");
                        // ret.ScrappedTextual.Nutrients.Magnesium = GetNutrientInfo(ret, infoBasePath, "magnesio");
                        // ret.ScrappedTextual.Nutrients.Manganese = GetNutrientInfo(ret, infoBasePath, "manganeso");
                        // ret.ScrappedTextual.Nutrients.Molybdenum = GetNutrientInfo(ret, infoBasePath, "molibdeno");
                        // ret.ScrappedTextual.Nutrients.PantothenicAcid = GetNutrientInfo(ret, infoBasePath, "ácido pantoténico");
                        // ret.ScrappedTextual.Nutrients.Polyalcohol = GetNutrientInfo(ret, infoBasePath, "polialcoholes");
                        // ret.ScrappedTextual.Nutrients.Selenium = GetNutrientInfo(ret, infoBasePath, "selenio");
                        // ret.ScrappedTextual.Nutrients.Starch = GetNutrientInfo(ret, infoBasePath, "almidón");

                        // The energy row can match several elements; each one is classified by the unit it mentions.
                        string energyXpath = string.Format(infoBasePath, "valor energético");
                        if (WebscraperUtils.IsElementPresent(browser, By.XPath(energyXpath)))
                        {
                            foreach (var energyElem in browser.FindElements(By.XPath(energyXpath)))
                            {
                                // Read the text once and compare culture-independently.
                                string energyText = WebscraperUtils.GetTextFromElement(energyElem);
                                string normalized = energyText.ToLowerInvariant();

                                if (normalized.Contains("kj"))
                                {
                                    ret.ScrappedTextual.Nutrients.EnergyKJ = energyText;
                                }
                                else if (normalized.Contains("kcal"))
                                {
                                    ret.ScrappedTextual.Nutrients.EnergyKCal = energyText;
                                }
                            }
                        }
                    }
                }

                // Images: use the carousel images when there are any, otherwise fall back to the single zoom link.
                if (WebscraperUtils.IsElementPresent(browser, By.CssSelector(".zoomPad img")))
                {
                    IEnumerable<string> uriList;

                    var images = browser.FindElements(By.CssSelector("#productDetailUpdateable .owl-item img"));
                    if (images != null && images.Any())
                    {
                        uriList = images.Select(x => x.GetAttribute("data-zoomimagesrc"));
                    }
                    else
                    {
                        images  = browser.FindElements(By.CssSelector("#zoomImagen"));
                        uriList = images.Select(x => x.GetAttribute("href"));
                    }

                    ret.ScrappedImages = BinaryImageHelper.GetImageFromURI(uriList);
                }

                // Manufacturer
                string manufacturer = "//div[@class='tabs-nutritionalinfo-manufact-informationcontent' and ./h4/text() = 'Manufacturado']";
                if (WebscraperUtils.IsElementPresent(browser, By.XPath(manufacturer)))
                {
                    // NOTE(review): either lookup below may yield null; this relies on
                    // GetTextFromElement coping with a null element - confirm.
                    var address = browser.FindElements(By.XPath($"{manufacturer}/following::div[2]")).FirstOrDefault();
                    ret.ScrappedTextual.Address = WebscraperUtils.GetTextFromElement(address);

                    var country = browser.FindElements(By.XPath($"{manufacturer}/following::div[3]")).FirstOrDefault();
                    string countryText = WebscraperUtils.GetTextFromElement(country);
                    if (countryText.Contains("Pais de origen:"))
                    {
                        ret.ScrappedTextual.CountryOfOrigin = countryText.Replace("Pais de origen:", "").Trim();
                    }
                }

                // Net content (by mass, by volume, disaggregated, drained)
                CopyFirstText(NetContentXpath("CANTIDAD NETA (en masa)"),    text => ret.ScrappedTextual.NetContent = text);
                CopyFirstText(NetContentXpath("CANTIDAD NETA (en volumen)"), text => ret.ScrappedTextual.NetContentVolume = text);
                CopyFirstText(NetContentXpath("CANTIDAD NETA (disgregada)"), text => ret.ScrappedTextual.DisaggregateNetContent = text);
                CopyFirstText(NetContentXpath("Peso neto escurrido"),        text => ret.ScrappedTextual.NetContentDrained = text);

                // Regulated product name
                CopyFirstText(AdditionalInfoXpath("Información Adicional"), text => ret.ScrappedTextual.RegulatedProductName = text);

                // Consumer storage instructions
                CopyFirstText(AdditionalInfoXpath("Condiciones de conservación"), text => ret.ScrappedTextual.ConsumerUsageStorageInstructions = text);

                // Preparation instructions
                CopyFirstText(AdditionalInfoXpath("Modo de Empleo"), text => ret.ScrappedTextual.PreparationInstructions = text);

                // Rations
                CopyFirstText(AdditionalInfoXpath("Número de raciones por envase"), text => ret.ScrappedTextual.Rations = text);

                ret.IsSuccess    = true;
                ret.ErrorMessage = null;
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and return whatever was collected before it.
                Console.ForegroundColor = ConsoleColor.Red;
                Console.WriteLine($"[EXCEPTION] {ex.Message}");
                Console.WriteLine($"[STACKTRACE] {ex.StackTrace}");
                Console.ResetColor();

                ret.IsSuccess    = false;
                ret.ErrorMessage = ex.Message;
            }

            return ret;
        }
        /// <summary>
        /// Opens <paramref name="hyperlink"/> in the shared browser, waits two seconds and copies
        /// the level, level-status points, points and user name found on the page.
        /// </summary>
        /// <param name="hyperlink">Address of the page to open.</param>
        /// <returns>
        /// A <see cref="WebScrappedData"/> with every value whose element was found and
        /// <c>IsSuccess</c> set to <c>true</c>; exceptions are not caught here.
        /// </returns>
        public WebScrappedData Webscrape(string hyperlink)
        {
            // Yields the text of the first element matching xpath; returns false, leaving text
            // null, when IsElementPresent reports no match.
            bool TryReadFirstText(string xpath, out string text)
            {
                if (!WebscraperUtils.IsElementPresent(browser, By.XPath(xpath)))
                {
                    text = null;
                    return false;
                }

                var first = browser.FindElements(By.XPath(xpath)).FirstOrDefault();
                text = WebscraperUtils.GetTextFromElement(first);
                return true;
            }

            var scrappedData = new WebScrappedData();

            browser.Url = hyperlink;

            // Trace the navigation step in yellow, then restore the console colours.
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine("Navigate()");
            browser.Navigate();
            Console.ResetColor();

            // Fixed two-second pause before the page is queried.
            System.Threading.Tasks.Task.Delay(2000).GetAwaiter().GetResult();

            // Level
            if (TryReadFirstText("//span[@id='level-status-text']", out string levelText))
            {
                Console.WriteLine($"LEVEL -> {levelText}");
                scrappedData.Level = levelText;
            }

            // Level status points
            if (TryReadFirstText("//span[@id='level-status-points']", out string statusPointsText))
            {
                Console.WriteLine($"LEVEL STATUS POINTS -> {statusPointsText}");
                scrappedData.LevelStatusPoints = statusPointsText;
            }

            // Points (the emphasised span nested inside the level-status-points element)
            if (TryReadFirstText("//span[@id='level-status-points']/span[@class='has-text-weight-semibold']", out string pointsText))
            {
                Console.WriteLine($"POINTS -> {pointsText}");
                scrappedData.Points = pointsText;
            }

            // Name (stored without a console trace)
            if (TryReadFirstText("//h1[@class='title has-margin-top-small has-margin-bottom-extra-small']", out string userName))
            {
                scrappedData.Username = userName;
            }

            scrappedData.IsSuccess = true;

            return scrappedData;
        }