/// <summary>
/// Points the browser at <paramref name="hyperlink"/> and reads the level, level-status points,
/// points and name texts from the loaded page.
/// </summary>
/// <remarks>
/// NOTE(review): the four texts read below are only held in locals and are never copied onto the
/// returned object - confirm whether they are supposed to be stored on the result.
/// </remarks>
/// <param name="hyperlink">URL assigned to <c>browser.Url</c> and stored in <c>ProductUrl</c>.</param>
/// <returns>
/// A <see cref="WebScrappedData"/> with <c>ProductUrl</c> set, a fresh <c>ScrappedTextual</c>,
/// <c>IsSuccess</c> set to <c>true</c> and <c>ErrorMessage</c> set to <c>null</c>.
/// </returns>
public WebScrappedData Webscrape(string hyperlink)
{
    var result = new WebScrappedData
    {
        ProductUrl = hyperlink,
        ScrappedTextual = new WebScrappedTextual()
    };

    browser.Url = hyperlink;
    INavigation navigation = browser.Navigate();

    // One lookup routine shared by every field: empty string when the element is not present,
    // otherwise the text of the first match.
    System.Func<string, string> firstTextOrEmpty = xpath =>
    {
        if (!WebscraperUtils.IsElementPresent(browser, By.XPath(xpath)))
        {
            return string.Empty;
        }

        var firstMatch = browser.FindElements(By.XPath(xpath)).FirstOrDefault();
        return WebscraperUtils.GetTextFromElement(firstMatch);
    };

    // Level
    string levelData = firstTextOrEmpty("//span[@id='level-status-text']");

    // Level status points
    string levelStatusPointsData = firstTextOrEmpty("//span[@id='level-status-points']");

    // Points
    string pointsData = firstTextOrEmpty("//span[@id='level-status-points']/span[@class='has-text-weight-semibold']");

    // Name
    string nameData = firstTextOrEmpty("//h1[@class='title has-margin-top-small has-margin-bottom-extra-small']");

    result.IsSuccess = true;
    result.ErrorMessage = null;
    return result;
}
/// <summary>
/// Reads the text of the first element matched by the XPath obtained from formatting
/// <paramref name="infoBasePath"/> with <paramref name="nutrientName"/>.
/// </summary>
/// <param name="ret">Not used by this method; kept so existing call sites remain valid.</param>
/// <param name="infoBasePath">
/// Composite format string for the XPath; its <c>{0}</c> placeholder receives <paramref name="nutrientName"/>.
/// </param>
/// <param name="nutrientName">Value substituted into the XPath template.</param>
/// <returns>
/// The element text, or <see cref="string.Empty"/> when the element is not present or no match is found.
/// </returns>
private string GetNutrientInfo(WebScrappedData ret, string infoBasePath, string nutrientName)
{
    // No try/catch here: a catch that only rethrows adds nothing, so exceptions
    // simply propagate to the caller.
    var locator = By.XPath(string.Format(infoBasePath, nutrientName));

    if (!WebscraperUtils.IsElementPresent(browser, locator))
    {
        return string.Empty;
    }

    var nutrientElem = browser.FindElements(locator).FirstOrDefault();
    return nutrientElem != null
        ? WebscraperUtils.GetTextFromElement(nutrientElem)
        : string.Empty;
}
/// <summary>
/// Loads <paramref name="hyperlink"/> in the browser and scrapes the product page: ingredient
/// statement, nutrient values and energy, product images, manufacturer address and country of
/// origin, net content figures, regulated product name, storage and preparation instructions,
/// and rations. Page labels are matched against Spanish text.
/// </summary>
/// <param name="hyperlink">URL assigned to <c>browser.Url</c> and stored in <c>ProductUrl</c>.</param>
/// <returns>
/// The scraped data. <c>IsSuccess</c> is <c>true</c> and <c>ErrorMessage</c> is <c>null</c> when no
/// exception occurred; otherwise <c>IsSuccess</c> is <c>false</c>, <c>ErrorMessage</c> holds the
/// exception message, and the exception is written to the console. Exceptions are not rethrown.
/// </returns>
public WebScrappedData Webscrape(string hyperlink)
{
    WebScrappedData ret = new WebScrappedData
    {
        IsSuccess = false,
        ProductUrl = hyperlink,
        ScrappedTextual = new WebScrappedTextual()
    };

    try
    {
        browser.Url = hyperlink;

        #region [ Process page data ]

        string text;

        // Ingredients
        if (TryGetFirstElementText(By.XPath("//*[@id=\"nutritionalinformation\"]/*[text()=\"Ingredientes\"]/following-sibling::div[1]"), out text))
        {
            ret.ScrappedTextual.IngredientStatement = text;
        }

        // Nutrient table: locate the header cell in the first row that mentions "100";
        // its colspan is used to compute which sibling cell of each nutrient row is read.
        string columnaRacion100 = "//*[@id=\"nutritionalinformation\"]/div[@class=\"tabs-nutritionalinfo-table-nutrients\"]/table/tbody/tr[1]/td/*[contains(text(), 100)]";
        if (WebscraperUtils.IsElementPresent(browser, By.XPath(columnaRacion100)))
        {
            string col = browser.FindElement(By.XPath(columnaRacion100 + "/ancestor::td")).GetAttribute("colspan");
            if (!string.IsNullOrWhiteSpace(col))
            {
                // When colspan is not an integer, TryParse leaves column at 0 and the lookups
                // below are still attempted with that index (behaviour kept as-is).
                int column;
                Int32.TryParse(col, out column);

                string infoBasePath = "//*[@id=\"nutritionalinformation\"]/div[@class=\"tabs-nutritionalinfo-table-nutrients\"]/table/tbody/tr/td/*[text()=\"{0}\"]/ancestor::td/following-sibling::td[" + (column - 1) + "]/div";

                ret.ScrappedTextual.Nutrients.CarboHydrates = GetNutrientInfo(ret, infoBasePath, "hidratos de carbono");
                ret.ScrappedTextual.Nutrients.Fibre = GetNutrientInfo(ret, infoBasePath, "fibra alimentaria");
                ret.ScrappedTextual.Nutrients.Fat = GetNutrientInfo(ret, infoBasePath, "grasas");
                ret.ScrappedTextual.Nutrients.Protein = GetNutrientInfo(ret, infoBasePath, "proteínas");

                // Disabled lookups, kept for reference (Nutrients property <- row label):
                //   FatSaturated <- "ácidos grasos saturados", FatMonoUnsaturated <- "ácidos grasos monoinsaturados",
                //   FatPoliSaturated <- "ácidos grasos poliinsaturados", Sugar <- "azúcares", Salt <- "sal",
                //   Niacin <- "niacina", Riboflavin <- "riboflavina", FolicAcid <- "ácido fólico",
                //   Thiamine <- "tiamina", Iron <- "hierro", VitaminA <- "vitamina A", VitaminB12 <- "vitamina B12",
                //   VitaminB6 <- "vitamina B6", VitaminC <- "vitamina C", VitaminD <- "vitamina D",
                //   VitaminE <- "vitamina E", VitaminK <- "vitamina K", Zinc <- "zinc", Phosphorus <- "fósforo",
                //   Biot <- "biotina", Calcium <- "calcio", Chloride <- "cloruro", Chromium <- "cromo",
                //   Copper <- "cobre", Fluoride <- "fluoruro", DietaryFiber <- "fibra alimentaria", Iodo <- "yodo",
                //   Potassium <- "potasio", Magnesium <- "magnesio", Manganese <- "manganeso",
                //   Molybdenum <- "molibdeno", PantothenicAcid <- "ácido pantoténico",
                //   Polyalcohol <- "polialcoholes", Selenium <- "selenio", Starch <- "almidón"

                // Energy: the same row can yield several cells; each is routed by the unit it mentions.
                string energyXpath = string.Format(infoBasePath, "valor energético");
                if (WebscraperUtils.IsElementPresent(browser, By.XPath(energyXpath)))
                {
                    foreach (var energyElem in browser.FindElements(By.XPath(energyXpath)))
                    {
                        // Read the text once per element instead of once per comparison/assignment.
                        string energyText = WebscraperUtils.GetTextFromElement(energyElem);
                        if (energyText == null)
                        {
                            continue;
                        }

                        // Ordinal, case-insensitive unit match (no culture-dependent lowercasing).
                        if (energyText.IndexOf("kj", StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            ret.ScrappedTextual.Nutrients.EnergyKJ = energyText;
                        }
                        else if (energyText.IndexOf("kcal", StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            ret.ScrappedTextual.Nutrients.EnergyKCal = energyText;
                        }
                    }
                }
            }
        }

        // Images: prefer the carousel images; fall back to the single zoom link.
        if (WebscraperUtils.IsElementPresent(browser, By.CssSelector(".zoomPad img")))
        {
            IEnumerable<string> uriList;
            var images = browser.FindElements(By.CssSelector("#productDetailUpdateable .owl-item img"));
            if (images != null && images.Any())
            {
                // Materialized so the attributes are read from the live elements exactly once.
                uriList = images.Select(x => x.GetAttribute("data-zoomimagesrc")).ToList();
            }
            else
            {
                images = browser.FindElements(By.CssSelector("#zoomImagen"));
                uriList = images.Select(x => x.GetAttribute("href")).ToList();
            }

            ret.ScrappedImages = BinaryImageHelper.GetImageFromURI(uriList);
        }

        // Manufacturer
        string manufacturer = "//div[@class='tabs-nutritionalinfo-manufact-informationcontent' and ./h4/text() = 'Manufacturado']";
        if (WebscraperUtils.IsElementPresent(browser, By.XPath(manufacturer)))
        {
            var address = browser.FindElements(By.XPath($"{manufacturer}/following::div[2]")).FirstOrDefault();
            if (address != null)
            {
                ret.ScrappedTextual.Address = WebscraperUtils.GetTextFromElement(address);
            }

            var country = browser.FindElements(By.XPath($"{manufacturer}/following::div[3]")).FirstOrDefault();
            string countryText = country != null ? WebscraperUtils.GetTextFromElement(country) : null;
            if (countryText != null && countryText.Contains("Pais de origen:"))
            {
                ret.ScrappedTextual.CountryOfOrigin = countryText.Replace("Pais de origen:", "").Trim();
            }
        }

        // Net Content
        if (TryGetFirstElementText(By.XPath("//div[@class='tabs-nutritionalinfo-table-div' and ./div[@class='tabs-nutritionalinfo-manufact-value' and contains(text(),'CANTIDAD NETA (en masa)')]]/div[@class='tabs-nutritionalinfo-manufact-quantity']"), out text))
        {
            ret.ScrappedTextual.NetContent = text;
        }

        if (TryGetFirstElementText(By.XPath("//div[@class='tabs-nutritionalinfo-table-div' and ./div[@class='tabs-nutritionalinfo-manufact-value' and contains(text(),'CANTIDAD NETA (en volumen)')]]/div[@class='tabs-nutritionalinfo-manufact-quantity']"), out text))
        {
            ret.ScrappedTextual.NetContentVolume = text;
        }

        if (TryGetFirstElementText(By.XPath("//div[@class='tabs-nutritionalinfo-table-div' and ./div[@class='tabs-nutritionalinfo-manufact-value' and contains(text(),'CANTIDAD NETA (disgregada)')]]/div[@class='tabs-nutritionalinfo-manufact-quantity']"), out text))
        {
            ret.ScrappedTextual.DisaggregateNetContent = text;
        }

        if (TryGetFirstElementText(By.XPath("//div[@class='tabs-nutritionalinfo-table-div' and ./div[@class='tabs-nutritionalinfo-manufact-value' and contains(text(),'Peso neto escurrido')]]/div[@class='tabs-nutritionalinfo-manufact-quantity']"), out text))
        {
            ret.ScrappedTextual.NetContentDrained = text;
        }

        // RegulatedProductName
        if (TryGetFirstElementText(By.XPath("//div[@id='nutritionalinformation']/h4[ text() = 'Información Adicional']/following::div[@class='form_field-label']"), out text))
        {
            ret.ScrappedTextual.RegulatedProductName = text;
        }

        // Consumer usage / storage instructions
        if (TryGetFirstElementText(By.XPath("//div[@id='nutritionalinformation']/h4[ text() = 'Condiciones de conservación']/following::div[@class='form_field-label']"), out text))
        {
            ret.ScrappedTextual.ConsumerUsageStorageInstructions = text;
        }

        // PreparationInstructions
        if (TryGetFirstElementText(By.XPath("//div[@id='nutritionalinformation']/h4[ text() = 'Modo de Empleo']/following::div[@class='form_field-label']"), out text))
        {
            ret.ScrappedTextual.PreparationInstructions = text;
        }

        // Rations
        if (TryGetFirstElementText(By.XPath("//div[@id='nutritionalinformation']/h4[ text() = 'Número de raciones por envase']/following::div[@class='form_field-label']"), out text))
        {
            ret.ScrappedTextual.Rations = text;
        }

        #endregion

        ret.IsSuccess = true;
        ret.ErrorMessage = null;
    }
    catch (Exception ex)
    {
        // Failures are reported through the result object rather than thrown to the caller.
        Console.ForegroundColor = ConsoleColor.Red;
        Console.WriteLine($"[EXCEPTION] {ex.Message}");
        Console.WriteLine($"[STACKTRACE] {ex.StackTrace}");
        Console.ResetColor();

        ret.IsSuccess = false;
        ret.ErrorMessage = ex.Message;
    }

    return ret;
}

/// <summary>
/// Gets the text of the first element matching <paramref name="locator"/>, when such an element is present.
/// </summary>
/// <param name="locator">Locator used for both the presence check and the element lookup.</param>
/// <param name="text">Receives the element text on success; <c>null</c> otherwise.</param>
/// <returns><c>true</c> when an element was found and its text was read; otherwise <c>false</c>.</returns>
private bool TryGetFirstElementText(By locator, out string text)
{
    text = null;

    if (!WebscraperUtils.IsElementPresent(browser, locator))
    {
        return false;
    }

    // The lookup can still come back empty after the presence check, so guard against null.
    var element = browser.FindElements(locator).FirstOrDefault();
    if (element == null)
    {
        return false;
    }

    text = WebscraperUtils.GetTextFromElement(element);
    return true;
}
/// <summary>
/// Points the browser at <paramref name="hyperlink"/>, pauses for two seconds, then reads the
/// level, level-status points, points and user name from the page into a new
/// <see cref="WebScrappedData"/>. The first three values are also echoed to the console.
/// </summary>
/// <param name="hyperlink">URL assigned to <c>browser.Url</c>.</param>
/// <returns>
/// The populated result with <c>IsSuccess</c> set to <c>true</c>; a field is only assigned when
/// its element is reported present.
/// </returns>
public WebScrappedData Webscrape(string hyperlink)
{
    var result = new WebScrappedData();
    browser.Url = hyperlink;

    // Progress marker, printed in yellow.
    Console.ForegroundColor = ConsoleColor.Yellow;
    Console.WriteLine("Navigate()");
    INavigation navigation = browser.Navigate();
    Console.ResetColor();

    // Fixed 2-second blocking pause before the page is queried.
    System.Threading.Tasks.Task.Delay(2000).GetAwaiter().GetResult();

    // Level
    var levelLocator = By.XPath("//span[@id='level-status-text']");
    if (WebscraperUtils.IsElementPresent(browser, levelLocator))
    {
        var levelText = WebscraperUtils.GetTextFromElement(browser.FindElements(levelLocator).FirstOrDefault());
        Console.WriteLine($"LEVEL -> {levelText}");
        result.Level = levelText;
    }

    // Level status points
    var statusPointsLocator = By.XPath("//span[@id='level-status-points']");
    if (WebscraperUtils.IsElementPresent(browser, statusPointsLocator))
    {
        var statusPointsText = WebscraperUtils.GetTextFromElement(browser.FindElements(statusPointsLocator).FirstOrDefault());
        Console.WriteLine($"LEVEL STATUS POINTS -> {statusPointsText}");
        result.LevelStatusPoints = statusPointsText;
    }

    // Points
    var pointsLocator = By.XPath("//span[@id='level-status-points']/span[@class='has-text-weight-semibold']");
    if (WebscraperUtils.IsElementPresent(browser, pointsLocator))
    {
        var pointsText = WebscraperUtils.GetTextFromElement(browser.FindElements(pointsLocator).FirstOrDefault());
        Console.WriteLine($"POINTS -> {pointsText}");
        result.Points = pointsText;
    }

    // Name (stored as Username; not echoed to the console)
    var nameLocator = By.XPath("//h1[@class='title has-margin-top-small has-margin-bottom-extra-small']");
    if (WebscraperUtils.IsElementPresent(browser, nameLocator))
    {
        var nameText = WebscraperUtils.GetTextFromElement(browser.FindElements(nameLocator).FirstOrDefault());
        result.Username = nameText;
    }

    result.IsSuccess = true;
    return result;
}