/// <summary>
/// Loads a product detail page and copies the wine characteristics found on it
/// into <paramref name="productInfo"/>.
/// </summary>
/// <remarks>
/// The anchors inside the <c>characteristicsArea</c> div are read by position:
/// [0] category, [1] varietal, [2]..[4] region parts, [5] producer. Sample page content:
/// Category: White Wine / Varietal: Cortese / Region: Italy » Piedmont » Gavi / Producer: La Scolca.
/// The wine name is taken from the first <c>span.title</c> with the producer name removed;
/// when the name ends in four digits they are stored as the year and stripped from the name.
/// </remarks>
/// <param name="page">URL of the product detail page.</param>
/// <param name="productInfo">Model to populate; the same instance is returned.</param>
/// <returns><paramref name="productInfo"/> with category, varietal, region, winery, name and (when present) year filled in.</returns>
/// <exception cref="InvalidOperationException">
/// The page does not contain the six characteristic links or the title span this scraper relies on.
/// </exception>
public UpcDbModel ScrapeWineDetail(string page, UpcDbModel productInfo)
{
    const int expectedCharacteristicLinks = 6;
    const int yearLength = 4;

    var getHtmlWeb = new HtmlWeb();
    var document = getHtmlWeb.Load(page);

    // SelectNodes yields null rather than an empty collection when nothing matches,
    // so both null and a short list have to be rejected before indexing.
    var upcNodes = document.DocumentNode.SelectNodes("//div[@class='characteristicsArea']//a");
    if (upcNodes == null || upcNodes.Count < expectedCharacteristicLinks)
    {
        throw new InvalidOperationException(
            $"Expected at least {expectedCharacteristicLinks} characteristic links on {page} but found {upcNodes?.Count ?? 0}.");
    }

    productInfo.Category = upcNodes[0].InnerText;
    productInfo.Varietal = upcNodes[1].InnerText;
    productInfo.Region = $"{upcNodes[2].InnerText} / {upcNodes[3].InnerText} / {upcNodes[4].InnerText}";
    productInfo.Winery = upcNodes[5].InnerText;

    // Item title.
    var upcTitleNodes = document.DocumentNode.SelectNodes("//span[@class='title']");
    if (upcTitleNodes == null)
    {
        throw new InvalidOperationException($"No title span found on {page}.");
    }

    var title = upcTitleNodes[0].InnerText;

    // string.Replace throws ArgumentException for an empty search string.
    if (!string.IsNullOrEmpty(productInfo.Winery))
    {
        title = title.Replace(productInfo.Winery, string.Empty);
    }

    title = title.Trim();
    productInfo.WineName = title;

    if (title.Length >= yearLength)
    {
        // NumberStyles.None accepts digits only, so " 750" or "-123" is not mistaken for a year.
        int year;
        var yearText = title.Substring(title.Length - yearLength);
        if (int.TryParse(
                yearText,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out year))
        {
            productInfo.Year = year;

            // Strip only the trailing year; replacing the digits everywhere could damage the name.
            productInfo.WineName = title.Substring(0, title.Length - yearLength).TrimEnd();
        }
    }

    return productInfo;
}
/// <summary>
/// Looks up a fixed UPC via <c>GetUpcData</c>, points the result at a fixed product page,
/// copies the UPC code, size and description into a new <see cref="UpcDbModel"/>, and
/// scrapes the product page into that model.
/// </summary>
/// <returns>
/// <c>true</c> once the scrape has been performed; <c>false</c> when the UPC lookup returned nothing.
/// </returns>
public bool Run()
{
    const int defaultSizeMl = 750;

    var digitEyesProductInfo = GetUpcData("0089744756510");

    // NOTE(review): the GetUpcData variants visible in this file return null on failure;
    // the lookup used here is assumed to behave the same way - confirm.
    if (digitEyesProductInfo == null)
    {
        return false;
    }

    digitEyesProductInfo.product_web_page = "http://www.vinerepublic.com/r/products/la-scolca-gavi-di-gavi-black-label-2011";

    // Fall back to the default bottle size when the unit of measure is missing or is not
    // "<digits>ML"; a conversion that throws would abort the run over a cosmetic field.
    int size = defaultSizeMl;
    var uom = digitEyesProductInfo.uom;
    if (uom != null)
    {
        int parsedSize;
        if (int.TryParse(uom.Replace("ML", string.Empty), out parsedSize))
        {
            size = parsedSize;
        }
    }

    var productInfo = new UpcDbModel();
    productInfo.UpcCode = digitEyesProductInfo.upc_code;
    productInfo.Size = size;
    productInfo.WineName = digitEyesProductInfo.description;

    // The populated model is not used any further here.
    ScrapeWineDetail(digitEyesProductInfo.product_web_page, productInfo);

    return true;
}
/// <summary>
/// Loads <paramref name="page"/> and maps its "prem-prod-info" markup onto a new <see cref="UpcDbModel"/>.
/// </summary>
/// <param name="page">Address of the product page handed to <c>HtmlWeb.Load</c>.</param>
/// <returns>
/// The populated model, or <c>null</c> when extraction throws; in that case the page and the
/// exception message are appended to the error file.
/// </returns>
private async Task<UpcDbModel> GetUpcDataAsync(string page)
{
    // The download sits outside the try block, so a failing load propagates to the caller.
    var web = new HtmlWeb();
    var html = web.Load(page);

    try
    {
        // prem-prod-info
        var nameNodes = html.DocumentNode.SelectNodes("//div[@class='product-name']//h1");
        var ratingNodes = html.DocumentNode.SelectNodes("//div[@class='ratings']");
        var regionNodes = html.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-region']//dl/dd");
        var contentNodes = html.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-contents']");
        var detailNodes = html.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/dd");
        var upcMetaNodes = html.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/meta");
        var imageNodes = html.DocumentNode.SelectNodes("//p[@class='product-image']//img");

        // A node list that is null leaves the matching field null; line feeds are stripped from every value.
        var result = new UpcDbModel();
        result.WineName = nameNodes?[0].InnerText.Replace("\n", string.Empty);
        result.Category = detailNodes?[0].InnerText.Replace("\n", string.Empty);
        result.Winery = detailNodes?[1].InnerText.Replace("\n", string.Empty);
        result.Varietal = contentNodes?[0].InnerText.Replace("\n", string.Empty);
        result.Region = regionNodes?[0].InnerText.Replace("\n", string.Empty);
        result.UpcCode = upcMetaNodes?[0].Attributes["content"].Value.Replace("\n", string.Empty);
        result.Rating = ratingNodes?[0].InnerText.Replace("\n", string.Empty);
        result.ImagePath = imageNodes?[0].Attributes["src"].Value.Replace("\n", string.Empty);

        // Third detail entry: size. TryParse leaves 0 when the text is not a whole number.
        int parsedSize;
        var sizeText = detailNodes?[2].InnerText.Replace(" ", string.Empty).Replace("ml.", string.Empty);
        int.TryParse(sizeText, out parsedSize);
        result.Size = parsedSize;

        // Fourth detail entry: year, parsed the same way.
        int parsedYear;
        var yearText = detailNodes?[3].InnerText.Replace(" ", string.Empty).Replace("ml", string.Empty);
        int.TryParse(yearText, out parsedYear);
        result.Year = parsedYear;

        return result;
    }
    catch (Exception failure)
    {
        using (var errorLog = File.AppendText(_fileNameError))
        {
            await errorLog.WriteLineAsync($"{page} :: {failure.Message}");
        }

        return null;
    }
}
/// <summary>
/// Downloads <paramref name="page"/> (retrying failed loads) and reads wine details from the
/// table cells whose <c>width</c> attribute is "80%".
/// </summary>
/// <param name="page">Address of the page handed to <c>HtmlWeb.Load</c>.</param>
/// <returns>
/// The populated model; <c>null</c> when no document was obtained, when no matching cells exist,
/// or when anything throws (the failure is appended to the error file).
/// </returns>
private UpcDbModel GetUpcData(string page)
{
    var retryNumber = 0;
    var failureCount = 0;
    HtmlDocument loaded = null;

    // Any exception triggers up to 3 retries, 5 seconds apart; each retry is written to a dated policy log.
    var retryPolicy = Policy
        .Handle<Exception>()
        .WaitAndRetry(
            retryCount: 3,
            sleepDurationProvider: retryAttempt => TimeSpan.FromSeconds(5),
            onRetry: (error, delay) =>
            {
                // <run path>\policyError.<yyyy-MM-dd>.txt
                var baseLog = new FileInfo(_runPath + @"\policyError.txt");
                var pathWithoutExtension = baseLog.FullName.Replace(baseLog.Extension, string.Empty);
                var datedLog = $"{pathWithoutExtension}.{DateTime.Now.ToString("yyyy-MM-dd")}{baseLog.Extension}";

                lock (_sync)
                {
                    using (var writer = File.AppendText(datedLog))
                    {
                        loaded = null;
                        writer.WriteLine($"Retries {retryNumber} :: Policy logging: {page} :: {error.Message}");
                    }

                    retryNumber++;
                }
            });

    try
    {
        retryPolicy.Execute(() =>
        {
            var web = new HtmlWeb();
            loaded = web.Load(page);
        });

        if (loaded == null)
        {
            return null;
        }

        var rows = loaded.DocumentNode.SelectNodes("//tr");
        var cells = (from cell in rows.Descendants("td")
                     where cell.GetAttributeValue("width", "") == "80%"
                     select cell).ToList();

        if (cells.Count == 0)
        {
            return null;
        }

        // Fields are read by fixed cell position; spaces are removed from every value.
        var result = new UpcDbModel();
        result.WineName = cells[0].InnerText.Replace(" ", string.Empty);
        result.Winery = cells[1].InnerText.Replace(" ", string.Empty);
        result.Varietal = cells[2].InnerText.Replace(" ", string.Empty);
        result.Region = cells[3].InnerText.Replace(" ", string.Empty);
        result.UpcCode = cells[12].InnerText.Replace(" ", string.Empty);
        result.Rating = cells[6].InnerText.Replace(" ", string.Empty);

        // TryParse leaves 0 in place when the cell text is not numeric.
        decimal parsedSize;
        var sizeText = cells[9].InnerText.Replace(" ", string.Empty).Replace("ml", string.Empty);
        decimal.TryParse(sizeText, out parsedSize);
        result.Size = parsedSize;

        int parsedYear;
        var yearText = cells[4].InnerText.Replace(" ", string.Empty).Replace("ml", string.Empty);
        int.TryParse(yearText, out parsedYear);
        result.Year = parsedYear;

        Console.WriteLine($"saving wine name : {result.WineName}");

        return result;
    }
    catch (Exception failure)
    {
        lock (_sync)
        {
            using (var errorLog = File.AppendText(_fileNameError))
            {
                failureCount++;
                errorLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {failure.Message} :: failures :: {failureCount}");
                errorLog.WriteLine($"stack trace :: {failure}");
            }
        }

        return null;
    }
}
/// <summary>
/// Downloads <paramref name="page"/> (retrying failed loads) and maps its "prem-prod-info"
/// markup onto a new <see cref="UpcDbModel"/>, joining all region entries into one string.
/// </summary>
/// <param name="page">Address of the page handed to <c>HtmlWeb.Load</c>.</param>
/// <returns>
/// The populated model; <c>null</c> when no document was obtained or when anything throws
/// (the failure is appended to the error file).
/// </returns>
private UpcDbModel GetUpcData(string page)
{
    var retryNumber = 0;
    var failureCount = 0;
    HtmlDocument loaded = null;

    // Any exception triggers up to 3 retries, 5 seconds apart; each retry is written to a dated policy log.
    var retryPolicy = Policy
        .Handle<Exception>()
        .WaitAndRetry(
            retryCount: 3,
            sleepDurationProvider: retryAttempt => TimeSpan.FromSeconds(5),
            onRetry: (error, delay) =>
            {
                // <run path>\policyError.<yyyy-MM-dd>.txt
                var baseLog = new FileInfo(_runPath + @"\policyError.txt");
                var pathWithoutExtension = baseLog.FullName.Replace(baseLog.Extension, string.Empty);
                var datedLog = $"{pathWithoutExtension}.{DateTime.Now.ToString("yyyy-MM-dd")}{baseLog.Extension}";

                lock (_sync)
                {
                    using (var writer = File.AppendText(datedLog))
                    {
                        loaded = null;
                        writer.WriteLine($"Retries {retryNumber} :: Policy logging: {page} :: {error.Message}");
                    }

                    retryNumber++;
                }
            });

    try
    {
        retryPolicy.Execute(() =>
        {
            var web = new HtmlWeb();
            loaded = web.Load(page);
        });

        if (loaded == null)
        {
            return null;
        }

        // prem-prod-info
        var nameNodes = loaded.DocumentNode.SelectNodes("//div[@class='product-name']//h1");
        var ratingNodes = loaded.DocumentNode.SelectNodes("//div[@class='ratings']");
        var regionNodes = loaded.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-region']//dl/dd");
        var contentNodes = loaded.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-contents']");
        var detailNodes = loaded.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/dd");
        var upcMetaNodes = loaded.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/meta");
        var imageNodes = loaded.DocumentNode.SelectNodes("//p[@class='product-image']//img");

        // A node list that is null leaves the matching field null; line feeds are stripped from every value.
        // Region is filled in further down from every region entry.
        var result = new UpcDbModel();
        result.WineName = nameNodes?[0].InnerText.Replace("\n", string.Empty);
        result.Category = detailNodes?[0].InnerText.Replace("\n", string.Empty);
        result.Winery = detailNodes?[1].InnerText.Replace("\n", string.Empty);
        result.Varietal = contentNodes?[0].InnerText.Replace("\n", string.Empty);
        result.UpcCode = upcMetaNodes?[0].Attributes["content"].Value.Replace("\n", string.Empty);
        result.Rating = ratingNodes?[0].InnerText.Replace("\n", string.Empty);
        result.ImagePath = imageNodes?[0].Attributes["src"].Value.Replace("\n", string.Empty);

        // TryParse leaves 0 in place when the text is not numeric.
        decimal parsedSize;
        var sizeText = detailNodes?[2].InnerText.Replace(" ", string.Empty).Replace("ml.", string.Empty);
        decimal.TryParse(sizeText, out parsedSize);
        result.Size = parsedSize;

        int parsedYear;
        var yearText = detailNodes?[3].InnerText.Replace(" ", string.Empty).Replace("ml", string.Empty);
        int.TryParse(yearText, out parsedYear);
        result.Year = parsedYear;

        if (regionNodes != null && regionNodes.Any())
        {
            // Append "<entry>, " for each region, then drop the trailing separator.
            var regionText = result.Region;
            foreach (var entry in regionNodes)
            {
                regionText += entry.InnerText.Replace("\n", string.Empty) + ", ";
            }

            result.Region = regionText.TrimEnd(' ').TrimEnd(',');
        }

        return result;
    }
    catch (Exception failure)
    {
        lock (_sync)
        {
            using (var errorLog = File.AppendText(_fileNameError))
            {
                failureCount++;
                errorLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {failure.Message} :: failures :: {failureCount}");
                errorLog.WriteLine($"stack trace :: {failure}");
            }
        }

        return null;
    }
}
/// <summary>
/// Downloads <paramref name="page"/> (retrying failed loads) and reads winery, varietal,
/// UPC, image path and size from its brand / name / image / unit / sku elements.
/// </summary>
/// <remarks>
/// Wine name, region and rating are not extracted by this variant, and the year is always set to 0.
/// </remarks>
/// <param name="page">Address of the page handed to <c>HtmlWeb.Load</c>.</param>
/// <returns>
/// The populated model; <c>null</c> when no document was obtained or when anything throws
/// (the failure is appended to the error file).
/// </returns>
private UpcDbModel GetUpcData(string page)
{
    var retryNumber = 0;
    var failureCount = 0;
    HtmlDocument loaded = null;

    // Any exception triggers up to 3 retries, 5 seconds apart; each retry is written to a dated policy log.
    var retryPolicy = Policy
        .Handle<Exception>()
        .WaitAndRetry(
            retryCount: 3,
            sleepDurationProvider: retryAttempt => TimeSpan.FromSeconds(5),
            onRetry: (error, delay) =>
            {
                // <run path>\policyError.<yyyy-MM-dd>.txt
                var baseLog = new FileInfo(_runPath + @"\policyError.txt");
                var pathWithoutExtension = baseLog.FullName.Replace(baseLog.Extension, string.Empty);
                var datedLog = $"{pathWithoutExtension}.{DateTime.Now.ToString("yyyy-MM-dd")}{baseLog.Extension}";

                lock (_sync)
                {
                    using (var writer = File.AppendText(datedLog))
                    {
                        loaded = null;
                        writer.WriteLine($"Retries {retryNumber} :: Policy logging: {page} :: {error.Message}");
                    }

                    retryNumber++;
                }
            });

    try
    {
        retryPolicy.Execute(() =>
        {
            var web = new HtmlWeb();
            loaded = web.Load(page);
        });

        if (loaded == null)
        {
            return null;
        }

        var brand = loaded.DocumentNode.SelectSingleNode("//div[@class='brand']//span");
        var varietal = loaded.DocumentNode.SelectSingleNode("//div/span[@class='name']");
        var image = loaded.DocumentNode.SelectSingleNode("//div[@class='image']/a/img");
        var unit = loaded.DocumentNode.SelectSingleNode("//section/p/span[@class='unit']");
        var sku = loaded.DocumentNode.SelectSingleNode("//section/p/span[@class='sku']");

        // A missing element leaves the matching field null.
        var result = new UpcDbModel();
        result.Winery = brand?.InnerText.Replace(" ", string.Empty);
        result.Varietal = varietal?.InnerText.Replace(" ", string.Empty);
        result.UpcCode = sku?.InnerText.Replace("SKU / UPC: ", string.Empty);
        result.ImagePath = image?.Attributes["src"].Value.Replace("\n", string.Empty);

        // TryParse leaves 0 in place when the unit text is missing or not numeric.
        decimal parsedSize;
        var sizeText = unit?.InnerText.Replace(" fl oz", string.Empty).Replace("ml", string.Empty);
        decimal.TryParse(sizeText, out parsedSize);
        result.Size = parsedSize;

        result.Year = 0;

        // WineName is never assigned above, so this prints the model's initial value for it.
        Console.WriteLine($"saving wine name : {result.WineName}");

        return result;
    }
    catch (Exception failure)
    {
        lock (_sync)
        {
            using (var errorLog = File.AppendText(_fileNameError))
            {
                failureCount++;
                errorLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {failure.Message} :: failures :: {failureCount}");
                errorLog.WriteLine($"stack trace :: {failure}");
            }
        }

        return null;
    }
}
/// <summary>
/// Downloads <paramref name="page"/> (retrying failed loads) and scrapes the
/// <c>characteristicsArea</c> layout into a new <see cref="UpcDbModel"/>.
/// </summary>
/// <remarks>
/// Category and varietal come from the first two characteristic links; country, region and
/// winery come from the first link whose <c>href</c> contains "Country", "Region" and
/// "?brandid" respectively. The wine name is the first <c>span.title</c> with the winery
/// removed; a trailing four-digit year is stored as the year and stripped from the name.
/// Size is fixed at 750. Optional pieces (UPC, image, rating, alcohol level, year) are left
/// empty when the page lacks them instead of failing the whole record.
/// </remarks>
/// <param name="page">Address of the page handed to <c>HtmlWeb.Load</c>.</param>
/// <returns>
/// The populated model; <c>null</c> when no document was obtained, when the page lacks the
/// required characteristic links or title, or when anything else throws (the failure is
/// appended to the error file).
/// </returns>
private UpcDbModel GetUpcData(string page)
{
    const int yearLength = 4;
    const decimal defaultSizeMl = 750m;

    int retries = 0;
    int eventualFailures = 0;
    HtmlDocument document = null;

    // Any exception triggers up to 3 retries, 5 seconds apart; each retry is written to a dated policy log.
    var policy = Policy
        .Handle<Exception>()
        .WaitAndRetry(
            retryCount: 3,
            sleepDurationProvider: attempt => TimeSpan.FromSeconds(5),
            onRetry: (exception, calculatedWaitDuration) =>
            {
                // Build <run path>\policyError.<yyyy-MM-dd>.txt from path components; removing the
                // extension with a text replace on the full path would also hit a directory name
                // that happens to contain it.
                var fi = new FileInfo(_runPath + @"\policyError.txt");
                var datedName = $"{Path.GetFileNameWithoutExtension(fi.Name)}.{DateTime.Now.ToString("yyyy-MM-dd")}{fi.Extension}";
                var processErrorFile = Path.Combine(fi.DirectoryName, datedName);

                lock (_sync)
                {
                    using (var processLog = File.AppendText(processErrorFile))
                    {
                        document = null;
                        processLog.WriteLine($"Retries {retries} :: Policy logging: {page} :: {exception.Message}");
                    }

                    retries++;
                }
            });

    try
    {
        policy.Execute(() =>
        {
            lock (_sync)
            {
                _getHtmlWeb = new HtmlWeb();
                document = _getHtmlWeb.Load(page);
            }
        });

        if (document == null)
        {
            return null;
        }

        if (document.DocumentNode.FirstChild == null)
        {
            throw new InvalidOperationException($"Empty document returned for {page}.");
        }

        // SelectNodes yields null rather than an empty collection when nothing matches.
        var upcNodes = document.DocumentNode.SelectNodes("//div[@class='characteristicsArea']//a");
        if (upcNodes == null || upcNodes.Count < 2)
        {
            throw new InvalidOperationException(
                $"Expected at least 2 characteristic links but found {upcNodes?.Count ?? 0}.");
        }

        var category = upcNodes[0].InnerText;
        var varietal = upcNodes[1].InnerText;

        // First link whose href contains the fragment, or "" when there is none.
        // GetAttributeValue tolerates anchors that have no href at all.
        Func<string, string> firstLinkText = hrefFragment => upcNodes
            .Where(n => n.GetAttributeValue("href", string.Empty).Contains(hrefFragment))
            .Select(n => n.InnerText.Trim())
            .FirstOrDefault() ?? string.Empty;

        var ctry = firstLinkText("Country");
        var reg = firstLinkText("Region");
        var winery = firstLinkText("?brandid");

        // Joining only the non-empty parts avoids a dangling or leading ", ".
        var region = string.Join(", ", new[] { ctry, reg }.Where(part => part.Length > 0));

        // Item title.
        var upcTitleNodes = document.DocumentNode.SelectNodes("//span[@class='title']");
        if (upcTitleNodes == null)
        {
            throw new InvalidOperationException("No title span found.");
        }

        var wineName = upcTitleNodes[0].InnerText;

        // string.Replace throws ArgumentException for an empty search string.
        if (winery.Length > 0)
        {
            wineName = wineName.Replace(winery, string.Empty);
        }

        wineName = wineName.Trim();

        // Only a trailing run of four digits counts as a year. When there is none the name is
        // left untouched (stripping the text "0" would delete every zero in the name).
        int year = 0;
        if (wineName.Length >= yearLength)
        {
            int parsedYear;
            var yearValue = wineName.Substring(wineName.Length - yearLength);
            if (int.TryParse(
                    yearValue,
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out parsedYear))
            {
                year = parsedYear;
                wineName = wineName.Substring(0, wineName.Length - yearLength).TrimEnd();
            }
        }

        var ratingLf = document.DocumentNode.SelectNodes("//td[@class='reviewIconLeft']")?[0].InnerText;
        var ratingRt = document.DocumentNode.SelectNodes("//td[@class='reviewIconRight']")?[0].InnerText;

        // UPC.
        var upcNodesUpc = document.DocumentNode.SelectNodes(
            "//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/meta");
        var upcCode = upcNodesUpc?[0].GetAttributeValue("content", (string)null);

        // Image: leave the path empty when there is no image instead of storing a bare "http://".
        var upcNodesUpcImage = document.DocumentNode.SelectNodes("//td[@class='imageArea']//img");
        var imageSource = upcNodesUpcImage?[0].GetAttributeValue("src", (string)null);

        var wine = new UpcDbModel
        {
            WineName = wineName.Replace("\n", string.Empty),
            Category = category.Replace("\n", string.Empty),
            Winery = winery.Replace("\n", string.Empty),
            Varietal = varietal.Replace("\n", string.Empty),
            Region = region,
            UpcCode = upcCode?.Replace("\n", string.Empty),
            Rating = ratingLf + ratingRt,
            ImagePath = string.IsNullOrEmpty(imageSource)
                ? null
                : "http://" + imageSource.Replace("\n", string.Empty),
            Size = defaultSizeMl,
            Year = year,
        };

        // Alcohol level: the last paragraph containing a parsable percentage wins.
        // Paragraphs that merely mention "%" alongside other text are skipped.
        var alcoholNode = document.DocumentNode.SelectNodes("//div[@class='characteristicsArea']//p");
        if (alcoholNode != null)
        {
            foreach (var node in alcoholNode)
            {
                var text = node.InnerText;
                if (!text.Contains("%"))
                {
                    continue;
                }

                decimal level;
                if (decimal.TryParse(text.Replace("%", string.Empty), out level))
                {
                    wine.AlchoholLevel = level;
                }
            }
        }

        return wine;
    }
    catch (Exception exception)
    {
        lock (_sync)
        {
            using (var processLog = File.AppendText(_fileNameError))
            {
                eventualFailures += 1;
                processLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {exception.Message} :: failures :: {eventualFailures}");
                processLog.WriteLine($"stack trace :: {exception}");
            }
        }
    }

    return null;
}