Exemplo n.º 1
0
        /// <summary>
        /// Loads a product page and copies category, varietal, region, winery, wine name and vintage
        /// year from it into <paramref name="productInfo"/>.
        /// </summary>
        /// <param name="page">Address of the product page, passed to <c>HtmlWeb.Load</c>.</param>
        /// <param name="productInfo">Model to populate; the same instance is returned.</param>
        /// <returns>
        /// <paramref name="productInfo"/>. Fields whose source markup is missing from the page are left
        /// as they were on entry.
        /// </returns>
        public UpcDbModel ScrapeWineDetail(string page, UpcDbModel productInfo)
        {
            // Expected order of the links inside the characteristics area:
            //   [0] Category   e.g. White Wine
            //   [1] Varietal   e.g. Cortese
            //   [2..4] Region  e.g. Italy » Piedmont » Gavi
            //   [5] Producer   e.g. La Scolca
            var getHtmlWeb = new HtmlWeb();
            var document   = getHtmlWeb.Load(page);

            var upcNodes = document.DocumentNode.SelectNodes("//div[@class='characteristicsArea']//a");

            // SelectNodes yields null when nothing matches, and a page may expose fewer links than
            // the six read here, so only index once the full set is known to be present.
            if (upcNodes != null && upcNodes.Count >= 6)
            {
                productInfo.Category = upcNodes[0].InnerText;
                productInfo.Varietal = upcNodes[1].InnerText;
                productInfo.Region   = $"{upcNodes[2].InnerText} / {upcNodes[3].InnerText} / {upcNodes[4].InnerText}";
                productInfo.Winery   = upcNodes[5].InnerText;
            }

            //item title - itemTitle
            var upcTitleNodes = document.DocumentNode.SelectNodes("//span[@class='title']");
            if (upcTitleNodes == null)
            {
                return(productInfo);
            }

            var title = upcTitleNodes[0].InnerText;

            // string.Replace rejects a null or empty search value, so skip it when no winery is known.
            if (!string.IsNullOrEmpty(productInfo.Winery))
            {
                title = title.Replace(productInfo.Winery, string.Empty);
            }
            title = title.Trim();

            // The vintage is expected as the last four characters of the title. NumberStyles.None
            // accepts digits only, so titles without a trailing year are left intact.
            int year;
            if (title.Length >= 4 &&
                int.TryParse(
                    title.Substring(title.Length - 4),
                    System.Globalization.NumberStyles.None,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out year))
            {
                productInfo.Year = year;

                // Drop only the trailing year (not every occurrence of those digits) and the
                // whitespace that preceded it.
                title = title.Substring(0, title.Length - 4).TrimEnd();
            }

            productInfo.WineName = title;

            return(productInfo);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Looks up a fixed UPC, seeds a new <see cref="UpcDbModel"/> from the result and enriches it
        /// by scraping a fixed product page.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the lookup returned data and the scrape was run; <c>false</c> when the
        /// lookup returned nothing.
        /// </returns>
        public bool Run()
        {
            var digitEyesProductInfo = GetUpcData("0089744756510");

            // Guard against a failed lookup before dereferencing the result.
            if (digitEyesProductInfo == null)
            {
                return(false);
            }

            digitEyesProductInfo.product_web_page = "http://www.vinerepublic.com/r/products/la-scolca-gavi-di-gavi-black-label-2011";

            // The unit of measure is read as "<number>ML"; fall back to 750 when it is missing
            // or is not a whole number.
            int size;
            if (digitEyesProductInfo.uom == null ||
                !int.TryParse(digitEyesProductInfo.uom.Replace("ML", string.Empty), out size))
            {
                size = 750;
            }

            var productInfo = new UpcDbModel();

            productInfo.UpcCode  = digitEyesProductInfo.upc_code;
            productInfo.Size     = size;
            productInfo.WineName = digitEyesProductInfo.description;

            // ScrapeWineDetail fills productInfo in place; its return value is the same instance.
            ScrapeWineDetail(digitEyesProductInfo.product_web_page, productInfo);

            return(true);
        }
        /// <summary>
        /// Loads <paramref name="page"/> and builds a <see cref="UpcDbModel"/> from the
        /// <c>prem-prod-info</c> sections of the document.
        /// </summary>
        /// <param name="page">Address of the product page, passed to <c>HtmlWeb.Load</c>.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when loading or parsing throws; the failure message is
        /// appended to the <c>_fileNameError</c> log.
        /// </returns>
        private async Task <UpcDbModel> GetUpcDataAsync(string page)
        {
            try
            {
                // Load inside the try so that download failures are logged and reported as null,
                // the same way parsing failures are.
                var getHtmlWeb = new HtmlWeb();
                var document   = getHtmlWeb.Load(page);

                //prem-prod-info
                var upcNodesName     = document.DocumentNode.SelectNodes("//div[@class='product-name']//h1");
                var upcNodesRatings  = document.DocumentNode.SelectNodes("//div[@class='ratings']");
                var upcNodesRegion   = document.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-region']//dl/dd");
                var upcNodesContents = document.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-contents']");
                var upcNodesDetails  = document.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/dd");
                var upcNodesUpc      = document.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/meta");
                var upcNodesUpcImage = document.DocumentNode.SelectNodes("//p[@class='product-image']//img");

                // Each collection is null when its XPath matched nothing; the ?. chains then leave
                // the corresponding field null. Detail entries are read by position:
                //   [0] category, [1] winery, [2] size, [3] year.
                var wine = new UpcDbModel
                {
                    WineName  = upcNodesName?[0].InnerText.Replace("\n", string.Empty),
                    Category  = upcNodesDetails?[0].InnerText.Replace("\n", string.Empty),
                    Winery    = upcNodesDetails?[1].InnerText.Replace("\n", string.Empty),
                    Varietal  = upcNodesContents?[0].InnerText.Replace("\n", string.Empty),
                    Region    = upcNodesRegion?[0].InnerText.Replace("\n", string.Empty),
                    // The attribute itself may be absent even when the element exists.
                    UpcCode   = upcNodesUpc?[0].Attributes["content"]?.Value.Replace("\n", string.Empty),
                    Rating    = upcNodesRatings?[0].InnerText.Replace("\n", string.Empty),
                    ImagePath = upcNodesUpcImage?[0].Attributes["src"]?.Value.Replace("\n", string.Empty),
                };

                // TryParse leaves 0 in place when the text is missing or not numeric.
                var wineSize = 0;
                int.TryParse(upcNodesDetails?[2].InnerText.Replace("&nbsp;", string.Empty).Replace("ml.", string.Empty), out wineSize);
                wine.Size = wineSize;

                var wineYear = 0;
                int.TryParse(upcNodesDetails?[3].InnerText.Replace("&nbsp;", string.Empty).Replace("ml", string.Empty), out wineYear);
                wine.Year = wineYear;

                return(wine);
            }
            catch (Exception exception)
            {
                using (var processLog = File.AppendText(_fileNameError))
                {
                    await processLog.WriteLineAsync($"{page} :: {exception.Message}");
                }
            }
            return(null);
        }
        /// <summary>
        /// Downloads <paramref name="page"/>, retrying on failure, and builds a
        /// <see cref="UpcDbModel"/> from the table cells whose <c>width</c> attribute is <c>80%</c>.
        /// </summary>
        /// <param name="page">Address of the page, passed to <c>HtmlWeb.Load</c>.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the page has no matching cells or any step
        /// fails; failures are appended to the <c>_fileNameError</c> log.
        /// </returns>
        private UpcDbModel GetUpcData(string page)
        {
            int retries          = 0;
            int eventualFailures = 0;

            HtmlDocument document = null;

            var policy = Policy
                         .Handle <Exception>()
                         .WaitAndRetry(
                retryCount: 3,                                             // Retry 3 times
                sleepDurationProvider: attempt => TimeSpan.FromSeconds(5), // Wait 5s between each try.
                onRetry: (exception, calculatedWaitDuration) =>            // Log every failed attempt.
            {
                // Build "<dir>\policyError.<yyyy-MM-dd>.txt". Only the file name is edited: replacing
                // the extension text inside the full path would also rewrite any directory name
                // that happens to contain ".txt".
                var fi               = new FileInfo(_runPath + @"\policyError.txt");
                var dateStamp        = DateTime.Now.ToString("yyyy-MM-dd");
                var processErrorFile = Path.Combine(
                    fi.DirectoryName,
                    $"{Path.GetFileNameWithoutExtension(fi.Name)}.{dateStamp}{fi.Extension}");

                lock (_sync)
                {
                    using (var processLog = File.AppendText(processErrorFile))
                    {
                        document = null; // Discard anything assigned by the failed attempt.
                        processLog.WriteLine($"Retries {retries} :: Policy logging: {page} :: {exception.Message}");
                    }

                    retries++;
                }
            });

            try
            {
                policy.Execute(() =>
                {
                    var getHtmlWeb = new HtmlWeb();
                    document       = getHtmlWeb.Load(page);
                });


                if (document == null)
                {
                    return(null);
                }

                // SelectNodes yields null when the page has no table rows at all; treat that the
                // same as "no matching cells".
                var rows = document.DocumentNode.SelectNodes("//tr");
                if (rows == null)
                {
                    return(null);
                }

                var upcNodes = rows.Descendants("td").Where(o => o.GetAttributeValue("width", "") == "80%").ToList();
                if (!upcNodes.Any())
                {
                    return(null);
                }

                // Cells are read by position up to index 12. Fail with an explicit message (logged
                // by the catch below) instead of an opaque out-of-range error on partial pages.
                if (upcNodes.Count < 13)
                {
                    throw new InvalidOperationException(
                        $"Expected at least 13 detail cells but found {upcNodes.Count}");
                }

                var wine = new UpcDbModel
                {
                    WineName = upcNodes[0].InnerText.Replace("&nbsp;", string.Empty),
                    Winery   = upcNodes[1].InnerText.Replace("&nbsp;", string.Empty),
                    Varietal = upcNodes[2].InnerText.Replace("&nbsp;", string.Empty),
                    Region   = upcNodes[3].InnerText.Replace("&nbsp;", string.Empty),
                    UpcCode  = upcNodes[12].InnerText.Replace("&nbsp;", string.Empty),
                    Rating   = upcNodes[6].InnerText.Replace("&nbsp;", string.Empty)
                };

                // TryParse leaves 0 in place when the cell is not numeric.
                decimal wineSize = 0;
                decimal.TryParse(upcNodes[9].InnerText.Replace("&nbsp;", string.Empty).Replace("ml", string.Empty), out wineSize);
                wine.Size = wineSize;

                var wineYear = 0;
                int.TryParse(upcNodes[4].InnerText.Replace("&nbsp;", string.Empty).Replace("ml", string.Empty), out wineYear);
                wine.Year = wineYear;
                Console.WriteLine($"saving wine name : {wine.WineName}");
                return(wine);
            }
            catch (Exception exception)
            {
                lock (_sync)
                {
                    using (var processLog = File.AppendText(_fileNameError))
                    {
                        // NOTE: the counter is a local, so the value logged here is always 1.
                        eventualFailures += 1;

                        processLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {exception.Message} :: failures :: {eventualFailures}");
                        processLog.WriteLine($"stack trace :: {exception}");
                    }
                }
            }

            return(null);
        }
        /// <summary>
        /// Downloads <paramref name="page"/>, retrying on failure, and builds a
        /// <see cref="UpcDbModel"/> from the <c>prem-prod-info</c> sections of the document.
        /// </summary>
        /// <param name="page">Address of the product page, passed to <c>HtmlWeb.Load</c>.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the download or parsing fails; failures are
        /// appended to the <c>_fileNameError</c> log.
        /// </returns>
        private UpcDbModel GetUpcData(string page)
        {
            int retries          = 0;
            int eventualFailures = 0;

            HtmlDocument document = null;

            var policy = Policy
                         .Handle <Exception>()
                         .WaitAndRetry(
                retryCount: 3,                                             // Retry 3 times
                sleepDurationProvider: attempt => TimeSpan.FromSeconds(5), // Wait 5s between each try.
                onRetry: (exception, calculatedWaitDuration) =>            // Log every failed attempt.
            {
                // Build "<dir>\policyError.<yyyy-MM-dd>.txt". Only the file name is edited: replacing
                // the extension text inside the full path would also rewrite any directory name
                // that happens to contain ".txt".
                var fi               = new FileInfo(_runPath + @"\policyError.txt");
                var dateStamp        = DateTime.Now.ToString("yyyy-MM-dd");
                var processErrorFile = Path.Combine(
                    fi.DirectoryName,
                    $"{Path.GetFileNameWithoutExtension(fi.Name)}.{dateStamp}{fi.Extension}");

                lock (_sync)
                {
                    using (var processLog = File.AppendText(processErrorFile))
                    {
                        document = null; // Discard anything assigned by the failed attempt.
                        processLog.WriteLine($"Retries {retries} :: Policy logging: {page} :: {exception.Message}");
                    }

                    retries++;
                }
            });

            try
            {
                policy.Execute(() =>
                {
                    var getHtmlWeb = new HtmlWeb();
                    document       = getHtmlWeb.Load(page);
                });


                if (document == null)
                {
                    return(null);
                }

                //prem-prod-info
                var upcNodesName    = document.DocumentNode.SelectNodes("//div[@class='product-name']//h1");
                var upcNodesRatings = document.DocumentNode.SelectNodes("//div[@class='ratings']");
                var upcNodesRegion  =
                    document.DocumentNode.SelectNodes(
                        "//div[@id='prem-prod-info']//div[@id='prem-prod-region']//dl/dd");
                var upcNodesContents =
                    document.DocumentNode.SelectNodes("//div[@id='prem-prod-info']//div[@id='prem-prod-contents']");
                var upcNodesDetails =
                    document.DocumentNode.SelectNodes(
                        "//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/dd");
                var upcNodesUpc =
                    document.DocumentNode.SelectNodes(
                        "//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/meta");
                var upcNodesUpcImage = document.DocumentNode.SelectNodes("//p[@class='product-image']//img");

                // Each collection is null when its XPath matched nothing; the ?. chains then leave
                // the corresponding field null. Detail entries are read by position:
                //   [0] category, [1] winery, [2] size, [3] year.
                var wine = new UpcDbModel
                {
                    WineName = upcNodesName?[0].InnerText.Replace("\n", string.Empty),
                    Category = upcNodesDetails?[0].InnerText.Replace("\n", string.Empty),
                    Winery   = upcNodesDetails?[1].InnerText.Replace("\n", string.Empty),
                    Varietal = upcNodesContents?[0].InnerText.Replace("\n", string.Empty),
                    // The attribute itself may be absent even when the element exists.
                    UpcCode   = upcNodesUpc?[0].Attributes["content"]?.Value.Replace("\n", string.Empty),
                    Rating    = upcNodesRatings?[0].InnerText.Replace("\n", string.Empty),
                    ImagePath = upcNodesUpcImage?[0].Attributes["src"]?.Value.Replace("\n", string.Empty),
                };

                // TryParse leaves 0 in place when the text is missing or not numeric.
                decimal wineSize;
                decimal.TryParse(
                    upcNodesDetails?[2].InnerText.Replace("&nbsp;", string.Empty).Replace("ml.", string.Empty),
                    out wineSize);
                wine.Size = wineSize;

                int wineYear;
                int.TryParse(
                    upcNodesDetails?[3].InnerText.Replace("&nbsp;", string.Empty).Replace("ml", string.Empty),
                    out wineYear);
                wine.Year = wineYear;

                // Region is the comma-separated list of every region entry; it stays unset when the
                // page has none.
                if (upcNodesRegion != null && upcNodesRegion.Count > 0)
                {
                    wine.Region = string.Join(
                        ", ",
                        upcNodesRegion.Select(region => region.InnerText.Replace("\n", string.Empty)));
                }

                return(wine);
            }
            catch (Exception exception)
            {
                lock (_sync)
                {
                    using (var processLog = File.AppendText(_fileNameError))
                    {
                        // NOTE: the counter is a local, so the value logged here is always 1.
                        eventualFailures += 1;

                        processLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {exception.Message} :: failures :: {eventualFailures}");
                        processLog.WriteLine($"stack trace :: {exception}");
                    }
                }
            }

            return(null);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Downloads <paramref name="page"/>, retrying on failure, and builds a
        /// <see cref="UpcDbModel"/> from the brand, name, image, unit and SKU elements of the page.
        /// </summary>
        /// <param name="page">Address of the product page, passed to <c>HtmlWeb.Load</c>.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the download or parsing fails; failures are
        /// appended to the <c>_fileNameError</c> log.
        /// </returns>
        private UpcDbModel GetUpcData(string page)
        {
            int retries          = 0;
            int eventualFailures = 0;

            HtmlDocument document = null;

            var policy = Policy
                         .Handle <Exception>()
                         .WaitAndRetry(
                retryCount: 3,                                             // Retry 3 times
                sleepDurationProvider: attempt => TimeSpan.FromSeconds(5), // Wait 5s between each try.
                onRetry: (exception, calculatedWaitDuration) =>            // Log every failed attempt.
            {
                // Build "<dir>\policyError.<yyyy-MM-dd>.txt". Only the file name is edited: replacing
                // the extension text inside the full path would also rewrite any directory name
                // that happens to contain ".txt".
                var fi               = new FileInfo(_runPath + @"\policyError.txt");
                var dateStamp        = DateTime.Now.ToString("yyyy-MM-dd");
                var processErrorFile = Path.Combine(
                    fi.DirectoryName,
                    $"{Path.GetFileNameWithoutExtension(fi.Name)}.{dateStamp}{fi.Extension}");

                lock (_sync)
                {
                    using (var processLog = File.AppendText(processErrorFile))
                    {
                        document = null; // Discard anything assigned by the failed attempt.
                        processLog.WriteLine($"Retries {retries} :: Policy logging: {page} :: {exception.Message}");
                    }

                    retries++;
                }
            });

            try
            {
                policy.Execute(() =>
                {
                    var getHtmlWeb = new HtmlWeb();
                    document       = getHtmlWeb.Load(page);
                });


                if (document == null)
                {
                    return(null);
                }

                // Each lookup is null when the element is missing; the ?. chains below then leave
                // the corresponding field null.
                var brandNode    = document.DocumentNode.SelectSingleNode("//div[@class='brand']//span");
                var varietalNode = document.DocumentNode.SelectSingleNode("//div/span[@class='name']");
                var imageNode    = document.DocumentNode.SelectSingleNode("//div[@class='image']/a/img");
                var sizeNode     = document.DocumentNode.SelectSingleNode("//section/p/span[@class='unit']");
                var upcNode      = document.DocumentNode.SelectSingleNode("//section/p/span[@class='sku']");

                // WineName, Region and Rating are not populated from this page layout.
                var wine = new UpcDbModel
                {
                    Winery   = brandNode?.InnerText.Replace("&nbsp;", string.Empty),
                    Varietal = varietalNode?.InnerText.Replace("&nbsp;", string.Empty),
                    UpcCode  = upcNode?.InnerText.Replace("SKU / UPC: ", string.Empty),
                    // The src attribute may be absent even when the img element exists.
                    ImagePath = imageNode?.Attributes["src"]?.Value.Replace("\n", string.Empty),
                };

                // TryParse leaves 0 in place when the unit text is missing or not numeric.
                decimal wineSize = 0;
                decimal.TryParse(sizeNode?.InnerText.Replace(" fl oz", string.Empty).Replace("ml", string.Empty), out wineSize);
                wine.Size = wineSize;

                // No vintage is read from this layout, so the year is recorded as 0.
                wine.Year = 0;

                // WineName is never assigned above, so this prints an empty name.
                Console.WriteLine($"saving wine name : {wine.WineName}");
                return(wine);
            }
            catch (Exception exception)
            {
                lock (_sync)
                {
                    using (var processLog = File.AppendText(_fileNameError))
                    {
                        // NOTE: the counter is a local, so the value logged here is always 1.
                        eventualFailures += 1;

                        processLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {exception.Message} :: failures :: {eventualFailures}");
                        processLog.WriteLine($"stack trace :: {exception}");
                    }
                }
            }

            return(null);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Downloads <paramref name="page"/>, retrying on failure, and builds a
        /// <see cref="UpcDbModel"/> from the links in the <c>characteristicsArea</c> block, the page
        /// title, the review icons, the product image and any alcohol percentage.
        /// </summary>
        /// <param name="page">Address of the product page, passed to <c>HtmlWeb.Load</c>.</param>
        /// <returns>
        /// The populated model, or <c>null</c> when the download fails or the page lacks the
        /// characteristics links or title; failures are appended to the <c>_fileNameError</c> log.
        /// </returns>
        private UpcDbModel GetUpcData(string page)
        {
            int retries          = 0;
            int eventualFailures = 0;

            HtmlDocument document = null;

            var policy = Policy
                         .Handle <Exception>()
                         .WaitAndRetry(
                retryCount: 3,                                             // Retry 3 times
                sleepDurationProvider: attempt => TimeSpan.FromSeconds(5), // Wait 5s between each try.
                onRetry: (exception, calculatedWaitDuration) =>            // Log every failed attempt.
            {
                // Build "<dir>\policyError.<yyyy-MM-dd>.txt". Only the file name is edited: replacing
                // the extension text inside the full path would also rewrite any directory name
                // that happens to contain ".txt".
                var fi               = new FileInfo(_runPath + @"\policyError.txt");
                var dateStamp        = DateTime.Now.ToString("yyyy-MM-dd");
                var processErrorFile = Path.Combine(
                    fi.DirectoryName,
                    $"{Path.GetFileNameWithoutExtension(fi.Name)}.{dateStamp}{fi.Extension}");

                lock (_sync)
                {
                    using (var processLog = File.AppendText(processErrorFile))
                    {
                        document = null; // Discard anything assigned by the failed attempt.
                        processLog.WriteLine($"Retries {retries} :: Policy logging: {page} :: {exception.Message}");
                    }

                    retries++;
                }
            });

            try
            {
                policy.Execute(() =>
                {
                    // The shared _getHtmlWeb field is replaced and used under the lock, so page
                    // loads through this method run one at a time.
                    lock (_sync)
                    {
                        _getHtmlWeb = new HtmlWeb();
                        document    = _getHtmlWeb.Load(page);
                    }
                });


                if (document == null)
                {
                    return(null);
                }

                // The exceptions thrown below are caught by this method's own handler, which logs
                // the message and returns null.
                if (document.DocumentNode.FirstChild == null)
                {
                    throw new InvalidOperationException($"Empty document returned for {page}");
                }

                // SelectNodes yields null when nothing matches. The first two links are read by
                // position: [0] category, [1] varietal.
                var upcNodes = document.DocumentNode.SelectNodes("//div[@class='characteristicsArea']//a");
                if (upcNodes == null || upcNodes.Count < 2)
                {
                    throw new InvalidOperationException($"Characteristics links not found on {page}");
                }

                var category = upcNodes[0].InnerText;
                var varietal = upcNodes[1].InnerText;

                // Text of the first link whose href contains the marker, or "" when there is none.
                // Links without an href are skipped rather than failing the whole record.
                Func <string, string> firstLinkText = marker =>
                    upcNodes.Where(n => (n.Attributes["href"]?.Value ?? string.Empty).Contains(marker))
                            .Select(n => n.InnerText.Trim())
                            .FirstOrDefault() ?? string.Empty;

                var country    = firstLinkText("Country");
                var regionName = firstLinkText("Region");
                var winery     = firstLinkText("?brandid");

                // Join only the parts that are present so a missing country or region leaves no
                // stray separator.
                var region = string.Join(", ", new[] { country, regionName }.Where(part => part.Length > 0));

                var alcoholNode = document.DocumentNode.SelectNodes("//div[@class='characteristicsArea']//p");

                //item title - itemTitle
                var upcTitleNodes = document.DocumentNode.SelectNodes("//span[@class='title']");
                if (upcTitleNodes == null)
                {
                    throw new InvalidOperationException($"Title not found on {page}");
                }

                var wineName = upcTitleNodes[0].InnerText;

                // string.Replace rejects an empty search value, so skip it when no brand link exists.
                if (winery.Length > 0)
                {
                    wineName = wineName.Replace(winery, string.Empty);
                }
                wineName = wineName.Trim();

                // The vintage is expected as the last four characters of the title. NumberStyles.None
                // accepts digits only; when the suffix is not a year the name is left intact and the
                // year stays 0.
                int year = 0;
                if (wineName.Length >= 4 &&
                    int.TryParse(
                        wineName.Substring(wineName.Length - 4),
                        System.Globalization.NumberStyles.None,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out year))
                {
                    // Drop only the trailing year and the whitespace that preceded it.
                    wineName = wineName.Substring(0, wineName.Length - 4).TrimEnd();
                }

                var ratingLf = document.DocumentNode.SelectNodes("//td[@class='reviewIconLeft']")?[0].InnerText;
                var ratingRt = document.DocumentNode.SelectNodes("//td[@class='reviewIconRight']")?[0].InnerText;

                //upc
                var upcNodesUpc =
                    document.DocumentNode.SelectNodes(
                        "//div[@id='prem-prod-info']//div[@id='prem-prod-details']//dl/meta");

                //image
                var upcNodesUpcImage = document.DocumentNode.SelectNodes("//td[@class='imageArea']//img");
                var imageSource      = upcNodesUpcImage?[0].Attributes["src"]?.Value.Replace("\n", string.Empty);

                var wine = new UpcDbModel
                {
                    WineName  = wineName.Replace("\n", string.Empty),
                    Category  = category.Replace("\n", string.Empty),
                    Winery    = winery.Replace("\n", string.Empty),
                    Varietal  = varietal.Replace("\n", string.Empty),
                    Region    = region,
                    UpcCode   = upcNodesUpc?[0].Attributes["content"]?.Value.Replace("\n", string.Empty),
                    Rating    = ratingLf + ratingRt,
                    // Only prefix the scheme when an image source was actually found.
                    ImagePath = string.IsNullOrEmpty(imageSource) ? null : "http://" + imageSource,
                };

                // The size is not read from the page; 750 is the fixed value used for this layout.
                decimal wineSize = 750m;
                wine.Size = wineSize;

                wine.Year = year;

                // Record the alcohol level from any paragraph containing a percentage. Text that
                // is not a plain number is skipped instead of discarding the whole record; when
                // several paragraphs parse, the last one wins.
                if (alcoholNode != null)
                {
                    foreach (var node in alcoholNode)
                    {
                        decimal alcohol;
                        if (node.InnerText.Contains("%") &&
                            decimal.TryParse(node.InnerText.Replace("%", string.Empty), out alcohol))
                        {
                            wine.AlchoholLevel = alcohol;
                        }
                    }
                }

                return(wine);
            }
            catch (Exception exception)
            {
                lock (_sync)
                {
                    using (var processLog = File.AppendText(_fileNameError))
                    {
                        // NOTE: the counter is a local, so the value logged here is always 1.
                        eventualFailures += 1;

                        processLog.WriteLine($"Time:: {DateTime.Now} :: {page} :: {exception.Message} :: failures :: {eventualFailures}");
                        processLog.WriteLine($"stack trace :: {exception}");
                    }
                }
            }

            return(null);
        }