Пример #1
0
        /// <summary>
        /// Downloads the listing page at <paramref name="link"/>, turns every
        /// <c>div.item</c> element into a <c>Sneaker</c>, adds it to
        /// <paramref name="catalog"/> and then follows the "next page" arrow recursively.
        /// </summary>
        /// <param name="catalog">Catalog that receives the parsed sneakers through <c>AddUniqueSneaker</c>.</param>
        /// <param name="link">Absolute URL of the listing page to download.</param>
        public void ParseSneakersFromPage(Catalog catalog, string link)
        {
            Console.WriteLine("Парсим " + link);
            Uri uri = new Uri(link);

            // WebClient is IDisposable: the using block releases it even when the download throws.
            string source;
            using (var webClient = new WebClient())
            {
                source = webClient.DownloadString(uri);
            }

            var parser   = new HtmlParser();
            var document = parser.Parse(source);
            var items    = document.QuerySelectorAll("div.item");

            foreach (var item in items)
            {
                var sneaker = new Sneaker();

                sneaker.sku = item.QuerySelector("div.art").InnerHtml;

                // price: when span.ssale is present it holds the current price and
                // the <s> element holds the old one; otherwise div.price holds the price itself
                var priceDiv = item.QuerySelector("div.price");
                var saleSpan = priceDiv.QuerySelector("span.ssale");
                if (saleSpan != null)
                {
                    sneaker.price    = Int32.Parse(saleSpan.InnerHtml.Replace("р.", ""));
                    sneaker.oldPrice = Int32.Parse(priceDiv.QuerySelector("s").InnerHtml);
                }
                else
                {
                    sneaker.price = Int32.Parse(priceDiv.InnerHtml.Replace("р.", ""));
                }

                // title: original author's note - the type still has to be extracted
                // from the title and "nike" removed; ParseTitle is called for that
                sneaker.title = item.QuerySelector("span.name").InnerHtml;
                sneaker.ParseTitle();
                sneaker.link  = SITEURL + item.QuerySelector("a").GetAttribute("href");
                sneaker.brand = this.brand;

                // sizes: comma-separated list inside div.item_sizes
                string[] sizesStrArr = item.QuerySelector("div.item_sizes").InnerHtml.Split(',');
                foreach (var size in sizesStrArr)
                {
                    sneaker.sizes.Add(new SneakerSize(sneaker, size.Trim()));
                }

                catalog.AddUniqueSneaker(sneaker);
            }

            // next page: a page without a pager block has nothing to follow
            var pager = document.QuerySelector("div.pages");
            if (pager == null)
            {
                return;
            }

            foreach (var nextlink in pager.QuerySelectorAll("a.arrow"))
            {
                if (nextlink.ClassName == "arrow r")
                {
                    string nextPageLink = nextlink.GetAttribute("href");
                    Thread.Sleep(100); // short pause before requesting the next page
                    ParseSneakersFromPage(catalog, SITEURL + nextPageLink);
                    break; // follow the "next" arrow only once per page
                }
            }
        }
        /// <summary>
        /// Downloads the listing page at <paramref name="link"/> (read as UTF-8), turns every
        /// <c>div.ms2_product</c> element into a <c>Sneaker</c> and adds it to
        /// <paramref name="catalog"/>. This overload does not follow pagination.
        /// </summary>
        /// <param name="catalog">Catalog that receives the parsed sneakers through <c>AddUniqueSneaker</c>.</param>
        /// <param name="link">Absolute URL of the listing page to download.</param>
        /// <param name="sex">Value copied to <c>sex</c> of every sneaker found on the page.</param>
        /// <param name="category">Value copied to <c>category</c> of every sneaker found on the page.</param>
        public void ParseSneakersFromPage(Catalog catalog, string link, string sex, string category)
        {
            Console.WriteLine("Парсим " + link);
            Uri uri = new Uri(link);

            // using guarantees the client is disposed even when DownloadString throws
            string source;
            using (WebClient webClient = new WebClient())
            {
                webClient.Encoding = Encoding.UTF8;
                source = webClient.DownloadString(uri);
            }

            var parser   = new HtmlParser();
            var document = parser.Parse(source);
            var items    = document.QuerySelectorAll("div.ms2_product");

            foreach (var item in items)
            {
                var sneaker = new Sneaker();
                sneaker.sex      = sex;
                sneaker.category = category;
                sneaker.link     = SITEURL + "/" + item.QuerySelector("a").GetAttribute("href");

                // title
                sneaker.title = item.QuerySelector("span.item-txt").InnerHtml;
                sneaker.ParseTitle();
                sneaker.title = sneaker.title.Replace("Jordan AIR JORDAN", "AIR JORDAN");
                sneaker.title = sneaker.title.Replace("Jordan JORDAN", "AIR JORDAN");

                // price: strip the commented-out currency header, the currency code and all spaces
                // NOTE(review): Double.Parse uses the current culture here - confirm the expected format
                string priceString = item.QuerySelector("div.item-price").InnerHtml
                    .Replace("<!--h5>RUB</h5-->", "")
                    .Replace("RUB", "")
                    .Trim()
                    .Replace(" ", "");
                sneaker.price = Double.Parse(priceString);

                // sizes: one size per line, prefixed with "US"
                // (original author's note: one SKU uses "UK" instead of "US")
                string sizeString = item.QuerySelector("div.size-box").InnerHtml
                    .Replace("US", "")
                    .Replace("UK", "");
                string[] sizeArr = sizeString.Split(new string[] { "\n" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (var size in sizeArr)
                {
                    // RemoveEmptyEntries keeps whitespace-only lines, so filter them here
                    if (!String.IsNullOrWhiteSpace(size))
                    {
                        sneaker.sizes.Add(new SneakerSize(size.Trim()));
                    }
                }
                sneaker.DeleteDuplicateSizes();
                catalog.AddUniqueSneaker(sneaker);
            }
        }
Пример #3
0
        /// <summary>
        /// Loads the listing page at <paramref name="link"/> through
        /// <c>GetHtmlPageCrawlera5Try</c>, turns every <c>li</c> of <c>ul.products-list</c>
        /// into a <c>Sneaker</c>, adds the ones not yet present to <paramref name="catalog"/>
        /// and then follows the "Next page" link recursively.
        /// </summary>
        /// <param name="catalog">Catalog that receives the parsed sneakers.</param>
        /// <param name="brand">Brand assigned to every sneaker; prepended (upper-cased) to titles that lack it.</param>
        /// <param name="link">URL of the listing page to load.</param>
        public void ParseSneakersFromPage(Catalog catalog, string brand, string link)
        {
            _logger.Info("Парсим " + link);
            var document = GetHtmlPageCrawlera5Try(link);

            if (document == null)
            {
                return;
            }

            // items: without the product list there is nothing to parse on this page
            var productList = document.QuerySelector("ul.products-list");
            if (productList == null)
            {
                _logger.Info("ul.products-list not found: " + link);
                return;
            }

            foreach (var item in productList.QuerySelectorAll("li"))
            {
                var sneaker = new Sneaker();

                // brand
                sneaker.brand = brand;

                // link: the third anchor of the item carries the product URL
                var links = item.QuerySelectorAll("a");
                sneaker.link = links[2].GetAttribute("href");

                // prices
                // NOTE(review): '.' is replaced by ',' before double.Parse, so parsing relies on
                // the current culture using ',' as the decimal separator - confirm
                var    priceHTML     = item.QuerySelector("div.price");
                var    salePriceHTML = item.QuerySelector("div.sale-price");
                string priceString;
                if (salePriceHTML != null)
                {
                    // the item is on sale: div.sale-price is the current price, div.price the old one
                    priceString   = salePriceHTML.QuerySelector("b").InnerHtml;
                    sneaker.price = double.Parse(priceString.Replace(".", ",").Trim());

                    priceString      = priceHTML.QuerySelector("b").InnerHtml;
                    sneaker.oldPrice = double.Parse(priceString.Replace(".", ",").Trim());
                }
                else
                {
                    priceString   = priceHTML.QuerySelector("b").InnerHtml;
                    sneaker.price = double.Parse(priceString.Replace(".", ",").Trim());
                }

                // title: prefix the upper-cased brand when the title does not contain it
                string brandUpper = brand.ToUpper();
                sneaker.title = item.QuerySelector("span.model").InnerHtml;
                if (!sneaker.title.Contains(brandUpper))
                {
                    sneaker.title = brandUpper + " " + sneaker.title.ToUpper();
                }
                sneaker.title = WebUtility.HtmlDecode(sneaker.title);
                sneaker.ParseTitle();

                if (!catalog.isExistSneakerInCatalog(sneaker))
                {
                    catalog.sneakers.Add(sneaker);
                }
            }

            // next page: a page without the pager list has nothing to follow
            var pager = document.QuerySelector("ol.left");
            if (pager == null)
            {
                return;
            }

            foreach (var nextPage in pager.QuerySelectorAll("a"))
            {
                if (nextPage.GetAttribute("title") == "Next page")
                {
                    string nextPageLink = nextPage.GetAttribute("href");
                    ParseSneakersFromPage(catalog, brand, nextPageLink);
                    break; // follow the next-page link only once per page
                }
            }
        }
        /// <summary>
        /// Downloads the listing page at <paramref name="link"/> (UTF-8, browser User-Agent),
        /// turns every in-stock <c>li.item</c> into a <c>Sneaker</c>, adds the ones not yet
        /// present to <paramref name="catalog"/> and follows the <c>a.next</c> link recursively.
        /// </summary>
        /// <param name="catalog">Catalog that receives the parsed sneakers.</param>
        /// <param name="brand">Brand assigned to every sneaker; prepended to the title unless it is "Jordan".</param>
        /// <param name="link">Absolute URL of the listing page, e.g. https://en.titolo.ch/sneakers/nike?limit=36&amp;p=3.</param>
        public void ParseSneakersFromPage(Catalog catalog, string brand, string link)
        {
            Console.WriteLine("Парсим " + link);
            Uri uri = new Uri(link);

            // using guarantees the client is disposed even when DownloadString throws
            string source;
            using (WebClient webClient = new WebClient())
            {
                webClient.Encoding = Encoding.UTF8;
                webClient.Headers.Add("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36");
                source = webClient.DownloadString(uri);
            }

            var parser   = new HtmlParser();
            var document = parser.Parse(source);

            // items
            foreach (var item in document.QuerySelectorAll("li.item"))
            {
                // items flagged as out of stock are skipped
                if (item.QuerySelector("p.out-of-stock") != null)
                {
                    continue;
                }

                var sneaker = new Sneaker();

                // brand
                sneaker.brand = brand;

                // link
                var linkHTML = item.QuerySelector("a.product-image");
                sneaker.link = linkHTML.GetAttribute("href");

                // prices
                // NOTE(review): '.' is replaced by ',' before double.Parse, so parsing relies on
                // the current culture using ',' as the decimal separator - confirm
                var    priceHTML     = item.QuerySelector("div.price-box");
                var    salePriceHTML = item.QuerySelector("p.special-price");
                string priceString;
                if (salePriceHTML != null)
                {
                    // the item is on sale: p.special-price is the current price
                    priceString   = salePriceHTML.QuerySelector("span.price").InnerHtml;
                    priceString   = priceString.Replace("CHF", "");
                    priceString   = priceString.Replace(".", ",").Trim();
                    sneaker.price = double.Parse(priceString);

                    // old price
                    priceString      = priceHTML.QuerySelector("p.old-price").QuerySelector("span.price").InnerHtml;
                    priceString      = priceString.Replace("CHF", "");
                    priceString      = priceString.Replace(".", ",").Trim();
                    sneaker.oldPrice = double.Parse(priceString);
                }
                else
                {
                    priceString   = priceHTML.QuerySelector("span.price").InnerHtml;
                    priceString   = priceString.Replace("CHF", "");
                    priceString   = priceString.Replace(".", ",").Trim();
                    sneaker.price = double.Parse(priceString);
                }

                // title: original author's note - Nike and Nike SB titles lack the brand,
                // Jordan titles already contain it
                sneaker.title = String.Empty;
                if (sneaker.brand != "Jordan")
                {
                    sneaker.title = sneaker.brand + " ";
                }
                sneaker.title += item.QuerySelector("span.name").InnerHtml;
                sneaker.title  = sneaker.title.ToUpper();
                sneaker.ParseTitle();

                // add to catalog
                if (!catalog.isExistSneakerInCatalog(sneaker))
                {
                    catalog.sneakers.Add(sneaker);
                }
            }

            // next page
            var nextPage = document.QuerySelector("a.next");

            if (nextPage != null)
            {
                string nextPageLink = nextPage.GetAttribute("href");
                ParseSneakersFromPage(catalog, brand, nextPageLink);
            }
        }
        /// <summary>
        /// Builds a new <c>Catalog</c> from the listings of the <c>_json</c> field.
        /// Listings without sizes are skipped; each remaining listing becomes one
        /// <c>Sneaker</c>, added only if the catalog does not already contain it.
        /// </summary>
        /// <returns>The catalog populated from <c>_json.listings</c>.</returns>
        /// <exception cref="Exception">A listing has a category other than "men", "women" or "kids".</exception>
        private Catalog ParseCatalogFromJson()
        {
            Catalog catalog = new Catalog();

            foreach (var item in _json.listings)
            {
                // listings without any size are ignored
                if (item.sizes == null || item.sizes.Count == 0)
                {
                    continue;
                }

                var sneaker = new Sneaker();
                sneaker.brand    = item.brand;
                sneaker.link     = item.url;
                sneaker.price    = item.price;
                sneaker.oldPrice = item.old_price;
                sneaker.sku      = item.sku;
                sneaker.images   = item.images;

                // sizes: only the US value of every size entry is used
                foreach (var sizeitem in item.sizes)
                {
                    string sizeUS = sizeitem.us;
                    sneaker.sizes.Add(new SneakerSize(sizeUS));
                }

                // title: prefix the upper-cased brand when the title does not mention it
                sneaker.title = item.title;
                if (!sneaker.title.ToUpper().Contains(item.brand.ToUpper()))
                {
                    sneaker.title = item.brand.ToUpper() + " " + sneaker.title;
                }
                sneaker.ParseTitle();

                // category: anything outside the three known values is an error
                if (item.category == "men")
                {
                    sneaker.category = Settings.CATEGORY_MEN;
                }
                else if (item.category == "women")
                {
                    sneaker.category = Settings.CATEGORY_WOMEN;
                }
                else if (item.category == "kids")
                {
                    sneaker.category = Settings.CATEGORY_KIDS;
                }
                else
                {
                    throw new Exception("wrong category");
                }

                // sex: unknown non-null values leave sneaker.sex untouched
                if (item.sex == "men")
                {
                    sneaker.sex = Settings.GENDER_MAN;
                }
                else if (item.sex == "women")
                {
                    sneaker.sex = Settings.GENDER_WOMAN;
                }
                else if (item.sex == null)
                {
                    sneaker.sex = null;
                }

                sneaker.color = item.colorbrand;

                // add to catalog
                if (!catalog.isExistSneakerInCatalog(sneaker))
                {
                    catalog.sneakers.Add(sneaker);
                }
            }

            return catalog;
        }
        /// <summary>
        /// Downloads the listing page at <paramref name="link"/> (read as UTF-8), turns every
        /// <c>li</c> of <c>ul.products-list</c> into a <c>Sneaker</c>, adds the ones not yet
        /// present to <paramref name="catalog"/> and follows the "Next page" link recursively.
        /// </summary>
        /// <param name="catalog">Catalog that receives the parsed sneakers.</param>
        /// <param name="brand">Brand assigned to every sneaker; prepended (upper-cased) to titles that lack it.</param>
        /// <param name="link">Absolute URL of the listing page to download.</param>
        public void ParseSneakersFromPage(Catalog catalog, string brand, string link)
        {
            Console.WriteLine("Парсим " + link);
            Uri uri = new Uri(link);

            // using guarantees the client is disposed even when DownloadString throws
            string source;
            using (WebClient webClient = new WebClient())
            {
                webClient.Encoding = Encoding.UTF8;
                source = webClient.DownloadString(uri);
            }

            var parser   = new HtmlParser();
            var document = parser.Parse(source);

            // items: without the product list there is nothing to parse on this page
            var productList = document.QuerySelector("ul.products-list");
            if (productList == null)
            {
                Console.WriteLine("ul.products-list not found: " + link);
                return;
            }

            foreach (var item in productList.QuerySelectorAll("li"))
            {
                var sneaker = new Sneaker();

                // brand
                sneaker.brand = brand;

                // link: the third anchor of the item carries the product URL
                var links = item.QuerySelectorAll("a");
                sneaker.link = links[2].GetAttribute("href");

                // prices
                // NOTE(review): '.' is replaced by ',' before double.Parse, so parsing relies on
                // the current culture using ',' as the decimal separator - confirm
                var    priceHTML     = item.QuerySelector("div.price");
                var    salePriceHTML = item.QuerySelector("div.sale-price");
                string priceString;
                if (salePriceHTML != null)
                {
                    // the item is on sale: div.sale-price is the current price, div.price the old one
                    priceString   = salePriceHTML.QuerySelector("b").InnerHtml;
                    sneaker.price = double.Parse(priceString.Replace(".", ",").Trim());

                    priceString      = priceHTML.QuerySelector("b").InnerHtml;
                    sneaker.oldPrice = double.Parse(priceString.Replace(".", ",").Trim());
                }
                else
                {
                    priceString   = priceHTML.QuerySelector("b").InnerHtml;
                    sneaker.price = double.Parse(priceString.Replace(".", ",").Trim());
                }

                // title: prefix the upper-cased brand when the title does not contain it
                string brandUpper = brand.ToUpper();
                sneaker.title = item.QuerySelector("span.model").InnerHtml;
                if (!sneaker.title.Contains(brandUpper))
                {
                    sneaker.title = brandUpper + " " + sneaker.title;
                }
                sneaker.ParseTitle();

                if (!catalog.isExistSneakerInCatalog(sneaker))
                {
                    catalog.sneakers.Add(sneaker);
                }
            }

            // next page: a page without the pager list has nothing to follow
            var pager = document.QuerySelector("ol.left");
            if (pager == null)
            {
                return;
            }

            foreach (var nextPage in pager.QuerySelectorAll("a"))
            {
                if (nextPage.GetAttribute("title") == "Next page")
                {
                    string nextPageLink = nextPage.GetAttribute("href");
                    ParseSneakersFromPage(catalog, brand, nextPageLink);
                    break; // follow the next-page link only once per page
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Builds a new <c>Catalog</c> from <c>json.listings</c>. Listings without sizes are
        /// skipped. For the rest a category is resolved (listing value, then the full catalog
        /// by SKU, then a guess from the first size entry); listings whose category stays
        /// empty are logged and skipped. Sizes are converted to US values before being added.
        /// </summary>
        /// <param name="json">Parsed root object whose <c>listings</c> are converted.</param>
        /// <returns>The catalog populated from the listings.</returns>
        /// <exception cref="Exception">A size entry has none of us / eu / uk / cm filled in ("wrong size").</exception>
        public Catalog ParseCatalogFromJson(RootParsingObject json)
        {
            Catalog catalog = new Catalog();

            var items = json.listings;

            foreach (var item in items)
            {
                var sneaker = new Sneaker();
                sneaker.brand    = item.brand;
                sneaker.link     = item.url;
                sneaker.price    = item.price;
                sneaker.oldPrice = item.old_price;
                sneaker.sku      = item.sku;
                sneaker.images   = item.images;
                sneaker.color    = item.colorbrand;
                // only listings with at least one size entry are processed
                if (item.sizes != null)
                {
                    if (item.sizes.Count > 0)
                    {
                        //title: prefix the upper-cased brand when the title does not mention it
                        sneaker.title = item.title;
                        if (!sneaker.title.ToUpper().Contains(item.brand.ToUpper()))
                        {
                            sneaker.title = item.brand.ToUpper() + " " + sneaker.title;
                        }
                        sneaker.ParseTitle();

                        //category: map the listing value to the Settings constants
                        if (item.category == "men")
                        {
                            sneaker.category = Settings.CATEGORY_MEN;
                        }
                        else if (item.category == "women")
                        {
                            sneaker.category = Settings.CATEGORY_WOMEN;
                        }
                        else if (item.category == "kids")
                        {
                            sneaker.category = Settings.CATEGORY_KIDS;
                        }
                        //if the listing has no category, check whether the SKU exists in the full catalog and has a category there
                        //NOTE(review): this tests item.category, so a non-empty but unrecognized value skips this fallback
                        if (String.IsNullOrWhiteSpace(item.category))
                        {
                            var fullCatalogSneaker = fullCatalog.GetSneakerFromSKU(item.sku);
                            if (fullCatalogSneaker != null)
                            {
                                if (!String.IsNullOrWhiteSpace(fullCatalogSneaker.category))
                                {
                                    sneaker.category = fullCatalogSneaker.category;
                                }
                            }
                            else
                            {
                                // debug placeholder left by the author; has no effect
                                bool test = true;
                            }
                        }
                        //if the category is still empty, try to derive it from the first size entry
                        if (String.IsNullOrWhiteSpace(sneaker.category))
                        {
                            // item.sizes was already checked above, so both conditions hold here
                            if (item.sizes != null)
                            {
                                if (item.sizes.Count > 0)
                                {
                                    var sizeitem = item.sizes[0];
                                    sneaker.category = SizeConverters.SizeConverter.GetCategory(sizeitem.us, sizeitem.eu, sizeitem.uk, sizeitem.cm);
                                }
                            }
                        }

                        //without a category there is no point in continuing
                        if (!String.IsNullOrWhiteSpace(sneaker.category))
                        {
                            //sizes: take the US value directly, otherwise look it up in the size chart
                            //by eu, then uk, then cm, restricted to the sneaker's category
                            foreach (var sizeitem in item.sizes)
                            {
                                string sizeUS = String.Empty;
                                if (!String.IsNullOrWhiteSpace(sizeitem.us))
                                {
                                    sizeUS = sizeitem.us;
                                }
                                else if (!String.IsNullOrWhiteSpace(sizeitem.eu))
                                {
                                    var sizes       = sizeConverter.sizeChart.sizes.FindAll(x => x.eu == sizeitem.eu);
                                    var engCategory = Helper.ConvertCategoryRusToEng(sneaker.category);
                                    var size        = sizes.Find(x => x.category == engCategory);
                                    if (size != null)
                                    {
                                        sizeUS = size.us;
                                    }
                                }
                                else if (!String.IsNullOrWhiteSpace(sizeitem.uk))
                                {
                                    var sizes       = sizeConverter.sizeChart.sizes.FindAll(x => x.uk == sizeitem.uk);
                                    var engCategory = Helper.ConvertCategoryRusToEng(sneaker.category);
                                    var size        = sizes.Find(x => x.category == engCategory);
                                    if (size != null)
                                    {
                                        sizeUS = size.us;
                                    }
                                }
                                else if (!String.IsNullOrWhiteSpace(sizeitem.cm))
                                {
                                    var sizes       = sizeConverter.sizeChart.sizes.FindAll(x => x.cm == sizeitem.cm);
                                    var engCategory = Helper.ConvertCategoryRusToEng(sneaker.category);
                                    var size        = sizes.Find(x => x.category == engCategory);
                                    if (size != null)
                                    {
                                        sizeUS = size.us;
                                    }
                                }
                                else
                                {
                                    // the size entry has no value in any system
                                    throw new Exception("wrong size");
                                }

                                // a size that could not be converted is logged and dropped
                                if (!String.IsNullOrWhiteSpace(sizeUS))
                                {
                                    SneakerSize size = new SneakerSize(sizeUS);
                                    sneaker.sizes.Add(size);
                                }
                                else
                                {
                                    Program.Logger.Warn("Wrong size or category. SKU:" + item.sku + " category:" + sneaker.category + " Size: us:" + sizeitem.us + " eu:" + sizeitem.eu + " uk:" + sizeitem.uk + " cm:" + sizeitem.cm);
                                    //throw new Exception("Wrong sizeUS");
                                }
                            } //sizes

                            //sex: unknown non-null values are logged and leave sneaker.sex untouched
                            if (item.sex == "men")
                            {
                                sneaker.sex = Settings.GENDER_MAN;
                            }
                            else if (item.sex == "women")
                            {
                                sneaker.sex = Settings.GENDER_WOMAN;
                            }
                            else if (item.sex == null)
                            {
                                sneaker.sex = null;
                            }
                            else
                            {
                                Program.Logger.Warn("wrong sex: " + item.sku);
                                bool test = true;
                            }

                            //add to catalog
                            if (!catalog.isExistSneakerInCatalog(sneaker))
                            {
                                catalog.sneakers.Add(sneaker);
                            }
                        }
                        else //the category could not be determined
                        {
                            Program.Logger.Warn("wrong category: " + item.sku);
                        }
                    }
                    else
                    {
                        // debug placeholder left by the author; has no effect
                        bool test = true;
                    }
                }
                else
                {
                    // debug placeholder left by the author; has no effect
                    bool test = true;
                }
            }

            return(catalog);
            //throw new NotImplementedException();
        }
        /// <summary>
        /// Downloads the listing page at <paramref name="link"/> (decoded with
        /// <c>SITE_ENCODING</c>), turns every <c>div.product</c> element into a
        /// <c>Sneaker</c> with the brand "Nike", adds it to <paramref name="catalog"/>
        /// and follows the "далее" (next) pager link recursively.
        /// </summary>
        /// <param name="catalog">Catalog that receives the parsed sneakers through <c>AddUniqueSneaker</c>.</param>
        /// <param name="link">Absolute URL of the listing page to download.</param>
        public void ParseSneakersFromPage(Catalog catalog, string link)
        {
            Console.WriteLine("Парсим " + link);
            Uri uri = new Uri(link);

            // WebClient is IDisposable: the using block releases it even when the download throws.
            string source;
            using (WebClient webClient = new WebClient())
            {
                webClient.Encoding = SITE_ENCODING;
                source = webClient.DownloadString(uri);
            }

            var parser   = new HtmlParser();
            var document = parser.Parse(source);

            // every div.product element is treated as one sneaker
            var items = document.QuerySelectorAll("div.product");

            foreach (var item in items)
            {
                var sneaker = new Sneaker();

                // the first anchor of the item carries both the product URL and the title
                var anchor = item.QuerySelector("a");
                sneaker.link  = anchor.GetAttribute("href");
                sneaker.brand = "Nike";

                // price: each value is the text in front of the first "<em" tag, spaces removed
                var    priceDiv    = item.QuerySelector("div.price");
                var    oldPriceTag = priceDiv.QuerySelector("del");
                string priceStr;
                if (oldPriceTag != null)
                {
                    // old price: a <del> element is only present for discounted items
                    priceStr         = oldPriceTag.InnerHtml;
                    priceStr         = priceStr.Substring(0, priceStr.IndexOf("<em")).Replace(" ", "");
                    sneaker.oldPrice = Int32.Parse(priceStr);
                }

                // current price: read from div.price itself in both cases
                priceStr      = priceDiv.InnerHtml;
                priceStr      = priceStr.Substring(0, priceStr.IndexOf("<em")).Replace(" ", "");
                sneaker.price = Int32.Parse(priceStr);

                // title: strip the audience words and the word for "sneakers"
                sneaker.title = anchor.GetAttribute("title");
                sneaker.title = sneaker.title.Replace("Мужские", "").Replace("Женские", "").Replace("Детские", "").Replace("Подростковые", "").Trim();
                sneaker.title = sneaker.title.Replace("Кроссовки", "").Replace("кроссовки", "").Trim();
                sneaker.ParseTitle();

                catalog.AddUniqueSneaker(sneaker);
            }

            // next page: a page without the pager block has nothing to follow
            var pager = document.QuerySelector("div.pn");
            if (pager == null)
            {
                return;
            }

            foreach (var nextlink in pager.QuerySelectorAll("a.textlink"))
            {
                if (nextlink.InnerHtml == "далее")
                {
                    string nextPageLink = nextlink.GetAttribute("href");
                    Thread.Sleep(100); // short pause before requesting the next page
                    ParseSneakersFromPage(catalog, nextPageLink);
                    break; // follow the next-page link only once per page
                }
            }
        }