예제 #1
0
        public static IEnumerable <BulletinSheetCache> GetBulletinsByQuery(string initialUrl, string query)
        {
            var bulletins = new List <BulletinSheetCache>();

            BCT.Execute(d =>
            {
                var regex = new Regex(imagePattern);
                FirefoxDriverHelper.ExecuteWithVisual(b =>
                {
                    for (var i = 0; i < maxPages; i++)
                    {
                        var url = string.Format($"{initialUrl}/rossiya?q={query}");
                        if (i > 0)
                        {
                            if (b.Url.Contains("telefony"))
                            {
                                url = string.Format($"{initialUrl}//rossiya/telefony/{query.ToLower()}?p={i + 1}&sgtd=8");
                            }
                            else
                            {
                                url = string.Format($"{initialUrl}/rossiya?p={i + 1}&q={query}&sgtd=8");
                            }
                        }

                        b.Manage().Timeouts().ImplicitWait = new TimeSpan(0, 0, 10);
                        b.Navigate().GoToUrl(url);
                        Thread.Sleep(5000);
                        b.Manage().Timeouts().ImplicitWait = new TimeSpan(0);

                        var elements = b.FindElementsByClassName("item_table");
                        foreach (var e in elements)
                        {
                            var descBlock     = e.FindElement(By.ClassName("item-description-title-link"));
                            var bulletinUrl   = descBlock.GetAttribute("href");
                            var bulletinTitle = descBlock.Text;

                            var priceBlock     = e.FindElement(By.ClassName("about"));
                            var bulletinPrice  = priceBlock.Text;
                            var dateBlock      = e.FindElement(By.ClassName("c-2"));
                            var bulletinDate   = dateBlock.Text;
                            var bulletinImages = new List <string>();
                            if (e.FindElementSafe(By.CssSelector("a.photo-wrapper")).Exists())
                            {
                                var wrapper = e.FindElement(By.CssSelector("a.photo-wrapper"));
                                var img     = wrapper.FindElementSafe(By.TagName("img"));
                                if (img != null)
                                {
                                    var src = img.GetAttribute("src");
                                    bulletinImages.Add(src);
                                }
                            }
                            else
                            {
                                if (e.FindElementSafe(By.XPath(".//div[contains(@class, 'item-slider-image')]")).Exists())
                                {
                                    var imageBlocks = e.FindElements(By.XPath(".//div[contains(@class, 'item-slider-image')]"));
                                    if (imageBlocks.Count > 0)
                                    {
                                        var srcs      = imageBlocks.Select(q => q.GetAttribute("data-srcpath")).ToList();
                                        var srcIsNull = srcs.FirstOrDefault() == null;
                                        if (srcIsNull)
                                        {
                                            var rawUrls = imageBlocks.Select(q => q.GetAttribute("style")).ToList();
                                            foreach (var rawUrl in rawUrls)
                                            {
                                                Match match = regex.Match(rawUrl);
                                                if (match.Success)
                                                {
                                                    var handledUrl = match.Groups[1].Value;
                                                    handledUrl     = handledUrl.Replace("\"", "");
                                                    bulletinImages.Add("http:" + handledUrl);
                                                }
                                            }
                                        }
                                        else
                                        {
                                            bulletinImages = srcs.Select(q => "http:" + q).ToList();
                                        }
                                    }
                                }
                            }

                            var cache = new BulletinSheetCache
                            {
                                Url    = bulletinUrl,
                                Title  = bulletinTitle,
                                Price  = bulletinPrice,
                                Images = bulletinImages.ToArray(),
                            };
                            bulletins.Add(cache);
                        }
                    }
                }, Timeout: 10);
            });
            return(bulletins);
        }
예제 #2
0
        public static IEnumerable <BulletinSheetCache> GetBulletinsBySheets(IEnumerable <BulletinSheetCache> caches)
        {
            BCT.Execute(d =>
            {
                FirefoxDriverHelper.ExecuteWithVisual(b =>
                {
                    foreach (var cache in caches)
                    {
                        if (cache.IsHandled)
                        {
                            continue;
                        }

                        b.Manage().Timeouts().ImplicitWait = new TimeSpan(0, 0, 10);

                        b.Navigate().GoToUrl(cache.Url);
                        Thread.Sleep(3000);

                        b.Manage().Timeouts().ImplicitWait = new TimeSpan(0, 0, 0);
                        var bulletinDescription            = string.Empty;
                        var descriptionBlock = b.FindElementSafe(By.ClassName("item-description-html"));
                        if (descriptionBlock.Exists())
                        {
                            bulletinDescription = descriptionBlock.Text;
                        }
                        else
                        {
                            var simpleDescriptionBlock = b.FindElementSafe(By.ClassName("item-description-text"));
                            if (simpleDescriptionBlock.Exists())
                            {
                                bulletinDescription = simpleDescriptionBlock.Text;
                            }
                        }

                        var isIndividualSeller = false;
                        var sellerBlock        = b.FindElementSafe(By.ClassName("seller-info-col"));
                        if (sellerBlock != null)
                        {
                            isIndividualSeller = sellerBlock.Text.Contains("Продавец");
                        }

                        var regions      = new string[2];
                        var addressBlock = b.FindElementsByClassName("seller-info-prop").FirstOrDefault(q => q.Text.Contains("Адрес"));
                        if (addressBlock != null)
                        {
                            var cityBlock = addressBlock.FindElementSafe(By.ClassName("seller-info-value"));
                            var rawAdress = cityBlock.Text;
                            var match     = RegexHelper.Execute(addressPattern, rawAdress).FirstOrDefault();
                            if (match != null)
                            {
                                regions[0] = match.Groups[1].Value;
                                regions[1] = match.Groups[2].Value;
                            }
                        }

                        var bulletinViews = 0;
                        var viewBlock     = b.FindElementSafe(By.ClassName("title-info-views"));
                        if (viewBlock != null)
                        {
                            bulletinViews = Int32.Parse(Regex.Match(viewBlock.Text, "\\d+").Value);
                        }

                        var categoriesBlocks = b.FindElementsByClassName("js-breadcrumbs-link").ToArray();
                        var rawCategories    = new string[5];

                        //Skip city in category block
                        for (var i = 1; i < categoriesBlocks.Length; i++)
                        {
                            var categoryElement  = categoriesBlocks[i];
                            rawCategories[i - 1] = categoryElement.Text;
                        }

                        cache.IsIndividualSeller = isIndividualSeller;
                        cache.Region1            = regions[0];
                        cache.Region2            = regions[1];

                        cache.Category1 = rawCategories[0];
                        cache.Category2 = rawCategories[1];
                        cache.Category3 = rawCategories[2];
                        cache.Category4 = rawCategories[3];
                        cache.Category5 = rawCategories[4];

                        cache.Views       = bulletinViews;
                        cache.Description = bulletinDescription;
                        cache.IsHandled   = true;
                    }
                }, Timeout: 10);
            });
            return(caches);
        }