Ejemplo n.º 1
0
        /// <summary>
        /// Requests one page of search results for <paramref name="layout"/>, filters it, and attaches a
        /// Base64-encoded 100x70 thumbnail to every result that has an image URL.
        /// </summary>
        /// <param name="pageNumber">Page number passed to the page request.</param>
        /// <param name="layout">Supplies the search text, excluded text, price bounds and date bound.</param>
        /// <returns>A new list holding the results the page returned for the constructed filter.</returns>
        private List <WebSearchResult> SearchOnPage(int pageNumber, Layout layout)
        {
            AvitoPage page = new AvitoPage(_settings, new AvitoPageRequest(_settings, pageNumber, layout.SearchText));

            Filter filter = new Filter();
            filter.TitleContains       = SplitSearchTerms(layout.SearchText);
            filter.TitleDoesNotContain = SplitSearchTerms(layout.SearchTextExclude);
            filter.MinPrice            = layout.MinPrice;
            // A maximum below 1 is treated as "no upper bound".
            filter.MaxPrice            = layout.MaxPrice < 1 ? int.MaxValue : layout.MaxPrice;
            filter.DatesAfter          = layout.DatesAfter;

            var webSearchResults = page.GetResults(filter);

            foreach (var result in webSearchResults)
            {
                if (result.ImageUrl == null)
                {
                    continue;
                }

                // Both the downloaded image and its thumbnail are disposable; release them as soon as
                // the Base64 string has been produced instead of waiting for finalization.
                using (var original = ImageUtil.GetImage(result.ImageUrl))
                using (var thumbnail = original.GetThumbnailImage(100, 70, null, IntPtr.Zero))
                {
                    result.ImageBase64 = ImageUtil.ImageToBase64String(thumbnail);
                }
            }

            List <WebSearchResult> results = new List <WebSearchResult>();
            results.AddRange(webSearchResults);
            return(results);
        }

        /// <summary>
        /// Splits a ';'-separated term list and normalizes every term through TrimText and LowerText.
        /// </summary>
        /// <param name="text">Raw term list; null or empty yields no terms.</param>
        /// <returns>The normalized terms, without entries that are empty after normalization.</returns>
        private static string[] SplitSearchTerms(string text)
        {
            List <string> terms = new List <string>();

            if (string.IsNullOrEmpty(text))
            {
                return(terms.ToArray());
            }

            foreach (string part in text.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
            {
                string term = new LowerText(new TrimText(part));

                // RemoveEmptyEntries only drops zero-length segments. A segment that normalizes to an
                // empty string must be dropped too, because string.Contains("") is always true and an
                // empty exclusion term would therefore match every title.
                if (!string.IsNullOrEmpty(term))
                {
                    terms.Add(term);
                }
            }

            return(terms.ToArray());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Parses the page content and returns the listings that pass <paramref name="filter"/>.
        /// </summary>
        /// <param name="filter">
        /// Title terms that must all be present, title terms that must all be absent, the inclusive
        /// price range and the earliest accepted date.
        /// </param>
        /// <returns>
        /// The accepted listings in document order. The list is empty when the content cannot be
        /// parsed or contains no date markers; both cases are logged as errors.
        /// </returns>
        public List <WebSearchResult> GetResults(Filter filter)
        {
            List <WebSearchResult> results = new List <WebSearchResult>();

            HtmlNode node = new HtmlToHtmlNode(_content.Value);

            if (node == null)
            {
                Log.Error("Can't parse content. Invalid HtmlNode.");
                return(results);
            }

            // Every listing carries a <div data-marker="item-date"> whose inner text is the
            // (possibly relative) publication date; these nodes are the anchors for all other lookups.
            HtmlNodeCollection dates = node.SelectNodes("//div[@data-marker='item-date']");

            if (dates == null)
            {
                Log.Error("Can't parse dates. Unexpected html structure.");
                return(results);
            }

            // Date of the previously accepted listing, handed to the date parser for the next one.
            DateTime prevDate = DateTime.Today;

            foreach (HtmlNode dateNode in dates)
            {
                // The listing's description container sits three levels above the date marker.
                HtmlNode entryNode = dateNode.ParentNode?.ParentNode?.ParentNode;
                if (entryNode == null)
                {
                    continue;
                }

                // Title link: <div class="... snippet-title-row ..."><h3><a>
                HtmlNode titleANode = entryNode.SelectSingleNode(".//div[contains(@class,'snippet-title-row')]/h3/a");
                WebLink  titleValue = GetTitleValue(titleANode);
                string   title      = new LowerText(titleValue.Text);

                if (!ContainsAllTerms(title, filter.TitleContains) || ContainsAnyTerm(title, filter.TitleDoesNotContain))
                {
                    continue;
                }

                DateTime?dateValue = AvitoParser.GetDateValue(dateNode.InnerText, prevDate, DateTime.Now);
                if (dateValue == null || dateValue < filter.DatesAfter)
                {
                    continue;
                }

                // Price: <span data-marker="item-price"> ... 6 490 ₽. Listings without a parsable
                // price are kept; only a known price outside the range is rejected.
                HtmlNode priceNode  = entryNode.SelectSingleNode(".//span[@data-marker='item-price']");
                int?     priceValue = AvitoParser.GetPriceValue(priceNode?.InnerText);
                if (priceValue != null && (priceValue < filter.MinPrice || priceValue > filter.MaxPrice))
                {
                    continue;
                }

                // The price element may be missing, so its ancestors are resolved null-safely; a
                // listing without a price element is simply not flagged as a VIP ad.
                HtmlNode priceContainer = priceNode?.ParentNode?.ParentNode;
                bool     isVIPad        = priceContainer != null &&
                                          priceContainer.GetAttributeValue("class", "") == "options" &&
                                          priceContainer.GetAttributeValue("itemprop", "") == "offers";

                // Category: <div class="data"><p>...</p>
                HtmlNode categoryNode = entryNode.SelectSingleNode(".//div[@class='data']/p");

                // Address: <span class="item-address-georeferences-item__content">...</span>
                HtmlNode addressNode = entryNode.SelectSingleNode(".//span[@class='item-address-georeferences-item__content']");

                // Photo: <div data-marker="item-photo"> searched from two levels above the entry node.
                HtmlNode photoScope = entryNode.ParentNode?.ParentNode;
                HtmlNode imgNode    = photoScope?.SelectSingleNode(".//div[@data-marker='item-photo']")?.SelectSingleNode(".//img");

                if (isVIPad && imgNode == null)
                {
                    // For VIP ads, fall back to <div class="img-container"> when no item-photo image exists.
                    imgNode = photoScope?.SelectSingleNode(".//div[@class='img-container']")?.SelectSingleNode(".//img");
                }

                WebSearchResult result = new WebSearchResult();
                result.Title    = titleValue;
                result.IsVIPad  = isVIPad;
                result.Date     = dateValue.Value;
                prevDate        = result.Date.Date;
                result.Category = AvitoParser.GetCategoryValue(categoryNode?.InnerText);
                result.Address  = AvitoParser.GetAddressValue(addressNode?.InnerText);
                result.ImageUrl = GetImageUrl(imgNode);
                result.Price    = priceValue;
                results.Add(result);
            }

            return(results);
        }

        /// <summary>Returns true when <paramref name="title"/> contains every one of <paramref name="terms"/>.</summary>
        private static bool ContainsAllTerms(string title, IEnumerable <string> terms)
        {
            foreach (string term in terms)
            {
                if (!title.Contains(term))
                {
                    return(false);
                }
            }
            return(true);
        }

        /// <summary>Returns true when <paramref name="title"/> contains at least one of <paramref name="terms"/>.</summary>
        private static bool ContainsAnyTerm(string title, IEnumerable <string> terms)
        {
            foreach (string term in terms)
            {
                if (title.Contains(term))
                {
                    return(true);
                }
            }
            return(false);
        }