/// <summary>
/// Loads one page of search results for the given layout, applies the layout's
/// title/price/date filter and attaches a base64 thumbnail to every result that has an image URL.
/// </summary>
/// <param name="pageNumber">Page number passed to the page request.</param>
/// <param name="layout">
/// Search settings. <c>SearchText</c> and <c>SearchTextExclude</c> are ';'-separated term lists;
/// a <c>MaxPrice</c> below 1 means "no upper limit".
/// </param>
/// <returns>A new list holding the filtered results of the page.</returns>
private List<WebSearchResult> SearchOnPage(int pageNumber, Layout layout)
{
    AvitoPage page = new AvitoPage(_settings, new AvitoPageRequest(_settings, pageNumber, layout.SearchText));

    Filter filter = new Filter();
    filter.TitleContains = SplitTitleTerms(layout.SearchText);
    filter.TitleDoesNotContain = SplitTitleTerms(layout.SearchTextExclude);
    filter.MinPrice = layout.MinPrice;
    filter.MaxPrice = layout.MaxPrice < 1 ? int.MaxValue : layout.MaxPrice;
    filter.DatesAfter = layout.DatesAfter;

    var webSearchResults = page.GetResults(filter);
    foreach (var result in webSearchResults)
    {
        if (result.ImageUrl == null)
        {
            continue;
        }

        // Both the downloaded image and its thumbnail are disposed as soon as the
        // base64 string has been produced, instead of being left for the finalizer.
        using (var original = ImageUtil.GetImage(result.ImageUrl))
        using (var thumbnail = original.GetThumbnailImage(100, 70, null, IntPtr.Zero))
        {
            result.ImageBase64 = ImageUtil.ImageToBase64String(thumbnail);
        }
    }

    return new List<WebSearchResult>(webSearchResults);
}

/// <summary>
/// Splits a ';'-separated term list and normalises every term with <c>TrimText</c> and <c>LowerText</c>.
/// Terms that are empty after normalisation are dropped: an empty exclude term would match
/// every title (<c>title.Contains("")</c> is always true) and reject all results.
/// </summary>
/// <param name="text">The raw term list; null or empty yields no terms.</param>
/// <returns>The normalised, non-empty terms in their original order.</returns>
private static string[] SplitTitleTerms(string text)
{
    List<string> terms = new List<string>();
    if (string.IsNullOrEmpty(text))
    {
        return terms.ToArray();
    }

    foreach (string rawTerm in text.Split(new char[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
    {
        string term = new LowerText(new TrimText(rawTerm));
        if (!string.IsNullOrEmpty(term))
        {
            terms.Add(term);
        }
    }

    return terms.ToArray();
}
/// <summary>
/// Parses the page content and returns the listings that satisfy <paramref name="filter"/>.
/// </summary>
/// <param name="filter">Title terms, price range and minimum date a listing has to satisfy.</param>
/// <returns>
/// The matching listings in document order. The list is empty when the content cannot be
/// converted to an <c>HtmlNode</c> or when no date elements are found (both cases are logged).
/// </returns>
public List<WebSearchResult> GetResults(Filter filter)
{
    List<WebSearchResult> results = new List<WebSearchResult>();

    HtmlNode node = new HtmlToHtmlNode(_content.Value);
    if (node == null)
    {
        Log.Error("Can't parse content. Invalid HtmlNode.");
        return results;
    }

    // Listings are located through their date element, e.g.
    // <div class="snippet-date-info" data-marker="item-date" data-shape="default" data-tooltip="(absolute date)" flow="down">
    //   (relative date text)
    // </div>
    HtmlNodeCollection dates = node.SelectNodes("//div[@data-marker='item-date']");
    if (dates == null)
    {
        Log.Error("Can't parse dates. Unexpected html structure.");
        return results;
    }

    // Date of the last accepted listing; handed to the date parser for the next listing.
    DateTime prevDate = DateTime.Today;
    foreach (HtmlNode dateNode in dates)
    {
        // The listing container sits three levels above the date element:
        // <div class="description item_table-description">
        //   <div class="snippet-title-row">...
        HtmlNode entryNode = dateNode.ParentNode?.ParentNode?.ParentNode;
        if (entryNode == null)
        {
            // Date element is not nested deeply enough to belong to a listing; skip it.
            continue;
        }

        HtmlNode titleANode = entryNode.SelectSingleNode(".//div[contains(@class,'snippet-title-row')]/h3/a");
        WebLink titleValue = GetTitleValue(titleANode);
        string title = new LowerText(titleValue.Text);
        if (!TitlePassesFilter(title, filter))
        {
            continue;
        }

        DateTime? dateValue = AvitoParser.GetDateValue(dateNode.InnerText, prevDate, DateTime.Now);
        if (dateValue == null || dateValue < filter.DatesAfter)
        {
            continue;
        }

        // <div class="snippet-price-row"><span itemprop="offers" itemtype="http://schema.org/Offer" itemscope="" class="snippet-price " data-shape="default" data-marker="item-price">
        //   <meta itemprop="priceCurrency" content="RUB"><meta itemprop="price" content="6490">...
        //   6 490 (currency sign)
        HtmlNode priceNode = entryNode.SelectSingleNode(".//span[@data-marker='item-price']");
        int? priceValue = AvitoParser.GetPriceValue(priceNode?.InnerText);
        // Listings whose price could not be parsed are kept; only a known price is range-checked.
        if (priceValue != null && (priceValue < filter.MinPrice || priceValue > filter.MaxPrice))
        {
            continue;
        }

        // A listing without a price element (or without the expected ancestors) is simply
        // not a VIP ad; previously this dereferenced priceNode and threw for such listings.
        HtmlNode offerContainer = priceNode?.ParentNode?.ParentNode;
        bool isVIPad = offerContainer != null
            && offerContainer.GetAttributeValue("class", "") == "options"
            && offerContainer.GetAttributeValue("itemprop", "") == "offers";

        // <div class="data">
        //   <p>(category name)</p>
        HtmlNode categoryNode = entryNode.SelectSingleNode(".//div[@class='data']/p");

        // <span class="item-address-georeferences-item__content">(address)</span>
        HtmlNode addressNode = entryNode.SelectSingleNode(".//span[@class='item-address-georeferences-item__content']");

        // The photo is looked up from two levels above the listing container:
        // <div class="item-photo" data-marker="item-photo">
        HtmlNode imageScope = entryNode.ParentNode?.ParentNode;
        HtmlNode imgNode = imageScope?.SelectSingleNode(".//div[@data-marker='item-photo']");
        imgNode = imgNode?.SelectSingleNode(".//img");
        if (isVIPad && imgNode == null)
        {
            // VIP ads fall back to the "img-container" block when the regular photo block has no image.
            imgNode = imageScope?.SelectSingleNode(".//div[@class='img-container']");
            imgNode = imgNode?.SelectSingleNode(".//img");
        }

        WebSearchResult result = new WebSearchResult();
        result.Title = titleValue;
        result.IsVIPad = isVIPad;
        result.Date = dateValue.Value;
        prevDate = result.Date.Date;
        result.Category = AvitoParser.GetCategoryValue(categoryNode?.InnerText);
        result.Address = AvitoParser.GetAddressValue(addressNode?.InnerText);
        result.ImageUrl = GetImageUrl(imgNode);
        result.Price = priceValue;
        results.Add(result);
    }

    return results;
}

/// <summary>
/// Checks a lower-cased title against the filter's term lists: it must contain every
/// <c>TitleContains</c> term and none of the <c>TitleDoesNotContain</c> terms.
/// A term list that is null is treated as having no terms.
/// </summary>
/// <param name="title">The listing title, already lower-cased by the caller.</param>
/// <param name="filter">The filter supplying the two term lists.</param>
/// <returns><c>true</c> when the title satisfies both lists.</returns>
private static bool TitlePassesFilter(string title, Filter filter)
{
    if (filter.TitleContains != null)
    {
        foreach (string required in filter.TitleContains)
        {
            if (!title.Contains(required))
            {
                return false;
            }
        }
    }

    if (filter.TitleDoesNotContain != null)
    {
        foreach (string excluded in filter.TitleDoesNotContain)
        {
            if (title.Contains(excluded))
            {
                return false;
            }
        }
    }

    return true;
}