internal WebLink GetTitleValue(HtmlNode aNode) { string url = _settings.UrlPrefix + aNode.GetAttributeValue("href", "parse error"); string title = AvitoParser.GetTitleValue(aNode.InnerText); return(new WebLink(url, title)); }
public List <WebSearchResult> GetResults(Filter filter) { List <WebSearchResult> results = new List <WebSearchResult>(); HtmlNode node = new HtmlToHtmlNode(_content.Value); if (node == null) { Log.Error("Can't parse content. Invalid HtmlNode."); return(results); } // <div class="snippet-date-info" data-marker="item-date" data-shape="default" data-tooltip="27 августа 14:12" flow="down"> // 2 дня назад // </div> HtmlNodeCollection dates = node.SelectNodes("//div[@data-marker='item-date']"); if (dates == null) { Log.Error("Can't parse dates. Unexpected html structure."); return(results); } DateTime prevDate = DateTime.Today; foreach (HtmlNode dateNode in dates) { // <div class="description item_table-description"> // <div class="snippet-title-row">... HtmlNode entryNode = dateNode.ParentNode.ParentNode.ParentNode; HtmlNode titleANode = entryNode.SelectSingleNode(".//div[contains(@class,'snippet-title-row')]/h3/a"); WebLink titleValue = GetTitleValue(titleANode); string title = new LowerText(titleValue.Text); bool @break = false; foreach (string contains in filter.TitleContains) { if (!title.Contains(contains)) { @break = true; break; } } if (@break) { continue; } foreach (string contains in filter.TitleDoesNotContain) { if (title.Contains(contains)) { @break = true; break; } } if (@break) { continue; } DateTime?dateValue = AvitoParser.GetDateValue(dateNode.InnerText, prevDate, DateTime.Now); if (dateValue == null || dateValue < filter.DatesAfter) { continue; } // <div class="snippet-price-row"><span itemprop="offers" itemtype="http://schema.org/Offer" itemscope="" class="snippet-price " data-shape="default" data-marker="item-price"><meta itemprop="priceCurrency" content="RUB"><meta itemprop="price" content="6490"><meta itemprop="availability" content="https://schema.org/LimitedAvailability"> // 6 490 ₽ HtmlNode priceNode = entryNode.SelectSingleNode(".//span[@data-marker='item-price']"); int? priceValue = AvitoParser.GetPriceValue(priceNode?.InnerText); if (priceValue != null) { if (priceValue < filter.MinPrice || priceValue > filter.MaxPrice) { continue; } } bool isVIPad = priceNode.ParentNode.ParentNode.GetAttributeValue("class", "") == "options" && priceNode.ParentNode.ParentNode.GetAttributeValue("itemprop", "") == "offers"; // <div class="data"> // <p>Часы и украшения</p> HtmlNode categoryNode = entryNode.SelectSingleNode(".//div[@class='data']/p"); // <span class="item-address-georeferences-item__content">Комендантский проспект</span> HtmlNode addressNode = entryNode.SelectSingleNode(".//span[@class='item-address-georeferences-item__content']"); // <div class="item-photo" data-marker="item-photo"> HtmlNode imgNode = entryNode.ParentNode.ParentNode.SelectSingleNode(".//div[@data-marker='item-photo']"); imgNode = imgNode?.SelectSingleNode(".//img"); if (isVIPad) { if (imgNode == null) { imgNode = entryNode.ParentNode.ParentNode.SelectSingleNode(".//div[@class='img-container']"); imgNode = imgNode?.SelectSingleNode(".//img"); } } WebSearchResult result = new WebSearchResult(); result.Title = titleValue; result.IsVIPad = isVIPad; result.Date = dateValue.Value; prevDate = result.Date.Date; result.Category = AvitoParser.GetCategoryValue(categoryNode?.InnerText); result.Address = AvitoParser.GetAddressValue(addressNode?.InnerText); result.ImageUrl = GetImageUrl(imgNode); result.Price = priceValue; results.Add(result); } return(results); }