/// <summary>
/// Walks every listing page from 1 up to the value returned by <c>GetMaxPage</c>,
/// collects the advertisement nodes found on each page and converts the ones
/// accepted by <c>IsNewer</c> into links.
/// </summary>
/// <returns>
/// The non-null links mapped from advertisements that <c>IsNewer</c> accepts for the
/// last scraping date stored for <c>OfferType.Olx</c> (or <see cref="DateTime.MinValue"/>
/// when no date is stored).
/// </returns>
private List<Link> GetLinks()
{
    var lastPage = GetMaxPage(_browser);
    var advertisementNodes = new List<HtmlNode>();

    for (var pageNumber = 1; pageNumber <= lastPage; pageNumber++)
    {
        var pageAddress = $"{BaseUri}{PageQuery}{pageNumber}";
        var listingPage = _browser.GetPage(new Uri(pageAddress));
        var nodesOnPage = listingPage.CssSelect(AdvertisementClassName);

        _logger.Log(LogType.Info, $"Added links from {pageAddress}.");
        advertisementNodes.AddRange(nodesOnPage);
    }

    // Fall back to the earliest representable date when nothing has been scraped yet.
    var lastScrapingDate = _utilityRepository.GetByKind(OfferType.Olx)?.DateOfLastScraping
                           ?? DateTime.MinValue;

    var newLinks = new List<Link>();
    foreach (var advertisement in advertisementNodes)
    {
        if (!IsNewer(advertisement, lastScrapingDate))
        {
            continue;
        }

        var link = advertisement.CssSelect(LinkClassName)
            .FirstOrDefault()
            .MapToLink();

        // Advertisements that do not map to a link are dropped.
        if (link != null)
        {
            newLinks.Add(link);
        }
    }

    return newLinks;
}
/// <summary>
/// Visits the listing pages (1 up to the value returned by <c>GetPagesCount</c>),
/// opens every offer whose <c>data-url</c> contains <c>www.otodom.pl/oferta/</c> and
/// collects a <see cref="Link"/> for each of them.
/// </summary>
/// <remarks>
/// Gathering stops at the first offer whose resolved date is earlier than the last
/// scraping date stored for <c>OfferType.OtoDom</c>. <c>UpdateDateOfLastScraping</c>
/// is called before returning in either case. An offer whose publication date cannot
/// be determined is logged and treated as published now, so it is kept rather than
/// aborting the whole run.
/// </remarks>
/// <returns>The links gathered during this run.</returns>
public ICollection<Link> Gather()
{
    var links = new List<Link>();
    var pagesCount = GetPagesCount(_browser);
    var dateOfLastScrapping = _utilityRepository.GetByKind(OfferType.OtoDom)?.DateOfLastScraping;

    for (var i = 1; i <= pagesCount; i++)
    {
        // The first listing page is requested without a page query.
        var pageQuery = i > 1 ? $"{PageQuery}{i}" : string.Empty;
        var page = _browser.GetPage(new Uri($"{BaseUri}{pageQuery}"));

        var offerLinks = page.Descendants()
            .Where(x => x.Name == AdvertisementElementName)
            .Where(x => x.GetAttributeValue("data-featured-name", "") == AdvertisementClassName)
            .Select(x => x.GetAttributeValue("data-url", ""))
            .Where(x => x.Contains("www.otodom.pl/oferta/"))
            .ToList();

        foreach (var offerLink in offerLinks)
        {
            var offerPage = _browser.GetPage(new Uri(offerLink));

            // FirstOrDefault + null-conditional: a page without the "Data dodania"
            // paragraph yields null text instead of throwing.
            var offerDateText = offerPage.Descendants()
                .FirstOrDefault(x => x.Name == "p" && x.InnerText.Contains("Data dodania"))
                ?.InnerText.Split(':').Last();

            DateTime offerDateTime;
            if (!TryResolveOfferDate(offerDateText, out offerDateTime))
            {
                _logger.Log(LogType.Info, $"Could not determine the publication date of {offerLink}; treating it as new.");
                offerDateTime = DateTime.Now;
            }

            if (dateOfLastScrapping != null && dateOfLastScrapping > offerDateTime)
            {
                UpdateDateOfLastScraping();
                return links;
            }

            _logger.Log(LogType.Info, $"Added {offerLink} link from OtoDom.");
            links.Add(new Link
            {
                // Ids are sequential within a run, starting at 1.
                Id = (links.Count + 1).ToString(CultureInfo.InvariantCulture),
                Uri = offerLink,
                LinkSourceKind = OfferType.OtoDom,
                LastUpdate = DateTime.Now,
                Status = Status.New,
            });
        }
    }

    UpdateDateOfLastScraping();
    return links;
}

/// <summary>
/// Resolves an offer's publication date from the text that follows the last ':' of
/// its "Data dodania" paragraph.
/// </summary>
/// <param name="offerDateText">Date text taken from the offer page; may be null or empty.</param>
/// <param name="offerDateTime">
/// The parsed pl-PL date, or the current time minus the first number found in the text
/// (interpreted as a number of days). Set to the default value when resolution fails.
/// </param>
/// <returns><c>true</c> when a date could be resolved; otherwise <c>false</c>.</returns>
private static bool TryResolveOfferDate(string offerDateText, out DateTime offerDateTime)
{
    offerDateTime = default(DateTime);
    if (string.IsNullOrWhiteSpace(offerDateText))
    {
        return false;
    }

    if (DateTime.TryParse(offerDateText, new CultureInfo("pl-PL"), DateTimeStyles.None, out offerDateTime))
    {
        return true;
    }

    // Not an absolute date: fall back to the first number in the text as a day offset.
    // TryParse avoids the FormatException int.Parse raises when the text has no digits.
    var numberOfDays = Regex.Match(offerDateText, "\\d+").Value;
    int days;
    if (!int.TryParse(numberOfDays, NumberStyles.Integer, CultureInfo.InvariantCulture, out days))
    {
        return false;
    }

    offerDateTime = DateTime.Now.AddDays(-days);
    return true;
}