示例#1
0
        /// <summary>
        /// Collects the advertisement nodes from every result page and maps the ones accepted by
        /// <c>IsNewer</c> (checked against the stored date of the last Olx scraping) to links.
        /// </summary>
        /// <returns>
        /// The links produced by <c>MapToLink</c> for the accepted nodes; null mapping results are left out.
        /// </returns>
        private List <Link> GetLinks()
        {
            var pageCount          = GetMaxPage(_browser);
            var advertisementNodes = new List <HtmlNode>();

            // Walk the result pages in order (1..pageCount) and gather the advertisement nodes of each one.
            for (var pageNumber = 1; pageNumber <= pageCount; pageNumber++)
            {
                var pageAddress = $"{BaseUri}{PageQuery}{pageNumber}";
                var nodesOnPage = _browser.GetPage(new Uri(pageAddress)).CssSelect(AdvertisementClassName);

                _logger.Log(LogType.Info, $"Added links from {pageAddress}.");

                advertisementNodes.AddRange(nodesOnPage);
            }

            // When no scraping date is stored, DateTime.MinValue serves as the cut-off.
            var storedUtility = _utilityRepository.GetByKind(OfferType.Olx);
            var cutOffDate    = storedUtility?.DateOfLastScraping ?? DateTime.MinValue;

            var result = new List <Link>();

            foreach (var node in advertisementNodes)
            {
                if (!IsNewer(node, cutOffDate))
                {
                    continue;
                }

                // The first node matching LinkClassName inside the advertisement is the one that gets mapped.
                var link = node.CssSelect(LinkClassName).FirstOrDefault().MapToLink();

                if (link != null)
                {
                    result.Add(link);
                }
            }

            return result;
        }
示例#2
0
        /// <summary>
        /// Gathers links to OtoDom offers by walking the result pages and opening every offer page
        /// to read the date shown in its "Data dodania" paragraph.
        /// </summary>
        /// <remarks>
        /// Gathering stops at the first offer whose date is earlier than the stored date of the last
        /// scraping; the links collected up to that point are returned. <c>UpdateDateOfLastScraping</c>
        /// is called before every return. An offer whose date cannot be read is logged and kept,
        /// because it cannot be shown to be older than the last scraping.
        /// </remarks>
        /// <returns>
        /// The gathered links, each with <c>Status.New</c> and a sequential id starting at "1".
        /// </returns>
        public ICollection <Link> Gather()
        {
            var links = new List <Link>();

            var pagesCount = GetPagesCount(_browser);
            var linksCount = 1;

            var dateOfLastScrapping = _utilityRepository.GetByKind(OfferType.OtoDom)?.DateOfLastScraping;

            for (var i = 1; i <= pagesCount; i++)
            {
                // The first result page is requested without a page query; later pages append their number.
                var pageQuery = i > 1 ? $"{PageQuery}{i}" : string.Empty;
                var page      = _browser.GetPage(new Uri($"{BaseUri}{pageQuery}"));

                var offerLinks = page.Descendants()
                                 .Where(x => x.Name == AdvertisementElementName)
                                 .Where(x => x.GetAttributeValue("data-featured-name", "") == AdvertisementClassName)
                                 .Select(x => x.GetAttributeValue("data-url", ""))
                                 .Where(x => x.Contains("www.otodom.pl/oferta/"))
                                 .ToList();

                foreach (var offerLink in offerLinks)
                {
                    var offerPage = _browser.GetPage(new Uri(offerLink));

                    // FirstOrDefault instead of First: a page without the date paragraph must not
                    // abort the whole run and lose the links gathered so far.
                    var dateNode = offerPage.Descendants()
                                   .FirstOrDefault(x => x.Name == "p" && x.InnerText.Contains("Data dodania"));

                    DateTime offerDateTime;
                    if (dateNode != null && TryParseOfferDate(dateNode.InnerText.Split(':').Last(), out offerDateTime))
                    {
                        if (dateOfLastScrapping != null && dateOfLastScrapping > offerDateTime)
                        {
                            UpdateDateOfLastScraping();
                            return links;
                        }
                    }
                    else
                    {
                        _logger.Log(LogType.Info, $"Could not read the date of {offerLink}; keeping the link.");
                    }

                    _logger.Log(LogType.Info, $"Added {offerLink} link from OtoDom.");

                    links.Add(new Link
                    {
                        Id             = (linksCount++).ToString(CultureInfo.InvariantCulture),
                        Uri            = offerLink,
                        LinkSourceKind = OfferType.OtoDom,
                        LastUpdate     = DateTime.Now,
                        Status         = Status.New,
                    });
                }
            }

            UpdateDateOfLastScraping();
            return links;
        }

        /// <summary>
        /// Reads an offer date from text that is either a date in Polish (pl-PL) format or a phrase
        /// containing a number of days, which is counted back from the current local time.
        /// </summary>
        /// <param name="offerDateText">The text that follows the last ':' of the date paragraph.</param>
        /// <param name="offerDate">The parsed date, or the default <see cref="DateTime"/> on failure.</param>
        /// <returns><c>true</c> when a date was obtained; otherwise <c>false</c>.</returns>
        private static bool TryParseOfferDate(string offerDateText, out DateTime offerDate)
        {
            if (DateTime.TryParse(offerDateText, new CultureInfo("pl-PL"), DateTimeStyles.None, out offerDate))
            {
                return true;
            }

            // Fallback: the first run of digits is taken as "days ago". TryParse rejects the empty
            // match and oversized numbers instead of throwing as int.Parse would.
            var now = DateTime.Now;
            int daysAgo;
            if (int.TryParse(Regex.Match(offerDateText, "\\d+").Value, NumberStyles.Integer, CultureInfo.InvariantCulture, out daysAgo)
                && daysAgo <= (now - DateTime.MinValue).TotalDays)
            {
                // The range check above keeps AddDays inside the values DateTime can represent.
                offerDate = now.AddDays(-daysAgo);
                return true;
            }

            offerDate = default(DateTime);
            return false;
        }