Exemplo n.º 1
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> and returns the inner HTML of the
        /// first element matched by the configured <c>Info</c> selector.
        /// </summary>
        /// <param name="url">Address of the page to fetch.</param>
        /// <returns>
        /// The inner HTML of the first matching element, or an empty string when the
        /// selector matches nothing.
        /// </returns>
        public string ScrapeInfo(string url)
        {
            // Blocks the calling thread for 1 ms before issuing the request.
            // NOTE(review): 1 ms is very short for request throttling - confirm the intended value.
            Thread.Sleep(1);

            var node = _scraper.GetHtml(url);

            // Take the first match in a single pass; calling Any() and then First()
            // would run the selector twice if the result is lazily evaluated.
            var firstMatch = node.CssSelect(_scrapeSettings.Info).FirstOrDefault();

            if (firstMatch != null)
            {
                return firstMatch.InnerHtml;
            }

            return "";
        }
        /// <summary>
        /// Walks the paginated listing at <c>cvOnlineUrl</c> and collects one
        /// <c>JobDto</c> for every posting that contains a name element.
        /// </summary>
        /// <param name="pageLimit">
        /// Maximum number of pages to request; page indices run from 0 to
        /// <paramref name="pageLimit"/> - 1.
        /// </param>
        /// <returns>
        /// The jobs collected from all visited pages, in page order. Empty when no
        /// postings were found or <paramref name="pageLimit"/> is not positive.
        /// </returns>
        public IEnumerable <JobDto> ScrapeUrls(int pageLimit)
        {
            var jobUrls = new List <JobDto>();

            for (int i = 0; i < pageLimit; i++)
            {
                // Pause before every page request.
                Thread.Sleep(delay);

                var html = _scraper.GetHtml($"{cvOnlineUrl}?page={i}");

                // Materialize once so the selector is not re-run by both the
                // emptiness check and the loop below.
                var postings = html.CssSelect(_scrapeSettings.Posting).ToList();

                if (postings.Count == 0)
                {
                    // A page without postings means we ran past the last page - stop scraping.
                    break;
                }

                foreach (var node in postings)
                {
                    // Single pass over the name selector instead of Any() followed by First().
                    var infoNode = node.CssSelect(_scrapeSettings.Name).FirstOrDefault();
                    if (infoNode == null)
                    {
                        // Postings without a name element are skipped.
                        continue;
                    }

                    // Initializer members are evaluated top to bottom, matching the
                    // original assignment order.
                    // Assumes the name element carries an href attribute - TODO confirm.
                    jobUrls.Add(new JobDto
                    {
                        Name        = infoNode.InnerText,
                        Url         = UrlHelpers.ProcessUrl(infoNode.Attributes["href"].Value),
                        Salary      = Selectors.SelectName(node, _scrapeSettings.Salary),
                        CompanyName = Selectors.SelectCompany(node, _scrapeSettings.Company),
                        Logourl     = Selectors.SelectLogoUrl(node, _scrapeSettings.LogoUrl)
                    });
                }
            }

            return jobUrls;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Walks the paginated listing whose pages are addressed as <c>{url}-{index}</c>
        /// and collects one <c>JobDto</c> for every posting that contains a name element.
        /// </summary>
        /// <param name="pageLimit">
        /// Maximum number of pages to request; page indices run from 0 to
        /// <paramref name="pageLimit"/> - 1.
        /// </param>
        /// <returns>
        /// The jobs collected from all visited pages, in page order. Empty when no
        /// postings were found or <paramref name="pageLimit"/> is not positive.
        /// </returns>
        public IEnumerable <JobDto> ScrapeUrls(int pageLimit)
        {
            var jobUrls = new List <JobDto>();

            for (int i = 0; i < pageLimit; i++)
            {
                // Pause before every page request.
                Thread.Sleep(delay);

                var html = _scraper.GetHtml($"{url}-{i}");

                // Materialize once so the selector is not re-run by both the
                // emptiness check and the loop below.
                var postings = html.CssSelect(_scrapeSettings.Posting).ToList();

                if (postings.Count == 0)
                {
                    // A page without postings means we ran past the last page - stop scraping.
                    break;
                }

                foreach (var node in postings)
                {
                    // Single pass over the name selector instead of Any() followed by First().
                    var nameInfoNode = node.CssSelect(_scrapeSettings.Name).FirstOrDefault();
                    if (nameInfoNode == null)
                    {
                        // Postings without a name element are skipped.
                        continue;
                    }

                    // Initializer members are evaluated top to bottom, matching the
                    // original assignment order.
                    jobUrls.Add(new JobDto
                    {
                        Name        = nameInfoNode.InnerText,
                        // The selected URL is appended to the site root as-is.
                        Url         = "https://www.cvmarket.lt/" + Selectors.SelectUrl(node, _scrapeSettings.Url),
                        Salary      = Selectors.SelectName(node, _scrapeSettings.Salary),
                        CompanyName = Selectors.SelectCompany(node, _scrapeSettings.Company),
                        Logourl     = Selectors.SelectLogoUrl(node, _scrapeSettings.LogoUrl)
                    });
                }
            }

            return jobUrls;
        }