// Returns the job description text found at the given XPath, or an empty string when the page or node is missing.
public static string GetJobDescription(string url, string xpath)
{
    var htmlDoc = CustomWebClient.GetHtmlDocument(url);
    var htmlBody = htmlDoc?.DocumentNode.SelectSingleNode(xpath);

    if (htmlBody == null)
    {
        return string.Empty;
    }

    return htmlBody.InnerText.Trim();
}
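// The CustomWebClient helper used above is not part of this snippet. Below is a minimal
// sketch of what GetHtmlDocument is assumed to do, using HtmlAgilityPack's HtmlWeb. This is
// an assumption for illustration only (the project may wrap its own download/retry logic);
// it simply mirrors the null-on-failure contract that GetJobDescription relies on.
public static class CustomWebClient
{
    // Download the page and return the parsed document, or null when the request fails.
    public static HtmlAgilityPack.HtmlDocument GetHtmlDocument(string url)
    {
        try
        {
            return new HtmlAgilityPack.HtmlWeb().Load(url);
        }
        catch (System.Net.WebException)
        {
            return null;
        }
    }
}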
// Initiates the scraping procedure.
public static void StartScraping()
{
    Website = new Website(Form1.UrlBox.Text);

    do
    {
        var htmlDoc = CustomWebClient.GetHtmlDocument(Website.Url.AbsoluteUri);
        var newJobs = GetJobs(htmlDoc, Constants.WebsiteXpaths[Website.Url.Host], Options.DescriptionScraping);

        // Stop when the page yields no jobs, or when the site's known job limit has been reached.
        if (newJobs.Count == 0)
        {
            break;
        }
        else if (Website.MaxWebsiteJobs.HasValue && Website.MaxWebsiteJobs.Value <= Jobs.Count)
        {
            break;
        }

        // Determine where parsing ends if the total job count can be known beforehand (CVbankas only).
        if (Website.MaxWebsiteJobs.HasValue == false && Website.Url.Host == Constants.Website.CVbankas)
        {
            Website.MaxWebsiteJobs = GetCVbankasScrapingEnd(htmlDoc, Constants.WebsiteXpaths[Website.Url.Host]);
        }

        Jobs.AddRange(newJobs);
        Form1.Grid.Rows.AddRange(newJobs.Select(x => x.GetDataGridViewRow()).ToArray());

        // Move to the next page by incrementing the page query parameter.
        Website.Url = Website.Url.IncreaseQueryIntegerValue(Constants.WebsiteParam.Page);

        Statistics.DisplayTotalJobCount(Jobs.Count);
        Thread.Sleep(Constants.SleepTime);
    }
    while (Options.ScrapAllPages);
}
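// IncreaseQueryIntegerValue is an extension method that is not included in this snippet.
// Below is a minimal sketch of the assumed behaviour: bump one integer query parameter by one,
// e.g. "?page=2" becomes "?page=3". This is an illustrative assumption, not the project's
// actual implementation (requires a reference to System.Web for HttpUtility).
public static class UriExtensions
{
    public static Uri IncreaseQueryIntegerValue(this Uri uri, string paramName)
    {
        // Parse the existing query string into a name/value collection.
        var query = System.Web.HttpUtility.ParseQueryString(uri.Query);

        // Missing or non-numeric values are treated as page 1.
        int current = int.TryParse(query[paramName], out var value) ? value : 1;
        query[paramName] = (current + 1).ToString();

        // Rebuild the URI with the updated query string.
        var builder = new UriBuilder(uri) { Query = query.ToString() };
        return builder.Uri;
    }
}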