Exemple #1
0
        /// <summary>
        /// Loads the HTML document at <paramref name="url"/> through
        /// <c>CustomWebClient.GetHtmlDocument</c> and extracts the text of the
        /// single node selected by <paramref name="xpath"/>.
        /// </summary>
        /// <param name="url">Address of the page to load.</param>
        /// <param name="xpath">XPath expression passed to <c>SelectSingleNode</c>.</param>
        /// <returns>
        /// The node's inner text with surrounding whitespace trimmed, or
        /// <see cref="string.Empty"/> when the document could not be obtained
        /// or no node matches the expression.
        /// </returns>
        public static string GetJobDescription(string url, string xpath)
        {
            var document = CustomWebClient.GetHtmlDocument(url);

            // No document means there is nothing to query.
            if (document == null)
            {
                return string.Empty;
            }

            var descriptionNode = document.DocumentNode.SelectSingleNode(xpath);

            return descriptionNode == null
                ? string.Empty
                : descriptionNode.InnerText.Trim();
        }
Exemple #2
0
        /// <summary>
        /// Initiates the scraping procedure for the URL currently entered in
        /// <c>Form1.UrlBox</c>.
        /// </summary>
        /// <remarks>
        /// Each pass downloads the current page, extracts its jobs, appends them
        /// to <c>Jobs</c> and to the grid, advances the page query parameter,
        /// refreshes the job-count statistic and sleeps for
        /// <c>Constants.SleepTime</c>. The loop always runs at least once and
        /// repeats only while <c>Options.ScrapAllPages</c> is set.
        /// </remarks>
        public static void StartScraping()
        {
            Website = new Website(Form1.UrlBox.Text);

            while (true)
            {
                var page      = CustomWebClient.GetHtmlDocument(Website.Url.AbsoluteUri);
                var host      = Website.Url.Host;
                var pageJobs  = GetJobs(page, Constants.WebsiteXpaths[host], Options.DescriptionScraping);

                // An empty page means there is nothing more to collect.
                if (pageJobs.Count == 0)
                {
                    break;
                }

                // When the website has a known job limit, stop once the jobs
                // collected so far have reached it.
                if (Website.MaxWebsiteJobs.HasValue && Website.MaxWebsiteJobs.Value <= Jobs.Count)
                {
                    break;
                }

                // For CVbankas the total job count can be read from the page,
                // so record it the first time through.
                if (!Website.MaxWebsiteJobs.HasValue && host == Constants.Website.CVbankas)
                {
                    Website.MaxWebsiteJobs = GetCVbankasScrapingEnd(page, Constants.WebsiteXpaths[host]);
                }

                Jobs.AddRange(pageJobs);

                var gridRows = pageJobs.Select(job => job.GetDataGridViewRow()).ToArray();
                Form1.Grid.Rows.AddRange(gridRows);

                // Move on to the next page by incrementing the page query parameter.
                Website.Url = Website.Url.IncreaseQueryIntegerValue(Constants.WebsiteParam.Page);

                Statistics.DisplayTotalJobCount(Jobs.Count);
                Thread.Sleep(Constants.SleepTime);

                // Single-page mode: one pass is all that was requested.
                if (!Options.ScrapAllPages)
                {
                    break;
                }
            }
        }