/// <summary>
/// Downloads the ESPN list of varsity esports programs, reads one school per
/// matching table row, posts each school to <c>ScraperService</c>, and prints
/// the full list as JSON.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string sourceUrl = "http://www.espn.com/esports/story/_/id/21152905/college-esports-list-varsity-esports-programs-north-america";

    ScraperService scraperService = new ScraperService();
    List<School_Scraper> results = new List<School_Scraper>();

    using (var client = new HttpClient())
    // Main is synchronous, so the download is blocked on here. GetAwaiter().GetResult()
    // rethrows the original exception instead of wrapping it in an AggregateException.
    // The stream is disposed together with the client once parsing is finished.
    using (var html = client.GetStreamAsync(sourceUrl).GetAwaiter().GetResult())
    {
        var parser = new HtmlParser();
        var document = parser.Parse(html);

        // Loop through the table rows to scrape the data.
        foreach (var tr in document.QuerySelectorAll("table.inline-table tr.last"))
        {
            var name = tr.QuerySelector("td");
            var state = tr.QuerySelector("td:nth-child(2)");
            var athletics = tr.QuerySelector("td:nth-child(3)");

            // A row without all three cells cannot describe a school; skip it
            // rather than failing the whole run on a null cell.
            if (name == null || state == null || athletics == null)
            {
                continue;
            }

            var school = new School_Scraper
            {
                Name = name.TextContent,
                State = state.TextContent,
                Athletics = athletics.TextContent
            };
            results.Add(school);

            // Give the school objects to the service one at a time.
            scraperService.Post(school);
        }
    }

    // Look at the list of schools we just scraped.
    Console.WriteLine(JsonConvert.SerializeObject(results));
}
/// <summary>
/// Scrapes LinkedIn job-search result pages with headless Chrome, keeps the
/// postings whose titles pass the keyword filter, and posts the collected
/// jobs to <c>ScraperService</c> using the "LIConnection" connection string.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int pageSize = 50;

    // The title must contain this word; it needs to be changed with each search.
    const string requiredKeyword = "Software";

    // A title containing any of these substrings (case-sensitive) is skipped.
    string[] excludedKeywords =
    {
        "Senior", "Sr", "Lead", "Principal", "Java", "Clearance", "Graphics",
        "Android", "iOS", "Wordpress", "WordPress", "PHP", "Ruby", "Manager",
        "Design", "UI", "Python", "HTML", "CSS", "Salesforce", "SENIOR",
        "Analyst", "SR"
    };

    List<JobPosting> jobs = new List<JobPosting>();
    string initialUrl = "https://www.linkedin.com/jobs/search?keywords=Software+Developer&distance=15&locationId=PLACES%2Eus%2E7-1-0-19-99&f_TP=1%2C2&f_E=3%2C2&orig=FCTD&trk=jobs_jserp_facet_exp";

    ChromeOptions options = new ChromeOptions();
    options.AddArgument("--headless");
    options.AddArgument("--incognito");
    options.AddArgument("--ignore-certificate-errors");

    // One driver is reused for every page and disposed when scraping ends, so no
    // Chrome process is left running (previously one was started per page and
    // none was ever closed).
    using (IWebDriver chromeDriver = new ChromeDriver(options))
    {
        var parser = new HtmlParser();

        // Assigning Url navigates the browser to that address.
        chromeDriver.Url = PageUrl(0);
        var doc = parser.Parse(chromeDriver.PageSource);

        int totalListings = ParseCount(
            doc.QuerySelector("div.results-context > div > strong")?.TextContent);
        AddJobs();

        // Ceiling division: a partially filled last page still counts as a page.
        int pages = (totalListings + pageSize - 1) / pageSize;
        for (int page = 1; page < pages; page++)
        {
            chromeDriver.Url = PageUrl(page);
            doc = parser.Parse(chromeDriver.PageSource);
            AddJobs();
        }

        // Builds the search URL for a zero-based page index (start is 1-based).
        string PageUrl(int pageIndex)
        {
            int start = pageIndex * pageSize + 1;
            return initialUrl + "&start=" + start + "&count=" + pageSize;
        }

        // Parses the total-results text; tolerates a missing element and
        // thousands separators ("1,234"). Returns 0 when no count is available,
        // in which case only the first page is scraped.
        int ParseCount(string text)
        {
            int count;
            bool parsed = int.TryParse(
                text,
                System.Globalization.NumberStyles.Integer | System.Globalization.NumberStyles.AllowThousands,
                System.Globalization.CultureInfo.InvariantCulture,
                out count);
            if (!parsed && text != null)
            {
                Console.Error.WriteLine("Could not read the total listing count from '" + text + "'; scraping the first page only.");
            }
            return parsed ? count : 0;
        }

        // True when the title has the required keyword and none of the excluded ones.
        bool IsWantedTitle(string title)
        {
            return title.Contains(requiredKeyword)
                && !Array.Exists(excludedKeywords, keyword => title.Contains(keyword));
        }

        // Adds every acceptable posting found in the currently parsed page to jobs.
        void AddJobs()
        {
            foreach (var item in doc.QuerySelectorAll("li.job-listing"))
            {
                var listing = item.QuerySelector("div.job-details");
                var title = listing?.QuerySelector("span.job-title-text")?.TextContent;

                // Skip listings without details or a title instead of crashing on them.
                if (title == null || !IsWantedTitle(title))
                {
                    continue;
                }

                JobPosting job = new JobPosting();
                job.JobTitle = title;
                job.PostDate = listing.QuerySelector("span.date-posted-or-new")?.TextContent;
                job.Company = listing.QuerySelector("span.company-name-text")?.TextContent;

                // Drop the ", US" suffix; Replace leaves other locations untouched.
                string location = listing.QuerySelector("span.job-location > span")?.TextContent;
                job.Location = location?.Replace(", US", "");

                job.JobDescription = listing.QuerySelector("div.job-description")?.TextContent;

                // Job link: read href straight from the parsed element (no XML
                // round-trip, which fails on HTML that is not well-formed XML)
                // and strip the query string.
                string href = listing.QuerySelector("a.job-title-link")?.GetAttribute("href");
                if (href != null)
                {
                    job.Url = href.Split('?')[0];
                }

                jobs.Add(job);
            }
        }
    }

    ScraperService scraperService = new ScraperService(ConfigurationManager.ConnectionStrings["LIConnection"].ConnectionString);
    scraperService.Post(jobs);
}