/// <summary>
/// Console entry point: asks for a city and a search term, downloads the matching
/// craigslist.org results page and prints every element the scraper returns.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // SCRAPING PROCESS
    //   1. Download page content
    //   2. Use regular expressions & builder pattern to create search criteria -
    //      search through the elements we want in the HTML
    //   3. Scrape
    try
    {
        Console.WriteLine("Please enter which city you would like to scrape information from:");
        var craigslistCity = Console.ReadLine() ?? string.Empty;

        Console.WriteLine("Please enter the CraigsList category that you would like to scrape:");
        var craigslistCategoryName = Console.ReadLine() ?? string.Empty;

        // The text ends up as a query-string value, so it has to be percent-encoded;
        // otherwise characters such as '&', '#', '+' or spaces change or break the URL.
        var encodedQuery = Uri.EscapeDataString(craigslistCategoryName);

        using (WebClient client = new WebClient())
        {
            // Spaces are stripped from the city because it is used as the host-name prefix.
            string content = client.DownloadString($"http://{craigslistCity.Replace(" ", string.Empty)}.craigslist.org/{Method}/sss?query={encodedQuery}");

            ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
                .WithData(content)
                // Regex to pick out the result-title <a> elements from the HTML.
                .WithRegex(@"<a href=\""(.*?)\"" data-id=\""(.*?)\"" class=\""result-title hdrlnk\"">(.*?)</a>")
                .WithRegexOption(RegexOptions.ExplicitCapture)
                // Parts inside each matched link - a builder within a builder.
                .WithPart(new ScrapeCriteriaPartBuilder()
                    .WithRegex(@">(.*?)</a>")
                    .WithRegexOption(RegexOptions.Singleline)
                    .Build())
                .WithPart(new ScrapeCriteriaPartBuilder()
                    .WithRegex(@"href=\""(.*?)\""")
                    .WithRegexOption(RegexOptions.Singleline)
                    .Build())
                .Build(); // Now building all of it

            Scraper scraper = new Scraper();
            var scrapedElements = scraper.Scrape(scrapeCriteria);

            if (scrapedElements.Any())
            {
                foreach (var scrapedElement in scrapedElements)
                {
                    Console.WriteLine(scrapedElement);
                }
            }
            else
            {
                Console.WriteLine("There were no matches for the specified scrape criteria");
            }
        }
    }
    catch (Exception ex)
    {
        // Any failure (bad host, network error, scraper error) is reported as its message only.
        Console.WriteLine(ex.Message);
    }
}
/// <summary>
/// Prompts for a city and a category, downloads the corresponding craigslist.org page
/// and writes every element returned by the scraper to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        Console.Write("Enter the city to scrape information from: ");
        var craigslistCity = Console.ReadLine() ?? string.Empty;

        Console.Write("Enter the CraigList category: ");
        var craigslistCategory = Console.ReadLine() ?? string.Empty;

        using (var webClient = new WebClient())
        {
            // The city (spaces removed) is the sub-domain; the category is the last path segment.
            string pageHtml = webClient.DownloadString($"http://{craigslistCity.Replace(" ", string.Empty)}.craigslist.org/{Method}/{craigslistCategory}");

            // Outer criteria: the whole result-title anchor.
            var criteriaBuilder = new ScrapeCriteriaBuilder()
                .WithData(pageHtml)
                .WithRegex(@"<a href=\""(.*?)\"" data-id=\""(.*?)\"" class=\""result-title hdrlnk\"">(.*?)</a>")
                .WithRegexOption(RegexOptions.ExplicitCapture);

            // Inner parts applied to each anchor: first the link text, then the href value.
            var linkTextPart = new ScrapeCriteriaPartBuilder()
                .WithRegex(@">(.*?)</a>")
                .WithRegexOption(RegexOptions.Singleline)
                .Build();

            var hrefPart = new ScrapeCriteriaPartBuilder()
                .WithRegex(@"href=\""(.*?)\""")
                .WithRegexOption(RegexOptions.Singleline)
                .Build();

            ScrapeCriteria criteria = criteriaBuilder
                .WithPart(linkTextPart)
                .WithPart(hrefPart)
                .Build();

            var matches = new Scraper().Scrape(criteria);

            if (!matches.Any())
            {
                Console.WriteLine("There were no matches for the specified scrape criteria.");
            }
            else
            {
                foreach (var match in matches)
                {
                    Console.WriteLine(match);
                }
            }
        }

        // Trailing blank line after the results.
        Console.WriteLine();
    }
    catch (Exception error)
    {
        Console.WriteLine(error.Message);
    }
}
/// <summary>
/// Console entry point: reads a city and a category, downloads the matching
/// craigslist.org page over HTTPS, prints the scraped elements and then waits for Enter.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        Console.WriteLine("Enter which city you would like to scrape info from:");
        // ReadLine returns null once standard input is exhausted (e.g. redirected/closed),
        // so fall back to an empty string instead of dereferencing null below.
        var cityName = Console.ReadLine() ?? string.Empty;

        Console.WriteLine("Enter which the CraigsList category:");
        var categoryName = Console.ReadLine() ?? string.Empty;

        using (WebClient client = new WebClient())
        {
            // Spaces are removed from the city because it forms the host-name prefix.
            string content = client.DownloadString($"https://{cityName.Replace(" ", string.Empty)}.craigslist.org/{Method}/{categoryName}");

            ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
                .WithData(content)
                // Matches the result-title anchors in the downloaded HTML.
                .WithRegex(@"<a href=\""(.*?)\"" data-id=\""(.*?)\"" class=\""result-title hdrlnk\"">(.*?)</a>")
                .WithRegexOption(RegexOptions.ExplicitCapture)
                // Part 1: the text between the anchor tags.
                .WithParts(new ScrapeCriteriaPartBuilder()
                    .WithRegex(@">(.*?)</a>")
                    .WithRegexOption(RegexOptions.Singleline)
                    .Build())
                // Part 2: the href attribute value.
                .WithParts(new ScrapeCriteriaPartBuilder()
                    .WithRegex(@"href=\""(.*?)\""")
                    .WithRegexOption(RegexOptions.Singleline)
                    .Build())
                .Build();

            Scraper scraper = new Scraper();
            var scrapedElements = scraper.Scrape(scrapeCriteria);

            if (scrapedElements.Any())
            {
                foreach (var scrapedElement in scrapedElements)
                {
                    Console.WriteLine(scrapedElement);
                }
            }
            else
            {
                Console.WriteLine("there were no matches for scraping");
            }
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}
/// <summary>
/// Console entry point: reads a city and a category, downloads the matching
/// craigslist.org page and prints every element the scraper returns.
/// Exceptions are not handled here and propagate out of <c>Main</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.WriteLine("Please enter which city you would like to scrape:");
    var craigsListCity = Console.ReadLine() ?? string.Empty;

    Console.WriteLine("Please enter the CraigsList category you would like to scrape:");
    var craigsListCategory = Console.ReadLine() ?? string.Empty;

    using (WebClient client = new WebClient())
    {
        // Spaces are removed from the city because it forms the host-name prefix.
        string content = client.DownloadString($"http://{craigsListCity.Replace(" ", string.Empty)}.craigslist.org/{Method}/{craigsListCategory}");

        ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
            .WithData(content)
            // Matches anchors with href, data-id and class="result-title hdrlnk".
            // The class name is hyphenated; "result=title" would never match such an anchor.
            .WithRegex(@"<a href=\""(.*?)\"" data-id=\""(.*?)\"" class=\""result-title hdrlnk\"">(.*?)</a>")
            .WithRegexOption(System.Text.RegularExpressions.RegexOptions.ExplicitCapture)
            // Part 1: the text between the anchor tags.
            .WithPart(new ScrapeCriteriaPartBuilder()
                .WithRegex(@">(.*?)</a>")
                .WithRegexOption(System.Text.RegularExpressions.RegexOptions.Singleline)
                .Build())
            // Part 2: the href attribute value.
            .WithPart(new ScrapeCriteriaPartBuilder()
                .WithRegex(@"href=\""(.*?)\""")
                .WithRegexOption(System.Text.RegularExpressions.RegexOptions.Singleline)
                .Build())
            .Build();

        Scraper scraper = new Scraper();
        var scrapedElements = scraper.Scrape(scrapeCriteria);

        if (scrapedElements.Any())
        {
            foreach (var scrapedElement in scrapedElements)
            {
                Console.WriteLine(scrapedElement);
            }
        }
        else
        {
            Console.WriteLine("no matches");
        }
    }
}
/// <summary>
/// Console entry point: asks for a city, downloads that city's page and lists the
/// categories scraped from it, lets the user pick a category value, downloads the
/// page for that category and prints the scraped elements.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        Console.WriteLine("Please enter the city you would like to scrape information from:");
        var craigsListCity = Console.ReadLine() ?? String.Empty;

        // First pass: city only, used to discover the available categories.
        WebData webData = new WebDataBuilder()
            .WithCity(craigsListCity)
            .Build();

        WebDownloader downloadContent = new WebDownloader();
        Content = downloadContent.DownloadContentFrom(webData);

        CategoryScraper scrapeCategory = new CategoryScraper();
        Categories = scrapeCategory.GetCategoryFrom(Content);

        // Used as-is when no category list could be scraped.
        var userCategory = "sss";

        if (Categories.Any())
        {
            int x = Categories.Count;

            // Entries are consumed in pairs: [c] is printed as the value, [c + 1] as the
            // category label. The bound is "c + 1 < x" so a trailing unpaired entry
            // (odd count) is skipped instead of indexing past the end of the list.
            for (int c = 0; c + 1 < x; c += 2)
            {
                Console.WriteLine("Category: {0}, Value: {1}", Categories[c + 1], Categories[c]);
                Console.WriteLine();
            }

            Console.Write("Please enter the \"Value\" of the category you'd like to scrape elements from:");
            userCategory = Console.ReadLine() ?? String.Empty;
        }
        else
        {
            Console.WriteLine("There were no elements found in the category list.");
            Console.Write("A default category will be chosen for you.");
        }

        // Second pass: city plus the chosen (or default) category.
        webData = new WebDataBuilder()
            .WithCity(craigsListCity)
            .WithCategory(userCategory)
            .Build();

        Content = downloadContent.DownloadContentFrom(webData);

        // Need to check for errors on userCategory
        // https://boston.craigslist.org/search      link example for city only
        // https://boston.craigslist.org/search/cta  link example w/ category

        ScrapeCriteria scrapeCriteria = new ScrapeCriteriaBuilder()
            .WithData(Content)
            .WithRegex(@"<a href=""(.*?)"" data-id=""(.*?)"" class=""(.*?)"">(.*?)</a>") // this regex pattern works
            .WithRegexOption(RegexOptions.ExplicitCapture)
            // Part 1: text between '>' and the next '<'.
            .WithParts(new ScrapeCriteriaPartBuilder()
                .WithRegex(@">(.*?)<")
                .WithRegexOption(RegexOptions.Singleline)
                .Build())
            // Part 2: the href attribute value.
            .WithParts(new ScrapeCriteriaPartBuilder()
                .WithRegex(@"href=""(.*?)""")
                .WithRegexOption(RegexOptions.Singleline)
                .Build())
            .Build();

        Scraper scraper = new Scraper();
        var scrapedElements = scraper.Scrape(scrapeCriteria);

        if (scrapedElements.Any())
        {
            int count = 1;
            foreach (var scrapedElement in scrapedElements)
            {
                Console.WriteLine(scrapedElement);

                // Blank line after every second element, so output is grouped in twos.
                if (count % 2 == 0)
                {
                    Console.WriteLine();
                }

                count++;
            }
        }
        else
        {
            Console.WriteLine("There were no matches found for the specified scrape Criteria.");
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("There was an error found: {0}", ex.Message);
    }

    // Runs on both the success and the error path: pause before the console closes.
    Console.WriteLine();
    Console.WriteLine("The program will close shortly, please acknowledge by pressing any key.");
    Console.ReadKey();
}