/// <summary>
/// Parses the HTML of a search results page into a <see cref="CraiglistSearchPage"/>.
/// </summary>
/// <param name="plainHtml">
/// Raw HTML of the search page. A null value is treated as a page with no
/// listings, so a failed download does not blow up inside the HTML parser.
/// </param>
/// <returns>
/// A result whose <c>Urls</c> holds the <c>href</c> of every anchor matched by
/// <c>//div[@class='content']//p/a</c>, and whose <c>NextUrl</c> is the
/// <c>href</c> of the first <c>//a[@class='button next']</c> anchor when one exists.
/// When no listing anchors are found the result is returned as constructed
/// (the next-page link is not looked up in that case).
/// </returns>
public static CraiglistSearchPage Parse(string plainHtml)
{
    var result = new CraiglistSearchPage();

    // HtmlDocument.LoadHtml rejects null input with an ArgumentNullException;
    // return the empty result instead.
    if (plainHtml == null)
    {
        return result;
    }

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(plainHtml);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var listingLinks = htmlDoc.DocumentNode.SelectNodes("//div[@class='content']//p/a");
    if (listingLinks == null)
    {
        return result;
    }

    foreach (var link in listingLinks)
    {
        var href = link.GetAttributeValue("href", null);
        if (href != null)
        {
            result.Urls.Add(href);
        }
    }

    var nextLinks = htmlDoc.DocumentNode.SelectNodes("//a[@class='button next']");
    if (nextLinks != null && nextLinks.Count > 0)
    {
        // Only the first "next" button is used; href may be absent, leaving NextUrl null.
        result.NextUrl = nextLinks[0].GetAttributeValue("href", null);
    }

    return result;
}
/// <summary>
/// Entry point: for each car model name, walks the paginated search results,
/// downloads every listing page, parses it into a car record and writes it
/// to a per-model CSV file.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var names = new[] { "civic", "accord", "fit", "corolla", "camry", "mazda+3", "mazda+6", "fusion", "focus" };

    foreach (var name in names)
    {
        var readHtml = new ReadWaiter(new ReadHtml());

        // The output file depends only on the model name, so build the path once.
        var outputPath = $@"d:\sergiy\projects\craiglister\output\{name}.csv";

        // Number of listings written so far for this model (used for progress output).
        var savedCount = 0;

        var searchPageHtml = readHtml.Read(CraiglistUrls.SearchCarByOwnerInNashville(name));

        // The reader's result is null-checked for listing pages below, so it may
        // be null here as well; stop paging for this model rather than handing
        // null to the search-page parser.
        while (searchPageHtml != null)
        {
            var searchPage = CraiglistSearchPage.Parse(searchPageHtml);

            foreach (var url in searchPage.Urls)
            {
                var fullCarUrl = CraiglistUrls.CraigListUrlInNashville(url);
                var carHtml = readHtml.Read(fullCarUrl);
                if (carHtml == null)
                {
                    // Listing could not be read; skip it and continue with the next one.
                    continue;
                }

                var car = CraiglistCarPage.Parse(carHtml);
                car.Url = fullCarUrl;
                savedCount++;

                Console.WriteLine($"{name} - {savedCount}");
                CsvWriter.Write(car, outputPath);
            }

            // No next-page link means this was the last page of results.
            if (searchPage.NextUrl == null)
            {
                break;
            }

            var nextUrl = CraiglistUrls.CraigListUrlInNashville(searchPage.NextUrl);
            searchPageHtml = readHtml.Read(nextUrl);
        }
    }
}