/// <summary>
/// Builds a <see cref="CraiglistSearchPage"/> from the raw HTML of a search results page.
/// </summary>
/// <param name="plainHtml">HTML text of the search results page.</param>
/// <returns>
/// A page object whose <c>Urls</c> holds the <c>href</c> of every anchor matched by
/// <c>//div[@class='content']//p/a</c>, and whose <c>NextUrl</c> is set from the first
/// <c>a</c> element with class <c>button next</c>, if one exists. When no listing anchors
/// match at all, the page is returned as constructed and the next link is not looked up.
/// </returns>
public static CraiglistSearchPage Parse(string plainHtml)
        {
            var page     = new CraiglistSearchPage();
            var document = new HtmlDocument();
            document.LoadHtml(plainHtml);

            var root         = document.DocumentNode;
            var listingLinks = root.SelectNodes("//div[@class='content']//p/a");

            // SelectNodes yields null (not an empty collection) when nothing matches;
            // in that case neither the listing urls nor the next link are extracted.
            if (listingLinks != null)
            {
                foreach (var anchor in listingLinks)
                {
                    var target = anchor.GetAttributeValue("href", null);
                    if (target == null)
                    {
                        // Anchor without an href attribute - nothing to collect.
                        continue;
                    }

                    page.Urls.Add(target);
                }

                // Only the first matching "next" button matters.
                var nextButton = root.SelectSingleNode("//a[@class='button next']");
                if (nextButton != null)
                {
                    page.NextUrl = nextButton.GetAttributeValue("href", null);
                }
            }

            return page;
        }
Пример #2
0
        /// <summary>
        /// Crawls Craigslist by-owner search results for a fixed list of car model names.
        /// For each model it walks the search result pages, reads and parses every listed
        /// car page, and passes each parsed car to <c>CsvWriter.Write</c> together with a
        /// per-model CSV path.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var names = new[] { "civic", "accord", "fit", "corolla", "camry", "mazda+3", "mazda+6", "fusion", "focus" };

            foreach (var name in names)
            {
                var readHtml   = new ReadWaiter(new ReadHtml());
                var cars       = new List<Car>();

                // The output file depends only on the model name, so build the path once per model.
                var outputPath = $@"d:\sergiy\projects\craiglister\output\{name}.csv";

                var searchPageHtml = readHtml.Read(CraiglistUrls.SearchCarByOwnerInNashville(name));

                // Read can come back null (the car page read below is guarded for that reason).
                // A null search page must end pagination for this model instead of being handed
                // to the parser, which loads it into an HtmlDocument and would throw on null.
                while (searchPageHtml != null)
                {
                    var searchPage = CraiglistSearchPage.Parse(searchPageHtml);
                    foreach (var url in searchPage.Urls)
                    {
                        var fullCarUrl = CraiglistUrls.CraigListUrlInNashville(url);
                        var carHtml    = readHtml.Read(fullCarUrl);
                        if (carHtml == null)
                        {
                            // Listing could not be read; skip it and keep going.
                            continue;
                        }

                        var car = CraiglistCarPage.Parse(carHtml);
                        car.Url = fullCarUrl;
                        cars.Add(car);
                        Console.WriteLine($"{name} - {cars.Count}");
                        CsvWriter.Write(car, outputPath);
                    }

                    // No next link, or a next link with an empty href, means this was the last page.
                    if (string.IsNullOrEmpty(searchPage.NextUrl))
                    {
                        break;
                    }

                    var nextUrl = CraiglistUrls.CraigListUrlInNashville(searchPage.NextUrl);
                    searchPageHtml = readHtml.Read(nextUrl);
                }
            }
        }