/// <summary>
/// Parses a search-result page. One <see cref="FundaObject"/> is produced for every
/// <c>div</c> with class "search-result-content-inner", and the "volgende" (next)
/// link is recorded when the pager contains one.
/// </summary>
/// <param name="data">HTML of the result page.</param>
/// <returns>The houses found on the page together with the next-page information.</returns>
public FundaResult ParsePage(string data)
{
    var parsed = new FundaResult();
    var html = new HtmlDocument();
    html.LoadHtml(data);

    IEnumerable<HtmlNode> listings = html.DocumentNode.GetNodesForClass("div", "search-result-content-inner");
    foreach (HtmlNode listing in listings)
    {
        var entry = new FundaObject();

        // The title <h3> feeds both the street address and the postcode/place parsers.
        HtmlNode titleNode = listing.GetNodesForClass("h3", "search-result-title").First();
        entry.StraatAdres = ParseAddress(titleNode.InnerText);
        entry.PostCodePlaats = ParsePostcodePlaats(titleNode);

        parsed.FundaObjects.Add(entry);
    }

    var pagerItems = html.DocumentNode.GetNodesForClass("li", "previous-next-page").ToList();
    for (int i = 0; i < pagerItems.Count; i++)
    {
        HtmlNode anchor = pagerItems[i].Descendants("a").First();

        // Only the pager entry whose link text mentions "volgende" points to the next page.
        bool isNextLink = anchor.InnerText.IndexOf("volgende", StringComparison.OrdinalIgnoreCase) >= 0;
        if (!isNextLink)
        {
            continue;
        }

        parsed.HasNextPage = true;
        string relativeUrl = anchor.Attributes["href"].Value;
        parsed.NextPage = new Uri(_baseUrl, relativeUrl);
    }

    return parsed;
}
/// <summary>
/// Lazily yields every house reachable from <paramref name="startUrl"/>, following
/// next-page links for as long as the parser reports one.
/// </summary>
/// <param name="startUrl">URL of the first result page to fetch.</param>
/// <returns>
/// A deferred sequence of houses; a page is only fetched and parsed once the consumer
/// has enumerated past the houses of the previous page.
/// </returns>
public IEnumerable<FundaObject> GetAllHouses(Uri startUrl)
{
    Uri pageUrl = startUrl;
    bool morePages;

    // The first page is always fetched, hence do/while.
    do
    {
        string html = _webRepo.GetHtml(pageUrl);
        FundaResult page = _parser.ParsePage(html);

        foreach (FundaObject house in page.FundaObjects)
        {
            yield return house;
        }

        morePages = page.HasNextPage;
        pageUrl = page.NextPage;
    }
    while (morePages);
}
/// <summary>
/// Reads the houses and the pager from one result page.
/// </summary>
/// <param name="data">The page's HTML markup.</param>
/// <returns>
/// A <see cref="FundaResult"/> holding one <see cref="FundaObject"/> per
/// "search-result-content-inner" block and, when a "volgende" link is present,
/// the absolute URI of the next page.
/// </returns>
public FundaResult ParsePage(string data)
{
    HtmlDocument document = new HtmlDocument();
    FundaResult pageResult = new FundaResult();
    document.LoadHtml(data);

    HtmlNode root = document.DocumentNode;

    foreach (HtmlNode block in root.GetNodesForClass("div", "search-result-content-inner"))
    {
        FundaObject home = new FundaObject();
        HtmlNode heading = block.GetNodesForClass("h3", "search-result-title").First();

        // Same heading node is used twice: its text for the address, the node itself for postcode/place.
        home.StraatAdres = ParseAddress(heading.InnerText);
        home.PostCodePlaats = ParsePostcodePlaats(heading);
        pageResult.FundaObjects.Add(home);
    }

    var pagerEntries = root.GetNodesForClass("li", "previous-next-page").ToList();
    foreach (HtmlNode pagerEntry in pagerEntries)
    {
        HtmlNode hyperlink = pagerEntry.Descendants("a").First();
        string caption = hyperlink.InnerText;

        // Case-insensitive search for the Dutch "next" label inside the link text.
        if (caption.IndexOf("volgende", StringComparison.OrdinalIgnoreCase) != -1)
        {
            pageResult.HasNextPage = true;
            pageResult.NextPage = new Uri(_baseUrl, hyperlink.Attributes["href"].Value);
        }
    }

    return pageResult;
}
/// <summary>
/// Parses one search-result page: builds a <see cref="FundaObject"/> for every
/// <c>div</c> with class "specs" (address, surface, rent/purchase price, postcode row)
/// and records the "paging next" link when the page has one.
/// </summary>
/// <param name="data">Raw HTML of the result page.</param>
/// <returns>The parsed houses plus next-page information.</returns>
/// <exception cref="InvalidOperationException">
/// A "specs" block contains no <c>h3</c>, or its first <c>h3</c> contains no link.
/// </exception>
public FundaResult ParsePage(string data)
{
    FundaResult result = new FundaResult();
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(data);

    IEnumerable<HtmlNode> houseNodes = doc.DocumentNode.GetNodesForClass("div", "specs");
    foreach (HtmlNode house in houseNodes)
    {
        FundaObject fundaObject = new FundaObject();

        // The address is the text of the first link inside the first <h3>.
        var streetHouseNumber = house.Descendants("h3").First().Descendants("a").First();
        fundaObject.StraatAdres = ParseAddress(streetHouseNumber.InnerText);

        var liItems = house.GetNodesForThatDoesNotHaveClass("li", "object-tagline").ToList();

        // Surface: first space-separated token of the span titled "Oppervlakte".
        var surface = house.GetNodesForAttribute("span", "title", "Oppervlakte").FirstOrDefault();
        string surfaceValue = surface?.InnerText
            .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
            .FirstOrDefault();

        // TryParse instead of Parse: a token that is not a plain number must not abort
        // the whole page; the surface is simply left unset, like every other optional field.
        int parsedSurface;
        if (int.TryParse(surfaceValue, NumberStyles.AllowThousands, nlCulture, out parsedSurface))
        {
            fundaObject.Surface = parsedSurface;
        }

        var pricesNode = house.GetNodesForClass("span", "price-wrapper").FirstOrDefault();
        if (pricesNode != null)
        {
            // A wrapper can list several prices separated by <br>; each fragment is parsed on its own.
            var prices = pricesNode.InnerHtml.Split(new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);
            foreach (var s in prices)
            {
                HtmlDocument node = new HtmlDocument();
                node.LoadHtml(s);
                var price = node.DocumentNode.GetNodesForClass("span", "price").FirstOrDefault();
                var priceExt = node.DocumentNode.GetNodesForClass("abbr", "price-ext").FirstOrDefault();

                // Rental when the extension says "Huurprijs" or the amount is per month/year.
                // Parentheses only make the original && / || precedence explicit.
                bool isRental =
                    (priceExt != null && priceExt.InnerText.Contains("Huurprijs"))
                    || (price != null && (price.InnerText.Contains("/mnd") || price.InnerText.Contains("/jr")));

                if (isRental)
                {
                    if (price != null)
                    {
                        fundaObject.Huurprijs = ParsePrice(price.InnerText);
                    }

                    if (priceExt != null)
                    {
                        fundaObject.HuurprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                    }
                }
                else
                {
                    if (price != null)
                    {
                        fundaObject.Koopprijs = ParsePrice(price.InnerText);
                    }

                    if (priceExt != null)
                    {
                        fundaObject.KoopprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                    }
                }
            }
        }

        // NOTE(review): requires more than one <li> yet reads only the first one;
        // kept as-is, confirm whether "Count > 0" was intended.
        if (liItems.Count > 1)
        {
            var address = liItems[0].InnerText;
            fundaObject.PostCodePlaats = ParseZipCodeRow(address);
        }

        result.FundaObjects.Add(fundaObject);
    }

    var nextPage = doc.DocumentNode.GetNodesForClass("a", "paging next").ToList();
    if (nextPage.Any())
    {
        // Only report a next page when the link really carries an href; otherwise
        // dereferencing the missing attribute would throw a NullReferenceException.
        var href = nextPage[0].Attributes["href"];
        if (href != null)
        {
            result.HasNextPage = true;
            result.NextPage = new Uri(_baseUrl, href.Value);
        }
    }

    return result;
}
/// <summary>
/// Turns the HTML of a search-result page into a <see cref="FundaResult"/>.
/// Every <c>div</c> with class "specs" yields one <see cref="FundaObject"/> carrying the
/// address, the optional surface, rent/purchase prices and the postcode row; the
/// "paging next" anchor, when present, supplies the next-page URI.
/// </summary>
/// <param name="data">Raw HTML of the result page.</param>
/// <returns>The houses on the page and whether/where a next page exists.</returns>
/// <exception cref="InvalidOperationException">
/// A "specs" block has no <c>h3</c> element, or its first <c>h3</c> has no link.
/// </exception>
public FundaResult ParsePage(string data)
{
    FundaResult result = new FundaResult();
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(data);

    IEnumerable<HtmlNode> houseNodes = doc.DocumentNode.GetNodesForClass("div", "specs");
    foreach (HtmlNode house in houseNodes)
    {
        FundaObject fundaObject = new FundaObject();

        // First link of the first <h3> holds the street and house number text.
        var streetHouseNumber = house.Descendants("h3").First().Descendants("a").First();
        fundaObject.StraatAdres = ParseAddress(streetHouseNumber.InnerText);

        var liItems = house.GetNodesForThatDoesNotHaveClass("li", "object-tagline").ToList();

        // The surface is taken from the first space-separated token of the "Oppervlakte" span.
        var surface = house.GetNodesForAttribute("span", "title", "Oppervlakte").FirstOrDefault();
        string surfaceValue = surface?.InnerText
            .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries)
            .FirstOrDefault();

        // A non-numeric token (or a missing span, i.e. null) leaves Surface untouched
        // instead of throwing and discarding every house parsed so far.
        int surfaceNumber;
        bool surfaceIsNumeric = int.TryParse(surfaceValue, NumberStyles.AllowThousands, nlCulture, out surfaceNumber);
        if (surfaceIsNumeric)
        {
            fundaObject.Surface = surfaceNumber;
        }

        var pricesNode = house.GetNodesForClass("span", "price-wrapper").FirstOrDefault();
        if (pricesNode != null)
        {
            // Multiple prices are separated by <br>; every fragment is loaded as its own document.
            var prices = pricesNode.InnerHtml.Split(new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);
            foreach (var s in prices)
            {
                HtmlDocument node = new HtmlDocument();
                node.LoadHtml(s);
                var price = node.DocumentNode.GetNodesForClass("span", "price").FirstOrDefault();
                var priceExt = node.DocumentNode.GetNodesForClass("abbr", "price-ext").FirstOrDefault();

                // Same evaluation as the unparenthesised original: (ext says rent) OR (amount is per month/year).
                bool extensionSaysRent = priceExt != null && priceExt.InnerText.Contains("Huurprijs");
                bool isRental = extensionSaysRent
                    || (price != null && (price.InnerText.Contains("/mnd") || price.InnerText.Contains("/jr")));

                if (isRental)
                {
                    if (price != null)
                    {
                        fundaObject.Huurprijs = ParsePrice(price.InnerText);
                    }

                    if (priceExt != null)
                    {
                        fundaObject.HuurprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                    }
                }
                else
                {
                    if (price != null)
                    {
                        fundaObject.Koopprijs = ParsePrice(price.InnerText);
                    }

                    if (priceExt != null)
                    {
                        fundaObject.KoopprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                    }
                }
            }
        }

        // NOTE(review): the guard demands at least two <li> items but only index 0 is used;
        // left unchanged, verify against the page layout.
        if (liItems.Count > 1)
        {
            var address = liItems[0].InnerText;
            fundaObject.PostCodePlaats = ParseZipCodeRow(address);
        }

        result.FundaObjects.Add(fundaObject);
    }

    var nextPage = doc.DocumentNode.GetNodesForClass("a", "paging next").ToList();
    if (nextPage.Any())
    {
        // Guard against an anchor without href: the attribute lookup yields null in that
        // case, and flagging a next page without a usable URI would mislead the caller.
        var hrefAttribute = nextPage[0].Attributes["href"];
        if (hrefAttribute != null)
        {
            result.HasNextPage = true;
            string urlPath = hrefAttribute.Value;
            result.NextPage = new Uri(_baseUrl, urlPath);
        }
    }

    return result;
}