Пример #1
0
        /// <summary>
        /// Parses the HTML of one search-result page into a <see cref="FundaResult"/>.
        /// </summary>
        /// <param name="data">Raw HTML of the page.</param>
        /// <returns>
        /// A result containing one <see cref="FundaObject"/> per node returned by
        /// <c>GetNodesForClass("div", "search-result-content-inner")</c>. When a pagination item
        /// holds an anchor whose text contains "volgende" and that anchor has a usable
        /// <c>href</c>, <c>HasNextPage</c> is set and <c>NextPage</c> is resolved against
        /// <c>_baseUrl</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A result node has no <c>h3</c> title node (the title is treated as mandatory).
        /// </exception>
        public FundaResult ParsePage(string data)
        {
            FundaResult result = new FundaResult();

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(data);

            IEnumerable<HtmlNode> houseNodes = doc.DocumentNode.GetNodesForClass("div", "search-result-content-inner");

            foreach (HtmlNode house in houseNodes)
            {
                FundaObject fundaObject = new FundaObject();

                // The title node feeds both the street address and the postcode/place parsers.
                var streetHouseNumber = house.GetNodesForClass("h3", "search-result-title").First();
                fundaObject.StraatAdres = ParseAddress(streetHouseNumber.InnerText);
                fundaObject.PostCodePlaats = ParsePostcodePlaats(streetHouseNumber);

                result.FundaObjects.Add(fundaObject);
            }

            foreach (HtmlNode node in doc.DocumentNode.GetNodesForClass("li", "previous-next-page"))
            {
                // A pagination item may carry no anchor at all; skip it instead of
                // letting First() throw.
                HtmlNode link = node.Descendants("a").FirstOrDefault();
                if (link == null)
                {
                    continue;
                }

                if (link.InnerText.IndexOf("volgende", StringComparison.OrdinalIgnoreCase) < 0)
                {
                    continue;
                }

                // The attribute indexer yields null for a missing attribute; without a usable
                // href there is nothing to follow, so the page is treated as having no next page.
                var href = link.Attributes["href"];
                if (href == null || string.IsNullOrWhiteSpace(href.Value))
                {
                    continue;
                }

                result.HasNextPage = true;
                result.NextPage = new Uri(_baseUrl, href.Value);
            }

            return result;
        }
Пример #2
0
        /// <summary>
        /// Lazily yields every <see cref="FundaObject"/> found by following the result pages
        /// starting at <paramref name="startUrl"/>.
        /// </summary>
        /// <param name="startUrl">Address of the first page to download and parse.</param>
        /// <returns>
        /// A deferred sequence: each page is fetched through <c>_webRepo</c> and parsed through
        /// <c>_parser</c> only when enumeration reaches it. Enumeration ends after the first
        /// page whose result reports no next page.
        /// </returns>
        public IEnumerable<FundaObject> GetAllHouses(Uri startUrl)
        {
            Uri currentUrl = startUrl;
            bool hasMore;

            // The first page is always fetched, hence do/while.
            do
            {
                string html = _webRepo.GetHtml(currentUrl);
                FundaResult page = _parser.ParsePage(html);

                foreach (FundaObject house in page.FundaObjects)
                {
                    yield return house;
                }

                hasMore = page.HasNextPage;
                currentUrl = page.NextPage;
            }
            while (hasMore);
        }
Пример #3
0
        /// <summary>
        /// Converts the HTML of a single search-result page into a <see cref="FundaResult"/>.
        /// </summary>
        /// <param name="data">Raw HTML of the page.</param>
        /// <returns>
        /// The parsed listings (one per node returned by
        /// <c>GetNodesForClass("div", "search-result-content-inner")</c>) and, when a pagination
        /// anchor whose text contains "volgende" with a non-blank <c>href</c> exists, the
        /// next-page flag and URI resolved against <c>_baseUrl</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A listing node lacks its <c>h3</c> title node.
        /// </exception>
        public FundaResult ParsePage(string data)
        {
            FundaResult result = new FundaResult();

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(data);

            IEnumerable<HtmlNode> houseNodes = doc.DocumentNode.GetNodesForClass("div", "search-result-content-inner");

            foreach (HtmlNode house in houseNodes)
            {
                // Address and postcode/place are both derived from the same title node.
                var streetHouseNumber = house.GetNodesForClass("h3", "search-result-title").First();

                FundaObject fundaObject = new FundaObject();
                fundaObject.StraatAdres    = ParseAddress(streetHouseNumber.InnerText);
                fundaObject.PostCodePlaats = ParsePostcodePlaats(streetHouseNumber);

                result.FundaObjects.Add(fundaObject);
            }

            var pagingItems = doc.DocumentNode.GetNodesForClass("li", "previous-next-page").ToList();

            foreach (HtmlNode item in pagingItems)
            {
                // FirstOrDefault instead of First: an item without an anchor must not
                // abort parsing of the whole page.
                HtmlNode anchor = item.Descendants("a").FirstOrDefault();
                bool isNextLink = anchor != null &&
                                  anchor.InnerText.IndexOf("volgende", StringComparison.OrdinalIgnoreCase) > -1;

                if (isNextLink)
                {
                    // A missing href attribute comes back as null from the indexer; only
                    // report a next page when there is a target to resolve.
                    var hrefAttribute = anchor.Attributes["href"];
                    string urlPath = hrefAttribute == null ? null : hrefAttribute.Value;

                    if (!string.IsNullOrWhiteSpace(urlPath))
                    {
                        result.HasNextPage = true;
                        result.NextPage    = new Uri(_baseUrl, urlPath);
                    }
                }
            }

            return result;
        }
Пример #4
0
        /// <summary>
        /// Parses the HTML of one result page into a <see cref="FundaResult"/>, reading address,
        /// surface, rental/sale prices and postcode row for every node returned by
        /// <c>GetNodesForClass("div", "specs")</c>.
        /// </summary>
        /// <param name="data">Raw HTML of the page.</param>
        /// <returns>
        /// The parsed listings plus, when a node is returned for <c>("a", "paging next")</c>
        /// and it carries a non-blank <c>href</c>, the next-page flag and URI resolved against
        /// <c>_baseUrl</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A listing node has no <c>h3</c> containing an <c>a</c> element.
        /// </exception>
        public FundaResult ParsePage(string data)
        {
            FundaResult result = new FundaResult();

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(data);

            IEnumerable<HtmlNode> houseNodes = doc.DocumentNode.GetNodesForClass("div", "specs");

            foreach (HtmlNode house in houseNodes)
            {
                FundaObject fundaObject = new FundaObject();

                var streetHouseNumber = house.Descendants("h3").First().Descendants("a").First();
                fundaObject.StraatAdres = ParseAddress(streetHouseNumber.InnerText);
                var liItems = house.GetNodesForThatDoesNotHaveClass("li", "object-tagline").ToList();

                // Surface: first space-separated token of the "Oppervlakte" span.
                var    surface      = house.GetNodesForAttribute("span", "title", "Oppervlakte").FirstOrDefault();
                string surfaceValue = surface?.InnerText.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault();

                // TryParse: scraped text that is not a plain number (or overflows int) leaves
                // Surface unset instead of aborting the whole page with an exception.
                // Null/blank input simply fails the parse as well.
                int surfaceNumber;
                if (int.TryParse(surfaceValue, NumberStyles.AllowThousands, nlCulture, out surfaceNumber))
                {
                    fundaObject.Surface = surfaceNumber;
                }

                var pricesNode = house.GetNodesForClass("span", "price-wrapper").FirstOrDefault();
                if (pricesNode != null)
                {
                    // Each "<br>"-separated fragment is parsed as its own small document.
                    var prices = pricesNode.InnerHtml.Split(new string[] { "<br>" }, StringSplitOptions.RemoveEmptyEntries);
                    foreach (var s in prices)
                    {
                        HtmlDocument node = new HtmlDocument();
                        node.LoadHtml(s);
                        var price    = node.DocumentNode.GetNodesForClass("span", "price").FirstOrDefault();
                        var priceExt = node.DocumentNode.GetNodesForClass("abbr", "price-ext").FirstOrDefault();

                        // Rental when the extension mentions "Huurprijs" or the price text has a
                        // per-month / per-year suffix. Parenthesized to make the grouping explicit.
                        bool isRental =
                            (priceExt != null && priceExt.InnerText.Contains("Huurprijs")) ||
                            (price != null && (price.InnerText.Contains("/mnd") || price.InnerText.Contains("/jr")));

                        if (isRental)
                        {
                            if (price != null)
                            {
                                fundaObject.Huurprijs = ParsePrice(price.InnerText);
                            }
                            if (priceExt != null)
                            {
                                fundaObject.HuurprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                            }
                        }
                        else
                        {
                            if (price != null)
                            {
                                fundaObject.Koopprijs = ParsePrice(price.InnerText);
                            }
                            if (priceExt != null)
                            {
                                fundaObject.KoopprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                            }
                        }
                    }
                }

                // NOTE(review): only the first item is read, yet at least two are required.
                // Kept as in the original; confirm whether "> 0" was intended.
                if (liItems.Count > 1)
                {
                    var address = liItems[0].InnerText;
                    fundaObject.PostCodePlaats = ParseZipCodeRow(address);
                }

                result.FundaObjects.Add(fundaObject);
            }

            // The attribute indexer yields null for a missing href; only report a next page
            // when there is an actual target to resolve.
            HtmlNode nextLink = doc.DocumentNode.GetNodesForClass("a", "paging next").FirstOrDefault();
            string   urlPath  = nextLink?.Attributes["href"]?.Value;

            if (!string.IsNullOrWhiteSpace(urlPath))
            {
                result.HasNextPage = true;
                result.NextPage    = new Uri(_baseUrl, urlPath);
            }

            return result;
        }
Пример #5
0
        /// <summary>
        /// Builds a <see cref="FundaResult"/> from the HTML of one result page. For every node
        /// returned by <c>GetNodesForClass("div", "specs")</c> it extracts the street address,
        /// the surface, rental and/or sale price information, and the postcode row.
        /// </summary>
        /// <param name="data">Raw HTML of the page.</param>
        /// <returns>
        /// The collected listings. <c>HasNextPage</c>/<c>NextPage</c> are filled in only when a
        /// node is returned for <c>("a", "paging next")</c> and its <c>href</c> is non-blank;
        /// the URI is resolved against <c>_baseUrl</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// A listing node has no <c>h3</c> containing an <c>a</c> element.
        /// </exception>
        public FundaResult ParsePage(string data)
        {
            FundaResult result = new FundaResult();

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(data);

            IEnumerable<HtmlNode> houseNodes = doc.DocumentNode.GetNodesForClass("div", "specs");

            foreach (HtmlNode house in houseNodes)
            {
                FundaObject fundaObject = new FundaObject();

                var streetHouseNumber = house.Descendants("h3").First().Descendants("a").First();
                fundaObject.StraatAdres = ParseAddress(streetHouseNumber.InnerText);
                var liItems = house.GetNodesForThatDoesNotHaveClass("li", "object-tagline").ToList();

                var surface = house.GetNodesForAttribute("span", "title", "Oppervlakte").FirstOrDefault();
                string surfaceValue = surface?.InnerText.Split(new[] {' '}, StringSplitOptions.RemoveEmptyEntries).FirstOrDefault();

                // int.Parse would throw on a non-numeric or overflowing token and lose the whole
                // page; TryParse also returns false for null/blank, so one check covers all cases.
                int parsedSurface;
                bool surfaceIsNumeric = int.TryParse(surfaceValue, NumberStyles.AllowThousands, nlCulture, out parsedSurface);
                if (surfaceIsNumeric)
                {
                    fundaObject.Surface = parsedSurface;
                }

                var pricesNode = house.GetNodesForClass("span", "price-wrapper").FirstOrDefault();
                if (pricesNode != null)
                {
                    var prices = pricesNode.InnerHtml.Split(new string[] {"<br>"}, StringSplitOptions.RemoveEmptyEntries);
                    foreach (var fragment in prices)
                    {
                        // Re-parse each "<br>"-separated fragment on its own so price and
                        // extension are looked up per price line.
                        HtmlDocument fragmentDoc = new HtmlDocument();
                        fragmentDoc.LoadHtml(fragment);
                        var price = fragmentDoc.DocumentNode.GetNodesForClass("span", "price").FirstOrDefault();
                        var priceExt = fragmentDoc.DocumentNode.GetNodesForClass("abbr", "price-ext").FirstOrDefault();

                        bool extSaysRent = priceExt != null && priceExt.InnerText.Contains("Huurprijs");
                        bool priceHasPeriod = price != null &&
                                              (price.InnerText.Contains("/mnd") || price.InnerText.Contains("/jr"));

                        if (extSaysRent || priceHasPeriod)
                        {
                            if (price != null)
                            {
                                fundaObject.Huurprijs = ParsePrice(price.InnerText);
                            }
                            if (priceExt != null)
                            {
                                fundaObject.HuurprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                            }
                        }
                        else
                        {
                            if (price != null)
                            {
                                fundaObject.Koopprijs = ParsePrice(price.InnerText);
                            }
                            if (priceExt != null)
                            {
                                fundaObject.KoopprijsSpec = WebUtility.HtmlDecode(priceExt.InnerText);
                            }
                        }
                    }
                }

                // NOTE(review): requires more than one item but reads only index 0 — behavior
                // preserved from the original; verify the intended threshold.
                if (liItems.Count > 1)
                {
                    var address = liItems[0].InnerText;
                    fundaObject.PostCodePlaats = ParseZipCodeRow(address);
                }

                result.FundaObjects.Add(fundaObject);
            }

            var nextPage = doc.DocumentNode.GetNodesForClass("a", "paging next").ToList();
            if (nextPage.Any())
            {
                // Attributes["href"] is null when the attribute is absent; a link without a
                // target is treated as "no next page" rather than dereferenced.
                string urlPath = nextPage[0].Attributes["href"]?.Value;
                if (!string.IsNullOrWhiteSpace(urlPath))
                {
                    result.HasNextPage = true;
                    result.NextPage = new Uri(_baseUrl, urlPath);
                }
            }

            return result;
        }