Esempio n. 1
0
        /// <summary>
        /// Builds a <see cref="RealEstateOfferModel"/> from the HTML of a single offer page.
        /// </summary>
        /// <remarks>
        /// Steps run in this order: gallery images, title, description, price, offer type text,
        /// city text, area text, province, city, advertisement type, property type.
        /// Gallery images are optional: a missing gallery, or an image entry without a usable
        /// <c>src</c>, is skipped. Any exception raised by another step is caught, the name of the
        /// last completed step and the exception are written to the debug output, and
        /// <c>null</c> is returned.
        /// </remarks>
        /// <param name="webAddress">Address of the offer page; passed to the model constructor.</param>
        /// <param name="a_html">HTML markup of the offer page.</param>
        /// <returns>The populated offer, or <c>null</c> when a parsing step failed.</returns>
        private RealEstateOfferModel ParseSingleOfferFromHtml(string webAddress, string a_html)
        {
            RealEstateOfferModel offer = new RealEstateOfferModel(webAddress);
            // Name of the last step that completed; logged on failure to show how far parsing got.
            string currentState        = null;

            try
            {
                HtmlDocument document = new HtmlDocument();
                document.LoadHtml(a_html);

                //Get offer images
                var offerImagesNode     = document.DocumentNode.SelectSingleNode(offerImagesInHtml);
                // NOTE(review): SelectNodes is expected to return null (not an empty list) when
                // nothing matches, hence the explicit null guards - confirm against the library docs.
                var offerImagesListNode = offerImagesNode != null ? offerImagesNode.SelectNodes(offerImageInHtml) : null;
                if (offerImagesListNode != null)
                {
                    foreach (var imageNode in offerImagesListNode)
                    {
                        var    imageElement    = imageNode.SelectSingleNode(offerSingleImageInHtml);
                        string relativeAddress = imageElement != null ? imageElement.GetAttributeValue("src", null) : null;

                        // Test the raw attribute BEFORE prefixing the host: once concatenated the
                        // string can never be null, and a bare host is not a valid image address.
                        if (!string.IsNullOrEmpty(relativeAddress))
                        {
                            offer.ImagesAddresses.Add("https://oferty-dom.pl" + relativeAddress);
                        }
                    }
                }
                currentState = "offer images";

                //Get offer title
                var offerTitleNode = document.DocumentNode.SelectSingleNode(offerTitleInHtml);
                offer.Title  = offerTitleNode.InnerText.Trim();
                currentState = "offer title node";

                //Get offer description
                var offerDescriptionNode = document.DocumentNode.SelectSingleNode(offerDescInHtml);
                offer.Description = offerDescriptionNode.InnerText.Trim();
                currentState      = "offer description node";

                //Get offer price
                var offerPriceNode = document.DocumentNode.SelectSingleNode(offerPriceInHtml);
                offer.Price  = ParsePrice(offerPriceNode.InnerText.Trim());
                currentState = "offer price node";

                //Get offer type (advertisement and real estate type)
                var    offerTypeNode = document.DocumentNode.SelectSingleNode(offerTypeInHtml);
                string offerType     = offerTypeNode.InnerText.ToLower();
                currentState = "offer type node";

                var    offerCityNode = document.DocumentNode.SelectSingleNode(offerCityInHtml);
                string offerCity     = offerCityNode.InnerText.Trim();
                currentState = "offer city node";

                var    offerAreaNode = document.DocumentNode.SelectSingleNode(offerAreaInHtml);
                // Drops the first four characters of the area text
                // (presumably a fixed label prefix - TODO confirm against the page markup).
                string offerArea     = offerAreaNode.InnerText.Trim().ToLower().Remove(0, 4);
                if (offerArea.Contains("("))
                {
                    // Keep only the part before the first opening parenthesis.
                    offerArea = offerArea.Remove(offerArea.IndexOf("(")).Trim();
                }
                offerArea    = Regex.Replace(offerArea, @"\s+", "");
                currentState = "offer area node";

                // Both sides of every comparison below go through the same
                // "Cyrillic" bytes -> ASCII string round trip
                // (presumably a best-fit trick to fold Polish diacritics - TODO confirm).
                Encoding foldingEncoding = Encoding.GetEncoding("Cyrillic");

                //Get province
                // offerType is already lower-cased above; normalise it once instead of per province.
                string normalizedOfferType = Encoding.ASCII.GetString(foldingEncoding.GetBytes(offerType.Replace("-", " ")));
                offer.Province = Province.INVALID;
                foreach (var province in Province.PROVINCES)
                {
                    string normalizedProvince = Encoding.ASCII.GetString(foldingEncoding.GetBytes(province.Name.Replace("-", " ").ToLower()));

                    // Escape the name so characters produced by the round trip (e.g. '?')
                    // are matched literally instead of acting as regex operators.
                    if (Regex.IsMatch(normalizedOfferType, "\\b" + Regex.Escape(normalizedProvince) + "\\b"))
                    {
                        offer.Province = province;
                        break;
                    }
                }

                currentState = "province";

                //Get city
                string normalizedOfferCity = Encoding.ASCII.GetString(foldingEncoding.GetBytes(offerCity.Replace("-", " ").ToLower()));
                offer.OfferCity = City.INVALID;
                foreach (var city in City.CITIES)
                {
                    string normalizedCandidate = Encoding.ASCII.GetString(foldingEncoding.GetBytes(city.CityDetails.Nazwa.Replace("-", " ").ToLower()));
                    if (normalizedOfferCity.Equals(normalizedCandidate))
                    {
                        offer.OfferCity = City.GetCity(offer.Province.Name, offerArea, city.CityDetails.Nazwa);
                        break;
                    }
                }
                if (offer.OfferCity == null || offer.OfferCity == City.INVALID)
                {
                    // Handled by the catch below: an offer without a resolvable city is discarded.
                    throw new Exception("City " + offerCity + " could not be parsed");
                }
                currentState = "city";

                //Get advertisement type
                // When neither keyword is present AdvType keeps whatever value the model started with.
                bool mentionsSale = offerType.Contains("sprzedaż");
                bool mentionsRent = offerType.Contains("wynajem");
                if (mentionsSale && mentionsRent)
                {
                    offer.AdvType = AdvertisementType.ALL;
                }
                else if (mentionsSale)
                {
                    offer.AdvType = AdvertisementType.SALE;
                }
                else if (mentionsRent)
                {
                    offer.AdvType = AdvertisementType.RENT;
                }
                currentState = "advertisement type";

                //Get property type
                // The first matching keyword wins, so the order of these checks matters.
                if (offerType.Contains("mieszkanie"))
                {
                    offer.PropType = PropertyType.FLAT;
                }
                else if (offerType.Contains("dom"))
                {
                    offer.PropType = PropertyType.HOUSE;
                }
                else if (offerType.Contains("lokal użytkowy"))
                {
                    offer.PropType = PropertyType.PREMISE;
                }
                else if (offerType.Contains("działka"))
                {
                    offer.PropType = PropertyType.LAND;
                }
                else if (offerType.Contains("pokój"))
                {
                    offer.PropType = PropertyType.ROOM;
                }
                else if (offerType.Contains("garaż"))
                {
                    offer.PropType = PropertyType.GARAGE;
                }
                else
                {
                    offer.PropType = PropertyType.ELSE;
                }
                currentState = "property type";
            }
            catch (Exception ex)
            {
                // Best-effort parsing: report the last completed step and drop the offer.
                offer = null;
                Debug.WriteLine(currentState);
                Debug.WriteLine(ex);
            }

            return(offer);
        }
        /// <summary>
        /// Builds a <see cref="RealEstateOfferModel"/> from the HTML of a single offer page.
        /// </summary>
        /// <remarks>
        /// Steps run in this order: images, title, description, price, location (province and
        /// city), offer type text, advertisement type, property type.
        /// Images are optional: when no image node matches, or a node has no usable
        /// <c>content</c> attribute, the offer is still parsed. Any exception raised by another
        /// step is caught, the name of the last completed step and the exception are written to
        /// the debug output, and <c>null</c> is returned.
        /// </remarks>
        /// <param name="webAddress">Address of the offer page; passed to the model constructor.</param>
        /// <param name="a_html">HTML markup of the offer page.</param>
        /// <returns>The populated offer, or <c>null</c> when a parsing step failed.</returns>
        private RealEstateOfferModel ParseSingleOfferFromHtml(string webAddress, string a_html)
        {
            RealEstateOfferModel offer = new RealEstateOfferModel(webAddress);
            // Name of the last step that completed; logged on failure to show how far parsing got.
            string currentState        = null;

            try
            {
                HtmlDocument document = new HtmlDocument();
                document.LoadHtml(a_html);

                //Get offer images
                // NOTE(review): SelectNodes is expected to return null (not an empty list) when
                // nothing matches, hence the explicit null guard - confirm against the library docs.
                var offerImagesNode = document.DocumentNode.SelectNodes(offerImagesInHtml);
                if (offerImagesNode != null)
                {
                    foreach (var imageNode in offerImagesNode)
                    {
                        string address = imageNode.GetAttributeValue("content", null);
                        // Skip nodes whose attribute is absent or empty.
                        if (!string.IsNullOrEmpty(address))
                        {
                            offer.ImagesAddresses.Add(address);
                        }
                    }
                }
                currentState = "offer images";

                //Get offer title
                var offerTitleNode = document.DocumentNode.SelectSingleNode(offerTitleInHtml);
                offer.Title  = offerTitleNode.InnerText.Trim();
                currentState = "offer title node";

                //Get offer description
                var offerDescriptionNode = document.DocumentNode.SelectSingleNode(offerDescInHtml);
                offer.Description = offerDescriptionNode.InnerText.Trim();
                currentState      = "offer description node";

                //Get offer price
                // The price is read from the node's "content" attribute; "0" is used when it is absent.
                var offerPriceNode = document.DocumentNode.SelectSingleNode(offerPriceInHtml);
                offer.Price  = ParsePrice(offerPriceNode.GetAttributeValue("content", "0"));
                currentState = "offer price node";

                //Get offer location
                // The first link under the location node is read as the province, the second as the city.
                var locationNodes = document.DocumentNode.SelectSingleNode(offerLocationInHtml).SelectNodes(".//a");
                currentState = "offer location node";

                string offerProvince = locationNodes[0].InnerText;
                offer.Province = Province.FromString(offerProvince);
                currentState   = "province";

                string offerCity = locationNodes[1].InnerText;
                offer.OfferCity = City.FromString(offerCity);
                if (offer.OfferCity == null || offer.OfferCity == City.INVALID)
                {
                    // Handled by the catch below: an offer without a resolvable city is discarded.
                    throw new Exception("City " + offerCity + " could not be parsed");
                }
                currentState = "city";

                //Get offer type (advertisement and real estate type)
                // Decode HTML entities so the keyword checks below see plain characters.
                var    offerTypeNode = document.DocumentNode.SelectSingleNode(offerTypeInHtml);
                string offerType     = System.Net.WebUtility.HtmlDecode(offerTypeNode.InnerText.ToLower());
                currentState = "offer type node";

                //Get advertisement type
                // When neither keyword is present AdvType keeps whatever value the model started with.
                if (offerType.Contains("sprzedam"))
                {
                    offer.AdvType = AdvertisementType.SALE;
                }
                else if (offerType.Contains("wynajmę"))
                {
                    offer.AdvType = AdvertisementType.RENT;
                }
                currentState = "advertisement type";

                //Get property type
                // The first matching keyword wins, so the order of these checks matters.
                if (offerType.Contains("mieszkanie"))
                {
                    offer.PropType = PropertyType.FLAT;
                }
                else if (offerType.Contains("dom"))
                {
                    offer.PropType = PropertyType.HOUSE;
                }
                else if (offerType.Contains("lokal użytkowy"))
                {
                    offer.PropType = PropertyType.PREMISE;
                }
                else if (offerType.Contains("działka"))
                {
                    offer.PropType = PropertyType.LAND;
                }
                else if (offerType.Contains("pokój"))
                {
                    offer.PropType = PropertyType.ROOM;
                }
                else if (offerType.Contains("garaż"))
                {
                    offer.PropType = PropertyType.GARAGE;
                }
                else
                {
                    offer.PropType = PropertyType.INVALID;
                }
                currentState = "property type";
            }
            catch (Exception ex)
            {
                // Best-effort parsing: report the last completed step and drop the offer.
                offer = null;
                Debug.WriteLine(currentState);
                Debug.WriteLine(ex);
            }

            return(offer);
        }