/// <summary>
/// Parses the HTML of a single offer page into a <see cref="RealEstateOfferModel"/>.
/// </summary>
/// <param name="webAddress">Address of the offer page; passed to the model constructor.</param>
/// <param name="a_html">Raw HTML of the offer page.</param>
/// <returns>
/// The populated offer, or <c>null</c> when any mandatory step throws
/// (the last completed step and the exception are written to the debug output).
/// </returns>
private RealEstateOfferModel ParseSingleOfferFromHtml(string webAddress, string a_html)
{
    RealEstateOfferModel offer = new RealEstateOfferModel(webAddress);

    // Label of the last step that finished successfully. On failure it is logged,
    // so the step that actually failed is the one following the logged label.
    string currentState = null;

    try
    {
        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(a_html);

        //Get offer images
        // Images are optional: HtmlAgilityPack's SelectSingleNode/SelectNodes yield null
        // when nothing matches, and a missing gallery should not discard the whole offer.
        var offerImagesNode = document.DocumentNode.SelectSingleNode(offerImagesInHtml);
        var offerImagesListNode = offerImagesNode == null ? null : offerImagesNode.SelectNodes(offerImageInHtml);
        if (offerImagesListNode != null)
        {
            foreach (var imageNode in offerImagesListNode)
            {
                var singleImageNode = imageNode.SelectSingleNode(offerSingleImageInHtml);
                string source = singleImageNode == null ? null : singleImageNode.GetAttributeValue("src", null);

                // Check the raw attribute before prefixing the host; the concatenated
                // address itself can never be null, so checking it would be a no-op.
                if (!string.IsNullOrEmpty(source))
                {
                    offer.ImagesAddresses.Add("https://oferty-dom.pl" + source);
                }
            }
        }
        currentState = "offer images";

        //Get offer title
        var offerTitleNode = document.DocumentNode.SelectSingleNode(offerTitleInHtml);
        offer.Title = offerTitleNode.InnerText.Trim();
        currentState = "offer title node";

        //Get offer description
        var offerDescriptionNode = document.DocumentNode.SelectSingleNode(offerDescInHtml);
        offer.Description = offerDescriptionNode.InnerText.Trim();
        currentState = "offer description node";

        //Get offer price
        var offerPriceNode = document.DocumentNode.SelectSingleNode(offerPriceInHtml);
        offer.Price = ParsePrice(offerPriceNode.InnerText.Trim());
        currentState = "offer price node";

        //Get offer type (advertisement and real estate type)
        var offerTypeNode = document.DocumentNode.SelectSingleNode(offerTypeInHtml);
        // Invariant lower-casing keeps the keyword checks below independent of the thread culture.
        string offerType = offerTypeNode.InnerText.ToLowerInvariant();
        currentState = "offer type node";

        var offerCityNode = document.DocumentNode.SelectSingleNode(offerCityInHtml);
        string offerCity = offerCityNode.InnerText.Trim();
        currentState = "offer city node";

        var offerAreaNode = document.DocumentNode.SelectSingleNode(offerAreaInHtml);
        // Drops the first four characters of the area text - presumably a fixed label
        // prefix on the page (TODO confirm). Text shorter than that throws and fails the parse.
        string offerArea = offerAreaNode.InnerText.Trim().ToLowerInvariant().Remove(0, 4);
        int parenthesisIndex = offerArea.IndexOf("(");
        if (parenthesisIndex >= 0)
        {
            // Keep only the part before any parenthesised suffix.
            offerArea = offerArea.Remove(parenthesisIndex).Trim();
        }
        offerArea = Regex.Replace(offerArea, @"\s+", "");
        currentState = "offer area node";

        //Get province
        offer.Province = Province.INVALID;
        string foldedOfferType = FoldTextForMatching(offerType); // loop-invariant, computed once
        foreach (var province in Province.PROVINCES)
        {
            // Escape the name so that any character the folding produces (e.g. '?')
            // is matched literally instead of being read as regex syntax.
            string provincePattern = "\\b" + Regex.Escape(FoldTextForMatching(province.Name)) + "\\b";
            if (Regex.IsMatch(foldedOfferType, provincePattern))
            {
                offer.Province = province;
                break;
            }
        }
        currentState = "province";

        //Get city
        offer.OfferCity = City.INVALID;
        string foldedOfferCity = FoldTextForMatching(offerCity); // loop-invariant, computed once
        foreach (var city in City.CITIES)
        {
            if (foldedOfferCity.Equals(FoldTextForMatching(city.CityDetails.Nazwa)))
            {
                offer.OfferCity = City.GetCity(offer.Province.Name, offerArea, city.CityDetails.Nazwa);
                break;
            }
        }
        if (offer.OfferCity == null || offer.OfferCity == City.INVALID)
        {
            // Caught by the handler below: an offer without a recognised city is rejected.
            throw new FormatException("City " + offerCity + " could not be parsed");
        }
        currentState = "city";

        //Get advertisement type
        // When neither keyword is present, AdvType keeps whatever value the model was constructed with.
        bool isSale = offerType.Contains("sprzedaż");
        bool isRent = offerType.Contains("wynajem");
        if (isSale && isRent)
        {
            offer.AdvType = AdvertisementType.ALL;
        }
        else if (isSale)
        {
            offer.AdvType = AdvertisementType.SALE;
        }
        else if (isRent)
        {
            offer.AdvType = AdvertisementType.RENT;
        }
        currentState = "advertisement type";

        //Get property type
        // Order matters: the first keyword found wins.
        if (offerType.Contains("mieszkanie"))
        {
            offer.PropType = PropertyType.FLAT;
        }
        else if (offerType.Contains("dom"))
        {
            offer.PropType = PropertyType.HOUSE;
        }
        else if (offerType.Contains("lokal użytkowy"))
        {
            offer.PropType = PropertyType.PREMISE;
        }
        else if (offerType.Contains("działka"))
        {
            offer.PropType = PropertyType.LAND;
        }
        else if (offerType.Contains("pokój"))
        {
            offer.PropType = PropertyType.ROOM;
        }
        else if (offerType.Contains("garaż"))
        {
            offer.PropType = PropertyType.GARAGE;
        }
        else
        {
            offer.PropType = PropertyType.ELSE;
        }
        currentState = "property type";
    }
    catch (Exception ex)
    {
        // Best-effort parsing: any failure discards the offer and is only reported to the debug output.
        offer = null;
        Debug.WriteLine(currentState);
        Debug.WriteLine(ex);
    }

    return offer;
}

/// <summary>
/// Produces the comparison form of a place name: hyphens become spaces, the text is
/// lower-cased, and it is round-tripped through the "Cyrillic" code page into ASCII.
/// </summary>
/// <remarks>
/// NOTE(review): the code-page round trip looks like a diacritic-stripping trick so that
/// accented and unaccented spellings compare equal - confirm against the target runtime.
/// </remarks>
private static string FoldTextForMatching(string text)
{
    string lowered = text.Replace("-", " ").ToLowerInvariant();
    return Encoding.ASCII.GetString(Encoding.GetEncoding("Cyrillic").GetBytes(lowered));
}
/// <summary>
/// Parses the HTML of a single offer page into a <see cref="RealEstateOfferModel"/>,
/// reading image addresses and the price from "content" attributes and the location from links.
/// </summary>
/// <param name="webAddress">Address of the offer page; passed to the model constructor.</param>
/// <param name="a_html">Raw HTML of the offer page.</param>
/// <returns>
/// The populated offer, or <c>null</c> when any mandatory step throws
/// (the last completed step and the exception are written to the debug output).
/// </returns>
private RealEstateOfferModel ParseSingleOfferFromHtml(string webAddress, string a_html)
{
    RealEstateOfferModel offer = new RealEstateOfferModel(webAddress);

    // Label of the last step that finished successfully. On failure it is logged,
    // so the step that actually failed is the one following the logged label.
    string currentState = null;

    try
    {
        HtmlDocument document = new HtmlDocument();
        document.LoadHtml(a_html);

        //Get offer images
        // Images are optional: HtmlAgilityPack's SelectNodes yields null (not an empty
        // collection) when nothing matches, and that should not discard the whole offer.
        var offerImagesNode = document.DocumentNode.SelectNodes(offerImagesInHtml);
        if (offerImagesNode != null)
        {
            foreach (var imageNode in offerImagesNode)
            {
                string address = imageNode.GetAttributeValue("content", null);
                if (address != null)
                {
                    offer.ImagesAddresses.Add(address);
                }
            }
        }
        currentState = "offer images";

        //Get offer title
        var offerTitleNode = document.DocumentNode.SelectSingleNode(offerTitleInHtml);
        offer.Title = offerTitleNode.InnerText.Trim();
        currentState = "offer title node";

        //Get offer description
        var offerDescriptionNode = document.DocumentNode.SelectSingleNode(offerDescInHtml);
        offer.Description = offerDescriptionNode.InnerText.Trim();
        currentState = "offer description node";

        //Get offer price
        // A missing "content" attribute is parsed as "0"; a missing price node fails the parse.
        var offerPriceNode = document.DocumentNode.SelectSingleNode(offerPriceInHtml);
        offer.Price = ParsePrice(offerPriceNode.GetAttributeValue("content", "0"));
        currentState = "offer price node";

        //Get offer type (advertisement and real estate type)
        var offerTypeNode = document.DocumentNode.SelectSingleNode(offerTypeInHtml);

        // All links under the location node; the code below reads the first as the
        // province and the second as the city.
        var locationNodes = document.DocumentNode.SelectSingleNode(offerLocationInHtml).SelectNodes(".//a");
        currentState = "offer location node";

        string offerProvince = locationNodes[0].InnerText;
        offer.Province = Province.FromString(offerProvince);
        currentState = "province";

        string offerCity = locationNodes[1].InnerText;
        offer.OfferCity = City.FromString(offerCity);
        if (offer.OfferCity == null || offer.OfferCity == City.INVALID)
        {
            // Caught by the handler below: an offer without a recognised city is rejected.
            throw new FormatException("City " + offerCity + " could not be parsed");
        }
        currentState = "city";

        // Decode entities BEFORE lower-casing: lower-casing first would leave upper-case
        // letters produced from entities untouched and could alter case-sensitive entity names.
        string offerType = System.Net.WebUtility.HtmlDecode(offerTypeNode.InnerText).ToLowerInvariant();
        currentState = "offer type node";

        //Get advertisement type
        // When neither keyword is present, AdvType keeps whatever value the model was constructed with.
        if (offerType.Contains("sprzedam"))
        {
            offer.AdvType = AdvertisementType.SALE;
        }
        else if (offerType.Contains("wynajmę"))
        {
            offer.AdvType = AdvertisementType.RENT;
        }
        currentState = "advertisement type";

        //Get property type
        // Order matters: the first keyword found wins.
        if (offerType.Contains("mieszkanie"))
        {
            offer.PropType = PropertyType.FLAT;
        }
        else if (offerType.Contains("dom"))
        {
            offer.PropType = PropertyType.HOUSE;
        }
        else if (offerType.Contains("lokal użytkowy"))
        {
            offer.PropType = PropertyType.PREMISE;
        }
        else if (offerType.Contains("działka"))
        {
            offer.PropType = PropertyType.LAND;
        }
        else if (offerType.Contains("pokój"))
        {
            offer.PropType = PropertyType.ROOM;
        }
        else if (offerType.Contains("garaż"))
        {
            offer.PropType = PropertyType.GARAGE;
        }
        else
        {
            offer.PropType = PropertyType.INVALID;
        }
        currentState = "property type";
    }
    catch (Exception ex)
    {
        // Best-effort parsing: any failure discards the offer and is only reported to the debug output.
        offer = null;
        Debug.WriteLine(currentState);
        Debug.WriteLine(ex);
    }

    return offer;
}