// Checks that ScrapExtensions.ConvertStringToFloat turns the supplied text
// into the expected floating-point value.
//   value          - raw text handed to the converter.
//   expectedResult - value the converter is expected to return for that text.
public void convert_string_to_float(string value, float expectedResult)
{
    // Act
    float result = ScrapExtensions.ConvertStringToFloat(value);

    // Assert
    // Assert.Equal takes the expected value first and the actual value second;
    // with the arguments the other way round a failing run reports the two
    // values under the wrong labels.
    Assert.Equal(expectedResult, result);
}
/// <summary>
/// Builds an <see cref="AdDetails"/> from a details page whose attributes are laid out
/// as name/value spans inside the <c>ul.selMenu</c> list.
/// </summary>
/// <param name="doc">Parsed HTML document of the details page.</param>
/// <param name="ad">Listing the page belongs to; its <c>Price</c> is used for the price per square metre.</param>
/// <returns>
/// The details assembled from the page. Fields that are not present on the page keep
/// their defaults (null, 0, false, or the current UTC time for the creation date).
/// </returns>
public AdDetails ParseDetailsPage(HtmlDocument doc, Ad ad)
{
    DateTime createAt = DateTime.UtcNow;
    string district = null;
    string city = null;
    string typeOfProperty = null;
    bool agency = false;
    int numberOfRooms = 0;
    int numberOfBathrooms = 0;
    float size = 0;
    decimal priceM2 = 0;

    HtmlNodeCollection docs = doc.DocumentNode.SelectNodes("//ul[@class='selMenu'] / li / div");

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so the loop must be skipped in that case instead of crashing.
    if (docs != null)
    {
        foreach (HtmlNode docParameter in docs)
        {
            string nameParam = docParameter.SelectSingleNode("span[@class='name']")?.InnerText.Trim();
            string valueParam = docParameter.SelectSingleNode("span[@class='value']")?.InnerText.Trim();

            // NOTE(review): an incomplete row stops the whole scan rather than
            // being skipped - confirm that `continue` is not what was intended.
            if (nameParam.Empty() || valueParam.Empty())
            {
                break;
            }

            switch (nameParam)
            {
                case "Data dodania":
                    // Only overwrite the default when parsing succeeds; on failure
                    // the out value is DateTime.MinValue, which must not leak out.
                    if (DateTime.TryParse(valueParam, out DateTime parsedDate))
                    {
                        createAt = parsedDate;
                    }
                    break;

                case "Lokalizacja":
                    // Expected shape is "<district>, <city>"; tolerate a value
                    // without a comma instead of indexing past the array.
                    string[] location = valueParam.Split(',');
                    district = location[0].Trim();
                    if (location.Length > 1)
                    {
                        city = location[1].Trim();
                    }
                    break;

                case "Na sprzedaż przez":
                    // Anything other than an explicit owner listing counts as an agency.
                    agency = valueParam != "Właściciel";
                    break;

                case "Rodzaj nieruchomości":
                    typeOfProperty = valueParam;
                    break;

                case "Liczba pokoi":
                    numberOfRooms = ScrapExtensions.ConvertStringToInt(valueParam);
                    break;

                case "Liczba łazienek":
                    numberOfBathrooms = ScrapExtensions.ConvertStringToInt(valueParam);
                    break;

                case "Wielkość (m2)":
                    size = ScrapExtensions.ConvertStringToFloat(valueParam);
                    break;
            }
        }
    }

    // Price per square metre is only defined for a known, non-zero size.
    if (size != 0)
    {
        priceM2 = decimal.Round(ad.Price / (decimal)size, 2, MidpointRounding.AwayFromZero);
    }

    // The username link may be absent; pass null on in that case.
    HtmlNode usernameNode = doc.DocumentNode.SelectSingleNode("//span[@class='username'] / a /text()");
    string username = usernameNode?.InnerText?.Trim();

    return AdDetails.Create(
        priceM2,
        district,
        city,
        agency,
        typeOfProperty,
        numberOfRooms,
        numberOfBathrooms,
        size,
        username,
        new List<string>(),
        createAt);
}
/// <summary>
/// Builds an <see cref="AdDetails"/> from a details page whose attributes live in the
/// <c>ul.main-list</c> and <c>ul.sub-list</c> lists.
/// </summary>
/// <param name="doc">Parsed HTML document of the details page.</param>
/// <param name="ad">Listing the page belongs to; its <c>Price</c> is used for the price per square metre.</param>
/// <returns>
/// The assembled details, or <c>null</c> (after logging an error) when the main
/// attribute list is missing from the page.
/// </returns>
public AdDetails ParseDetailsPage(HtmlDocument doc, Ad ad)
{
    string city = null;
    string district = "-";
    string typeOfProperty = null;
    int numberOfRooms = 0;
    int numberOfBathrooms = 0;
    float size = 0;
    decimal priceM2 = 0;
    List<string> images = new List<string>();

    HtmlNodeCollection docs = doc.DocumentNode.SelectNodes("//ul[@class='main-list'] / li");
    if (docs == null)
    {
        Logger.Error("Docs is null. Perhaps problem with scrap url: {@ad}", ad);
        return null;
    }

    // images - the gallery is optional, and entries without a src are skipped
    HtmlNodeCollection imageNodes = doc.DocumentNode.SelectNodes("//figure[@itemprop='associatedMedia'] / a / img");
    if (imageNodes != null)
    {
        foreach (HtmlNode img in imageNodes)
        {
            string src = img?.Attributes["src"]?.Value;
            if (!string.IsNullOrEmpty(src))
            {
                images.Add(src);
            }
        }
    }

    // Main attribute list. Only the fields that end up in AdDetails are read;
    // other rows (e.g. "Cena", "Piętro") are ignored on purpose so that an
    // unparsable value in an unused field cannot fail the whole page.
    foreach (HtmlNode docParameter in docs)
    {
        string nameParam = docParameter.SelectSingleNode("text()")?.InnerText.Trim();
        string valueParam = docParameter.SelectSingleNode("span / strong")?.InnerText.Trim();

        switch (nameParam)
        {
            case "Liczba pokoi":
                numberOfRooms = ScrapExtensions.ConvertStringToInt(valueParam);
                break;

            case "Powierzchnia":
                if (valueParam != null)
                {
                    // Strip the trailing unit part before converting.
                    // NOTE(review): the character class also contains ',', so a value
                    // such as "54,5 m²" is cut down to "54" - confirm this is intended.
                    Match unitSuffix = Regex.Match(valueParam, @"\b[,); +]+.*$");

                    // string.Replace throws when the text to replace is empty, which
                    // is what an unsuccessful match yields - keep the value as is then.
                    string sizeText = unitSuffix.Success
                        ? valueParam.Replace(unitSuffix.Value, "")
                        : valueParam;

                    size = ScrapExtensions.ConvertStringToFloat(sizeText);
                }
                break;
        }
    }

    // Secondary attribute list: only the building type is used.
    HtmlNodeCollection subDocs = doc.DocumentNode.SelectNodes("//ul[@class='sub-list'] / li");
    if (subDocs != null)
    {
        foreach (HtmlNode subDoc in subDocs)
        {
            string nameParam = subDoc.SelectSingleNode("strong")?.InnerText.Trim().Replace(":", "");
            if (nameParam == "Rodzaj zabudowy")
            {
                typeOfProperty = subDoc.SelectSingleNode("text()")?.InnerText.Trim();
            }
        }
    }

    // location - the second link is used as the city and the third, when present,
    // as the district ("-" otherwise)
    HtmlNodeCollection location = doc.DocumentNode.SelectNodes("//address / p[@class='address-links'] / a");
    if (location != null)
    {
        if (location.Count > 1)
        {
            city = location[1].InnerText.Trim();
        }

        if (location.Count > 2)
        {
            district = location[2].InnerText?.Trim();
        }
    }

    // price m2 - only defined for a known, non-zero size
    if (size != 0)
    {
        priceM2 = decimal.Round(ad.Price / (decimal)size, 2, MidpointRounding.AwayFromZero);
    }

    // user - the name is either directly inside the person box or wrapped in a link
    HtmlNode userNode =
        doc.DocumentNode.SelectSingleNode("//div[@class='box-person'] / span[@itemprop='name']")
        ?? doc.DocumentNode.SelectSingleNode("//div[@class='box-person'] / a / span[@itemprop='name']");
    string username = userNode?.InnerText?.Trim();
    if (username.Empty())
    {
        username = "-";
    }

    // agency - an explicit "private offer" marker wins; without any box title
    // the listing is treated as coming from an agency
    HtmlNodeCollection boxTitles = doc.DocumentNode.SelectNodes("//h5[@class='box-title']");
    bool? isAgencyBox = boxTitles?.Any(x => x?.InnerText?.Trim() == "Biuro nieruchomości");

    HtmlNodeCollection offerTypes = doc.DocumentNode.SelectNodes("//h6[@class='box-contact-info-type']");
    bool? isPrivateOffer = offerTypes?.Any(x => x?.InnerText?.Trim() == "Oferta prywatna");

    bool agency = !isPrivateOffer.GetValueOrDefault(false) && isAgencyBox.GetValueOrDefault(true);

    return AdDetails.Create(
        priceM2,
        district,
        city,
        agency,
        typeOfProperty ?? "blok",
        numberOfRooms,
        numberOfBathrooms,
        size,
        username,
        images,
        DateTime.UtcNow);
}
/// <summary>
/// Builds an <see cref="AdDetails"/> from a details page that exposes its header in
/// <c>div.offer-titlebox__details</c> and its attributes in the <c>#offerdescription</c> table.
/// </summary>
/// <param name="doc">Parsed HTML document of the details page.</param>
/// <param name="ad">
/// Listing the page belongs to. When the header block is missing and the URL contains
/// "otodom", the ad's host is switched and parsing is delegated to <c>OtodomScraper</c>.
/// </param>
/// <returns>
/// The assembled details; the result of <c>OtodomScraper.ParseDetailsPage</c> for
/// delegated pages; or <c>null</c> (after logging an error) when the header block is
/// missing and the URL does not contain "otodom".
/// </returns>
public AdDetails ParseDetailsPage(HtmlDocument doc, Ad ad)
{
    DateTime createAt = DateTime.UtcNow;
    string district = null;
    string city = null;
    string typeOfProperty = null;
    bool agency = false;
    int numberOfRooms = 0;
    int numberOfBathrooms = 0;
    float size = 0;
    decimal priceM2 = 0;

    HtmlNode details = doc.DocumentNode.SelectSingleNode(
        "//div[@class='offer-titlebox'] / div[@class='offer-titlebox__details']");

    if (details == null)
    {
        if (ad.Url.Contains("otodom"))
        {
            Logger.Warning("Start scrap otodom: {@ad}.", ad);
            ad.SetHostUrl("https://www.otodom.pl");
            ad.SetHost("otodom");
            return new OtodomScraper().ParseDetailsPage(doc, ad);
        }

        Logger.Error("Docs is null and url not contains otodom: {@ad}.", ad);
        return null;
    }

    // Location is a comma-separated list: the first part is used as the city and
    // the third as the district. Missing parts leave the corresponding field null
    // instead of throwing, and surrounding whitespace is removed.
    string locationText = details.SelectSingleNode("a")?.InnerText;
    if (locationText != null)
    {
        string[] location = locationText.Split(',');
        city = location[0].Trim();
        if (location.Length > 2)
        {
            district = location[2].Trim();
        }
    }

    // Creation date: drop everything up to and including the "o " prefix and the
    // trailing ", ID..." part, then parse what is left as a Polish-formatted timestamp.
    string createAtText = details.SelectSingleNode("em")?.InnerText.Trim();
    if (createAtText != null)
    {
        string withoutPrefix = Regex.Replace(createAtText, "^[^_]*o ", "");
        string timestamp = Regex.Replace(withoutPrefix, ", ID.*$", "");

        // An unexpected date format should not fail the whole page: keep the
        // current UTC time as the creation date and leave a trace in the log.
        if (DateTime.TryParseExact(
                timestamp,
                "HH:mm, d MMMM yyyy",
                CultureInfo.CreateSpecificCulture("pl-PL"),
                DateTimeStyles.None,
                out DateTime parsedDate))
        {
            createAt = parsedDate;
        }
        else
        {
            Logger.Warning("Cannot parse creation date for ad: {@ad}.", ad);
        }
    }

    HtmlNodeCollection offerDescriptions = doc.DocumentNode.SelectNodes(
        "//div[@id='offerdescription'] / div[contains(@class, 'descriptioncontent')] / table / tr / td");

    // SelectNodes yields null when nothing matches, so guard the loop.
    if (offerDescriptions != null)
    {
        // Only the fields that end up in AdDetails are read; other rows
        // (e.g. "Poziom", "Umeblowane", "Rynek", "Finanse") are ignored.
        foreach (HtmlNode description in offerDescriptions)
        {
            string name = description.SelectSingleNode("table / tr / th")?.InnerText.Trim();
            string value = description.SelectSingleNode("table / tr / td / strong")?.InnerText?.Trim();

            switch (name)
            {
                case "Oferta od":
                    // Anything other than an explicit private seller counts as an agency.
                    agency = value != "Osoby prywatnej";
                    break;

                case "Cena za m2":
                    priceM2 = ScrapExtensions.ConvertStringToDecimal(value);
                    break;

                case "Rodzaj zabudowy":
                    typeOfProperty = value;
                    break;

                case "Powierzchnia":
                    // The value cell may be missing; leave the size at 0 then.
                    if (value != null)
                    {
                        size = ScrapExtensions.ConvertStringToFloat(value.Replace("m2", ""));
                    }
                    break;

                case "Liczba pokoi":
                    numberOfRooms = ScrapExtensions.ConvertStringToInt(value);
                    break;
            }
        }
    }

    HtmlNode usernameNode = doc.DocumentNode.SelectSingleNode("//div[@class='offer-user__details'] / h4 / a");
    string username = usernameNode?.InnerText?.Trim();

    return AdDetails.Create(
        priceM2,
        district,
        city,
        agency,
        typeOfProperty,
        numberOfRooms,
        numberOfBathrooms,
        size,
        username,
        new List<string>(),
        createAt);
}