/// <summary>
/// Stores <paramref name="value"/> in the <see cref="Building"/> property that
/// corresponds to the caption <paramref name="name"/>.
/// </summary>
/// <param name="b">Building that receives the value.</param>
/// <param name="name">Caption text, compared exactly (including the trailing colon).</param>
/// <param name="value">Text to assign to the matching property.</param>
/// <remarks>A caption that matches none of the known ones leaves the building untouched.</remarks>
void SetBuilding(Building b, String name, String value)
{
    if (name == "Адрес:")
    {
        b.Address = value;
    }
    else if (name == "Застройщик:")
    {
        b.Developer = value;
    }
    else if (name == "Дата сдачи:")
    {
        b.DateFinish = value;
    }
    else if (name == "Класс жилья:")
    {
        b.BuildingClass = value;
    }
    else if (name == "Тип здания:")
    {
        b.BuildingType = value;
    }
    else if (name == "Этажность:")
    {
        b.Floor = value;
    }
    else if (name == "Квартир:")
    {
        b.FlatCounts = value;
    }
}
/// <summary>
/// Builds a <see cref="Building"/> from the <c>BuildData</c> table embedded in a page.
/// </summary>
/// <param name="info">Source of the building's name and coordinates.</param>
/// <param name="html">Page markup that contains <c>&lt;table id="BuildData"&gt;</c>.</param>
/// <returns>
/// A building whose Name/Latitude/Longitude come from <paramref name="info"/> and whose
/// remaining properties are filled from the table rows via <c>SetBuilding</c>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The opening <c>BuildData</c> table tag or its closing <c>&lt;/table&gt;</c> is not present in <paramref name="html"/>.
/// </exception>
Building ParseBuildingFromHtml(BuildingJsonInfo info, String html)
{
    var start = "<table id=\"BuildData\">";
    var end = "</table>";

    // Markup is matched ordinally; a culture-sensitive search is neither needed nor desirable for tags.
    var startIndex = html.IndexOf(start, StringComparison.Ordinal);
    if (startIndex < 0)
    {
        // Previously -1 was passed on to the next IndexOf, which failed without any explanation.
        throw new ArgumentOutOfRangeException("html", "В html не найдена таблица BuildData!");
    }

    var endTagIndex = html.IndexOf(end, startIndex, StringComparison.Ordinal);
    if (endTagIndex < 0)
    {
        // Without this check the substring length below would be computed from -1.
        throw new ArgumentOutOfRangeException("html", "В html не найден закрывающий тег таблицы BuildData!");
    }

    var endIndex = endTagIndex + end.Length;
    var tableXml = html.Substring(startIndex, endIndex - startIndex);

    var building = new Building()
    {
        Name = info.name,
        Latitude = info.lat,
        Longitude = info.lng
    };

    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(tableXml);

    // SelectNodes may return null instead of an empty collection when nothing matches.
    HtmlNodeCollection rows = document.DocumentNode.SelectNodes("//tr");
    if (rows == null)
    {
        return building;
    }

    foreach (HtmlNode row in rows)
    {
        // Materialize once: the cells are read several times below.
        var cells = row.Descendants("td").ToList();
        if (cells.Count == 0)
        {
            // A row without <td> cells carries no caption/value pair.
            continue;
        }

        var name = cells[0].InnerText.Trim();
        var valueCell = cells[cells.Count - 1];

        // Prefer the text of the first child node; an empty cell has no children at all.
        var firstChild = valueCell.FirstChild;
        var value = firstChild == null ? String.Empty : firstChild.InnerText.Trim();
        if (String.IsNullOrEmpty(value))
        {
            // Fall back to the whole cell text with tabs and line feeds stripped.
            value = valueCell.InnerText.Trim();
            value = value.Replace("\t", "");
            value = value.Replace("\n", "");
        }

        SetBuilding(building, name, value);
    }

    return building;
}
/// <summary>
/// Fills <paramref name="building"/> from a page whose data lives in
/// <c>&lt;table class="objects_plate"&gt;</c> and, when present, in the block that follows
/// the "Характеристики проекта" header div.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <param name="building">Building to populate (Address, DateFinish, Developer, SqCommon, Floor, FlatCounts).</param>
/// <remarks>
/// The table itself is required: <c>First()</c> throws when the page has no matching table,
/// so callers should check for it beforehand.
/// </remarks>
void ParseBuildingWithTableClassObjectsPlate(HtmlDocument document, Building building)
{
    var table = document.DocumentNode.SelectNodes("//table[@class='objects_plate']").First();

    foreach (var row in table.ChildNodes)
    {
        // Nodes without children (e.g. whitespace text between rows) have no caption,
        // and every handled caption reads its value from the second child.
        var captionNode = row.FirstChild;
        if (captionNode == null || row.ChildNodes.Count < 2)
        {
            continue;
        }

        var valueNode = row.ChildNodes[1];
        switch (captionNode.InnerText)
        {
            case "Расположение":
                building.Address = valueNode.LastChild.InnerText.Replace("Адрес:", "").Trim();
                break;
            case "Состояние":
                // Line breaks inside the cell separate several values; join them with commas.
                building.DateFinish = valueNode.InnerHtml.Replace("<br>", ", ").Trim();
                break;
            case "Девелопер":
                building.Developer = valueNode.FirstChild.InnerText.Trim();
                break;
        }
    }

    var headerAdditionalPropertiesDiv = document.DocumentNode.SelectNodes("//div[starts-with(., 'Характеристики проекта')]");
    if (headerAdditionalPropertiesDiv == null || headerAdditionalPropertiesDiv.Count == 0)
    {
        return;
    }

    // The properties are expected in the node right after the header; it may be absent.
    var additionalPropertiesDiv = headerAdditionalPropertiesDiv.First().NextSibling;
    if (additionalPropertiesDiv == null)
    {
        return;
    }

    foreach (var propertyDiv in additionalPropertiesDiv.ChildNodes)
    {
        var captionNode = propertyDiv.FirstChild;
        if (captionNode == null)
        {
            // Childless nodes (e.g. whitespace text) hold no property.
            continue;
        }

        switch (captionNode.InnerText.Trim())
        {
            case "Общая площадь:":
                if (propertyDiv.ChildNodes.Count > 1)
                {
                    building.SqCommon = propertyDiv.ChildNodes[1].InnerText;
                }
                break;
            case "Этажность:":
                building.Floor = propertyDiv.LastChild.InnerText;
                break;
            case "Количество квартир:":
                building.FlatCounts = propertyDiv.LastChild.InnerText;
                break;
        }
    }
}
/// <summary>
/// Fills <paramref name="building"/> from a page whose data lives in <c>&lt;table class="main"&gt;</c>.
/// </summary>
/// <param name="document">Parsed page.</param>
/// <param name="building">Building to populate (Developer, Address, MailAddress, BuildAddress, DateFinish).</param>
/// <remarks>
/// The table itself is required: <c>First()</c> throws when the page has no matching table,
/// so callers should check for it beforehand.
/// </remarks>
void ParseBuildingWithTableClassMain(HtmlDocument document, Building building)
{
    var mainTable = document.DocumentNode.SelectNodes("//table[@class='main']").First();

    foreach (var row in mainTable.ChildNodes)
    {
        // Nodes without children (e.g. whitespace text between rows) have neither caption nor value.
        var captionNode = row.FirstChild;
        if (captionNode == null)
        {
            continue;
        }

        var propertyName = captionNode.InnerText;

        // Matches any caption containing this fragment; the caption itself is kept as a prefix of the value.
        if (propertyName.Contains("астройщик"))
        {
            building.Developer = propertyName + " " + row.LastChild.InnerText.Trim();
        }

        switch (propertyName)
        {
            case "Адрес:":
                building.Address = row.LastChild.InnerText.Trim();
                break;
            case "Почтовый адрес:":
                building.MailAddress = row.LastChild.InnerText.Trim();
                break;
            case "Строительный адрес:":
                building.BuildAddress = row.LastChild.InnerText.Trim();
                break;
            case "Состояние объекта:":
                // Dashes are dropped from the state text before trimming.
                building.DateFinish = row.LastChild.InnerText.Replace("—", "").Trim();
                break;
        }
    }
}
/// <summary>
/// Parses a building page: reads the name from <c>&lt;div class="h2"&gt;</c> and delegates
/// the rest to the parser that matches the page layout.
/// </summary>
/// <param name="html">Page markup.</param>
/// <param name="building">Building to populate.</param>
/// <exception cref="InvalidOperationException">The page has no <c>div</c> with class <c>h2</c>.</exception>
/// <exception cref="NotImplementedException">
/// The page contains neither a <c>main</c> nor an <c>objects_plate</c> table.
/// </exception>
void ParseHtmlForBuilding(String html, Building building)
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(html);

    // SelectSingleNode yields null when nothing matches, which lets each case be reported explicitly.
    var titleNode = document.DocumentNode.SelectSingleNode("//div[@class='h2']");
    if (titleNode == null)
    {
        throw new InvalidOperationException("На html-странице не найден заголовок с названием дома!");
    }
    building.Name = titleNode.InnerText;

    // The "main" layout takes precedence when both tables are present.
    if (document.DocumentNode.SelectSingleNode("//table[@class='main']") != null)
    {
        ParseBuildingWithTableClassMain(document, building);
        return;
    }

    // Previously a missing table made FirstOrDefault() fail on a null collection,
    // so the NotImplementedException below was never reached.
    if (document.DocumentNode.SelectSingleNode("//table[@class='objects_plate']") != null)
    {
        ParseBuildingWithTableClassObjectsPlate(document, building);
        return;
    }

    throw new NotImplementedException("Не реализован парсинг для такого типа html-страницы!");
}
/// <summary>
/// Downloads and parses the page of every building in <paramref name="buildShortInfos"/>.
/// </summary>
/// <param name="buildShortInfos">Buildings to fetch; each supplies a relative Url and coordinates.</param>
/// <returns>
/// The successfully parsed buildings together with one warning message per building
/// whose download or parsing failed.
/// </returns>
/// <remarks>
/// Requests are started one after another with a pause of
/// <c>Settings.Default.MosnovostroyRequestDelay</c> between starts; the method blocks until all of them finish.
/// </remarks>
MineResult GetBuildings(List<BuildingShortInfo> buildShortInfos)
{
    var buildings = new ConcurrentQueue<Building>();
    var warnings = new ConcurrentQueue<String>();
    var tasks = new List<Task>();

    var i = 0;
    foreach (var buildInfo in buildShortInfos)
    {
        i++;

        // Snapshot the counter before scheduling: the lambda runs later on another thread,
        // and reading the shared "i" there could report the number of a subsequent iteration.
        var currentStep = i;

        tasks.Add(Task.Run(async () =>
        {
            var buildingUrl = "http://mosnovostroy.ru" + buildInfo.Url;
            try
            {
                var html = await HttpHelper.HttpGet(buildingUrl);
                var building = new Building()
                {
                    Latitude = buildInfo.Latitude,
                    Longitude = buildInfo.Longitude,
                    Url = buildingUrl
                };
                ParseHtmlForBuilding(html, building);
                buildings.Enqueue(building);
                Logger.Instance.Info(String.Format("{0}/{1} Получена информация по дому {2}", currentStep.ToString(), buildShortInfos.Count, buildingUrl));
            }
            catch (Exception ex)
            {
                // A failure on one building is recorded as a warning and does not stop the others.
                var msg = String.Format("{0}/{1} При получении информации по дому {2} возникла ошибка!", currentStep.ToString(), buildShortInfos.Count, buildingUrl);
                Logger.Instance.Error(msg, ex);
                warnings.Enqueue(msg);
            }
        }));

        // Pause between request starts.
        Task.Delay(Settings.Default.MosnovostroyRequestDelay).Wait();
    }

    Task.WaitAll(tasks.ToArray());

    return new MineResult()
    {
        Buildings = buildings.ToList(),
        Warnings = warnings.ToList()
    };
}