/// <summary>
/// Copies the fields of a <see cref="Location"/> into a new
/// <see cref="LocationDataObject"/>, attaching the supplied parent and country ids.
/// </summary>
/// <param name="location">Source location; must not be null.</param>
/// <param name="parent_id">Value stored in <c>parent_id</c>; may be null.</param>
/// <param name="country_id">Value stored in <c>country_id</c>.</param>
/// <returns>A newly created data object populated from the arguments.</returns>
/// <exception cref="ArgumentNullException"><paramref name="location"/> is null.</exception>
private static LocationDataObject GetLocationDataObject(Location location, int? parent_id, int country_id)
{
    if (location == null)
    {
        throw new ArgumentNullException("location");
    }

    return new LocationDataObject
    {
        country_id = country_id,
        parent_id = parent_id,
        name = location.Name,
        level = location.Level,
        // The capital flag is stored as an integer: 1 for a capital, 0 otherwise.
        is_capital = location.IsCapital ? 1 : 0
    };
}
/// <summary>
/// Downloads the tonkosti.ru page of the given country, follows its
/// "Города и курорты" ("Cities and resorts") link and parses the by-region
/// listing on that page into a tree of locations up to three levels deep.
/// </summary>
/// <param name="country">Country whose <c>Url</c> is appended to the site root; must not be null.</param>
/// <returns>
/// The first-level locations with their nested children. The list is empty when
/// the country page has no "Города и курорты" link or when the by-regions block
/// cannot be found on the locations page.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="country"/> is null.</exception>
public static List<Location> Parse(Country country)
{
    if (country == null)
    {
        throw new ArgumentNullException("country");
    }

    const string urlFormat = "http://tonkosti.ru{0}";
    const RegexOptions options = RegexOptions.CultureInvariant | RegexOptions.Multiline;

    List<Location> locationList = new List<Location>();

    string countryHtml = Request.GetWebText(
        url: string.Format(urlFormat, country.Url),
        encodingName: "utf-8");

    // Look for the "Cities and resorts" link in the country page.
    string locationsLinkPattern = "<a href=\"(?<href>[^<]*)\" title=\"[^<]*\">Города и курорты</a>";
    Match hrefMatch = Regex.Match(countryHtml, locationsLinkPattern, options);
    if (!hrefMatch.Success)
    {
        return locationList;
    }

    string locationsUrl = hrefMatch.Groups["href"].Value;

    // Line breaks are removed so that '.' in the pattern below can span the whole block.
    string locationsHtml = Request
        .GetWebText(
            url: string.Format(urlFormat, locationsUrl),
            encodingName: "utf-8")
        .Replace("\n", string.Empty);

    // Take the locations grouped by region (the alphabetical list that follows is excluded).
    string byRegionsTablePattern = "<div id=\"RegionsListByRegions\">(?<by_regions>.*)</div><noindex><div id=\"RegionsListByAlphabet\" style=\"display: none;\">";
    Match byRegionsTableMatch = Regex.Match(locationsHtml, byRegionsTablePattern, options);
    if (!byRegionsTableMatch.Success)
    {
        // Same outcome as a missing link: nothing to parse, so return the empty list
        // instead of failing on the unsuccessful match.
        return locationList;
    }

    // Load the fragment as XML.
    XDocument doc = XDocument.Parse(byRegionsTableMatch.Groups["by_regions"].Value);

    IEnumerable<XElement> level1Items = doc
        .Root
        .Elements("tr")
        .SelectMany(tr => tr.Elements("td"))
        .SelectMany(td => td.Elements("ul"))
        .Where(ul => HasCssClass(ul, "Level1"))
        .SelectMany(ul => ul.Elements("li"));

    foreach (XElement liL1 in level1Items)
    {
        Location locationL1 = new Location(
            name: ReadItemName(liL1),
            level: 1,
            isCapital: HasCssClass(liL1, "Capital"));

        IEnumerable<XElement> level2Items = liL1
            .Elements("ul")
            .Where(ul => HasCssClass(ul, "Level2"))
            .SelectMany(ul => ul.Elements("li"));

        foreach (XElement liL2 in level2Items)
        {
            Location locationL2 = new Location(
                name: ReadItemName(liL2),
                level: 2,
                isCapital: HasCssClass(liL2, "Capital"));

            // Third-level entries are the links inside <div><span>; spans without a link are skipped.
            IEnumerable<XElement> level3Links = liL2
                .Elements("div")
                .SelectMany(div => div.Elements("span"))
                .Select(span => span.Element("a"))
                .Where(link => link != null);

            foreach (XElement linkL3 in level3Links)
            {
                // Open question from the original author: can a third-level element
                // be a capital, and if so, how would that be detected?
                Location locationL3 = new Location(
                    name: linkL3.Value,
                    level: 3,
                    isCapital: false);

                locationL2.Locations.Add(locationL3);
            }

            locationL1.Locations.Add(locationL2);
        }

        locationList.Add(locationL1);
    }

    return locationList;
}

/// <summary>
/// Returns true when the element's <c>class</c> attribute equals <paramref name="className"/>;
/// an element without a <c>class</c> attribute never matches.
/// </summary>
private static bool HasCssClass(XElement element, string className)
{
    // The explicit XAttribute-to-string conversion yields null for a missing attribute.
    return (string)element.Attribute("class") == className;
}

/// <summary>
/// Reads a list item's display name: the text of its <c>a</c> child when present,
/// otherwise the string form of its first child node, or an empty string for an empty item.
/// </summary>
private static string ReadItemName(XElement item)
{
    XElement link = item.Element("a");
    if (link != null)
    {
        return link.Value;
    }

    return item.FirstNode != null ? item.FirstNode.ToString() : string.Empty;
}