예제 #1
        /// <summary>
        /// Downloads the tonkosti.ru front page, cuts out its "Table of countries" section and
        /// builds a <see cref="Country"/> from every list item matched inside that section.
        /// </summary>
        /// <param name="withLocations">
        /// Not referenced anywhere in the visible lines. Presumably asks for each country's
        /// locations to be loaded as well - TODO confirm against the complete source.
        /// </param>
        /// <returns>The <c>countryList</c> instance created at the top of the method.</returns>
        /// <exception cref="Exception">
        /// Thrown with the message "countryPattern not matched" when the extracted fragment
        /// contains no list item matching <c>countryPattern</c>.
        /// </exception>
        /// <remarks>
        /// NOTE(review): this listing has no brace lines and at least one call-chain line is
        /// missing (see the notes in the body), so it does not compile as shown.
        /// </remarks>
        public static List<Country> Parse(bool withLocations = false)
            List<Country> countryList = new List<Country>();

            string html;

            // NOTE(review): `client` is never referenced in the visible lines; the page text is
            // obtained through Request.GetWebText instead.
            using (WebClient client = new WebClient())
                html = Request.GetWebText(
                    url: "http://tonkosti.ru/",
                    encodingName: "utf-8");

            // The two arguments are the HTML comment markers that open and close the countries
            // table in the page source.
            // NOTE(review): the name of the method invoked on `html` and the closing `;` are
            // missing from this listing - presumably a "text between two markers" helper; confirm.
            string tableOfCountries = html
                    "<!-- Start: Table of countries (displaying element) -->",
                    "<!-- End: Table of countries (displaying element) -->")

            // Named groups: `class` = whatever follows "<li" up to ">", `href` = link target,
            // `inner` = link text. All three use greedy ".*".
            string countryPattern = "<li(?<class>.*)><a href=\"(?<href>.*)\">(?<inner>.*)</a></li>";

            // Throw when not a single list item matches the pattern.
            if (!Regex.IsMatch(tableOfCountries, countryPattern))
                throw new Exception("countryPattern not matched");

            // The fragment is scanned a second time here to collect every match.
            MatchCollection matches = Regex.Matches(tableOfCountries, countryPattern);

            // One Country per match; isHot is true when the <li> attribute text contains "hots".
            // NOTE(review): no line that adds `country` to countryList (or that uses
            // withLocations) is visible - presumably lost together with the braces; confirm.
            foreach (Match match in matches)
                Country country = new Country(
                    name: match.Result("${inner}"),
                    url: match.Result("${href}"),
                    isHot: match.Result("${class}").Contains("hots"));


            return countryList;
예제 #2
        /// <summary>
        /// Follows the "cities and resorts" link on a country's tonkosti.ru page, extracts the
        /// by-region listing from the linked page, parses it as XML and builds
        /// <see cref="Location"/> objects for three nesting levels (levels 1, 2 and 3).
        /// </summary>
        /// <param name="country">Country whose <c>Url</c> is appended to "http://tonkosti.ru".</param>
        /// <returns>
        /// The <c>locationList</c> instance created in the method; it is returned as created when
        /// the country page contains no link matching <c>locationsLinkPattern</c>.
        /// </returns>
        /// <exception cref="ArgumentNullException">Thrown when <paramref name="country"/> is null.</exception>
        /// <remarks>
        /// NOTE(review): this listing has no brace lines and several call-chain lines are missing
        /// (see the notes in the body), so it does not compile as shown. No line that adds a
        /// Location to locationList is visible either - presumably lost the same way; confirm.
        /// </remarks>
        public static List<Location> Parse(Country country)
            if (country == null)
                throw new ArgumentNullException("country");

            List<Location> locationList = new List<Location>();

            string html1;

            // First request: the country's own page.
            // NOTE(review): `client` is not referenced in the visible lines, and the method name
            // after `Request` is missing from the listing.
            using (WebClient client = new WebClient())
                html1 = Request
                        url: string.Format("http://tonkosti.ru{0}", country.Url),
                        encodingName: "utf-8");

            // look for the "Cities and resorts" link in the page text
            string locationsLinkPattern = "<a href=\"(?<href>[^<]*)\" title=\"[^<]*\">Города и курорты</a>";

            // No such link on the page: return the list without any further request.
            if (!Regex.IsMatch(html1, locationsLinkPattern, RegexOptions.CultureInvariant | RegexOptions.Multiline))
                return locationList;

            // Same pattern and options are applied again to capture the href group.
            Match hrefMatch = Regex.Match(html1, locationsLinkPattern, RegexOptions.CultureInvariant | RegexOptions.Multiline);

            string locationsUrl = hrefMatch.Result("${href}");

            string html2;

            // Second request: the linked page. "\n" characters are removed from the text -
            // presumably so that ".*" in byRegionsTablePattern can span the whole block, since
            // RegexOptions.Singleline is not used below; confirm.
            // NOTE(review): the method name after `Request` is missing here as well.
            using (WebClient client = new WebClient())
                html2 = Request
                        url: string.Format("http://tonkosti.ru{0}", locationsUrl),
                        encodingName: "utf-8")
                    .Replace("\n", string.Empty);

            // on the "Cities and resorts" page, look for the locations grouped by region (the alphabetical list is excluded)
            string byRegionsTablePattern = "<div id=\"RegionsListByRegions\">(?<by_regions>.*)</div><noindex><div id=\"RegionsListByAlphabet\" style=\"display: none;\">";

            Match byRegionsTableMatch = Regex.Match(html2, byRegionsTablePattern, RegexOptions.CultureInvariant | RegexOptions.Multiline);

            // NOTE(review): byRegionsTableMatch.Success is not checked before Result is called;
            // verify what happens when the page layout does not match.
            string byRegionsTable = byRegionsTableMatch.Result("${by_regions}");

            // load the fragment as XML

            XDocument doc = XDocument.Parse(byRegionsTable);

            // Level 1: <li> children of every <ul class="Level1"> found inside the <td> cells.
            // NOTE(review): the calls between `doc` and the first SelectMany, and the closing `;`,
            // are missing from this listing.
            // NOTE(review): Attribute("class") is dereferenced without a null check - verify that
            // every <ul> reached here carries a class attribute.
            var level1 = doc
                .SelectMany(tr => tr.Elements("td"))
                .SelectMany(td => td.Elements("ul"))
                .Where(ul => ul.Attribute("class").Value == "Level1")
                .SelectMany(ul => ul.Elements("li"))

            foreach (var liL1 in level1)
                // Name is the <a> child's value when there is one, otherwise FirstNode.ToString().
                string nameL1 = liL1.Element("a") != null
                    ? liL1.Element("a").Value
                    : liL1.FirstNode.ToString();

                // Capital only when the <li> has a class attribute equal to "Capital".
                bool isCapitalL1 = liL1.Attribute("class") != null
                    ? liL1.Attribute("class").Value == "Capital"
                    : false;

                // Level 2: <li> children of the <ul class="Level2"> elements under this item.
                // NOTE(review): the call that selects the <ul> elements from liL1 and the closing
                // `;` are missing from this listing.
                var level2 = liL1
                    .Where(ul => ul.Attribute("class").Value == "Level2")
                    .SelectMany(ul => ul.Elements("li"))

                Location locationL1 = new Location(
                    name: nameL1,
                    level: 1,
                    isCapital: isCapitalL1);

                foreach (var liL2 in level2)
                    // Same name and capital rules as for level 1.
                    string nameL2 = liL2.Element("a") != null
                        ? liL2.Element("a").Value
                        : liL2.FirstNode.ToString();

                    bool isCapitalL2 = liL2.Attribute("class") != null
                        ? liL2.Attribute("class").Value == "Capital"
                        : false;

                    // Level 3: <span> children of the <div> elements under this item.
                    // NOTE(review): the call that selects the <div> elements from liL2 and the
                    // closing `;` are missing from this listing.
                    var level3 = liL2
                        .SelectMany(div => div.Elements("span"))

                    Location locationL2 = new Location(
                        name: nameL2,
                        level: 2,
                        isCapital: isCapitalL2);

                    foreach (var spanL3 in level3)
                        // NOTE(review): unlike levels 1 and 2, Element("a") is dereferenced here
                        // without a null check - verify every level-3 <span> contains an <a>.
                        string nameL3 = spanL3.Element("a").Value;

                        Location locationL3 = new Location(
                            name: nameL3,
                            level: 3,
                            isCapital: false);	// can a third-level element be a capital? if so, how can that be detected?




            return locationList;