Esempio n. 1
0
            /// <summary>
            /// Downloads the page at <c>url</c>, follows the first list link found under the
            /// element with id <c>avatar-right</c>, and fills <c>date</c> and <c>articles</c>
            /// from the page that link points to.
            /// </summary>
            /// <remarks>
            /// <c>date</c> is a tuple of the first two space-separated integers in the text of the
            /// first <c>div.category-list/h2/span</c> node (" / " is treated as a separator).
            /// <c>articles</c> gets one entry per <c>div.sectionlist/ul/li/a</c> anchor: the
            /// HTML-decoded <c>url + href</c>, or "Can't parse" when the anchor has no href.
            /// </remarks>
            public void Parse()
            {
                // WebClient is IDisposable; release it even if a download or parse step throws.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.GetEncoding(
                        SearchEnc.SearchEncoding(url));
                    var htmlNode = new HtmlDocument();

                    htmlNode.LoadHtml(client.DownloadString(url));
                    var documentNode = htmlNode.DocumentNode;
                    var lastIssue    = url + documentNode.SelectSingleNode("//div[@id='avatar-right']//li[1]/a")
                                       .GetAttributeValue("href", "Can't parse");

                    htmlNode.LoadHtml(client.DownloadString(lastIssue));
                    documentNode = htmlNode.DocumentNode;

                    // Run the XPath query and split the header text once instead of once per
                    // tuple component.
                    var dateParts = documentNode
                                    .SelectNodes("//div[@class='category-list']/h2/span") [0]
                                    .InnerText
                                    .Replace(" / ", " ")
                                    .Split(' ');

                    date     = Tuple.Create(Convert.ToInt32(dateParts [0]),
                                            Convert.ToInt32(dateParts [1]));
                    articles = documentNode
                               .SelectNodes("//div[@class='sectionlist']/ul/li/a")
                               .Select(node => node.Attributes ["href"] != null
                                                    ? HttpUtility.HtmlDecode(url + node.Attributes ["href"].Value.ToString())
                                                    : "Can't parse")
                               .ToArray();
                }
            }
Esempio n. 2
0
            /// <summary>
            /// Downloads the page at <c>url</c>, resolves the issue link from the first anchor
            /// matched by <c>//tr[2]/td[@class='nr' and last()]/a</c>, then downloads that page
            /// and fills <c>articles</c> and <c>date</c> from it.
            /// </summary>
            /// <remarks>
            /// Each article link is the HTML-decoded <c>url + issueDate + href</c>, where
            /// <c>issueDate</c> is the first path segment of the issue URL followed by '/';
            /// anchors without an href yield "Can't parse".
            /// <c>date</c> is parsed from the fourth '/'-separated piece of the first article
            /// link, split on '-' (presumably a "number-number" issue segment; confirm against
            /// the target site).
            /// </remarks>
            public void Parse()
            {
                // WebClient is IDisposable; release it even if a download or parse step throws.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.GetEncoding(
                        SearchEnc.SearchEncoding(url));
                    var htmlNode = new HtmlDocument();

                    htmlNode.LoadHtml(client.DownloadString(url));
                    var documentNode = htmlNode.DocumentNode;
                    var lastIssue    = documentNode
                                       .SelectNodes("//tr[2]/td[@class='nr' and last()]/a")
                                       .Select(node => node.Attributes ["href"] != null
                                                    ? HttpUtility.HtmlDecode(url + node.Attributes ["href"].Value.ToString())
                                                    : "Can't parse").ElementAt(0);
                    var uriAddress = new Uri(lastIssue);
                    var issueDate  = uriAddress.AbsolutePath.Split('/')[1] + '/';

                    htmlNode.LoadHtml(client.DownloadString(lastIssue));
                    documentNode = htmlNode.DocumentNode;
                    var articlesArray = documentNode
                                        .SelectNodes("//tr/td[@class='pub_pp']/a")
                                        .Select(node => node.Attributes ["href"] != null
                                                    ? HttpUtility.HtmlDecode(url + issueDate + node.Attributes ["href"].Value.ToString())
                                                    : "Can't parse")
                                        .ToArray();

                    // Split the first article link once; both tuple components come from the
                    // same '-'-separated segment.
                    var dateParts = articlesArray [0]
                                    .Split('/') [3]
                                    .Replace('-', ' ')
                                    .Split(' ');

                    date     = Tuple.Create(Convert.ToInt32(dateParts [0]),
                                            Convert.ToInt32(dateParts [1]));
                    articles = articlesArray;
                }
            }
Esempio n. 3
0
            /// <summary>
            /// Starting at <c>url</c>, repeatedly follows the anchor inside the last child of the
            /// first <c>ul</c> element until a page without any <c>ul</c> is reached, then
            /// collects that page's article links into <c>articles</c>.
            /// </summary>
            /// <remarks>
            /// On every intermediate page <c>date</c> is set to (number parsed from the second
            /// ' ' / '&amp;'-separated token of the anchor text, -1) and <c>url</c> is overwritten
            /// with the followed link, so <c>url</c> is mutated by this method.
            /// Article links are the HTML-decoded <c>sites[6] + href</c> of every
            /// <c>//td[not(@*)]/a</c> anchor, or "Can't parse" when the anchor has no href.
            /// If <c>url</c> is empty on entry, or the final page has no matching anchors,
            /// <c>articles</c> is set to an empty array.
            /// </remarks>
            public void Parse()
            {
                // WebClient is IDisposable; release it even if a download or parse step throws.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.GetEncoding(
                        SearchEnc.SearchEncoding(url));
                    var htmlNode = new HtmlDocument();

                    while (!String.IsNullOrEmpty(url))
                    {
                        htmlNode.LoadHtml(client.DownloadString(url));
                        var documentNode = htmlNode.DocumentNode;

                        // SelectNodes returns null (not an empty collection) when nothing
                        // matches. The original relied on the resulting ArgumentNullException
                        // to detect the final page; test for null explicitly instead.
                        var ulNodes = documentNode.SelectNodes("//ul");

                        if (ulNodes == null)
                        {
                            var linkNodes = documentNode.SelectNodes("//td[not(@*)]/a");

                            // Check the attribute itself, not its Value, so a missing href
                            // yields the fallback string rather than a NullReferenceException.
                            articles = linkNodes == null
                                       ? new string[] {}
                                       : linkNodes
                                         .Select(node => node.Attributes ["href"] != null
                                                      ? HttpUtility.HtmlDecode(sites [6] + node.Attributes ["href"].Value)
                                                      : "Can't parse")
                                         .ToArray();

                            // Final page reached: stop here. The original neither left the loop
                            // (url was unchanged, so the same page was fetched forever) nor
                            // protected the result from being overwritten after the loop.
                            return;
                        }

                        var aNode    = ulNodes [0].LastChild.SelectSingleNode("a");
                        var linkNode = aNode.Attributes ["href"] != null
                                       ? HttpUtility.HtmlDecode(sites [6] + aNode.Attributes ["href"].Value.ToString())
                                       : "Can't parse";

                        date = Tuple.Create(Convert.ToInt32(aNode.InnerText.Split(new Char [] { ' ', '&' }).ToList()[1]), -1);
                        url  = linkNode;
                    }

                    // Only reached when there was nothing to download.
                    articles = new string[] {};
                }
            }
Esempio n. 4
0
            /// <summary>
            /// Downloads the page at <c>url</c>, follows the <c>dd/a</c> anchor whose text is
            /// "Последний выпуск", and fills <c>date</c> and <c>articles</c> from that page.
            /// </summary>
            /// <remarks>
            /// <c>date</c> is parsed from the text of the first <c>body/p</c> node: the part
            /// after '№', with parentheses removed and its first character dropped, is split on
            /// spaces and the first two tokens are converted to integers.
            /// <c>articles</c> contains every <c>dd/a</c> anchor except the last two, as the
            /// HTML-decoded <c>sites[1] + href</c>, or "Can't parse" when there is no href.
            /// </remarks>
            public void Parse()
            {
                // WebClient is IDisposable; release it even if a download or parse step throws.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.GetEncoding(
                        SearchEnc.SearchEncoding(url));
                    var htmlNode = new HtmlDocument();

                    htmlNode.LoadHtml(client.DownloadString(url));
                    var documentNode = htmlNode.DocumentNode;
                    var lastIssue    = documentNode
                                       .SelectNodes("//dd/a")
                                       .Where(node => node.InnerText == "Последний выпуск")
                                       .Select(node => sites[1] + node.Attributes["href"].Value.ToString())
                                       .First();

                    htmlNode.LoadHtml(client.DownloadString(lastIssue));
                    documentNode = htmlNode.DocumentNode;

                    // Parse the header paragraph once; both tuple components come from the
                    // same token list.
                    var issueParts = documentNode
                                     .SelectNodes("//body/p")[0]
                                     .InnerText
                                     .Split('№')[1]
                                     .Replace("(", string.Empty)
                                     .Replace(")", string.Empty)
                                     .Substring(1)
                                     .Split(' ');

                    date = Tuple.Create(Convert.ToInt32(issueParts[0]),
                                        Convert.ToInt32(issueParts[1]));

                    var aNode = documentNode
                                .SelectNodes("//dd/a");

                    // The last two anchors are deliberately left out of the article list.
                    articles = aNode
                               .Take(aNode.Count - 2)
                               .Select(node => node.Attributes ["href"] != null
                                                    ? HttpUtility.HtmlDecode(sites[1] + node.Attributes ["href"].Value.ToString())
                                                    : "Can't parse")
                               .ToArray();
                }
            }
Esempio n. 5
0
            /// <summary>
            /// Downloads the page at <c>url</c>, reads the anchor in the first
            /// <c>tr.leftmenuarticles</c> row to obtain <c>date</c> and a link, then downloads
            /// that link and collects article links into <c>articles</c>.
            /// </summary>
            /// <remarks>
            /// <c>date</c> is a tuple of the first two space-separated integers of the anchor
            /// text (" / " is treated as a separator).
            /// <c>articles</c> starts with the followed link itself, followed by the
            /// HTML-decoded href of every <c>td/div/a</c> anchor in all
            /// <c>tr.leftmenuarticles</c> rows after the first; anchors without an href yield
            /// "Can't parse". Rows that contain no such anchors are skipped.
            /// </remarks>
            public void Parse()
            {
                // WebClient is IDisposable; release it even if a download or parse step throws.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.GetEncoding(
                        SearchEnc.SearchEncoding(url));
                    var htmlNode = new HtmlDocument();

                    htmlNode.LoadHtml(client.DownloadString(url));
                    var documentNode = htmlNode.DocumentNode;
                    var trNode       = documentNode
                                       .SelectNodes("//tr[@class='leftmenuarticles']") [0]
                                       .SelectSingleNode("td/div");
                    var aNode = trNode.SelectSingleNode("a");

                    // Split the anchor text once for both tuple components.
                    var dateParts = aNode.InnerText.Replace(" / ", " ").Split(' ');

                    date = Tuple.Create(Convert.ToInt32(dateParts [0]),
                                        Convert.ToInt32(dateParts [1]));
                    var link = aNode.Attributes ["href"] != null
                               ? HttpUtility.HtmlDecode(aNode.Attributes ["href"].Value.ToString())
                               : "Can't parse";

                    htmlNode.LoadHtml(client.DownloadString(link));
                    documentNode = htmlNode.DocumentNode;
                    var trArtNodes = documentNode
                                     .SelectNodes("//tr[@class='leftmenuarticles']");
                    List <string> articlesList = new List <string>();

                    articlesList.Add(link);
                    foreach (var trArtNode in trArtNodes.Skip(1))
                    {
                        var aArtNodes = trArtNode
                                        .SelectNodes("td/div/a");

                        // SelectNodes returns null when a row has no matching anchors;
                        // iterating null would throw, so skip such rows.
                        if (aArtNodes == null)
                        {
                            continue;
                        }

                        foreach (var aArtNode in aArtNodes)
                        {
                            articlesList.Add(aArtNode.Attributes ["href"] != null
                                             ? HttpUtility.HtmlDecode(aArtNode.Attributes ["href"].Value.ToString())
                                             : "Can't parse");
                        }
                    }
                    articles = articlesList.ToArray();
                }
            }
Esempio n. 6
0
            /// <summary>
            /// Downloads the page at <c>url</c>, follows the anchor under
            /// <c>div.journal // div.new-num</c>, and fills <c>date</c> and <c>articles</c>
            /// from that page and its URL.
            /// </summary>
            /// <remarks>
            /// <c>date</c> is parsed from the path of the followed URL: the path without its
            /// leading '/' is split on '-'; the first piece and the third piece (cut at the
            /// first '%') are converted to integers.
            /// <c>articles</c> gets the HTML-decoded href of every
            /// <c>table.link // div.link / a</c> anchor, or "Can't parse" when there is no href.
            /// </remarks>
            public void Parse()
            {
                // WebClient is IDisposable; release it even if a download or parse step throws.
                using (WebClient client = new WebClient())
                {
                    client.Encoding = Encoding.GetEncoding(
                        SearchEnc.SearchEncoding(url));
                    var htmlNode = new HtmlDocument();

                    htmlNode.LoadHtml(client.DownloadString(url));
                    var documentNode = htmlNode.DocumentNode;
                    var lastIssue    = documentNode.SelectSingleNode("//div[@class='journal']//div[@class='new-num']/a")
                                       .GetAttributeValue("href", "Can't parse");

                    htmlNode.LoadHtml(client.DownloadString(lastIssue));
                    documentNode = htmlNode.DocumentNode;

                    // Build the Uri and split its path once instead of once per tuple component.
                    var pathParts = new Uri(lastIssue).AbsolutePath.Substring(1).Split('-');

                    date     = Tuple.Create(Convert.ToInt32(pathParts [0]),
                                            Convert.ToInt32(pathParts [2].Split('%')[0]));
                    articles = documentNode
                               .SelectNodes("//table[@class='link']//div[@class='link']/a")
                               .Select(node => node.Attributes ["href"] != null
                                                    ? HttpUtility.HtmlDecode(node.Attributes ["href"].Value.ToString())
                                                    : "Can't parse")
                               .ToArray();
                }
            }