/// <summary>
/// Downloads the page at <c>url</c>, follows the first list link inside the
/// <c>avatar-right</c> block (held in <c>lastIssue</c>) and fills the
/// <c>date</c> and <c>articles</c> members from that second page.
/// </summary>
/// <remarks>
/// <c>date</c> receives the first two space-separated integers of the first
/// <c>category-list</c> heading span (" / " is treated as a separator).
/// <c>articles</c> receives one entry per link in the <c>sectionlist</c> block;
/// a link without an <c>href</c> yields the literal "Can't parse".
/// Missing nodes are not handled and surface as exceptions.
/// </remarks>
public void Parse()
{
    // WebClient is IDisposable; the using block releases it once both pages are processed.
    using (var client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding(SearchEnc.SearchEncoding(url));

        var htmlNode = new HtmlDocument();
        htmlNode.LoadHtml(client.DownloadString(url));
        var documentNode = htmlNode.DocumentNode;

        // The href is appended to the base url as-is.
        var lastIssue = url + documentNode
            .SelectSingleNode("//div[@id='avatar-right']//li[1]/a")
            .GetAttributeValue("href", "Can't parse");

        htmlNode.LoadHtml(client.DownloadString(lastIssue));
        documentNode = htmlNode.DocumentNode;

        // Query and split the heading once instead of once per tuple component.
        var dateParts = documentNode
            .SelectNodes("//div[@class='category-list']/h2/span")[0]
            .InnerText
            .Replace(" / ", " ")
            .Split(' ');
        date = Tuple.Create(Convert.ToInt32(dateParts[0]), Convert.ToInt32(dateParts[1]));

        articles = documentNode
            .SelectNodes("//div[@class='sectionlist']/ul/li/a")
            .Select(node => node.Attributes["href"] != null
                ? HttpUtility.HtmlDecode(url + node.Attributes["href"].Value)
                : "Can't parse")
            .ToArray();
    }
}
/// <summary>
/// Downloads the page at <c>url</c>, follows the first matching link in the second
/// table row (held in <c>lastIssue</c>) and fills the <c>date</c> and
/// <c>articles</c> members from that second page.
/// </summary>
/// <remarks>
/// Article links are built as <c>url</c> + first path segment of the issue address + href.
/// <c>date</c> is taken from the first article link: the element at index 3 after
/// splitting on '/', then split on '-' or ' ', with the first two pieces converted to integers.
/// Missing nodes or an empty article list are not handled and surface as exceptions.
/// </remarks>
public void Parse()
{
    // WebClient is IDisposable; the using block releases it once both pages are processed.
    using (var client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding(SearchEnc.SearchEncoding(url));

        var htmlNode = new HtmlDocument();
        htmlNode.LoadHtml(client.DownloadString(url));
        var documentNode = htmlNode.DocumentNode;

        // NOTE(review): in XPath 1.0 "and last()" is always true inside this predicate,
        // so it only filters on @class='nr'. Possibly "[@class='nr'][last()]" was meant - confirm
        // against the site before changing the query.
        var lastIssue = documentNode
            .SelectNodes("//tr[2]/td[@class='nr' and last()]/a")
            .Select(node => node.Attributes["href"] != null
                ? HttpUtility.HtmlDecode(url + node.Attributes["href"].Value)
                : "Can't parse")
            .ElementAt(0);

        // First path segment of the issue address, re-used as a prefix for article links.
        var issueDate = new Uri(lastIssue).AbsolutePath.Split('/')[1] + '/';

        htmlNode.LoadHtml(client.DownloadString(lastIssue));
        documentNode = htmlNode.DocumentNode;

        var articlesArray = documentNode
            .SelectNodes("//tr/td[@class='pub_pp']/a")
            .Select(node => node.Attributes["href"] != null
                ? HttpUtility.HtmlDecode(url + issueDate + node.Attributes["href"].Value)
                : "Can't parse")
            .ToArray();

        // Split the first article link once instead of once per tuple component.
        var dateParts = articlesArray[0]
            .Split('/')[3]
            .Replace('-', ' ')
            .Split(' ');
        date = Tuple.Create(Convert.ToInt32(dateParts[0]), Convert.ToInt32(dateParts[1]));

        articles = articlesArray;
    }
}
/// <summary>
/// Starting at <c>url</c>, repeatedly follows the link found in the last child of the
/// first <c>ul</c> element until a page without any <c>ul</c> is reached, then collects
/// the article links of that page into <c>articles</c>.
/// </summary>
/// <remarks>
/// On every intermediate page <c>date</c> is set to (integer parsed from the second token
/// of the link text, -1) and the <c>url</c> member is overwritten with the followed link.
/// Links are prefixed with <c>sites[6]</c>; a link without an <c>href</c> yields "Can't parse".
/// <c>articles</c> is an empty array when the final page has no matching links.
/// </remarks>
public void Parse()
{
    // Start from an empty result so callers never see null when no article page is reached.
    articles = new string[] { };

    // WebClient is IDisposable; the using block releases it when the walk finishes.
    using (var client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding(SearchEnc.SearchEncoding(url));
        var htmlNode = new HtmlDocument();

        while (!String.IsNullOrEmpty(url))
        {
            htmlNode.LoadHtml(client.DownloadString(url));
            var documentNode = htmlNode.DocumentNode;

            // SelectNodes yields null when nothing matches. The original relied on the
            // resulting ArgumentNullException, and its handler never left the loop.
            var ulNodes = documentNode.SelectNodes("//ul");
            if (ulNodes == null || ulNodes.Count == 0)
            {
                var articleLinks = documentNode.SelectNodes("//td[not(@*)]/a");
                if (articleLinks != null)
                {
                    articles = articleLinks
                        .Select(node => node.Attributes["href"] != null
                            ? HttpUtility.HtmlDecode(sites[6] + node.Attributes["href"].Value)
                            : "Can't parse")
                        .ToArray();
                }

                // Terminal page reached: stop instead of downloading the same url again.
                break;
            }

            var aNode = ulNodes[0].LastChild.SelectSingleNode("a");
            var linkNode = aNode.Attributes["href"] != null
                ? HttpUtility.HtmlDecode(sites[6] + aNode.Attributes["href"].Value)
                : "Can't parse";

            // Second token of the link text (split on ' ' and '&') is the numeric part.
            date = Tuple.Create(
                Convert.ToInt32(aNode.InnerText.Split(new Char[] { ' ', '&' })[1]),
                -1);

            url = linkNode;
        }
    }
}
/// <summary>
/// Downloads the page at <c>url</c>, follows the <c>dd/a</c> link whose text is
/// "Последний выпуск" (held in <c>lastIssue</c>) and fills the <c>date</c> and
/// <c>articles</c> members from that second page.
/// </summary>
/// <remarks>
/// <c>date</c> is parsed from the text after '№' in the first <c>body/p</c> element:
/// parentheses are removed, the first character is dropped, and the first two
/// space-separated tokens are converted to integers.
/// <c>articles</c> holds every <c>dd/a</c> link except the last two, prefixed with
/// <c>sites[1]</c>; a link without an <c>href</c> yields "Can't parse".
/// Missing nodes are not handled and surface as exceptions.
/// </remarks>
public void Parse()
{
    // WebClient is IDisposable; the using block releases it once both pages are processed.
    using (var client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding(SearchEnc.SearchEncoding(url));

        var htmlNode = new HtmlDocument();
        htmlNode.LoadHtml(client.DownloadString(url));
        var documentNode = htmlNode.DocumentNode;

        var lastIssue = documentNode
            .SelectNodes("//dd/a")
            .Where(node => node.InnerText == "Последний выпуск")
            .Select(node => sites[1] + node.Attributes["href"].Value)
            .First();

        htmlNode.LoadHtml(client.DownloadString(lastIssue));
        documentNode = htmlNode.DocumentNode;

        // Parse the heading once instead of once per tuple component.
        var dateParts = documentNode
            .SelectNodes("//body/p")[0]
            .InnerText
            .Split('№')[1]
            .Replace("(", string.Empty)
            .Replace(")", string.Empty)
            .Substring(1)
            .Split(' ');
        date = Tuple.Create(Convert.ToInt32(dateParts[0]), Convert.ToInt32(dateParts[1]));

        var aNode = documentNode.SelectNodes("//dd/a");

        // The last two links are excluded from the article list.
        articles = aNode
            .Take(aNode.Count - 2)
            .Select(node => node.Attributes["href"] != null
                ? HttpUtility.HtmlDecode(sites[1] + node.Attributes["href"].Value)
                : "Can't parse")
            .ToArray();
    }
}
/// <summary>
/// Downloads the page at <c>url</c>, reads the link in the first
/// <c>leftmenuarticles</c> row, sets <c>date</c> from its text, then downloads that
/// link and collects article links from the remaining rows into <c>articles</c>.
/// </summary>
/// <remarks>
/// <c>date</c> receives the first two space-separated integers of the link text
/// (" / " is treated as a separator).
/// <c>articles</c> starts with the followed link itself, followed by every
/// <c>td/div/a</c> link of each row after the first; a link without an <c>href</c>
/// yields "Can't parse". Rows that contain no such links are skipped.
/// </remarks>
public void Parse()
{
    // WebClient is IDisposable; the using block releases it once both pages are processed.
    using (var client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding(SearchEnc.SearchEncoding(url));

        var htmlNode = new HtmlDocument();
        htmlNode.LoadHtml(client.DownloadString(url));
        var documentNode = htmlNode.DocumentNode;

        var aNode = documentNode
            .SelectNodes("//tr[@class='leftmenuarticles']")[0]
            .SelectSingleNode("td/div")
            .SelectSingleNode("a");

        // Split the link text once instead of once per tuple component.
        var dateParts = aNode.InnerText.Replace(" / ", " ").Split(' ');
        date = Tuple.Create(Convert.ToInt32(dateParts[0]), Convert.ToInt32(dateParts[1]));

        var link = aNode.Attributes["href"] != null
            ? HttpUtility.HtmlDecode(aNode.Attributes["href"].Value)
            : "Can't parse";

        htmlNode.LoadHtml(client.DownloadString(link));
        documentNode = htmlNode.DocumentNode;

        var trArtNodes = documentNode.SelectNodes("//tr[@class='leftmenuarticles']");

        var articlesList = new List<string>();
        articlesList.Add(link);

        // The first row is the link that was just followed, so it is skipped here.
        foreach (var trArtNode in trArtNodes.Skip(1))
        {
            var aArtNodes = trArtNode.SelectNodes("td/div/a");
            if (aArtNodes == null)
            {
                // SelectNodes yields null for a row without links; nothing to add.
                continue;
            }

            foreach (var aArtNode in aArtNodes)
            {
                articlesList.Add(aArtNode.Attributes["href"] != null
                    ? HttpUtility.HtmlDecode(aArtNode.Attributes["href"].Value)
                    : "Can't parse");
            }
        }

        articles = articlesList.ToArray();
    }
}
/// <summary>
/// Downloads the page at <c>url</c>, follows the link inside the <c>new-num</c> block
/// of the <c>journal</c> block (held in <c>lastIssue</c>) and fills the <c>date</c>
/// and <c>articles</c> members from that second page.
/// </summary>
/// <remarks>
/// <c>date</c> is parsed from the path of the followed address: the leading '/' is
/// dropped, the path is split on '-', and the first piece plus the part of the third
/// piece before any '%' are converted to integers.
/// <c>articles</c> receives one entry per link in the <c>link</c> table; a link
/// without an <c>href</c> yields "Can't parse".
/// Missing nodes are not handled and surface as exceptions.
/// </remarks>
public void Parse()
{
    // WebClient is IDisposable; the using block releases it once both pages are processed.
    using (var client = new WebClient())
    {
        client.Encoding = Encoding.GetEncoding(SearchEnc.SearchEncoding(url));

        var htmlNode = new HtmlDocument();
        htmlNode.LoadHtml(client.DownloadString(url));
        var documentNode = htmlNode.DocumentNode;

        // The href is used as-is, without a base prefix.
        var lastIssue = documentNode
            .SelectSingleNode("//div[@class='journal']//div[@class='new-num']/a")
            .GetAttributeValue("href", "Can't parse");

        htmlNode.LoadHtml(client.DownloadString(lastIssue));
        documentNode = htmlNode.DocumentNode;

        // Build the Uri and split its path once instead of once per tuple component.
        var pathParts = new Uri(lastIssue).AbsolutePath.Substring(1).Split('-');
        date = Tuple.Create(
            Convert.ToInt32(pathParts[0]),
            Convert.ToInt32(pathParts[2].Split('%')[0]));

        articles = documentNode
            .SelectNodes("//table[@class='link']//div[@class='link']/a")
            .Select(node => node.Attributes["href"] != null
                ? HttpUtility.HtmlDecode(node.Attributes["href"].Value)
                : "Can't parse")
            .ToArray();
    }
}