/// <summary>
/// Fetches the page referenced by the article header and fills in the article body text.
/// </summary>
/// <param name="article">
/// Article container. The page address is read from <c>article.Header.Link</c>;
/// on success <c>article.Body</c> is replaced with the extracted text.
/// </param>
/// <returns>
/// <c>true</c> when the body element was found and <c>article.Body</c> was assigned;
/// <c>false</c> when the fetch produced no document or no matching element exists.
/// </returns>
public override bool ParseArticle(ArticleContainer article)
{
    // The override is synchronous, so the fetch is awaited by blocking.
    var fetch = GetArticle(article.Header.Link);
    fetch.Wait();

    var document = fetch.Result;
    if (document == null)
    {
        return false;
    }

    // The text container is the first <div> whose "itemprop" attribute contains "articleBody".
    var bodyNode = document.DocumentNode
        .Descendants()
        .FirstOrDefault(x => x.Name == "div"
                             && x.Attributes["itemprop"] != null
                             && x.Attributes["itemprop"].Value.Contains("articleBody"));
    if (bodyNode == null)
    {
        return false;
    }

    // Only direct <p> children contribute text; each paragraph is followed by a single space.
    var text = new StringBuilder();
    foreach (var paragraph in bodyNode.ChildNodes.Where(n => n.Name == "p"))
    {
        text.Append(paragraph.InnerText).Append(" ");
    }

    // This variant never downloads a picture.
    article.Body = new BodyContainer
    {
        Body = text.ToString(),
        HasPicture = false
    };
    return true;
}
/// <summary>
/// Fetches the page referenced by the article header, extracts the article text and,
/// when a picture is present, downloads it into the article header.
/// </summary>
/// <param name="article">
/// Article container. The page address is read from <c>article.Header.Link</c>;
/// on success <c>article.Body</c> is assigned and <c>article.Header.Enclosure</c>
/// receives the picture content if one could be downloaded.
/// </param>
/// <returns>
/// <c>true</c> when the text container was found and <c>article.Body</c> was assigned;
/// <c>false</c> when the fetch produced no document or the expected elements are missing.
/// A failed picture download does not fail the parse: it is recorded in <c>Error</c>
/// and the body is stored with <c>HasPicture = false</c>.
/// </returns>
public override bool ParseArticle(ArticleContainer article)
{
    // The override is synchronous, so the fetch is awaited by blocking.
    var task = GetArticle(article.Header.Link);
    task.Wait();

    var doc = task.Result;
    if (doc == null)
    {
        return false;
    }

    // Outer article wrapper: first <div> whose class attribute contains the full marker string.
    var newsBody = doc.DocumentNode
        .Descendants()
        .FirstOrDefault(x => x.Name == "div"
                             && x.Attributes["class"] != null
                             && x.Attributes["class"].Value.Contains("article__text js-module js-mediator-article"));
    if (newsBody == null)
    {
        return false;
    }

    // The paragraphs themselves sit inside a nested "article__item_html" <div>.
    var indexNode = GetDescendantByAttributes(newsBody, "div", "class", "article__item_html");
    if (indexNode == null)
    {
        return false;
    }

    var sb = new StringBuilder();
    foreach (var node in indexNode.ChildNodes.Where(n => n.Name == "p"))
    {
        // NOTE(review): the original literal here was visually indistinguishable from a plain
        // space, which would make the Replace a no-op. Presumably the intent was to normalise
        // non-breaking spaces, so both the raw character and the un-decoded entity are handled.
        // Confirm against the source pages.
        var paragraphText = node.InnerText
            .Replace("&nbsp;", " ")
            .Replace('\u00A0', ' ');
        sb.Append(paragraphText).Append(" ");
    }

    var hasPicture = false;
    var img = GetDescendantByAttributes(newsBody, "img", "class", "photo__pic");
    if (img != null)
    {
        try
        {
            var url = img.Attributes["src"].Value;
            var binaryTask = GetBinaryContent(url);
            binaryTask.Wait();

            var pict = binaryTask.Result;
            if (pict != null)
            {
                article.Header.Enclosure = pict;
                hasPicture = true;
            }
        }
        catch (Exception ex)
        {
            // Picture download is best-effort. Wait() surfaces failures as AggregateException,
            // whose own message is generic, so the root cause's message is recorded instead.
            Error = new ErrorDescription
            {
                Level = ApplicationLevel.NewsParser,
                Description = ex.GetBaseException().Message
            };
        }
    }

    article.Body = new BodyContainer
    {
        Body = sb.ToString(),
        HasPicture = hasPicture
    };
    return true;
}
/// <summary>
/// Copies the value exposed by <c>_articleContainer</c> into the <c>ArticleContainer</c> property.
/// </summary>
public void OnGet() => ArticleContainer = _articleContainer.Value;
/// <summary>
/// Fetches the page referenced by the article header, extracts the article text and,
/// when a main image is present, downloads it into the article header.
/// </summary>
/// <param name="article">
/// Article container. The page address is read from <c>article.Header.Link</c>;
/// on success <c>article.Body</c> is assigned and <c>article.Header.Enclosure</c>
/// receives the picture content if one could be downloaded.
/// </param>
/// <returns>
/// <c>true</c> when the text container was found and <c>article.Body</c> was assigned;
/// <c>false</c> when the fetch produced no document or no text container exists.
/// A failed picture download does not fail the parse: it is recorded in <c>Error</c>
/// and the body is stored with <c>HasPicture = false</c>.
/// </returns>
public override bool ParseArticle(ArticleContainer article)
{
    // The override is synchronous, so the fetch is awaited by blocking.
    var task = GetArticle(article.Header.Link);
    task.Wait();

    var doc = task.Result;
    if (doc == null)
    {
        return false;
    }

    // Text container: first <div> whose class attribute contains "news_body".
    var newsBody = doc.DocumentNode
        .Descendants()
        .FirstOrDefault(x => x.Name == "div"
                             && x.Attributes["class"] != null
                             && x.Attributes["class"].Value.Contains("news_body"));
    if (newsBody == null)
    {
        return false;
    }

    // Only direct <p> children contribute text; each paragraph is followed by a single space.
    var sb = new StringBuilder();
    foreach (var node in newsBody.ChildNodes.Where(n => n.Name == "p"))
    {
        sb.Append(node.InnerText).Append(" ");
    }

    // The picture is searched for in the whole document, not just inside the text container.
    var img = doc.DocumentNode
        .Descendants()
        .FirstOrDefault(x => x.Name == "img"
                             && x.Attributes["class"] != null
                             && x.Attributes["class"].Value.Contains("main_image"));

    var hasPicture = false;
    if (img != null)
    {
        try
        {
            var url = img.Attributes["src"].Value;
            var binaryTask = GetBinaryContent(url);
            binaryTask.Wait();

            var pict = binaryTask.Result;
            if (pict != null)
            {
                article.Header.Enclosure = pict;
                hasPicture = true;
            }
        }
        catch (Exception ex)
        {
            // Picture download is best-effort. Wait() surfaces failures as AggregateException,
            // whose own message is generic, so the root cause's message is recorded instead.
            Error = new ErrorDescription
            {
                Level = ApplicationLevel.NewsParser,
                Description = ex.GetBaseException().Message
            };
        }
    }

    article.Body = new BodyContainer
    {
        Body = sb.ToString(),
        HasPicture = hasPicture
    };
    return true;
}