Exemplo n.º 1
0
        /// <summary>
        /// Loads the document for <c>article.Header.Link</c> via <c>GetArticle</c> and stores the
        /// concatenated paragraph text of the first "articleBody" container as the article body.
        /// </summary>
        /// <param name="article">Article container; the link is read from its header and the result is written to <c>Body</c>.</param>
        /// <returns><c>true</c> when the body container was found and <c>article.Body</c> was set; otherwise <c>false</c>.</returns>
        public override bool ParseArticle(ArticleContainer article)
        {
            var download = GetArticle(article.Header.Link);
            download.Wait();

            var page = download.Result;
            if (page == null)
            {
                return false;
            }

            // First <div> whose "itemprop" attribute mentions "articleBody".
            var bodyNode = page.DocumentNode.Descendants().FirstOrDefault(candidate =>
            {
                if (candidate.Name != "div")
                {
                    return false;
                }

                var itemprop = candidate.Attributes["itemprop"];
                return itemprop != null && itemprop.Value.Contains("articleBody");
            });

            if (bodyNode == null)
            {
                return false;
            }

            // Only direct <p> children contribute text; each one is followed by a single space.
            var text = new StringBuilder();
            foreach (var paragraph in bodyNode.ChildNodes.Where(child => child.Name == "p"))
            {
                text.Append(paragraph.InnerText).Append(" ");
            }

            article.Body = new BodyContainer
            {
                Body = text.ToString(),
                HasPicture = false
            };

            return true;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads the document for <c>article.Header.Link</c> via <c>GetArticle</c>, collects the paragraph
        /// text of the "article__item_html" container and, when a "photo__pic" image is present,
        /// tries to fetch it through <c>GetBinaryContent</c>.
        /// </summary>
        /// <param name="article">
        /// Article container; the link is read from its header, the text is written to <c>Body</c>
        /// and a fetched picture is written to <c>Header.Enclosure</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> when both the article container and its text container were found and
        /// <c>article.Body</c> was set; otherwise <c>false</c>.
        /// </returns>
        /// <remarks>
        /// A failure while fetching the picture does not fail the parse: it is recorded in <c>Error</c>
        /// and the body is stored with <c>HasPicture</c> set to <c>false</c>.
        /// </remarks>
        public override bool ParseArticle(ArticleContainer article)
        {
            var task = GetArticle(article.Header.Link);
            task.Wait();

            var doc = task.Result;
            if (doc == null)
            {
                return false;
            }

            // First <div> whose class attribute contains the full article marker string.
            var newsBody = doc.DocumentNode.Descendants().FirstOrDefault(
                x => x.Name == "div" && x.Attributes["class"] != null &&
                     x.Attributes["class"].Value.Contains("article__text js-module js-mediator-article"));
            if (newsBody == null)
            {
                return false;
            }

            var indexNode = GetDescendantByAttributes(newsBody, "div", "class", "article__item_html");
            if (indexNode == null)
            {
                return false;
            }

            var sb = new StringBuilder();
            foreach (var node in indexNode.ChildNodes.Where(n => n.Name == "p"))
            {
                // Non-breaking space entities are turned into plain spaces.
                sb.Append(node.InnerText.Replace("&nbsp;", " ") + " ");
            }

            var hasPicture = false;
            var img = GetDescendantByAttributes(newsBody, "img", "class", "photo__pic");
            if (img != null)
            {
                try
                {
                    var url = img.Attributes["src"].Value;

                    var binaryTask = GetBinaryContent(url);
                    binaryTask.Wait();
                    if (binaryTask.Result != null)
                    {
                        article.Header.Enclosure = binaryTask.Result;
                        hasPicture = true;
                    }
                }
                catch (Exception ex)
                {
                    // Task.Wait() reports failures as AggregateException, whose own message is
                    // generic; unwrap it so the recorded description names the actual cause.
                    var cause = ex is AggregateException ? ex.GetBaseException() : ex;
                    Error = new ErrorDescription
                    {
                        Level = ApplicationLevel.NewsParser,
                        Description = cause.Message
                    };
                }
            }

            article.Body = new BodyContainer
            {
                Body = sb.ToString(),
                HasPicture = hasPicture
            };

            return true;
        }
Exemplo n.º 3
0
 /// <summary>
 /// Copies the current <c>Value</c> of <c>_articleContainer</c> into <c>ArticleContainer</c>.
 /// </summary>
 public void OnGet() => ArticleContainer = _articleContainer.Value;
Exemplo n.º 4
0
        /// <summary>
        /// Loads the document for <c>article.Header.Link</c> via <c>GetArticle</c>, collects the paragraph
        /// text of the first "news_body" container and, when a "main_image" image exists anywhere in
        /// the document, tries to fetch it through <c>GetBinaryContent</c>.
        /// </summary>
        /// <param name="article">
        /// Article container; the link is read from its header, the text is written to <c>Body</c>
        /// and a fetched picture is written to <c>Header.Enclosure</c>.
        /// </param>
        /// <returns>
        /// <c>true</c> when the "news_body" container was found and <c>article.Body</c> was set;
        /// otherwise <c>false</c>.
        /// </returns>
        /// <remarks>
        /// A failure while fetching the picture does not fail the parse: it is recorded in <c>Error</c>
        /// and the body is stored with <c>HasPicture</c> set to <c>false</c>.
        /// </remarks>
        public override bool ParseArticle(ArticleContainer article)
        {
            var task = GetArticle(article.Header.Link);
            task.Wait();

            var doc = task.Result;
            if (doc == null)
            {
                return false;
            }

            // First <div> whose class attribute mentions "news_body".
            var newsBody = doc.DocumentNode.Descendants().FirstOrDefault(
                x => x.Name == "div" && x.Attributes["class"] != null &&
                     x.Attributes["class"].Value.Contains("news_body"));
            if (newsBody == null)
            {
                return false;
            }

            var sb = new StringBuilder();
            foreach (var node in newsBody.ChildNodes.Where(n => n.Name == "p"))
            {
                sb.Append(node.InnerText + " ");
            }

            // The picture is looked up in the whole document, not only inside the body container.
            var img = doc.DocumentNode.Descendants().FirstOrDefault(
                x => x.Name == "img" && x.Attributes["class"] != null &&
                     x.Attributes["class"].Value.Contains("main_image"));

            var hasPicture = false;
            if (img != null)
            {
                try
                {
                    var url = img.Attributes["src"].Value;

                    var binaryTask = GetBinaryContent(url);
                    binaryTask.Wait();
                    if (binaryTask.Result != null)
                    {
                        article.Header.Enclosure = binaryTask.Result;
                        hasPicture = true;
                    }
                }
                catch (Exception ex)
                {
                    // Task.Wait() reports failures as AggregateException, whose own message is
                    // generic; unwrap it so the recorded description names the actual cause.
                    var cause = ex is AggregateException ? ex.GetBaseException() : ex;
                    Error = new ErrorDescription
                    {
                        Level = ApplicationLevel.NewsParser,
                        Description = cause.Message
                    };
                }
            }

            article.Body = new BodyContainer
            {
                Body = sb.ToString(),
                HasPicture = hasPicture
            };

            return true;
        }